From cfcc8a20e862b758c32bd3f152186e6df0591a24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= Date: Thu, 13 Oct 2016 19:40:40 +0200 Subject: theora: Move to a module and split thirdparty lib Same rationale as the previous commits. --- SConstruct | 10 +- drivers/SCsub | 2 - drivers/register_driver_types.cpp | 22 +- drivers/theora/COPYING | 28 - drivers/theora/LICENSE | 18 - drivers/theora/SCsub | 65 - drivers/theora/analyze.c | 2709 -------------------- drivers/theora/apiwrapper.c | 166 -- drivers/theora/apiwrapper.h | 54 - drivers/theora/bitpack.c | 111 - drivers/theora/bitpack.h | 59 - drivers/theora/codec.h | 591 ----- drivers/theora/cpu.c | 226 -- drivers/theora/cpu.h | 34 - drivers/theora/dct.h | 31 - drivers/theora/decapiwrapper.c | 193 -- drivers/theora/decinfo.c | 246 -- drivers/theora/decint.h | 107 - drivers/theora/decode.c | 2943 ---------------------- drivers/theora/dequant.c | 182 -- drivers/theora/dequant.h | 27 - drivers/theora/encapiwrapper.c | 168 -- drivers/theora/encfrag.c | 388 --- drivers/theora/encinfo.c | 121 - drivers/theora/encint.h | 493 ---- drivers/theora/encode.c | 1615 ------------ drivers/theora/encoder_disabled.c | 67 - drivers/theora/enquant.c | 274 -- drivers/theora/enquant.h | 27 - drivers/theora/fdct.c | 422 ---- drivers/theora/fragment.c | 87 - drivers/theora/huffdec.c | 489 ---- drivers/theora/huffdec.h | 92 - drivers/theora/huffenc.c | 910 ------- drivers/theora/huffenc.h | 19 - drivers/theora/huffman.h | 70 - drivers/theora/idct.c | 335 --- drivers/theora/info.c | 131 - drivers/theora/internal.c | 262 -- drivers/theora/internal.h | 509 ---- drivers/theora/mathops.c | 296 --- drivers/theora/mathops.h | 141 -- drivers/theora/mcenc.c | 767 ------ drivers/theora/modedec.h | 4027 ------------------------------ drivers/theora/ocintrin.h | 128 - drivers/theora/quant.c | 119 - drivers/theora/quant.h | 33 - drivers/theora/rate.c | 1137 --------- drivers/theora/state.c | 1227 --------- drivers/theora/theora.exp | 55 - 
drivers/theora/theora.h | 784 ------ drivers/theora/theoradec.h | 325 --- drivers/theora/theoraenc.h | 486 ---- drivers/theora/tokenize.c | 1072 -------- drivers/theora/video_stream_theora.cpp | 945 ------- drivers/theora/video_stream_theora.h | 204 -- drivers/theora/x86/mmxencfrag.c | 900 ------- drivers/theora/x86/mmxfdct.c | 665 ----- drivers/theora/x86/mmxfrag.c | 293 --- drivers/theora/x86/mmxfrag.h | 64 - drivers/theora/x86/mmxidct.c | 564 ----- drivers/theora/x86/mmxloop.h | 215 -- drivers/theora/x86/mmxstate.c | 188 -- drivers/theora/x86/sse2fdct.c | 523 ---- drivers/theora/x86/x86enc.c | 49 - drivers/theora/x86/x86enc.h | 47 - drivers/theora/x86/x86int.h | 42 - drivers/theora/x86/x86state.c | 62 - drivers/theora/x86_vc/mmxencfrag.c | 969 ------- drivers/theora/x86_vc/mmxfdct.c | 670 ----- drivers/theora/x86_vc/mmxfrag.c | 337 --- drivers/theora/x86_vc/mmxfrag.h | 61 - drivers/theora/x86_vc/mmxidct.c | 562 ----- drivers/theora/x86_vc/mmxloop.h | 219 -- drivers/theora/x86_vc/mmxstate.c | 211 -- drivers/theora/x86_vc/x86enc.c | 49 - drivers/theora/x86_vc/x86enc.h | 47 - drivers/theora/x86_vc/x86int.h | 42 - drivers/theora/x86_vc/x86state.c | 62 - drivers/theora/yuv2rgb.h | 1121 --------- modules/theora/SCsub | 81 + modules/theora/config.py | 6 + modules/theora/register_types.cpp | 45 + modules/theora/register_types.h | 30 + modules/theora/video_stream_theora.cpp | 940 +++++++ modules/theora/video_stream_theora.h | 199 ++ modules/theora/yuv2rgb.h | 1121 +++++++++ platform/bb10/detect.py | 3 +- platform/javascript/detect.py | 2 +- platform/x11/detect.py | 17 +- thirdparty/README.md | 13 + thirdparty/libtheora/COPYING | 28 + thirdparty/libtheora/LICENSE | 18 + thirdparty/libtheora/analyze.c | 2709 ++++++++++++++++++++ thirdparty/libtheora/apiwrapper.c | 166 ++ thirdparty/libtheora/apiwrapper.h | 54 + thirdparty/libtheora/bitpack.c | 111 + thirdparty/libtheora/bitpack.h | 59 + thirdparty/libtheora/cpu.c | 226 ++ thirdparty/libtheora/cpu.h | 34 + 
thirdparty/libtheora/dct.h | 31 + thirdparty/libtheora/decapiwrapper.c | 193 ++ thirdparty/libtheora/decinfo.c | 246 ++ thirdparty/libtheora/decint.h | 107 + thirdparty/libtheora/decode.c | 2943 ++++++++++++++++++++++ thirdparty/libtheora/dequant.c | 182 ++ thirdparty/libtheora/dequant.h | 27 + thirdparty/libtheora/encapiwrapper.c | 168 ++ thirdparty/libtheora/encfrag.c | 388 +++ thirdparty/libtheora/encinfo.c | 121 + thirdparty/libtheora/encint.h | 493 ++++ thirdparty/libtheora/encode.c | 1615 ++++++++++++ thirdparty/libtheora/encoder_disabled.c | 67 + thirdparty/libtheora/enquant.c | 274 ++ thirdparty/libtheora/enquant.h | 27 + thirdparty/libtheora/fdct.c | 422 ++++ thirdparty/libtheora/fragment.c | 87 + thirdparty/libtheora/huffdec.c | 489 ++++ thirdparty/libtheora/huffdec.h | 92 + thirdparty/libtheora/huffenc.c | 910 +++++++ thirdparty/libtheora/huffenc.h | 19 + thirdparty/libtheora/huffman.h | 70 + thirdparty/libtheora/idct.c | 335 +++ thirdparty/libtheora/info.c | 131 + thirdparty/libtheora/internal.c | 262 ++ thirdparty/libtheora/internal.h | 509 ++++ thirdparty/libtheora/mathops.c | 296 +++ thirdparty/libtheora/mathops.h | 141 ++ thirdparty/libtheora/mcenc.c | 767 ++++++ thirdparty/libtheora/modedec.h | 4027 ++++++++++++++++++++++++++++++ thirdparty/libtheora/ocintrin.h | 128 + thirdparty/libtheora/quant.c | 119 + thirdparty/libtheora/quant.h | 33 + thirdparty/libtheora/rate.c | 1137 +++++++++ thirdparty/libtheora/state.c | 1227 +++++++++ thirdparty/libtheora/theora/codec.h | 591 +++++ thirdparty/libtheora/theora/theora.h | 784 ++++++ thirdparty/libtheora/theora/theoradec.h | 325 +++ thirdparty/libtheora/theora/theoraenc.h | 486 ++++ thirdparty/libtheora/tokenize.c | 1072 ++++++++ thirdparty/libtheora/x86/mmxencfrag.c | 900 +++++++ thirdparty/libtheora/x86/mmxfdct.c | 665 +++++ thirdparty/libtheora/x86/mmxfrag.c | 293 +++ thirdparty/libtheora/x86/mmxfrag.h | 64 + thirdparty/libtheora/x86/mmxidct.c | 564 +++++ thirdparty/libtheora/x86/mmxloop.h | 215 ++ 
thirdparty/libtheora/x86/mmxstate.c | 188 ++ thirdparty/libtheora/x86/sse2fdct.c | 523 ++++ thirdparty/libtheora/x86/x86enc.c | 49 + thirdparty/libtheora/x86/x86enc.h | 47 + thirdparty/libtheora/x86/x86int.h | 42 + thirdparty/libtheora/x86/x86state.c | 62 + thirdparty/libtheora/x86_vc/mmxencfrag.c | 969 +++++++ thirdparty/libtheora/x86_vc/mmxfdct.c | 670 +++++ thirdparty/libtheora/x86_vc/mmxfrag.c | 337 +++ thirdparty/libtheora/x86_vc/mmxfrag.h | 61 + thirdparty/libtheora/x86_vc/mmxidct.c | 562 +++++ thirdparty/libtheora/x86_vc/mmxloop.h | 219 ++ thirdparty/libtheora/x86_vc/mmxstate.c | 211 ++ thirdparty/libtheora/x86_vc/x86enc.c | 49 + thirdparty/libtheora/x86_vc/x86enc.h | 47 + thirdparty/libtheora/x86_vc/x86int.h | 42 + thirdparty/libtheora/x86_vc/x86state.c | 62 + 163 files changed, 34038 insertions(+), 34017 deletions(-) delete mode 100644 drivers/theora/COPYING delete mode 100644 drivers/theora/LICENSE delete mode 100644 drivers/theora/SCsub delete mode 100644 drivers/theora/analyze.c delete mode 100644 drivers/theora/apiwrapper.c delete mode 100644 drivers/theora/apiwrapper.h delete mode 100644 drivers/theora/bitpack.c delete mode 100644 drivers/theora/bitpack.h delete mode 100644 drivers/theora/codec.h delete mode 100644 drivers/theora/cpu.c delete mode 100644 drivers/theora/cpu.h delete mode 100644 drivers/theora/dct.h delete mode 100644 drivers/theora/decapiwrapper.c delete mode 100644 drivers/theora/decinfo.c delete mode 100644 drivers/theora/decint.h delete mode 100644 drivers/theora/decode.c delete mode 100644 drivers/theora/dequant.c delete mode 100644 drivers/theora/dequant.h delete mode 100644 drivers/theora/encapiwrapper.c delete mode 100644 drivers/theora/encfrag.c delete mode 100644 drivers/theora/encinfo.c delete mode 100644 drivers/theora/encint.h delete mode 100644 drivers/theora/encode.c delete mode 100644 drivers/theora/encoder_disabled.c delete mode 100644 drivers/theora/enquant.c delete mode 100644 drivers/theora/enquant.h delete mode 
100644 drivers/theora/fdct.c delete mode 100644 drivers/theora/fragment.c delete mode 100644 drivers/theora/huffdec.c delete mode 100644 drivers/theora/huffdec.h delete mode 100644 drivers/theora/huffenc.c delete mode 100644 drivers/theora/huffenc.h delete mode 100644 drivers/theora/huffman.h delete mode 100644 drivers/theora/idct.c delete mode 100644 drivers/theora/info.c delete mode 100644 drivers/theora/internal.c delete mode 100644 drivers/theora/internal.h delete mode 100644 drivers/theora/mathops.c delete mode 100644 drivers/theora/mathops.h delete mode 100644 drivers/theora/mcenc.c delete mode 100644 drivers/theora/modedec.h delete mode 100644 drivers/theora/ocintrin.h delete mode 100644 drivers/theora/quant.c delete mode 100644 drivers/theora/quant.h delete mode 100644 drivers/theora/rate.c delete mode 100644 drivers/theora/state.c delete mode 100644 drivers/theora/theora.exp delete mode 100644 drivers/theora/theora.h delete mode 100644 drivers/theora/theoradec.h delete mode 100644 drivers/theora/theoraenc.h delete mode 100644 drivers/theora/tokenize.c delete mode 100644 drivers/theora/video_stream_theora.cpp delete mode 100644 drivers/theora/video_stream_theora.h delete mode 100644 drivers/theora/x86/mmxencfrag.c delete mode 100644 drivers/theora/x86/mmxfdct.c delete mode 100644 drivers/theora/x86/mmxfrag.c delete mode 100644 drivers/theora/x86/mmxfrag.h delete mode 100644 drivers/theora/x86/mmxidct.c delete mode 100644 drivers/theora/x86/mmxloop.h delete mode 100644 drivers/theora/x86/mmxstate.c delete mode 100644 drivers/theora/x86/sse2fdct.c delete mode 100644 drivers/theora/x86/x86enc.c delete mode 100644 drivers/theora/x86/x86enc.h delete mode 100644 drivers/theora/x86/x86int.h delete mode 100644 drivers/theora/x86/x86state.c delete mode 100644 drivers/theora/x86_vc/mmxencfrag.c delete mode 100644 drivers/theora/x86_vc/mmxfdct.c delete mode 100644 drivers/theora/x86_vc/mmxfrag.c delete mode 100644 drivers/theora/x86_vc/mmxfrag.h delete mode 100644 
drivers/theora/x86_vc/mmxidct.c delete mode 100644 drivers/theora/x86_vc/mmxloop.h delete mode 100644 drivers/theora/x86_vc/mmxstate.c delete mode 100644 drivers/theora/x86_vc/x86enc.c delete mode 100644 drivers/theora/x86_vc/x86enc.h delete mode 100644 drivers/theora/x86_vc/x86int.h delete mode 100644 drivers/theora/x86_vc/x86state.c delete mode 100644 drivers/theora/yuv2rgb.h create mode 100644 modules/theora/SCsub create mode 100644 modules/theora/config.py create mode 100644 modules/theora/register_types.cpp create mode 100644 modules/theora/register_types.h create mode 100644 modules/theora/video_stream_theora.cpp create mode 100644 modules/theora/video_stream_theora.h create mode 100644 modules/theora/yuv2rgb.h create mode 100644 thirdparty/libtheora/COPYING create mode 100644 thirdparty/libtheora/LICENSE create mode 100644 thirdparty/libtheora/analyze.c create mode 100644 thirdparty/libtheora/apiwrapper.c create mode 100644 thirdparty/libtheora/apiwrapper.h create mode 100644 thirdparty/libtheora/bitpack.c create mode 100644 thirdparty/libtheora/bitpack.h create mode 100644 thirdparty/libtheora/cpu.c create mode 100644 thirdparty/libtheora/cpu.h create mode 100644 thirdparty/libtheora/dct.h create mode 100644 thirdparty/libtheora/decapiwrapper.c create mode 100644 thirdparty/libtheora/decinfo.c create mode 100644 thirdparty/libtheora/decint.h create mode 100644 thirdparty/libtheora/decode.c create mode 100644 thirdparty/libtheora/dequant.c create mode 100644 thirdparty/libtheora/dequant.h create mode 100644 thirdparty/libtheora/encapiwrapper.c create mode 100644 thirdparty/libtheora/encfrag.c create mode 100644 thirdparty/libtheora/encinfo.c create mode 100644 thirdparty/libtheora/encint.h create mode 100644 thirdparty/libtheora/encode.c create mode 100644 thirdparty/libtheora/encoder_disabled.c create mode 100644 thirdparty/libtheora/enquant.c create mode 100644 thirdparty/libtheora/enquant.h create mode 100644 thirdparty/libtheora/fdct.c create mode 100644 
thirdparty/libtheora/fragment.c create mode 100644 thirdparty/libtheora/huffdec.c create mode 100644 thirdparty/libtheora/huffdec.h create mode 100644 thirdparty/libtheora/huffenc.c create mode 100644 thirdparty/libtheora/huffenc.h create mode 100644 thirdparty/libtheora/huffman.h create mode 100644 thirdparty/libtheora/idct.c create mode 100644 thirdparty/libtheora/info.c create mode 100644 thirdparty/libtheora/internal.c create mode 100644 thirdparty/libtheora/internal.h create mode 100644 thirdparty/libtheora/mathops.c create mode 100644 thirdparty/libtheora/mathops.h create mode 100644 thirdparty/libtheora/mcenc.c create mode 100644 thirdparty/libtheora/modedec.h create mode 100644 thirdparty/libtheora/ocintrin.h create mode 100644 thirdparty/libtheora/quant.c create mode 100644 thirdparty/libtheora/quant.h create mode 100644 thirdparty/libtheora/rate.c create mode 100644 thirdparty/libtheora/state.c create mode 100644 thirdparty/libtheora/theora/codec.h create mode 100644 thirdparty/libtheora/theora/theora.h create mode 100644 thirdparty/libtheora/theora/theoradec.h create mode 100644 thirdparty/libtheora/theora/theoraenc.h create mode 100644 thirdparty/libtheora/tokenize.c create mode 100644 thirdparty/libtheora/x86/mmxencfrag.c create mode 100644 thirdparty/libtheora/x86/mmxfdct.c create mode 100644 thirdparty/libtheora/x86/mmxfrag.c create mode 100644 thirdparty/libtheora/x86/mmxfrag.h create mode 100644 thirdparty/libtheora/x86/mmxidct.c create mode 100644 thirdparty/libtheora/x86/mmxloop.h create mode 100644 thirdparty/libtheora/x86/mmxstate.c create mode 100644 thirdparty/libtheora/x86/sse2fdct.c create mode 100644 thirdparty/libtheora/x86/x86enc.c create mode 100644 thirdparty/libtheora/x86/x86enc.h create mode 100644 thirdparty/libtheora/x86/x86int.h create mode 100644 thirdparty/libtheora/x86/x86state.c create mode 100644 thirdparty/libtheora/x86_vc/mmxencfrag.c create mode 100644 thirdparty/libtheora/x86_vc/mmxfdct.c create mode 100644 
thirdparty/libtheora/x86_vc/mmxfrag.c create mode 100644 thirdparty/libtheora/x86_vc/mmxfrag.h create mode 100644 thirdparty/libtheora/x86_vc/mmxidct.c create mode 100644 thirdparty/libtheora/x86_vc/mmxloop.h create mode 100644 thirdparty/libtheora/x86_vc/mmxstate.c create mode 100644 thirdparty/libtheora/x86_vc/x86enc.c create mode 100644 thirdparty/libtheora/x86_vc/x86enc.h create mode 100644 thirdparty/libtheora/x86_vc/x86int.h create mode 100644 thirdparty/libtheora/x86_vc/x86state.c diff --git a/SConstruct b/SConstruct index e52e224de7..4ed3845f97 100644 --- a/SConstruct +++ b/SConstruct @@ -121,11 +121,10 @@ opts.Add('tools','Build Tools (Including Editor): (yes/no)','yes') opts.Add('gdscript','Build GDSCript support: (yes/no)','yes') opts.Add('libogg','Ogg library for ogg container support (system/builtin)','builtin') opts.Add('libvorbis','Ogg Vorbis library for vorbis support (system/builtin)','builtin') +opts.Add('libtheora','Theora library for theora module (system/builtin)','builtin') opts.Add('opus','Opus and opusfile library for Opus format support: (system/builtin)','builtin') opts.Add('minizip','Build Minizip Archive Support: (yes/no)','yes') opts.Add('squish','Squish BC Texture Compression in editor (yes/no)','yes') -opts.Add('theora','Theora Video (yes/no)','yes') -opts.Add('theoralib','Theora Video (yes/no)','no') opts.Add('freetype','Freetype support in editor','builtin') opts.Add('xml','XML Save/Load support (yes/no)','yes') opts.Add('libpng','libpng library for image loader support (system/builtin)','builtin') @@ -331,13 +330,6 @@ if selected_platform in platform_list: if (env_base['squish']=='yes'): env.Append(CPPFLAGS=['-DSQUISH_ENABLED']); - - if (env['theora']=='yes'): - env['theoralib']='yes' - env.Append(CPPFLAGS=['-DTHEORA_ENABLED']); - if (env['theoralib']=='yes'): - env.Append(CPPFLAGS=['-DTHEORALIB_ENABLED']); - if (env['tools']=='yes'): env.Append(CPPFLAGS=['-DTOOLS_ENABLED']) if (env['disable_3d']=='yes'): diff --git a/drivers/SCsub 
b/drivers/SCsub index 4ce95c7970..8b7ec80dcd 100644 --- a/drivers/SCsub +++ b/drivers/SCsub @@ -24,8 +24,6 @@ SConscript("chibi/SCsub"); if (env["tools"]=="yes"): SConscript("convex_decomp/SCsub"); -if (env["theoralib"]=="yes"): - SConscript("theora/SCsub"); if (env['musepack']=='yes'): SConscript("mpc/SCsub"); if (env["squish"]=="yes" and env["tools"]=="yes"): diff --git a/drivers/register_driver_types.cpp b/drivers/register_driver_types.cpp index 8aead0e2fc..6d48d94d44 100644 --- a/drivers/register_driver_types.cpp +++ b/drivers/register_driver_types.cpp @@ -28,6 +28,7 @@ /*************************************************************************/ #include "register_driver_types.h" +#include "core/math/geometry.h" #include "png/image_loader_png.h" #include "png/resource_saver_png.h" #include "chibi/event_stream_chibi.h" @@ -45,11 +46,6 @@ #include "platform/windows/export/export.h" #endif -#ifdef THEORA_ENABLED -#include "theora/video_stream_theora.h" -#endif - - #include "drivers/nrex/regex.h" #ifdef MUSEPACK_ENABLED @@ -59,10 +55,6 @@ static ImageLoaderPNG *image_loader_png=NULL; static ResourceSaverPNG *resource_saver_png=NULL; -#ifdef THEORA_ENABLED -static ResourceFormatLoaderVideoStreamTheora* theora_stream_loader = NULL; -#endif - #ifdef MUSEPACK_ENABLED static ResourceFormatLoaderAudioStreamMPC * mpc_stream_loader=NULL; #endif @@ -103,13 +95,6 @@ void register_driver_types() { #endif -#ifdef THEORA_ENABLED - theora_stream_loader = memnew( ResourceFormatLoaderVideoStreamTheora ); - ResourceLoader::add_resource_format_loader(theora_stream_loader); - ObjectTypeDB::register_type(); -#endif - - #ifdef TOOLS_ENABLED #ifdef SQUISH_ENABLED @@ -123,11 +108,6 @@ void register_driver_types() { void unregister_driver_types() { -#ifdef THEORA_ENABLED - memdelete (theora_stream_loader); -#endif - - #ifdef MUSEPACK_ENABLED memdelete (mpc_stream_loader); diff --git a/drivers/theora/COPYING b/drivers/theora/COPYING deleted file mode 100644 index c8ccce4ffb..0000000000 --- 
a/drivers/theora/COPYING +++ /dev/null @@ -1,28 +0,0 @@ -Copyright (C) 2002-2009 Xiph.org Foundation - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -- Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -- Neither the name of the Xiph.org Foundation nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION -OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/drivers/theora/LICENSE b/drivers/theora/LICENSE deleted file mode 100644 index 5e5ec08469..0000000000 --- a/drivers/theora/LICENSE +++ /dev/null @@ -1,18 +0,0 @@ -Please see the file COPYING for the copyright license for this software. - -In addition to and irrespective of the copyright license associated -with this software, On2 Technologies, Inc. 
makes the following statement -regarding technology used in this software: - - On2 represents and warrants that it shall not assert any rights - relating to infringement of On2's registered patents, nor initiate - any litigation asserting such rights, against any person who, or - entity which utilizes the On2 VP3 Codec Software, including any - use, distribution, and sale of said Software; which make changes, - modifications, and improvements in said Software; and to use, - distribute, and sell said changes as well as applications for other - fields of use. - -This reference implementation is originally derived from the On2 VP3 -Codec Software, and the Theora video format is essentially compatible -with the VP3 video format, consisting of a backward-compatible superset. diff --git a/drivers/theora/SCsub b/drivers/theora/SCsub deleted file mode 100644 index 94477d2827..0000000000 --- a/drivers/theora/SCsub +++ /dev/null @@ -1,65 +0,0 @@ -Import('env') - -sources = [ - #"theora/analyze.c", - #"theora/apiwrapper.c", - "theora/bitpack.c", - "theora/cpu.c", - #"theora/decapiwrapper.c", - "theora/decinfo.c", - "theora/decode.c", - "theora/dequant.c", - #"theora/encapiwrapper.c", - #"theora/encfrag.c", - #"theora/encinfo.c", - #"theora/encode.c", - #"theora/encoder_disabled.c", - #"theora/enquant.c", - #"theora/fdct.c", - "theora/fragment.c", - "theora/huffdec.c", - #"theora/huffenc.c", - "theora/idct.c", - "theora/info.c", - "theora/internal.c", - #"theora/mathops.c", - #"theora/mcenc.c", - "theora/quant.c", - #"theora/rate.c", - "theora/state.c", - #"theora/tokenize.c", - "theora/video_stream_theora.cpp", -] - -sources_x86 = [ - #"theora/x86/mmxencfrag.c", - #"theora/x86/mmxfdct.c", - "theora/x86/mmxfrag.c", - "theora/x86/mmxidct.c", - "theora/x86/mmxstate.c", - #"theora/x86/sse2fdct.c", - #"theora/x86/x86enc.c", - "theora/x86/x86state.c", -] - -sources_x86_vc = [ - #"theora/x86_vc/mmxencfrag.c", - #"theora/x86_vc/mmxfdct.c", - "theora/x86_vc/mmxfrag.c", - 
"theora/x86_vc/mmxidct.c", - "theora/x86_vc/mmxstate.c", - #"theora/x86_vc/x86enc.c", - "theora/x86_vc/x86state.c", -] - -env.drivers_sources += sources - -if (env["x86_opt_gcc"]): - env.drivers_sources += sources_x86 - -if (env["x86_opt_vc"]): - env.drivers_sources += sources_x86_vc - -if (env["x86_opt_gcc"] or env["x86_opt_vc"]): - Import('env_drivers') - env_drivers.Append(CCFLAGS=["-DOC_X86_ASM"]) diff --git a/drivers/theora/analyze.c b/drivers/theora/analyze.c deleted file mode 100644 index af01b60dff..0000000000 --- a/drivers/theora/analyze.c +++ /dev/null @@ -1,2709 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: mode selection code - last mod: $Id$ - - ********************************************************************/ -#include -#include -#include "encint.h" -#include "modedec.h" - - - -typedef struct oc_fr_state oc_fr_state; -typedef struct oc_qii_state oc_qii_state; -typedef struct oc_enc_pipeline_state oc_enc_pipeline_state; -typedef struct oc_rd_metric oc_rd_metric; -typedef struct oc_mode_choice oc_mode_choice; - - - -/*There are 8 possible schemes used to encode macro block modes. - Schemes 0-6 use a maximally-skewed Huffman code to code each of the modes. - The same set of Huffman codes is used for each of these 7 schemes, but the - mode assigned to each codeword varies. - Scheme 0 writes a custom mapping from codeword to MB mode to the bitstream, - while schemes 1-6 have a fixed mapping. 
- Scheme 7 just encodes each mode directly in 3 bits.*/ - -/*The mode orderings for the various mode coding schemes. - Scheme 0 uses a custom alphabet, which is not stored in this table. - This is the inverse of the equivalent table OC_MODE_ALPHABETS in the - decoder.*/ -static const unsigned char OC_MODE_RANKS[7][OC_NMODES]={ - /*Last MV dominates.*/ - /*L P M N I G GM 4*/ - {3,4,2,0,1,5,6,7}, - /*L P N M I G GM 4*/ - {2,4,3,0,1,5,6,7}, - /*L M P N I G GM 4*/ - {3,4,1,0,2,5,6,7}, - /*L M N P I G GM 4*/ - {2,4,1,0,3,5,6,7}, - /*No MV dominates.*/ - /*N L P M I G GM 4*/ - {0,4,3,1,2,5,6,7}, - /*N G L P M I GM 4*/ - {0,5,4,2,3,1,6,7}, - /*Default ordering.*/ - /*N I M L P G GM 4*/ - {0,1,2,3,4,5,6,7} -}; - - - -/*Initialize the mode scheme chooser. - This need only be called once per encoder.*/ -void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser){ - int si; - _chooser->mode_ranks[0]=_chooser->scheme0_ranks; - for(si=1;si<8;si++)_chooser->mode_ranks[si]=OC_MODE_RANKS[si-1]; -} - -/*Reset the mode scheme chooser. - This needs to be called once for each frame, including the first.*/ -static void oc_mode_scheme_chooser_reset(oc_mode_scheme_chooser *_chooser){ - int si; - memset(_chooser->mode_counts,0,OC_NMODES*sizeof(*_chooser->mode_counts)); - /*Scheme 0 starts with 24 bits to store the mode list in.*/ - _chooser->scheme_bits[0]=24; - memset(_chooser->scheme_bits+1,0,7*sizeof(*_chooser->scheme_bits)); - for(si=0;si<8;si++){ - /*Scheme 7 should always start first, and scheme 0 should always start - last.*/ - _chooser->scheme_list[si]=7-si; - _chooser->scheme0_list[si]=_chooser->scheme0_ranks[si]=si; - } -} - - -/*This is the real purpose of this data structure: not actually selecting a - mode scheme, but estimating the cost of coding a given mode given all the - modes selected so far. 
- This is done via opportunity cost: the cost is defined as the number of bits - required to encode all the modes selected so far including the current one - using the best possible scheme, minus the number of bits required to encode - all the modes selected so far not including the current one using the best - possible scheme. - The computational expense of doing this probably makes it overkill. - Just be happy we take a greedy approach instead of trying to solve the - global mode-selection problem (which is NP-hard). - _mb_mode: The mode to determine the cost of. - Return: The number of bits required to code this mode.*/ -static int oc_mode_scheme_chooser_cost(oc_mode_scheme_chooser *_chooser, - int _mb_mode){ - int scheme0; - int scheme1; - int best_bits; - int mode_bits; - int si; - int scheme_bits; - scheme0=_chooser->scheme_list[0]; - scheme1=_chooser->scheme_list[1]; - best_bits=_chooser->scheme_bits[scheme0]; - mode_bits=OC_MODE_BITS[scheme0+1>>3][_chooser->mode_ranks[scheme0][_mb_mode]]; - /*Typical case: If the difference between the best scheme and the next best - is greater than 6 bits, then adding just one mode cannot change which - scheme we use.*/ - if(_chooser->scheme_bits[scheme1]-best_bits>6)return mode_bits; - /*Otherwise, check to see if adding this mode selects a different scheme as - the best.*/ - si=1; - best_bits+=mode_bits; - do{ - /*For any scheme except 0, we can just use the bit cost of the mode's rank - in that scheme.*/ - if(scheme1!=0){ - scheme_bits=_chooser->scheme_bits[scheme1]+ - OC_MODE_BITS[scheme1+1>>3][_chooser->mode_ranks[scheme1][_mb_mode]]; - } - else{ - int ri; - /*For scheme 0, incrementing the mode count could potentially change the - mode's rank. 
- Find the index where the mode would be moved to in the optimal list, - and use its bit cost instead of the one for the mode's current - position in the list.*/ - /*We don't recompute scheme bits; this is computing opportunity cost, not - an update.*/ - for(ri=_chooser->scheme0_ranks[_mb_mode];ri>0&& - _chooser->mode_counts[_mb_mode]>= - _chooser->mode_counts[_chooser->scheme0_list[ri-1]];ri--); - scheme_bits=_chooser->scheme_bits[0]+OC_MODE_BITS[0][ri]; - } - if(scheme_bits=8)break; - scheme1=_chooser->scheme_list[si]; - } - while(_chooser->scheme_bits[scheme1]-_chooser->scheme_bits[scheme0]<=6); - return best_bits-_chooser->scheme_bits[scheme0]; -} - -/*Incrementally update the mode counts and per-scheme bit counts and re-order - the scheme lists once a mode has been selected. - _mb_mode: The mode that was chosen.*/ -static void oc_mode_scheme_chooser_update(oc_mode_scheme_chooser *_chooser, - int _mb_mode){ - int ri; - int si; - _chooser->mode_counts[_mb_mode]++; - /*Re-order the scheme0 mode list if necessary.*/ - for(ri=_chooser->scheme0_ranks[_mb_mode];ri>0;ri--){ - int pmode; - pmode=_chooser->scheme0_list[ri-1]; - if(_chooser->mode_counts[pmode]>=_chooser->mode_counts[_mb_mode])break; - /*Reorder the mode ranking.*/ - _chooser->scheme0_ranks[pmode]++; - _chooser->scheme0_list[ri]=pmode; - } - _chooser->scheme0_ranks[_mb_mode]=ri; - _chooser->scheme0_list[ri]=_mb_mode; - /*Now add the bit cost for the mode to each scheme.*/ - for(si=0;si<8;si++){ - _chooser->scheme_bits[si]+= - OC_MODE_BITS[si+1>>3][_chooser->mode_ranks[si][_mb_mode]]; - } - /*Finally, re-order the list of schemes.*/ - for(si=1;si<8;si++){ - int sj; - int scheme0; - int bits0; - sj=si; - scheme0=_chooser->scheme_list[si]; - bits0=_chooser->scheme_bits[scheme0]; - do{ - int scheme1; - scheme1=_chooser->scheme_list[sj-1]; - if(bits0>=_chooser->scheme_bits[scheme1])break; - _chooser->scheme_list[sj]=scheme1; - } - while(--sj>0); - _chooser->scheme_list[sj]=scheme0; - } -} - - - -/*The number 
of bits required to encode a super block run. - _run_count: The desired run count; must be positive and less than 4130.*/ -static int oc_sb_run_bits(int _run_count){ - int i; - for(i=0;_run_count>=OC_SB_RUN_VAL_MIN[i+1];i++); - return OC_SB_RUN_CODE_NBITS[i]; -} - -/*The number of bits required to encode a block run. - _run_count: The desired run count; must be positive and less than 30.*/ -static int oc_block_run_bits(int _run_count){ - return OC_BLOCK_RUN_CODE_NBITS[_run_count-1]; -} - - - -/*State to track coded block flags and their bit cost.*/ -struct oc_fr_state{ - ptrdiff_t bits; - unsigned sb_partial_count:16; - unsigned sb_full_count:16; - unsigned b_coded_count_prev:8; - unsigned b_coded_count:8; - unsigned b_count:8; - signed int sb_partial:2; - signed int sb_full:2; - signed int b_coded_prev:2; - signed int b_coded:2; -}; - - - -static void oc_fr_state_init(oc_fr_state *_fr){ - _fr->bits=0; - _fr->sb_partial_count=0; - _fr->sb_full_count=0; - _fr->b_coded_count_prev=0; - _fr->b_coded_count=0; - _fr->b_count=0; - _fr->sb_partial=-1; - _fr->sb_full=-1; - _fr->b_coded_prev=-1; - _fr->b_coded=-1; -} - - -static void oc_fr_state_advance_sb(oc_fr_state *_fr, - int _sb_partial,int _sb_full){ - ptrdiff_t bits; - int sb_partial_count; - int sb_full_count; - bits=_fr->bits; - /*Extend the sb_partial run, or start a new one.*/ - sb_partial_count=_fr->sb_partial; - if(_fr->sb_partial==_sb_partial){ - if(sb_partial_count>=4129){ - bits++; - sb_partial_count=0; - } - else bits-=oc_sb_run_bits(sb_partial_count); - } - else sb_partial_count=0; - sb_partial_count++; - bits+=oc_sb_run_bits(sb_partial_count); - if(!_sb_partial){ - /*Extend the sb_full run, or start a new one.*/ - sb_full_count=_fr->sb_full_count; - if(_fr->sb_full==_sb_full){ - if(sb_full_count>=4129){ - bits++; - sb_full_count=0; - } - else bits-=oc_sb_run_bits(sb_full_count); - } - else sb_full_count=0; - sb_full_count++; - bits+=oc_sb_run_bits(sb_full_count); - _fr->sb_full=_sb_full; - 
_fr->sb_full_count=sb_full_count; - } - _fr->bits=bits; - _fr->sb_partial=_sb_partial; - _fr->sb_partial_count=sb_partial_count; -} - -/*Flush any outstanding block flags for a SB (e.g., one with fewer than 16 - blocks).*/ -static void oc_fr_state_flush_sb(oc_fr_state *_fr){ - ptrdiff_t bits; - int sb_partial; - int sb_full=sb_full; - int b_coded_count; - int b_coded; - int b_count; - b_count=_fr->b_count; - if(b_count>0){ - bits=_fr->bits; - b_coded=_fr->b_coded; - b_coded_count=_fr->b_coded_count; - if(b_coded_count>=b_count){ - /*This SB was fully coded/uncoded; roll back the partial block flags.*/ - bits-=oc_block_run_bits(b_coded_count); - if(b_coded_count>b_count)bits+=oc_block_run_bits(b_coded_count-b_count); - sb_partial=0; - sb_full=b_coded; - b_coded=_fr->b_coded_prev; - b_coded_count=_fr->b_coded_count_prev; - } - else{ - /*It was partially coded.*/ - sb_partial=1; - /*sb_full is unused.*/ - } - _fr->bits=bits; - _fr->b_coded_count=b_coded_count; - _fr->b_coded_count_prev=b_coded_count; - _fr->b_count=0; - _fr->b_coded=b_coded; - _fr->b_coded_prev=b_coded; - oc_fr_state_advance_sb(_fr,sb_partial,sb_full); - } -} - -static void oc_fr_state_advance_block(oc_fr_state *_fr,int _b_coded){ - ptrdiff_t bits; - int b_coded_count; - int b_count; - int sb_partial; - int sb_full=sb_full; - bits=_fr->bits; - /*Extend the b_coded run, or start a new one.*/ - b_coded_count=_fr->b_coded_count; - if(_fr->b_coded==_b_coded)bits-=oc_block_run_bits(b_coded_count); - else b_coded_count=0; - b_coded_count++; - b_count=_fr->b_count+1; - if(b_count>=16){ - /*We finished a superblock.*/ - if(b_coded_count>=16){ - /*It was fully coded/uncoded; roll back the partial block flags.*/ - if(b_coded_count>16)bits+=oc_block_run_bits(b_coded_count-16); - sb_partial=0; - sb_full=_b_coded; - _b_coded=_fr->b_coded_prev; - b_coded_count=_fr->b_coded_count_prev; - } - else{ - bits+=oc_block_run_bits(b_coded_count); - /*It was partially coded.*/ - sb_partial=1; - /*sb_full is unused.*/ - } - 
_fr->bits=bits; - _fr->b_coded_count=b_coded_count; - _fr->b_coded_count_prev=b_coded_count; - _fr->b_count=0; - _fr->b_coded=_b_coded; - _fr->b_coded_prev=_b_coded; - oc_fr_state_advance_sb(_fr,sb_partial,sb_full); - } - else{ - bits+=oc_block_run_bits(b_coded_count); - _fr->bits=bits; - _fr->b_coded_count=b_coded_count; - _fr->b_count=b_count; - _fr->b_coded=_b_coded; - } -} - -static void oc_fr_skip_block(oc_fr_state *_fr){ - oc_fr_state_advance_block(_fr,0); -} - -static void oc_fr_code_block(oc_fr_state *_fr){ - oc_fr_state_advance_block(_fr,1); -} - -static int oc_fr_cost1(const oc_fr_state *_fr){ - oc_fr_state tmp; - ptrdiff_t bits; - *&tmp=*_fr; - oc_fr_skip_block(&tmp); - bits=tmp.bits; - *&tmp=*_fr; - oc_fr_code_block(&tmp); - return (int)(tmp.bits-bits); -} - -static int oc_fr_cost4(const oc_fr_state *_pre,const oc_fr_state *_post){ - oc_fr_state tmp; - *&tmp=*_pre; - oc_fr_skip_block(&tmp); - oc_fr_skip_block(&tmp); - oc_fr_skip_block(&tmp); - oc_fr_skip_block(&tmp); - return (int)(_post->bits-tmp.bits); -} - - - -struct oc_qii_state{ - ptrdiff_t bits; - unsigned qi01_count:14; - signed int qi01:2; - unsigned qi12_count:14; - signed int qi12:2; -}; - - - -static void oc_qii_state_init(oc_qii_state *_qs){ - _qs->bits=0; - _qs->qi01_count=0; - _qs->qi01=-1; - _qs->qi12_count=0; - _qs->qi12=-1; -} - - -static void oc_qii_state_advance(oc_qii_state *_qd, - const oc_qii_state *_qs,int _qii){ - ptrdiff_t bits; - int qi01; - int qi01_count; - int qi12; - int qi12_count; - bits=_qs->bits; - qi01=_qii+1>>1; - qi01_count=_qs->qi01_count; - if(qi01==_qs->qi01){ - if(qi01_count>=4129){ - bits++; - qi01_count=0; - } - else bits-=oc_sb_run_bits(qi01_count); - } - else qi01_count=0; - qi01_count++; - bits+=oc_sb_run_bits(qi01_count); - qi12_count=_qs->qi12_count; - if(_qii){ - qi12=_qii>>1; - if(qi12==_qs->qi12){ - if(qi12_count>=4129){ - bits++; - qi12_count=0; - } - else bits-=oc_sb_run_bits(qi12_count); - } - else qi12_count=0; - qi12_count++; - 
bits+=oc_sb_run_bits(qi12_count); - } - else qi12=_qs->qi12; - _qd->bits=bits; - _qd->qi01=qi01; - _qd->qi01_count=qi01_count; - _qd->qi12=qi12; - _qd->qi12_count=qi12_count; -} - - - -/*Temporary encoder state for the analysis pipeline.*/ -struct oc_enc_pipeline_state{ - int bounding_values[256]; - oc_fr_state fr[3]; - oc_qii_state qs[3]; - /*Condensed dequantization tables.*/ - const ogg_uint16_t *dequant[3][3][2]; - /*Condensed quantization tables.*/ - const oc_iquant *enquant[3][3][2]; - /*Skip SSD storage for the current MCU in each plane.*/ - unsigned *skip_ssd[3]; - /*Coded/uncoded fragment lists for each plane for the current MCU.*/ - ptrdiff_t *coded_fragis[3]; - ptrdiff_t *uncoded_fragis[3]; - ptrdiff_t ncoded_fragis[3]; - ptrdiff_t nuncoded_fragis[3]; - /*The starting fragment for the current MCU in each plane.*/ - ptrdiff_t froffset[3]; - /*The starting row for the current MCU in each plane.*/ - int fragy0[3]; - /*The ending row for the current MCU in each plane.*/ - int fragy_end[3]; - /*The starting superblock for the current MCU in each plane.*/ - unsigned sbi0[3]; - /*The ending superblock for the current MCU in each plane.*/ - unsigned sbi_end[3]; - /*The number of tokens for zzi=1 for each color plane.*/ - int ndct_tokens1[3]; - /*The outstanding eob_run count for zzi=1 for each color plane.*/ - int eob_run1[3]; - /*Whether or not the loop filter is enabled.*/ - int loop_filter; -}; - - -static void oc_enc_pipeline_init(oc_enc_ctx *_enc,oc_enc_pipeline_state *_pipe){ - ptrdiff_t *coded_fragis; - unsigned mcu_nvsbs; - ptrdiff_t mcu_nfrags; - int hdec; - int vdec; - int pli; - int qii; - int qti; - /*Initialize the per-plane coded block flag trackers. 
- These are used for bit-estimation purposes only; the real flag bits span - all three planes, so we can't compute them in parallel.*/ - for(pli=0;pli<3;pli++)oc_fr_state_init(_pipe->fr+pli); - for(pli=0;pli<3;pli++)oc_qii_state_init(_pipe->qs+pli); - /*Set up the per-plane skip SSD storage pointers.*/ - mcu_nvsbs=_enc->mcu_nvsbs; - mcu_nfrags=mcu_nvsbs*_enc->state.fplanes[0].nhsbs*16; - hdec=!(_enc->state.info.pixel_fmt&1); - vdec=!(_enc->state.info.pixel_fmt&2); - _pipe->skip_ssd[0]=_enc->mcu_skip_ssd; - _pipe->skip_ssd[1]=_pipe->skip_ssd[0]+mcu_nfrags; - _pipe->skip_ssd[2]=_pipe->skip_ssd[1]+(mcu_nfrags>>hdec+vdec); - /*Set up per-plane pointers to the coded and uncoded fragments lists. - Unlike the decoder, each planes' coded and uncoded fragment list is kept - separate during the analysis stage; we only make the coded list for all - three planes contiguous right before the final packet is output - (destroying the uncoded lists, which are no longer needed).*/ - coded_fragis=_enc->state.coded_fragis; - for(pli=0;pli<3;pli++){ - _pipe->coded_fragis[pli]=coded_fragis; - coded_fragis+=_enc->state.fplanes[pli].nfrags; - _pipe->uncoded_fragis[pli]=coded_fragis; - } - memset(_pipe->ncoded_fragis,0,sizeof(_pipe->ncoded_fragis)); - memset(_pipe->nuncoded_fragis,0,sizeof(_pipe->nuncoded_fragis)); - /*Set up condensed quantizer tables.*/ - for(pli=0;pli<3;pli++){ - for(qii=0;qii<_enc->state.nqis;qii++){ - int qi; - qi=_enc->state.qis[qii]; - for(qti=0;qti<2;qti++){ - _pipe->dequant[pli][qii][qti]=_enc->state.dequant_tables[qi][pli][qti]; - _pipe->enquant[pli][qii][qti]=_enc->enquant_tables[qi][pli][qti]; - } - } - } - /*Initialize the tokenization state.*/ - for(pli=0;pli<3;pli++){ - _pipe->ndct_tokens1[pli]=0; - _pipe->eob_run1[pli]=0; - } - /*Initialize the bounding value array for the loop filter.*/ - _pipe->loop_filter=!oc_state_loop_filter_init(&_enc->state, - _pipe->bounding_values); -} - -/*Sets the current MCU stripe to super block row _sby. 
- Return: A non-zero value if this was the last MCU.*/ -static int oc_enc_pipeline_set_stripe(oc_enc_ctx *_enc, - oc_enc_pipeline_state *_pipe,int _sby){ - const oc_fragment_plane *fplane; - unsigned mcu_nvsbs; - int sby_end; - int notdone; - int vdec; - int pli; - mcu_nvsbs=_enc->mcu_nvsbs; - sby_end=_enc->state.fplanes[0].nvsbs; - notdone=_sby+mcu_nvsbsstate.fplanes+pli; - _pipe->sbi0[pli]=fplane->sboffset+(_sby>>vdec)*fplane->nhsbs; - _pipe->fragy0[pli]=_sby<<2-vdec; - _pipe->froffset[pli]=fplane->froffset - +_pipe->fragy0[pli]*(ptrdiff_t)fplane->nhfrags; - if(notdone){ - _pipe->sbi_end[pli]=fplane->sboffset+(sby_end>>vdec)*fplane->nhsbs; - _pipe->fragy_end[pli]=sby_end<<2-vdec; - } - else{ - _pipe->sbi_end[pli]=fplane->sboffset+fplane->nsbs; - _pipe->fragy_end[pli]=fplane->nvfrags; - } - vdec=!(_enc->state.info.pixel_fmt&2); - } - return notdone; -} - -static void oc_enc_pipeline_finish_mcu_plane(oc_enc_ctx *_enc, - oc_enc_pipeline_state *_pipe,int _pli,int _sdelay,int _edelay){ - int refi; - /*Copy over all the uncoded fragments from this plane and advance the uncoded - fragment list.*/ - _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli]; - oc_state_frag_copy_list(&_enc->state,_pipe->uncoded_fragis[_pli], - _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli); - _pipe->nuncoded_fragis[_pli]=0; - /*Perform DC prediction.*/ - oc_enc_pred_dc_frag_rows(_enc,_pli, - _pipe->fragy0[_pli],_pipe->fragy_end[_pli]); - /*Finish DC tokenization.*/ - oc_enc_tokenize_dc_frag_list(_enc,_pli, - _pipe->coded_fragis[_pli],_pipe->ncoded_fragis[_pli], - _pipe->ndct_tokens1[_pli],_pipe->eob_run1[_pli]); - _pipe->ndct_tokens1[_pli]=_enc->ndct_tokens[_pli][1]; - _pipe->eob_run1[_pli]=_enc->eob_run[_pli][1]; - /*And advance the coded fragment list.*/ - _enc->state.ncoded_fragis[_pli]+=_pipe->ncoded_fragis[_pli]; - _pipe->coded_fragis[_pli]+=_pipe->ncoded_fragis[_pli]; - _pipe->ncoded_fragis[_pli]=0; - /*Apply the loop filter if necessary.*/ - 
refi=_enc->state.ref_frame_idx[OC_FRAME_SELF]; - if(_pipe->loop_filter){ - oc_state_loop_filter_frag_rows(&_enc->state,_pipe->bounding_values, - refi,_pli,_pipe->fragy0[_pli]-_sdelay,_pipe->fragy_end[_pli]-_edelay); - } - else _sdelay=_edelay=0; - /*To fill borders, we have an additional two pixel delay, since a fragment - in the next row could filter its top edge, using two pixels from a - fragment in this row. - But there's no reason to delay a full fragment between the two.*/ - oc_state_borders_fill_rows(&_enc->state,refi,_pli, - (_pipe->fragy0[_pli]-_sdelay<<3)-(_sdelay<<1), - (_pipe->fragy_end[_pli]-_edelay<<3)-(_edelay<<1)); -} - - - -/*Cost information about the coded blocks in a MB.*/ -struct oc_rd_metric{ - int uncoded_ac_ssd; - int coded_ac_ssd; - int ac_bits; - int dc_flag; -}; - - - -static int oc_enc_block_transform_quantize(oc_enc_ctx *_enc, - oc_enc_pipeline_state *_pipe,int _pli,ptrdiff_t _fragi,int _overhead_bits, - oc_rd_metric *_mo,oc_token_checkpoint **_stack){ - OC_ALIGN16(ogg_int16_t dct[64]); - OC_ALIGN16(ogg_int16_t data[64]); - ogg_uint16_t dc_dequant; - const ogg_uint16_t *dequant; - const oc_iquant *enquant; - ptrdiff_t frag_offs; - int ystride; - const unsigned char *src; - const unsigned char *ref; - unsigned char *dst; - int frame_type; - int nonzero; - unsigned uncoded_ssd; - unsigned coded_ssd; - int coded_dc; - oc_token_checkpoint *checkpoint; - oc_fragment *frags; - int mb_mode; - int mv_offs[2]; - int nmv_offs; - int ac_bits; - int borderi; - int qti; - int qii; - int pi; - int zzi; - int v; - int val; - int d; - int s; - int dc; - frags=_enc->state.frags; - frag_offs=_enc->state.frag_buf_offs[_fragi]; - ystride=_enc->state.ref_ystride[_pli]; - src=_enc->state.ref_frame_data[OC_FRAME_IO]+frag_offs; - borderi=frags[_fragi].borderi; - qii=frags[_fragi].qii; - if(qii&~3){ -#if !defined(OC_COLLECT_METRICS) - if(_enc->sp_level>=OC_SP_LEVEL_EARLY_SKIP){ - /*Enable early skip detection.*/ - frags[_fragi].coded=0; - return 0; - } -#endif 
- /*Try and code this block anyway.*/ - qii&=3; - frags[_fragi].qii=qii; - } - mb_mode=frags[_fragi].mb_mode; - ref=_enc->state.ref_frame_data[ - _enc->state.ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]+frag_offs; - dst=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_SELF]] - +frag_offs; - /*Motion compensation:*/ - switch(mb_mode){ - case OC_MODE_INTRA:{ - nmv_offs=0; - oc_enc_frag_sub_128(_enc,data,src,ystride); - }break; - case OC_MODE_GOLDEN_NOMV: - case OC_MODE_INTER_NOMV:{ - nmv_offs=1; - mv_offs[0]=0; - oc_enc_frag_sub(_enc,data,src,ref,ystride); - }break; - default:{ - const oc_mv *frag_mvs; - frag_mvs=(const oc_mv *)_enc->state.frag_mvs; - nmv_offs=oc_state_get_mv_offsets(&_enc->state,mv_offs,_pli, - frag_mvs[_fragi][0],frag_mvs[_fragi][1]); - if(nmv_offs>1){ - oc_enc_frag_copy2(_enc,dst, - ref+mv_offs[0],ref+mv_offs[1],ystride); - oc_enc_frag_sub(_enc,data,src,dst,ystride); - } - else oc_enc_frag_sub(_enc,data,src,ref+mv_offs[0],ystride); - }break; - } -#if defined(OC_COLLECT_METRICS) - { - unsigned satd; - switch(nmv_offs){ - case 0:satd=oc_enc_frag_intra_satd(_enc,src,ystride);break; - case 1:{ - satd=oc_enc_frag_satd_thresh(_enc,src,ref+mv_offs[0],ystride,UINT_MAX); - }break; - default:{ - satd=oc_enc_frag_satd_thresh(_enc,src,dst,ystride,UINT_MAX); - } - } - _enc->frag_satd[_fragi]=satd; - } -#endif - /*Transform:*/ - oc_enc_fdct8x8(_enc,dct,data); - /*Quantize the DC coefficient:*/ - qti=mb_mode!=OC_MODE_INTRA; - enquant=_pipe->enquant[_pli][0][qti]; - dc_dequant=_pipe->dequant[_pli][0][qti][0]; - v=dct[0]; - val=v<<1; - s=OC_SIGNMASK(val); - val+=dc_dequant+s^s; - val=((enquant[0].m*(ogg_int32_t)val>>16)+val>>enquant[0].l)-s; - dc=OC_CLAMPI(-580,val,580); - nonzero=0; - /*Quantize the AC coefficients:*/ - dequant=_pipe->dequant[_pli][qii][qti]; - enquant=_pipe->enquant[_pli][qii][qti]; - for(zzi=1;zzi<64;zzi++){ - v=dct[OC_FZIG_ZAG[zzi]]; - d=dequant[zzi]; - val=v<<1; - v=abs(val); - if(v>=d){ - s=OC_SIGNMASK(val); - /*The bias added 
here rounds ties away from zero, since token - optimization can only decrease the magnitude of the quantized - value.*/ - val+=d+s^s; - /*Note the arithmetic right shift is not guaranteed by ANSI C. - Hopefully no one still uses ones-complement architectures.*/ - val=((enquant[zzi].m*(ogg_int32_t)val>>16)+val>>enquant[zzi].l)-s; - data[zzi]=OC_CLAMPI(-580,val,580); - nonzero=zzi; - } - else data[zzi]=0; - } - /*Tokenize.*/ - checkpoint=*_stack; - ac_bits=oc_enc_tokenize_ac(_enc,_pli,_fragi,data,dequant,dct,nonzero+1, - _stack,qti?0:3); - /*Reconstruct. - TODO: nonzero may need to be adjusted after tokenization.*/ - if(nonzero==0){ - ogg_int16_t p; - int ci; - /*We round this dequant product (and not any of the others) because there's - no iDCT rounding.*/ - p=(ogg_int16_t)(dc*(ogg_int32_t)dc_dequant+15>>5); - /*LOOP VECTORIZES.*/ - for(ci=0;ci<64;ci++)data[ci]=p; - } - else{ - data[0]=dc*dc_dequant; - oc_idct8x8(&_enc->state,data,nonzero+1); - } - if(!qti)oc_enc_frag_recon_intra(_enc,dst,ystride,data); - else{ - oc_enc_frag_recon_inter(_enc,dst, - nmv_offs==1?ref+mv_offs[0]:dst,ystride,data); - } - frame_type=_enc->state.frame_type; -#if !defined(OC_COLLECT_METRICS) - if(frame_type!=OC_INTRA_FRAME) -#endif - { - /*In retrospect, should we have skipped this block?*/ - oc_enc_frag_sub(_enc,data,src,dst,ystride); - coded_ssd=coded_dc=0; - if(borderi<0){ - for(pi=0;pi<64;pi++){ - coded_ssd+=data[pi]*data[pi]; - coded_dc+=data[pi]; - } - } - else{ - ogg_int64_t mask; - mask=_enc->state.borders[borderi].mask; - for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){ - coded_ssd+=data[pi]*data[pi]; - coded_dc+=data[pi]; - } - } - /*Scale to match DCT domain.*/ - coded_ssd<<=4; - /*We actually only want the AC contribution to the SSD.*/ - coded_ssd-=coded_dc*coded_dc>>2; -#if defined(OC_COLLECT_METRICS) - _enc->frag_ssd[_fragi]=coded_ssd; - } - if(frame_type!=OC_INTRA_FRAME){ -#endif - uncoded_ssd=_pipe->skip_ssd[_pli][_fragi-_pipe->froffset[_pli]]; - if(uncoded_ssdlambda&& - /*Don't 
allow luma blocks to be skipped in 4MV mode when VP3 - compatibility is enabled.*/ - (!_enc->vp3_compatible||mb_mode!=OC_MODE_INTER_MV_FOUR||_pli)){ - /*Hm, not worth it; roll back.*/ - oc_enc_tokenlog_rollback(_enc,checkpoint,(*_stack)-checkpoint); - *_stack=checkpoint; - frags[_fragi].coded=0; - return 0; - } - } - else _mo->dc_flag=1; - _mo->uncoded_ac_ssd+=uncoded_ssd; - _mo->coded_ac_ssd+=coded_ssd; - _mo->ac_bits+=ac_bits; - } - oc_qii_state_advance(_pipe->qs+_pli,_pipe->qs+_pli,qii); - frags[_fragi].dc=dc; - frags[_fragi].coded=1; - return 1; -} - -static int oc_enc_mb_transform_quantize_luma(oc_enc_ctx *_enc, - oc_enc_pipeline_state *_pipe,unsigned _mbi,int _mode_overhead){ - /*Worst case token stack usage for 4 fragments.*/ - oc_token_checkpoint stack[64*4]; - oc_token_checkpoint *stackptr; - const oc_sb_map *sb_maps; - signed char *mb_modes; - oc_fragment *frags; - ptrdiff_t *coded_fragis; - ptrdiff_t ncoded_fragis; - ptrdiff_t *uncoded_fragis; - ptrdiff_t nuncoded_fragis; - oc_rd_metric mo; - oc_fr_state fr_checkpoint; - oc_qii_state qs_checkpoint; - int mb_mode; - int ncoded; - ptrdiff_t fragi; - int bi; - *&fr_checkpoint=*(_pipe->fr+0); - *&qs_checkpoint=*(_pipe->qs+0); - sb_maps=(const oc_sb_map *)_enc->state.sb_maps; - mb_modes=_enc->state.mb_modes; - frags=_enc->state.frags; - coded_fragis=_pipe->coded_fragis[0]; - ncoded_fragis=_pipe->ncoded_fragis[0]; - uncoded_fragis=_pipe->uncoded_fragis[0]; - nuncoded_fragis=_pipe->nuncoded_fragis[0]; - mb_mode=mb_modes[_mbi]; - ncoded=0; - stackptr=stack; - memset(&mo,0,sizeof(mo)); - for(bi=0;bi<4;bi++){ - fragi=sb_maps[_mbi>>2][_mbi&3][bi]; - frags[fragi].mb_mode=mb_mode; - if(oc_enc_block_transform_quantize(_enc, - _pipe,0,fragi,oc_fr_cost1(_pipe->fr+0),&mo,&stackptr)){ - oc_fr_code_block(_pipe->fr+0); - coded_fragis[ncoded_fragis++]=fragi; - ncoded++; - } - else{ - *(uncoded_fragis-++nuncoded_fragis)=fragi; - oc_fr_skip_block(_pipe->fr+0); - } - } - if(_enc->state.frame_type!=OC_INTRA_FRAME){ - 
if(ncoded>0&&!mo.dc_flag){ - int cost; - /*Some individual blocks were worth coding. - See if that's still true when accounting for mode and MV overhead.*/ - cost=mo.coded_ac_ssd+_enc->lambda*(mo.ac_bits - +oc_fr_cost4(&fr_checkpoint,_pipe->fr+0)+_mode_overhead); - if(mo.uncoded_ac_ssd<=cost){ - /*Taking macroblock overhead into account, it is not worth coding this - MB.*/ - oc_enc_tokenlog_rollback(_enc,stack,stackptr-stack); - *(_pipe->fr+0)=*&fr_checkpoint; - *(_pipe->qs+0)=*&qs_checkpoint; - for(bi=0;bi<4;bi++){ - fragi=sb_maps[_mbi>>2][_mbi&3][bi]; - if(frags[fragi].coded){ - *(uncoded_fragis-++nuncoded_fragis)=fragi; - frags[fragi].coded=0; - } - oc_fr_skip_block(_pipe->fr+0); - } - ncoded_fragis-=ncoded; - ncoded=0; - } - } - /*If no luma blocks coded, the mode is forced.*/ - if(ncoded==0)mb_modes[_mbi]=OC_MODE_INTER_NOMV; - /*Assume that a 1MV with a single coded block is always cheaper than a 4MV - with a single coded block. - This may not be strictly true: a 4MV computes chroma MVs using (0,0) for - skipped blocks, while a 1MV does not.*/ - else if(ncoded==1&&mb_mode==OC_MODE_INTER_MV_FOUR){ - mb_modes[_mbi]=OC_MODE_INTER_MV; - } - } - _pipe->ncoded_fragis[0]=ncoded_fragis; - _pipe->nuncoded_fragis[0]=nuncoded_fragis; - return ncoded; -} - -static void oc_enc_sb_transform_quantize_chroma(oc_enc_ctx *_enc, - oc_enc_pipeline_state *_pipe,int _pli,int _sbi_start,int _sbi_end){ - const oc_sb_map *sb_maps; - oc_sb_flags *sb_flags; - ptrdiff_t *coded_fragis; - ptrdiff_t ncoded_fragis; - ptrdiff_t *uncoded_fragis; - ptrdiff_t nuncoded_fragis; - int sbi; - sb_maps=(const oc_sb_map *)_enc->state.sb_maps; - sb_flags=_enc->state.sb_flags; - coded_fragis=_pipe->coded_fragis[_pli]; - ncoded_fragis=_pipe->ncoded_fragis[_pli]; - uncoded_fragis=_pipe->uncoded_fragis[_pli]; - nuncoded_fragis=_pipe->nuncoded_fragis[_pli]; - for(sbi=_sbi_start;sbi<_sbi_end;sbi++){ - /*Worst case token stack usage for 1 fragment.*/ - oc_token_checkpoint stack[64]; - oc_rd_metric mo; - int 
quadi; - int bi; - memset(&mo,0,sizeof(mo)); - for(quadi=0;quadi<4;quadi++)for(bi=0;bi<4;bi++){ - ptrdiff_t fragi; - fragi=sb_maps[sbi][quadi][bi]; - if(fragi>=0){ - oc_token_checkpoint *stackptr; - stackptr=stack; - if(oc_enc_block_transform_quantize(_enc, - _pipe,_pli,fragi,oc_fr_cost1(_pipe->fr+_pli),&mo,&stackptr)){ - coded_fragis[ncoded_fragis++]=fragi; - oc_fr_code_block(_pipe->fr+_pli); - } - else{ - *(uncoded_fragis-++nuncoded_fragis)=fragi; - oc_fr_skip_block(_pipe->fr+_pli); - } - } - } - oc_fr_state_flush_sb(_pipe->fr+_pli); - sb_flags[sbi].coded_fully=_pipe->fr[_pli].sb_full; - sb_flags[sbi].coded_partially=_pipe->fr[_pli].sb_partial; - } - _pipe->ncoded_fragis[_pli]=ncoded_fragis; - _pipe->nuncoded_fragis[_pli]=nuncoded_fragis; -} - -/*Mode decision is done by exhaustively examining all potential choices. - Obviously, doing the motion compensation, fDCT, tokenization, and then - counting the bits each token uses is computationally expensive. - Theora's EOB runs can also split the cost of these tokens across multiple - fragments, and naturally we don't know what the optimal choice of Huffman - codes will be until we know all the tokens we're going to encode in all the - fragments. - So we use a simple approach to estimating the bit cost and distortion of each - mode based upon the SATD value of the residual before coding. - The mathematics behind the technique are outlined by Kim \cite{Kim03}, but - the process (modified somewhat from that of the paper) is very simple. - We build a non-linear regression of the mappings from - (pre-transform+quantization) SATD to (post-transform+quantization) bits and - SSD for each qi. - A separate set of mappings is kept for each quantization type and color - plane. - The mappings are constructed by partitioning the SATD values into a small - number of bins (currently 24) and using a linear regression in each bin - (as opposed to the 0th-order regression used by Kim). 
- The bit counts and SSD measurements are obtained by examining actual encoded - frames, with appropriate lambda values and optimal Huffman codes selected. - EOB bits are assigned to the fragment that started the EOB run (as opposed to - dividing them among all the blocks in the run; though the latter approach - seems more theoretically correct, Monty's testing showed a small improvement - with the former, though that may have been merely statistical noise). - - @ARTICLE{Kim03, - author="Hyun Mun Kim", - title="Adaptive Rate Control Using Nonlinear Regression", - journal="IEEE Transactions on Circuits and Systems for Video Technology", - volume=13, - number=5, - pages="432--439", - month=May, - year=2003 - }*/ - -/*Computes (_ssd+_lambda*_rate)/(1<>OC_BIT_SCALE)+((_rate)>>OC_BIT_SCALE)*(_lambda) \ - +(((_ssd)&(1<>1)>>OC_BIT_SCALE) - -/*Estimate the R-D cost of the DCT coefficients given the SATD of a block after - prediction.*/ -static unsigned oc_dct_cost2(unsigned *_ssd, - int _qi,int _pli,int _qti,int _satd){ - unsigned rmse; - int bin; - int dx; - int y0; - int z0; - int dy; - int dz; - /*SATD metrics for chroma planes vary much less than luma, so we scale them - by 4 to distribute them into the mode decision bins more evenly.*/ - _satd<<=_pli+1&2; - bin=OC_MINI(_satd>>OC_SAD_SHIFT,OC_SAD_BINS-2); - dx=_satd-(bin<>OC_SAD_SHIFT),0); - *_ssd=rmse*rmse>>2*OC_RMSE_SCALE-OC_BIT_SCALE; - return OC_MAXI(y0+(dy*dx>>OC_SAD_SHIFT),0); -} - -/*Select luma block-level quantizers for a MB in an INTRA frame.*/ -static unsigned oc_analyze_intra_mb_luma(oc_enc_ctx *_enc, - const oc_qii_state *_qs,unsigned _mbi){ - const unsigned char *src; - const ptrdiff_t *frag_buf_offs; - const oc_sb_map *sb_maps; - oc_fragment *frags; - ptrdiff_t frag_offs; - ptrdiff_t fragi; - oc_qii_state qs[4][3]; - unsigned cost[4][3]; - unsigned ssd[4][3]; - unsigned rate[4][3]; - int prev[3][3]; - unsigned satd; - unsigned best_cost; - unsigned best_ssd; - unsigned best_rate; - int best_qii; - int 
qii; - int lambda; - int ystride; - int nqis; - int bi; - frag_buf_offs=_enc->state.frag_buf_offs; - sb_maps=(const oc_sb_map *)_enc->state.sb_maps; - src=_enc->state.ref_frame_data[OC_FRAME_IO]; - ystride=_enc->state.ref_ystride[0]; - fragi=sb_maps[_mbi>>2][_mbi&3][0]; - frag_offs=frag_buf_offs[fragi]; - satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride); - nqis=_enc->state.nqis; - lambda=_enc->lambda; - for(qii=0;qiistate.qis[qii],0,0,satd) - +(qs[0][qii].bits-_qs->bits<>2][_mbi&3][bi]; - frag_offs=frag_buf_offs[fragi]; - satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride); - for(qii=0;qiistate.qis[qii],0,0,satd); - best_ssd=ssd[bi-1][0]+cur_ssd; - best_rate=rate[bi-1][0]+cur_rate - +(qt[0].bits-qs[bi-1][0].bits<state.frags; - for(bi=3;;){ - fragi=sb_maps[_mbi>>2][_mbi&3][bi]; - frags[fragi].qii=best_qii; - if(bi--<=0)break; - best_qii=prev[bi][best_qii]; - } - return best_cost; -} - -/*Select a block-level quantizer for a single chroma block in an INTRA frame.*/ -static unsigned oc_analyze_intra_chroma_block(oc_enc_ctx *_enc, - const oc_qii_state *_qs,int _pli,ptrdiff_t _fragi){ - const unsigned char *src; - oc_fragment *frags; - ptrdiff_t frag_offs; - oc_qii_state qt[3]; - unsigned cost[3]; - unsigned satd; - unsigned best_cost; - int best_qii; - int qii; - int lambda; - int ystride; - int nqis; - src=_enc->state.ref_frame_data[OC_FRAME_IO]; - ystride=_enc->state.ref_ystride[_pli]; - frag_offs=_enc->state.frag_buf_offs[_fragi]; - satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride); - nqis=_enc->state.nqis; - lambda=_enc->lambda; - best_qii=0; - for(qii=0;qiistate.qis[qii],_pli,0,satd) - +(qt[qii].bits-_qs->bits<state.frags; - frags[_fragi].qii=best_qii; - return best_cost; -} - -static void oc_enc_sb_transform_quantize_intra_chroma(oc_enc_ctx *_enc, - oc_enc_pipeline_state *_pipe,int _pli,int _sbi_start,int _sbi_end){ - const oc_sb_map *sb_maps; - oc_sb_flags *sb_flags; - ptrdiff_t *coded_fragis; - ptrdiff_t ncoded_fragis; - int sbi; - 
sb_maps=(const oc_sb_map *)_enc->state.sb_maps; - sb_flags=_enc->state.sb_flags; - coded_fragis=_pipe->coded_fragis[_pli]; - ncoded_fragis=_pipe->ncoded_fragis[_pli]; - for(sbi=_sbi_start;sbi<_sbi_end;sbi++){ - /*Worst case token stack usage for 1 fragment.*/ - oc_token_checkpoint stack[64]; - int quadi; - int bi; - for(quadi=0;quadi<4;quadi++)for(bi=0;bi<4;bi++){ - ptrdiff_t fragi; - fragi=sb_maps[sbi][quadi][bi]; - if(fragi>=0){ - oc_token_checkpoint *stackptr; - oc_analyze_intra_chroma_block(_enc,_pipe->qs+_pli,_pli,fragi); - stackptr=stack; - oc_enc_block_transform_quantize(_enc, - _pipe,_pli,fragi,0,NULL,&stackptr); - coded_fragis[ncoded_fragis++]=fragi; - } - } - } - _pipe->ncoded_fragis[_pli]=ncoded_fragis; -} - -/*Analysis stage for an INTRA frame.*/ -void oc_enc_analyze_intra(oc_enc_ctx *_enc,int _recode){ - oc_enc_pipeline_state pipe; - const unsigned char *map_idxs; - int nmap_idxs; - oc_sb_flags *sb_flags; - signed char *mb_modes; - const oc_mb_map *mb_maps; - oc_mb_enc_info *embs; - oc_fragment *frags; - unsigned stripe_sby; - unsigned mcu_nvsbs; - int notstart; - int notdone; - int refi; - int pli; - _enc->state.frame_type=OC_INTRA_FRAME; - oc_enc_tokenize_start(_enc); - oc_enc_pipeline_init(_enc,&pipe); - /*Choose MVs and MB modes and quantize and code luma. 
- Must be done in Hilbert order.*/ - map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt]; - nmap_idxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; - _enc->state.ncoded_fragis[0]=0; - _enc->state.ncoded_fragis[1]=0; - _enc->state.ncoded_fragis[2]=0; - sb_flags=_enc->state.sb_flags; - mb_modes=_enc->state.mb_modes; - mb_maps=(const oc_mb_map *)_enc->state.mb_maps; - embs=_enc->mb_info; - frags=_enc->state.frags; - notstart=0; - notdone=1; - mcu_nvsbs=_enc->mcu_nvsbs; - for(stripe_sby=0;notdone;stripe_sby+=mcu_nvsbs){ - unsigned sbi; - unsigned sbi_end; - notdone=oc_enc_pipeline_set_stripe(_enc,&pipe,stripe_sby); - sbi_end=pipe.sbi_end[0]; - for(sbi=pipe.sbi0[0];sbistate.curframe_num>0)oc_mcenc_search(_enc,mbi); - oc_analyze_intra_mb_luma(_enc,pipe.qs+0,mbi); - mb_modes[mbi]=OC_MODE_INTRA; - oc_enc_mb_transform_quantize_luma(_enc,&pipe,mbi,0); - /*Propagate final MB mode and MVs to the chroma blocks.*/ - for(mapii=4;mapii>2; - bi=mapi&3; - fragi=mb_maps[mbi][pli][bi]; - frags[fragi].mb_mode=OC_MODE_INTRA; - } - } - } - oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,0,notstart,notdone); - /*Code chroma planes.*/ - for(pli=1;pli<3;pli++){ - oc_enc_sb_transform_quantize_intra_chroma(_enc,&pipe, - pli,pipe.sbi0[pli],pipe.sbi_end[pli]); - oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,pli,notstart,notdone); - } - notstart=1; - } - /*Finish filling in the reference frame borders.*/ - refi=_enc->state.ref_frame_idx[OC_FRAME_SELF]; - for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_enc->state,refi,pli); - _enc->state.ntotal_coded_fragis=_enc->state.nfrags; -} - - - -/*Cost information about a MB mode.*/ -struct oc_mode_choice{ - unsigned cost; - unsigned ssd; - unsigned rate; - unsigned overhead; - unsigned char qii[12]; -}; - - - -static void oc_mode_set_cost(oc_mode_choice *_modec,int _lambda){ - _modec->cost=OC_MODE_RD_COST(_modec->ssd, - _modec->rate+_modec->overhead,_lambda); -} - -/*A set of skip SSD's to use to disable early skipping.*/ -static const unsigned 
OC_NOSKIP[12]={ - UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX, - UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX, - UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX -}; - -/*The estimated number of bits used by a coded chroma block to specify the AC - quantizer. - TODO: Currently this is just 0.5*log2(3) (estimating about 50% compression); - measurements suggest this is in the right ballpark, but it varies somewhat - with lambda.*/ -#define OC_CHROMA_QII_RATE ((0xCAE00D1DU>>31-OC_BIT_SCALE)+1>>1) - -static void oc_analyze_mb_mode_luma(oc_enc_ctx *_enc, - oc_mode_choice *_modec,const oc_fr_state *_fr,const oc_qii_state *_qs, - const unsigned _frag_satd[12],const unsigned _skip_ssd[12],int _qti){ - oc_fr_state fr; - oc_qii_state qs; - unsigned ssd; - unsigned rate; - int overhead; - unsigned satd; - unsigned best_ssd; - unsigned best_rate; - int best_overhead; - int best_fri; - int best_qii; - unsigned cur_cost; - unsigned cur_ssd; - unsigned cur_rate; - int cur_overhead; - int lambda; - int nqis; - int nskipped; - int bi; - int qii; - lambda=_enc->lambda; - nqis=_enc->state.nqis; - /*We could do a trellis optimization here, but we don't make final skip - decisions until after transform+quantization, so the result wouldn't be - optimal anyway. 
- Instead we just use a greedy approach; for most SATD values, the - differences between the qiis are large enough to drown out the cost to - code the flags, anyway.*/ - *&fr=*_fr; - *&qs=*_qs; - ssd=rate=overhead=nskipped=0; - for(bi=0;bi<4;bi++){ - oc_fr_state ft[2]; - oc_qii_state qt[3]; - unsigned best_cost; - satd=_frag_satd[bi]; - *(ft+0)=*&fr; - oc_fr_code_block(ft+0); - oc_qii_state_advance(qt+0,&qs,0); - best_overhead=(ft[0].bits-fr.bits<state.qis[0],0,_qti,satd) - +(qt[0].bits-qs.bits<state.qis[qii],0,_qti,satd) - +(qt[qii].bits-qs.bits<qii[bi]=best_qii; - } - _modec->ssd=ssd; - _modec->rate=rate; - _modec->overhead=OC_MAXI(overhead,0); -} - -static void oc_analyze_mb_mode_chroma(oc_enc_ctx *_enc, - oc_mode_choice *_modec,const oc_fr_state *_fr,const oc_qii_state *_qs, - const unsigned _frag_satd[12],const unsigned _skip_ssd[12],int _qti){ - unsigned ssd; - unsigned rate; - unsigned satd; - unsigned best_ssd; - unsigned best_rate; - int best_qii; - unsigned cur_cost; - unsigned cur_ssd; - unsigned cur_rate; - int lambda; - int nblocks; - int nqis; - int pli; - int bi; - int qii; - lambda=_enc->lambda; - nqis=_enc->state.nqis; - ssd=_modec->ssd; - rate=_modec->rate; - /*Because (except in 4:4:4 mode) we aren't considering chroma blocks in coded - order, we assume a constant overhead for coded block and qii flags.*/ - nblocks=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; - nblocks=(nblocks-4>>1)+4; - bi=4; - for(pli=1;pli<3;pli++){ - for(;bistate.qis[0],pli,_qti,satd) - +OC_CHROMA_QII_RATE; - best_cost=OC_MODE_RD_COST(ssd+best_ssd,rate+best_rate,lambda); - best_qii=0; - for(qii=1;qiistate.qis[qii],0,_qti,satd) - +OC_CHROMA_QII_RATE; - cur_cost=OC_MODE_RD_COST(ssd+cur_ssd,rate+cur_rate,lambda); - if(cur_costqii[bi]=best_qii; - } - nblocks=(nblocks-4<<1)+4; - } - _modec->ssd=ssd; - _modec->rate=rate; -} - -static void oc_skip_cost(oc_enc_ctx *_enc,oc_enc_pipeline_state *_pipe, - unsigned _mbi,unsigned _ssd[12]){ - OC_ALIGN16(ogg_int16_t buffer[64]); - const 
unsigned char *src; - const unsigned char *ref; - int ystride; - const oc_fragment *frags; - const ptrdiff_t *frag_buf_offs; - const ptrdiff_t *sb_map; - const oc_mb_map_plane *mb_map; - const unsigned char *map_idxs; - int map_nidxs; - ogg_int64_t mask; - unsigned uncoded_ssd; - int uncoded_dc; - unsigned dc_dequant; - int dc_flag; - int mapii; - int mapi; - int pli; - int bi; - ptrdiff_t fragi; - ptrdiff_t frag_offs; - int borderi; - int pi; - src=_enc->state.ref_frame_data[OC_FRAME_IO]; - ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]]; - ystride=_enc->state.ref_ystride[0]; - frags=_enc->state.frags; - frag_buf_offs=_enc->state.frag_buf_offs; - sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3]; - dc_dequant=_enc->state.dequant_tables[_enc->state.qis[0]][0][1][0]; - for(bi=0;bi<4;bi++){ - fragi=sb_map[bi]; - frag_offs=frag_buf_offs[fragi]; - oc_enc_frag_sub(_enc,buffer,src+frag_offs,ref+frag_offs,ystride); - borderi=frags[fragi].borderi; - uncoded_ssd=uncoded_dc=0; - if(borderi<0){ - for(pi=0;pi<64;pi++){ - uncoded_ssd+=buffer[pi]*buffer[pi]; - uncoded_dc+=buffer[pi]; - } - } - else{ - ogg_int64_t mask; - mask=_enc->state.borders[borderi].mask; - for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){ - uncoded_ssd+=buffer[pi]*buffer[pi]; - uncoded_dc+=buffer[pi]; - } - } - /*Scale to match DCT domain.*/ - uncoded_ssd<<=4; - /*We actually only want the AC contribution to the SSD.*/ - uncoded_ssd-=uncoded_dc*uncoded_dc>>2; - /*DC is a special case; if there's more than a full-quantizer improvement - in the effective DC component, always force-code the block.*/ - dc_flag=abs(uncoded_dc)>dc_dequant<<1; - uncoded_ssd|=-dc_flag; - _pipe->skip_ssd[0][fragi-_pipe->froffset[0]]=_ssd[bi]=uncoded_ssd; - } - mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi]; - map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; - map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt]; - map_nidxs=(map_nidxs-4>>1)+4; - mapii=4; - for(pli=1;pli<3;pli++){ - 
ystride=_enc->state.ref_ystride[pli]; - dc_dequant=_enc->state.dequant_tables[_enc->state.qis[0]][pli][1][0]; - for(;mapiistate.borders[borderi].mask; - for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){ - uncoded_ssd+=buffer[pi]*buffer[pi]; - uncoded_dc+=buffer[pi]; - } - } - /*Scale to match DCT domain.*/ - uncoded_ssd<<=4; - /*We actually only want the AC contribution to the SSD.*/ - uncoded_ssd-=uncoded_dc*uncoded_dc>>2; - /*DC is a special case; if there's more than a full-quantizer improvement - in the effective DC component, always force-code the block.*/ - dc_flag=abs(uncoded_dc)>dc_dequant<<1; - uncoded_ssd|=-dc_flag; - _pipe->skip_ssd[pli][fragi-_pipe->froffset[pli]]=_ssd[mapii]=uncoded_ssd; - } - map_nidxs=(map_nidxs-4<<1)+4; - } -} - -static void oc_mb_intra_satd(oc_enc_ctx *_enc,unsigned _mbi, - unsigned _frag_satd[12]){ - const unsigned char *src; - const ptrdiff_t *frag_buf_offs; - const ptrdiff_t *sb_map; - const oc_mb_map_plane *mb_map; - const unsigned char *map_idxs; - int map_nidxs; - int mapii; - int mapi; - int ystride; - int pli; - int bi; - ptrdiff_t fragi; - ptrdiff_t frag_offs; - frag_buf_offs=_enc->state.frag_buf_offs; - sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3]; - src=_enc->state.ref_frame_data[OC_FRAME_IO]; - ystride=_enc->state.ref_ystride[0]; - for(bi=0;bi<4;bi++){ - fragi=sb_map[bi]; - frag_offs=frag_buf_offs[fragi]; - _frag_satd[bi]=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride); - } - mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi]; - map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt]; - map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; - /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/ - ystride=_enc->state.ref_ystride[1]; - for(mapii=4;mapii>2; - bi=mapi&3; - fragi=mb_map[pli][bi]; - frag_offs=frag_buf_offs[fragi]; - _frag_satd[mapii]=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride); - } -} - -static void oc_cost_intra(oc_enc_ctx *_enc,oc_mode_choice *_modec, - unsigned _mbi,const oc_fr_state *_fr,const 
oc_qii_state *_qs, - const unsigned _frag_satd[12],const unsigned _skip_ssd[12]){ - oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,_frag_satd,_skip_ssd,0); - oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,_frag_satd,_skip_ssd,0); - _modec->overhead+= - oc_mode_scheme_chooser_cost(&_enc->chooser,OC_MODE_INTRA)<lambda); -} - -static void oc_cost_inter(oc_enc_ctx *_enc,oc_mode_choice *_modec, - unsigned _mbi,int _mb_mode,const signed char *_mv, - const oc_fr_state *_fr,const oc_qii_state *_qs,const unsigned _skip_ssd[12]){ - unsigned frag_satd[12]; - const unsigned char *src; - const unsigned char *ref; - int ystride; - const ptrdiff_t *frag_buf_offs; - const ptrdiff_t *sb_map; - const oc_mb_map_plane *mb_map; - const unsigned char *map_idxs; - int map_nidxs; - int mapii; - int mapi; - int mv_offs[2]; - int dx; - int dy; - int pli; - int bi; - ptrdiff_t fragi; - ptrdiff_t frag_offs; - src=_enc->state.ref_frame_data[OC_FRAME_IO]; - ref=_enc->state.ref_frame_data[ - _enc->state.ref_frame_idx[OC_FRAME_FOR_MODE(_mb_mode)]]; - ystride=_enc->state.ref_ystride[0]; - frag_buf_offs=_enc->state.frag_buf_offs; - sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3]; - dx=_mv[0]; - dy=_mv[1]; - _modec->rate=_modec->ssd=0; - if(oc_state_get_mv_offsets(&_enc->state,mv_offs,0,dx,dy)>1){ - for(bi=0;bi<4;bi++){ - fragi=sb_map[bi]; - frag_offs=frag_buf_offs[fragi]; - frag_satd[bi]=oc_enc_frag_satd2_thresh(_enc,src+frag_offs, - ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX); - } - } - else{ - for(bi=0;bi<4;bi++){ - fragi=sb_map[bi]; - frag_offs=frag_buf_offs[fragi]; - frag_satd[bi]=oc_enc_frag_satd_thresh(_enc,src+frag_offs, - ref+frag_offs+mv_offs[0],ystride,UINT_MAX); - } - } - mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi]; - map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt]; - map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; - /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/ - ystride=_enc->state.ref_ystride[1]; - 
if(oc_state_get_mv_offsets(&_enc->state,mv_offs,1,dx,dy)>1){ - for(mapii=4;mapii>2; - bi=mapi&3; - fragi=mb_map[pli][bi]; - frag_offs=frag_buf_offs[fragi]; - frag_satd[mapii]=oc_enc_frag_satd2_thresh(_enc,src+frag_offs, - ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX); - } - } - else{ - for(mapii=4;mapii>2; - bi=mapi&3; - fragi=mb_map[pli][bi]; - frag_offs=frag_buf_offs[fragi]; - frag_satd[mapii]=oc_enc_frag_satd_thresh(_enc,src+frag_offs, - ref+frag_offs+mv_offs[0],ystride,UINT_MAX); - } - } - oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,1); - oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,1); - _modec->overhead+= - oc_mode_scheme_chooser_cost(&_enc->chooser,_mb_mode)<lambda); -} - -static void oc_cost_inter_nomv(oc_enc_ctx *_enc,oc_mode_choice *_modec, - unsigned _mbi,int _mb_mode,const oc_fr_state *_fr,const oc_qii_state *_qs, - const unsigned _skip_ssd[12]){ - static const oc_mv OC_MV_ZERO; - oc_cost_inter(_enc,_modec,_mbi,_mb_mode,OC_MV_ZERO,_fr,_qs,_skip_ssd); -} - -static int oc_cost_inter1mv(oc_enc_ctx *_enc,oc_mode_choice *_modec, - unsigned _mbi,int _mb_mode,const signed char *_mv, - const oc_fr_state *_fr,const oc_qii_state *_qs,const unsigned _skip_ssd[12]){ - int bits0; - oc_cost_inter(_enc,_modec,_mbi,_mb_mode,_mv,_fr,_qs,_skip_ssd); - bits0=OC_MV_BITS[0][_mv[0]+31]+OC_MV_BITS[0][_mv[1]+31]; - _modec->overhead+=OC_MINI(_enc->mv_bits[0]+bits0,_enc->mv_bits[1]+12) - -OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1])<lambda); - return bits0; -} - -/*A mapping from oc_mb_map (raster) ordering to oc_sb_map (Hilbert) ordering.*/ -static const unsigned char OC_MB_PHASE[4][4]={ - {0,1,3,2},{0,3,1,2},{0,3,1,2},{2,3,1,0} -}; - -static void oc_cost_inter4mv(oc_enc_ctx *_enc,oc_mode_choice *_modec, - unsigned _mbi,oc_mv _mv[4],const oc_fr_state *_fr,const oc_qii_state *_qs, - const unsigned _skip_ssd[12]){ - unsigned frag_satd[12]; - oc_mv lbmvs[4]; - oc_mv cbmvs[4]; - const unsigned char *src; - const unsigned 
char *ref; - int ystride; - const ptrdiff_t *frag_buf_offs; - oc_mv *frag_mvs; - const oc_mb_map_plane *mb_map; - const unsigned char *map_idxs; - int map_nidxs; - int nqis; - int mapii; - int mapi; - int mv_offs[2]; - int dx; - int dy; - int pli; - int bi; - ptrdiff_t fragi; - ptrdiff_t frag_offs; - int bits0; - int bits1; - unsigned satd; - src=_enc->state.ref_frame_data[OC_FRAME_IO]; - ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]]; - ystride=_enc->state.ref_ystride[0]; - frag_buf_offs=_enc->state.frag_buf_offs; - frag_mvs=_enc->state.frag_mvs; - mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi]; - _modec->rate=_modec->ssd=0; - for(bi=0;bi<4;bi++){ - fragi=mb_map[0][bi]; - dx=_mv[bi][0]; - dy=_mv[bi][1]; - /*Save the block MVs as the current ones while we're here; we'll replace - them if we don't ultimately choose 4MV mode.*/ - frag_mvs[fragi][0]=(signed char)dx; - frag_mvs[fragi][1]=(signed char)dy; - frag_offs=frag_buf_offs[fragi]; - if(oc_state_get_mv_offsets(&_enc->state,mv_offs,0,dx,dy)>1){ - satd=oc_enc_frag_satd2_thresh(_enc,src+frag_offs, - ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX); - } - else{ - satd=oc_enc_frag_satd_thresh(_enc,src+frag_offs, - ref+frag_offs+mv_offs[0],ystride,UINT_MAX); - } - frag_satd[OC_MB_PHASE[_mbi&3][bi]]=satd; - } - oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,frag_satd, - _enc->vp3_compatible?OC_NOSKIP:_skip_ssd,1); - /*Figure out which blocks are being skipped and give them (0,0) MVs.*/ - bits0=0; - bits1=0; - nqis=_enc->state.nqis; - for(bi=0;bi<4;bi++){ - if(_modec->qii[OC_MB_PHASE[_mbi&3][bi]]>=nqis){ - memset(lbmvs+bi,0,sizeof(*lbmvs)); - } - else{ - memcpy(lbmvs+bi,_mv+bi,sizeof(*lbmvs)); - bits0+=OC_MV_BITS[0][_mv[bi][0]+31]+OC_MV_BITS[0][_mv[bi][1]+31]; - bits1+=12; - } - } - (*OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt])(cbmvs, - (const oc_mv *)lbmvs); - map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt]; - 
map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; - /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/ - ystride=_enc->state.ref_ystride[1]; - for(mapii=4;mapii>2; - bi=mapi&3; - fragi=mb_map[pli][bi]; - dx=cbmvs[bi][0]; - dy=cbmvs[bi][1]; - frag_offs=frag_buf_offs[fragi]; - /*TODO: We could save half these calls by re-using the results for the Cb - and Cr planes; is it worth it?*/ - if(oc_state_get_mv_offsets(&_enc->state,mv_offs,pli,dx,dy)>1){ - satd=oc_enc_frag_satd2_thresh(_enc,src+frag_offs, - ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX); - } - else{ - satd=oc_enc_frag_satd_thresh(_enc,src+frag_offs, - ref+frag_offs+mv_offs[0],ystride,UINT_MAX); - } - frag_satd[mapii]=satd; - } - oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,1); - _modec->overhead+= - oc_mode_scheme_chooser_cost(&_enc->chooser,OC_MODE_INTER_MV_FOUR) - +OC_MINI(_enc->mv_bits[0]+bits0,_enc->mv_bits[1]+bits1) - -OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1])<lambda); -} - -int oc_enc_analyze_inter(oc_enc_ctx *_enc,int _allow_keyframe,int _recode){ - oc_set_chroma_mvs_func set_chroma_mvs; - oc_enc_pipeline_state pipe; - oc_qii_state intra_luma_qs; - oc_mv last_mv; - oc_mv prior_mv; - ogg_int64_t interbits; - ogg_int64_t intrabits; - const unsigned char *map_idxs; - int nmap_idxs; - unsigned *coded_mbis; - unsigned *uncoded_mbis; - size_t ncoded_mbis; - size_t nuncoded_mbis; - oc_sb_flags *sb_flags; - signed char *mb_modes; - const oc_sb_map *sb_maps; - const oc_mb_map *mb_maps; - oc_mb_enc_info *embs; - oc_fragment *frags; - oc_mv *frag_mvs; - int qi; - unsigned stripe_sby; - unsigned mcu_nvsbs; - int notstart; - int notdone; - int vdec; - unsigned sbi; - unsigned sbi_end; - int refi; - int pli; - set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt]; - _enc->state.frame_type=OC_INTER_FRAME; - oc_mode_scheme_chooser_reset(&_enc->chooser); - oc_enc_tokenize_start(_enc); - oc_enc_pipeline_init(_enc,&pipe); - 
if(_allow_keyframe)oc_qii_state_init(&intra_luma_qs); - _enc->mv_bits[0]=_enc->mv_bits[1]=0; - interbits=intrabits=0; - last_mv[0]=last_mv[1]=prior_mv[0]=prior_mv[1]=0; - /*Choose MVs and MB modes and quantize and code luma. - Must be done in Hilbert order.*/ - map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt]; - nmap_idxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; - qi=_enc->state.qis[0]; - coded_mbis=_enc->coded_mbis; - uncoded_mbis=coded_mbis+_enc->state.nmbs; - ncoded_mbis=0; - nuncoded_mbis=0; - _enc->state.ncoded_fragis[0]=0; - _enc->state.ncoded_fragis[1]=0; - _enc->state.ncoded_fragis[2]=0; - sb_flags=_enc->state.sb_flags; - mb_modes=_enc->state.mb_modes; - sb_maps=(const oc_sb_map *)_enc->state.sb_maps; - mb_maps=(const oc_mb_map *)_enc->state.mb_maps; - embs=_enc->mb_info; - frags=_enc->state.frags; - frag_mvs=_enc->state.frag_mvs; - vdec=!(_enc->state.info.pixel_fmt&2); - notstart=0; - notdone=1; - mcu_nvsbs=_enc->mcu_nvsbs; - for(stripe_sby=0;notdone;stripe_sby+=mcu_nvsbs){ - notdone=oc_enc_pipeline_set_stripe(_enc,&pipe,stripe_sby); - sbi_end=pipe.sbi_end[0]; - for(sbi=pipe.sbi0[0];sbisp_levelsp_levellambda*3; - if(modes[OC_MODE_INTER_MV_FOUR].cost>2][mbi&3][bi]; - frags[fragi].qii=modes[mb_mode].qii[bi]; - } - if(oc_enc_mb_transform_quantize_luma(_enc,&pipe,mbi, - modes[mb_mode].overhead>>OC_BIT_SCALE)>0){ - int orig_mb_mode; - orig_mb_mode=mb_mode; - mb_mode=mb_modes[mbi]; - switch(mb_mode){ - case OC_MODE_INTER_MV:{ - memcpy(prior_mv,last_mv,sizeof(prior_mv)); - /*If we're backing out from 4MV, find the MV we're actually - using.*/ - if(orig_mb_mode==OC_MODE_INTER_MV_FOUR){ - for(bi=0;;bi++){ - fragi=mb_maps[mbi][0][bi]; - if(frags[fragi].coded){ - memcpy(last_mv,frag_mvs[fragi],sizeof(last_mv)); - dx=frag_mvs[fragi][0]; - dy=frag_mvs[fragi][1]; - break; - } - } - mb_mv_bits_0=OC_MV_BITS[0][dx+31]+OC_MV_BITS[0][dy+31]; - } - /*Otherwise we used the original analysis MV.*/ - else{ - memcpy(last_mv, - 
embs[mbi].analysis_mv[0][OC_FRAME_PREV],sizeof(last_mv)); - } - _enc->mv_bits[0]+=mb_mv_bits_0; - _enc->mv_bits[1]+=12; - }break; - case OC_MODE_INTER_MV_LAST2:{ - oc_mv tmp_mv; - memcpy(tmp_mv,prior_mv,sizeof(tmp_mv)); - memcpy(prior_mv,last_mv,sizeof(prior_mv)); - memcpy(last_mv,tmp_mv,sizeof(last_mv)); - }break; - case OC_MODE_GOLDEN_MV:{ - _enc->mv_bits[0]+=mb_gmv_bits_0; - _enc->mv_bits[1]+=12; - }break; - case OC_MODE_INTER_MV_FOUR:{ - oc_mv lbmvs[4]; - oc_mv cbmvs[4]; - memcpy(prior_mv,last_mv,sizeof(prior_mv)); - for(bi=0;bi<4;bi++){ - fragi=mb_maps[mbi][0][bi]; - if(frags[fragi].coded){ - memcpy(last_mv,frag_mvs[fragi],sizeof(last_mv)); - memcpy(lbmvs[bi],frag_mvs[fragi],sizeof(lbmvs[bi])); - _enc->mv_bits[0]+=OC_MV_BITS[0][frag_mvs[fragi][0]+31] - +OC_MV_BITS[0][frag_mvs[fragi][1]+31]; - _enc->mv_bits[1]+=12; - } - /*Replace the block MVs for not-coded blocks with (0,0).*/ - else memset(lbmvs[bi],0,sizeof(lbmvs[bi])); - } - (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs); - for(mapii=4;mapii>2; - bi=mapi&3; - fragi=mb_maps[mbi][pli][bi]; - frags[fragi].mb_mode=mb_mode; - frags[fragi].qii=modes[OC_MODE_INTER_MV_FOUR].qii[mapii]; - memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(frag_mvs[fragi])); - } - }break; - } - coded_mbis[ncoded_mbis++]=mbi; - oc_mode_scheme_chooser_update(&_enc->chooser,mb_mode); - interbits+=modes[mb_mode].rate+modes[mb_mode].overhead; - } - else{ - *(uncoded_mbis-++nuncoded_mbis)=mbi; - mb_mode=OC_MODE_INTER_NOMV; - dx=dy=0; - } - /*Propagate final MB mode and MVs to the chroma blocks. 
- This has already been done for 4MV mode, since it requires individual - block motion vectors.*/ - if(mb_mode!=OC_MODE_INTER_MV_FOUR){ - for(mapii=4;mapii>2; - bi=mapi&3; - fragi=mb_maps[mbi][pli][bi]; - frags[fragi].mb_mode=mb_mode; - /*If we switched from 4MV mode to INTER_MV mode, then the qii - values won't have been chosen with the right MV, but it's - probaby not worth re-estimating them.*/ - frags[fragi].qii=modes[mb_mode].qii[mapii]; - frag_mvs[fragi][0]=(signed char)dx; - frag_mvs[fragi][1]=(signed char)dy; - } - } - } - oc_fr_state_flush_sb(pipe.fr+0); - sb_flags[sbi].coded_fully=pipe.fr[0].sb_full; - sb_flags[sbi].coded_partially=pipe.fr[0].sb_partial; - } - oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,0,notstart,notdone); - /*Code chroma planes.*/ - for(pli=1;pli<3;pli++){ - oc_enc_sb_transform_quantize_chroma(_enc,&pipe, - pli,pipe.sbi0[pli],pipe.sbi_end[pli]); - oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,pli,notstart,notdone); - } - notstart=1; - } - /*Finish filling in the reference frame borders.*/ - refi=_enc->state.ref_frame_idx[OC_FRAME_SELF]; - for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_enc->state,refi,pli); - /*Finish adding flagging overhead costs to inter bit counts to determine if - we should have coded a key frame instead.*/ - if(_allow_keyframe){ - if(interbits>intrabits)return 1; - /*Technically the chroma plane counts are over-estimations, because they - don't account for continuing runs from the luma planes, but the - inaccuracy is small.*/ - for(pli=0;pli<3;pli++)interbits+=pipe.fr[pli].bits<mv_bits[0],_enc->mv_bits[1])<chooser.scheme_bits[_enc->chooser.scheme_list[0]]<intrabits)return 1; - } - _enc->ncoded_mbis=ncoded_mbis; - /*Compact the coded fragment list.*/ - { - ptrdiff_t ncoded_fragis; - ncoded_fragis=_enc->state.ncoded_fragis[0]; - for(pli=1;pli<3;pli++){ - memmove(_enc->state.coded_fragis+ncoded_fragis, - _enc->state.coded_fragis+_enc->state.fplanes[pli].froffset, - 
_enc->state.ncoded_fragis[pli]*sizeof(*_enc->state.coded_fragis)); - ncoded_fragis+=_enc->state.ncoded_fragis[pli]; - } - _enc->state.ntotal_coded_fragis=ncoded_fragis; - } - return 0; -} - -#if defined(OC_COLLECT_METRICS) -# include -# include - -/*TODO: It may be helpful (for block-level quantizers especially) to separate - out the contributions from AC and DC into separate tables.*/ - -# define OC_ZWEIGHT (0.25) - -static void oc_mode_metrics_add(oc_mode_metrics *_metrics, - double _w,int _satd,int _rate,double _rmse){ - double rate; - /*Accumulate statistics without the scaling; this lets us change the scale - factor yet still use old data.*/ - rate=ldexp(_rate,-OC_BIT_SCALE); - if(_metrics->fragw>0){ - double dsatd; - double drate; - double drmse; - double w; - dsatd=_satd-_metrics->satd/_metrics->fragw; - drate=rate-_metrics->rate/_metrics->fragw; - drmse=_rmse-_metrics->rmse/_metrics->fragw; - w=_metrics->fragw*_w/(_metrics->fragw+_w); - _metrics->satd2+=dsatd*dsatd*w; - _metrics->satdrate+=dsatd*drate*w; - _metrics->rate2+=drate*drate*w; - _metrics->satdrmse+=dsatd*drmse*w; - _metrics->rmse2+=drmse*drmse*w; - } - _metrics->fragw+=_w; - _metrics->satd+=_satd*_w; - _metrics->rate+=rate*_w; - _metrics->rmse+=_rmse*_w; -} - -static void oc_mode_metrics_merge(oc_mode_metrics *_dst, - const oc_mode_metrics *_src,int _n){ - int i; - /*Find a non-empty set of metrics.*/ - for(i=0;i<_n&&_src[i].fragw<=0;i++); - if(i>=_n){ - memset(_dst,0,sizeof(*_dst)); - return; - } - memcpy(_dst,_src+i,sizeof(*_dst)); - /*And iterate over the remaining non-empty sets of metrics.*/ - for(i++;i<_n;i++)if(_src[i].fragw>0){ - double wa; - double wb; - double dsatd; - double drate; - double drmse; - double w; - wa=_dst->fragw; - wb=_src[i].fragw; - dsatd=_src[i].satd/wb-_dst->satd/wa; - drate=_src[i].rate/wb-_dst->rate/wa; - drmse=_src[i].rmse/wb-_dst->rmse/wa; - w=wa*wb/(wa+wb); - _dst->fragw+=_src[i].fragw; - _dst->satd+=_src[i].satd; - _dst->rate+=_src[i].rate; - 
_dst->rmse+=_src[i].rmse; - _dst->satd2+=_src[i].satd2+dsatd*dsatd*w; - _dst->satdrate+=_src[i].satdrate+dsatd*drate*w; - _dst->rate2+=_src[i].rate2+drate*drate*w; - _dst->satdrmse+=_src[i].satdrmse+dsatd*drmse*w; - _dst->rmse2+=_src[i].rmse2+drmse*drmse*w; - } -} - -/*Compile collected SATD/rate/RMSE metrics into a form that's immediately - useful for mode decision.*/ -static void oc_enc_mode_metrics_update(oc_enc_ctx *_enc,int _qi){ - int pli; - int qti; - oc_restore_fpu(&_enc->state); - /*Convert raw collected data into cleaned up sample points.*/ - for(pli=0;pli<3;pli++){ - for(qti=0;qti<2;qti++){ - double fragw; - int bin0; - int bin1; - int bin; - fragw=0; - bin0=bin1=0; - for(bin=0;bin=OC_ZWEIGHT){ - fragw-=OC_MODE_METRICS[_qi][pli][qti][bin0++].fragw; - } - /*Merge statistics and fit lines.*/ - oc_mode_metrics_merge(&metrics, - OC_MODE_METRICS[_qi][pli][qti]+bin0,bin1-bin0); - if(metrics.fragw>0&&metrics.satd2>0){ - double a; - double b; - double msatd; - double mrate; - double mrmse; - double rate; - double rmse; - msatd=metrics.satd/metrics.fragw; - mrate=metrics.rate/metrics.fragw; - mrmse=metrics.rmse/metrics.fragw; - /*Compute the points on these lines corresponding to the actual bin - value.*/ - b=metrics.satdrate/metrics.satd2; - a=mrate-b*msatd; - rate=ldexp(a+b*(bin<>1); - return -_extra_bits; -} - -/*Handles the pure zero run tokens.*/ -static ptrdiff_t oc_token_skip_zrl(int _token,int _extra_bits){ - return _extra_bits+1; -} - -/*Handles a normal coefficient value token.*/ -static ptrdiff_t oc_token_skip_val(void){ - return 1; -} - -/*Handles a category 1A zero run/coefficient value combo token.*/ -static ptrdiff_t oc_token_skip_run_cat1a(int _token){ - return _token-OC_DCT_RUN_CAT1A+2; -} - -/*Handles category 1b, 1c, 2a, and 2b zero run/coefficient value combo tokens.*/ -static ptrdiff_t oc_token_skip_run(int _token,int _extra_bits){ - int run_cati; - int ncoeffs_mask; - int ncoeffs_adjust; - run_cati=_token-OC_DCT_RUN_CAT1B; - 
ncoeffs_mask=OC_BYTE_TABLE32(3,7,0,1,run_cati); - ncoeffs_adjust=OC_BYTE_TABLE32(7,11,2,3,run_cati); - return (_extra_bits&ncoeffs_mask)+ncoeffs_adjust; -} - -/*A jump table for computing the number of coefficients or blocks to skip for - a given token value. - This reduces all the conditional branches, etc., needed to parse these token - values down to one indirect jump.*/ -static const oc_token_skip_func OC_TOKEN_SKIP_TABLE[TH_NDCT_TOKENS]={ - oc_token_skip_eob, - oc_token_skip_eob, - oc_token_skip_eob, - oc_token_skip_eob, - oc_token_skip_eob, - oc_token_skip_eob, - oc_token_skip_eob6, - oc_token_skip_zrl, - oc_token_skip_zrl, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_val, - (oc_token_skip_func)oc_token_skip_run_cat1a, - (oc_token_skip_func)oc_token_skip_run_cat1a, - (oc_token_skip_func)oc_token_skip_run_cat1a, - (oc_token_skip_func)oc_token_skip_run_cat1a, - (oc_token_skip_func)oc_token_skip_run_cat1a, - oc_token_skip_run, - oc_token_skip_run, - oc_token_skip_run, - oc_token_skip_run -}; - -/*Determines the number of blocks or coefficients to be skipped for a given - token value. - _token: The token value to skip. - _extra_bits: The extra bits attached to this token. - Return: A positive value indicates that number of coefficients are to be - skipped in the current block. - Otherwise, the negative of the return value indicates that number of - blocks are to be ended. 
- 0 will never be returned, so that at least one coefficient in one - block will always be decoded for every token.*/ -static ptrdiff_t oc_dct_token_skip(int _token,int _extra_bits){ - return (*OC_TOKEN_SKIP_TABLE[_token])(_token,_extra_bits); -} - - - -void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc){ - static const unsigned char OC_ZZI_HUFF_OFFSET[64]={ - 0,16,16,16,16,16,32,32, - 32,32,32,32,32,32,32,48, - 48,48,48,48,48,48,48,48, - 48,48,48,48,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64 - }; - const oc_fragment *frags; - const unsigned *frag_satd; - const unsigned *frag_ssd; - const ptrdiff_t *coded_fragis; - ptrdiff_t ncoded_fragis; - ptrdiff_t fragii; - double fragw; - int qti; - int qii; - int qi; - int pli; - int zzi; - int token; - int eb; - oc_restore_fpu(&_enc->state); - /*Load any existing mode metrics if we haven't already.*/ - if(!oc_has_mode_metrics){ - FILE *fmetrics; - memset(OC_MODE_METRICS,0,sizeof(OC_MODE_METRICS)); - fmetrics=fopen("modedec.stats","rb"); - if(fmetrics!=NULL){ - fread(OC_MODE_METRICS,sizeof(OC_MODE_METRICS),1,fmetrics); - fclose(fmetrics); - } - for(qi=0;qi<64;qi++)oc_enc_mode_metrics_update(_enc,qi); - oc_has_mode_metrics=1; - } - qti=_enc->state.frame_type; - frags=_enc->state.frags; - frag_satd=_enc->frag_satd; - frag_ssd=_enc->frag_ssd; - coded_fragis=_enc->state.coded_fragis; - ncoded_fragis=fragii=0; - /*Weight the fragments by the inverse frame size; this prevents HD content - from dominating the statistics.*/ - fragw=1.0/_enc->state.nfrags; - for(pli=0;pli<3;pli++){ - ptrdiff_t ti[64]; - int eob_token[64]; - int eob_run[64]; - /*Set up token indices and eob run counts. 
- We don't bother trying to figure out the real cost of the runs that span - coefficients; instead we use the costs that were available when R-D - token optimization was done.*/ - for(zzi=0;zzi<64;zzi++){ - ti[zzi]=_enc->dct_token_offs[pli][zzi]; - if(ti[zzi]>0){ - token=_enc->dct_tokens[pli][zzi][0]; - eb=_enc->extra_bits[pli][zzi][0]; - eob_token[zzi]=token; - eob_run[zzi]=-oc_dct_token_skip(token,eb); - } - else{ - eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX; - eob_run[zzi]=0; - } - } - /*Scan the list of coded fragments for this plane.*/ - ncoded_fragis+=_enc->state.ncoded_fragis[pli]; - for(;fragii0){ - /*We've reached the end of the block.*/ - eob_run[zzi]--; - break; - } - huffi=_enc->huff_idxs[qti][zzi>0][pli+1>>1] - +OC_ZZI_HUFF_OFFSET[zzi]; - if(eob_token[zzi]huff_codes[huffi][eob_token[zzi]].nbits - +OC_DCT_TOKEN_EXTRA_BITS[eob_token[zzi]]; - eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX; - } - token=_enc->dct_tokens[pli][zzi][ti[zzi]]; - eb=_enc->extra_bits[pli][zzi][ti[zzi]]; - ti[zzi]++; - skip=oc_dct_token_skip(token,eb); - if(skip<0){ - eob_token[zzi]=token; - eob_run[zzi]=-skip; - } - else{ - /*A regular DCT value token; accumulate the bits for it.*/ - frag_bits+=_enc->huff_codes[huffi][token].nbits - +OC_DCT_TOKEN_EXTRA_BITS[token]; - zzi+=skip; - } - } - mb_mode=frags[fragi].mb_mode; - qi=_enc->state.qis[frags[fragi].qii]; - satd=frag_satd[fragi]<<(pli+1&2); - bin=OC_MINI(satd>>OC_SAD_SHIFT,OC_SAD_BINS-1); - oc_mode_metrics_add(OC_MODE_METRICS[qi][pli][mb_mode!=OC_MODE_INTRA]+bin, - fragw,satd,frag_bits<state.nqis;qii++){ - oc_enc_mode_metrics_update(_enc,_enc->state.qis[qii]); - } -} - -void oc_enc_mode_metrics_dump(oc_enc_ctx *_enc){ - FILE *fmetrics; - int qi; - /*Generate sample points for complete list of QI values.*/ - for(qi=0;qi<64;qi++)oc_enc_mode_metrics_update(_enc,qi); - fmetrics=fopen("modedec.stats","wb"); - if(fmetrics!=NULL){ - fwrite(OC_MODE_METRICS,sizeof(OC_MODE_METRICS),1,fmetrics); - fclose(fmetrics); - } - fprintf(stdout, - "/*File 
generated by libtheora with OC_COLLECT_METRICS" - " defined at compile time.*/\n" - "#if !defined(_modedec_H)\n" - "# define _modedec_H (1)\n" - "\n" - "\n" - "\n" - "# if defined(OC_COLLECT_METRICS)\n" - "typedef struct oc_mode_metrics oc_mode_metrics;\n" - "# endif\n" - "typedef struct oc_mode_rd oc_mode_rd;\n" - "\n" - "\n" - "\n" - "/*The number of extra bits of precision at which to store rate" - " metrics.*/\n" - "# define OC_BIT_SCALE (%i)\n" - "/*The number of extra bits of precision at which to store RMSE metrics.\n" - " This must be at least half OC_BIT_SCALE (rounded up).*/\n" - "# define OC_RMSE_SCALE (%i)\n" - "/*The number of bins to partition statistics into.*/\n" - "# define OC_SAD_BINS (%i)\n" - "/*The number of bits of precision to drop" - " from SAD scores to assign them to a\n" - " bin.*/\n" - "# define OC_SAD_SHIFT (%i)\n" - "\n" - "\n" - "\n" - "# if defined(OC_COLLECT_METRICS)\n" - "struct oc_mode_metrics{\n" - " double fragw;\n" - " double satd;\n" - " double rate;\n" - " double rmse;\n" - " double satd2;\n" - " double satdrate;\n" - " double rate2;\n" - " double satdrmse;\n" - " double rmse2;\n" - "};\n" - "\n" - "\n" - "int oc_has_mode_metrics;\n" - "oc_mode_metrics OC_MODE_METRICS[64][3][2][OC_SAD_BINS];\n" - "# endif\n" - "\n" - "\n" - "\n" - "struct oc_mode_rd{\n" - " ogg_int16_t rate;\n" - " ogg_int16_t rmse;\n" - "};\n" - "\n" - "\n" - "# if !defined(OC_COLLECT_METRICS)\n" - "static const\n" - "# endif\n" - "oc_mode_rd OC_MODE_RD[64][3][2][OC_SAD_BINS]={\n", - OC_BIT_SCALE,OC_RMSE_SCALE,OC_SAD_BINS,OC_SAD_SHIFT); - for(qi=0;qi<64;qi++){ - int pli; - fprintf(stdout," {\n"); - for(pli=0;pli<3;pli++){ - int qti; - fprintf(stdout," {\n"); - for(qti=0;qti<2;qti++){ - int bin; - static const char *pl_names[3]={"Y'","Cb","Cr"}; - static const char *qti_names[2]={"INTRA","INTER"}; - fprintf(stdout," /*%s qi=%i %s*/\n", - pl_names[pli],qi,qti_names[qti]); - fprintf(stdout," {\n"); - fprintf(stdout," "); - for(bin=0;bin -#include -#include 
-#include "apiwrapper.h" - - - -const char *theora_version_string(void){ - return th_version_string(); -} - -ogg_uint32_t theora_version_number(void){ - return th_version_number(); -} - -void theora_info_init(theora_info *_ci){ - memset(_ci,0,sizeof(*_ci)); -} - -void theora_info_clear(theora_info *_ci){ - th_api_wrapper *api; - api=(th_api_wrapper *)_ci->codec_setup; - memset(_ci,0,sizeof(*_ci)); - if(api!=NULL){ - if(api->clear!=NULL)(*api->clear)(api); - _ogg_free(api); - } -} - -void theora_clear(theora_state *_th){ - /*Provide compatibility with mixed encoder and decoder shared lib versions.*/ - if(_th->internal_decode!=NULL){ - (*((oc_state_dispatch_vtable *)_th->internal_decode)->clear)(_th); - } - if(_th->internal_encode!=NULL){ - (*((oc_state_dispatch_vtable *)_th->internal_encode)->clear)(_th); - } - if(_th->i!=NULL)theora_info_clear(_th->i); - memset(_th,0,sizeof(*_th)); -} - -int theora_control(theora_state *_th,int _req,void *_buf,size_t _buf_sz){ - /*Provide compatibility with mixed encoder and decoder shared lib versions.*/ - if(_th->internal_decode!=NULL){ - return (*((oc_state_dispatch_vtable *)_th->internal_decode)->control)(_th, - _req,_buf,_buf_sz); - } - else if(_th->internal_encode!=NULL){ - return (*((oc_state_dispatch_vtable *)_th->internal_encode)->control)(_th, - _req,_buf,_buf_sz); - } - else return TH_EINVAL; -} - -ogg_int64_t theora_granule_frame(theora_state *_th,ogg_int64_t _gp){ - /*Provide compatibility with mixed encoder and decoder shared lib versions.*/ - if(_th->internal_decode!=NULL){ - return (*((oc_state_dispatch_vtable *)_th->internal_decode)->granule_frame)( - _th,_gp); - } - else if(_th->internal_encode!=NULL){ - return (*((oc_state_dispatch_vtable *)_th->internal_encode)->granule_frame)( - _th,_gp); - } - else return -1; -} - -double theora_granule_time(theora_state *_th, ogg_int64_t _gp){ - /*Provide compatibility with mixed encoder and decoder shared lib versions.*/ - if(_th->internal_decode!=NULL){ - return 
(*((oc_state_dispatch_vtable *)_th->internal_decode)->granule_time)( - _th,_gp); - } - else if(_th->internal_encode!=NULL){ - return (*((oc_state_dispatch_vtable *)_th->internal_encode)->granule_time)( - _th,_gp); - } - else return -1; -} - -void oc_theora_info2th_info(th_info *_info,const theora_info *_ci){ - _info->version_major=_ci->version_major; - _info->version_minor=_ci->version_minor; - _info->version_subminor=_ci->version_subminor; - _info->frame_width=_ci->width; - _info->frame_height=_ci->height; - _info->pic_width=_ci->frame_width; - _info->pic_height=_ci->frame_height; - _info->pic_x=_ci->offset_x; - _info->pic_y=_ci->offset_y; - _info->fps_numerator=_ci->fps_numerator; - _info->fps_denominator=_ci->fps_denominator; - _info->aspect_numerator=_ci->aspect_numerator; - _info->aspect_denominator=_ci->aspect_denominator; - switch(_ci->colorspace){ - case OC_CS_ITU_REC_470M:_info->colorspace=TH_CS_ITU_REC_470M;break; - case OC_CS_ITU_REC_470BG:_info->colorspace=TH_CS_ITU_REC_470BG;break; - default:_info->colorspace=TH_CS_UNSPECIFIED;break; - } - switch(_ci->pixelformat){ - case OC_PF_420:_info->pixel_fmt=TH_PF_420;break; - case OC_PF_422:_info->pixel_fmt=TH_PF_422;break; - case OC_PF_444:_info->pixel_fmt=TH_PF_444;break; - default:_info->pixel_fmt=TH_PF_RSVD; - } - _info->target_bitrate=_ci->target_bitrate; - _info->quality=_ci->quality; - _info->keyframe_granule_shift=_ci->keyframe_frequency_force>0? 
- OC_MINI(31,oc_ilog(_ci->keyframe_frequency_force-1)):0; -} - -int theora_packet_isheader(ogg_packet *_op){ - return th_packet_isheader(_op); -} - -int theora_packet_iskeyframe(ogg_packet *_op){ - return th_packet_iskeyframe(_op); -} - -int theora_granule_shift(theora_info *_ci){ - /*This breaks when keyframe_frequency_force is not positive or is larger than - 2**31 (if your int is more than 32 bits), but that's what the original - function does.*/ - return oc_ilog(_ci->keyframe_frequency_force-1); -} - -void theora_comment_init(theora_comment *_tc){ - th_comment_init((th_comment *)_tc); -} - -char *theora_comment_query(theora_comment *_tc,char *_tag,int _count){ - return th_comment_query((th_comment *)_tc,_tag,_count); -} - -int theora_comment_query_count(theora_comment *_tc,char *_tag){ - return th_comment_query_count((th_comment *)_tc,_tag); -} - -void theora_comment_clear(theora_comment *_tc){ - th_comment_clear((th_comment *)_tc); -} - -void theora_comment_add(theora_comment *_tc,char *_comment){ - th_comment_add((th_comment *)_tc,_comment); -} - -void theora_comment_add_tag(theora_comment *_tc, char *_tag, char *_value){ - th_comment_add_tag((th_comment *)_tc,_tag,_value); -} diff --git a/drivers/theora/apiwrapper.h b/drivers/theora/apiwrapper.h deleted file mode 100644 index 93454d7bda..0000000000 --- a/drivers/theora/apiwrapper.h +++ /dev/null @@ -1,54 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: apiwrapper.h 13596 2007-08-23 20:05:38Z tterribe $ - - ********************************************************************/ - -#if !defined(_apiwrapper_H) -# define _apiwrapper_H (1) -# include -# include -# include "theora/theoradec.h" -# include "theora/theoraenc.h" -# include "internal.h" - -typedef struct th_api_wrapper th_api_wrapper; -typedef struct th_api_info th_api_info; - -/*Provide an entry point for the codec setup to clear itself in case we ever - want to break pieces off into a common base library shared by encoder and - decoder. - In addition, this makes several other pieces of the API wrapper cleaner.*/ -typedef void (*oc_setup_clear_func)(void *_ts); - -/*Generally only one of these pointers will be non-NULL in any given instance. - Technically we do not even really need this struct, since we should be able - to figure out which one from "context", but doing it this way makes sure we - don't flub it up.*/ -struct th_api_wrapper{ - oc_setup_clear_func clear; - th_setup_info *setup; - th_dec_ctx *decode; - th_enc_ctx *encode; -}; - -struct th_api_info{ - th_api_wrapper api; - theora_info info; -}; - - -void oc_theora_info2th_info(th_info *_info,const theora_info *_ci); - -#endif diff --git a/drivers/theora/bitpack.c b/drivers/theora/bitpack.c deleted file mode 100644 index 8195003bad..0000000000 --- a/drivers/theora/bitpack.c +++ /dev/null @@ -1,111 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: packing variable sized words into an octet stream - last mod: $Id: bitpack.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ -#include -#include -#include "bitpack.h" - -/*We're 'MSb' endian; if we write a word but read individual bits, - then we'll read the MSb first.*/ - -void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes){ - memset(_b,0,sizeof(*_b)); - _b->ptr=_buf; - _b->stop=_buf+_bytes; -} - -static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){ - const unsigned char *ptr; - const unsigned char *stop; - oc_pb_window window; - int available; - window=_b->window; - available=_b->bits; - ptr=_b->ptr; - stop=_b->stop; - while(available<=OC_PB_WINDOW_SIZE-8&&ptrptr=ptr; - if(_bits>available){ - if(ptr>=stop){ - _b->eof=1; - available=OC_LOTS_OF_BITS; - } - else window|=*ptr>>(available&7); - } - _b->bits=available; - return window; -} - -int oc_pack_look1(oc_pack_buf *_b){ - oc_pb_window window; - int available; - window=_b->window; - available=_b->bits; - if(available<1)_b->window=window=oc_pack_refill(_b,1); - return window>>OC_PB_WINDOW_SIZE-1; -} - -void oc_pack_adv1(oc_pack_buf *_b){ - _b->window<<=1; - _b->bits--; -} - -/*Here we assume that 0<=_bits&&_bits<=32.*/ -long oc_pack_read(oc_pack_buf *_b,int _bits){ - oc_pb_window window; - int available; - long result; - window=_b->window; - available=_b->bits; - if(_bits==0)return 0; - if(available<_bits){ - window=oc_pack_refill(_b,_bits); - available=_b->bits; - } - result=window>>OC_PB_WINDOW_SIZE-_bits; - available-=_bits; - window<<=1; - window<<=_bits-1; - _b->bits=available; - _b->window=window; - return result; -} - -int oc_pack_read1(oc_pack_buf *_b){ - oc_pb_window window; - int available; 
- int result; - window=_b->window; - available=_b->bits; - if(available<1){ - window=oc_pack_refill(_b,1); - available=_b->bits; - } - result=window>>OC_PB_WINDOW_SIZE-1; - available--; - window<<=1; - _b->bits=available; - _b->window=window; - return result; -} - -long oc_pack_bytes_left(oc_pack_buf *_b){ - if(_b->eof)return -1; - return _b->stop-_b->ptr+(_b->bits>>3); -} diff --git a/drivers/theora/bitpack.h b/drivers/theora/bitpack.h deleted file mode 100644 index a020a292f5..0000000000 --- a/drivers/theora/bitpack.h +++ /dev/null @@ -1,59 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: packing variable sized words into an octet stream - last mod: $Id: bitwise.c 7675 2004-09-01 00:34:39Z xiphmont $ - - ********************************************************************/ -#if !defined(_bitpack_H) -# define _bitpack_H (1) -# include - - - -typedef unsigned long oc_pb_window; -typedef struct oc_pack_buf oc_pack_buf; - - - -# define OC_PB_WINDOW_SIZE ((int)sizeof(oc_pb_window)*CHAR_BIT) -/*This is meant to be a large, positive constant that can still be efficiently - loaded as an immediate (on platforms like ARM, for example). 
- Even relatively modest values like 100 would work fine.*/ -# define OC_LOTS_OF_BITS (0x40000000) - - - -struct oc_pack_buf{ - oc_pb_window window; - const unsigned char *ptr; - const unsigned char *stop; - int bits; - int eof; -}; - -void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes); -int oc_pack_look1(oc_pack_buf *_b); -void oc_pack_adv1(oc_pack_buf *_b); -/*Here we assume 0<=_bits&&_bits<=32.*/ -long oc_pack_read(oc_pack_buf *_b,int _bits); -int oc_pack_read1(oc_pack_buf *_b); -/* returns -1 for read beyond EOF, or the number of whole bytes available */ -long oc_pack_bytes_left(oc_pack_buf *_b); - -/*These two functions are implemented locally in huffdec.c*/ -/*Read in bits without advancing the bitptr. - Here we assume 0<=_bits&&_bits<=32.*/ -/*static int oc_pack_look(oc_pack_buf *_b,int _bits);*/ -/*static void oc_pack_adv(oc_pack_buf *_b,int _bits);*/ - -#endif diff --git a/drivers/theora/codec.h b/drivers/theora/codec.h deleted file mode 100644 index 5c2669630c..0000000000 --- a/drivers/theora/codec.h +++ /dev/null @@ -1,591 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $ - - ********************************************************************/ - -/**\mainpage - * - * \section intro Introduction - * - * This is the documentation for libtheora C API. - * The current reference - * implementation for Theora, a free, - * patent-unencumbered video codec. 
- * Theora is derived from On2's VP3 codec with additional features and - * integration with Ogg multimedia formats by - * the Xiph.Org Foundation. - * Complete documentation of the format itself is available in - * the Theora - * specification. - * - * \subsection Organization - * - * The functions documented here are actually subdivided into three - * separate libraries: - * - libtheoraenc contains the encoder interface, - * described in \ref encfuncs. - * - libtheoradec contains the decoder interface and - * routines shared with the encoder. - * You must also link to this if you link to libtheoraenc. - * The routines in this library are described in \ref decfuncs and - * \ref basefuncs. - * - libtheora contains the \ref oldfuncs. - * - * New code should link to libtheoradec and, if using encoder - * features, libtheoraenc. Together these two export both - * the standard and the legacy API, so this is all that is needed by - * any code. The older libtheora library is provided just for - * compatibility with older build configurations. - * - * In general the recommended 1.x API symbols can be distinguished - * by their th_ or TH_ namespace prefix. - * The older, legacy API uses theora_ or OC_ - * prefixes instead. - */ - -/**\file - * The shared libtheoradec and libtheoraenc C API. 
- * You don't need to include this directly.*/ - -#if !defined(_O_THEORA_CODEC_H_) -# define _O_THEORA_CODEC_H_ (1) -# include - -#if defined(__cplusplus) -extern "C" { -#endif - - - -/**\name Return codes*/ -/*@{*/ -/**An invalid pointer was provided.*/ -#define TH_EFAULT (-1) -/**An invalid argument was provided.*/ -#define TH_EINVAL (-10) -/**The contents of the header were incomplete, invalid, or unexpected.*/ -#define TH_EBADHEADER (-20) -/**The header does not belong to a Theora stream.*/ -#define TH_ENOTFORMAT (-21) -/**The bitstream version is too high.*/ -#define TH_EVERSION (-22) -/**The specified function is not implemented.*/ -#define TH_EIMPL (-23) -/**There were errors in the video data packet.*/ -#define TH_EBADPACKET (-24) -/**The decoded packet represented a dropped frame. - The player can continue to display the current frame, as the contents of the - decoded frame buffer have not changed.*/ -#define TH_DUPFRAME (1) -/*@}*/ - -/**The currently defined color space tags. - * See the Theora - * specification, Chapter 4, for exact details on the meaning - * of each of these color spaces.*/ -typedef enum{ - /**The color space was not specified at the encoder. - It may be conveyed by an external means.*/ - TH_CS_UNSPECIFIED, - /**A color space designed for NTSC content.*/ - TH_CS_ITU_REC_470M, - /**A color space designed for PAL/SECAM content.*/ - TH_CS_ITU_REC_470BG, - /**The total number of currently defined color spaces.*/ - TH_CS_NSPACES -}th_colorspace; - -/**The currently defined pixel format tags. - * See the Theora - * specification, Section 4.4, for details on the precise sample - * locations.*/ -typedef enum{ - /**Chroma decimation by 2 in both the X and Y directions (4:2:0). - The Cb and Cr chroma planes are half the width and half the - height of the luma plane.*/ - TH_PF_420, - /**Currently reserved.*/ - TH_PF_RSVD, - /**Chroma decimation by 2 in the X direction (4:2:2). 
- The Cb and Cr chroma planes are half the width of the luma plane, but full - height.*/ - TH_PF_422, - /**No chroma decimation (4:4:4). - The Cb and Cr chroma planes are full width and full height.*/ - TH_PF_444, - /**The total number of currently defined pixel formats.*/ - TH_PF_NFORMATS -}th_pixel_fmt; - - - -/**A buffer for a single color plane in an uncompressed image. - * This contains the image data in a left-to-right, top-down format. - * Each row of pixels is stored contiguously in memory, but successive - * rows need not be. - * Use \a stride to compute the offset of the next row. - * The encoder accepts both positive \a stride values (top-down in memory) - * and negative (bottom-up in memory). - * The decoder currently always generates images with positive strides.*/ -typedef struct{ - /**The width of this plane.*/ - int width; - /**The height of this plane.*/ - int height; - /**The offset in bytes between successive rows.*/ - int stride; - /**A pointer to the beginning of the first row.*/ - unsigned char *data; -}th_img_plane; - -/**A complete image buffer for an uncompressed frame. - * The chroma planes may be decimated by a factor of two in either - * direction, as indicated by th_info#pixel_fmt. - * The width and height of the Y' plane must be multiples of 16. - * They may need to be cropped for display, using the rectangle - * specified by th_info#pic_x, th_info#pic_y, th_info#pic_width, - * and th_info#pic_height. - * All samples are 8 bits. - * \note The term YUV often used to describe a colorspace is ambiguous. - * The exact parameters of the RGB to YUV conversion process aside, in - * many contexts the U and V channels actually have opposite meanings. - * To avoid this confusion, we are explicit: the name of the color - * channels are Y'CbCr, and they appear in that order, always. - * The prime symbol denotes that the Y channel is non-linear. 
- * Cb and Cr stand for "Chroma blue" and "Chroma red", respectively.*/ -typedef th_img_plane th_ycbcr_buffer[3]; - -/**Theora bitstream information. - * This contains the basic playback parameters for a stream, and corresponds to - * the initial 'info' header packet. - * To initialize an encoder, the application fills in this structure and - * passes it to th_encode_alloc(). - * A default encoding mode is chosen based on the values of the #quality and - * #target_bitrate fields. - * On decode, it is filled in by th_decode_headerin(), and then passed to - * th_decode_alloc(). - * - * Encoded Theora frames must be a multiple of 16 in size; - * this is what the #frame_width and #frame_height members represent. - * To handle arbitrary picture sizes, a crop rectangle is specified in the - * #pic_x, #pic_y, #pic_width and #pic_height members. - * - * All frame buffers contain pointers to the full, padded frame. - * However, the current encoder will not reference pixels outside of - * the cropped picture region, and the application does not need to fill them - * in. - * The decoder will allocate storage for a full frame, but the - * application should not rely on the padding containing sensible - * data. - * - * It is also generally recommended that the offsets and sizes should still be - * multiples of 2 to avoid chroma sampling shifts when chroma is sub-sampled. - * See the Theora - * specification, Section 4.4, for more details. - * - * Frame rate, in frames per second, is stored as a rational fraction, as is - * the pixel aspect ratio. - * Note that this refers to the aspect ratio of the individual pixels, not of - * the overall frame itself. - * The frame aspect ratio can be computed from pixel aspect ratio using the - * image dimensions.*/ -typedef struct{ - /**\name Theora version - * Bitstream version information.*/ - /*@{*/ - unsigned char version_major; - unsigned char version_minor; - unsigned char version_subminor; - /*@}*/ - /**The encoded frame width. 
- * This must be a multiple of 16, and less than 1048576.*/ - ogg_uint32_t frame_width; - /**The encoded frame height. - * This must be a multiple of 16, and less than 1048576.*/ - ogg_uint32_t frame_height; - /**The displayed picture width. - * This must be no larger than width.*/ - ogg_uint32_t pic_width; - /**The displayed picture height. - * This must be no larger than height.*/ - ogg_uint32_t pic_height; - /**The X offset of the displayed picture. - * This must be no larger than #frame_width-#pic_width or 255, whichever is - * smaller.*/ - ogg_uint32_t pic_x; - /**The Y offset of the displayed picture. - * This must be no larger than #frame_height-#pic_height, and - * #frame_height-#pic_height-#pic_y must be no larger than 255. - * This slightly funny restriction is due to the fact that the offset is - * specified from the top of the image for consistency with the standard - * graphics left-handed coordinate system used throughout this API, while - * it is stored in the encoded stream as an offset from the bottom.*/ - ogg_uint32_t pic_y; - /**\name Frame rate - * The frame rate, as a fraction. - * If either is 0, the frame rate is undefined.*/ - /*@{*/ - ogg_uint32_t fps_numerator; - ogg_uint32_t fps_denominator; - /*@}*/ - /**\name Aspect ratio - * The aspect ratio of the pixels. - * If either value is zero, the aspect ratio is undefined. - * If not specified by any external means, 1:1 should be assumed. - * The aspect ratio of the full picture can be computed as - * \code - * aspect_numerator*pic_width/(aspect_denominator*pic_height). - * \endcode */ - /*@{*/ - ogg_uint32_t aspect_numerator; - ogg_uint32_t aspect_denominator; - /*@}*/ - /**The color space.*/ - th_colorspace colorspace; - /**The pixel format.*/ - th_pixel_fmt pixel_fmt; - /**The target bit-rate in bits per second. - If initializing an encoder with this struct, set this field to a non-zero - value to activate CBR encoding by default.*/ - int target_bitrate; - /**The target quality level. 
- Valid values range from 0 to 63, inclusive, with higher values giving - higher quality. - If initializing an encoder with this struct, and #target_bitrate is set - to zero, VBR encoding at this quality will be activated by default.*/ - /*Currently this is set so that a qi of 0 corresponds to distortions of 24 - times the JND, and each increase by 16 halves that value. - This gives us fine discrimination at low qualities, yet effective rate - control at high qualities. - The qi value 63 is special, however. - For this, the highest quality, we use one half of a JND for our threshold. - Due to the lower bounds placed on allowable quantizers in Theora, we will - not actually be able to achieve quality this good, but this should - provide as close to visually lossless quality as Theora is capable of. - We could lift the quantizer restrictions without breaking VP3.1 - compatibility, but this would result in quantized coefficients that are - too large for the current bitstream to be able to store. - We'd have to redesign the token syntax to store these large coefficients, - which would make transcoding complex.*/ - int quality; - /**The amount to shift to extract the last keyframe number from the granule - * position. - * This can be at most 31. - * th_info_init() will set this to a default value (currently 6, - * which is good for streaming applications), but you can set it to 0 to - * make every frame a keyframe. - * The maximum distance between key frames is - * 1<<#keyframe_granule_shift. - * The keyframe frequency can be more finely controlled with - * #TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE, which can also be adjusted - * during encoding (for example, to force the next frame to be a keyframe), - * but it cannot be set larger than the amount permitted by this field after - * the headers have been output.*/ - int keyframe_granule_shift; -}th_info; - -/**The comment information. 
- * - * This structure holds the in-stream metadata corresponding to - * the 'comment' header packet. - * The comment header is meant to be used much like someone jotting a quick - * note on the label of a video. - * It should be a short, to the point text note that can be more than a couple - * words, but not more than a short paragraph. - * - * The metadata is stored as a series of (tag, value) pairs, in - * length-encoded string vectors. - * The first occurrence of the '=' character delimits the tag and value. - * A particular tag may occur more than once, and order is significant. - * The character set encoding for the strings is always UTF-8, but the tag - * names are limited to ASCII, and treated as case-insensitive. - * See the Theora - * specification, Section 6.3.3 for details. - * - * In filling in this structure, th_decode_headerin() will null-terminate - * the user_comment strings for safety. - * However, the bitstream format itself treats them as 8-bit clean vectors, - * possibly containing null characters, and so the length array should be - * treated as their authoritative length. - */ -typedef struct th_comment{ - /**The array of comment string vectors.*/ - char **user_comments; - /**An array of the corresponding length of each vector, in bytes.*/ - int *comment_lengths; - /**The total number of comment strings.*/ - int comments; - /**The null-terminated vendor string. - This identifies the software used to encode the stream.*/ - char *vendor; -}th_comment; - - - -/**A single base matrix.*/ -typedef unsigned char th_quant_base[64]; - -/**A set of \a qi ranges.*/ -typedef struct{ - /**The number of ranges in the set.*/ - int nranges; - /**The size of each of the #nranges ranges. - These must sum to 63.*/ - const int *sizes; - /**#nranges +1 base matrices. - Matrices \a i and i+1 form the endpoints of range \a i.*/ - const th_quant_base *base_matrices; -}th_quant_ranges; - -/**A complete set of quantization parameters. 
- The quantizer for each coefficient is calculated as: - \code - Q=MAX(MIN(qmin[qti][ci!=0],scale[ci!=0][qi]*base[qti][pli][qi][ci]/100), - 1024). - \endcode - - \a qti is the quantization type index: 0 for intra, 1 for inter. - ci!=0 is 0 for the DC coefficient and 1 for AC coefficients. - \a qi is the quality index, ranging between 0 (low quality) and 63 (high - quality). - \a pli is the color plane index: 0 for Y', 1 for Cb, 2 for Cr. - \a ci is the DCT coefficient index. - Coefficient indices correspond to the normal 2D DCT block - ordering--row-major with low frequencies first--\em not zig-zag order. - - Minimum quantizers are constant, and are given by: - \code - qmin[2][2]={{4,2},{8,4}}. - \endcode - - Parameters that can be stored in the bitstream are as follows: - - The two scale matrices ac_scale and dc_scale. - \code - scale[2][64]={dc_scale,ac_scale}. - \endcode - - The base matrices for each \a qi, \a qti and \a pli (up to 384 in all). - In order to avoid storing a full 384 base matrices, only a sparse set of - matrices are stored, and the rest are linearly interpolated. - This is done as follows. - For each \a qti and \a pli, a series of \a n \a qi ranges is defined. - The size of each \a qi range can vary arbitrarily, but they must sum to - 63. - Then, n+1 matrices are specified, one for each endpoint of the - ranges. - For interpolation purposes, each range's endpoints are the first \a qi - value it contains and one past the last \a qi value it contains. - Fractional values are rounded to the nearest integer, with ties rounded - away from zero. - - Base matrices are stored by reference, so if the same matrices are used - multiple times, they will only appear once in the bitstream. - The bitstream is also capable of omitting an entire set of ranges and - its associated matrices if they are the same as either the previous - set (indexed in row-major order) or if the inter set is the same as the - intra set. - - - Loop filter limit values. 
- The same limits are used for the loop filter in all color planes, despite - potentially differing levels of quantization in each. - - For the current encoder, scale[ci!=0][qi] must be no greater - than scale[ci!=0][qi-1] and base[qti][pli][qi][ci] must - be no greater than base[qti][pli][qi-1][ci]. - These two conditions ensure that the actual quantizer for a given \a qti, - \a pli, and \a ci does not increase as \a qi increases. - This is not required by the decoder.*/ -typedef struct{ - /**The DC scaling factors.*/ - ogg_uint16_t dc_scale[64]; - /**The AC scaling factors.*/ - ogg_uint16_t ac_scale[64]; - /**The loop filter limit values.*/ - unsigned char loop_filter_limits[64]; - /**The \a qi ranges for each \a ci and \a pli.*/ - th_quant_ranges qi_ranges[2][3]; -}th_quant_info; - - - -/**The number of Huffman tables used by Theora.*/ -#define TH_NHUFFMAN_TABLES (80) -/**The number of DCT token values in each table.*/ -#define TH_NDCT_TOKENS (32) - -/**A Huffman code for a Theora DCT token. - * Each set of Huffman codes in a given table must form a complete, prefix-free - * code. - * There is no requirement that all the tokens in a table have a valid code, - * but the current encoder is not optimized to take advantage of this. - * If each of the five grouops of 16 tables does not contain at least one table - * with a code for every token, then the encoder may fail to encode certain - * frames. - * The complete table in the first group of 16 does not have to be in the same - * place as the complete table in the other groups, but the complete tables in - * the remaining four groups must all be in the same place.*/ -typedef struct{ - /**The bit pattern for the code, with the LSbit of the pattern aligned in - * the LSbit of the word.*/ - ogg_uint32_t pattern; - /**The number of bits in the code. 
- * This must be between 0 and 32, inclusive.*/ - int nbits; -}th_huff_code; - - - -/**\defgroup basefuncs Functions Shared by Encode and Decode*/ -/*@{*/ -/**\name Basic shared functions*/ -/*@{*/ -/**Retrieves a human-readable string to identify the library vendor and - * version. - * \return the version string.*/ -extern const char *th_version_string(void); -/**Retrieves the library version number. - * This is the highest bitstream version that the encoder library will produce, - * or that the decoder library can decode. - * This number is composed of a 16-bit major version, 8-bit minor version - * and 8 bit sub-version, composed as follows: - * \code - * (VERSION_MAJOR<<16)+(VERSION_MINOR<<8)+(VERSION_SUBMINOR) - * \endcode - * \return the version number.*/ -extern ogg_uint32_t th_version_number(void); -/**Converts a granule position to an absolute frame index, starting at - * 0. - * The granule position is interpreted in the context of a given - * #th_enc_ctx or #th_dec_ctx handle (either will suffice). - * \param _encdec A previously allocated #th_enc_ctx or #th_dec_ctx - * handle. - * \param _granpos The granule position to convert. - * \returns The absolute frame index corresponding to \a _granpos. - * \retval -1 The given granule position was invalid (i.e. negative).*/ -extern ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos); -/**Converts a granule position to an absolute time in seconds. - * The granule position is interpreted in the context of a given - * #th_enc_ctx or #th_dec_ctx handle (either will suffice). - * \param _encdec A previously allocated #th_enc_ctx or #th_dec_ctx - * handle. - * \param _granpos The granule position to convert. - * \return The absolute time in seconds corresponding to \a _granpos. - * This is the "end time" for the frame, or the latest time it should - * be displayed. - * It is not the presentation time. - * \retval -1 The given granule position was invalid (i.e. 
negative).*/ -extern double th_granule_time(void *_encdec,ogg_int64_t _granpos); -/**Determines whether a Theora packet is a header or not. - * This function does no verification beyond checking the packet type bit, so - * it should not be used for bitstream identification; use - * th_decode_headerin() for that. - * As per the Theora specification, an empty (0-byte) packet is treated as a - * data packet (a delta frame with no coded blocks). - * \param _op An ogg_packet containing encoded Theora data. - * \retval 1 The packet is a header packet - * \retval 0 The packet is a video data packet.*/ -extern int th_packet_isheader(ogg_packet *_op); -/**Determines whether a theora packet is a key frame or not. - * This function does no verification beyond checking the packet type and - * key frame bits, so it should not be used for bitstream identification; use - * th_decode_headerin() for that. - * As per the Theora specification, an empty (0-byte) packet is treated as a - * delta frame (with no coded blocks). - * \param _op An ogg_packet containing encoded Theora data. - * \retval 1 The packet contains a key frame. - * \retval 0 The packet contains a delta frame. - * \retval -1 The packet is not a video data packet.*/ -extern int th_packet_iskeyframe(ogg_packet *_op); -/*@}*/ - - -/**\name Functions for manipulating header data*/ -/*@{*/ -/**Initializes a th_info structure. - * This should be called on a freshly allocated #th_info structure before - * attempting to use it. - * \param _info The #th_info struct to initialize.*/ -extern void th_info_init(th_info *_info); -/**Clears a #th_info structure. - * This should be called on a #th_info structure after it is no longer - * needed. - * \param _info The #th_info struct to clear.*/ -extern void th_info_clear(th_info *_info); - -/**Initialize a #th_comment structure. - * This should be called on a freshly allocated #th_comment structure - * before attempting to use it. 
- * \param _tc The #th_comment struct to initialize.*/ -extern void th_comment_init(th_comment *_tc); -/**Add a comment to an initialized #th_comment structure. - * \note Neither th_comment_add() nor th_comment_add_tag() support - * comments containing null values, although the bitstream format does - * support them. - * To add such comments you will need to manipulate the #th_comment - * structure directly. - * \param _tc The #th_comment struct to add the comment to. - * \param _comment Must be a null-terminated UTF-8 string containing the - * comment in "TAG=the value" form.*/ -extern void th_comment_add(th_comment *_tc, char *_comment); -/**Add a comment to an initialized #th_comment structure. - * \note Neither th_comment_add() nor th_comment_add_tag() support - * comments containing null values, although the bitstream format does - * support them. - * To add such comments you will need to manipulate the #th_comment - * structure directly. - * \param _tc The #th_comment struct to add the comment to. - * \param _tag A null-terminated string containing the tag associated with - * the comment. - * \param _val The corresponding value as a null-terminated string.*/ -extern void th_comment_add_tag(th_comment *_tc,char *_tag,char *_val); -/**Look up a comment value by its tag. - * \param _tc An initialized #th_comment structure. - * \param _tag The tag to look up. - * \param _count The instance of the tag. - * The same tag can appear multiple times, each with a distinct - * value, so an index is required to retrieve them all. - * The order in which these values appear is significant and - * should be preserved. - * Use th_comment_query_count() to get the legal range for - * the \a _count parameter. - * \return A pointer to the queried tag's value. - * This points directly to data in the #th_comment structure. - * It should not be modified or freed by the application, and - * modifications to the structure may invalidate the pointer. 
- * \retval NULL If no matching tag is found.*/ -extern char *th_comment_query(th_comment *_tc,char *_tag,int _count); -/**Look up the number of instances of a tag. - * Call this first when querying for a specific tag and then iterate over the - * number of instances with separate calls to th_comment_query() to - * retrieve all the values for that tag in order. - * \param _tc An initialized #th_comment structure. - * \param _tag The tag to look up. - * \return The number on instances of this particular tag.*/ -extern int th_comment_query_count(th_comment *_tc,char *_tag); -/**Clears a #th_comment structure. - * This should be called on a #th_comment structure after it is no longer - * needed. - * It will free all memory used by the structure members. - * \param _tc The #th_comment struct to clear.*/ -extern void th_comment_clear(th_comment *_tc); -/*@}*/ -/*@}*/ - - - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/drivers/theora/cpu.c b/drivers/theora/cpu.c deleted file mode 100644 index a863aad7f3..0000000000 --- a/drivers/theora/cpu.c +++ /dev/null @@ -1,226 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - CPU capability detection for x86 processors. - Originally written by Rudolf Marek. 
- - function: - last mod: $Id: cpu.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -#include "cpu.h" - -#if !defined(OC_X86_ASM) -static ogg_uint32_t oc_cpu_flags_get(void){ - return 0; -} -#else -# if !defined(_MSC_VER) -# if defined(__amd64__)||defined(__x86_64__) -/*On x86-64, gcc seems to be able to figure out how to save %rbx for us when - compiling with -fPIC.*/ -# define cpuid(_op,_eax,_ebx,_ecx,_edx) \ - __asm__ __volatile__( \ - "cpuid\n\t" \ - :[eax]"=a"(_eax),[ebx]"=b"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \ - :"a"(_op) \ - :"cc" \ - ) -# else -/*On x86-32, not so much.*/ -# define cpuid(_op,_eax,_ebx,_ecx,_edx) \ - __asm__ __volatile__( \ - "xchgl %%ebx,%[ebx]\n\t" \ - "cpuid\n\t" \ - "xchgl %%ebx,%[ebx]\n\t" \ - :[eax]"=a"(_eax),[ebx]"=r"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \ - :"a"(_op) \ - :"cc" \ - ) -# endif -# else -/*Why does MSVC need this complicated rigamarole? - At this point I honestly do not care.*/ - -/*Visual C cpuid helper function. 
- For VS2005 we could as well use the _cpuid builtin, but that wouldn't work - for VS2003 users, so we do it in inline assembler.*/ -static void oc_cpuid_helper(ogg_uint32_t _cpu_info[4],ogg_uint32_t _op){ - _asm{ - mov eax,[_op] - mov esi,_cpu_info - cpuid - mov [esi+0],eax - mov [esi+4],ebx - mov [esi+8],ecx - mov [esi+12],edx - } -} - -# define cpuid(_op,_eax,_ebx,_ecx,_edx) \ - do{ \ - ogg_uint32_t cpu_info[4]; \ - oc_cpuid_helper(cpu_info,_op); \ - (_eax)=cpu_info[0]; \ - (_ebx)=cpu_info[1]; \ - (_ecx)=cpu_info[2]; \ - (_edx)=cpu_info[3]; \ - }while(0) - -static void oc_detect_cpuid_helper(ogg_uint32_t *_eax,ogg_uint32_t *_ebx){ - _asm{ - pushfd - pushfd - pop eax - mov ebx,eax - xor eax,200000h - push eax - popfd - pushfd - pop eax - popfd - mov ecx,_eax - mov [ecx],eax - mov ecx,_ebx - mov [ecx],ebx - } -} -# endif - -static ogg_uint32_t oc_parse_intel_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){ - ogg_uint32_t flags; - /*If there isn't even MMX, give up.*/ - if(!(_edx&0x00800000))return 0; - flags=OC_CPU_X86_MMX; - if(_edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE; - if(_edx&0x04000000)flags|=OC_CPU_X86_SSE2; - if(_ecx&0x00000001)flags|=OC_CPU_X86_PNI; - if(_ecx&0x00000100)flags|=OC_CPU_X86_SSSE3; - if(_ecx&0x00080000)flags|=OC_CPU_X86_SSE4_1; - if(_ecx&0x00100000)flags|=OC_CPU_X86_SSE4_2; - return flags; -} - -static ogg_uint32_t oc_parse_amd_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){ - ogg_uint32_t flags; - /*If there isn't even MMX, give up.*/ - if(!(_edx&0x00800000))return 0; - flags=OC_CPU_X86_MMX; - if(_edx&0x00400000)flags|=OC_CPU_X86_MMXEXT; - if(_edx&0x80000000)flags|=OC_CPU_X86_3DNOW; - if(_edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT; - if(_ecx&0x00000040)flags|=OC_CPU_X86_SSE4A; - if(_ecx&0x00000800)flags|=OC_CPU_X86_SSE5; - return flags; -} - -static ogg_uint32_t oc_cpu_flags_get(void){ - ogg_uint32_t flags; - ogg_uint32_t eax; - ogg_uint32_t ebx; - ogg_uint32_t ecx; - ogg_uint32_t edx; -# if !defined(__amd64__)&&!defined(__x86_64__) - 
/*Not all x86-32 chips support cpuid, so we have to check.*/ -# if !defined(_MSC_VER) - __asm__ __volatile__( - "pushfl\n\t" - "pushfl\n\t" - "popl %[a]\n\t" - "movl %[a],%[b]\n\t" - "xorl $0x200000,%[a]\n\t" - "pushl %[a]\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %[a]\n\t" - "popfl\n\t" - :[a]"=r"(eax),[b]"=r"(ebx) - : - :"cc" - ); -# else - oc_detect_cpuid_helper(&eax,&ebx); -# endif - /*No cpuid.*/ - if(eax==ebx)return 0; -# endif - cpuid(0,eax,ebx,ecx,edx); - /* l e t n I e n i u n e G*/ - if(ecx==0x6C65746E&&edx==0x49656E69&&ebx==0x756E6547|| - /* 6 8 x M T e n i u n e G*/ - ecx==0x3638784D&&edx==0x54656E69&&ebx==0x756E6547){ - /*Intel, Transmeta (tested with Crusoe TM5800):*/ - cpuid(1,eax,ebx,ecx,edx); - flags=oc_parse_intel_flags(edx,ecx); - } - /* D M A c i t n e h t u A*/ - else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541|| - /* C S N y b e d o e G*/ - ecx==0x43534e20&&edx==0x79622065&&ebx==0x646f6547){ - /*AMD, Geode:*/ - cpuid(0x80000000,eax,ebx,ecx,edx); - if(eax<0x80000001)flags=0; - else{ - cpuid(0x80000001,eax,ebx,ecx,edx); - flags=oc_parse_amd_flags(edx,ecx); - } - /*Also check for SSE.*/ - cpuid(1,eax,ebx,ecx,edx); - flags|=oc_parse_intel_flags(edx,ecx); - } - /*Technically some VIA chips can be configured in the BIOS to return any - string here the user wants. - There is a special detection method that can be used to identify such - processors, but in my opinion, if the user really wants to change it, they - deserve what they get.*/ - /* s l u a H r u a t n e C*/ - else if(ecx==0x736C7561&&edx==0x48727561&&ebx==0x746E6543){ - /*VIA:*/ - /*I only have documentation for the C7 (Esther) and Isaiah (forthcoming) - chips (thanks to the engineers from Centaur Technology who provided it). - These chips support Intel-like cpuid info. - The C3-2 (Nehemiah) cores appear to, as well.*/ - cpuid(1,eax,ebx,ecx,edx); - flags=oc_parse_intel_flags(edx,ecx); - if(eax>=0x80000001){ - /*The (non-Nehemiah) C3 processors support AMD-like cpuid info. 
- We need to check this even if the Intel test succeeds to pick up 3DNow! - support on these processors. - Unlike actual AMD processors, we cannot _rely_ on this info, since - some cores (e.g., the 693 stepping of the Nehemiah) claim to support - this function, yet return edx=0, despite the Intel test indicating - MMX support. - Therefore the features detected here are strictly added to those - detected by the Intel test.*/ - /*TODO: How about earlier chips?*/ - cpuid(0x80000001,eax,ebx,ecx,edx); - /*Note: As of the C7, this function returns Intel-style extended feature - flags, not AMD-style. - Currently, this only defines bits 11, 20, and 29 (0x20100800), which - do not conflict with any of the AMD flags we inspect. - For the remaining bits, Intel tells us, "Do not count on their value", - but VIA assures us that they will all be zero (at least on the C7 and - Isaiah chips). - In the (unlikely) event a future processor uses bits 18, 19, 30, or 31 - (0xC0C00000) for something else, we will have to add code to detect - the model to decide when it is appropriate to inspect them.*/ - flags|=oc_parse_amd_flags(edx,ecx); - } - } - else{ - /*Implement me.*/ - flags=0; - } - return flags; -} -#endif diff --git a/drivers/theora/cpu.h b/drivers/theora/cpu.h deleted file mode 100644 index a43c957a39..0000000000 --- a/drivers/theora/cpu.h +++ /dev/null @@ -1,34 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - function: - last mod: $Id: cpu.h 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -#if !defined(_x86_cpu_H) -# define _x86_cpu_H (1) -#include "internal.h" - -#define OC_CPU_X86_MMX (1<<0) -#define OC_CPU_X86_3DNOW (1<<1) -#define OC_CPU_X86_3DNOWEXT (1<<2) -#define OC_CPU_X86_MMXEXT (1<<3) -#define OC_CPU_X86_SSE (1<<4) -#define OC_CPU_X86_SSE2 (1<<5) -#define OC_CPU_X86_PNI (1<<6) -#define OC_CPU_X86_SSSE3 (1<<7) -#define OC_CPU_X86_SSE4_1 (1<<8) -#define OC_CPU_X86_SSE4_2 (1<<9) -#define OC_CPU_X86_SSE4A (1<<10) -#define OC_CPU_X86_SSE5 (1<<11) - -#endif diff --git a/drivers/theora/dct.h b/drivers/theora/dct.h deleted file mode 100644 index 24ba6f111a..0000000000 --- a/drivers/theora/dct.h +++ /dev/null @@ -1,31 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: dct.h 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -/*Definitions shared by the forward and inverse DCT transforms.*/ -#if !defined(_dct_H) -# define _dct_H (1) - -/*cos(n*pi/16) (resp. 
sin(m*pi/16)) scaled by 65536.*/ -#define OC_C1S7 ((ogg_int32_t)64277) -#define OC_C2S6 ((ogg_int32_t)60547) -#define OC_C3S5 ((ogg_int32_t)54491) -#define OC_C4S4 ((ogg_int32_t)46341) -#define OC_C5S3 ((ogg_int32_t)36410) -#define OC_C6S2 ((ogg_int32_t)25080) -#define OC_C7S1 ((ogg_int32_t)12785) - -#endif diff --git a/drivers/theora/decapiwrapper.c b/drivers/theora/decapiwrapper.c deleted file mode 100644 index 12ea475d17..0000000000 --- a/drivers/theora/decapiwrapper.c +++ /dev/null @@ -1,193 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: decapiwrapper.c 13596 2007-08-23 20:05:38Z tterribe $ - - ********************************************************************/ - -#include -#include -#include -#include "apiwrapper.h" -#include "decint.h" -#include "theora/theoradec.h" - -static void th_dec_api_clear(th_api_wrapper *_api){ - if(_api->setup)th_setup_free(_api->setup); - if(_api->decode)th_decode_free(_api->decode); - memset(_api,0,sizeof(*_api)); -} - -static void theora_decode_clear(theora_state *_td){ - if(_td->i!=NULL)theora_info_clear(_td->i); - memset(_td,0,sizeof(*_td)); -} - -static int theora_decode_control(theora_state *_td,int _req, - void *_buf,size_t _buf_sz){ - return th_decode_ctl(((th_api_wrapper *)_td->i->codec_setup)->decode, - _req,_buf,_buf_sz); -} - -static ogg_int64_t theora_decode_granule_frame(theora_state *_td, - ogg_int64_t _gp){ - return th_granule_frame(((th_api_wrapper 
*)_td->i->codec_setup)->decode,_gp); -} - -static double theora_decode_granule_time(theora_state *_td,ogg_int64_t _gp){ - return th_granule_time(((th_api_wrapper *)_td->i->codec_setup)->decode,_gp); -} - -static const oc_state_dispatch_vtable OC_DEC_DISPATCH_VTBL={ - (oc_state_clear_func)theora_decode_clear, - (oc_state_control_func)theora_decode_control, - (oc_state_granule_frame_func)theora_decode_granule_frame, - (oc_state_granule_time_func)theora_decode_granule_time, -}; - -static void th_info2theora_info(theora_info *_ci,const th_info *_info){ - _ci->version_major=_info->version_major; - _ci->version_minor=_info->version_minor; - _ci->version_subminor=_info->version_subminor; - _ci->width=_info->frame_width; - _ci->height=_info->frame_height; - _ci->frame_width=_info->pic_width; - _ci->frame_height=_info->pic_height; - _ci->offset_x=_info->pic_x; - _ci->offset_y=_info->pic_y; - _ci->fps_numerator=_info->fps_numerator; - _ci->fps_denominator=_info->fps_denominator; - _ci->aspect_numerator=_info->aspect_numerator; - _ci->aspect_denominator=_info->aspect_denominator; - switch(_info->colorspace){ - case TH_CS_ITU_REC_470M:_ci->colorspace=OC_CS_ITU_REC_470M;break; - case TH_CS_ITU_REC_470BG:_ci->colorspace=OC_CS_ITU_REC_470BG;break; - default:_ci->colorspace=OC_CS_UNSPECIFIED;break; - } - switch(_info->pixel_fmt){ - case TH_PF_420:_ci->pixelformat=OC_PF_420;break; - case TH_PF_422:_ci->pixelformat=OC_PF_422;break; - case TH_PF_444:_ci->pixelformat=OC_PF_444;break; - default:_ci->pixelformat=OC_PF_RSVD; - } - _ci->target_bitrate=_info->target_bitrate; - _ci->quality=_info->quality; - _ci->keyframe_frequency_force=1<<_info->keyframe_granule_shift; -} - -int theora_decode_init(theora_state *_td,theora_info *_ci){ - th_api_info *apiinfo; - th_api_wrapper *api; - th_info info; - api=(th_api_wrapper *)_ci->codec_setup; - /*Allocate our own combined API wrapper/theora_info struct. 
- We put them both in one malloc'd block so that when the API wrapper is - freed, the info struct goes with it. - This avoids having to figure out whether or not we need to free the info - struct in either theora_info_clear() or theora_clear().*/ - apiinfo=(th_api_info *)_ogg_calloc(1,sizeof(*apiinfo)); - if(apiinfo==NULL)return OC_FAULT; - /*Make our own copy of the info struct, since its lifetime should be - independent of the one we were passed in.*/ - *&apiinfo->info=*_ci; - /*Convert the info struct now instead of saving the the one we decoded with - theora_decode_header(), since the user might have modified values (i.e., - color space, aspect ratio, etc. can be specified from a higher level). - The user also might be doing something "clever" with the header packets if - they are not using an Ogg encapsulation.*/ - oc_theora_info2th_info(&info,_ci); - /*Don't bother to copy the setup info; th_decode_alloc() makes its own copy - of the stuff it needs.*/ - apiinfo->api.decode=th_decode_alloc(&info,api->setup); - if(apiinfo->api.decode==NULL){ - _ogg_free(apiinfo); - return OC_EINVAL; - } - apiinfo->api.clear=(oc_setup_clear_func)th_dec_api_clear; - _td->internal_encode=NULL; - /*Provide entry points for ABI compatibility with old decoder shared libs.*/ - _td->internal_decode=(void *)&OC_DEC_DISPATCH_VTBL; - _td->granulepos=0; - _td->i=&apiinfo->info; - _td->i->codec_setup=&apiinfo->api; - return 0; -} - -int theora_decode_header(theora_info *_ci,theora_comment *_cc,ogg_packet *_op){ - th_api_wrapper *api; - th_info info; - int ret; - api=(th_api_wrapper *)_ci->codec_setup; - /*Allocate an API wrapper struct on demand, since it will not also include a - theora_info struct like the ones that are used in a theora_state struct.*/ - if(api==NULL){ - _ci->codec_setup=_ogg_calloc(1,sizeof(*api)); - if(_ci->codec_setup==NULL)return OC_FAULT; - api=(th_api_wrapper *)_ci->codec_setup; - api->clear=(oc_setup_clear_func)th_dec_api_clear; - } - /*Convert from the theora_info 
struct instead of saving our own th_info - struct between calls. - The user might be doing something "clever" with the header packets if they - are not using an Ogg encapsulation, and we don't want to break this.*/ - oc_theora_info2th_info(&info,_ci); - /*We rely on the fact that theora_comment and th_comment structures are - actually identical. - Take care not to change this fact unless you change the code here as - well!*/ - ret=th_decode_headerin(&info,(th_comment *)_cc,&api->setup,_op); - /*We also rely on the fact that the error return code values are the same, - and that the implementations of these two functions return the same set of - them. - Note that theora_decode_header() really can return OC_NOTFORMAT, even - though it is not currently documented to do so.*/ - if(ret<0)return ret; - th_info2theora_info(_ci,&info); - return 0; -} - -int theora_decode_packetin(theora_state *_td,ogg_packet *_op){ - th_api_wrapper *api; - ogg_int64_t gp; - int ret; - if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT; - api=(th_api_wrapper *)_td->i->codec_setup; - ret=th_decode_packetin(api->decode,_op,&gp); - if(ret<0)return OC_BADPACKET; - _td->granulepos=gp; - return 0; -} - -int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){ - th_api_wrapper *api; - th_dec_ctx *decode; - th_ycbcr_buffer buf; - int ret; - if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT; - api=(th_api_wrapper *)_td->i->codec_setup; - decode=(th_dec_ctx *)api->decode; - if(!decode)return OC_FAULT; - ret=th_decode_ycbcr_out(decode,buf); - if(ret>=0){ - _yuv->y_width=buf[0].width; - _yuv->y_height=buf[0].height; - _yuv->y_stride=buf[0].stride; - _yuv->uv_width=buf[1].width; - _yuv->uv_height=buf[1].height; - _yuv->uv_stride=buf[1].stride; - _yuv->y=buf[0].data; - _yuv->u=buf[1].data; - _yuv->v=buf[2].data; - } - return ret; -} diff --git a/drivers/theora/decinfo.c b/drivers/theora/decinfo.c deleted file mode 100644 index 845eb1361c..0000000000 --- a/drivers/theora/decinfo.c +++ 
/dev/null @@ -1,246 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: decinfo.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -#include -#include -#include -#include "decint.h" - - - -/*Unpacks a series of octets from a given byte array into the pack buffer. - No checking is done to ensure the buffer contains enough data. - _opb: The pack buffer to read the octets from. - _buf: The byte array to store the unpacked bytes in. - _len: The number of octets to unpack.*/ -static void oc_unpack_octets(oc_pack_buf *_opb,char *_buf,size_t _len){ - while(_len-->0){ - long val; - val=oc_pack_read(_opb,8); - *_buf++=(char)val; - } -} - -/*Unpacks a 32-bit integer encoded by octets in little-endian form.*/ -static long oc_unpack_length(oc_pack_buf *_opb){ - long ret[4]; - int i; - for(i=0;i<4;i++)ret[i]=oc_pack_read(_opb,8); - return ret[0]|ret[1]<<8|ret[2]<<16|ret[3]<<24; -} - -static int oc_info_unpack(oc_pack_buf *_opb,th_info *_info){ - long val; - /*Check the codec bitstream version.*/ - val=oc_pack_read(_opb,8); - _info->version_major=(unsigned char)val; - val=oc_pack_read(_opb,8); - _info->version_minor=(unsigned char)val; - val=oc_pack_read(_opb,8); - _info->version_subminor=(unsigned char)val; - /*verify we can parse this bitstream version. 
- We accept earlier minors and all subminors, by spec*/ - if(_info->version_major>TH_VERSION_MAJOR|| - _info->version_major==TH_VERSION_MAJOR&& - _info->version_minor>TH_VERSION_MINOR){ - return TH_EVERSION; - } - /*Read the encoded frame description.*/ - val=oc_pack_read(_opb,16); - _info->frame_width=(ogg_uint32_t)val<<4; - val=oc_pack_read(_opb,16); - _info->frame_height=(ogg_uint32_t)val<<4; - val=oc_pack_read(_opb,24); - _info->pic_width=(ogg_uint32_t)val; - val=oc_pack_read(_opb,24); - _info->pic_height=(ogg_uint32_t)val; - val=oc_pack_read(_opb,8); - _info->pic_x=(ogg_uint32_t)val; - val=oc_pack_read(_opb,8); - _info->pic_y=(ogg_uint32_t)val; - val=oc_pack_read(_opb,32); - _info->fps_numerator=(ogg_uint32_t)val; - val=oc_pack_read(_opb,32); - _info->fps_denominator=(ogg_uint32_t)val; - if(_info->frame_width==0||_info->frame_height==0|| - _info->pic_width+_info->pic_x>_info->frame_width|| - _info->pic_height+_info->pic_y>_info->frame_height|| - _info->fps_numerator==0||_info->fps_denominator==0){ - return TH_EBADHEADER; - } - /*Note: The sense of pic_y is inverted in what we pass back to the - application compared to how it is stored in the bitstream. 
- This is because the bitstream uses a right-handed coordinate system, while - applications expect a left-handed one.*/ - _info->pic_y=_info->frame_height-_info->pic_height-_info->pic_y; - val=oc_pack_read(_opb,24); - _info->aspect_numerator=(ogg_uint32_t)val; - val=oc_pack_read(_opb,24); - _info->aspect_denominator=(ogg_uint32_t)val; - val=oc_pack_read(_opb,8); - _info->colorspace=(th_colorspace)val; - val=oc_pack_read(_opb,24); - _info->target_bitrate=(int)val; - val=oc_pack_read(_opb,6); - _info->quality=(int)val; - val=oc_pack_read(_opb,5); - _info->keyframe_granule_shift=(int)val; - val=oc_pack_read(_opb,2); - _info->pixel_fmt=(th_pixel_fmt)val; - if(_info->pixel_fmt==TH_PF_RSVD)return TH_EBADHEADER; - val=oc_pack_read(_opb,3); - if(val!=0||oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER; - return 0; -} - -static int oc_comment_unpack(oc_pack_buf *_opb,th_comment *_tc){ - long len; - int i; - /*Read the vendor string.*/ - len=oc_unpack_length(_opb); - if(len<0||len>oc_pack_bytes_left(_opb))return TH_EBADHEADER; - _tc->vendor=_ogg_malloc((size_t)len+1); - if(_tc->vendor==NULL)return TH_EFAULT; - oc_unpack_octets(_opb,_tc->vendor,len); - _tc->vendor[len]='\0'; - /*Read the user comments.*/ - _tc->comments=(int)oc_unpack_length(_opb); - len=_tc->comments; - if(len<0||len>(LONG_MAX>>2)||len<<2>oc_pack_bytes_left(_opb)){ - _tc->comments=0; - return TH_EBADHEADER; - } - _tc->comment_lengths=(int *)_ogg_malloc( - _tc->comments*sizeof(_tc->comment_lengths[0])); - _tc->user_comments=(char **)_ogg_malloc( - _tc->comments*sizeof(_tc->user_comments[0])); - for(i=0;i<_tc->comments;i++){ - len=oc_unpack_length(_opb); - if(len<0||len>oc_pack_bytes_left(_opb)){ - _tc->comments=i; - return TH_EBADHEADER; - } - _tc->comment_lengths[i]=len; - _tc->user_comments[i]=_ogg_malloc((size_t)len+1); - if(_tc->user_comments[i]==NULL){ - _tc->comments=i; - return TH_EFAULT; - } - oc_unpack_octets(_opb,_tc->user_comments[i],len); - _tc->user_comments[i][len]='\0'; - } - return 
oc_pack_bytes_left(_opb)<0?TH_EBADHEADER:0; -} - -static int oc_setup_unpack(oc_pack_buf *_opb,th_setup_info *_setup){ - int ret; - /*Read the quantizer tables.*/ - ret=oc_quant_params_unpack(_opb,&_setup->qinfo); - if(ret<0)return ret; - /*Read the Huffman trees.*/ - return oc_huff_trees_unpack(_opb,_setup->huff_tables); -} - -static void oc_setup_clear(th_setup_info *_setup){ - oc_quant_params_clear(&_setup->qinfo); - oc_huff_trees_clear(_setup->huff_tables); -} - -static int oc_dec_headerin(oc_pack_buf *_opb,th_info *_info, - th_comment *_tc,th_setup_info **_setup,ogg_packet *_op){ - char buffer[6]; - long val; - int packtype; - int ret; - val=oc_pack_read(_opb,8); - packtype=(int)val; - /*If we're at a data packet and we have received all three headers, we're - done.*/ - if(!(packtype&0x80)&&_info->frame_width>0&&_tc->vendor!=NULL&&*_setup!=NULL){ - return 0; - } - /*Check the codec string.*/ - oc_unpack_octets(_opb,buffer,6); - if(memcmp(buffer,"theora",6)!=0)return TH_ENOTFORMAT; - switch(packtype){ - /*Codec info header.*/ - case 0x80:{ - /*This should be the first packet, and we should not already be - initialized.*/ - if(!_op->b_o_s||_info->frame_width>0)return TH_EBADHEADER; - ret=oc_info_unpack(_opb,_info); - if(ret<0)th_info_clear(_info); - else ret=3; - }break; - /*Comment header.*/ - case 0x81:{ - if(_tc==NULL)return TH_EFAULT; - /*We shoud have already decoded the info header, and should not yet have - decoded the comment header.*/ - if(_info->frame_width==0||_tc->vendor!=NULL)return TH_EBADHEADER; - ret=oc_comment_unpack(_opb,_tc); - if(ret<0)th_comment_clear(_tc); - else ret=2; - }break; - /*Codec setup header.*/ - case 0x82:{ - oc_setup_info *setup; - if(_tc==NULL||_setup==NULL)return TH_EFAULT; - /*We should have already decoded the info header and the comment header, - and should not yet have decoded the setup header.*/ - if(_info->frame_width==0||_tc->vendor==NULL||*_setup!=NULL){ - return TH_EBADHEADER; - } - setup=(oc_setup_info 
*)_ogg_calloc(1,sizeof(*setup)); - if(setup==NULL)return TH_EFAULT; - ret=oc_setup_unpack(_opb,setup); - if(ret<0){ - oc_setup_clear(setup); - _ogg_free(setup); - } - else{ - *_setup=setup; - ret=1; - } - }break; - default:{ - /*We don't know what this header is.*/ - return TH_EBADHEADER; - }break; - } - return ret; -} - - -/*Decodes one header packet. - This should be called repeatedly with the packets at the beginning of the - stream until it returns 0.*/ -int th_decode_headerin(th_info *_info,th_comment *_tc, - th_setup_info **_setup,ogg_packet *_op){ - oc_pack_buf opb; - if(_op==NULL)return TH_EBADHEADER; - if(_info==NULL)return TH_EFAULT; - oc_pack_readinit(&opb,_op->packet,_op->bytes); - return oc_dec_headerin(&opb,_info,_tc,_setup,_op); -} - -void th_setup_free(th_setup_info *_setup){ - if(_setup!=NULL){ - oc_setup_clear(_setup); - _ogg_free(_setup); - } -} diff --git a/drivers/theora/decint.h b/drivers/theora/decint.h deleted file mode 100644 index 261b67631a..0000000000 --- a/drivers/theora/decint.h +++ /dev/null @@ -1,107 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: decint.h 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -#include -#if !defined(_decint_H) -# define _decint_H (1) -# include "theora/theoradec.h" -# include "internal.h" -# include "bitpack.h" - -typedef struct th_setup_info oc_setup_info; -typedef struct th_dec_ctx oc_dec_ctx; - -# include "huffdec.h" -# include "dequant.h" - -/*Constants for the packet-in state machine specific to the decoder.*/ - -/*Next packet to read: Data packet.*/ -#define OC_PACKET_DATA (0) - - - -struct th_setup_info{ - /*The Huffman codes.*/ - oc_huff_node *huff_tables[TH_NHUFFMAN_TABLES]; - /*The quantization parameters.*/ - th_quant_info qinfo; -}; - - - -struct th_dec_ctx{ - /*Shared encoder/decoder state.*/ - oc_theora_state state; - /*Whether or not packets are ready to be emitted. 
- This takes on negative values while there are remaining header packets to - be emitted, reaches 0 when the codec is ready for input, and goes to 1 - when a frame has been processed and a data packet is ready.*/ - int packet_state; - /*Buffer in which to assemble packets.*/ - oc_pack_buf opb; - /*Huffman decode trees.*/ - oc_huff_node *huff_tables[TH_NHUFFMAN_TABLES]; - /*The index of the first token in each plane for each coefficient.*/ - ptrdiff_t ti0[3][64]; - /*The number of outstanding EOB runs at the start of each coefficient in each - plane.*/ - ptrdiff_t eob_runs[3][64]; - /*The DCT token lists.*/ - unsigned char *dct_tokens; - /*The extra bits associated with DCT tokens.*/ - unsigned char *extra_bits; - /*The number of dct tokens unpacked so far.*/ - int dct_tokens_count; - /*The out-of-loop post-processing level.*/ - int pp_level; - /*The DC scale used for out-of-loop deblocking.*/ - int pp_dc_scale[64]; - /*The sharpen modifier used for out-of-loop deringing.*/ - int pp_sharp_mod[64]; - /*The DC quantization index of each block.*/ - unsigned char *dc_qis; - /*The variance of each block.*/ - int *variances; - /*The storage for the post-processed frame buffer.*/ - unsigned char *pp_frame_data; - /*Whether or not the post-processsed frame buffer has space for chroma.*/ - int pp_frame_state; - /*The buffer used for the post-processed frame. 
- Note that this is _not_ guaranteed to have the same strides and offsets as - the reference frame buffers.*/ - th_ycbcr_buffer pp_frame_buf; - /*The striped decode callback function.*/ - th_stripe_callback stripe_cb; -# if defined(HAVE_CAIRO) - /*Output metrics for debugging.*/ - int telemetry; - int telemetry_mbmode; - int telemetry_mv; - int telemetry_qi; - int telemetry_bits; - int telemetry_frame_bytes; - int telemetry_coding_bytes; - int telemetry_mode_bytes; - int telemetry_mv_bytes; - int telemetry_qi_bytes; - int telemetry_dc_bytes; - unsigned char *telemetry_frame_data; -# endif -}; - -#endif diff --git a/drivers/theora/decode.c b/drivers/theora/decode.c deleted file mode 100644 index 7be66463d8..0000000000 --- a/drivers/theora/decode.c +++ /dev/null @@ -1,2943 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: decode.c 16581 2009-09-25 22:56:16Z gmaxwell $ - - ********************************************************************/ - -#include -#include -#include -#include "decint.h" -#if defined(OC_DUMP_IMAGES) -# include -# include "png.h" -#endif -#if defined(HAVE_CAIRO) -# include -#endif - - -/*No post-processing.*/ -#define OC_PP_LEVEL_DISABLED (0) -/*Keep track of DC qi for each block only.*/ -#define OC_PP_LEVEL_TRACKDCQI (1) -/*Deblock the luma plane.*/ -#define OC_PP_LEVEL_DEBLOCKY (2) -/*Dering the luma plane.*/ -#define OC_PP_LEVEL_DERINGY (3) -/*Stronger luma plane deringing.*/ -#define OC_PP_LEVEL_SDERINGY (4) -/*Deblock the chroma planes.*/ -#define OC_PP_LEVEL_DEBLOCKC (5) -/*Dering the chroma planes.*/ -#define OC_PP_LEVEL_DERINGC (6) -/*Stronger chroma plane deringing.*/ -#define OC_PP_LEVEL_SDERINGC (7) -/*Maximum valid post-processing level.*/ -#define OC_PP_LEVEL_MAX (7) - - - -/*The mode alphabets for the various mode coding schemes. 
- Scheme 0 uses a custom alphabet, which is not stored in this table.*/ -static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={ - /*Last MV dominates */ - { - OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV, - OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, - OC_MODE_INTER_MV_FOUR - }, - { - OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV, - OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, - OC_MODE_INTER_MV_FOUR - }, - { - OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2, - OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, - OC_MODE_INTER_MV_FOUR - }, - { - OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV, - OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV, - OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR - }, - /*No MV dominates.*/ - { - OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2, - OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, - OC_MODE_INTER_MV_FOUR - }, - { - OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST, - OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV, - OC_MODE_INTER_MV_FOUR - }, - /*Default ordering.*/ - { - OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST, - OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, - OC_MODE_INTER_MV_FOUR - } -}; - - -/*The original DCT tokens are extended and reordered during the construction of - the Huffman tables. - The extension means more bits can be read with fewer calls to the bitpacker - during the Huffman decoding process (at the cost of larger Huffman tables), - and fewer tokens require additional extra bits (reducing the average storage - per decoded token). - The revised ordering reveals essential information in the token value - itself; specifically, whether or not there are additional extra bits to read - and the parameter to which those extra bits are applied. 
- The token is used to fetch a code word from the OC_DCT_CODE_WORD table below. - The extra bits are added into code word at the bit position inferred from the - token value, giving the final code word from which all required parameters - are derived. - The number of EOBs and the leading zero run length can be extracted directly. - The coefficient magnitude is optionally negated before extraction, according - to a 'flip' bit.*/ - -/*The number of additional extra bits that are decoded with each of the - internal DCT tokens.*/ -static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={ - 12,4,3,3,4,4,5,5,8,8,8,8,3,3,6 -}; - -/*Whether or not an internal token needs any additional extra bits.*/ -#define OC_DCT_TOKEN_NEEDS_MORE(token) \ - (token<(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \ - sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS))) - -/*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.*/ -#define OC_DCT_TOKEN_FAT_EOB (0) - -/*The number of EOBs to use for an end-of-frame token. - Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which - is not yet available everywhere; this should be equivalent.*/ -#define OC_DCT_EOB_FINISH (~(size_t)0>>1) - -/*The location of the (6) run legth bits in the code word. - These are placed at index 0 and given 8 bits (even though 6 would suffice) - because it may be faster to extract the lower byte on some platforms.*/ -#define OC_DCT_CW_RLEN_SHIFT (0) -/*The location of the (12) EOB bits in the code word.*/ -#define OC_DCT_CW_EOB_SHIFT (8) -/*The location of the (1) flip bit in the code word. - This must be right under the magnitude bits.*/ -#define OC_DCT_CW_FLIP_BIT (20) -/*The location of the (11) token magnitude bits in the code word. 
- These must be last, and rely on a sign-extending right shift.*/ -#define OC_DCT_CW_MAG_SHIFT (21) - -/*Pack the given fields into a code word.*/ -#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \ - ((_eobs)<state,_info,3); - if(ret<0)return ret; - ret=oc_huff_trees_copy(_dec->huff_tables, - (const oc_huff_node *const *)_setup->huff_tables); - if(ret<0){ - oc_state_clear(&_dec->state); - return ret; - } - /*For each fragment, allocate one byte for every DCT coefficient token, plus - one byte for extra-bits for each token, plus one more byte for the long - EOB run, just in case it's the very last token and has a run length of - one.*/ - _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)* - _dec->state.nfrags*sizeof(_dec->dct_tokens[0])); - if(_dec->dct_tokens==NULL){ - oc_huff_trees_clear(_dec->huff_tables); - oc_state_clear(&_dec->state); - return TH_EFAULT; - } - for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){ - _dec->state.dequant_tables[qi][pli][qti]= - _dec->state.dequant_table_data[qi][pli][qti]; - } - oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale, - &_setup->qinfo); - for(qi=0;qi<64;qi++){ - int qsum; - qsum=0; - for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ - qsum+=_dec->state.dequant_tables[qti][pli][qi][12]+ - _dec->state.dequant_tables[qti][pli][qi][17]+ - _dec->state.dequant_tables[qti][pli][qi][18]+ - _dec->state.dequant_tables[qti][pli][qi][24]<<(pli==0); - } - _dec->pp_sharp_mod[qi]=-(qsum>>11); - } - memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits, - sizeof(_dec->state.loop_filter_limits)); - _dec->pp_level=OC_PP_LEVEL_DISABLED; - _dec->dc_qis=NULL; - _dec->variances=NULL; - _dec->pp_frame_data=NULL; - _dec->stripe_cb.ctx=NULL; - _dec->stripe_cb.stripe_decoded=NULL; -#if defined(HAVE_CAIRO) - _dec->telemetry=0; - _dec->telemetry_bits=0; - _dec->telemetry_qi=0; - _dec->telemetry_mbmode=0; - _dec->telemetry_mv=0; - _dec->telemetry_frame_data=NULL; -#endif - return 0; -} - -static void 
oc_dec_clear(oc_dec_ctx *_dec){ -#if defined(HAVE_CAIRO) - _ogg_free(_dec->telemetry_frame_data); -#endif - _ogg_free(_dec->pp_frame_data); - _ogg_free(_dec->variances); - _ogg_free(_dec->dc_qis); - _ogg_free(_dec->dct_tokens); - oc_huff_trees_clear(_dec->huff_tables); - oc_state_clear(&_dec->state); -} - - -static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){ - long val; - /*Check to make sure this is a data packet.*/ - val=oc_pack_read1(&_dec->opb); - if(val!=0)return TH_EBADPACKET; - /*Read in the frame type (I or P).*/ - val=oc_pack_read1(&_dec->opb); - _dec->state.frame_type=(int)val; - /*Read in the qi list.*/ - val=oc_pack_read(&_dec->opb,6); - _dec->state.qis[0]=(unsigned char)val; - val=oc_pack_read1(&_dec->opb); - if(!val)_dec->state.nqis=1; - else{ - val=oc_pack_read(&_dec->opb,6); - _dec->state.qis[1]=(unsigned char)val; - val=oc_pack_read1(&_dec->opb); - if(!val)_dec->state.nqis=2; - else{ - val=oc_pack_read(&_dec->opb,6); - _dec->state.qis[2]=(unsigned char)val; - _dec->state.nqis=3; - } - } - if(_dec->state.frame_type==OC_INTRA_FRAME){ - /*Keyframes have 3 unused configuration bits, holdovers from VP3 days. - Most of the other unused bits in the VP3 headers were eliminated. - I don't know why these remain.*/ - /*I wanted to eliminate wasted bits, but not all config wiggle room - --Monty.*/ - val=oc_pack_read(&_dec->opb,3); - if(val!=0)return TH_EIMPL; - } - return 0; -} - -/*Mark all fragments as coded and in OC_MODE_INTRA. - This also builds up the coded fragment list (in coded order), and clears the - uncoded fragment list. 
- It does not update the coded macro block list nor the super block flags, as - those are not used when decoding INTRA frames.*/ -static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){ - const oc_sb_map *sb_maps; - const oc_sb_flags *sb_flags; - oc_fragment *frags; - ptrdiff_t *coded_fragis; - ptrdiff_t ncoded_fragis; - ptrdiff_t prev_ncoded_fragis; - unsigned nsbs; - unsigned sbi; - int pli; - coded_fragis=_dec->state.coded_fragis; - prev_ncoded_fragis=ncoded_fragis=0; - sb_maps=(const oc_sb_map *)_dec->state.sb_maps; - sb_flags=_dec->state.sb_flags; - frags=_dec->state.frags; - sbi=nsbs=0; - for(pli=0;pli<3;pli++){ - nsbs+=_dec->state.fplanes[pli].nsbs; - for(;sbi=0){ - frags[fragi].coded=1; - frags[fragi].mb_mode=OC_MODE_INTRA; - coded_fragis[ncoded_fragis++]=fragi; - } - } - } - } - _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis; - prev_ncoded_fragis=ncoded_fragis; - } - _dec->state.ntotal_coded_fragis=ncoded_fragis; -} - -/*Decodes the bit flags indicating whether each super block is partially coded - or not. - Return: The number of partially coded super blocks.*/ -static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){ - oc_sb_flags *sb_flags; - unsigned nsbs; - unsigned sbi; - unsigned npartial; - unsigned run_count; - long val; - int flag; - val=oc_pack_read1(&_dec->opb); - flag=(int)val; - sb_flags=_dec->state.sb_flags; - nsbs=_dec->state.nsbs; - sbi=npartial=0; - while(sbiopb); - full_run=run_count>=4129; - do{ - sb_flags[sbi].coded_partially=flag; - sb_flags[sbi].coded_fully=0; - npartial+=flag; - sbi++; - } - while(--run_count>0&&sbiopb); - flag=(int)val; - } - else flag=!flag; - } - /*TODO: run_count should be 0 here. - If it's not, we should issue a warning of some kind.*/ - return npartial; -} - -/*Decodes the bit flags for whether or not each non-partially-coded super - block is fully coded or not. - This function should only be called if there is at least one - non-partially-coded super block. 
- Return: The number of partially coded super blocks.*/ -static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){ - oc_sb_flags *sb_flags; - unsigned nsbs; - unsigned sbi; - unsigned run_count; - long val; - int flag; - sb_flags=_dec->state.sb_flags; - nsbs=_dec->state.nsbs; - /*Skip partially coded super blocks.*/ - for(sbi=0;sb_flags[sbi].coded_partially;sbi++); - val=oc_pack_read1(&_dec->opb); - flag=(int)val; - do{ - int full_run; - run_count=oc_sb_run_unpack(&_dec->opb); - full_run=run_count>=4129; - for(;sbiopb); - flag=(int)val; - } - else flag=!flag; - } - while(sbistate.nsbs)oc_dec_coded_sb_flags_unpack(_dec); - if(npartial>0){ - val=oc_pack_read1(&_dec->opb); - flag=!(int)val; - } - else flag=0; - sb_maps=(const oc_sb_map *)_dec->state.sb_maps; - sb_flags=_dec->state.sb_flags; - frags=_dec->state.frags; - sbi=nsbs=run_count=0; - coded_fragis=_dec->state.coded_fragis; - uncoded_fragis=coded_fragis+_dec->state.nfrags; - prev_ncoded_fragis=ncoded_fragis=nuncoded_fragis=0; - for(pli=0;pli<3;pli++){ - nsbs+=_dec->state.fplanes[pli].nsbs; - for(;sbi=0){ - int coded; - if(sb_flags[sbi].coded_fully)coded=1; - else if(!sb_flags[sbi].coded_partially)coded=0; - else{ - if(run_count<=0){ - run_count=oc_block_run_unpack(&_dec->opb); - flag=!flag; - } - run_count--; - coded=flag; - } - if(coded)coded_fragis[ncoded_fragis++]=fragi; - else *(uncoded_fragis-++nuncoded_fragis)=fragi; - frags[fragi].coded=coded; - } - } - } - } - _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis; - prev_ncoded_fragis=ncoded_fragis; - } - _dec->state.ntotal_coded_fragis=ncoded_fragis; - /*TODO: run_count should be 0 here. 
- If it's not, we should issue a warning of some kind.*/ -} - - - -typedef int (*oc_mode_unpack_func)(oc_pack_buf *_opb); - -static int oc_vlc_mode_unpack(oc_pack_buf *_opb){ - long val; - int i; - for(i=0;i<7;i++){ - val=oc_pack_read1(_opb); - if(!val)break; - } - return i; -} - -static int oc_clc_mode_unpack(oc_pack_buf *_opb){ - long val; - val=oc_pack_read(_opb,3); - return (int)val; -} - -/*Unpacks the list of macro block modes for INTER frames.*/ -static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){ - const oc_mb_map *mb_maps; - signed char *mb_modes; - const oc_fragment *frags; - const unsigned char *alphabet; - unsigned char scheme0_alphabet[8]; - oc_mode_unpack_func mode_unpack; - size_t nmbs; - size_t mbi; - long val; - int mode_scheme; - val=oc_pack_read(&_dec->opb,3); - mode_scheme=(int)val; - if(mode_scheme==0){ - int mi; - /*Just in case, initialize the modes to something. - If the bitstream doesn't contain each index exactly once, it's likely - corrupt and the rest of the packet is garbage anyway, but this way we - won't crash, and we'll decode SOMETHING.*/ - /*LOOP VECTORIZES*/ - for(mi=0;miopb,3); - scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi]; - } - alphabet=scheme0_alphabet; - } - else alphabet=OC_MODE_ALPHABETS[mode_scheme-1]; - if(mode_scheme==7)mode_unpack=oc_clc_mode_unpack; - else mode_unpack=oc_vlc_mode_unpack; - mb_modes=_dec->state.mb_modes; - mb_maps=(const oc_mb_map *)_dec->state.mb_maps; - nmbs=_dec->state.nmbs; - frags=_dec->state.frags; - for(mbi=0;mbiopb)]; - /*There were none: INTER_NOMV is forced.*/ - else mb_modes[mbi]=OC_MODE_INTER_NOMV; - } - } -} - - - -typedef int (*oc_mv_comp_unpack_func)(oc_pack_buf *_opb); - -static int oc_vlc_mv_comp_unpack(oc_pack_buf *_opb){ - long bits; - int mask; - int mv; - bits=oc_pack_read(_opb,3); - switch(bits){ - case 0:return 0; - case 1:return 1; - case 2:return -1; - case 3: - case 4:{ - mv=(int)(bits-1); - bits=oc_pack_read1(_opb); - }break; - /*case 5: - case 6: - case 7:*/ - default:{ 
- mv=1<>1); - bits&=1; - }break; - } - mask=-(int)bits; - return mv+mask^mask; -} - -static int oc_clc_mv_comp_unpack(oc_pack_buf *_opb){ - long bits; - int mask; - int mv; - bits=oc_pack_read(_opb,6); - mv=(int)bits>>1; - mask=-((int)bits&1); - return mv+mask^mask; -} - -/*Unpacks the list of motion vectors for INTER frames, and propagtes the macro - block modes and motion vectors to the individual fragments.*/ -static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){ - const oc_mb_map *mb_maps; - const signed char *mb_modes; - oc_set_chroma_mvs_func set_chroma_mvs; - oc_mv_comp_unpack_func mv_comp_unpack; - oc_fragment *frags; - oc_mv *frag_mvs; - const unsigned char *map_idxs; - int map_nidxs; - oc_mv last_mv[2]; - oc_mv cbmvs[4]; - size_t nmbs; - size_t mbi; - long val; - set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt]; - val=oc_pack_read1(&_dec->opb); - mv_comp_unpack=val?oc_clc_mv_comp_unpack:oc_vlc_mv_comp_unpack; - map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt]; - map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt]; - memset(last_mv,0,sizeof(last_mv)); - frags=_dec->state.frags; - frag_mvs=_dec->state.frag_mvs; - mb_maps=(const oc_mb_map *)_dec->state.mb_maps; - mb_modes=_dec->state.mb_modes; - nmbs=_dec->state.nmbs; - for(mbi=0;mbi>2][mapi&3]; - if(frags[fragi].coded)coded[ncoded++]=mapi; - } - while(++mapiiopb); - lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb); - memcpy(frag_mvs[fragi],lbmvs[bi],sizeof(lbmvs[bi])); - } - else lbmvs[bi][0]=lbmvs[bi][1]=0; - } - if(codedi>0){ - memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1])); - memcpy(last_mv[0],lbmvs[coded[codedi-1]],sizeof(last_mv[0])); - } - if(codedi>2][bi]; - frags[fragi].mb_mode=mb_mode; - memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(cbmvs[bi])); - } - } - }break; - case OC_MODE_INTER_MV:{ - memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1])); - mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb); - mbmv[1]=last_mv[0][1]=(signed 
char)(*mv_comp_unpack)(&_dec->opb); - }break; - case OC_MODE_INTER_MV_LAST:memcpy(mbmv,last_mv[0],sizeof(mbmv));break; - case OC_MODE_INTER_MV_LAST2:{ - memcpy(mbmv,last_mv[1],sizeof(mbmv)); - memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1])); - memcpy(last_mv[0],mbmv,sizeof(last_mv[0])); - }break; - case OC_MODE_GOLDEN_MV:{ - mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb); - mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb); - }break; - default:memset(mbmv,0,sizeof(mbmv));break; - } - /*4MV mode fills in the fragments itself. - For all other modes we can use this common code.*/ - if(mb_mode!=OC_MODE_INTER_MV_FOUR){ - for(codedi=0;codedi>2][mapi&3]; - frags[fragi].mb_mode=mb_mode; - memcpy(frag_mvs[fragi],mbmv,sizeof(mbmv)); - } - } - } - } -} - -static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){ - oc_fragment *frags; - const ptrdiff_t *coded_fragis; - ptrdiff_t ncoded_fragis; - ptrdiff_t fragii; - ptrdiff_t fragi; - ncoded_fragis=_dec->state.ntotal_coded_fragis; - if(ncoded_fragis<=0)return; - frags=_dec->state.frags; - coded_fragis=_dec->state.coded_fragis; - if(_dec->state.nqis==1){ - /*If this frame has only a single qi value, then just use it for all coded - fragments.*/ - for(fragii=0;fragiiopb); - flag=(int)val; - nqi1=0; - fragii=0; - while(fragiiopb); - full_run=run_count>=4129; - do{ - frags[coded_fragis[fragii++]].qii=flag; - nqi1+=flag; - } - while(--run_count>0&&fragiiopb); - flag=(int)val; - } - else flag=!flag; - } - /*TODO: run_count should be 0 here. 
- If it's not, we should issue a warning of some kind.*/ - /*If we have 3 different qi's for this frame, and there was at least one - fragment with a non-zero qi, make the second pass.*/ - if(_dec->state.nqis==3&&nqi1>0){ - /*Skip qii==0 fragments.*/ - for(fragii=0;frags[coded_fragis[fragii]].qii==0;fragii++); - val=oc_pack_read1(&_dec->opb); - flag=(int)val; - do{ - int full_run; - run_count=oc_sb_run_unpack(&_dec->opb); - full_run=run_count>=4129; - for(;fragiiopb); - flag=(int)val; - } - else flag=!flag; - } - while(fragiidct_tokens; - frags=_dec->state.frags; - coded_fragis=_dec->state.coded_fragis; - ncoded_fragis=fragii=eobs=ti=0; - for(pli=0;pli<3;pli++){ - ptrdiff_t run_counts[64]; - ptrdiff_t eob_count; - ptrdiff_t eobi; - int rli; - ncoded_fragis+=_dec->state.ncoded_fragis[pli]; - memset(run_counts,0,sizeof(run_counts)); - _dec->eob_runs[pli][0]=eobs; - _dec->ti0[pli][0]=ti; - /*Continue any previous EOB run, if there was one.*/ - eobi=eobs; - if(ncoded_fragis-fragii0)frags[coded_fragis[fragii++]].dc=0; - while(fragiiopb, - _dec->huff_tables[_huff_idxs[pli+1>>1]]); - dct_tokens[ti++]=(unsigned char)token; - if(OC_DCT_TOKEN_NEEDS_MORE(token)){ - eb=(int)oc_pack_read(&_dec->opb, - OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]); - dct_tokens[ti++]=(unsigned char)eb; - if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8); - eb<<=OC_DCT_TOKEN_EB_POS(token); - } - else eb=0; - cw=OC_DCT_CODE_WORD[token]+eb; - eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF; - if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH; - if(eobs){ - eobi=OC_MINI(eobs,ncoded_fragis-fragii); - eob_count+=eobi; - eobs-=eobi; - while(eobi-->0)frags[coded_fragis[fragii++]].dc=0; - } - else{ - int coeff; - skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT); - cw^=-(cw&1<>OC_DCT_CW_MAG_SHIFT; - if(skip)coeff=0; - run_counts[skip]++; - frags[coded_fragis[fragii++]].dc=coeff; - } - } - /*Add the total EOB count to the longest run length.*/ - run_counts[63]+=eob_count; - /*And convert the run_counts array 
to a moment table.*/ - for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1]; - /*Finally, subtract off the number of coefficients that have been - accounted for by runs started in this coefficient.*/ - for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli]; - } - _dec->dct_tokens_count=ti; - return eobs; -} - -/*Unpacks the AC coefficient tokens. - This can completely discard coefficient values while unpacking, and so is - somewhat simpler than unpacking the DC coefficient tokens. - _huff_idx: The index of the Huffman table to use for each color plane. - _ntoks_left: The number of tokens left to be decoded in each color plane for - each coefficient. - This is updated as EOB tokens and zero run tokens are decoded. - _eobs: The length of any outstanding EOB run from previous - coefficients. - Return: The length of any outstanding EOB run.*/ -static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2], - ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){ - unsigned char *dct_tokens; - ptrdiff_t ti; - int pli; - dct_tokens=_dec->dct_tokens; - ti=_dec->dct_tokens_count; - for(pli=0;pli<3;pli++){ - ptrdiff_t run_counts[64]; - ptrdiff_t eob_count; - size_t ntoks_left; - size_t ntoks; - int rli; - _dec->eob_runs[pli][_zzi]=_eobs; - _dec->ti0[pli][_zzi]=ti; - ntoks_left=_ntoks_left[pli][_zzi]; - memset(run_counts,0,sizeof(run_counts)); - eob_count=0; - ntoks=0; - while(ntoks+_eobsopb, - _dec->huff_tables[_huff_idxs[pli+1>>1]]); - dct_tokens[ti++]=(unsigned char)token; - if(OC_DCT_TOKEN_NEEDS_MORE(token)){ - eb=(int)oc_pack_read(&_dec->opb, - OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]); - dct_tokens[ti++]=(unsigned char)eb; - if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8); - eb<<=OC_DCT_TOKEN_EB_POS(token); - } - else eb=0; - cw=OC_DCT_CODE_WORD[token]+eb; - skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT); - _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF; - if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH; - if(_eobs==0){ - run_counts[skip]++; - 
ntoks++; - } - } - /*Add the portion of the last EOB run actually used by this coefficient.*/ - eob_count+=ntoks_left-ntoks; - /*And remove it from the remaining EOB count.*/ - _eobs-=ntoks_left-ntoks; - /*Add the total EOB count to the longest run length.*/ - run_counts[63]+=eob_count; - /*And convert the run_counts array to a moment table.*/ - for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1]; - /*Finally, subtract off the number of coefficients that have been - accounted for by runs started in this coefficient.*/ - for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli]; - } - _dec->dct_tokens_count=ti; - return _eobs; -} - -/*Tokens describing the DCT coefficients that belong to each fragment are - stored in the bitstream grouped by coefficient, not by fragment. - - This means that we either decode all the tokens in order, building up a - separate coefficient list for each fragment as we go, and then go back and - do the iDCT on each fragment, or we have to create separate lists of tokens - for each coefficient, so that we can pull the next token required off the - head of the appropriate list when decoding a specific fragment. - - The former was VP3's choice, and it meant 2*w*h extra storage for all the - decoded coefficient values. - - We take the second option, which lets us store just one to three bytes per - token (generally far fewer than the number of coefficients, due to EOB - tokens and zero runs), and which requires us to only maintain a counter for - each of the 64 coefficients, instead of a counter for every fragment to - determine where the next token goes. - - We actually use 3 counters per coefficient, one for each color plane, so we - can decode all color planes simultaneously. 
- This lets color conversion, etc., be done as soon as a full MCU (one or - two super block rows) is decoded, while the image data is still in cache.*/ - -static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){ - static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64}; - ptrdiff_t ntoks_left[3][64]; - int huff_idxs[2]; - ptrdiff_t eobs; - long val; - int pli; - int zzi; - int hgi; - for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){ - ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli]; - } - val=oc_pack_read(&_dec->opb,4); - huff_idxs[0]=(int)val; - val=oc_pack_read(&_dec->opb,4); - huff_idxs[1]=(int)val; - _dec->eob_runs[0][0]=0; - eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left); -#if defined(HAVE_CAIRO) - _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb); -#endif - val=oc_pack_read(&_dec->opb,4); - huff_idxs[0]=(int)val; - val=oc_pack_read(&_dec->opb,4); - huff_idxs[1]=(int)val; - zzi=1; - for(hgi=1;hgi<5;hgi++){ - huff_idxs[0]+=16; - huff_idxs[1]+=16; - for(;zzipp_level<=OC_PP_LEVEL_DISABLED){ - if(_dec->dc_qis!=NULL){ - _ogg_free(_dec->dc_qis); - _dec->dc_qis=NULL; - _ogg_free(_dec->variances); - _dec->variances=NULL; - _ogg_free(_dec->pp_frame_data); - _dec->pp_frame_data=NULL; - } - return 1; - } - if(_dec->dc_qis==NULL){ - /*If we haven't been tracking DC quantization indices, there's no point in - starting now.*/ - if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1; - _dec->dc_qis=(unsigned char *)_ogg_malloc( - _dec->state.nfrags*sizeof(_dec->dc_qis[0])); - if(_dec->dc_qis==NULL)return 1; - memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags); - } - else{ - unsigned char *dc_qis; - const ptrdiff_t *coded_fragis; - ptrdiff_t ncoded_fragis; - ptrdiff_t fragii; - unsigned char qi0; - /*Update the DC quantization index of each coded block.*/ - dc_qis=_dec->dc_qis; - coded_fragis=_dec->state.coded_fragis; - ncoded_fragis=_dec->state.ncoded_fragis[0]+ - _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2]; - qi0=(unsigned 
char)_dec->state.qis[0]; - for(fragii=0;fragiipp_level<=OC_PP_LEVEL_TRACKDCQI){ - if(_dec->variances!=NULL){ - _ogg_free(_dec->variances); - _dec->variances=NULL; - _ogg_free(_dec->pp_frame_data); - _dec->pp_frame_data=NULL; - } - return 1; - } - if(_dec->variances==NULL){ - size_t frame_sz; - size_t c_sz; - int c_w; - int c_h; - frame_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height; - c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1); - c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2); - c_sz=c_w*(size_t)c_h; - /*Allocate space for the chroma planes, even if we're not going to use - them; this simplifies allocation state management, though it may waste - memory on the few systems that don't overcommit pages.*/ - frame_sz+=c_sz<<1; - _dec->pp_frame_data=(unsigned char *)_ogg_malloc( - frame_sz*sizeof(_dec->pp_frame_data[0])); - _dec->variances=(int *)_ogg_malloc( - _dec->state.nfrags*sizeof(_dec->variances[0])); - if(_dec->variances==NULL||_dec->pp_frame_data==NULL){ - _ogg_free(_dec->pp_frame_data); - _dec->pp_frame_data=NULL; - _ogg_free(_dec->variances); - _dec->variances=NULL; - return 1; - } - /*Force an update of the PP buffer pointers.*/ - _dec->pp_frame_state=0; - } - /*Update the PP buffer pointers if necessary.*/ - if(_dec->pp_frame_state!=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){ - if(_dec->pp_levelpp_frame_buf[0].width=_dec->state.info.frame_width; - _dec->pp_frame_buf[0].height=_dec->state.info.frame_height; - _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width; - _dec->pp_frame_buf[0].data=_dec->pp_frame_data+ - (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride; - } - else{ - size_t y_sz; - size_t c_sz; - int c_w; - int c_h; - /*Otherwise, set up pointers to all three PP planes.*/ - y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height; - c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1); - 
c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2); - c_sz=c_w*(size_t)c_h; - _dec->pp_frame_buf[0].width=_dec->state.info.frame_width; - _dec->pp_frame_buf[0].height=_dec->state.info.frame_height; - _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width; - _dec->pp_frame_buf[0].data=_dec->pp_frame_data; - _dec->pp_frame_buf[1].width=c_w; - _dec->pp_frame_buf[1].height=c_h; - _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width; - _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz; - _dec->pp_frame_buf[2].width=c_w; - _dec->pp_frame_buf[2].height=c_h; - _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width; - _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz; - oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf); - } - _dec->pp_frame_state=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC); - } - /*If we're not processing chroma, copy the reference frame's chroma planes.*/ - if(_dec->pp_levelpp_frame_buf+1, - _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1, - sizeof(_dec->pp_frame_buf[1])*2); - } - return 0; -} - - - -typedef struct{ - int bounding_values[256]; - ptrdiff_t ti[3][64]; - ptrdiff_t eob_runs[3][64]; - const ptrdiff_t *coded_fragis[3]; - const ptrdiff_t *uncoded_fragis[3]; - ptrdiff_t ncoded_fragis[3]; - ptrdiff_t nuncoded_fragis[3]; - const ogg_uint16_t *dequant[3][3][2]; - int fragy0[3]; - int fragy_end[3]; - int pred_last[3][3]; - int mcu_nvfrags; - int loop_filter; - int pp_level; -}oc_dec_pipeline_state; - - - -/*Initialize the main decoding pipeline.*/ -static void oc_dec_pipeline_init(oc_dec_ctx *_dec, - oc_dec_pipeline_state *_pipe){ - const ptrdiff_t *coded_fragis; - const ptrdiff_t *uncoded_fragis; - int pli; - int qii; - int qti; - /*If chroma is sub-sampled in the vertical direction, we have to decode two - super block rows of Y' for each super block row of Cb and Cr.*/ - _pipe->mcu_nvfrags=4<state.info.pixel_fmt&2); - /*Initialize the token and extra bits indices for each plane 
and - coefficient.*/ - memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti)); - /*Also copy over the initial the EOB run counts.*/ - memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs)); - /*Set up per-plane pointers to the coded and uncoded fragments lists.*/ - coded_fragis=_dec->state.coded_fragis; - uncoded_fragis=coded_fragis+_dec->state.nfrags; - for(pli=0;pli<3;pli++){ - ptrdiff_t ncoded_fragis; - _pipe->coded_fragis[pli]=coded_fragis; - _pipe->uncoded_fragis[pli]=uncoded_fragis; - ncoded_fragis=_dec->state.ncoded_fragis[pli]; - coded_fragis+=ncoded_fragis; - uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags; - } - /*Set up condensed quantizer tables.*/ - for(pli=0;pli<3;pli++){ - for(qii=0;qii<_dec->state.nqis;qii++){ - for(qti=0;qti<2;qti++){ - _pipe->dequant[pli][qii][qti]= - _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti]; - } - } - } - /*Set the previous DC predictor to 0 for all color planes and frame types.*/ - memset(_pipe->pred_last,0,sizeof(_pipe->pred_last)); - /*Initialize the bounding value array for the loop filter.*/ - _pipe->loop_filter=!oc_state_loop_filter_init(&_dec->state, - _pipe->bounding_values); - /*Initialize any buffers needed for post-processing. - We also save the current post-processing level, to guard against the user - changing it from a callback.*/ - if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level; - /*If we don't have enough information to post-process, disable it, regardless - of the user-requested level.*/ - else{ - _pipe->pp_level=OC_PP_LEVEL_DISABLED; - memcpy(_dec->pp_frame_buf, - _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]], - sizeof(_dec->pp_frame_buf[0])*3); - } -} - -/*Undo the DC prediction in a single plane of an MCU (one or two super block - rows). 
- As a side effect, the number of coded and uncoded fragments in this plane of - the MCU is also computed.*/ -static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec, - oc_dec_pipeline_state *_pipe,int _pli){ - const oc_fragment_plane *fplane; - oc_fragment *frags; - int *pred_last; - ptrdiff_t ncoded_fragis; - ptrdiff_t fragi; - int fragx; - int fragy; - int fragy0; - int fragy_end; - int nhfrags; - /*Compute the first and last fragment row of the current MCU for this - plane.*/ - fplane=_dec->state.fplanes+_pli; - fragy0=_pipe->fragy0[_pli]; - fragy_end=_pipe->fragy_end[_pli]; - nhfrags=fplane->nhfrags; - pred_last=_pipe->pred_last[_pli]; - frags=_dec->state.frags; - ncoded_fragis=0; - fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags; - for(fragy=fragy0;fragy=nhfrags)ur_ref=-1; - else{ - ur_ref=u_frags[fragi+1].coded? - OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1; - } - if(frags[fragi].coded){ - int pred; - int ref; - ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode); - /*We break out a separate case based on which of our neighbors use - the same reference frames. 
- This is somewhat faster than trying to make a generic case which - handles all of them, since it reduces lots of poorly predicted - jumps to one switch statement, and also lets a number of the - multiplications be optimized out by strength reduction.*/ - switch((l_ref==ref)|(ul_ref==ref)<<1| - (u_ref==ref)<<2|(ur_ref==ref)<<3){ - default:pred=pred_last[ref];break; - case 1: - case 3:pred=frags[fragi-1].dc;break; - case 2:pred=u_frags[fragi-1].dc;break; - case 4: - case 6: - case 12:pred=u_frags[fragi].dc;break; - case 5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break; - case 8:pred=u_frags[fragi+1].dc;break; - case 9: - case 11: - case 13:{ - pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128; - }break; - case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break; - case 14:{ - pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc) - +10*u_frags[fragi].dc)/16; - }break; - case 7: - case 15:{ - int p0; - int p1; - int p2; - p0=frags[fragi-1].dc; - p1=u_frags[fragi-1].dc; - p2=u_frags[fragi].dc; - pred=(29*(p0+p2)-26*p1)/32; - if(abs(pred-p2)>128)pred=p2; - else if(abs(pred-p0)>128)pred=p0; - else if(abs(pred-p1)>128)pred=p1; - }break; - } - pred_last[ref]=frags[fragi].dc+=pred; - ncoded_fragis++; - l_ref=ref; - } - else l_ref=-1; - ul_ref=u_ref; - u_ref=ur_ref; - } - } - } - _pipe->ncoded_fragis[_pli]=ncoded_fragis; - /*Also save the number of uncoded fragments so we know how many to copy.*/ - _pipe->nuncoded_fragis[_pli]= - (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis; -} - -/*Reconstructs all coded fragments in a single MCU (one or two super block - rows). - This requires that each coded fragment have a proper macro block mode and - motion vector (if not in INTRA mode), and have it's DC value decoded, with - the DC prediction process reversed, and the number of coded and uncoded - fragments in this plane of the MCU be counted. 
- The token lists for each color plane and coefficient should also be filled - in, along with initial token offsets, extra bits offsets, and EOB run - counts.*/ -static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec, - oc_dec_pipeline_state *_pipe,int _pli){ - unsigned char *dct_tokens; - const unsigned char *dct_fzig_zag; - ogg_uint16_t dc_quant[2]; - const oc_fragment *frags; - const ptrdiff_t *coded_fragis; - ptrdiff_t ncoded_fragis; - ptrdiff_t fragii; - ptrdiff_t *ti; - ptrdiff_t *eob_runs; - int qti; - dct_tokens=_dec->dct_tokens; - dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag; - frags=_dec->state.frags; - coded_fragis=_pipe->coded_fragis[_pli]; - ncoded_fragis=_pipe->ncoded_fragis[_pli]; - ti=_pipe->ti[_pli]; - eob_runs=_pipe->eob_runs[_pli]; - for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0]; - for(fragii=0;fragiidequant[_pli][frags[fragi].qii][qti]; - /*Decode the AC coefficients.*/ - for(zzi=0;zzi<64;){ - int token; - last_zzi=zzi; - if(eob_runs[zzi]){ - eob_runs[zzi]--; - break; - } - else{ - ptrdiff_t eob; - int cw; - int rlen; - int coeff; - int lti; - lti=ti[zzi]; - token=dct_tokens[lti++]; - cw=OC_DCT_CODE_WORD[token]; - /*These parts could be done branchless, but the branches are fairly - predictable and the C code translates into more than a few - instructions, so it's worth it to avoid them.*/ - if(OC_DCT_TOKEN_NEEDS_MORE(token)){ - cw+=dct_tokens[lti++]<>OC_DCT_CW_EOB_SHIFT&0xFFF; - if(token==OC_DCT_TOKEN_FAT_EOB){ - eob+=dct_tokens[lti++]<<8; - if(eob==0)eob=OC_DCT_EOB_FINISH; - } - rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT); - cw^=-(cw&1<>OC_DCT_CW_MAG_SHIFT; - eob_runs[zzi]=eob; - ti[zzi]=lti; - zzi+=rlen; - dct_coeffs[dct_fzig_zag[zzi]]=(ogg_int16_t)(coeff*(int)ac_quant[zzi]); - zzi+=!eob; - } - } - /*TODO: zzi should be exactly 64 here. - If it's not, we should report some kind of warning.*/ - zzi=OC_MINI(zzi,64); - dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc; - /*last_zzi is always initialized. 
- If your compiler thinks otherwise, it is dumb.*/ - oc_state_frag_recon(&_dec->state,fragi,_pli, - dct_coeffs,last_zzi,dc_quant[qti]); - } - _pipe->coded_fragis[_pli]+=ncoded_fragis; - /*Right now the reconstructed MCU has only the coded blocks in it.*/ - /*TODO: We make the decision here to always copy the uncoded blocks into it - from the reference frame. - We could also copy the coded blocks back over the reference frame, if we - wait for an additional MCU to be decoded, which might be faster if only a - small number of blocks are coded. - However, this introduces more latency, creating a larger cache footprint. - It's unknown which decision is better, but this one results in simpler - code, and the hard case (high bitrate, high resolution) is handled - correctly.*/ - /*Copy the uncoded blocks from the previous reference frame.*/ - _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli]; - oc_state_frag_copy_list(&_dec->state,_pipe->uncoded_fragis[_pli], - _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli); -} - -/*Filter a horizontal block edge.*/ -static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride, - const unsigned char *_src,int _src_ystride,int _qstep,int _flimit, - int *_variance0,int *_variance1){ - unsigned char *rdst; - const unsigned char *rsrc; - unsigned char *cdst; - const unsigned char *csrc; - int r[10]; - int sum0; - int sum1; - int bx; - int by; - rdst=_dst; - rsrc=_src; - for(bx=0;bx<8;bx++){ - cdst=rdst; - csrc=rsrc; - for(by=0;by<10;by++){ - r[by]=*csrc; - csrc+=_src_ystride; - } - sum0=sum1=0; - for(by=0;by<4;by++){ - sum0+=abs(r[by+1]-r[by]); - sum1+=abs(r[by+5]-r[by+6]); - } - *_variance0+=OC_MINI(255,sum0); - *_variance1+=OC_MINI(255,sum1); - if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){ - *cdst=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3); - cdst+=_dst_ystride; - *cdst=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3); - cdst+=_dst_ystride; - for(by=0;by<4;by++){ - 
*cdst=(unsigned char)(r[by]+r[by+1]+r[by+2]+r[by+3]*2+ - r[by+4]+r[by+5]+r[by+6]+4>>3); - cdst+=_dst_ystride; - } - *cdst=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3); - cdst+=_dst_ystride; - *cdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3); - } - else{ - for(by=1;by<=8;by++){ - *cdst=(unsigned char)r[by]; - cdst+=_dst_ystride; - } - } - rdst++; - rsrc++; - } -} - -/*Filter a vertical block edge.*/ -static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride, - int _qstep,int _flimit,int *_variances){ - unsigned char *rdst; - const unsigned char *rsrc; - unsigned char *cdst; - int r[10]; - int sum0; - int sum1; - int bx; - int by; - cdst=_dst; - for(by=0;by<8;by++){ - rsrc=cdst-1; - rdst=cdst; - for(bx=0;bx<10;bx++)r[bx]=*rsrc++; - sum0=sum1=0; - for(bx=0;bx<4;bx++){ - sum0+=abs(r[bx+1]-r[bx]); - sum1+=abs(r[bx+5]-r[bx+6]); - } - _variances[0]+=OC_MINI(255,sum0); - _variances[1]+=OC_MINI(255,sum1); - if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){ - *rdst++=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3); - *rdst++=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3); - for(bx=0;bx<4;bx++){ - *rdst++=(unsigned char)(r[bx]+r[bx+1]+r[bx+2]+r[bx+3]*2+ - r[bx+4]+r[bx+5]+r[bx+6]+4>>3); - } - *rdst++=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3); - *rdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3); - } - cdst+=_dst_ystride; - } -} - -static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec, - th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0, - int _fragy_end){ - oc_fragment_plane *fplane; - int *variance; - unsigned char *dc_qi; - unsigned char *dst; - const unsigned char *src; - ptrdiff_t froffset; - int dst_ystride; - int src_ystride; - int nhfrags; - int width; - int notstart; - int notdone; - int flimit; - int qstep; - int y_end; - int y; - int x; - _dst+=_pli; - _src+=_pli; - fplane=_dec->state.fplanes+_pli; - nhfrags=fplane->nhfrags; - froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags; - 
variance=_dec->variances+froffset; - dc_qi=_dec->dc_qis+froffset; - notstart=_fragy0>0; - notdone=_fragy_endnvfrags; - /*We want to clear an extra row of variances, except at the end.*/ - memset(variance+(nhfrags&-notstart),0, - (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0]))); - /*Except for the first time, we want to point to the middle of the row.*/ - y=(_fragy0<<3)+(notstart<<2); - dst_ystride=_dst->stride; - src_ystride=_src->stride; - dst=_dst->data+y*(ptrdiff_t)dst_ystride; - src=_src->data+y*(ptrdiff_t)src_ystride; - width=_dst->width; - for(;y<4;y++){ - memcpy(dst,src,width*sizeof(dst[0])); - dst+=dst_ystride; - src+=src_ystride; - } - /*We also want to skip the last row in the frame for this loop.*/ - y_end=_fragy_end-!notdone<<3; - for(;ypp_dc_scale[*dc_qi]; - flimit=(qstep*3)>>2; - oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride, - qstep,flimit,variance,variance+nhfrags); - variance++; - dc_qi++; - for(x=8;xpp_dc_scale[*dc_qi]; - flimit=(qstep*3)>>2; - oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride, - qstep,flimit,variance,variance+nhfrags); - oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride, - qstep,flimit,variance-1); - variance++; - dc_qi++; - } - dst+=dst_ystride<<3; - src+=src_ystride<<3; - } - /*And finally, handle the last row in the frame, if it's in the range.*/ - if(!notdone){ - int height; - height=_dst->height; - for(;ypp_dc_scale[*dc_qi++]; - flimit=(qstep*3)>>2; - oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride, - qstep,flimit,variance++); - } - } -} - -static void oc_dering_block(unsigned char *_idata,int _ystride,int _b, - int _dc_scale,int _sharp_mod,int _strong){ - static const unsigned char OC_MOD_MAX[2]={24,32}; - static const unsigned char OC_MOD_SHIFT[2]={1,0}; - const unsigned char *psrc; - const unsigned char *src; - const unsigned char *nsrc; - unsigned char *dst; - int vmod[72]; - int hmod[72]; - int mod_hi; - int by; - int bx; - 
mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]); - dst=_idata; - src=dst; - psrc=src-(_ystride&-!(_b&4)); - for(by=0;by<9;by++){ - for(bx=0;bx<8;bx++){ - int mod; - mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<>7); - for(bx=1;bx<7;bx++){ - a=128; - b=64; - w=hmod[(bx<<3)+by]; - a-=w; - b+=w*src[bx-1]; - w=vmod[(by<<3)+bx]; - a-=w; - b+=w*psrc[bx]; - w=vmod[(by+1<<3)+bx]; - a-=w; - b+=w*nsrc[bx]; - w=hmod[(bx+1<<3)+by]; - a-=w; - b+=w*src[bx+1]; - dst[bx]=OC_CLAMP255(a*src[bx]+b>>7); - } - a=128; - b=64; - w=hmod[(7<<3)+by]; - a-=w; - b+=w*src[6]; - w=vmod[(by<<3)+7]; - a-=w; - b+=w*psrc[7]; - w=vmod[(by+1<<3)+7]; - a-=w; - b+=w*nsrc[7]; - w=hmod[(8<<3)+by]; - a-=w; - b+=w*src[7+!(_b&2)]; - dst[7]=OC_CLAMP255(a*src[7]+b>>7); - dst+=_ystride; - psrc=src; - src=nsrc; - nsrc+=_ystride&-(!(_b&8)|by<6); - } -} - -#define OC_DERING_THRESH1 (384) -#define OC_DERING_THRESH2 (4*OC_DERING_THRESH1) -#define OC_DERING_THRESH3 (5*OC_DERING_THRESH1) -#define OC_DERING_THRESH4 (10*OC_DERING_THRESH1) - -static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img, - int _pli,int _fragy0,int _fragy_end){ - th_img_plane *iplane; - oc_fragment_plane *fplane; - oc_fragment *frag; - int *variance; - unsigned char *idata; - ptrdiff_t froffset; - int ystride; - int nhfrags; - int sthresh; - int strong; - int y_end; - int width; - int height; - int y; - int x; - iplane=_img+_pli; - fplane=_dec->state.fplanes+_pli; - nhfrags=fplane->nhfrags; - froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags; - variance=_dec->variances+froffset; - frag=_dec->state.frags+froffset; - strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY); - sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3; - y=_fragy0<<3; - ystride=iplane->stride; - idata=iplane->data+y*(ptrdiff_t)ystride; - y_end=_fragy_end<<3; - width=iplane->width; - height=iplane->height; - for(;ystate.qis[frag->qii]; - var=*variance; - b=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3; - if(strong&&var>sthresh){ - 
oc_dering_block(idata+x,ystride,b, - _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); - if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4|| - !(b&2)&&variance[1]>OC_DERING_THRESH4|| - !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4|| - !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){ - oc_dering_block(idata+x,ystride,b, - _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); - oc_dering_block(idata+x,ystride,b, - _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); - } - } - else if(var>OC_DERING_THRESH2){ - oc_dering_block(idata+x,ystride,b, - _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); - } - else if(var>OC_DERING_THRESH1){ - oc_dering_block(idata+x,ystride,b, - _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0); - } - frag++; - variance++; - } - idata+=ystride<<3; - } -} - - - -th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){ - oc_dec_ctx *dec; - if(_info==NULL||_setup==NULL)return NULL; - dec=_ogg_malloc(sizeof(*dec)); - if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){ - _ogg_free(dec); - return NULL; - } - dec->state.curframe_num=0; - return dec; -} - -void th_decode_free(th_dec_ctx *_dec){ - if(_dec!=NULL){ - oc_dec_clear(_dec); - _ogg_free(_dec); - } -} - -int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf, - size_t _buf_sz){ - switch(_req){ - case TH_DECCTL_GET_PPLEVEL_MAX:{ - if(_dec==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(int))return TH_EINVAL; - (*(int *)_buf)=OC_PP_LEVEL_MAX; - return 0; - }break; - case TH_DECCTL_SET_PPLEVEL:{ - int pp_level; - if(_dec==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(int))return TH_EINVAL; - pp_level=*(int *)_buf; - if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL; - _dec->pp_level=pp_level; - return 0; - }break; - case TH_DECCTL_SET_GRANPOS:{ - ogg_int64_t granpos; - if(_dec==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL; - granpos=*(ogg_int64_t *)_buf; - if(granpos<0)return TH_EINVAL; - _dec->state.granpos=granpos; - 
_dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift) - -_dec->state.granpos_bias; - _dec->state.curframe_num=_dec->state.keyframe_num - +(granpos&(1<<_dec->state.info.keyframe_granule_shift)-1); - return 0; - }break; - case TH_DECCTL_SET_STRIPE_CB:{ - th_stripe_callback *cb; - if(_dec==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL; - cb=(th_stripe_callback *)_buf; - _dec->stripe_cb.ctx=cb->ctx; - _dec->stripe_cb.stripe_decoded=cb->stripe_decoded; - return 0; - }break; -#ifdef HAVE_CAIRO - case TH_DECCTL_SET_TELEMETRY_MBMODE:{ - if(_dec==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(int))return TH_EINVAL; - _dec->telemetry=1; - _dec->telemetry_mbmode=*(int *)_buf; - return 0; - }break; - case TH_DECCTL_SET_TELEMETRY_MV:{ - if(_dec==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(int))return TH_EINVAL; - _dec->telemetry=1; - _dec->telemetry_mv=*(int *)_buf; - return 0; - }break; - case TH_DECCTL_SET_TELEMETRY_QI:{ - if(_dec==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(int))return TH_EINVAL; - _dec->telemetry=1; - _dec->telemetry_qi=*(int *)_buf; - return 0; - }break; - case TH_DECCTL_SET_TELEMETRY_BITS:{ - if(_dec==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(int))return TH_EINVAL; - _dec->telemetry=1; - _dec->telemetry_bits=*(int *)_buf; - return 0; - }break; -#endif - default:return TH_EIMPL; - } -} - -/*We're decoding an INTER frame, but have no initialized reference - buffers (i.e., decoding did not start on a key frame). 
- We initialize them to a solid gray here.*/ -static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){ - th_info *info; - size_t yplane_sz; - size_t cplane_sz; - int yhstride; - int yheight; - int chstride; - int cheight; - _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0; - _dec->state.ref_frame_idx[OC_FRAME_PREV]=0; - _dec->state.ref_frame_idx[OC_FRAME_SELF]=1; - info=&_dec->state.info; - yhstride=info->frame_width+2*OC_UMV_PADDING; - yheight=info->frame_height+2*OC_UMV_PADDING; - chstride=yhstride>>!(info->pixel_fmt&1); - cheight=yheight>>!(info->pixel_fmt&2); - yplane_sz=yhstride*(size_t)yheight; - cplane_sz=chstride*(size_t)cheight; - memset(_dec->state.ref_frame_data[0],0x80,yplane_sz+2*cplane_sz); -} - -int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op, - ogg_int64_t *_granpos){ - int ret; - if(_dec==NULL||_op==NULL)return TH_EFAULT; - /*A completely empty packet indicates a dropped frame and is treated exactly - like an inter frame with no coded blocks. - Only proceed if we have a non-empty packet.*/ - if(_op->bytes!=0){ - oc_dec_pipeline_state pipe; - th_ycbcr_buffer stripe_buf; - int stripe_fragy; - int refi; - int pli; - int notstart; - int notdone; - oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes); -#if defined(HAVE_CAIRO) - _dec->telemetry_frame_bytes=_op->bytes; -#endif - ret=oc_dec_frame_header_unpack(_dec); - if(ret<0)return ret; - /*Select a free buffer to use for the reconstructed version of this - frame.*/ - if(_dec->state.frame_type!=OC_INTRA_FRAME&& - (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0|| - _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){ - /*No reference frames yet!*/ - oc_dec_init_dummy_frame(_dec); - refi=_dec->state.ref_frame_idx[OC_FRAME_SELF]; - } - else{ - for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]|| - refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++); - _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi; - } - if(_dec->state.frame_type==OC_INTRA_FRAME){ - oc_dec_mark_all_intra(_dec); - 
_dec->state.keyframe_num=_dec->state.curframe_num; -#if defined(HAVE_CAIRO) - _dec->telemetry_coding_bytes= - _dec->telemetry_mode_bytes= - _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb); -#endif - } - else{ - oc_dec_coded_flags_unpack(_dec); -#if defined(HAVE_CAIRO) - _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb); -#endif - oc_dec_mb_modes_unpack(_dec); -#if defined(HAVE_CAIRO) - _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb); -#endif - oc_dec_mv_unpack_and_frag_modes_fill(_dec); -#if defined(HAVE_CAIRO) - _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb); -#endif - } - oc_dec_block_qis_unpack(_dec); -#if defined(HAVE_CAIRO) - _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb); -#endif - oc_dec_residual_tokens_unpack(_dec); - /*Update granule position. - This must be done before the striped decode callbacks so that the - application knows what to do with the frame data.*/ - _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<< - _dec->state.info.keyframe_granule_shift) - +(_dec->state.curframe_num-_dec->state.keyframe_num); - _dec->state.curframe_num++; - if(_granpos!=NULL)*_granpos=_dec->state.granpos; - /*All of the rest of the operations -- DC prediction reversal, - reconstructing coded fragments, copying uncoded fragments, loop - filtering, extending borders, and out-of-loop post-processing -- should - be pipelined. - I.e., DC prediction reversal, reconstruction, and uncoded fragment - copying are done for one or two super block rows, then loop filtering is - run as far as it can, then bordering copying, then post-processing. - For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super - block rows, and one chroma. - Otherwise, an MCU consists of one super block row from each plane. - Inside each MCU, we perform all of the steps on one color plane before - moving on to the next. 
- After reconstruction, the additional filtering stages introduce a delay - since they need some pixels from the next fragment row. - Thus the actual number of decoded rows available is slightly smaller for - the first MCU, and slightly larger for the last. - - This entire process allows us to operate on the data while it is still in - cache, resulting in big performance improvements. - An application callback allows further application processing (blitting - to video memory, color conversion, etc.) to also use the data while it's - in cache.*/ - oc_dec_pipeline_init(_dec,&pipe); - oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf); - notstart=0; - notdone=1; - for(stripe_fragy=0;notdone;stripe_fragy+=pipe.mcu_nvfrags){ - int avail_fragy0; - int avail_fragy_end; - avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags; - notdone=stripe_fragy+pipe.mcu_nvfragsstate.fplanes+pli; - /*Compute the first and last fragment row of the current MCU for this - plane.*/ - frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2); - pipe.fragy0[pli]=stripe_fragy>>frag_shift; - pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags, - pipe.fragy0[pli]+(pipe.mcu_nvfrags>>frag_shift)); - oc_dec_dc_unpredict_mcu_plane(_dec,&pipe,pli); - oc_dec_frags_recon_mcu_plane(_dec,&pipe,pli); - sdelay=edelay=0; - if(pipe.loop_filter){ - sdelay+=notstart; - edelay+=notdone; - oc_state_loop_filter_frag_rows(&_dec->state,pipe.bounding_values, - refi,pli,pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay); - } - /*To fill the borders, we have an additional two pixel delay, since a - fragment in the next row could filter its top edge, using two pixels - from a fragment in this row. 
- But there's no reason to delay a full fragment between the two.*/ - oc_state_borders_fill_rows(&_dec->state,refi,pli, - (pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1), - (pipe.fragy_end[pli]-edelay<<3)-(edelay<<1)); - /*Out-of-loop post-processing.*/ - pp_offset=3*(pli!=0); - if(pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){ - /*Perform de-blocking in one plane.*/ - sdelay+=notstart; - edelay+=notdone; - oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf, - _dec->state.ref_frame_bufs[refi],pli, - pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay); - if(pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){ - /*Perform de-ringing in one plane.*/ - sdelay+=notstart; - edelay+=notdone; - oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli, - pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay); - } - } - /*If no post-processing is done, we still need to delay a row for the - loop filter, thanks to the strange filtering order VP3 chose.*/ - else if(pipe.loop_filter){ - sdelay+=notstart; - edelay+=notdone; - } - /*Compute the intersection of the available rows in all planes. - If chroma is sub-sampled, the effect of each of its delays is - doubled, but luma might have more post-processing filters enabled - than chroma, so we don't know up front which one is the limiting - factor.*/ - avail_fragy0=OC_MINI(avail_fragy0,pipe.fragy0[pli]-sdelay<stripe_cb.stripe_decoded!=NULL){ - /*The callback might want to use the FPU, so let's make sure they can. 
- We violate all kinds of ABI restrictions by not doing this until - now, but none of them actually matter since we don't use floating - point ourselves.*/ - oc_restore_fpu(&_dec->state); - /*Make the callback, ensuring we flip the sense of the "start" and - "end" of the available region upside down.*/ - (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf, - _dec->state.fplanes[0].nvfrags-avail_fragy_end, - _dec->state.fplanes[0].nvfrags-avail_fragy0); - } - notstart=1; - } - /*Finish filling in the reference frame borders.*/ - for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli); - /*Update the reference frame indices.*/ - if(_dec->state.frame_type==OC_INTRA_FRAME){ - /*The new frame becomes both the previous and gold reference frames.*/ - _dec->state.ref_frame_idx[OC_FRAME_GOLD]= - _dec->state.ref_frame_idx[OC_FRAME_PREV]= - _dec->state.ref_frame_idx[OC_FRAME_SELF]; - } - else{ - /*Otherwise, just replace the previous reference frame.*/ - _dec->state.ref_frame_idx[OC_FRAME_PREV]= - _dec->state.ref_frame_idx[OC_FRAME_SELF]; - } - /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG - gamma values, if nothing else).*/ - oc_restore_fpu(&_dec->state); -#if defined(OC_DUMP_IMAGES) - /*Don't dump images for dropped frames.*/ - oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec"); -#endif - return 0; - } - else{ - if(_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0|| - _dec->state.ref_frame_idx[OC_FRAME_PREV]<0){ - int refi; - /*No reference frames yet!*/ - oc_dec_init_dummy_frame(_dec); - refi=_dec->state.ref_frame_idx[OC_FRAME_PREV]; - _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi; - memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[refi], - sizeof(_dec->pp_frame_buf[0])*3); - } - /*Just update the granule position and return.*/ - _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<< - _dec->state.info.keyframe_granule_shift) - +(_dec->state.curframe_num-_dec->state.keyframe_num); - 
_dec->state.curframe_num++; - if(_granpos!=NULL)*_granpos=_dec->state.granpos; - return TH_DUPFRAME; - } -} - -int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){ - if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT; - oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf); -#if defined(HAVE_CAIRO) - /*If telemetry ioctls are active, we need to draw to the output buffer. - Stuff the plane into cairo.*/ - if(_dec->telemetry){ - cairo_surface_t *cs; - unsigned char *data; - unsigned char *y_row; - unsigned char *u_row; - unsigned char *v_row; - unsigned char *rgb_row; - int cstride; - int w; - int h; - int x; - int y; - int hdec; - int vdec; - w=_ycbcr[0].width; - h=_ycbcr[0].height; - hdec=!(_dec->state.info.pixel_fmt&1); - vdec=!(_dec->state.info.pixel_fmt&2); - /*Lazy data buffer init. - We could try to re-use the post-processing buffer, which would save - memory, but complicate the allocation logic there. - I don't think anyone cares about memory usage when using telemetry; it is - not meant for embedded devices.*/ - if(_dec->telemetry_frame_data==NULL){ - _dec->telemetry_frame_data=_ogg_malloc( - (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data)); - if(_dec->telemetry_frame_data==NULL)return 0; - } - cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h); - /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/ - data=cairo_image_surface_get_data(cs); - if(data==NULL){ - cairo_surface_destroy(cs); - return 0; - } - cstride=cairo_image_surface_get_stride(cs); - y_row=_ycbcr[0].data; - u_row=_ycbcr[1].data; - v_row=_ycbcr[2].data; - rgb_row=data; - for(y=0;y>hdec]-363703744)/1635200; - g=(3827562*y_row[x]-1287801*u_row[x>>hdec] - -2672387*v_row[x>>hdec]+447306710)/3287200; - b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600; - rgb_row[4*x+0]=OC_CLAMP255(b); - rgb_row[4*x+1]=OC_CLAMP255(g); - rgb_row[4*x+2]=OC_CLAMP255(r); - } - y_row+=_ycbcr[0].stride; - u_row+=_ycbcr[1].stride&-((y&1)|!vdec); - 
v_row+=_ycbcr[2].stride&-((y&1)|!vdec); - rgb_row+=cstride; - } - /*Draw coded identifier for each macroblock (stored in Hilbert order).*/ - { - cairo_t *c; - const oc_fragment *frags; - oc_mv *frag_mvs; - const signed char *mb_modes; - oc_mb_map *mb_maps; - size_t nmbs; - size_t mbi; - int row2; - int col2; - int qim[3]={0,0,0}; - if(_dec->state.nqis==2){ - int bqi; - bqi=_dec->state.qis[0]; - if(_dec->state.qis[1]>bqi)qim[1]=1; - if(_dec->state.qis[1]state.nqis==3){ - int bqi; - int cqi; - int dqi; - bqi=_dec->state.qis[0]; - cqi=_dec->state.qis[1]; - dqi=_dec->state.qis[2]; - if(cqi>bqi&&dqi>bqi){ - if(dqi>cqi){ - qim[1]=1; - qim[2]=2; - } - else{ - qim[1]=2; - qim[2]=1; - } - } - else if(cqistate.frags; - frag_mvs=_dec->state.frag_mvs; - mb_modes=_dec->state.mb_modes; - mb_maps=_dec->state.mb_maps; - nmbs=_dec->state.nmbs; - row2=0; - col2=0; - for(mbi=0;mbi>1)&1))*16-16; - x=(col2>>1)*16; - cairo_set_line_width(c,1.); - /*Keyframe (all intra) red box.*/ - if(_dec->state.frame_type==OC_INTRA_FRAME){ - if(_dec->telemetry_mbmode&0x02){ - cairo_set_source_rgba(c,1.,0,0,.5); - cairo_rectangle(c,x+2.5,y+2.5,11,11); - cairo_stroke_preserve(c); - cairo_set_source_rgba(c,1.,0,0,.25); - cairo_fill(c); - } - } - else{ - const signed char *frag_mv; - ptrdiff_t fragi; - for(bi=0;bi<4;bi++){ - fragi=mb_maps[mbi][0][bi]; - if(fragi>=0&&frags[fragi].coded){ - frag_mv=frag_mvs[fragi]; - break; - } - } - if(bi<4){ - switch(mb_modes[mbi]){ - case OC_MODE_INTRA:{ - if(_dec->telemetry_mbmode&0x02){ - cairo_set_source_rgba(c,1.,0,0,.5); - cairo_rectangle(c,x+2.5,y+2.5,11,11); - cairo_stroke_preserve(c); - cairo_set_source_rgba(c,1.,0,0,.25); - cairo_fill(c); - } - }break; - case OC_MODE_INTER_NOMV:{ - if(_dec->telemetry_mbmode&0x01){ - cairo_set_source_rgba(c,0,0,1.,.5); - cairo_rectangle(c,x+2.5,y+2.5,11,11); - cairo_stroke_preserve(c); - cairo_set_source_rgba(c,0,0,1.,.25); - cairo_fill(c); - } - }break; - case OC_MODE_INTER_MV:{ - if(_dec->telemetry_mbmode&0x04){ - 
cairo_rectangle(c,x+2.5,y+2.5,11,11); - cairo_set_source_rgba(c,0,1.,0,.5); - cairo_stroke(c); - } - if(_dec->telemetry_mv&0x04){ - cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]); - cairo_set_source_rgba(c,1.,1.,1.,.9); - cairo_set_line_width(c,3.); - cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66); - cairo_stroke_preserve(c); - cairo_set_line_width(c,2.); - cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33); - cairo_stroke_preserve(c); - cairo_set_line_width(c,1.); - cairo_line_to(c,x+8,y+8); - cairo_stroke(c); - } - }break; - case OC_MODE_INTER_MV_LAST:{ - if(_dec->telemetry_mbmode&0x08){ - cairo_rectangle(c,x+2.5,y+2.5,11,11); - cairo_set_source_rgba(c,0,1.,0,.5); - cairo_move_to(c,x+13.5,y+2.5); - cairo_line_to(c,x+2.5,y+8); - cairo_line_to(c,x+13.5,y+13.5); - cairo_stroke(c); - } - if(_dec->telemetry_mv&0x08){ - cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]); - cairo_set_source_rgba(c,1.,1.,1.,.9); - cairo_set_line_width(c,3.); - cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66); - cairo_stroke_preserve(c); - cairo_set_line_width(c,2.); - cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33); - cairo_stroke_preserve(c); - cairo_set_line_width(c,1.); - cairo_line_to(c,x+8,y+8); - cairo_stroke(c); - } - }break; - case OC_MODE_INTER_MV_LAST2:{ - if(_dec->telemetry_mbmode&0x10){ - cairo_rectangle(c,x+2.5,y+2.5,11,11); - cairo_set_source_rgba(c,0,1.,0,.5); - cairo_move_to(c,x+8,y+2.5); - cairo_line_to(c,x+2.5,y+8); - cairo_line_to(c,x+8,y+13.5); - cairo_move_to(c,x+13.5,y+2.5); - cairo_line_to(c,x+8,y+8); - cairo_line_to(c,x+13.5,y+13.5); - cairo_stroke(c); - } - if(_dec->telemetry_mv&0x10){ - cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]); - cairo_set_source_rgba(c,1.,1.,1.,.9); - cairo_set_line_width(c,3.); - cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66); - cairo_stroke_preserve(c); - cairo_set_line_width(c,2.); - cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33); - cairo_stroke_preserve(c); - cairo_set_line_width(c,1.); - 
cairo_line_to(c,x+8,y+8); - cairo_stroke(c); - } - }break; - case OC_MODE_GOLDEN_NOMV:{ - if(_dec->telemetry_mbmode&0x20){ - cairo_set_source_rgba(c,1.,1.,0,.5); - cairo_rectangle(c,x+2.5,y+2.5,11,11); - cairo_stroke_preserve(c); - cairo_set_source_rgba(c,1.,1.,0,.25); - cairo_fill(c); - } - }break; - case OC_MODE_GOLDEN_MV:{ - if(_dec->telemetry_mbmode&0x40){ - cairo_rectangle(c,x+2.5,y+2.5,11,11); - cairo_set_source_rgba(c,1.,1.,0,.5); - cairo_stroke(c); - } - if(_dec->telemetry_mv&0x40){ - cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]); - cairo_set_source_rgba(c,1.,1.,1.,.9); - cairo_set_line_width(c,3.); - cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66); - cairo_stroke_preserve(c); - cairo_set_line_width(c,2.); - cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33); - cairo_stroke_preserve(c); - cairo_set_line_width(c,1.); - cairo_line_to(c,x+8,y+8); - cairo_stroke(c); - } - }break; - case OC_MODE_INTER_MV_FOUR:{ - if(_dec->telemetry_mbmode&0x80){ - cairo_rectangle(c,x+2.5,y+2.5,4,4); - cairo_rectangle(c,x+9.5,y+2.5,4,4); - cairo_rectangle(c,x+2.5,y+9.5,4,4); - cairo_rectangle(c,x+9.5,y+9.5,4,4); - cairo_set_source_rgba(c,0,1.,0,.5); - cairo_stroke(c); - } - /*4mv is odd, coded in raster order.*/ - fragi=mb_maps[mbi][0][0]; - if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ - frag_mv=frag_mvs[fragi]; - cairo_move_to(c,x+4+frag_mv[0],y+12-frag_mv[1]); - cairo_set_source_rgba(c,1.,1.,1.,.9); - cairo_set_line_width(c,3.); - cairo_line_to(c,x+4+frag_mv[0]*.66,y+12-frag_mv[1]*.66); - cairo_stroke_preserve(c); - cairo_set_line_width(c,2.); - cairo_line_to(c,x+4+frag_mv[0]*.33,y+12-frag_mv[1]*.33); - cairo_stroke_preserve(c); - cairo_set_line_width(c,1.); - cairo_line_to(c,x+4,y+12); - cairo_stroke(c); - } - fragi=mb_maps[mbi][0][1]; - if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ - frag_mv=frag_mvs[fragi]; - cairo_move_to(c,x+12+frag_mv[0],y+12-frag_mv[1]); - cairo_set_source_rgba(c,1.,1.,1.,.9); - cairo_set_line_width(c,3.); - 
cairo_line_to(c,x+12+frag_mv[0]*.66,y+12-frag_mv[1]*.66); - cairo_stroke_preserve(c); - cairo_set_line_width(c,2.); - cairo_line_to(c,x+12+frag_mv[0]*.33,y+12-frag_mv[1]*.33); - cairo_stroke_preserve(c); - cairo_set_line_width(c,1.); - cairo_line_to(c,x+12,y+12); - cairo_stroke(c); - } - fragi=mb_maps[mbi][0][2]; - if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ - frag_mv=frag_mvs[fragi]; - cairo_move_to(c,x+4+frag_mv[0],y+4-frag_mv[1]); - cairo_set_source_rgba(c,1.,1.,1.,.9); - cairo_set_line_width(c,3.); - cairo_line_to(c,x+4+frag_mv[0]*.66,y+4-frag_mv[1]*.66); - cairo_stroke_preserve(c); - cairo_set_line_width(c,2.); - cairo_line_to(c,x+4+frag_mv[0]*.33,y+4-frag_mv[1]*.33); - cairo_stroke_preserve(c); - cairo_set_line_width(c,1.); - cairo_line_to(c,x+4,y+4); - cairo_stroke(c); - } - fragi=mb_maps[mbi][0][3]; - if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ - frag_mv=frag_mvs[fragi]; - cairo_move_to(c,x+12+frag_mv[0],y+4-frag_mv[1]); - cairo_set_source_rgba(c,1.,1.,1.,.9); - cairo_set_line_width(c,3.); - cairo_line_to(c,x+12+frag_mv[0]*.66,y+4-frag_mv[1]*.66); - cairo_stroke_preserve(c); - cairo_set_line_width(c,2.); - cairo_line_to(c,x+12+frag_mv[0]*.33,y+4-frag_mv[1]*.33); - cairo_stroke_preserve(c); - cairo_set_line_width(c,1.); - cairo_line_to(c,x+12,y+4); - cairo_stroke(c); - } - }break; - } - } - } - /*qii illustration.*/ - if(_dec->telemetry_qi&0x2){ - cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE); - for(bi=0;bi<4;bi++){ - ptrdiff_t fragi; - int qiv; - int xp; - int yp; - xp=x+(bi&1)*8; - yp=y+8-(bi&2)*4; - fragi=mb_maps[mbi][0][bi]; - if(fragi>=0&&frags[fragi].coded){ - qiv=qim[frags[fragi].qii]; - cairo_set_line_width(c,3.); - cairo_set_source_rgba(c,0.,0.,0.,.5); - switch(qiv){ - /*Double plus:*/ - case 2:{ - if((bi&1)^((bi&2)>>1)){ - cairo_move_to(c,xp+2.5,yp+1.5); - cairo_line_to(c,xp+2.5,yp+3.5); - cairo_move_to(c,xp+1.5,yp+2.5); - cairo_line_to(c,xp+3.5,yp+2.5); - cairo_move_to(c,xp+5.5,yp+4.5); - cairo_line_to(c,xp+5.5,yp+6.5); - 
cairo_move_to(c,xp+4.5,yp+5.5); - cairo_line_to(c,xp+6.5,yp+5.5); - cairo_stroke_preserve(c); - cairo_set_source_rgba(c,0.,1.,1.,1.); - } - else{ - cairo_move_to(c,xp+5.5,yp+1.5); - cairo_line_to(c,xp+5.5,yp+3.5); - cairo_move_to(c,xp+4.5,yp+2.5); - cairo_line_to(c,xp+6.5,yp+2.5); - cairo_move_to(c,xp+2.5,yp+4.5); - cairo_line_to(c,xp+2.5,yp+6.5); - cairo_move_to(c,xp+1.5,yp+5.5); - cairo_line_to(c,xp+3.5,yp+5.5); - cairo_stroke_preserve(c); - cairo_set_source_rgba(c,0.,1.,1.,1.); - } - }break; - /*Double minus:*/ - case -2:{ - cairo_move_to(c,xp+2.5,yp+2.5); - cairo_line_to(c,xp+5.5,yp+2.5); - cairo_move_to(c,xp+2.5,yp+5.5); - cairo_line_to(c,xp+5.5,yp+5.5); - cairo_stroke_preserve(c); - cairo_set_source_rgba(c,1.,1.,1.,1.); - }break; - /*Plus:*/ - case 1:{ - if(bi&2==0)yp-=2; - if(bi&1==0)xp-=2; - cairo_move_to(c,xp+4.5,yp+2.5); - cairo_line_to(c,xp+4.5,yp+6.5); - cairo_move_to(c,xp+2.5,yp+4.5); - cairo_line_to(c,xp+6.5,yp+4.5); - cairo_stroke_preserve(c); - cairo_set_source_rgba(c,.1,1.,.3,1.); - break; - } - /*Fall through.*/ - /*Minus:*/ - case -1:{ - cairo_move_to(c,xp+2.5,yp+4.5); - cairo_line_to(c,xp+6.5,yp+4.5); - cairo_stroke_preserve(c); - cairo_set_source_rgba(c,1.,.3,.1,1.); - }break; - default:continue; - } - cairo_set_line_width(c,1.); - cairo_stroke(c); - } - } - } - col2++; - if((col2>>1)>=_dec->state.nhmbs){ - col2=0; - row2+=2; - } - } - /*Bit usage indicator[s]:*/ - if(_dec->telemetry_bits){ - int widths[6]; - int fpsn; - int fpsd; - int mult; - int fullw; - int padw; - int i; - fpsn=_dec->state.info.fps_numerator; - fpsd=_dec->state.info.fps_denominator; - mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits); - fullw=250.f*h*fpsd*mult/fpsn; - padw=w-24; - /*Header and coded block bits.*/ - if(_dec->telemetry_frame_bytes<0|| - _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){ - _dec->telemetry_frame_bytes=0; - } - if(_dec->telemetry_coding_bytes<0|| - _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){ - _dec->telemetry_coding_bytes=0; - 
} - if(_dec->telemetry_mode_bytes<0|| - _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){ - _dec->telemetry_mode_bytes=0; - } - if(_dec->telemetry_mv_bytes<0|| - _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){ - _dec->telemetry_mv_bytes=0; - } - if(_dec->telemetry_qi_bytes<0|| - _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){ - _dec->telemetry_qi_bytes=0; - } - if(_dec->telemetry_dc_bytes<0|| - _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){ - _dec->telemetry_dc_bytes=0; - } - widths[0]=padw*(_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw; - widths[1]=padw*(_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw; - widths[2]=padw*(_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw; - widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw; - widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw; - widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw; - for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w; - cairo_set_source_rgba(c,.0,.0,.0,.6); - cairo_rectangle(c,10,h-33,widths[0]+1,5); - cairo_rectangle(c,10,h-29,widths[1]+1,5); - cairo_rectangle(c,10,h-25,widths[2]+1,5); - cairo_rectangle(c,10,h-21,widths[3]+1,5); - cairo_rectangle(c,10,h-17,widths[4]+1,5); - cairo_rectangle(c,10,h-13,widths[5]+1,5); - cairo_fill(c); - cairo_set_source_rgb(c,1,0,0); - cairo_rectangle(c,10.5,h-32.5,widths[0],4); - cairo_fill(c); - cairo_set_source_rgb(c,0,1,0); - cairo_rectangle(c,10.5,h-28.5,widths[1],4); - cairo_fill(c); - cairo_set_source_rgb(c,0,0,1); - cairo_rectangle(c,10.5,h-24.5,widths[2],4); - cairo_fill(c); - cairo_set_source_rgb(c,.6,.4,.0); - cairo_rectangle(c,10.5,h-20.5,widths[3],4); - cairo_fill(c); - cairo_set_source_rgb(c,.3,.3,.3); - cairo_rectangle(c,10.5,h-16.5,widths[4],4); - cairo_fill(c); - cairo_set_source_rgb(c,.5,.5,.8); - cairo_rectangle(c,10.5,h-12.5,widths[5],4); - cairo_fill(c); - } - /*Master qi indicator[s]:*/ - if(_dec->telemetry_qi&0x1){ - cairo_text_extents_t 
extents; - char buffer[10]; - int p; - int y; - p=0; - y=h-7.5; - if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10; - buffer[p++]=48+_dec->state.qis[0]%10; - if(_dec->state.nqis>=2){ - buffer[p++]=' '; - if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10; - buffer[p++]=48+_dec->state.qis[1]%10; - } - if(_dec->state.nqis==3){ - buffer[p++]=' '; - if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10; - buffer[p++]=48+_dec->state.qis[2]%10; - } - buffer[p++]='\0'; - cairo_select_font_face(c,"sans", - CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD); - cairo_set_font_size(c,18); - cairo_text_extents(c,buffer,&extents); - cairo_set_source_rgb(c,1,1,1); - cairo_move_to(c,w-extents.x_advance-10,y); - cairo_show_text(c,buffer); - cairo_set_source_rgb(c,0,0,0); - cairo_move_to(c,w-extents.x_advance-10,y); - cairo_text_path(c,buffer); - cairo_set_line_width(c,.8); - cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND); - cairo_stroke(c); - } - cairo_destroy(c); - } - /*Out of the Cairo plane into the telemetry YUV buffer.*/ - _ycbcr[0].data=_dec->telemetry_frame_data; - _ycbcr[0].stride=_ycbcr[0].width; - _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride; - _ycbcr[1].stride=_ycbcr[1].width; - _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride; - _ycbcr[2].stride=_ycbcr[2].width; - y_row=_ycbcr[0].data; - u_row=_ycbcr[1].data; - v_row=_ycbcr[2].data; - rgb_row=data; - /*This is one of the few places it's worth handling chroma on a - case-by-case basis.*/ - switch(_dec->state.info.pixel_fmt){ - case TH_PF_420:{ - for(y=0;y>1]=OC_CLAMP255(u); - v_row[x>>1]=OC_CLAMP255(v); - } - y_row+=_ycbcr[0].stride<<1; - u_row+=_ycbcr[1].stride; - v_row+=_ycbcr[2].stride; - rgb_row+=cstride<<1; - } - }break; - case TH_PF_422:{ - for(y=0;y>1]=OC_CLAMP255(u); - v_row[x>>1]=OC_CLAMP255(v); - } - y_row+=_ycbcr[0].stride; - u_row+=_ycbcr[1].stride; - v_row+=_ycbcr[2].stride; - rgb_row+=cstride; - } - }break; - /*case TH_PF_444:*/ - default:{ - for(y=0;y -#include 
-#include -#include "dequant.h" -#include "decint.h" - -int oc_quant_params_unpack(oc_pack_buf *_opb,th_quant_info *_qinfo){ - th_quant_base *base_mats; - long val; - int nbase_mats; - int sizes[64]; - int indices[64]; - int nbits; - int bmi; - int ci; - int qti; - int pli; - int qri; - int qi; - int i; - val=oc_pack_read(_opb,3); - nbits=(int)val; - for(qi=0;qi<64;qi++){ - val=oc_pack_read(_opb,nbits); - _qinfo->loop_filter_limits[qi]=(unsigned char)val; - } - val=oc_pack_read(_opb,4); - nbits=(int)val+1; - for(qi=0;qi<64;qi++){ - val=oc_pack_read(_opb,nbits); - _qinfo->ac_scale[qi]=(ogg_uint16_t)val; - } - val=oc_pack_read(_opb,4); - nbits=(int)val+1; - for(qi=0;qi<64;qi++){ - val=oc_pack_read(_opb,nbits); - _qinfo->dc_scale[qi]=(ogg_uint16_t)val; - } - val=oc_pack_read(_opb,9); - nbase_mats=(int)val+1; - base_mats=_ogg_malloc(nbase_mats*sizeof(base_mats[0])); - if(base_mats==NULL)return TH_EFAULT; - for(bmi=0;bmiqi_ranges[qti]+pli; - if(i>0){ - val=oc_pack_read1(_opb); - if(!val){ - int qtj; - int plj; - if(qti>0){ - val=oc_pack_read1(_opb); - if(val){ - qtj=qti-1; - plj=pli; - } - else{ - qtj=(i-1)/3; - plj=(i-1)%3; - } - } - else{ - qtj=(i-1)/3; - plj=(i-1)%3; - } - *qranges=*(_qinfo->qi_ranges[qtj]+plj); - continue; - } - } - val=oc_pack_read(_opb,nbits); - indices[0]=(int)val; - for(qi=qri=0;qi<63;){ - val=oc_pack_read(_opb,oc_ilog(62-qi)); - sizes[qri]=(int)val+1; - qi+=(int)val+1; - val=oc_pack_read(_opb,nbits); - indices[++qri]=(int)val; - } - /*Note: The caller is responsible for cleaning up any partially - constructed qinfo.*/ - if(qi>63){ - _ogg_free(base_mats); - return TH_EBADHEADER; - } - qranges->nranges=qri; - qranges->sizes=qrsizes=(int *)_ogg_malloc(qri*sizeof(qrsizes[0])); - if(qranges->sizes==NULL){ - /*Note: The caller is responsible for cleaning up any partially - constructed qinfo.*/ - _ogg_free(base_mats); - return TH_EFAULT; - } - memcpy(qrsizes,sizes,qri*sizeof(qrsizes[0])); - qrbms=(th_quant_base *)_ogg_malloc((qri+1)*sizeof(qrbms[0])); 
- if(qrbms==NULL){ - /*Note: The caller is responsible for cleaning up any partially - constructed qinfo.*/ - _ogg_free(base_mats); - return TH_EFAULT; - } - qranges->base_matrices=(const th_quant_base *)qrbms; - do{ - bmi=indices[qri]; - /*Note: The caller is responsible for cleaning up any partially - constructed qinfo.*/ - if(bmi>=nbase_mats){ - _ogg_free(base_mats); - return TH_EBADHEADER; - } - memcpy(qrbms[qri],base_mats[bmi],sizeof(qrbms[qri])); - } - while(qri-->0); - } - _ogg_free(base_mats); - return 0; -} - -void oc_quant_params_clear(th_quant_info *_qinfo){ - int i; - for(i=6;i-->0;){ - int qti; - int pli; - qti=i/3; - pli=i%3; - /*Clear any duplicate pointer references.*/ - if(i>0){ - int qtj; - int plj; - qtj=(i-1)/3; - plj=(i-1)%3; - if(_qinfo->qi_ranges[qti][pli].sizes== - _qinfo->qi_ranges[qtj][plj].sizes){ - _qinfo->qi_ranges[qti][pli].sizes=NULL; - } - if(_qinfo->qi_ranges[qti][pli].base_matrices== - _qinfo->qi_ranges[qtj][plj].base_matrices){ - _qinfo->qi_ranges[qti][pli].base_matrices=NULL; - } - } - if(qti>0){ - if(_qinfo->qi_ranges[1][pli].sizes== - _qinfo->qi_ranges[0][pli].sizes){ - _qinfo->qi_ranges[1][pli].sizes=NULL; - } - if(_qinfo->qi_ranges[1][pli].base_matrices== - _qinfo->qi_ranges[0][pli].base_matrices){ - _qinfo->qi_ranges[1][pli].base_matrices=NULL; - } - } - /*Now free all the non-duplicate storage.*/ - _ogg_free((void *)_qinfo->qi_ranges[qti][pli].sizes); - _ogg_free((void *)_qinfo->qi_ranges[qti][pli].base_matrices); - } -} diff --git a/drivers/theora/dequant.h b/drivers/theora/dequant.h deleted file mode 100644 index ef25838e35..0000000000 --- a/drivers/theora/dequant.h +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. 
PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: dequant.h 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -#if !defined(_dequant_H) -# define _dequant_H (1) -# include "quant.h" -# include "bitpack.h" - -int oc_quant_params_unpack(oc_pack_buf *_opb, - th_quant_info *_qinfo); -void oc_quant_params_clear(th_quant_info *_qinfo); - -#endif diff --git a/drivers/theora/encapiwrapper.c b/drivers/theora/encapiwrapper.c deleted file mode 100644 index 874f12442d..0000000000 --- a/drivers/theora/encapiwrapper.c +++ /dev/null @@ -1,168 +0,0 @@ -#include -#include -#include -#include "apiwrapper.h" -#include "encint.h" -#include "theora/theoraenc.h" - - - -static void th_enc_api_clear(th_api_wrapper *_api){ - if(_api->encode)th_encode_free(_api->encode); - memset(_api,0,sizeof(*_api)); -} - -static void theora_encode_clear(theora_state *_te){ - if(_te->i!=NULL)theora_info_clear(_te->i); - memset(_te,0,sizeof(*_te)); -} - -static int theora_encode_control(theora_state *_te,int _req, - void *_buf,size_t _buf_sz){ - return th_encode_ctl(((th_api_wrapper *)_te->i->codec_setup)->encode, - _req,_buf,_buf_sz); -} - -static ogg_int64_t theora_encode_granule_frame(theora_state *_te, - ogg_int64_t _gp){ - return th_granule_frame(((th_api_wrapper *)_te->i->codec_setup)->encode,_gp); -} - -static double theora_encode_granule_time(theora_state *_te,ogg_int64_t _gp){ - return th_granule_time(((th_api_wrapper *)_te->i->codec_setup)->encode,_gp); -} - -static const oc_state_dispatch_vtable OC_ENC_DISPATCH_VTBL={ - (oc_state_clear_func)theora_encode_clear, - (oc_state_control_func)theora_encode_control, - (oc_state_granule_frame_func)theora_encode_granule_frame, - 
(oc_state_granule_time_func)theora_encode_granule_time, -}; - -int theora_encode_init(theora_state *_te,theora_info *_ci){ - th_api_info *apiinfo; - th_info info; - ogg_uint32_t keyframe_frequency_force; - /*Allocate our own combined API wrapper/theora_info struct. - We put them both in one malloc'd block so that when the API wrapper is - freed, the info struct goes with it. - This avoids having to figure out whether or not we need to free the info - struct in either theora_info_clear() or theora_clear().*/ - apiinfo=(th_api_info *)_ogg_malloc(sizeof(*apiinfo)); - if(apiinfo==NULL)return TH_EFAULT; - /*Make our own copy of the info struct, since its lifetime should be - independent of the one we were passed in.*/ - *&apiinfo->info=*_ci; - oc_theora_info2th_info(&info,_ci); - apiinfo->api.encode=th_encode_alloc(&info); - if(apiinfo->api.encode==NULL){ - _ogg_free(apiinfo); - return OC_EINVAL; - } - apiinfo->api.clear=(oc_setup_clear_func)th_enc_api_clear; - /*Provide entry points for ABI compatibility with old decoder shared libs.*/ - _te->internal_encode=(void *)&OC_ENC_DISPATCH_VTBL; - _te->internal_decode=NULL; - _te->granulepos=0; - _te->i=&apiinfo->info; - _te->i->codec_setup=&apiinfo->api; - /*Set the precise requested keyframe frequency.*/ - keyframe_frequency_force=_ci->keyframe_auto_p? 
- _ci->keyframe_frequency_force:_ci->keyframe_frequency; - th_encode_ctl(apiinfo->api.encode, - TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE, - &keyframe_frequency_force,sizeof(keyframe_frequency_force)); - /*TODO: Additional codec setup using the extra fields in theora_info.*/ - return 0; -} - -int theora_encode_YUVin(theora_state *_te,yuv_buffer *_yuv){ - th_api_wrapper *api; - th_ycbcr_buffer buf; - int ret; - api=(th_api_wrapper *)_te->i->codec_setup; - buf[0].width=_yuv->y_width; - buf[0].height=_yuv->y_height; - buf[0].stride=_yuv->y_stride; - buf[0].data=_yuv->y; - buf[1].width=_yuv->uv_width; - buf[1].height=_yuv->uv_height; - buf[1].stride=_yuv->uv_stride; - buf[1].data=_yuv->u; - buf[2].width=_yuv->uv_width; - buf[2].height=_yuv->uv_height; - buf[2].stride=_yuv->uv_stride; - buf[2].data=_yuv->v; - ret=th_encode_ycbcr_in(api->encode,buf); - if(ret<0)return ret; - _te->granulepos=api->encode->state.granpos; - return ret; -} - -int theora_encode_packetout(theora_state *_te,int _last_p,ogg_packet *_op){ - th_api_wrapper *api; - api=(th_api_wrapper *)_te->i->codec_setup; - return th_encode_packetout(api->encode,_last_p,_op); -} - -int theora_encode_header(theora_state *_te,ogg_packet *_op){ - oc_enc_ctx *enc; - th_api_wrapper *api; - int ret; - api=(th_api_wrapper *)_te->i->codec_setup; - enc=api->encode; - /*If we've already started encoding, fail.*/ - if(enc->packet_state>OC_PACKET_EMPTY||enc->state.granpos!=0){ - return TH_EINVAL; - } - /*Reset the state to make sure we output an info packet.*/ - enc->packet_state=OC_PACKET_INFO_HDR; - ret=th_encode_flushheader(api->encode,NULL,_op); - return ret>=0?0:ret; -} - -int theora_encode_comment(theora_comment *_tc,ogg_packet *_op){ - oggpack_buffer opb; - void *buf; - int packet_state; - int ret; - packet_state=OC_PACKET_COMMENT_HDR; - oggpackB_writeinit(&opb); - ret=oc_state_flushheader(NULL,&packet_state,&opb,NULL,NULL, - th_version_string(),(th_comment *)_tc,_op); - if(ret>=0){ - /*The oggpack_buffer's lifetime ends 
with this function, so we have to - copy out the packet contents. - Presumably the application knows it is supposed to free this. - This part works nothing like the Vorbis API, and the documentation on it - has been wrong for some time, claiming libtheora owned the memory.*/ - buf=_ogg_malloc(_op->bytes); - if(buf==NULL){ - _op->packet=NULL; - ret=TH_EFAULT; - } - else{ - memcpy(buf,_op->packet,_op->bytes); - _op->packet=buf; - ret=0; - } - } - oggpack_writeclear(&opb); - return ret; -} - -int theora_encode_tables(theora_state *_te,ogg_packet *_op){ - oc_enc_ctx *enc; - th_api_wrapper *api; - int ret; - api=(th_api_wrapper *)_te->i->codec_setup; - enc=api->encode; - /*If we've already started encoding, fail.*/ - if(enc->packet_state>OC_PACKET_EMPTY||enc->state.granpos!=0){ - return TH_EINVAL; - } - /*Reset the state to make sure we output a setup packet.*/ - enc->packet_state=OC_PACKET_SETUP_HDR; - ret=th_encode_flushheader(api->encode,NULL,_op); - return ret>=0?0:ret; -} diff --git a/drivers/theora/encfrag.c b/drivers/theora/encfrag.c deleted file mode 100644 index bb814c8e4a..0000000000 --- a/drivers/theora/encfrag.c +++ /dev/null @@ -1,388 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: encfrag.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ -#include -#include -#include "encint.h" - - -void oc_enc_frag_sub(const oc_enc_ctx *_enc,ogg_int16_t _diff[64], - const unsigned char *_src,const unsigned char *_ref,int _ystride){ - (*_enc->opt_vtable.frag_sub)(_diff,_src,_ref,_ystride); -} - -void oc_enc_frag_sub_c(ogg_int16_t _diff[64],const unsigned char *_src, - const unsigned char *_ref,int _ystride){ - int i; - for(i=0;i<8;i++){ - int j; - for(j=0;j<8;j++)_diff[i*8+j]=(ogg_int16_t)(_src[j]-_ref[j]); - _src+=_ystride; - _ref+=_ystride; - } -} - -void oc_enc_frag_sub_128(const oc_enc_ctx *_enc,ogg_int16_t _diff[64], - const unsigned char *_src,int _ystride){ - (*_enc->opt_vtable.frag_sub_128)(_diff,_src,_ystride); -} - -void oc_enc_frag_sub_128_c(ogg_int16_t *_diff, - const unsigned char *_src,int _ystride){ - int i; - for(i=0;i<8;i++){ - int j; - for(j=0;j<8;j++)_diff[i*8+j]=(ogg_int16_t)(_src[j]-128); - _src+=_ystride; - } -} - -unsigned oc_enc_frag_sad(const oc_enc_ctx *_enc,const unsigned char *_x, - const unsigned char *_y,int _ystride){ - return (*_enc->opt_vtable.frag_sad)(_x,_y,_ystride); -} - -unsigned oc_enc_frag_sad_c(const unsigned char *_src, - const unsigned char *_ref,int _ystride){ - unsigned sad; - int i; - sad=0; - for(i=8;i-->0;){ - int j; - for(j=0;j<8;j++)sad+=abs(_src[j]-_ref[j]); - _src+=_ystride; - _ref+=_ystride; - } - return sad; -} - -unsigned oc_enc_frag_sad_thresh(const oc_enc_ctx *_enc, - const unsigned char *_src,const unsigned char *_ref,int _ystride, - unsigned _thresh){ - return (*_enc->opt_vtable.frag_sad_thresh)(_src,_ref,_ystride,_thresh); -} - -unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src, - const unsigned char 
*_ref,int _ystride,unsigned _thresh){ - unsigned sad; - int i; - sad=0; - for(i=8;i-->0;){ - int j; - for(j=0;j<8;j++)sad+=abs(_src[j]-_ref[j]); - if(sad>_thresh)break; - _src+=_ystride; - _ref+=_ystride; - } - return sad; -} - -unsigned oc_enc_frag_sad2_thresh(const oc_enc_ctx *_enc, - const unsigned char *_src,const unsigned char *_ref1, - const unsigned char *_ref2,int _ystride,unsigned _thresh){ - return (*_enc->opt_vtable.frag_sad2_thresh)(_src,_ref1,_ref2,_ystride, - _thresh); -} - -unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src, - const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, - unsigned _thresh){ - unsigned sad; - int i; - sad=0; - for(i=8;i-->0;){ - int j; - for(j=0;j<8;j++)sad+=abs(_src[j]-(_ref1[j]+_ref2[j]>>1)); - if(sad>_thresh)break; - _src+=_ystride; - _ref1+=_ystride; - _ref2+=_ystride; - } - return sad; -} - -static void oc_diff_hadamard(ogg_int16_t _buf[64],const unsigned char *_src, - const unsigned char *_ref,int _ystride){ - int i; - for(i=0;i<8;i++){ - int t0; - int t1; - int t2; - int t3; - int t4; - int t5; - int t6; - int t7; - int r; - /*Hadamard stage 1:*/ - t0=_src[0]-_ref[0]+_src[4]-_ref[4]; - t4=_src[0]-_ref[0]-_src[4]+_ref[4]; - t1=_src[1]-_ref[1]+_src[5]-_ref[5]; - t5=_src[1]-_ref[1]-_src[5]+_ref[5]; - t2=_src[2]-_ref[2]+_src[6]-_ref[6]; - t6=_src[2]-_ref[2]-_src[6]+_ref[6]; - t3=_src[3]-_ref[3]+_src[7]-_ref[7]; - t7=_src[3]-_ref[3]-_src[7]+_ref[7]; - /*Hadamard stage 2:*/ - r=t0; - t0+=t2; - t2=r-t2; - r=t1; - t1+=t3; - t3=r-t3; - r=t4; - t4+=t6; - t6=r-t6; - r=t5; - t5+=t7; - t7=r-t7; - /*Hadamard stage 3:*/ - _buf[0*8+i]=(ogg_int16_t)(t0+t1); - _buf[1*8+i]=(ogg_int16_t)(t0-t1); - _buf[2*8+i]=(ogg_int16_t)(t2+t3); - _buf[3*8+i]=(ogg_int16_t)(t2-t3); - _buf[4*8+i]=(ogg_int16_t)(t4+t5); - _buf[5*8+i]=(ogg_int16_t)(t4-t5); - _buf[6*8+i]=(ogg_int16_t)(t6+t7); - _buf[7*8+i]=(ogg_int16_t)(t6-t7); - _src+=_ystride; - _ref+=_ystride; - } -} - -static void oc_diff_hadamard2(ogg_int16_t _buf[64],const 
unsigned char *_src, - const unsigned char *_ref1,const unsigned char *_ref2,int _ystride){ - int i; - for(i=0;i<8;i++){ - int t0; - int t1; - int t2; - int t3; - int t4; - int t5; - int t6; - int t7; - int r; - /*Hadamard stage 1:*/ - r=_ref1[0]+_ref2[0]>>1; - t4=_ref1[4]+_ref2[4]>>1; - t0=_src[0]-r+_src[4]-t4; - t4=_src[0]-r-_src[4]+t4; - r=_ref1[1]+_ref2[1]>>1; - t5=_ref1[5]+_ref2[5]>>1; - t1=_src[1]-r+_src[5]-t5; - t5=_src[1]-r-_src[5]+t5; - r=_ref1[2]+_ref2[2]>>1; - t6=_ref1[6]+_ref2[6]>>1; - t2=_src[2]-r+_src[6]-t6; - t6=_src[2]-r-_src[6]+t6; - r=_ref1[3]+_ref2[3]>>1; - t7=_ref1[7]+_ref2[7]>>1; - t3=_src[3]-r+_src[7]-t7; - t7=_src[3]-r-_src[7]+t7; - /*Hadamard stage 2:*/ - r=t0; - t0+=t2; - t2=r-t2; - r=t1; - t1+=t3; - t3=r-t3; - r=t4; - t4+=t6; - t6=r-t6; - r=t5; - t5+=t7; - t7=r-t7; - /*Hadamard stage 3:*/ - _buf[0*8+i]=(ogg_int16_t)(t0+t1); - _buf[1*8+i]=(ogg_int16_t)(t0-t1); - _buf[2*8+i]=(ogg_int16_t)(t2+t3); - _buf[3*8+i]=(ogg_int16_t)(t2-t3); - _buf[4*8+i]=(ogg_int16_t)(t4+t5); - _buf[5*8+i]=(ogg_int16_t)(t4-t5); - _buf[6*8+i]=(ogg_int16_t)(t6+t7); - _buf[7*8+i]=(ogg_int16_t)(t6-t7); - _src+=_ystride; - _ref1+=_ystride; - _ref2+=_ystride; - } -} - -static void oc_intra_hadamard(ogg_int16_t _buf[64],const unsigned char *_src, - int _ystride){ - int i; - for(i=0;i<8;i++){ - int t0; - int t1; - int t2; - int t3; - int t4; - int t5; - int t6; - int t7; - int r; - /*Hadamard stage 1:*/ - t0=_src[0]+_src[4]; - t4=_src[0]-_src[4]; - t1=_src[1]+_src[5]; - t5=_src[1]-_src[5]; - t2=_src[2]+_src[6]; - t6=_src[2]-_src[6]; - t3=_src[3]+_src[7]; - t7=_src[3]-_src[7]; - /*Hadamard stage 2:*/ - r=t0; - t0+=t2; - t2=r-t2; - r=t1; - t1+=t3; - t3=r-t3; - r=t4; - t4+=t6; - t6=r-t6; - r=t5; - t5+=t7; - t7=r-t7; - /*Hadamard stage 3:*/ - _buf[0*8+i]=(ogg_int16_t)(t0+t1); - _buf[1*8+i]=(ogg_int16_t)(t0-t1); - _buf[2*8+i]=(ogg_int16_t)(t2+t3); - _buf[3*8+i]=(ogg_int16_t)(t2-t3); - _buf[4*8+i]=(ogg_int16_t)(t4+t5); - _buf[5*8+i]=(ogg_int16_t)(t4-t5); - 
_buf[6*8+i]=(ogg_int16_t)(t6+t7); - _buf[7*8+i]=(ogg_int16_t)(t6-t7); - _src+=_ystride; - } -} - -unsigned oc_hadamard_sad_thresh(const ogg_int16_t _buf[64],unsigned _thresh){ - unsigned sad; - int t0; - int t1; - int t2; - int t3; - int t4; - int t5; - int t6; - int t7; - int r; - int i; - sad=0; - for(i=0;i<8;i++){ - /*Hadamard stage 1:*/ - t0=_buf[i*8+0]+_buf[i*8+4]; - t4=_buf[i*8+0]-_buf[i*8+4]; - t1=_buf[i*8+1]+_buf[i*8+5]; - t5=_buf[i*8+1]-_buf[i*8+5]; - t2=_buf[i*8+2]+_buf[i*8+6]; - t6=_buf[i*8+2]-_buf[i*8+6]; - t3=_buf[i*8+3]+_buf[i*8+7]; - t7=_buf[i*8+3]-_buf[i*8+7]; - /*Hadamard stage 2:*/ - r=t0; - t0+=t2; - t2=r-t2; - r=t1; - t1+=t3; - t3=r-t3; - r=t4; - t4+=t6; - t6=r-t6; - r=t5; - t5+=t7; - t7=r-t7; - /*Hadamard stage 3:*/ - r=abs(t0+t1); - r+=abs(t0-t1); - r+=abs(t2+t3); - r+=abs(t2-t3); - r+=abs(t4+t5); - r+=abs(t4-t5); - r+=abs(t6+t7); - r+=abs(t6-t7); - sad+=r; - if(sad>_thresh)break; - } - return sad; -} - -unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc, - const unsigned char *_src,const unsigned char *_ref,int _ystride, - unsigned _thresh){ - return (*_enc->opt_vtable.frag_satd_thresh)(_src,_ref,_ystride,_thresh); -} - -unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src, - const unsigned char *_ref,int _ystride,unsigned _thresh){ - ogg_int16_t buf[64]; - oc_diff_hadamard(buf,_src,_ref,_ystride); - return oc_hadamard_sad_thresh(buf,_thresh); -} - -unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc, - const unsigned char *_src,const unsigned char *_ref1, - const unsigned char *_ref2,int _ystride,unsigned _thresh){ - return (*_enc->opt_vtable.frag_satd2_thresh)(_src,_ref1,_ref2,_ystride, - _thresh); -} - -unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src, - const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, - unsigned _thresh){ - ogg_int16_t buf[64]; - oc_diff_hadamard2(buf,_src,_ref1,_ref2,_ystride); - return oc_hadamard_sad_thresh(buf,_thresh); -} - -unsigned oc_enc_frag_intra_satd(const 
oc_enc_ctx *_enc, - const unsigned char *_src,int _ystride){ - return (*_enc->opt_vtable.frag_intra_satd)(_src,_ystride); -} - -unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride){ - ogg_int16_t buf[64]; - oc_intra_hadamard(buf,_src,_ystride); - return oc_hadamard_sad_thresh(buf,UINT_MAX) - -abs(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]); -} - -void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst, - const unsigned char *_src1,const unsigned char *_src2,int _ystride){ - (*_enc->opt_vtable.frag_copy2)(_dst,_src1,_src2,_ystride); -} - -void oc_enc_frag_copy2_c(unsigned char *_dst, - const unsigned char *_src1,const unsigned char *_src2,int _ystride){ - int i; - int j; - for(i=8;i-->0;){ - for(j=0;j<8;j++)_dst[j]=_src1[j]+_src2[j]>>1; - _dst+=_ystride; - _src1+=_ystride; - _src2+=_ystride; - } -} - -void oc_enc_frag_recon_intra(const oc_enc_ctx *_enc, - unsigned char *_dst,int _ystride,const ogg_int16_t _residue[64]){ - (*_enc->opt_vtable.frag_recon_intra)(_dst,_ystride,_residue); -} - -void oc_enc_frag_recon_inter(const oc_enc_ctx *_enc,unsigned char *_dst, - const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){ - (*_enc->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue); -} diff --git a/drivers/theora/encinfo.c b/drivers/theora/encinfo.c deleted file mode 100644 index 83be1dae72..0000000000 --- a/drivers/theora/encinfo.c +++ /dev/null @@ -1,121 +0,0 @@ -#include -#include -#include "internal.h" -#include "enquant.h" -#include "huffenc.h" - - - -/*Packs a series of octets from a given byte array into the pack buffer. - _opb: The pack buffer to store the octets in. - _buf: The byte array containing the bytes to pack. 
- _len: The number of octets to pack.*/ -static void oc_pack_octets(oggpack_buffer *_opb,const char *_buf,int _len){ - int i; - for(i=0;i<_len;i++)oggpackB_write(_opb,_buf[i],8); -} - - - -int oc_state_flushheader(oc_theora_state *_state,int *_packet_state, - oggpack_buffer *_opb,const th_quant_info *_qinfo, - const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS], - const char *_vendor,th_comment *_tc,ogg_packet *_op){ - unsigned char *packet; - int b_o_s; - if(_op==NULL)return TH_EFAULT; - switch(*_packet_state){ - /*Codec info header.*/ - case OC_PACKET_INFO_HDR:{ - if(_state==NULL)return TH_EFAULT; - oggpackB_reset(_opb); - /*Mark this packet as the info header.*/ - oggpackB_write(_opb,0x80,8); - /*Write the codec string.*/ - oc_pack_octets(_opb,"theora",6); - /*Write the codec bitstream version.*/ - oggpackB_write(_opb,TH_VERSION_MAJOR,8); - oggpackB_write(_opb,TH_VERSION_MINOR,8); - oggpackB_write(_opb,TH_VERSION_SUB,8); - /*Describe the encoded frame.*/ - oggpackB_write(_opb,_state->info.frame_width>>4,16); - oggpackB_write(_opb,_state->info.frame_height>>4,16); - oggpackB_write(_opb,_state->info.pic_width,24); - oggpackB_write(_opb,_state->info.pic_height,24); - oggpackB_write(_opb,_state->info.pic_x,8); - oggpackB_write(_opb,_state->info.pic_y,8); - oggpackB_write(_opb,_state->info.fps_numerator,32); - oggpackB_write(_opb,_state->info.fps_denominator,32); - oggpackB_write(_opb,_state->info.aspect_numerator,24); - oggpackB_write(_opb,_state->info.aspect_denominator,24); - oggpackB_write(_opb,_state->info.colorspace,8); - oggpackB_write(_opb,_state->info.target_bitrate,24); - oggpackB_write(_opb,_state->info.quality,6); - oggpackB_write(_opb,_state->info.keyframe_granule_shift,5); - oggpackB_write(_opb,_state->info.pixel_fmt,2); - /*Spare configuration bits.*/ - oggpackB_write(_opb,0,3); - b_o_s=1; - }break; - /*Comment header.*/ - case OC_PACKET_COMMENT_HDR:{ - int vendor_len; - int i; - if(_tc==NULL)return TH_EFAULT; - vendor_len=strlen(_vendor); - 
oggpackB_reset(_opb); - /*Mark this packet as the comment header.*/ - oggpackB_write(_opb,0x81,8); - /*Write the codec string.*/ - oc_pack_octets(_opb,"theora",6); - /*Write the vendor string.*/ - oggpack_write(_opb,vendor_len,32); - oc_pack_octets(_opb,_vendor,vendor_len); - oggpack_write(_opb,_tc->comments,32); - for(i=0;i<_tc->comments;i++){ - if(_tc->user_comments[i]!=NULL){ - oggpack_write(_opb,_tc->comment_lengths[i],32); - oc_pack_octets(_opb,_tc->user_comments[i],_tc->comment_lengths[i]); - } - else oggpack_write(_opb,0,32); - } - b_o_s=0; - }break; - /*Codec setup header.*/ - case OC_PACKET_SETUP_HDR:{ - int ret; - oggpackB_reset(_opb); - /*Mark this packet as the setup header.*/ - oggpackB_write(_opb,0x82,8); - /*Write the codec string.*/ - oc_pack_octets(_opb,"theora",6); - /*Write the quantizer tables.*/ - oc_quant_params_pack(_opb,_qinfo); - /*Write the huffman codes.*/ - ret=oc_huff_codes_pack(_opb,_codes); - /*This should never happen, because we validate the tables when they - are set. - If you see, it's a good chance memory is being corrupted.*/ - if(ret<0)return ret; - b_o_s=0; - }break; - /*No more headers to emit.*/ - default:return 0; - } - /*This is kind of fugly: we hand the user a buffer which they do not own. - We will overwrite it when the next packet is output, so the user better be - done with it by then. - Vorbis is little better: it hands back buffers that it will free the next - time the headers are requested, or when the encoder is cleared. 
- Hopefully libogg2 will make this much cleaner.*/ - packet=oggpackB_get_buffer(_opb); - /*If there's no packet, malloc failed while writing.*/ - if(packet==NULL)return TH_EFAULT; - _op->packet=packet; - _op->bytes=oggpackB_bytes(_opb); - _op->b_o_s=b_o_s; - _op->e_o_s=0; - _op->granulepos=0; - _op->packetno=*_packet_state+3; - return ++(*_packet_state)+3; -} diff --git a/drivers/theora/encint.h b/drivers/theora/encint.h deleted file mode 100644 index 97897d5a04..0000000000 --- a/drivers/theora/encint.h +++ /dev/null @@ -1,493 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: encint.h 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ -#if !defined(_encint_H) -# define _encint_H (1) -# if defined(HAVE_CONFIG_H) -# include "config.h" -# endif -# include "theora/theoraenc.h" -# include "internal.h" -# include "ocintrin.h" -# include "mathops.h" -# include "enquant.h" -# include "huffenc.h" -/*# define OC_COLLECT_METRICS*/ - - - -typedef oc_mv oc_mv2[2]; - -typedef struct oc_enc_opt_vtable oc_enc_opt_vtable; -typedef struct oc_mb_enc_info oc_mb_enc_info; -typedef struct oc_mode_scheme_chooser oc_mode_scheme_chooser; -typedef struct oc_iir_filter oc_iir_filter; -typedef struct oc_frame_metrics oc_frame_metrics; -typedef struct oc_rc_state oc_rc_state; -typedef struct th_enc_ctx oc_enc_ctx; -typedef struct oc_token_checkpoint oc_token_checkpoint; - - - -/*Constants for the packet-out state 
machine specific to the encoder.*/ - -/*Next packet to emit: Data packet, but none are ready yet.*/ -#define OC_PACKET_EMPTY (0) -/*Next packet to emit: Data packet, and one is ready.*/ -#define OC_PACKET_READY (1) - -/*All features enabled.*/ -#define OC_SP_LEVEL_SLOW (0) -/*Enable early skip.*/ -#define OC_SP_LEVEL_EARLY_SKIP (1) -/*Disable motion compensation.*/ -#define OC_SP_LEVEL_NOMC (2) -/*Maximum valid speed level.*/ -#define OC_SP_LEVEL_MAX (2) - - -/*The bits used for each of the MB mode codebooks.*/ -extern const unsigned char OC_MODE_BITS[2][OC_NMODES]; - -/*The bits used for each of the MV codebooks.*/ -extern const unsigned char OC_MV_BITS[2][64]; - -/*The minimum value that can be stored in a SB run for each codeword. - The last entry is the upper bound on the length of a single SB run.*/ -extern const ogg_uint16_t OC_SB_RUN_VAL_MIN[8]; -/*The bits used for each SB run codeword.*/ -extern const unsigned char OC_SB_RUN_CODE_NBITS[7]; - -/*The bits used for each block run length (starting with 1).*/ -extern const unsigned char OC_BLOCK_RUN_CODE_NBITS[30]; - - - -/*Encoder specific functions with accelerated variants.*/ -struct oc_enc_opt_vtable{ - unsigned (*frag_sad)(const unsigned char *_src, - const unsigned char *_ref,int _ystride); - unsigned (*frag_sad_thresh)(const unsigned char *_src, - const unsigned char *_ref,int _ystride,unsigned _thresh); - unsigned (*frag_sad2_thresh)(const unsigned char *_src, - const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, - unsigned _thresh); - unsigned (*frag_satd_thresh)(const unsigned char *_src, - const unsigned char *_ref,int _ystride,unsigned _thresh); - unsigned (*frag_satd2_thresh)(const unsigned char *_src, - const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, - unsigned _thresh); - unsigned (*frag_intra_satd)(const unsigned char *_src,int _ystride); - void (*frag_sub)(ogg_int16_t _diff[64],const unsigned char *_src, - const unsigned char *_ref,int _ystride); - void 
(*frag_sub_128)(ogg_int16_t _diff[64], - const unsigned char *_src,int _ystride); - void (*frag_copy2)(unsigned char *_dst, - const unsigned char *_src1,const unsigned char *_src2,int _ystride); - void (*frag_recon_intra)(unsigned char *_dst,int _ystride, - const ogg_int16_t _residue[64]); - void (*frag_recon_inter)(unsigned char *_dst, - const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); - void (*fdct8x8)(ogg_int16_t _y[64],const ogg_int16_t _x[64]); -}; - - -void oc_enc_vtable_init(oc_enc_ctx *_enc); - - - -/*Encoder-specific macroblock information.*/ -struct oc_mb_enc_info{ - /*Neighboring macro blocks that have MVs available from the current frame.*/ - unsigned cneighbors[4]; - /*Neighboring macro blocks to use for MVs from the previous frame.*/ - unsigned pneighbors[4]; - /*The number of current-frame neighbors.*/ - unsigned char ncneighbors; - /*The number of previous-frame neighbors.*/ - unsigned char npneighbors; - /*Flags indicating which MB modes have been refined.*/ - unsigned char refined; - /*Motion vectors for a macro block for the current frame and the - previous two frames. - Each is a set of 2 vectors against OC_FRAME_GOLD and OC_FRAME_PREV, which - can be used to estimate constant velocity and constant acceleration - predictors. - Uninitialized MVs are (0,0).*/ - oc_mv2 analysis_mv[3]; - /*Current unrefined analysis MVs.*/ - oc_mv unref_mv[2]; - /*Unrefined block MVs.*/ - oc_mv block_mv[4]; - /*Refined block MVs.*/ - oc_mv ref_mv[4]; - /*Minimum motion estimation error from the analysis stage.*/ - ogg_uint16_t error[2]; - /*MB error for half-pel refinement for each frame type.*/ - unsigned satd[2]; - /*Block error for half-pel refinement.*/ - unsigned block_satd[4]; -}; - - - -/*State machine to estimate the opportunity cost of coding a MB mode.*/ -struct oc_mode_scheme_chooser{ - /*Pointers to the a list containing the index of each mode in the mode - alphabet used by each scheme. 
- The first entry points to the dynamic scheme0_ranks, while the remaining 7 - point to the constant entries stored in OC_MODE_SCHEMES.*/ - const unsigned char *mode_ranks[8]; - /*The ranks for each mode when coded with scheme 0. - These are optimized so that the more frequent modes have lower ranks.*/ - unsigned char scheme0_ranks[OC_NMODES]; - /*The list of modes, sorted in descending order of frequency, that - corresponds to the ranks above.*/ - unsigned char scheme0_list[OC_NMODES]; - /*The number of times each mode has been chosen so far.*/ - int mode_counts[OC_NMODES]; - /*The list of mode coding schemes, sorted in ascending order of bit cost.*/ - unsigned char scheme_list[8]; - /*The number of bits used by each mode coding scheme.*/ - ptrdiff_t scheme_bits[8]; -}; - - -void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser); - - - -/*A 2nd order low-pass Bessel follower. - We use this for rate control because it has fast reaction time, but is - critically damped.*/ -struct oc_iir_filter{ - ogg_int32_t c[2]; - ogg_int64_t g; - ogg_int32_t x[2]; - ogg_int32_t y[2]; -}; - - - -/*The 2-pass metrics associated with a single frame.*/ -struct oc_frame_metrics{ - /*The log base 2 of the scale factor for this frame in Q24 format.*/ - ogg_int32_t log_scale; - /*The number of application-requested duplicates of this frame.*/ - unsigned dup_count:31; - /*The frame type from pass 1.*/ - unsigned frame_type:1; -}; - - - -/*Rate control state information.*/ -struct oc_rc_state{ - /*The target average bits per frame.*/ - ogg_int64_t bits_per_frame; - /*The current buffer fullness (bits available to be used).*/ - ogg_int64_t fullness; - /*The target buffer fullness. 
- This is where we'd like to be by the last keyframe the appears in the next - buf_delay frames.*/ - ogg_int64_t target; - /*The maximum buffer fullness (total size of the buffer).*/ - ogg_int64_t max; - /*The log of the number of pixels in a frame in Q57 format.*/ - ogg_int64_t log_npixels; - /*The exponent used in the rate model in Q8 format.*/ - unsigned exp[2]; - /*The number of frames to distribute the buffer usage over.*/ - int buf_delay; - /*The total drop count from the previous frame. - This includes duplicates explicitly requested via the - TH_ENCCTL_SET_DUP_COUNT API as well as frames we chose to drop ourselves.*/ - ogg_uint32_t prev_drop_count; - /*The log of an estimated scale factor used to obtain the real framerate, for - VFR sources or, e.g., 12 fps content doubled to 24 fps, etc.*/ - ogg_int64_t log_drop_scale; - /*The log of estimated scale factor for the rate model in Q57 format.*/ - ogg_int64_t log_scale[2]; - /*The log of the target quantizer level in Q57 format.*/ - ogg_int64_t log_qtarget; - /*Will we drop frames to meet bitrate target?*/ - unsigned char drop_frames; - /*Do we respect the maximum buffer fullness?*/ - unsigned char cap_overflow; - /*Can the reservoir go negative?*/ - unsigned char cap_underflow; - /*Second-order lowpass filters to track scale and VFR.*/ - oc_iir_filter scalefilter[2]; - int inter_count; - int inter_delay; - int inter_delay_target; - oc_iir_filter vfrfilter; - /*Two-pass mode state. - 0 => 1-pass encoding. - 1 => 1st pass of 2-pass encoding. - 2 => 2nd pass of 2-pass encoding.*/ - int twopass; - /*Buffer for current frame metrics.*/ - unsigned char twopass_buffer[48]; - /*The number of bytes in the frame metrics buffer. 
- When 2-pass encoding is enabled, this is set to 0 after each frame is - submitted, and must be non-zero before the next frame will be accepted.*/ - int twopass_buffer_bytes; - int twopass_buffer_fill; - /*Whether or not to force the next frame to be a keyframe.*/ - unsigned char twopass_force_kf; - /*The metrics for the previous frame.*/ - oc_frame_metrics prev_metrics; - /*The metrics for the current frame.*/ - oc_frame_metrics cur_metrics; - /*The buffered metrics for future frames.*/ - oc_frame_metrics *frame_metrics; - int nframe_metrics; - int cframe_metrics; - /*The index of the current frame in the circular metric buffer.*/ - int frame_metrics_head; - /*The frame count of each type (keyframes, delta frames, and dup frames); - 32 bits limits us to 2.268 years at 60 fps.*/ - ogg_uint32_t frames_total[3]; - /*The number of frames of each type yet to be processed.*/ - ogg_uint32_t frames_left[3]; - /*The sum of the scale values for each frame type.*/ - ogg_int64_t scale_sum[2]; - /*The start of the window over which the current scale sums are taken.*/ - int scale_window0; - /*The end of the window over which the current scale sums are taken.*/ - int scale_window_end; - /*The frame count of each type in the current 2-pass window; this does not - include dup frames.*/ - int nframes[3]; - /*The total accumulated estimation bias.*/ - ogg_int64_t rate_bias; -}; - - -void oc_rc_state_init(oc_rc_state *_rc,oc_enc_ctx *_enc); -void oc_rc_state_clear(oc_rc_state *_rc); - -void oc_enc_rc_resize(oc_enc_ctx *_enc); -int oc_enc_select_qi(oc_enc_ctx *_enc,int _qti,int _clamp); -void oc_enc_calc_lambda(oc_enc_ctx *_enc,int _frame_type); -int oc_enc_update_rc_state(oc_enc_ctx *_enc, - long _bits,int _qti,int _qi,int _trial,int _droppable); -int oc_enc_rc_2pass_out(oc_enc_ctx *_enc,unsigned char **_buf); -int oc_enc_rc_2pass_in(oc_enc_ctx *_enc,unsigned char *_buf,size_t _bytes); - - - -/*The internal encoder state.*/ -struct th_enc_ctx{ - /*Shared encoder/decoder state.*/ - 
oc_theora_state state; - /*Buffer in which to assemble packets.*/ - oggpack_buffer opb; - /*Encoder-specific macroblock information.*/ - oc_mb_enc_info *mb_info; - /*DC coefficients after prediction.*/ - ogg_int16_t *frag_dc; - /*The list of coded macro blocks, in coded order.*/ - unsigned *coded_mbis; - /*The number of coded macro blocks.*/ - size_t ncoded_mbis; - /*Whether or not packets are ready to be emitted. - This takes on negative values while there are remaining header packets to - be emitted, reaches 0 when the codec is ready for input, and becomes - positive when a frame has been processed and data packets are ready.*/ - int packet_state; - /*The maximum distance between keyframes.*/ - ogg_uint32_t keyframe_frequency_force; - /*The number of duplicates to produce for the next frame.*/ - ogg_uint32_t dup_count; - /*The number of duplicates remaining to be emitted for the current frame.*/ - ogg_uint32_t nqueued_dups; - /*The number of duplicates emitted for the last frame.*/ - ogg_uint32_t prev_dup_count; - /*The current speed level.*/ - int sp_level; - /*Whether or not VP3 compatibility mode has been enabled.*/ - unsigned char vp3_compatible; - /*Whether or not any INTER frames have been coded.*/ - unsigned char coded_inter_frame; - /*Whether or not previous frame was dropped.*/ - unsigned char prevframe_dropped; - /*Stores most recently chosen Huffman tables for each frame type, DC and AC - coefficients, and luma and chroma tokens. 
- The actual Huffman table used for a given coefficient depends not only on - the choice made here, but also its index in the zig-zag ordering.*/ - unsigned char huff_idxs[2][2][2]; - /*Current count of bits used by each MV coding mode.*/ - size_t mv_bits[2]; - /*The mode scheme chooser for estimating mode coding costs.*/ - oc_mode_scheme_chooser chooser; - /*The number of vertical super blocks in an MCU.*/ - int mcu_nvsbs; - /*The SSD error for skipping each fragment in the current MCU.*/ - unsigned *mcu_skip_ssd; - /*The DCT token lists for each coefficient and each plane.*/ - unsigned char **dct_tokens[3]; - /*The extra bits associated with each DCT token.*/ - ogg_uint16_t **extra_bits[3]; - /*The number of DCT tokens for each coefficient for each plane.*/ - ptrdiff_t ndct_tokens[3][64]; - /*Pending EOB runs for each coefficient for each plane.*/ - ogg_uint16_t eob_run[3][64]; - /*The offset of the first DCT token for each coefficient for each plane.*/ - unsigned char dct_token_offs[3][64]; - /*The last DC coefficient for each plane and reference frame.*/ - int dc_pred_last[3][3]; -#if defined(OC_COLLECT_METRICS) - /*Fragment SATD statistics for MB mode estimation metrics.*/ - unsigned *frag_satd; - /*Fragment SSD statistics for MB mode estimation metrics.*/ - unsigned *frag_ssd; -#endif - /*The R-D optimization parameter.*/ - int lambda; - /*The huffman tables in use.*/ - th_huff_code huff_codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]; - /*The quantization parameters in use.*/ - th_quant_info qinfo; - oc_iquant *enquant_tables[64][3][2]; - oc_iquant_table enquant_table_data[64][3][2]; - /*An "average" quantizer for each quantizer type (INTRA or INTER) and qi - value. - This is used to paramterize the rate control decisions. - They are kept in the log domain to simplify later processing. 
- Keep in mind these are DCT domain quantizers, and so are scaled by an - additional factor of 4 from the pixel domain.*/ - ogg_int64_t log_qavg[2][64]; - /*The buffer state used to drive rate control.*/ - oc_rc_state rc; - /*Table for encoder acceleration functions.*/ - oc_enc_opt_vtable opt_vtable; -}; - - -void oc_enc_analyze_intra(oc_enc_ctx *_enc,int _recode); -int oc_enc_analyze_inter(oc_enc_ctx *_enc,int _allow_keyframe,int _recode); -#if defined(OC_COLLECT_METRICS) -void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc); -void oc_enc_mode_metrics_dump(oc_enc_ctx *_enc); -#endif - - - -/*Perform fullpel motion search for a single MB against both reference frames.*/ -void oc_mcenc_search(oc_enc_ctx *_enc,int _mbi); -/*Refine a MB MV for one frame.*/ -void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame); -/*Refine the block MVs.*/ -void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi); - - - -/*Used to rollback a tokenlog transaction when we retroactively decide to skip - a fragment. 
- A checkpoint is taken right before each token is added.*/ -struct oc_token_checkpoint{ - /*The color plane the token was added to.*/ - unsigned char pli; - /*The zig-zag index the token was added to.*/ - unsigned char zzi; - /*The outstanding EOB run count before the token was added.*/ - ogg_uint16_t eob_run; - /*The token count before the token was added.*/ - ptrdiff_t ndct_tokens; -}; - - - -void oc_enc_tokenize_start(oc_enc_ctx *_enc); -int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi, - ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct, - int _zzi,oc_token_checkpoint **_stack,int _acmin); -void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc, - const oc_token_checkpoint *_stack,int _n); -void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc, - int _pli,int _fragy0,int _frag_yend); -void oc_enc_tokenize_dc_frag_list(oc_enc_ctx *_enc,int _pli, - const ptrdiff_t *_coded_fragis,ptrdiff_t _ncoded_fragis, - int _prev_ndct_tokens1,int _prev_eob_run1); -void oc_enc_tokenize_finish(oc_enc_ctx *_enc); - - - -/*Utility routine to encode one of the header packets.*/ -int oc_state_flushheader(oc_theora_state *_state,int *_packet_state, - oggpack_buffer *_opb,const th_quant_info *_qinfo, - const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS], - const char *_vendor,th_comment *_tc,ogg_packet *_op); - - - -/*Encoder-specific accelerated functions.*/ -void oc_enc_frag_sub(const oc_enc_ctx *_enc,ogg_int16_t _diff[64], - const unsigned char *_src,const unsigned char *_ref,int _ystride); -void oc_enc_frag_sub_128(const oc_enc_ctx *_enc,ogg_int16_t _diff[64], - const unsigned char *_src,int _ystride); -unsigned oc_enc_frag_sad(const oc_enc_ctx *_enc,const unsigned char *_src, - const unsigned char *_ref,int _ystride); -unsigned oc_enc_frag_sad_thresh(const oc_enc_ctx *_enc, - const unsigned char *_src,const unsigned char *_ref,int _ystride, - unsigned _thresh); -unsigned oc_enc_frag_sad2_thresh(const oc_enc_ctx *_enc, - const unsigned char 
*_src,const unsigned char *_ref1, - const unsigned char *_ref2,int _ystride,unsigned _thresh); -unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc, - const unsigned char *_src,const unsigned char *_ref,int _ystride, - unsigned _thresh); -unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc, - const unsigned char *_src,const unsigned char *_ref1, - const unsigned char *_ref2,int _ystride,unsigned _thresh); -unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc, - const unsigned char *_src,int _ystride); -void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst, - const unsigned char *_src1,const unsigned char *_src2,int _ystride); -void oc_enc_frag_recon_intra(const oc_enc_ctx *_enc, - unsigned char *_dst,int _ystride,const ogg_int16_t _residue[64]); -void oc_enc_frag_recon_inter(const oc_enc_ctx *_enc,unsigned char *_dst, - const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); -void oc_enc_fdct8x8(const oc_enc_ctx *_enc,ogg_int16_t _y[64], - const ogg_int16_t _x[64]); - -/*Default pure-C implementations.*/ -void oc_enc_vtable_init_c(oc_enc_ctx *_enc); - -void oc_enc_frag_sub_c(ogg_int16_t _diff[64], - const unsigned char *_src,const unsigned char *_ref,int _ystride); -void oc_enc_frag_sub_128_c(ogg_int16_t _diff[64], - const unsigned char *_src,int _ystride); -void oc_enc_frag_copy2_c(unsigned char *_dst, - const unsigned char *_src1,const unsigned char *_src2,int _ystride); -unsigned oc_enc_frag_sad_c(const unsigned char *_src, - const unsigned char *_ref,int _ystride); -unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src, - const unsigned char *_ref,int _ystride,unsigned _thresh); -unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src, - const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, - unsigned _thresh); -unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src, - const unsigned char *_ref,int _ystride,unsigned _thresh); -unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src, - 
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, - unsigned _thresh); -unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride); -void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]); - -#endif diff --git a/drivers/theora/encode.c b/drivers/theora/encode.c deleted file mode 100644 index 0c5ea6a172..0000000000 --- a/drivers/theora/encode.c +++ /dev/null @@ -1,1615 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: encode.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ -#include -#include -#include "encint.h" -#if defined(OC_X86_ASM) -# include "x86/x86enc.h" -#endif - - - -/*The default quantization parameters used by VP3.1.*/ -static const int OC_VP31_RANGE_SIZES[1]={63}; -static const th_quant_base OC_VP31_BASES_INTRA_Y[2]={ - { - 16, 11, 10, 16, 24, 40, 51, 61, - 12, 12, 14, 19, 26, 58, 60, 55, - 14, 13, 16, 24, 40, 57, 69, 56, - 14, 17, 22, 29, 51, 87, 80, 62, - 18, 22, 37, 58, 68,109,103, 77, - 24, 35, 55, 64, 81,104,113, 92, - 49, 64, 78, 87,103,121,120,101, - 72, 92, 95, 98,112,100,103, 99 - }, - { - 16, 11, 10, 16, 24, 40, 51, 61, - 12, 12, 14, 19, 26, 58, 60, 55, - 14, 13, 16, 24, 40, 57, 69, 56, - 14, 17, 22, 29, 51, 87, 80, 62, - 18, 22, 37, 58, 68,109,103, 77, - 24, 35, 55, 64, 81,104,113, 92, - 49, 64, 78, 87,103,121,120,101, - 72, 92, 95, 98,112,100,103, 99 - } -}; -static const th_quant_base OC_VP31_BASES_INTRA_C[2]={ - 
{ - 17, 18, 24, 47, 99, 99, 99, 99, - 18, 21, 26, 66, 99, 99, 99, 99, - 24, 26, 56, 99, 99, 99, 99, 99, - 47, 66, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99 - }, - { - 17, 18, 24, 47, 99, 99, 99, 99, - 18, 21, 26, 66, 99, 99, 99, 99, - 24, 26, 56, 99, 99, 99, 99, 99, - 47, 66, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99 - } -}; -static const th_quant_base OC_VP31_BASES_INTER[2]={ - { - 16, 16, 16, 20, 24, 28, 32, 40, - 16, 16, 20, 24, 28, 32, 40, 48, - 16, 20, 24, 28, 32, 40, 48, 64, - 20, 24, 28, 32, 40, 48, 64, 64, - 24, 28, 32, 40, 48, 64, 64, 64, - 28, 32, 40, 48, 64, 64, 64, 96, - 32, 40, 48, 64, 64, 64, 96,128, - 40, 48, 64, 64, 64, 96,128,128 - }, - { - 16, 16, 16, 20, 24, 28, 32, 40, - 16, 16, 20, 24, 28, 32, 40, 48, - 16, 20, 24, 28, 32, 40, 48, 64, - 20, 24, 28, 32, 40, 48, 64, 64, - 24, 28, 32, 40, 48, 64, 64, 64, - 28, 32, 40, 48, 64, 64, 64, 96, - 32, 40, 48, 64, 64, 64, 96,128, - 40, 48, 64, 64, 64, 96,128,128 - } -}; - -const th_quant_info TH_VP31_QUANT_INFO={ - { - 220,200,190,180,170,170,160,160, - 150,150,140,140,130,130,120,120, - 110,110,100,100, 90, 90, 90, 80, - 80, 80, 70, 70, 70, 60, 60, 60, - 60, 50, 50, 50, 50, 40, 40, 40, - 40, 40, 30, 30, 30, 30, 30, 30, - 30, 20, 20, 20, 20, 20, 20, 20, - 20, 10, 10, 10, 10, 10, 10, 10 - }, - { - 500,450,400,370,340,310,285,265, - 245,225,210,195,185,180,170,160, - 150,145,135,130,125,115,110,107, - 100, 96, 93, 89, 85, 82, 75, 74, - 70, 68, 64, 60, 57, 56, 52, 50, - 49, 45, 44, 43, 40, 38, 37, 35, - 33, 32, 30, 29, 28, 25, 24, 22, - 21, 19, 18, 17, 15, 13, 12, 10 - }, - { - 30,25,20,20,15,15,14,14, - 13,13,12,12,11,11,10,10, - 9, 9, 8, 8, 7, 7, 7, 7, - 6, 6, 6, 6, 5, 5, 5, 5, - 4, 4, 4, 4, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 
0 - }, - { - { - {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_Y}, - {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_C}, - {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_C} - }, - { - {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER}, - {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER}, - {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER} - } - } -}; - -/*The current default quantization parameters.*/ -static const int OC_DEF_QRANGE_SIZES[3]={32,16,15}; -static const th_quant_base OC_DEF_BASES_INTRA_Y[4]={ - { - 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, - }, - { - 15, 12, 12, 15, 18, 20, 20, 21, - 13, 13, 14, 17, 18, 21, 21, 20, - 14, 14, 15, 18, 20, 21, 21, 21, - 14, 16, 17, 19, 20, 21, 21, 21, - 16, 17, 20, 21, 21, 21, 21, 21, - 18, 19, 20, 21, 21, 21, 21, 21, - 20, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21 - }, - { - 16, 12, 11, 16, 20, 25, 27, 28, - 13, 13, 14, 18, 21, 28, 28, 27, - 14, 13, 16, 20, 25, 28, 28, 28, - 14, 16, 19, 22, 27, 29, 29, 28, - 17, 19, 25, 28, 28, 30, 30, 29, - 20, 24, 27, 28, 29, 30, 30, 29, - 27, 28, 29, 29, 30, 30, 30, 30, - 29, 29, 29, 29, 30, 30, 30, 29 - }, - { - 16, 11, 10, 16, 24, 40, 51, 61, - 12, 12, 14, 19, 26, 58, 60, 55, - 14, 13, 16, 24, 40, 57, 69, 56, - 14, 17, 22, 29, 51, 87, 80, 62, - 18, 22, 37, 58, 68,109,103, 77, - 24, 35, 55, 64, 81,104,113, 92, - 49, 64, 78, 87,103,121,120,101, - 72, 92, 95, 98,112,100,103, 99 - } -}; -static const th_quant_base OC_DEF_BASES_INTRA_C[4]={ - { - 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19 - }, - { - 18, 18, 21, 25, 26, 26, 26, 26, - 18, 20, 22, 26, 26, 
26, 26, 26, - 21, 22, 25, 26, 26, 26, 26, 26, - 25, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26 - }, - { - 17, 18, 22, 31, 36, 36, 36, 36, - 18, 20, 24, 34, 36, 36, 36, 36, - 22, 24, 33, 36, 36, 36, 36, 36, - 31, 34, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 36 - }, - { - 17, 18, 24, 47, 99, 99, 99, 99, - 18, 21, 26, 66, 99, 99, 99, 99, - 24, 26, 56, 99, 99, 99, 99, 99, - 47, 66, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99 - } -}; -static const th_quant_base OC_DEF_BASES_INTER[4]={ - { - 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21 - }, - { - 18, 18, 18, 21, 23, 24, 25, 27, - 18, 18, 21, 23, 24, 25, 27, 28, - 18, 21, 23, 24, 25, 27, 28, 29, - 21, 23, 24, 25, 27, 28, 29, 29, - 23, 24, 25, 27, 28, 29, 29, 29, - 24, 25, 27, 28, 29, 29, 29, 30, - 25, 27, 28, 29, 29, 29, 30, 30, - 27, 28, 29, 29, 29, 30, 30, 30 - }, - { - 17, 17, 17, 20, 23, 26, 28, 32, - 17, 17, 20, 23, 26, 28, 32, 34, - 17, 20, 23, 26, 28, 32, 34, 37, - 20, 23, 26, 28, 32, 34, 37, 37, - 23, 26, 28, 32, 34, 37, 37, 37, - 26, 28, 32, 34, 37, 37, 37, 41, - 28, 32, 34, 37, 37, 37, 41, 42, - 32, 34, 37, 37, 37, 41, 42, 42 - }, - { - 16, 16, 16, 20, 24, 28, 32, 40, - 16, 16, 20, 24, 28, 32, 40, 48, - 16, 20, 24, 28, 32, 40, 48, 64, - 20, 24, 28, 32, 40, 48, 64, 64, - 24, 28, 32, 40, 48, 64, 64, 64, - 28, 32, 40, 48, 64, 64, 64, 96, - 32, 40, 48, 64, 64, 64, 96,128, - 40, 48, 64, 64, 64, 96,128,128 - } -}; - -const th_quant_info 
TH_DEF_QUANT_INFO={ - { - 365,348,333,316,300,287,277,265, - 252,240,229,219,206,197,189,180, - 171,168,160,153,146,139,132,127, - 121,115,110,107,101, 97, 94, 89, - 85, 83, 78, 73, 72, 67, 66, 62, - 60, 59, 56, 53, 52, 48, 47, 43, - 42, 40, 36, 35, 34, 33, 31, 30, - 28, 25, 24, 22, 20, 17, 14, 10 - }, - { - 365,348,333,316,300,287,277,265, - 252,240,229,219,206,197,189,180, - 171,168,160,153,146,139,132,127, - 121,115,110,107,101, 97, 94, 89, - 85, 83, 78, 73, 72, 67, 66, 62, - 60, 59, 56, 53, 52, 48, 47, 43, - 42, 40, 36, 35, 34, 33, 31, 30, - 28, 25, 24, 22, 20, 17, 14, 10 - }, - { - 30,25,20,20,15,15,14,14, - 13,13,12,12,11,11,10,10, - 9, 9, 8, 8, 7, 7, 7, 7, - 6, 6, 6, 6, 5, 5, 5, 5, - 4, 4, 4, 4, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 - }, - { - { - {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTRA_Y}, - {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTRA_C}, - {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTRA_C} - }, - { - {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTER}, - {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTER}, - {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTER} - } - } -}; - - - -/*The Huffman codes used for macro block modes.*/ - -const unsigned char OC_MODE_BITS[2][OC_NMODES]={ - /*Codebook 0: a maximally skewed prefix code.*/ - {1,2,3,4,5,6,7,7}, - /*Codebook 1: a fixed-length code.*/ - {3,3,3,3,3,3,3,3} -}; - -static const unsigned char OC_MODE_CODES[2][OC_NMODES]={ - /*Codebook 0: a maximally skewed prefix code.*/ - {0x00,0x02,0x06,0x0E,0x1E,0x3E,0x7E,0x7F}, - /*Codebook 1: a fixed-length code.*/ - {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07} -}; - - -/*The Huffman codes used for motion vectors.*/ - -const unsigned char OC_MV_BITS[2][64]={ - /*Codebook 0: VLC code.*/ - { - 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, - 8,7,7,7,7,7,7,7,7,6,6,6,6,4,4,3, - 3, - 3,4,4,6,6,6,6,7,7,7,7,7,7,7,7,8, - 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 - }, - /*Codebook 1: (5 bit magnitude, 1 bit sign). 
- This wastes a code word (0x01, negative zero), or a bit (0x00, positive - zero, requires only 5 bits to uniquely decode), but is hopefully not used - very often.*/ - { - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 6, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 - } -}; - -static const unsigned char OC_MV_CODES[2][64]={ - /*Codebook 0: VLC code.*/ - { - 0xFF,0xFD,0xFB,0xF9,0xF7,0xF5,0xF3, - 0xF1,0xEF,0xED,0xEB,0xE9,0xE7,0xE5,0xE3, - 0xE1,0x6F,0x6D,0x6B,0x69,0x67,0x65,0x63, - 0x61,0x2F,0x2D,0x2B,0x29,0x09,0x07,0x02, - 0x00, - 0x01,0x06,0x08,0x28,0x2A,0x2C,0x2E,0x60, - 0x62,0x64,0x66,0x68,0x6A,0x6C,0x6E,0xE0, - 0xE2,0xE4,0xE6,0xE8,0xEA,0xEC,0xEE,0xF0, - 0xF2,0xF4,0xF6,0xF8,0xFA,0xFC,0xFE - }, - /*Codebook 1: (5 bit magnitude, 1 bit sign).*/ - { - 0x3F,0x3D,0x3B,0x39,0x37,0x35,0x33, - 0x31,0x2F,0x2D,0x2B,0x29,0x27,0x25,0x23, - 0x21,0x1F,0x1D,0x1B,0x19,0x17,0x15,0x13, - 0x11,0x0F,0x0D,0x0B,0x09,0x07,0x05,0x03, - 0x00, - 0x02,0x04,0x06,0x08,0x0A,0x0C,0x0E,0x10, - 0x12,0x14,0x16,0x18,0x1A,0x1C,0x1E,0x20, - 0x22,0x24,0x26,0x28,0x2A,0x2C,0x2E,0x30, - 0x32,0x34,0x36,0x38,0x3A,0x3C,0x3E - } -}; - - - -/*Super block run coding scheme: - Codeword Run Length - 0 1 - 10x 2-3 - 110x 4-5 - 1110xx 6-9 - 11110xxx 10-17 - 111110xxxx 18-33 - 111111xxxxxxxxxxxx 34-4129*/ -const ogg_uint16_t OC_SB_RUN_VAL_MIN[8]={1,2,4,6,10,18,34,4130}; -static const unsigned OC_SB_RUN_CODE_PREFIX[7]={ - 0,4,0xC,0x38,0xF0,0x3E0,0x3F000 -}; -const unsigned char OC_SB_RUN_CODE_NBITS[7]={1,3,4,6,8,10,18}; - - -/*Writes the bit pattern for the run length of a super block run to the given - oggpack_buffer. - _opb: The buffer to write to. - _run_count: The length of the run, which must be positive. - _flag: The current flag. 
- _done: Whether or not more flags are to be encoded.*/ -static void oc_sb_run_pack(oggpack_buffer *_opb,ptrdiff_t _run_count, - int _flag,int _done){ - int i; - if(_run_count>=4129){ - do{ - oggpackB_write(_opb,0x3FFFF,18); - _run_count-=4129; - if(_run_count>0)oggpackB_write(_opb,_flag,1); - else if(!_done)oggpackB_write(_opb,!_flag,1); - } - while(_run_count>=4129); - if(_run_count<=0)return; - } - for(i=0;_run_count>=OC_SB_RUN_VAL_MIN[i+1];i++); - oggpackB_write(_opb,OC_SB_RUN_CODE_PREFIX[i]+_run_count-OC_SB_RUN_VAL_MIN[i], - OC_SB_RUN_CODE_NBITS[i]); -} - - - -/*Block run coding scheme: - Codeword Run Length - 0x 1-2 - 10x 3-4 - 110x 5-6 - 1110xx 7-10 - 11110xx 11-14 - 11111xxxx 15-30*/ -const unsigned char OC_BLOCK_RUN_CODE_NBITS[30]={ - 2,2,3,3,4,4,6,6,6,6,7,7,7,7,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9 -}; -static const ogg_uint16_t OC_BLOCK_RUN_CODE_PATTERN[30]={ - 0x000,0x001,0x004,0x005,0x00C,0x00D,0x038, - 0x039,0x03A,0x03B,0x078,0x079,0x07A,0x07B,0x1F0, - 0x1F1,0x1F2,0x1F3,0x1F4,0x1F5,0x1F6,0x1F7,0x1F8, - 0x1F9,0x1FA,0x1FB,0x1FC,0x1FD,0x1FE,0x1FF -}; - - -/*Writes the bit pattern for the run length of a block run to the given - oggpack_buffer. - _opb: The buffer to write to. - _run_count: The length of the run. 
- This must be positive, and no more than 30.*/ -static void oc_block_run_pack(oggpack_buffer *_opb,int _run_count){ - oggpackB_write(_opb,OC_BLOCK_RUN_CODE_PATTERN[_run_count-1], - OC_BLOCK_RUN_CODE_NBITS[_run_count-1]); -} - - - -static void oc_enc_frame_header_pack(oc_enc_ctx *_enc){ - /*Mark this as a data packet.*/ - oggpackB_write(&_enc->opb,0,1); - /*Output the frame type (key frame or delta frame).*/ - oggpackB_write(&_enc->opb,_enc->state.frame_type,1); - /*Write out the current qi list.*/ - oggpackB_write(&_enc->opb,_enc->state.qis[0],6); - if(_enc->state.nqis>1){ - oggpackB_write(&_enc->opb,1,1); - oggpackB_write(&_enc->opb,_enc->state.qis[1],6); - if(_enc->state.nqis>2){ - oggpackB_write(&_enc->opb,1,1); - oggpackB_write(&_enc->opb,_enc->state.qis[2],6); - } - else oggpackB_write(&_enc->opb,0,1); - } - else oggpackB_write(&_enc->opb,0,1); - if(_enc->state.frame_type==OC_INTRA_FRAME){ - /*Key frames have 3 unused configuration bits, holdovers from the VP3 days. - Most of the other unused bits in the VP3 headers were eliminated. - Monty kept these to leave us some wiggle room for future expansion, - though a single bit in all frames would have been far more useful.*/ - oggpackB_write(&_enc->opb,0,3); - } -} - -/*Writes the bit flags for whether or not each super block is partially coded - or not. - These flags are run-length encoded, with the flag value alternating between - each run. 
- Return: The number partially coded SBs.*/ -static unsigned oc_enc_partial_sb_flags_pack(oc_enc_ctx *_enc){ - const oc_sb_flags *sb_flags; - unsigned nsbs; - unsigned sbi; - unsigned npartial; - int flag; - sb_flags=_enc->state.sb_flags; - nsbs=_enc->state.nsbs; - flag=sb_flags[0].coded_partially; - oggpackB_write(&_enc->opb,flag,1); - sbi=npartial=0; - do{ - unsigned run_count; - for(run_count=0;sbiopb,run_count,flag,sbi>=nsbs); - flag=!flag; - } - while(sbistate.sb_flags; - nsbs=_enc->state.nsbs; - /*Skip partially coded super blocks; their flags have already been coded.*/ - for(sbi=0;sb_flags[sbi].coded_partially;sbi++); - flag=sb_flags[sbi].coded_fully; - oggpackB_write(&_enc->opb,flag,1); - do{ - unsigned run_count; - for(run_count=0;sbiopb,run_count,flag,sbi>=nsbs); - flag=!flag; - } - while(sbistate.nsbs)oc_enc_coded_sb_flags_pack(_enc); - sb_maps=(const oc_sb_map *)_enc->state.sb_maps; - sb_flags=_enc->state.sb_flags; - nsbs=_enc->state.nsbs; - frags=_enc->state.frags; - for(sbi=0;sbiopb,flag,1); - run_count=0; - nsbs=sbi=0; - for(pli=0;pli<3;pli++){ - nsbs+=_enc->state.fplanes[pli].nsbs; - for(;sbi=0){ - if(frags[fragi].coded!=flag){ - oc_block_run_pack(&_enc->opb,run_count); - flag=!flag; - run_count=1; - } - else run_count++; - } - } - } - } - } - } - /*Flush any trailing block coded run.*/ - if(run_count>0)oc_block_run_pack(&_enc->opb,run_count); - } -} - -static void oc_enc_mb_modes_pack(oc_enc_ctx *_enc){ - const unsigned char *mode_codes; - const unsigned char *mode_bits; - const unsigned char *mode_ranks; - unsigned *coded_mbis; - size_t ncoded_mbis; - const signed char *mb_modes; - unsigned mbii; - int scheme; - int mb_mode; - scheme=_enc->chooser.scheme_list[0]; - /*Encode the best scheme.*/ - oggpackB_write(&_enc->opb,scheme,3); - /*If the chosen scheme is scheme 0, send the mode frequency ordering.*/ - if(scheme==0){ - for(mb_mode=0;mb_modeopb,_enc->chooser.scheme0_ranks[mb_mode],3); - } - } - mode_ranks=_enc->chooser.mode_ranks[scheme]; - 
mode_bits=OC_MODE_BITS[scheme+1>>3]; - mode_codes=OC_MODE_CODES[scheme+1>>3]; - coded_mbis=_enc->coded_mbis; - ncoded_mbis=_enc->ncoded_mbis; - mb_modes=_enc->state.mb_modes; - for(mbii=0;mbiiopb,mode_codes[rank],mode_bits[rank]); - } -} - -static void oc_enc_mv_pack(oc_enc_ctx *_enc,int _mv_scheme,int _dx,int _dy){ - oggpackB_write(&_enc->opb, - OC_MV_CODES[_mv_scheme][_dx+31],OC_MV_BITS[_mv_scheme][_dx+31]); - oggpackB_write(&_enc->opb, - OC_MV_CODES[_mv_scheme][_dy+31],OC_MV_BITS[_mv_scheme][_dy+31]); -} - -static void oc_enc_mvs_pack(oc_enc_ctx *_enc){ - const unsigned *coded_mbis; - size_t ncoded_mbis; - const oc_mb_map *mb_maps; - const signed char *mb_modes; - const oc_fragment *frags; - const oc_mv *frag_mvs; - unsigned mbii; - int mv_scheme; - /*Choose the coding scheme.*/ - mv_scheme=_enc->mv_bits[1]<_enc->mv_bits[0]; - oggpackB_write(&_enc->opb,mv_scheme,1); - /*Encode the motion vectors. - Macro blocks are iterated in Hilbert scan order, but the MVs within the - macro block are coded in raster order.*/ - coded_mbis=_enc->coded_mbis; - ncoded_mbis=_enc->ncoded_mbis; - mb_modes=_enc->state.mb_modes; - mb_maps=(const oc_mb_map *)_enc->state.mb_maps; - frags=_enc->state.frags; - frag_mvs=(const oc_mv *)_enc->state.frag_mvs; - for(mbii=0;mbiistate.nqis<=1)return; - ncoded_fragis=_enc->state.ntotal_coded_fragis; - if(ncoded_fragis<=0)return; - coded_fragis=_enc->state.coded_fragis; - frags=_enc->state.frags; - flag=!!frags[coded_fragis[0]].qii; - oggpackB_write(&_enc->opb,flag,1); - nqi0=0; - for(fragii=0;fragiiopb,run_count,flag,fragii>=ncoded_fragis); - flag=!flag; - } - if(_enc->state.nqis<3||nqi0>=ncoded_fragis)return; - for(fragii=0;!frags[coded_fragis[fragii]].qii;fragii++); - flag=frags[coded_fragis[fragii]].qii-1; - oggpackB_write(&_enc->opb,flag,1); - while(fragiiopb,run_count,flag,fragii>=ncoded_fragis); - flag=!flag; - } -} - -/*Counts the tokens of each type used for the given range of coefficient - indices in zig-zag order. 
- _zzi_start: The first zig-zag index to include. - _zzi_end: The first zig-zag index to not include. - _token_counts_y: Returns the token counts for the Y' plane. - _token_counts_c: Returns the token counts for the Cb and Cr planes.*/ -static void oc_enc_count_tokens(oc_enc_ctx *_enc,int _zzi_start,int _zzi_end, - ptrdiff_t _token_counts_y[32],ptrdiff_t _token_counts_c[32]){ - const unsigned char *dct_tokens; - ptrdiff_t ndct_tokens; - int pli; - int zzi; - ptrdiff_t ti; - memset(_token_counts_y,0,32*sizeof(*_token_counts_y)); - memset(_token_counts_c,0,32*sizeof(*_token_counts_c)); - for(zzi=_zzi_start;zzi<_zzi_end;zzi++){ - dct_tokens=_enc->dct_tokens[0][zzi]; - ndct_tokens=_enc->ndct_tokens[0][zzi]; - for(ti=_enc->dct_token_offs[0][zzi];tidct_tokens[pli][zzi]; - ndct_tokens=_enc->ndct_tokens[pli][zzi]; - for(ti=_enc->dct_token_offs[pli][zzi];tihuff_codes[huffi+huff_offs][token].nbits; - } - } -} - -/*Returns the Huffman index using the fewest number of bits.*/ -static int oc_select_huff_idx(size_t _bit_counts[16]){ - int best_huffi; - int huffi; - best_huffi=0; - for(huffi=1;huffi<16;huffi++)if(_bit_counts[huffi]<_bit_counts[best_huffi]){ - best_huffi=huffi; - } - return best_huffi; -} - -static void oc_enc_huff_group_pack(oc_enc_ctx *_enc, - int _zzi_start,int _zzi_end,const int _huff_idxs[2]){ - int zzi; - for(zzi=_zzi_start;zzi<_zzi_end;zzi++){ - int pli; - for(pli=0;pli<3;pli++){ - const unsigned char *dct_tokens; - const ogg_uint16_t *extra_bits; - ptrdiff_t ndct_tokens; - const th_huff_code *huff_codes; - ptrdiff_t ti; - dct_tokens=_enc->dct_tokens[pli][zzi]; - extra_bits=_enc->extra_bits[pli][zzi]; - ndct_tokens=_enc->ndct_tokens[pli][zzi]; - huff_codes=_enc->huff_codes[_huff_idxs[pli+1>>1]]; - for(ti=_enc->dct_token_offs[pli][zzi];tiopb,huff_codes[token].pattern, - huff_codes[token].nbits); - neb=OC_DCT_TOKEN_EXTRA_BITS[token]; - if(neb)oggpackB_write(&_enc->opb,extra_bits[ti],neb); - } - } - } -} - -static void oc_enc_residual_tokens_pack(oc_enc_ctx 
*_enc){ - static const unsigned char OC_HUFF_GROUP_MIN[6]={0,1,6,15,28,64}; - static const unsigned char *OC_HUFF_GROUP_MAX=OC_HUFF_GROUP_MIN+1; - ptrdiff_t token_counts_y[32]; - ptrdiff_t token_counts_c[32]; - size_t bits_y[16]; - size_t bits_c[16]; - int huff_idxs[2]; - int frame_type; - int hgi; - frame_type=_enc->state.frame_type; - /*Choose which Huffman tables to use for the DC token list.*/ - oc_enc_count_tokens(_enc,0,1,token_counts_y,token_counts_c); - memset(bits_y,0,sizeof(bits_y)); - memset(bits_c,0,sizeof(bits_c)); - oc_enc_count_bits(_enc,0,token_counts_y,bits_y); - oc_enc_count_bits(_enc,0,token_counts_c,bits_c); - huff_idxs[0]=oc_select_huff_idx(bits_y); - huff_idxs[1]=oc_select_huff_idx(bits_c); - /*Write the DC token list with the chosen tables.*/ - oggpackB_write(&_enc->opb,huff_idxs[0],4); - oggpackB_write(&_enc->opb,huff_idxs[1],4); - _enc->huff_idxs[frame_type][0][0]=(unsigned char)huff_idxs[0]; - _enc->huff_idxs[frame_type][0][1]=(unsigned char)huff_idxs[1]; - oc_enc_huff_group_pack(_enc,0,1,huff_idxs); - /*Choose which Huffman tables to use for the AC token lists.*/ - memset(bits_y,0,sizeof(bits_y)); - memset(bits_c,0,sizeof(bits_c)); - for(hgi=1;hgi<5;hgi++){ - oc_enc_count_tokens(_enc,OC_HUFF_GROUP_MIN[hgi],OC_HUFF_GROUP_MAX[hgi], - token_counts_y,token_counts_c); - oc_enc_count_bits(_enc,hgi,token_counts_y,bits_y); - oc_enc_count_bits(_enc,hgi,token_counts_c,bits_c); - } - huff_idxs[0]=oc_select_huff_idx(bits_y); - huff_idxs[1]=oc_select_huff_idx(bits_c); - /*Write the AC token lists using the chosen tables.*/ - oggpackB_write(&_enc->opb,huff_idxs[0],4); - oggpackB_write(&_enc->opb,huff_idxs[1],4); - _enc->huff_idxs[frame_type][1][0]=(unsigned char)huff_idxs[0]; - _enc->huff_idxs[frame_type][1][1]=(unsigned char)huff_idxs[1]; - for(hgi=1;hgi<5;hgi++){ - huff_idxs[0]+=16; - huff_idxs[1]+=16; - oc_enc_huff_group_pack(_enc, - OC_HUFF_GROUP_MIN[hgi],OC_HUFF_GROUP_MAX[hgi],huff_idxs); - } -} - -static void oc_enc_frame_pack(oc_enc_ctx *_enc){ 
- oggpackB_reset(&_enc->opb); - /*Only proceed if we have some coded blocks. - If there are no coded blocks, we can drop this frame simply by emitting a - 0 byte packet.*/ - if(_enc->state.ntotal_coded_fragis>0){ - oc_enc_frame_header_pack(_enc); - if(_enc->state.frame_type==OC_INTER_FRAME){ - /*Coded block flags, MB modes, and MVs are only needed for delta frames.*/ - oc_enc_coded_flags_pack(_enc); - oc_enc_mb_modes_pack(_enc); - oc_enc_mvs_pack(_enc); - } - oc_enc_block_qis_pack(_enc); - oc_enc_tokenize_finish(_enc); - oc_enc_residual_tokens_pack(_enc); - } - /*Success: Mark the packet as ready to be flushed.*/ - _enc->packet_state=OC_PACKET_READY; -#if defined(OC_COLLECT_METRICS) - oc_enc_mode_metrics_collect(_enc); -#endif -} - - -void oc_enc_vtable_init_c(oc_enc_ctx *_enc){ - /*The implementations prefixed with oc_enc_ are encoder-specific. - The rest we re-use from the decoder.*/ - _enc->opt_vtable.frag_sad=oc_enc_frag_sad_c; - _enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_c; - _enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_c; - _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_c; - _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_c; - _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_c; - _enc->opt_vtable.frag_sub=oc_enc_frag_sub_c; - _enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_c; - _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_c; - _enc->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c; - _enc->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c; - _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_c; -} - -/*Initialize the macro block neighbor lists for MC analysis. 
- This assumes that the entire mb_info memory region has been initialized with - zeros.*/ -static void oc_enc_mb_info_init(oc_enc_ctx *_enc){ - oc_mb_enc_info *embs; - const signed char *mb_modes; - unsigned nhsbs; - unsigned nvsbs; - unsigned nhmbs; - unsigned nvmbs; - unsigned sby; - mb_modes=_enc->state.mb_modes; - embs=_enc->mb_info; - nhsbs=_enc->state.fplanes[0].nhsbs; - nvsbs=_enc->state.fplanes[0].nvsbs; - nhmbs=_enc->state.nhmbs; - nvmbs=_enc->state.nvmbs; - for(sby=0;sby>1); - mby=2*sby+(quadi+1>>1&1); - /*Fill in the neighbors with current motion vectors available.*/ - for(ni=0;ni=nhmbs||nmby<0||nmby>=nvmbs)continue; - nmbi=(nmby&~1)*nhmbs+((nmbx&~1)<<1)+OC_MB_MAP[nmby&1][nmbx&1]; - if(mb_modes[nmbi]==OC_MODE_INVALID)continue; - embs[mbi].cneighbors[embs[mbi].ncneighbors++]=nmbi; - } - /*Fill in the neighbors with previous motion vectors available.*/ - for(ni=0;ni<4;ni++){ - nmbx=mbx+PDX[ni]; - nmby=mby+PDY[ni]; - if(nmbx<0||nmbx>=nhmbs||nmby<0||nmby>=nvmbs)continue; - nmbi=(nmby&~1)*nhmbs+((nmbx&~1)<<1)+OC_MB_MAP[nmby&1][nmbx&1]; - if(mb_modes[nmbi]==OC_MODE_INVALID)continue; - embs[mbi].pneighbors[embs[mbi].npneighbors++]=nmbi; - } - } - } - } -} - -static int oc_enc_set_huffman_codes(oc_enc_ctx *_enc, - const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]){ - int ret; - if(_enc==NULL)return TH_EFAULT; - if(_enc->packet_state>OC_PACKET_SETUP_HDR)return TH_EINVAL; - if(_codes==NULL)_codes=TH_VP31_HUFF_CODES; - /*Validate the codes.*/ - oggpackB_reset(&_enc->opb); - ret=oc_huff_codes_pack(&_enc->opb,_codes); - if(ret<0)return ret; - memcpy(_enc->huff_codes,_codes,sizeof(_enc->huff_codes)); - return 0; -} - -/*Sets the quantization parameters to use. - This may only be called before the setup header is written. - If it is called multiple times, only the last call has any effect. - _qinfo: The quantization parameters. - These are described in more detail in theoraenc.h. 
- This can be NULL, in which case the default quantization parameters - will be used.*/ -static int oc_enc_set_quant_params(oc_enc_ctx *_enc, - const th_quant_info *_qinfo){ - int qi; - int pli; - int qti; - if(_enc==NULL)return TH_EFAULT; - if(_enc->packet_state>OC_PACKET_SETUP_HDR)return TH_EINVAL; - if(_qinfo==NULL)_qinfo=&TH_DEF_QUANT_INFO; - /*TODO: Analyze for packing purposes instead of just doing a shallow copy.*/ - memcpy(&_enc->qinfo,_qinfo,sizeof(_enc->qinfo)); - for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){ - _enc->state.dequant_tables[qi][pli][qti]= - _enc->state.dequant_table_data[qi][pli][qti]; - _enc->enquant_tables[qi][pli][qti]=_enc->enquant_table_data[qi][pli][qti]; - } - oc_enquant_tables_init(_enc->state.dequant_tables, - _enc->enquant_tables,_qinfo); - memcpy(_enc->state.loop_filter_limits,_qinfo->loop_filter_limits, - sizeof(_enc->state.loop_filter_limits)); - oc_enquant_qavg_init(_enc->log_qavg,_enc->state.dequant_tables, - _enc->state.info.pixel_fmt); - return 0; -} - -static void oc_enc_clear(oc_enc_ctx *_enc); - -static int oc_enc_init(oc_enc_ctx *_enc,const th_info *_info){ - th_info info; - size_t mcu_nmbs; - ptrdiff_t mcu_nfrags; - int hdec; - int vdec; - int ret; - int pli; - /*Clean up the requested settings.*/ - memcpy(&info,_info,sizeof(info)); - info.version_major=TH_VERSION_MAJOR; - info.version_minor=TH_VERSION_MINOR; - info.version_subminor=TH_VERSION_SUB; - if(info.quality>63)info.quality=63; - if(info.quality<0)info.quality=32; - if(info.target_bitrate<0)info.target_bitrate=0; - /*Initialize the shared encoder/decoder state.*/ - ret=oc_state_init(&_enc->state,&info,4); - if(ret<0)return ret; - _enc->mb_info=_ogg_calloc(_enc->state.nmbs,sizeof(*_enc->mb_info)); - _enc->frag_dc=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_dc)); - _enc->coded_mbis= - (unsigned *)_ogg_malloc(_enc->state.nmbs*sizeof(*_enc->coded_mbis)); - hdec=!(_enc->state.info.pixel_fmt&1); - vdec=!(_enc->state.info.pixel_fmt&2); - /*If 
chroma is sub-sampled in the vertical direction, we have to encode two - super block rows of Y' for each super block row of Cb and Cr.*/ - _enc->mcu_nvsbs=1<mcu_nvsbs*_enc->state.fplanes[0].nhsbs*(size_t)4; - mcu_nfrags=4*mcu_nmbs+(8*mcu_nmbs>>hdec+vdec); - _enc->mcu_skip_ssd=(unsigned *)_ogg_malloc( - mcu_nfrags*sizeof(*_enc->mcu_skip_ssd)); - for(pli=0;pli<3;pli++){ - _enc->dct_tokens[pli]=(unsigned char **)oc_malloc_2d(64, - _enc->state.fplanes[pli].nfrags,sizeof(**_enc->dct_tokens)); - _enc->extra_bits[pli]=(ogg_uint16_t **)oc_malloc_2d(64, - _enc->state.fplanes[pli].nfrags,sizeof(**_enc->extra_bits)); - } -#if defined(OC_COLLECT_METRICS) - _enc->frag_satd=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_satd)); - _enc->frag_ssd=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_ssd)); -#endif -#if defined(OC_X86_ASM) - oc_enc_vtable_init_x86(_enc); -#else - oc_enc_vtable_init_c(_enc); -#endif - _enc->keyframe_frequency_force=1<<_enc->state.info.keyframe_granule_shift; - _enc->state.qis[0]=_enc->state.info.quality; - _enc->state.nqis=1; - oc_rc_state_init(&_enc->rc,_enc); - oggpackB_writeinit(&_enc->opb); - if(_enc->mb_info==NULL||_enc->frag_dc==NULL||_enc->coded_mbis==NULL|| - _enc->mcu_skip_ssd==NULL||_enc->dct_tokens[0]==NULL|| - _enc->dct_tokens[1]==NULL||_enc->dct_tokens[2]==NULL|| - _enc->extra_bits[0]==NULL||_enc->extra_bits[1]==NULL|| - _enc->extra_bits[2]==NULL -#if defined(OC_COLLECT_METRICS) - ||_enc->frag_satd==NULL||_enc->frag_ssd==NULL -#endif - ){ - oc_enc_clear(_enc); - return TH_EFAULT; - } - oc_mode_scheme_chooser_init(&_enc->chooser); - oc_enc_mb_info_init(_enc); - memset(_enc->huff_idxs,0,sizeof(_enc->huff_idxs)); - /*Reset the packet-out state machine.*/ - _enc->packet_state=OC_PACKET_INFO_HDR; - _enc->dup_count=0; - _enc->nqueued_dups=0; - _enc->prev_dup_count=0; - /*Enable speed optimizations up through early skip by default.*/ - _enc->sp_level=OC_SP_LEVEL_EARLY_SKIP; - /*Disable VP3 compatibility by default.*/ - _enc->vp3_compatible=0; - 
/*No INTER frames coded yet.*/ - _enc->coded_inter_frame=0; - memcpy(_enc->huff_codes,TH_VP31_HUFF_CODES,sizeof(_enc->huff_codes)); - oc_enc_set_quant_params(_enc,NULL); - return 0; -} - -static void oc_enc_clear(oc_enc_ctx *_enc){ - int pli; - oc_rc_state_clear(&_enc->rc); -#if defined(OC_COLLECT_METRICS) - oc_enc_mode_metrics_dump(_enc); -#endif - oggpackB_writeclear(&_enc->opb); -#if defined(OC_COLLECT_METRICS) - _ogg_free(_enc->frag_ssd); - _ogg_free(_enc->frag_satd); -#endif - for(pli=3;pli-->0;){ - oc_free_2d(_enc->extra_bits[pli]); - oc_free_2d(_enc->dct_tokens[pli]); - } - _ogg_free(_enc->mcu_skip_ssd); - _ogg_free(_enc->coded_mbis); - _ogg_free(_enc->frag_dc); - _ogg_free(_enc->mb_info); - oc_state_clear(&_enc->state); -} - -static void oc_enc_drop_frame(th_enc_ctx *_enc){ - /*Use the previous frame's reconstruction.*/ - _enc->state.ref_frame_idx[OC_FRAME_SELF]= - _enc->state.ref_frame_idx[OC_FRAME_PREV]; - /*Flag motion vector analysis about the frame drop.*/ - _enc->prevframe_dropped=1; - /*Zero the packet.*/ - oggpackB_reset(&_enc->opb); -} - -static void oc_enc_compress_keyframe(oc_enc_ctx *_enc,int _recode){ - if(_enc->state.info.target_bitrate>0){ - _enc->state.qis[0]=oc_enc_select_qi(_enc,OC_INTRA_FRAME, - _enc->state.curframe_num>0); - _enc->state.nqis=1; - } - oc_enc_calc_lambda(_enc,OC_INTRA_FRAME); - oc_enc_analyze_intra(_enc,_recode); - oc_enc_frame_pack(_enc); - /*On the first frame, the previous call was an initial dry-run to prime - feed-forward statistics.*/ - if(!_recode&&_enc->state.curframe_num==0){ - if(_enc->state.info.target_bitrate>0){ - oc_enc_update_rc_state(_enc,oggpackB_bytes(&_enc->opb)<<3, - OC_INTRA_FRAME,_enc->state.qis[0],1,0); - } - oc_enc_compress_keyframe(_enc,1); - } -} - -static void oc_enc_compress_frame(oc_enc_ctx *_enc,int _recode){ - if(_enc->state.info.target_bitrate>0){ - _enc->state.qis[0]=oc_enc_select_qi(_enc,OC_INTER_FRAME,1); - _enc->state.nqis=1; - } - oc_enc_calc_lambda(_enc,OC_INTER_FRAME); - 
if(oc_enc_analyze_inter(_enc,_enc->rc.twopass!=2,_recode)){ - /*Mode analysis thinks this should have been a keyframe; start over.*/ - oc_enc_compress_keyframe(_enc,1); - } - else{ - oc_enc_frame_pack(_enc); - if(!_enc->coded_inter_frame){ - /*On the first INTER frame, the previous call was an initial dry-run to - prime feed-forward statistics.*/ - _enc->coded_inter_frame=1; - if(_enc->state.info.target_bitrate>0){ - /*Rate control also needs to prime.*/ - oc_enc_update_rc_state(_enc,oggpackB_bytes(&_enc->opb)<<3, - OC_INTER_FRAME,_enc->state.qis[0],1,0); - } - oc_enc_compress_frame(_enc,1); - } - } -} - -/*Set the granule position for the next packet to output based on the current - internal state.*/ -static void oc_enc_set_granpos(oc_enc_ctx *_enc){ - unsigned dup_offs; - /*Add an offset for the number of duplicate frames we've emitted so far.*/ - dup_offs=_enc->prev_dup_count-_enc->nqueued_dups; - /*If the current frame was a keyframe, use it for the high part.*/ - if(_enc->state.frame_type==OC_INTRA_FRAME){ - _enc->state.granpos=(_enc->state.curframe_num+_enc->state.granpos_bias<< - _enc->state.info.keyframe_granule_shift)+dup_offs; - } - /*Otherwise use the last keyframe in the high part and put the current frame - in the low part.*/ - else{ - _enc->state.granpos= - (_enc->state.keyframe_num+_enc->state.granpos_bias<< - _enc->state.info.keyframe_granule_shift) - +_enc->state.curframe_num-_enc->state.keyframe_num+dup_offs; - } -} - - -th_enc_ctx *th_encode_alloc(const th_info *_info){ - oc_enc_ctx *enc; - if(_info==NULL)return NULL; - enc=_ogg_malloc(sizeof(*enc)); - if(enc==NULL||oc_enc_init(enc,_info)<0){ - _ogg_free(enc); - return NULL; - } - return enc; -} - -void th_encode_free(th_enc_ctx *_enc){ - if(_enc!=NULL){ - oc_enc_clear(_enc); - _ogg_free(_enc); - } -} - -int th_encode_ctl(th_enc_ctx *_enc,int _req,void *_buf,size_t _buf_sz){ - switch(_req){ - case TH_ENCCTL_SET_HUFFMAN_CODES:{ - if(_buf==NULL&&_buf_sz!=0|| - 
_buf!=NULL&&_buf_sz!=sizeof(th_huff_table)*TH_NHUFFMAN_TABLES){ - return TH_EINVAL; - } - return oc_enc_set_huffman_codes(_enc,(const th_huff_table *)_buf); - }break; - case TH_ENCCTL_SET_QUANT_PARAMS:{ - if(_buf==NULL&&_buf_sz!=0|| - _buf!=NULL&&_buf_sz!=sizeof(th_quant_info)){ - return TH_EINVAL; - } - return oc_enc_set_quant_params(_enc,(th_quant_info *)_buf); - }break; - case TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE:{ - ogg_uint32_t keyframe_frequency_force; - if(_enc==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(keyframe_frequency_force))return TH_EINVAL; - keyframe_frequency_force=*(ogg_uint32_t *)_buf; - if(keyframe_frequency_force<=0)keyframe_frequency_force=1; - if(_enc->packet_state==OC_PACKET_INFO_HDR){ - /*It's still early enough to enlarge keyframe_granule_shift.*/ - _enc->state.info.keyframe_granule_shift=OC_CLAMPI( - _enc->state.info.keyframe_granule_shift, - OC_ILOG_32(keyframe_frequency_force-1),31); - } - _enc->keyframe_frequency_force=OC_MINI(keyframe_frequency_force, - (ogg_uint32_t)1U<<_enc->state.info.keyframe_granule_shift); - *(ogg_uint32_t *)_buf=_enc->keyframe_frequency_force; - return 0; - }break; - case TH_ENCCTL_SET_VP3_COMPATIBLE:{ - int vp3_compatible; - if(_enc==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(vp3_compatible))return TH_EINVAL; - vp3_compatible=*(int *)_buf; - _enc->vp3_compatible=vp3_compatible; - if(oc_enc_set_huffman_codes(_enc,TH_VP31_HUFF_CODES)<0)vp3_compatible=0; - if(oc_enc_set_quant_params(_enc,&TH_VP31_QUANT_INFO)<0)vp3_compatible=0; - if(_enc->state.info.pixel_fmt!=TH_PF_420|| - _enc->state.info.pic_width<_enc->state.info.frame_width|| - _enc->state.info.pic_height<_enc->state.info.frame_height|| - /*If we have more than 4095 super blocks, VP3's RLE coding might - overflow. - We could overcome this by ensuring we flip the coded/not-coded flags on - at least one super block in the frame, but we pick the simple solution - of just telling the user the stream will be incompatible instead. 
- It's unlikely the old VP3 codec would be able to decode streams at this - resolution in real time in the first place.*/ - _enc->state.nsbs>4095){ - vp3_compatible=0; - } - *(int *)_buf=vp3_compatible; - return 0; - }break; - case TH_ENCCTL_GET_SPLEVEL_MAX:{ - if(_enc==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(int))return TH_EINVAL; - *(int *)_buf=OC_SP_LEVEL_MAX; - return 0; - }break; - case TH_ENCCTL_SET_SPLEVEL:{ - int speed; - if(_enc==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(speed))return TH_EINVAL; - speed=*(int *)_buf; - if(speed<0||speed>OC_SP_LEVEL_MAX)return TH_EINVAL; - _enc->sp_level=speed; - return 0; - }break; - case TH_ENCCTL_GET_SPLEVEL:{ - if(_enc==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(int))return TH_EINVAL; - *(int *)_buf=_enc->sp_level; - return 0; - } - case TH_ENCCTL_SET_DUP_COUNT:{ - int dup_count; - if(_enc==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(dup_count))return TH_EINVAL; - dup_count=*(int *)_buf; - if(dup_count>=_enc->keyframe_frequency_force)return TH_EINVAL; - _enc->dup_count=OC_MAXI(dup_count,0); - return 0; - }break; - case TH_ENCCTL_SET_QUALITY:{ - int qi; - if(_enc==NULL||_buf==NULL)return TH_EFAULT; - if(_enc->state.info.target_bitrate>0)return TH_EINVAL; - qi=*(int *)_buf; - if(qi<0||qi>63)return TH_EINVAL; - _enc->state.info.quality=qi; - _enc->state.qis[0]=(unsigned char)qi; - _enc->state.nqis=1; - return 0; - }break; - case TH_ENCCTL_SET_BITRATE:{ - long bitrate; - int reset; - if(_enc==NULL||_buf==NULL)return TH_EFAULT; - bitrate=*(long *)_buf; - if(bitrate<=0)return TH_EINVAL; - reset=_enc->state.info.target_bitrate<=0; - _enc->state.info.target_bitrate=bitrate>INT_MAX?INT_MAX:bitrate; - if(reset)oc_rc_state_init(&_enc->rc,_enc); - else oc_enc_rc_resize(_enc); - return 0; - }break; - case TH_ENCCTL_SET_RATE_FLAGS:{ - int set; - if(_enc==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(set))return TH_EINVAL; - if(_enc->state.info.target_bitrate<=0)return 
TH_EINVAL; - set=*(int *)_buf; - _enc->rc.drop_frames=set&TH_RATECTL_DROP_FRAMES; - _enc->rc.cap_overflow=set&TH_RATECTL_CAP_OVERFLOW; - _enc->rc.cap_underflow=set&TH_RATECTL_CAP_UNDERFLOW; - return 0; - }break; - case TH_ENCCTL_SET_RATE_BUFFER:{ - int set; - if(_enc==NULL||_buf==NULL)return TH_EFAULT; - if(_buf_sz!=sizeof(set))return TH_EINVAL; - if(_enc->state.info.target_bitrate<=0)return TH_EINVAL; - set=*(int *)_buf; - _enc->rc.buf_delay=set; - oc_enc_rc_resize(_enc); - *(int *)_buf=_enc->rc.buf_delay; - return 0; - }break; - case TH_ENCCTL_2PASS_OUT:{ - if(_enc==NULL||_buf==NULL)return TH_EFAULT; - if(_enc->state.info.target_bitrate<=0|| - _enc->state.curframe_num>=0&&_enc->rc.twopass!=1|| - _buf_sz!=sizeof(unsigned char *)){ - return TH_EINVAL; - } - return oc_enc_rc_2pass_out(_enc,(unsigned char **)_buf); - }break; - case TH_ENCCTL_2PASS_IN:{ - if(_enc==NULL)return TH_EFAULT; - if(_enc->state.info.target_bitrate<=0|| - _enc->state.curframe_num>=0&&_enc->rc.twopass!=2){ - return TH_EINVAL; - } - return oc_enc_rc_2pass_in(_enc,_buf,_buf_sz); - }break; - default:return TH_EIMPL; - } -} - -int th_encode_flushheader(th_enc_ctx *_enc,th_comment *_tc,ogg_packet *_op){ - if(_enc==NULL)return TH_EFAULT; - return oc_state_flushheader(&_enc->state,&_enc->packet_state,&_enc->opb, - &_enc->qinfo,(const th_huff_table *)_enc->huff_codes,th_version_string(), - _tc,_op); -} - -static void oc_img_plane_copy_pad(th_img_plane *_dst,th_img_plane *_src, - ogg_int32_t _pic_x,ogg_int32_t _pic_y, - ogg_int32_t _pic_width,ogg_int32_t _pic_height){ - unsigned char *dst; - int dstride; - ogg_uint32_t frame_width; - ogg_uint32_t frame_height; - ogg_uint32_t y; - frame_width=_dst->width; - frame_height=_dst->height; - /*If we have _no_ data, just encode a dull green.*/ - if(_pic_width==0||_pic_height==0){ - dst=_dst->data; - dstride=_dst->stride; - for(y=0;ystride; - sstride=_src->stride; - dst_data=_dst->data; - src_data=_src->data; - dst=dst_data+_pic_y*(ptrdiff_t)dstride+_pic_x; - 
src=src_data+_pic_y*(ptrdiff_t)sstride+_pic_x; - for(y=0;y<_pic_height;y++){ - memcpy(dst,src,_pic_width); - dst+=dstride; - src+=sstride; - } - /*Step 2: Perform a low-pass extension into the padding region.*/ - /*Left side.*/ - for(x=_pic_x;x-->0;){ - dst=dst_data+_pic_y*(ptrdiff_t)dstride+x; - for(y=0;y<_pic_height;y++){ - dst[0]=(dst[1]<<1)+(dst-(dstride&-(y>0)))[1] - +(dst+(dstride&-(y+1<_pic_height)))[1]+2>>2; - dst+=dstride; - } - } - /*Right side.*/ - for(x=_pic_x+_pic_width;x0)))[0] - +(dst+(dstride&-(y+1<_pic_height)))[0]+2>>2; - dst+=dstride; - } - } - /*Top.*/ - dst=dst_data+_pic_y*(ptrdiff_t)dstride; - for(y=_pic_y;y-->0;){ - for(x=0;x0)] - +dst[x+(x+1>2; - } - dst-=dstride; - } - /*Bottom.*/ - dst=dst_data+(_pic_y+_pic_height)*(ptrdiff_t)dstride; - for(y=_pic_y+_pic_height;y0)] - +(dst-dstride)[x+(x+1>2; - } - dst+=dstride; - } - } -} - -int th_encode_ycbcr_in(th_enc_ctx *_enc,th_ycbcr_buffer _img){ - th_ycbcr_buffer img; - int cframe_width; - int cframe_height; - int cpic_width; - int cpic_height; - int cpic_x; - int cpic_y; - int hdec; - int vdec; - int pli; - int refi; - int drop; - /*Step 1: validate parameters.*/ - if(_enc==NULL||_img==NULL)return TH_EFAULT; - if(_enc->packet_state==OC_PACKET_DONE)return TH_EINVAL; - if(_enc->rc.twopass&&_enc->rc.twopass_buffer_bytes==0)return TH_EINVAL; - if((ogg_uint32_t)_img[0].width!=_enc->state.info.frame_width|| - (ogg_uint32_t)_img[0].height!=_enc->state.info.frame_height){ - return TH_EINVAL; - } - hdec=!(_enc->state.info.pixel_fmt&1); - vdec=!(_enc->state.info.pixel_fmt&2); - cframe_width=_enc->state.info.frame_width>>hdec; - cframe_height=_enc->state.info.frame_height>>vdec; - if(_img[1].width!=cframe_width||_img[2].width!=cframe_width|| - _img[1].height!=cframe_height||_img[2].height!=cframe_height){ - return TH_EINVAL; - } - /*Step 2: Copy the input to our internal buffer. 
- This lets us add padding, if necessary, so we don't have to worry about - dereferencing possibly invalid addresses, and allows us to use the same - strides and fragment offsets for both the input frame and the reference - frames.*/ - /*Flip the input buffer upside down.*/ - oc_ycbcr_buffer_flip(img,_img); - oc_img_plane_copy_pad(_enc->state.ref_frame_bufs[OC_FRAME_IO]+0,img+0, - _enc->state.info.pic_x,_enc->state.info.pic_y, - _enc->state.info.pic_width,_enc->state.info.pic_height); - cpic_x=_enc->state.info.pic_x>>hdec; - cpic_y=_enc->state.info.pic_y>>vdec; - cpic_width=(_enc->state.info.pic_x+_enc->state.info.pic_width+hdec>>hdec) - -cpic_x; - cpic_height=(_enc->state.info.pic_y+_enc->state.info.pic_height+vdec>>vdec) - -cpic_y; - for(pli=1;pli<3;pli++){ - oc_img_plane_copy_pad(_enc->state.ref_frame_bufs[OC_FRAME_IO]+pli,img+pli, - cpic_x,cpic_y,cpic_width,cpic_height); - } - /*Step 3: Update the buffer state.*/ - if(_enc->state.ref_frame_idx[OC_FRAME_SELF]>=0){ - _enc->state.ref_frame_idx[OC_FRAME_PREV]= - _enc->state.ref_frame_idx[OC_FRAME_SELF]; - if(_enc->state.frame_type==OC_INTRA_FRAME){ - /*The new frame becomes both the previous and gold reference frames.*/ - _enc->state.keyframe_num=_enc->state.curframe_num; - _enc->state.ref_frame_idx[OC_FRAME_GOLD]= - _enc->state.ref_frame_idx[OC_FRAME_SELF]; - } - } - /*Select a free buffer to use for the reconstructed version of this frame.*/ - for(refi=0;refi==_enc->state.ref_frame_idx[OC_FRAME_GOLD]|| - refi==_enc->state.ref_frame_idx[OC_FRAME_PREV];refi++); - _enc->state.ref_frame_idx[OC_FRAME_SELF]=refi; - _enc->state.curframe_num+=_enc->prev_dup_count+1; - /*Step 4: Compress the frame.*/ - /*Start with a keyframe, and don't allow the generation of invalid files that - overflow the keyframe_granule_shift.*/ - if(_enc->rc.twopass_force_kf||_enc->state.curframe_num==0|| - _enc->state.curframe_num-_enc->state.keyframe_num+_enc->dup_count>= - _enc->keyframe_frequency_force){ - oc_enc_compress_keyframe(_enc,0); - 
drop=0; - } - else{ - oc_enc_compress_frame(_enc,0); - drop=1; - } - oc_restore_fpu(&_enc->state); - /*drop currently indicates if the frame is droppable.*/ - if(_enc->state.info.target_bitrate>0){ - drop=oc_enc_update_rc_state(_enc,oggpackB_bytes(&_enc->opb)<<3, - _enc->state.frame_type,_enc->state.qis[0],0,drop); - } - else drop=0; - /*drop now indicates if the frame was dropped.*/ - if(drop)oc_enc_drop_frame(_enc); - else _enc->prevframe_dropped=0; - _enc->packet_state=OC_PACKET_READY; - _enc->prev_dup_count=_enc->nqueued_dups=_enc->dup_count; - _enc->dup_count=0; -#if defined(OC_DUMP_IMAGES) - oc_enc_set_granpos(_enc); - oc_state_dump_frame(&_enc->state,OC_FRAME_IO,"src"); - oc_state_dump_frame(&_enc->state,OC_FRAME_SELF,"rec"); -#endif - return 0; -} - -int th_encode_packetout(th_enc_ctx *_enc,int _last_p,ogg_packet *_op){ - if(_enc==NULL||_op==NULL)return TH_EFAULT; - if(_enc->packet_state==OC_PACKET_READY){ - _enc->packet_state=OC_PACKET_EMPTY; - if(_enc->rc.twopass!=1){ - unsigned char *packet; - packet=oggpackB_get_buffer(&_enc->opb); - /*If there's no packet, malloc failed while writing; it's lost forever.*/ - if(packet==NULL)return TH_EFAULT; - _op->packet=packet; - _op->bytes=oggpackB_bytes(&_enc->opb); - } - /*For the first pass in 2-pass mode, don't emit any packet data.*/ - else{ - _op->packet=NULL; - _op->bytes=0; - } - } - else if(_enc->packet_state==OC_PACKET_EMPTY){ - if(_enc->nqueued_dups>0){ - _enc->nqueued_dups--; - _op->packet=NULL; - _op->bytes=0; - } - else{ - if(_last_p)_enc->packet_state=OC_PACKET_DONE; - return 0; - } - } - else return 0; - _last_p=_last_p&&_enc->nqueued_dups<=0; - _op->b_o_s=0; - _op->e_o_s=_last_p; - oc_enc_set_granpos(_enc); - _op->packetno=th_granule_frame(_enc,_enc->state.granpos)+3; - _op->granulepos=_enc->state.granpos; - if(_last_p)_enc->packet_state=OC_PACKET_DONE; - return 1+_enc->nqueued_dups; -} diff --git a/drivers/theora/encoder_disabled.c b/drivers/theora/encoder_disabled.c deleted file mode 100644 index 
0cbf6645ac..0000000000 --- a/drivers/theora/encoder_disabled.c +++ /dev/null @@ -1,67 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: encoder_disabled.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ -#include "apiwrapper.h" -#include "encint.h" - -th_enc_ctx *th_encode_alloc(const th_info *_info){ - return NULL; -} - -void th_encode_free(th_enc_ctx *_enc){} - - -int th_encode_ctl(th_enc_ctx *_enc,int _req,void *_buf,size_t _buf_sz){ - return OC_DISABLED; -} - -int th_encode_flushheader(th_enc_ctx *_enc,th_comment *_tc,ogg_packet *_op){ - return OC_DISABLED; -} - -int th_encode_ycbcr_in(th_enc_ctx *_enc,th_ycbcr_buffer _img){ - return OC_DISABLED; -} - -int th_encode_packetout(th_enc_ctx *_enc,int _last_p,ogg_packet *_op){ - return OC_DISABLED; -} - - - -int theora_encode_init(theora_state *_te,theora_info *_ci){ - return OC_DISABLED; -} - -int theora_encode_YUVin(theora_state *_te,yuv_buffer *_yuv){ - return OC_DISABLED; -} - -int theora_encode_packetout(theora_state *_te,int _last_p,ogg_packet *_op){ - return OC_DISABLED; -} - -int theora_encode_header(theora_state *_te,ogg_packet *_op){ - return OC_DISABLED; -} - -int theora_encode_comment(theora_comment *_tc,ogg_packet *_op){ - return OC_DISABLED; -} - -int theora_encode_tables(theora_state *_te,ogg_packet *_op){ - return OC_DISABLED; -} diff --git a/drivers/theora/enquant.c b/drivers/theora/enquant.c deleted file mode 100644 
index 3372fed221..0000000000 --- a/drivers/theora/enquant.c +++ /dev/null @@ -1,274 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: enquant.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ -#include -#include -#include "encint.h" - - - -void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo){ - const th_quant_ranges *qranges; - const th_quant_base *base_mats[2*3*64]; - int indices[2][3][64]; - int nbase_mats; - int nbits; - int ci; - int qi; - int qri; - int qti; - int pli; - int qtj; - int plj; - int bmi; - int i; - i=_qinfo->loop_filter_limits[0]; - for(qi=1;qi<64;qi++)i=OC_MAXI(i,_qinfo->loop_filter_limits[qi]); - nbits=OC_ILOG_32(i); - oggpackB_write(_opb,nbits,3); - for(qi=0;qi<64;qi++){ - oggpackB_write(_opb,_qinfo->loop_filter_limits[qi],nbits); - } - /*580 bits for VP3.*/ - i=1; - for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->ac_scale[qi],i); - nbits=OC_ILOGNZ_32(i); - oggpackB_write(_opb,nbits-1,4); - for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->ac_scale[qi],nbits); - /*516 bits for VP3.*/ - i=1; - for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->dc_scale[qi],i); - nbits=OC_ILOGNZ_32(i); - oggpackB_write(_opb,nbits-1,4); - for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->dc_scale[qi],nbits); - /*Consolidate any duplicate base matrices.*/ - nbase_mats=0; - for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ - qranges=_qinfo->qi_ranges[qti]+pli; - 
for(qri=0;qri<=qranges->nranges;qri++){ - for(bmi=0;;bmi++){ - if(bmi>=nbase_mats){ - base_mats[bmi]=qranges->base_matrices+qri; - indices[qti][pli][qri]=nbase_mats++; - break; - } - else if(memcmp(base_mats[bmi][0],qranges->base_matrices[qri], - sizeof(base_mats[bmi][0]))==0){ - indices[qti][pli][qri]=bmi; - break; - } - } - } - } - /*Write out the list of unique base matrices. - 1545 bits for VP3 matrices.*/ - oggpackB_write(_opb,nbase_mats-1,9); - for(bmi=0;bmiqi_ranges[qti]+pli; - if(i>0){ - if(qti>0){ - if(qranges->nranges==_qinfo->qi_ranges[qti-1][pli].nranges&& - memcmp(qranges->sizes,_qinfo->qi_ranges[qti-1][pli].sizes, - qranges->nranges*sizeof(qranges->sizes[0]))==0&& - memcmp(indices[qti][pli],indices[qti-1][pli], - (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){ - oggpackB_write(_opb,1,2); - continue; - } - } - qtj=(i-1)/3; - plj=(i-1)%3; - if(qranges->nranges==_qinfo->qi_ranges[qtj][plj].nranges&& - memcmp(qranges->sizes,_qinfo->qi_ranges[qtj][plj].sizes, - qranges->nranges*sizeof(qranges->sizes[0]))==0&& - memcmp(indices[qti][pli],indices[qtj][plj], - (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){ - oggpackB_write(_opb,0,1+(qti>0)); - continue; - } - oggpackB_write(_opb,1,1); - } - oggpackB_write(_opb,indices[qti][pli][0],nbits); - for(qi=qri=0;qi<63;qri++){ - oggpackB_write(_opb,qranges->sizes[qri]-1,OC_ILOG_32(62-qi)); - qi+=qranges->sizes[qri]; - oggpackB_write(_opb,indices[qti][pli][qri+1],nbits); - } - } -} - -static void oc_iquant_init(oc_iquant *_this,ogg_uint16_t _d){ - ogg_uint32_t t; - int l; - _d<<=1; - l=OC_ILOGNZ_32(_d)-1; - t=1+((ogg_uint32_t)1<<16+l)/_d; - _this->m=(ogg_int16_t)(t-0x10000); - _this->l=l; -} - -/*See comments at oc_dequant_tables_init() for how the quantization tables' - storage should be initialized.*/ -void oc_enquant_tables_init(ogg_uint16_t *_dequant[64][3][2], - oc_iquant *_enquant[64][3][2],const th_quant_info *_qinfo){ - int qi; - int pli; - int qti; - /*Initialize the dequantization tables 
first.*/ - oc_dequant_tables_init(_dequant,NULL,_qinfo); - /*Derive the quantization tables directly from the dequantization tables.*/ - for(qi=0;qi<64;qi++)for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ - int zzi; - int plj; - int qtj; - int dupe; - dupe=0; - for(qtj=0;qtj<=qti;qtj++){ - for(plj=0;plj<(qtj>1))/qd; - qp+=rq*(ogg_uint32_t)rq; - } - q2+=OC_PCD[_pixel_fmt][pli]*(ogg_int64_t)qp; - } - /*qavg=1.0/sqrt(q2).*/ - _log_qavg[qti][qi]=OC_Q57(48)-oc_blog64(q2)>>1; - } -} diff --git a/drivers/theora/enquant.h b/drivers/theora/enquant.h deleted file mode 100644 index d62df10d1a..0000000000 --- a/drivers/theora/enquant.h +++ /dev/null @@ -1,27 +0,0 @@ -#if !defined(_enquant_H) -# define _enquant_H (1) -# include "quant.h" - -typedef struct oc_iquant oc_iquant; - -#define OC_QUANT_MAX_LOG (OC_Q57(OC_STATIC_ILOG_32(OC_QUANT_MAX)-1)) - -/*Used to compute x/d via ((x*m>>16)+x>>l)+(x<0)) - (i.e., one 16x16->16 mul, 2 shifts, and 2 adds). - This is not an approximation; for 16-bit x and d, it is exact.*/ -struct oc_iquant{ - ogg_int16_t m; - ogg_int16_t l; -}; - -typedef oc_iquant oc_iquant_table[64]; - - - -void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo); -void oc_enquant_tables_init(ogg_uint16_t *_dequant[64][3][2], - oc_iquant *_enquant[64][3][2],const th_quant_info *_qinfo); -void oc_enquant_qavg_init(ogg_int64_t _log_qavg[2][64], - ogg_uint16_t *_dequant[64][3][2],int _pixel_fmt); - -#endif diff --git a/drivers/theora/fdct.c b/drivers/theora/fdct.c deleted file mode 100644 index dc3a66f245..0000000000 --- a/drivers/theora/fdct.c +++ /dev/null @@ -1,422 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: fdct.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ -#include "encint.h" -#include "dct.h" - - - -/*Performs a forward 8 point Type-II DCT transform. - The output is scaled by a factor of 2 from the orthonormal version of the - transform. - _y: The buffer to store the result in. - Data will be placed the first 8 entries (e.g., in a row of an 8x8 block). - _x: The input coefficients. - Every 8th entry is used (e.g., from a column of an 8x8 block).*/ -static void oc_fdct8(ogg_int16_t _y[8],const ogg_int16_t *_x){ - int t0; - int t1; - int t2; - int t3; - int t4; - int t5; - int t6; - int t7; - int r; - int s; - int u; - int v; - /*Stage 1:*/ - /*0-7 butterfly.*/ - t0=_x[0<<3]+(int)_x[7<<3]; - t7=_x[0<<3]-(int)_x[7<<3]; - /*1-6 butterfly.*/ - t1=_x[1<<3]+(int)_x[6<<3]; - t6=_x[1<<3]-(int)_x[6<<3]; - /*2-5 butterfly.*/ - t2=_x[2<<3]+(int)_x[5<<3]; - t5=_x[2<<3]-(int)_x[5<<3]; - /*3-4 butterfly.*/ - t3=_x[3<<3]+(int)_x[4<<3]; - t4=_x[3<<3]-(int)_x[4<<3]; - /*Stage 2:*/ - /*0-3 butterfly.*/ - r=t0+t3; - t3=t0-t3; - t0=r; - /*1-2 butterfly.*/ - r=t1+t2; - t2=t1-t2; - t1=r; - /*6-5 butterfly.*/ - r=t6+t5; - t5=t6-t5; - t6=r; - /*Stages 3 and 4 are where all the approximation occurs. - These are chosen to be as close to an exact inverse of the approximations - made in the iDCT as possible, while still using mostly 16-bit arithmetic. - We use some 16x16->32 signed MACs, but those still commonly execute in 1 - cycle on a 16-bit DSP. - For example, s=(27146*t5+0x4000>>16)+t5+(t5!=0) is an exact inverse of - t5=(OC_C4S4*s>>16). - That is, applying the latter to the output of the former will recover t5 - exactly (over the valid input range of t5, -23171...23169). 
- We increase the rounding bias to 0xB500 in this particular case so that - errors inverting the subsequent butterfly are not one-sided (e.g., the - mean error is very close to zero). - The (t5!=0) term could be replaced simply by 1, but we want to send 0 to 0. - The fDCT of an all-zeros block will still not be zero, because of the - biases we added at the very beginning of the process, but it will be close - enough that it is guaranteed to round to zero.*/ - /*Stage 3:*/ - /*4-5 butterfly.*/ - s=(27146*t5+0xB500>>16)+t5+(t5!=0)>>1; - r=t4+s; - t5=t4-s; - t4=r; - /*7-6 butterfly.*/ - s=(27146*t6+0xB500>>16)+t6+(t6!=0)>>1; - r=t7+s; - t6=t7-s; - t7=r; - /*Stage 4:*/ - /*0-1 butterfly.*/ - r=(27146*t0+0x4000>>16)+t0+(t0!=0); - s=(27146*t1+0xB500>>16)+t1+(t1!=0); - u=r+s>>1; - v=r-u; - _y[0]=u; - _y[4]=v; - /*3-2 rotation by 6pi/16*/ - u=(OC_C6S2*t2+OC_C2S6*t3+0x6CB7>>16)+(t3!=0); - s=(OC_C6S2*u>>16)-t2; - v=(s*21600+0x2800>>18)+s+(s!=0); - _y[2]=u; - _y[6]=v; - /*6-5 rotation by 3pi/16*/ - u=(OC_C5S3*t6+OC_C3S5*t5+0x0E3D>>16)+(t5!=0); - s=t6-(OC_C5S3*u>>16); - v=(s*26568+0x3400>>17)+s+(s!=0); - _y[5]=u; - _y[3]=v; - /*7-4 rotation by 7pi/16*/ - u=(OC_C7S1*t4+OC_C1S7*t7+0x7B1B>>16)+(t7!=0); - s=(OC_C7S1*u>>16)-t4; - v=(s*20539+0x3000>>20)+s+(s!=0); - _y[1]=u; - _y[7]=v; -} - -void oc_enc_fdct8x8(const oc_enc_ctx *_enc,ogg_int16_t _y[64], - const ogg_int16_t _x[64]){ - (*_enc->opt_vtable.fdct8x8)(_y,_x); -} - -/*Performs a forward 8x8 Type-II DCT transform. - The output is scaled by a factor of 4 relative to the orthonormal version - of the transform. - _y: The buffer to store the result in. - This may be the same as _x. - _x: The input coefficients. 
*/ -void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ - const ogg_int16_t *in; - ogg_int16_t *end; - ogg_int16_t *out; - ogg_int16_t w[64]; - int i; - /*Add two extra bits of working precision to improve accuracy; any more and - we could overflow.*/ - for(i=0;i<64;i++)w[i]=_x[i]<<2; - /*These biases correct for some systematic error that remains in the full - fDCT->iDCT round trip.*/ - w[0]+=(w[0]!=0)+1; - w[1]++; - w[8]--; - /*Transform columns of w into rows of _y.*/ - for(in=w,out=_y,end=out+64;out>2; -} - - - -/*This does not seem to outperform simple LFE border padding before MC. - It yields higher PSNR, but much higher bitrate usage.*/ -#if 0 -typedef struct oc_extension_info oc_extension_info; - - - -/*Information needed to pad boundary blocks. - We multiply each row/column by an extension matrix that fills in the padding - values as a linear combination of the active values, so that an equivalent - number of coefficients are forced to zero. - This costs at most 16 multiplies, the same as a 1-D fDCT itself, and as - little as 7 multiplies. - We compute the extension matrices for every possible shape in advance, as - there are only 35. - The coefficients for all matrices are stored in a single array to take - advantage of the overlap and repetitiveness of many of the shapes. - A similar technique is applied to the offsets into this array. - This reduces the required table storage by about 48%. - See tools/extgen.c for details. - We could conceivably do the same for all 256 possible shapes.*/ -struct oc_extension_info{ - /*The mask of the active pixels in the shape.*/ - short mask; - /*The number of active pixels in the shape.*/ - short na; - /*The extension matrix. 
- This is (8-na)xna*/ - const ogg_int16_t *const *ext; - /*The pixel indices: na active pixels followed by 8-na padding pixels.*/ - unsigned char pi[8]; - /*The coefficient indices: na unconstrained coefficients followed by 8-na - coefficients to be forced to zero.*/ - unsigned char ci[8]; -}; - - -/*The number of shapes we need.*/ -#define OC_NSHAPES (35) - -static const ogg_int16_t OC_EXT_COEFFS[229]={ - 0x7FFF,0xE1F8,0x6903,0xAA79,0x5587,0x7FFF,0x1E08,0x7FFF, - 0x5587,0xAA79,0x6903,0xE1F8,0x7FFF,0x0000,0x0000,0x0000, - 0x7FFF,0x0000,0x0000,0x7FFF,0x8000,0x7FFF,0x0000,0x0000, - 0x7FFF,0xE1F8,0x1E08,0xB0A7,0xAA1D,0x337C,0x7FFF,0x4345, - 0x2267,0x4345,0x7FFF,0x337C,0xAA1D,0xB0A7,0x8A8C,0x4F59, - 0x03B4,0xE2D6,0x7FFF,0x2CF3,0x7FFF,0xE2D6,0x03B4,0x4F59, - 0x8A8C,0x1103,0x7AEF,0x5225,0xDF60,0xC288,0xDF60,0x5225, - 0x7AEF,0x1103,0x668A,0xD6EE,0x3A16,0x0E6C,0xFA07,0x0E6C, - 0x3A16,0xD6EE,0x668A,0x2A79,0x2402,0x980F,0x50F5,0x4882, - 0x50F5,0x980F,0x2402,0x2A79,0xF976,0x2768,0x5F22,0x2768, - 0xF976,0x1F91,0x76C1,0xE9AE,0x76C1,0x1F91,0x7FFF,0xD185, - 0x0FC8,0xD185,0x7FFF,0x4F59,0x4345,0xED62,0x4345,0x4F59, - 0xF574,0x5D99,0x2CF3,0x5D99,0xF574,0x5587,0x3505,0x30FC, - 0xF482,0x953C,0xEAC4,0x7FFF,0x4F04,0x7FFF,0xEAC4,0x953C, - 0xF482,0x30FC,0x4F04,0x273D,0xD8C3,0x273D,0x1E09,0x61F7, - 0x1E09,0x273D,0xD8C3,0x273D,0x4F04,0x30FC,0xA57E,0x153C, - 0x6AC4,0x3C7A,0x1E08,0x3C7A,0x6AC4,0x153C,0xA57E,0x7FFF, - 0xA57E,0x5A82,0x6AC4,0x153C,0xC386,0xE1F8,0xC386,0x153C, - 0x6AC4,0x5A82,0xD8C3,0x273D,0x7FFF,0xE1F7,0x7FFF,0x273D, - 0xD8C3,0x4F04,0x30FC,0xD8C3,0x273D,0xD8C3,0x30FC,0x4F04, - 0x1FC8,0x67AD,0x1853,0xE038,0x1853,0x67AD,0x1FC8,0x4546, - 0xE038,0x1FC8,0x3ABA,0x1FC8,0xE038,0x4546,0x3505,0x5587, - 0xF574,0xBC11,0x78F4,0x4AFB,0xE6F3,0x4E12,0x3C11,0xF8F4, - 0x4AFB,0x3C7A,0xF88B,0x3C11,0x78F4,0xCAFB,0x7FFF,0x08CC, - 0x070C,0x236D,0x5587,0x236D,0x070C,0xF88B,0x3C7A,0x4AFB, - 0xF8F4,0x3C11,0x7FFF,0x153C,0xCAFB,0x153C,0x7FFF,0x1E08, - 
0xE1F8,0x7FFF,0x08CC,0x7FFF,0xCAFB,0x78F4,0x3C11,0x4E12, - 0xE6F3,0x4AFB,0x78F4,0xBC11,0xFE3D,0x7FFF,0xFE3D,0x2F3A, - 0x7FFF,0x2F3A,0x89BC,0x7FFF,0x89BC -}; - -static const ogg_int16_t *const OC_EXT_ROWS[96]={ - OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0, - OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 6, - OC_EXT_COEFFS+ 27,OC_EXT_COEFFS+ 38,OC_EXT_COEFFS+ 43,OC_EXT_COEFFS+ 32, - OC_EXT_COEFFS+ 49,OC_EXT_COEFFS+ 58,OC_EXT_COEFFS+ 67,OC_EXT_COEFFS+ 71, - OC_EXT_COEFFS+ 62,OC_EXT_COEFFS+ 53,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15, - OC_EXT_COEFFS+ 14,OC_EXT_COEFFS+ 13,OC_EXT_COEFFS+ 76,OC_EXT_COEFFS+ 81, - OC_EXT_COEFFS+ 86,OC_EXT_COEFFS+ 91,OC_EXT_COEFFS+ 96,OC_EXT_COEFFS+ 98, - OC_EXT_COEFFS+ 93,OC_EXT_COEFFS+ 88,OC_EXT_COEFFS+ 83,OC_EXT_COEFFS+ 78, - OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 12, - OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15, - OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 103,OC_EXT_COEFFS+ 108, - OC_EXT_COEFFS+ 126,OC_EXT_COEFFS+ 16,OC_EXT_COEFFS+ 137,OC_EXT_COEFFS+ 141, - OC_EXT_COEFFS+ 20,OC_EXT_COEFFS+ 130,OC_EXT_COEFFS+ 113,OC_EXT_COEFFS+ 116, - OC_EXT_COEFFS+ 146,OC_EXT_COEFFS+ 153,OC_EXT_COEFFS+ 160,OC_EXT_COEFFS+ 167, - OC_EXT_COEFFS+ 170,OC_EXT_COEFFS+ 163,OC_EXT_COEFFS+ 156,OC_EXT_COEFFS+ 149, - OC_EXT_COEFFS+ 119,OC_EXT_COEFFS+ 122,OC_EXT_COEFFS+ 174,OC_EXT_COEFFS+ 177, - OC_EXT_COEFFS+ 182,OC_EXT_COEFFS+ 187,OC_EXT_COEFFS+ 192,OC_EXT_COEFFS+ 197, - OC_EXT_COEFFS+ 202,OC_EXT_COEFFS+ 207,OC_EXT_COEFFS+ 210,OC_EXT_COEFFS+ 215, - OC_EXT_COEFFS+ 179,OC_EXT_COEFFS+ 189,OC_EXT_COEFFS+ 24,OC_EXT_COEFFS+ 204, - OC_EXT_COEFFS+ 184,OC_EXT_COEFFS+ 194,OC_EXT_COEFFS+ 212,OC_EXT_COEFFS+ 199, - OC_EXT_COEFFS+ 217,OC_EXT_COEFFS+ 100,OC_EXT_COEFFS+ 134,OC_EXT_COEFFS+ 135, - OC_EXT_COEFFS+ 135,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 134, - OC_EXT_COEFFS+ 134,OC_EXT_COEFFS+ 135,OC_EXT_COEFFS+ 220,OC_EXT_COEFFS+ 223, - OC_EXT_COEFFS+ 
226,OC_EXT_COEFFS+ 227,OC_EXT_COEFFS+ 224,OC_EXT_COEFFS+ 221 -}; - -static const oc_extension_info OC_EXTENSION_INFO[OC_NSHAPES]={ - {0x7F,7,OC_EXT_ROWS+ 0,{0,1,2,3,4,5,6,7},{0,1,2,4,5,6,7,3}}, - {0xFE,7,OC_EXT_ROWS+ 7,{1,2,3,4,5,6,7,0},{0,1,2,4,5,6,7,3}}, - {0x3F,6,OC_EXT_ROWS+ 8,{0,1,2,3,4,5,7,6},{0,1,3,4,6,7,5,2}}, - {0xFC,6,OC_EXT_ROWS+ 10,{2,3,4,5,6,7,1,0},{0,1,3,4,6,7,5,2}}, - {0x1F,5,OC_EXT_ROWS+ 12,{0,1,2,3,4,7,6,5},{0,2,3,5,7,6,4,1}}, - {0xF8,5,OC_EXT_ROWS+ 15,{3,4,5,6,7,2,1,0},{0,2,3,5,7,6,4,1}}, - {0x0F,4,OC_EXT_ROWS+ 18,{0,1,2,3,7,6,5,4},{0,2,4,6,7,5,3,1}}, - {0xF0,4,OC_EXT_ROWS+ 18,{4,5,6,7,3,2,1,0},{0,2,4,6,7,5,3,1}}, - {0x07,3,OC_EXT_ROWS+ 22,{0,1,2,7,6,5,4,3},{0,3,6,7,5,4,2,1}}, - {0xE0,3,OC_EXT_ROWS+ 27,{5,6,7,4,3,2,1,0},{0,3,6,7,5,4,2,1}}, - {0x03,2,OC_EXT_ROWS+ 32,{0,1,7,6,5,4,3,2},{0,4,7,6,5,3,2,1}}, - {0xC0,2,OC_EXT_ROWS+ 32,{6,7,5,4,3,2,1,0},{0,4,7,6,5,3,2,1}}, - {0x01,1,OC_EXT_ROWS+ 0,{0,7,6,5,4,3,2,1},{0,7,6,5,4,3,2,1}}, - {0x80,1,OC_EXT_ROWS+ 0,{7,6,5,4,3,2,1,0},{0,7,6,5,4,3,2,1}}, - {0x7E,6,OC_EXT_ROWS+ 42,{1,2,3,4,5,6,7,0},{0,1,2,5,6,7,4,3}}, - {0x7C,5,OC_EXT_ROWS+ 44,{2,3,4,5,6,7,1,0},{0,1,4,5,7,6,3,2}}, - {0x3E,5,OC_EXT_ROWS+ 47,{1,2,3,4,5,7,6,0},{0,1,4,5,7,6,3,2}}, - {0x78,4,OC_EXT_ROWS+ 50,{3,4,5,6,7,2,1,0},{0,4,5,7,6,3,2,1}}, - {0x3C,4,OC_EXT_ROWS+ 54,{2,3,4,5,7,6,1,0},{0,3,4,7,6,5,2,1}}, - {0x1E,4,OC_EXT_ROWS+ 58,{1,2,3,4,7,6,5,0},{0,4,5,7,6,3,2,1}}, - {0x70,3,OC_EXT_ROWS+ 62,{4,5,6,7,3,2,1,0},{0,5,7,6,4,3,2,1}}, - {0x38,3,OC_EXT_ROWS+ 67,{3,4,5,7,6,2,1,0},{0,5,6,7,4,3,2,1}}, - {0x1C,3,OC_EXT_ROWS+ 72,{2,3,4,7,6,5,1,0},{0,5,6,7,4,3,2,1}}, - {0x0E,3,OC_EXT_ROWS+ 77,{1,2,3,7,6,5,4,0},{0,5,7,6,4,3,2,1}}, - {0x60,2,OC_EXT_ROWS+ 82,{5,6,7,4,3,2,1,0},{0,2,7,6,5,4,3,1}}, - {0x30,2,OC_EXT_ROWS+ 36,{4,5,7,6,3,2,1,0},{0,4,7,6,5,3,2,1}}, - {0x18,2,OC_EXT_ROWS+ 90,{3,4,7,6,5,2,1,0},{0,1,7,6,5,4,3,2}}, - {0x0C,2,OC_EXT_ROWS+ 34,{2,3,7,6,5,4,1,0},{0,4,7,6,5,3,2,1}}, - {0x06,2,OC_EXT_ROWS+ 84,{1,2,7,6,5,4,3,0},{0,2,7,6,5,4,3,1}}, - 
{0x40,1,OC_EXT_ROWS+ 0,{6,7,5,4,3,2,1,0},{0,7,6,5,4,3,2,1}}, - {0x20,1,OC_EXT_ROWS+ 0,{5,7,6,4,3,2,1,0},{0,7,6,5,4,3,2,1}}, - {0x10,1,OC_EXT_ROWS+ 0,{4,7,6,5,3,2,1,0},{0,7,6,5,4,3,2,1}}, - {0x08,1,OC_EXT_ROWS+ 0,{3,7,6,5,4,2,1,0},{0,7,6,5,4,3,2,1}}, - {0x04,1,OC_EXT_ROWS+ 0,{2,7,6,5,4,3,1,0},{0,7,6,5,4,3,2,1}}, - {0x02,1,OC_EXT_ROWS+ 0,{1,7,6,5,4,3,2,0},{0,7,6,5,4,3,2,1}} -}; - - - -/*Pads a single column of a partial block and then performs a forward Type-II - DCT on the result. - The input is scaled by a factor of 4 and biased appropriately for the current - fDCT implementation. - The output is scaled by an additional factor of 2 from the orthonormal - version of the transform. - _y: The buffer to store the result in. - Data will be placed the first 8 entries (e.g., in a row of an 8x8 block). - _x: The input coefficients. - Every 8th entry is used (e.g., from a column of an 8x8 block). - _e: The extension information for the shape.*/ -static void oc_fdct8_ext(ogg_int16_t _y[8],ogg_int16_t *_x, - const oc_extension_info *_e){ - const unsigned char *pi; - int na; - na=_e->na; - pi=_e->pi; - if(na==1){ - int ci; - /*While the branch below is still correct for shapes with na==1, we can - perform the entire transform with just 1 multiply in this case instead - of 23.*/ - _y[0]=(ogg_int16_t)(OC_DIV2_16(OC_C4S4*(_x[pi[0]]))); - for(ci=1;ci<8;ci++)_y[ci]=0; - } - else{ - const ogg_int16_t *const *ext; - int zpi; - int api; - int nz; - /*First multiply by the extension matrix to compute the padding values.*/ - nz=8-na; - ext=_e->ext; - for(zpi=0;zpi>16)+1>>1; - } - oc_fdct8(_y,_x); - } -} - -/*Performs a forward 8x8 Type-II DCT transform on blocks which overlap the - border of the picture region. - This method ONLY works with rectangular regions. - _border: A description of which pixels are inside the border. - _y: The buffer to store the result in. - This may be the same as _x. - _x: The input pixel values. 
- Pixel values outside the border will be ignored.*/ -void oc_fdct8x8_border(const oc_border_info *_border, - ogg_int16_t _y[64],const ogg_int16_t _x[64]){ - ogg_int16_t *in; - ogg_int16_t *out; - ogg_int16_t w[64]; - ogg_int64_t mask; - const oc_extension_info *cext; - const oc_extension_info *rext; - int cmask; - int rmask; - int ri; - int ci; - /*Identify the shapes of the non-zero rows and columns.*/ - rmask=cmask=0; - mask=_border->mask; - for(ri=0;ri<8;ri++){ - /*This aggregation is _only_ correct for rectangular masks.*/ - cmask|=((mask&0xFF)!=0)<>=8; - } - /*Find the associated extension info for these shapes.*/ - if(cmask==0xFF)cext=NULL; - else for(cext=OC_EXTENSION_INFO;cext->mask!=cmask;){ - /*If we somehow can't find the shape, then just do an unpadded fDCT. - It won't be efficient, but it should still be correct.*/ - if(++cext>=OC_EXTENSION_INFO+OC_NSHAPES){ - oc_enc_fdct8x8_c(_y,_x); - return; - } - } - if(rmask==0xFF)rext=NULL; - else for(rext=OC_EXTENSION_INFO;rext->mask!=rmask;){ - /*If we somehow can't find the shape, then just do an unpadded fDCT. - It won't be efficient, but it should still be correct.*/ - if(++rext>=OC_EXTENSION_INFO+OC_NSHAPES){ - oc_enc_fdct8x8_c(_y,_x); - return; - } - } - /*Add two extra bits of working precision to improve accuracy; any more and - we could overflow.*/ - for(ci=0;ci<64;ci++)w[ci]=_x[ci]<<2; - /*These biases correct for some systematic error that remains in the full - fDCT->iDCT round trip. - We can safely add them before padding, since if these pixel values are - overwritten, we didn't care what they were anyway (and the unbiased values - will usually yield smaller DCT coefficient magnitudes).*/ - w[0]+=(w[0]!=0)+1; - w[1]++; - w[8]--; - /*Transform the columns. 
- We can ignore zero columns without a problem.*/ - in=w; - out=_y; - if(cext==NULL)for(ci=0;ci<8;ci++)oc_fdct8(out+(ci<<3),in+ci); - else for(ci=0;ci<8;ci++)if(rmask&(1<>2; -} -#endif diff --git a/drivers/theora/fragment.c b/drivers/theora/fragment.c deleted file mode 100644 index 15372e9d9f..0000000000 --- a/drivers/theora/fragment.c +++ /dev/null @@ -1,87 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: fragment.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ -#include -#include "internal.h" - -void oc_frag_copy(const oc_theora_state *_state,unsigned char *_dst, - const unsigned char *_src,int _ystride){ - (*_state->opt_vtable.frag_copy)(_dst,_src,_ystride); -} - -void oc_frag_copy_c(unsigned char *_dst,const unsigned char *_src,int _ystride){ - int i; - for(i=8;i-->0;){ - memcpy(_dst,_src,8*sizeof(*_dst)); - _dst+=_ystride; - _src+=_ystride; - } -} - -void oc_frag_recon_intra(const oc_theora_state *_state,unsigned char *_dst, - int _ystride,const ogg_int16_t _residue[64]){ - _state->opt_vtable.frag_recon_intra(_dst,_ystride,_residue); -} - -void oc_frag_recon_intra_c(unsigned char *_dst,int _ystride, - const ogg_int16_t _residue[64]){ - int i; - for(i=0;i<8;i++){ - int j; - for(j=0;j<8;j++)_dst[j]=OC_CLAMP255(_residue[i*8+j]+128); - _dst+=_ystride; - } -} - -void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst, - const unsigned char 
*_src,int _ystride,const ogg_int16_t _residue[64]){ - _state->opt_vtable.frag_recon_inter(_dst,_src,_ystride,_residue); -} - -void oc_frag_recon_inter_c(unsigned char *_dst, - const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){ - int i; - for(i=0;i<8;i++){ - int j; - for(j=0;j<8;j++)_dst[j]=OC_CLAMP255(_residue[i*8+j]+_src[j]); - _dst+=_ystride; - _src+=_ystride; - } -} - -void oc_frag_recon_inter2(const oc_theora_state *_state,unsigned char *_dst, - const unsigned char *_src1,const unsigned char *_src2,int _ystride, - const ogg_int16_t _residue[64]){ - _state->opt_vtable.frag_recon_inter2(_dst,_src1,_src2,_ystride,_residue); -} - -void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1, - const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]){ - int i; - for(i=0;i<8;i++){ - int j; - for(j=0;j<8;j++)_dst[j]=OC_CLAMP255(_residue[i*8+j]+(_src1[j]+_src2[j]>>1)); - _dst+=_ystride; - _src1+=_ystride; - _src2+=_ystride; - } -} - -void oc_restore_fpu(const oc_theora_state *_state){ - _state->opt_vtable.restore_fpu(); -} - -void oc_restore_fpu_c(void){} diff --git a/drivers/theora/huffdec.c b/drivers/theora/huffdec.c deleted file mode 100644 index 8cf27f0341..0000000000 --- a/drivers/theora/huffdec.c +++ /dev/null @@ -1,489 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: huffdec.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -#include -#include -#include -#include "huffdec.h" -#include "decint.h" - - -/*The ANSI offsetof macro is broken on some platforms (e.g., older DECs).*/ -#define _ogg_offsetof(_type,_field)\ - ((size_t)((char *)&((_type *)0)->_field-(char *)0)) - -/*The number of internal tokens associated with each of the spec tokens.*/ -static const unsigned char OC_DCT_TOKEN_MAP_ENTRIES[TH_NDCT_TOKENS]={ - 1,1,1,4,8,1,1,8,1,1,1,1,1,2,2,2,2,4,8,2,2,2,4,2,2,2,2,2,8,2,4,8 -}; - -/*The map from external spec-defined tokens to internal tokens. - This is constructed so that any extra bits read with the original token value - can be masked off the least significant bits of its internal token index. - In addition, all of the tokens which require additional extra bits are placed - at the start of the list, and grouped by type. - OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so - giving it index 0 may simplify comparisons on some architectures. 
- These requirements require some substantial reordering.*/ -static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={ - /*OC_DCT_EOB1_TOKEN (0 extra bits)*/ - 15, - /*OC_DCT_EOB2_TOKEN (0 extra bits)*/ - 16, - /*OC_DCT_EOB3_TOKEN (0 extra bits)*/ - 17, - /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/ - 88, - /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/ - 80, - /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/ - 1, - /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/ - 0, - /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/ - 48, - /*OC_DCT_ZRL_TOKEN (6 extra bits)*/ - 14, - /*OC_ONE_TOKEN (0 extra bits)*/ - 56, - /*OC_MINUS_ONE_TOKEN (0 extra bits)*/ - 57, - /*OC_TWO_TOKEN (0 extra bits)*/ - 58, - /*OC_MINUS_TWO_TOKEN (0 extra bits)*/ - 59, - /*OC_DCT_VAL_CAT2 (1 extra bit)*/ - 60, - 62, - 64, - 66, - /*OC_DCT_VAL_CAT3 (2 extra bits)*/ - 68, - /*OC_DCT_VAL_CAT4 (3 extra bits)*/ - 72, - /*OC_DCT_VAL_CAT5 (4 extra bits)*/ - 2, - /*OC_DCT_VAL_CAT6 (5 extra bits)*/ - 4, - /*OC_DCT_VAL_CAT7 (6 extra bits)*/ - 6, - /*OC_DCT_VAL_CAT8 (10 extra bits)*/ - 8, - /*OC_DCT_RUN_CAT1A (1 extra bit)*/ - 18, - 20, - 22, - 24, - 26, - /*OC_DCT_RUN_CAT1B (3 extra bits)*/ - 32, - /*OC_DCT_RUN_CAT1C (4 extra bits)*/ - 12, - /*OC_DCT_RUN_CAT2A (2 extra bits)*/ - 28, - /*OC_DCT_RUN_CAT2B (3 extra bits)*/ - 40 -}; - -/*These three functions are really part of the bitpack.c module, but - they are only used here. 
- Declaring local static versions so they can be inlined saves considerable - function call overhead.*/ - -static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){ - const unsigned char *ptr; - const unsigned char *stop; - oc_pb_window window; - int available; - window=_b->window; - available=_b->bits; - ptr=_b->ptr; - stop=_b->stop; - /*This version of _refill() doesn't bother setting eof because we won't - check for it after we've started decoding DCT tokens.*/ - if(ptr>=stop)available=OC_LOTS_OF_BITS; - while(available<=OC_PB_WINDOW_SIZE-8){ - available+=8; - window|=(oc_pb_window)*ptr++<=stop)available=OC_LOTS_OF_BITS; - } - _b->ptr=ptr; - if(_bits>available)window|=*ptr>>(available&7); - _b->bits=available; - return window; -} - - -/*Read in bits without advancing the bit pointer. - Here we assume 0<=_bits&&_bits<=32.*/ -static long oc_pack_look(oc_pack_buf *_b,int _bits){ - oc_pb_window window; - int available; - long result; - window=_b->window; - available=_b->bits; - if(_bits==0)return 0; - if(_bits>available)_b->window=window=oc_pack_refill(_b,_bits); - result=window>>OC_PB_WINDOW_SIZE-_bits; - return result; -} - -/*Advance the bit pointer.*/ -static void oc_pack_adv(oc_pack_buf *_b,int _bits){ - /*We ignore the special cases for _bits==0 and _bits==32 here, since they are - never used actually used. - OC_HUFF_SLUSH (defined below) would have to be at least 27 to actually read - 32 bits in a single go, and would require a 32 GB lookup table (assuming - 8 byte pointers, since 4 byte pointers couldn't fit such a table).*/ - _b->window<<=_bits; - _b->bits-=_bits; -} - - -/*The log_2 of the size of a lookup table is allowed to grow to relative to - the number of unique nodes it contains. - E.g., if OC_HUFF_SLUSH is 2, then at most 75% of the space in the tree is - wasted (each node will have an amortized cost of at most 20 bytes when using - 4-byte pointers). 
- Larger numbers can decode tokens with fewer read operations, while smaller - numbers may save more space (requiring as little as 8 bytes amortized per - node, though there will be more nodes). - With a sample file: - 32233473 read calls are required when no tree collapsing is done (100.0%). - 19269269 read calls are required when OC_HUFF_SLUSH is 0 (59.8%). - 11144969 read calls are required when OC_HUFF_SLUSH is 1 (34.6%). - 10538563 read calls are required when OC_HUFF_SLUSH is 2 (32.7%). - 10192578 read calls are required when OC_HUFF_SLUSH is 3 (31.6%). - Since a value of 1 gets us the vast majority of the speed-up with only a - small amount of wasted memory, this is what we use.*/ -#define OC_HUFF_SLUSH (1) - - -/*Determines the size in bytes of a Huffman tree node that represents a - subtree of depth _nbits. - _nbits: The depth of the subtree. - If this is 0, the node is a leaf node. - Otherwise 1<<_nbits pointers are allocated for children. - Return: The number of bytes required to store the node.*/ -static size_t oc_huff_node_size(int _nbits){ - size_t size; - size=_ogg_offsetof(oc_huff_node,nodes); - if(_nbits>0)size+=sizeof(oc_huff_node *)*(1<<_nbits); - return size; -} - -static oc_huff_node *oc_huff_node_init(char **_storage,size_t _size,int _nbits){ - oc_huff_node *ret; - ret=(oc_huff_node *)*_storage; - ret->nbits=(unsigned char)_nbits; - (*_storage)+=_size; - return ret; -} - - -/*Determines the size in bytes of a Huffman tree. - _nbits: The depth of the subtree. - If this is 0, the node is a leaf node. - Otherwise storage for 1<<_nbits pointers are added for children. 
- Return: The number of bytes required to store the tree.*/ -static size_t oc_huff_tree_size(const oc_huff_node *_node){ - size_t size; - size=oc_huff_node_size(_node->nbits); - if(_node->nbits){ - int nchildren; - int i; - nchildren=1<<_node->nbits; - for(i=0;inbits-_node->nodes[i]->depth){ - size+=oc_huff_tree_size(_node->nodes[i]); - } - } - return size; -} - - -/*Unpacks a sub-tree from the given buffer. - _opb: The buffer to unpack from. - _binodes: The nodes to store the sub-tree in. - _nbinodes: The number of nodes available for the sub-tree. - Return: 0 on success, or a negative value on error.*/ -static int oc_huff_tree_unpack(oc_pack_buf *_opb, - oc_huff_node *_binodes,int _nbinodes){ - oc_huff_node *binode; - long bits; - int nused; - if(_nbinodes<1)return TH_EBADHEADER; - binode=_binodes; - nused=0; - bits=oc_pack_read1(_opb); - if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER; - /*Read an internal node:*/ - if(!bits){ - int ret; - nused++; - binode->nbits=1; - binode->depth=1; - binode->nodes[0]=_binodes+nused; - ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused); - if(ret>=0){ - nused+=ret; - binode->nodes[1]=_binodes+nused; - ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused); - } - if(ret<0)return ret; - nused+=ret; - } - /*Read a leaf node:*/ - else{ - int ntokens; - int token; - int i; - bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS); - if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER; - /*Find out how many internal tokens we translate this external token into.*/ - ntokens=OC_DCT_TOKEN_MAP_ENTRIES[bits]; - if(_nbinodes<2*ntokens-1)return TH_EBADHEADER; - /*Fill in a complete binary tree pointing to the internal tokens.*/ - for(i=1;inbits=0; - binode->depth=1; - binode->token=token+i; - } - } - return nused; -} - -/*Finds the depth of shortest branch of the given sub-tree. - The tree must be binary. - _binode: The root of the given sub-tree. - _binode->nbits must be 0 or 1. 
- Return: The smallest depth of a leaf node in this sub-tree. - 0 indicates this sub-tree is a leaf node.*/ -static int oc_huff_tree_mindepth(oc_huff_node *_binode){ - int depth0; - int depth1; - if(_binode->nbits==0)return 0; - depth0=oc_huff_tree_mindepth(_binode->nodes[0]); - depth1=oc_huff_tree_mindepth(_binode->nodes[1]); - return OC_MINI(depth0,depth1)+1; -} - -/*Finds the number of internal nodes at a given depth, plus the number of - leaves at that depth or shallower. - The tree must be binary. - _binode: The root of the given sub-tree. - _binode->nbits must be 0 or 1. - Return: The number of entries that would be contained in a jump table of the - given depth.*/ -static int oc_huff_tree_occupancy(oc_huff_node *_binode,int _depth){ - if(_binode->nbits==0||_depth<=0)return 1; - else{ - return oc_huff_tree_occupancy(_binode->nodes[0],_depth-1)+ - oc_huff_tree_occupancy(_binode->nodes[1],_depth-1); - } -} - -/*Makes a copy of the given Huffman tree. - _node: The Huffman tree to copy. 
- Return: The copy of the Huffman tree.*/ -static oc_huff_node *oc_huff_tree_copy(const oc_huff_node *_node, - char **_storage){ - oc_huff_node *ret; - ret=oc_huff_node_init(_storage,oc_huff_node_size(_node->nbits),_node->nbits); - ret->depth=_node->depth; - if(_node->nbits){ - int nchildren; - int i; - int inext; - nchildren=1<<_node->nbits; - for(i=0;inodes[i]=oc_huff_tree_copy(_node->nodes[i],_storage); - inext=i+(1<<_node->nbits-ret->nodes[i]->depth); - while(++inodes[i]=ret->nodes[i-1]; - } - } - else ret->token=_node->token; - return ret; -} - -static size_t oc_huff_tree_collapse_size(oc_huff_node *_binode,int _depth){ - size_t size; - int mindepth; - int depth; - int loccupancy; - int occupancy; - if(_binode->nbits!=0&&_depth>0){ - return oc_huff_tree_collapse_size(_binode->nodes[0],_depth-1)+ - oc_huff_tree_collapse_size(_binode->nodes[1],_depth-1); - } - depth=mindepth=oc_huff_tree_mindepth(_binode); - occupancy=1<loccupancy&&occupancy>=1<0){ - size+=oc_huff_tree_collapse_size(_binode->nodes[0],depth-1); - size+=oc_huff_tree_collapse_size(_binode->nodes[1],depth-1); - } - return size; -} - -static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode, - char **_storage); - -/*Fills the given nodes table with all the children in the sub-tree at the - given depth. - The nodes in the sub-tree with a depth less than that stored in the table - are freed. - The sub-tree must be binary and complete up until the given depth. - _nodes: The nodes table to fill. - _binode: The root of the sub-tree to fill it with. - _binode->nbits must be 0 or 1. - _level: The current level in the table. - 0 indicates that the current node should be stored, regardless of - whether it is a leaf node or an internal node. 
- _depth: The depth of the nodes to fill the table with, relative to their - parent.*/ -static void oc_huff_node_fill(oc_huff_node **_nodes, - oc_huff_node *_binode,int _level,int _depth,char **_storage){ - if(_level<=0||_binode->nbits==0){ - int i; - _binode->depth=(unsigned char)(_depth-_level); - _nodes[0]=oc_huff_tree_collapse(_binode,_storage); - for(i=1;i<1<<_level;i++)_nodes[i]=_nodes[0]; - } - else{ - _level--; - oc_huff_node_fill(_nodes,_binode->nodes[0],_level,_depth,_storage); - _nodes+=1<<_level; - oc_huff_node_fill(_nodes,_binode->nodes[1],_level,_depth,_storage); - } -} - -/*Finds the largest complete sub-tree rooted at the current node and collapses - it into a single node. - This procedure is then applied recursively to all the children of that node. - _binode: The root of the sub-tree to collapse. - _binode->nbits must be 0 or 1. - Return: The new root of the collapsed sub-tree.*/ -static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode, - char **_storage){ - oc_huff_node *root; - size_t size; - int mindepth; - int depth; - int loccupancy; - int occupancy; - depth=mindepth=oc_huff_tree_mindepth(_binode); - occupancy=1<loccupancy&&occupancy>=1<depth=_binode->depth; - oc_huff_node_fill(root->nodes,_binode,depth,depth,_storage); - return root; -} - -/*Unpacks a set of Huffman trees, and reduces them to a collapsed - representation. - _opb: The buffer to unpack the trees from. - _nodes: The table to fill with the Huffman trees. - Return: 0 on success, or a negative value on error.*/ -int oc_huff_trees_unpack(oc_pack_buf *_opb, - oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){ - int i; - for(i=0;i0)_ogg_free(_dst[i]); - return TH_EFAULT; - } - _dst[i]=oc_huff_tree_copy(_src[i],&storage); - } - return 0; -} - -/*Frees the memory used by a set of Huffman trees. 
- _nodes: The array of trees to free.*/ -void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){ - int i; - for(i=0;inbits!=0){ - bits=oc_pack_look(_opb,_node->nbits); - _node=_node->nodes[bits]; - oc_pack_adv(_opb,_node->depth); - } - return _node->token; -} diff --git a/drivers/theora/huffdec.h b/drivers/theora/huffdec.h deleted file mode 100644 index d7ffa0e99b..0000000000 --- a/drivers/theora/huffdec.h +++ /dev/null @@ -1,92 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: huffdec.h 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -#if !defined(_huffdec_H) -# define _huffdec_H (1) -# include "huffman.h" -# include "bitpack.h" - - - -typedef struct oc_huff_node oc_huff_node; - -/*A node in the Huffman tree. 
- Instead of storing every branching in the tree, subtrees can be collapsed - into one node, with a table of size 1< -#include -#include -#include "huffenc.h" - - - -/*The default Huffman codes used for VP3.1.*/ -const th_huff_code TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]={ - { - {0x002D, 6},{0x0026, 7},{0x0166, 9},{0x004E, 8}, - {0x02CE,10},{0x059E,11},{0x027D,11},{0x0008, 5}, - {0x04F9,12},{0x000F, 4},{0x000E, 4},{0x001B, 5}, - {0x0006, 4},{0x0008, 4},{0x0005, 4},{0x001A, 5}, - {0x0015, 5},{0x0007, 4},{0x000C, 4},{0x0001, 3}, - {0x0000, 3},{0x0009, 4},{0x0017, 5},{0x0029, 6}, - {0x0028, 6},{0x00B2, 8},{0x04F8,12},{0x059F,11}, - {0x009E, 9},{0x013F,10},{0x0012, 6},{0x0058, 7} - }, - { - {0x0010, 5},{0x0047, 7},{0x01FF, 9},{0x008C, 8}, - {0x03FC,10},{0x046A,11},{0x0469,11},{0x0022, 6}, - {0x11A1,13},{0x000E, 4},{0x000D, 4},{0x0004, 4}, - {0x0005, 4},{0x0009, 4},{0x0006, 4},{0x001E, 5}, - {0x0016, 5},{0x0007, 4},{0x000C, 4},{0x0001, 3}, - {0x0000, 3},{0x000A, 4},{0x0017, 5},{0x007D, 7}, - {0x007E, 7},{0x011B, 9},{0x08D1,12},{0x03FD,10}, - {0x046B,11},{0x11A0,13},{0x007C, 7},{0x00FE, 8} - }, - { - {0x0016, 5},{0x0020, 6},{0x0086, 8},{0x0087, 8}, - {0x0367,10},{0x06CC,11},{0x06CB,11},{0x006E, 7}, - {0x366D,14},{0x000F, 4},{0x000E, 4},{0x0004, 4}, - {0x0005, 4},{0x000A, 4},{0x0006, 4},{0x001A, 5}, - {0x0011, 5},{0x0007, 4},{0x000C, 4},{0x0001, 3}, - {0x0000, 3},{0x0009, 4},{0x0017, 5},{0x006F, 7}, - {0x006D, 7},{0x0364,10},{0x0D9A,12},{0x06CA,11}, - {0x1B37,13},{0x366C,14},{0x0042, 7},{0x00D8, 8} - }, - { - {0x0000, 4},{0x002D, 6},{0x00F7, 8},{0x0058, 7}, - {0x0167, 9},{0x02CB,10},{0x02CA,10},{0x000E, 6}, - {0x1661,13},{0x0003, 3},{0x0002, 3},{0x0008, 4}, - {0x0009, 4},{0x000D, 4},{0x0002, 4},{0x001F, 5}, - {0x0017, 5},{0x0001, 4},{0x000C, 4},{0x000E, 4}, - {0x000A, 4},{0x0006, 5},{0x0078, 7},{0x000F, 6}, - {0x007A, 7},{0x0164, 9},{0x0599,11},{0x02CD,10}, - {0x0B31,12},{0x1660,13},{0x0079, 7},{0x00F6, 8} - }, - { - {0x0003, 4},{0x003C, 6},{0x000F, 
7},{0x007A, 7}, - {0x001D, 8},{0x0020, 9},{0x0072,10},{0x0006, 6}, - {0x0399,13},{0x0004, 3},{0x0005, 3},{0x0005, 4}, - {0x0006, 4},{0x000E, 4},{0x0004, 4},{0x0000, 4}, - {0x0019, 5},{0x0002, 4},{0x000D, 4},{0x0007, 4}, - {0x001F, 5},{0x0030, 6},{0x0011, 8},{0x0031, 6}, - {0x0005, 6},{0x0021, 9},{0x00E7,11},{0x0038, 9}, - {0x01CD,12},{0x0398,13},{0x007B, 7},{0x0009, 7} - }, - { - {0x0009, 4},{0x0002, 5},{0x0074, 7},{0x0007, 6}, - {0x00EC, 8},{0x00D1, 9},{0x01A6,10},{0x0006, 6}, - {0x0D21,13},{0x0005, 3},{0x0006, 3},{0x0008, 4}, - {0x0007, 4},{0x000F, 4},{0x0004, 4},{0x0000, 4}, - {0x001C, 5},{0x0002, 4},{0x0005, 4},{0x0003, 4}, - {0x000C, 5},{0x0035, 7},{0x01A7,10},{0x001B, 6}, - {0x0077, 7},{0x01A5,10},{0x0349,11},{0x00D0, 9}, - {0x0691,12},{0x0D20,13},{0x0075, 7},{0x00ED, 8} - }, - { - {0x000A, 4},{0x000C, 5},{0x0012, 6},{0x001B, 6}, - {0x00B7, 8},{0x016C, 9},{0x0099, 9},{0x005A, 7}, - {0x16D8,13},{0x0007, 3},{0x0006, 3},{0x0009, 4}, - {0x0008, 4},{0x0000, 3},{0x0005, 4},{0x0017, 5}, - {0x000E, 5},{0x0002, 4},{0x0003, 4},{0x000F, 5}, - {0x001A, 6},{0x004D, 8},{0x2DB3,14},{0x002C, 6}, - {0x0011, 6},{0x02DA,10},{0x05B7,11},{0x0098, 9}, - {0x0B6D,12},{0x2DB2,14},{0x0010, 6},{0x0027, 7} - }, - { - {0x000D, 4},{0x000F, 5},{0x001D, 6},{0x0008, 5}, - {0x0051, 7},{0x0056, 8},{0x00AF, 9},{0x002A, 7}, - {0x148A,13},{0x0007, 3},{0x0000, 2},{0x0008, 4}, - {0x0009, 4},{0x000C, 4},{0x0006, 4},{0x0017, 5}, - {0x000B, 5},{0x0016, 5},{0x0015, 5},{0x0009, 5}, - {0x0050, 7},{0x00AE, 9},{0x2917,14},{0x001C, 6}, - {0x0014, 6},{0x0290,10},{0x0523,11},{0x0149, 9}, - {0x0A44,12},{0x2916,14},{0x0053, 7},{0x00A5, 8} - }, - { - {0x0001, 4},{0x001D, 6},{0x00F5, 8},{0x00F4, 8}, - {0x024D,10},{0x0499,11},{0x0498,11},{0x0001, 5}, - {0x0021, 6},{0x0006, 3},{0x0005, 3},{0x0006, 4}, - {0x0005, 4},{0x0002, 4},{0x0007, 5},{0x0025, 6}, - {0x007B, 7},{0x001C, 6},{0x0020, 6},{0x000D, 6}, - {0x0048, 7},{0x0092, 8},{0x0127, 9},{0x000E, 4}, - {0x0004, 4},{0x0011, 5},{0x000C, 6},{0x003C, 6}, - {0x000F, 
5},{0x0000, 5},{0x001F, 5},{0x0013, 5} - }, - { - {0x0005, 4},{0x003C, 6},{0x0040, 7},{0x000D, 7}, - {0x0031, 9},{0x0061,10},{0x0060,10},{0x0002, 5}, - {0x00F5, 8},{0x0006, 3},{0x0005, 3},{0x0007, 4}, - {0x0006, 4},{0x0002, 4},{0x0009, 5},{0x0025, 6}, - {0x0007, 6},{0x0021, 6},{0x0024, 6},{0x0010, 6}, - {0x0041, 7},{0x00F4, 8},{0x0019, 8},{0x000E, 4}, - {0x0003, 4},{0x0011, 5},{0x0011, 6},{0x003F, 6}, - {0x003E, 6},{0x007B, 7},{0x0000, 4},{0x0013, 5} - }, - { - {0x000A, 4},{0x0007, 5},{0x0001, 6},{0x0009, 6}, - {0x0131, 9},{0x0261,10},{0x0260,10},{0x0015, 6}, - {0x0001, 7},{0x0007, 3},{0x0006, 3},{0x0008, 4}, - {0x0007, 4},{0x0006, 4},{0x0012, 5},{0x002F, 6}, - {0x0014, 6},{0x0027, 6},{0x002D, 6},{0x0016, 6}, - {0x004D, 7},{0x0099, 8},{0x0000, 7},{0x0004, 4}, - {0x0001, 4},{0x0005, 5},{0x0017, 6},{0x002E, 6}, - {0x002C, 6},{0x0008, 6},{0x0006, 5},{0x0001, 5} - }, - { - {0x0000, 3},{0x000E, 5},{0x0017, 6},{0x002A, 6}, - {0x0010, 7},{0x00F9,10},{0x00F8,10},{0x001E, 7}, - {0x003F, 8},{0x0007, 3},{0x0006, 3},{0x0009, 4}, - {0x0008, 4},{0x0006, 4},{0x000F, 5},{0x0005, 5}, - {0x0016, 6},{0x0029, 6},{0x002B, 6},{0x0015, 6}, - {0x0050, 7},{0x0011, 7},{0x007D, 9},{0x0004, 4}, - {0x0017, 5},{0x0006, 5},{0x0014, 6},{0x002C, 6}, - {0x002D, 6},{0x000E, 6},{0x0009, 6},{0x0051, 7} - }, - { - {0x0002, 3},{0x0018, 5},{0x002F, 6},{0x000D, 5}, - {0x0053, 7},{0x0295,10},{0x0294,10},{0x00A4, 8}, - {0x007C, 8},{0x0000, 2},{0x0007, 3},{0x0009, 4}, - {0x0008, 4},{0x001B, 5},{0x000C, 5},{0x0028, 6}, - {0x006A, 7},{0x001E, 6},{0x001D, 6},{0x0069, 7}, - {0x00D7, 8},{0x007D, 8},{0x014B, 9},{0x0019, 5}, - {0x0016, 5},{0x002E, 6},{0x001C, 6},{0x002B, 6}, - {0x002A, 6},{0x0068, 7},{0x003F, 7},{0x00D6, 8} - }, - { - {0x0002, 3},{0x001B, 5},{0x000C, 5},{0x0018, 5}, - {0x0029, 6},{0x007F, 8},{0x02F0,10},{0x0198, 9}, - {0x0179, 9},{0x0000, 2},{0x0007, 3},{0x0009, 4}, - {0x0008, 4},{0x001A, 5},{0x000D, 5},{0x002A, 6}, - {0x0064, 7},{0x001E, 6},{0x0067, 7},{0x005F, 7}, - {0x00CD, 8},{0x007E, 
8},{0x02F1,10},{0x0016, 5}, - {0x000E, 5},{0x002E, 6},{0x0065, 7},{0x002B, 6}, - {0x0028, 6},{0x003E, 7},{0x00BD, 8},{0x0199, 9} - }, - { - {0x0002, 3},{0x0007, 4},{0x0016, 5},{0x0006, 4}, - {0x0036, 6},{0x005C, 7},{0x015D, 9},{0x015C, 9}, - {0x02BF,10},{0x0000, 2},{0x0007, 3},{0x0009, 4}, - {0x0008, 4},{0x0018, 5},{0x0034, 6},{0x002A, 6}, - {0x005E, 7},{0x006A, 7},{0x0064, 7},{0x005D, 7}, - {0x00CB, 8},{0x00AD, 8},{0x02BE,10},{0x0014, 5}, - {0x0033, 6},{0x006E, 7},{0x005F, 7},{0x006F, 7}, - {0x006B, 7},{0x00CA, 8},{0x00AC, 8},{0x015E, 9} - }, - { - {0x000F, 4},{0x001D, 5},{0x0018, 5},{0x000B, 4}, - {0x0019, 5},{0x0029, 6},{0x00D6, 8},{0x0551,11}, - {0x0AA1,12},{0x0001, 2},{0x0000, 2},{0x0009, 4}, - {0x0008, 4},{0x001B, 5},{0x0038, 6},{0x0028, 6}, - {0x0057, 7},{0x006A, 7},{0x0068, 7},{0x0056, 7}, - {0x00E5, 8},{0x0155, 9},{0x0AA0,12},{0x0073, 7}, - {0x0069, 7},{0x00D7, 8},{0x00AB, 8},{0x00E4, 8}, - {0x00A9, 8},{0x0151, 9},{0x0150, 9},{0x02A9,10} - }, - { - {0x0008, 5},{0x0025, 7},{0x017A, 9},{0x02F7,10}, - {0x0BDB,12},{0x17B4,13},{0x2F6B,14},{0x001D, 5}, - {0x2F6A,14},{0x0008, 4},{0x0007, 4},{0x0001, 4}, - {0x0002, 4},{0x000A, 4},{0x0006, 4},{0x0000, 4}, - {0x001C, 5},{0x0009, 4},{0x000D, 4},{0x000F, 4}, - {0x000C, 4},{0x0003, 4},{0x000A, 5},{0x0016, 5}, - {0x0013, 6},{0x005D, 7},{0x0024, 7},{0x00BC, 8}, - {0x005C, 7},{0x05EC,11},{0x000B, 5},{0x005F, 7} - }, - { - {0x000F, 5},{0x0010, 6},{0x004B, 8},{0x00C6, 8}, - {0x031D,10},{0x0C71,12},{0x0C70,12},{0x0001, 4}, - {0x0C73,12},{0x0008, 4},{0x0009, 4},{0x0002, 4}, - {0x0003, 4},{0x000B, 4},{0x0006, 4},{0x0000, 4}, - {0x001C, 5},{0x0005, 4},{0x000D, 4},{0x000F, 4}, - {0x000A, 4},{0x0019, 5},{0x0013, 6},{0x001D, 5}, - {0x0030, 6},{0x0062, 7},{0x0024, 7},{0x004A, 8}, - {0x018F, 9},{0x0C72,12},{0x000E, 5},{0x0011, 6} - }, - { - {0x001B, 5},{0x0003, 6},{0x008D, 8},{0x0040, 7}, - {0x0239,10},{0x0471,11},{0x08E0,12},{0x0003, 4}, - {0x11C3,13},{0x000A, 4},{0x0009, 4},{0x0004, 4}, - {0x0005, 4},{0x000E, 4},{0x0007, 
4},{0x0001, 4}, - {0x001E, 5},{0x0006, 4},{0x000C, 4},{0x000B, 4}, - {0x0002, 4},{0x0000, 5},{0x0041, 7},{0x001F, 5}, - {0x0022, 6},{0x0002, 6},{0x008F, 8},{0x008C, 8}, - {0x011D, 9},{0x11C2,13},{0x001A, 5},{0x0021, 6} - }, - { - {0x001F, 5},{0x0003, 6},{0x0003, 7},{0x0043, 7}, - {0x000B, 9},{0x0015,10},{0x0051,12},{0x0003, 4}, - {0x0050,12},{0x000D, 4},{0x000C, 4},{0x0004, 4}, - {0x0006, 4},{0x000E, 4},{0x000A, 4},{0x0001, 4}, - {0x001E, 5},{0x0005, 4},{0x0009, 4},{0x0007, 4}, - {0x0011, 5},{0x0002, 6},{0x0004, 8},{0x0002, 4}, - {0x002D, 6},{0x0020, 6},{0x0042, 7},{0x0001, 7}, - {0x0000, 7},{0x0029,11},{0x0017, 5},{0x002C, 6} - }, - { - {0x0003, 4},{0x001F, 6},{0x003A, 7},{0x005D, 7}, - {0x0173, 9},{0x02E4,10},{0x172D,13},{0x0004, 4}, - {0x172C,13},{0x000F, 4},{0x000E, 4},{0x0009, 4}, - {0x0008, 4},{0x000C, 4},{0x000A, 4},{0x0001, 4}, - {0x0016, 5},{0x0002, 4},{0x0005, 4},{0x001A, 5}, - {0x002F, 6},{0x0038, 7},{0x05CA,11},{0x0006, 4}, - {0x0037, 6},{0x001E, 6},{0x003B, 7},{0x0039, 7}, - {0x00B8, 8},{0x0B97,12},{0x0000, 4},{0x0036, 6} - }, - { - {0x0006, 4},{0x0037, 6},{0x005D, 7},{0x000C, 6}, - {0x00B9, 8},{0x02E3,10},{0x05C4,11},{0x0004, 4}, - {0x1715,13},{0x0000, 3},{0x000F, 4},{0x0008, 4}, - {0x0007, 4},{0x000C, 4},{0x0009, 4},{0x001D, 5}, - {0x0016, 5},{0x001C, 5},{0x001A, 5},{0x000B, 5}, - {0x005E, 7},{0x0170, 9},{0x1714,13},{0x000A, 4}, - {0x000A, 5},{0x0036, 6},{0x005F, 7},{0x001B, 7}, - {0x001A, 7},{0x0B8B,12},{0x0002, 4},{0x0007, 5} - }, - { - {0x000C, 4},{0x000B, 5},{0x0079, 7},{0x0022, 6}, - {0x00F0, 8},{0x0119, 9},{0x0230,10},{0x001D, 5}, - {0x08C4,12},{0x0001, 3},{0x0000, 3},{0x000A, 4}, - {0x0009, 4},{0x000B, 4},{0x0007, 4},{0x001C, 5}, - {0x003D, 6},{0x000D, 5},{0x0008, 5},{0x0015, 6}, - {0x008D, 8},{0x118B,13},{0x118A,13},{0x000D, 4}, - {0x0010, 5},{0x0009, 5},{0x0014, 6},{0x0047, 7}, - {0x00F1, 8},{0x0463,11},{0x001F, 5},{0x000C, 5} - }, - { - {0x0000, 3},{0x001A, 5},{0x0033, 6},{0x000C, 5}, - {0x0046, 7},{0x01E3, 9},{0x03C5,10},{0x0017, 5}, - 
{0x1E21,13},{0x0002, 3},{0x0001, 3},{0x0009, 4}, - {0x000A, 4},{0x0007, 4},{0x001B, 5},{0x003D, 6}, - {0x001B, 6},{0x0022, 6},{0x0079, 7},{0x00F0, 8}, - {0x1E20,13},{0x1E23,13},{0x1E22,13},{0x000E, 4}, - {0x0016, 5},{0x0018, 5},{0x0032, 6},{0x001A, 6}, - {0x0047, 7},{0x0789,11},{0x001F, 5},{0x0010, 5} - }, - { - {0x001D, 5},{0x0061, 7},{0x004E, 8},{0x009E, 9}, - {0x027C,11},{0x09F5,13},{0x09F4,13},{0x0003, 4}, - {0x0060, 7},{0x0000, 3},{0x000F, 4},{0x000B, 4}, - {0x000A, 4},{0x0009, 4},{0x0005, 4},{0x000D, 5}, - {0x0031, 6},{0x0008, 5},{0x0038, 6},{0x0012, 6}, - {0x0026, 7},{0x013F,10},{0x04FB,12},{0x000D, 4}, - {0x0002, 4},{0x000C, 5},{0x0039, 6},{0x001C, 6}, - {0x000F, 5},{0x001D, 6},{0x0008, 4},{0x0019, 5} - }, - { - {0x0007, 4},{0x0019, 6},{0x00AB, 8},{0x00AA, 8}, - {0x0119,10},{0x0461,12},{0x0460,12},{0x001B, 5}, - {0x0047, 8},{0x0001, 3},{0x0000, 3},{0x000C, 4}, - {0x000B, 4},{0x0009, 4},{0x0005, 4},{0x000D, 5}, - {0x0035, 6},{0x003D, 6},{0x003C, 6},{0x0018, 6}, - {0x0022, 7},{0x008D, 9},{0x0231,11},{0x000E, 4}, - {0x001F, 5},{0x0009, 5},{0x002B, 6},{0x0010, 6}, - {0x0034, 6},{0x0054, 7},{0x0008, 4},{0x0014, 5} - }, - { - {0x000C, 4},{0x0005, 5},{0x0008, 6},{0x005B, 7}, - {0x004D, 9},{0x0131,11},{0x0261,12},{0x001A, 5}, - {0x0012, 7},{0x0000, 3},{0x000F, 4},{0x000A, 4}, - {0x0009, 4},{0x0006, 4},{0x001B, 5},{0x0006, 5}, - {0x001C, 6},{0x002C, 6},{0x0015, 6},{0x005A, 7}, - {0x0027, 8},{0x0099,10},{0x0260,12},{0x000E, 4}, - {0x0004, 4},{0x000F, 5},{0x0007, 5},{0x001D, 6}, - {0x000B, 5},{0x0014, 6},{0x0008, 4},{0x0017, 5} - }, - { - {0x000F, 4},{0x0013, 5},{0x0075, 7},{0x0024, 6}, - {0x0095, 8},{0x0251,10},{0x04A0,11},{0x0010, 5}, - {0x00C8, 8},{0x0002, 3},{0x0001, 3},{0x0001, 4}, - {0x0000, 4},{0x001A, 5},{0x0011, 5},{0x002C, 6}, - {0x0065, 7},{0x0074, 7},{0x004B, 7},{0x00C9, 8}, - {0x0129, 9},{0x0943,12},{0x0942,12},{0x0003, 3}, - {0x000A, 4},{0x001C, 5},{0x0018, 5},{0x0033, 6}, - {0x0017, 5},{0x002D, 6},{0x001B, 5},{0x003B, 6} - }, - { - {0x0003, 3},{0x001A, 
5},{0x002D, 6},{0x0038, 6}, - {0x0028, 7},{0x0395,10},{0x0E51,12},{0x0037, 6}, - {0x00E4, 8},{0x0001, 3},{0x0000, 3},{0x001F, 5}, - {0x001E, 5},{0x0017, 5},{0x003A, 6},{0x0073, 7}, - {0x002A, 7},{0x002B, 7},{0x0029, 7},{0x01CB, 9}, - {0x0729,11},{0x1CA1,13},{0x1CA0,13},{0x0004, 3}, - {0x000A, 4},{0x0004, 4},{0x0018, 5},{0x0036, 6}, - {0x000B, 5},{0x002C, 6},{0x0019, 5},{0x003B, 6} - }, - { - {0x0004, 3},{0x0004, 4},{0x003F, 6},{0x0017, 5}, - {0x0075, 7},{0x01F5, 9},{0x07D1,11},{0x0017, 6}, - {0x01F6, 9},{0x0001, 3},{0x0000, 3},{0x001B, 5}, - {0x001A, 5},{0x000A, 5},{0x0032, 6},{0x0074, 7}, - {0x00F8, 8},{0x00F9, 8},{0x01F7, 9},{0x03E9,10}, - {0x0FA0,12},{0x1F43,13},{0x1F42,13},{0x0003, 3}, - {0x000A, 4},{0x001E, 5},{0x001C, 5},{0x003B, 6}, - {0x0018, 5},{0x0016, 6},{0x0016, 5},{0x0033, 6} - }, - { - {0x0004, 3},{0x0007, 4},{0x0018, 5},{0x001E, 5}, - {0x0036, 6},{0x0031, 7},{0x0177, 9},{0x0077, 7}, - {0x0176, 9},{0x0001, 3},{0x0000, 3},{0x001A, 5}, - {0x0019, 5},{0x003A, 6},{0x0019, 6},{0x005C, 7}, - {0x00BA, 8},{0x0061, 8},{0x00C1, 9},{0x0180,10}, - {0x0302,11},{0x0607,12},{0x0606,12},{0x0002, 3}, - {0x000A, 4},{0x001F, 5},{0x001C, 5},{0x0037, 6}, - {0x0016, 5},{0x0076, 7},{0x000D, 5},{0x002F, 6} - }, - { - {0x0000, 3},{0x000A, 4},{0x001A, 5},{0x000C, 4}, - {0x001D, 5},{0x0039, 6},{0x0078, 7},{0x005E, 7}, - {0x0393,11},{0x0002, 3},{0x0001, 3},{0x0016, 5}, - {0x000F, 5},{0x002E, 6},{0x005F, 7},{0x0073, 8}, - {0x00E5, 9},{0x01C8,10},{0x0E4A,13},{0x1C97,14}, - {0x1C96,14},{0x0E49,13},{0x0E48,13},{0x0004, 3}, - {0x0006, 4},{0x001F, 5},{0x001B, 5},{0x001D, 6}, - {0x0038, 6},{0x0038, 7},{0x003D, 6},{0x0079, 7} - }, - { - {0x000B, 5},{0x002B, 7},{0x0054, 8},{0x01B7, 9}, - {0x06D9,11},{0x0DB1,12},{0x0DB0,12},{0x0002, 4}, - {0x00AB, 9},{0x0009, 4},{0x000A, 4},{0x0007, 4}, - {0x0008, 4},{0x000F, 4},{0x000C, 4},{0x0003, 4}, - {0x001D, 5},{0x0004, 4},{0x000B, 4},{0x0006, 4}, - {0x001A, 5},{0x0003, 6},{0x00AA, 9},{0x0001, 4}, - {0x0000, 5},{0x0014, 6},{0x006C, 7},{0x00DA, 8}, - 
{0x0002, 6},{0x036D,10},{0x001C, 5},{0x0037, 6} - }, - { - {0x001D, 5},{0x0004, 6},{0x00B6, 8},{0x006A, 8}, - {0x05B9,11},{0x16E1,13},{0x16E0,13},{0x0007, 4}, - {0x016F, 9},{0x000C, 4},{0x000D, 4},{0x0009, 4}, - {0x0008, 4},{0x000F, 4},{0x000A, 4},{0x0003, 4}, - {0x0017, 5},{0x0002, 4},{0x0004, 4},{0x001C, 5}, - {0x002C, 6},{0x006B, 8},{0x0B71,12},{0x0005, 4}, - {0x0003, 5},{0x001B, 6},{0x005A, 7},{0x0034, 7}, - {0x0005, 6},{0x02DD,10},{0x0000, 4},{0x000C, 5} - }, - { - {0x0003, 4},{0x007F, 7},{0x00A1, 8},{0x00A0, 8}, - {0x020C,10},{0x0834,12},{0x106B,13},{0x0007, 4}, - {0x0082, 8},{0x000E, 4},{0x000D, 4},{0x000B, 4}, - {0x000C, 4},{0x0000, 3},{0x0009, 4},{0x0002, 4}, - {0x0011, 5},{0x001E, 5},{0x0015, 5},{0x003E, 6}, - {0x0040, 7},{0x041B,11},{0x106A,13},{0x0006, 4}, - {0x000A, 5},{0x0029, 6},{0x007E, 7},{0x0051, 7}, - {0x0021, 6},{0x0107, 9},{0x0004, 4},{0x000B, 5} - }, - { - {0x0007, 4},{0x001B, 6},{0x00F6, 8},{0x00E9, 8}, - {0x03A1,10},{0x0740,11},{0x0E82,12},{0x001F, 5}, - {0x01EF, 9},{0x0001, 3},{0x0002, 3},{0x000B, 4}, - {0x000C, 4},{0x000D, 4},{0x0008, 4},{0x001C, 5}, - {0x0003, 5},{0x0012, 5},{0x0002, 5},{0x0075, 7}, - {0x01D1, 9},{0x1D07,13},{0x1D06,13},{0x000A, 4}, - {0x0013, 5},{0x003B, 6},{0x001A, 6},{0x007A, 7}, - {0x003C, 6},{0x01EE, 9},{0x0000, 4},{0x000C, 5} - }, - { - {0x000D, 4},{0x003D, 6},{0x0042, 7},{0x0037, 7}, - {0x00D9, 9},{0x0362,11},{0x06C6,12},{0x001F, 5}, - {0x0086, 8},{0x0001, 3},{0x0002, 3},{0x000C, 4}, - {0x000B, 4},{0x000A, 4},{0x0001, 4},{0x000F, 5}, - {0x0025, 6},{0x003C, 6},{0x001A, 6},{0x0087, 8}, - {0x01B0,10},{0x0D8F,13},{0x0D8E,13},{0x000E, 4}, - {0x0013, 5},{0x000C, 5},{0x0024, 6},{0x0020, 6}, - {0x0011, 5},{0x006D, 8},{0x0000, 4},{0x000E, 5} - }, - { - {0x0000, 3},{0x0012, 5},{0x0076, 7},{0x0077, 7}, - {0x014D, 9},{0x0533,11},{0x14C9,13},{0x0013, 5}, - {0x00A5, 8},{0x0002, 3},{0x0003, 3},{0x000B, 4}, - {0x000C, 4},{0x0008, 4},{0x001A, 5},{0x002B, 6}, - {0x0075, 7},{0x0074, 7},{0x00A7, 8},{0x0298,10}, - 
{0x14C8,13},{0x14CB,13},{0x14CA,13},{0x000F, 4}, - {0x001C, 5},{0x0007, 5},{0x002A, 6},{0x0028, 6}, - {0x001B, 5},{0x00A4, 8},{0x0002, 4},{0x0006, 5} - }, - { - {0x0002, 3},{0x001A, 5},{0x002B, 6},{0x003A, 6}, - {0x00ED, 8},{0x0283,10},{0x0A0A,12},{0x0004, 5}, - {0x00A1, 8},{0x0004, 3},{0x0003, 3},{0x000B, 4}, - {0x000C, 4},{0x001F, 5},{0x0006, 5},{0x0077, 7}, - {0x00A3, 8},{0x00A2, 8},{0x0140, 9},{0x1417,13}, - {0x1416,13},{0x0A09,12},{0x0A08,12},{0x0000, 3}, - {0x001E, 5},{0x0007, 5},{0x002A, 6},{0x0029, 6}, - {0x001C, 5},{0x00EC, 8},{0x001B, 5},{0x0005, 5} - }, - { - {0x0002, 3},{0x0002, 4},{0x0018, 5},{0x001D, 5}, - {0x0035, 6},{0x00E4, 8},{0x01CF,11},{0x001D, 7}, - {0x0072, 9},{0x0004, 3},{0x0005, 3},{0x0006, 4}, - {0x0007, 4},{0x0006, 5},{0x0073, 7},{0x0038, 8}, - {0x01CE,11},{0x039B,12},{0x0398,12},{0x0733,13}, - {0x0732,13},{0x0735,13},{0x0734,13},{0x0000, 3}, - {0x001F, 5},{0x001B, 5},{0x0034, 6},{0x000F, 6}, - {0x001E, 5},{0x00E5, 8},{0x0019, 5},{0x0038, 6} - }, - { - {0x0016, 5},{0x0050, 7},{0x0172, 9},{0x02E7,10}, - {0x1732,13},{0x2E67,14},{0x2E66,14},{0x0006, 4}, - {0x0051, 7},{0x0001, 3},{0x0000, 3},{0x000D, 4}, - {0x000C, 4},{0x0009, 4},{0x001C, 5},{0x0009, 5}, - {0x001C, 6},{0x001D, 6},{0x005D, 7},{0x00B8, 8}, - {0x05CD,11},{0x1731,13},{0x1730,13},{0x000F, 4}, - {0x0005, 4},{0x000F, 5},{0x0008, 5},{0x0029, 6}, - {0x001D, 5},{0x002F, 6},{0x0008, 4},{0x0015, 5} - }, - { - {0x0009, 4},{0x0021, 6},{0x0040, 7},{0x00AD, 8}, - {0x02B0,10},{0x1589,13},{0x1588,13},{0x001C, 5}, - {0x005F, 7},{0x0000, 3},{0x000F, 4},{0x000D, 4}, - {0x000C, 4},{0x0006, 4},{0x0011, 5},{0x002A, 6}, - {0x0057, 7},{0x005E, 7},{0x0041, 7},{0x0159, 9}, - {0x0563,11},{0x158B,13},{0x158A,13},{0x0001, 3}, - {0x0005, 4},{0x0014, 5},{0x003B, 6},{0x002E, 6}, - {0x0004, 4},{0x003A, 6},{0x0007, 4},{0x0016, 5} - }, - { - {0x000E, 4},{0x0007, 5},{0x0046, 7},{0x0045, 7}, - {0x0064, 9},{0x032A,12},{0x0657,13},{0x0018, 5}, - {0x000D, 6},{0x0000, 3},{0x000F, 4},{0x000A, 4}, - {0x000B, 4},{0x001A, 
5},{0x0036, 6},{0x0047, 7}, - {0x0044, 7},{0x0018, 7},{0x0033, 8},{0x00CB,10}, - {0x0656,13},{0x0329,12},{0x0328,12},{0x0002, 3}, - {0x0006, 4},{0x0019, 5},{0x000E, 5},{0x0037, 6}, - {0x0009, 4},{0x000F, 5},{0x0002, 4},{0x0010, 5} - }, - { - {0x0003, 3},{0x0018, 5},{0x0023, 6},{0x0077, 7}, - {0x0194, 9},{0x1956,13},{0x32AF,14},{0x003A, 6}, - {0x0076, 7},{0x0002, 3},{0x0001, 3},{0x001F, 5}, - {0x001E, 5},{0x0014, 5},{0x0022, 6},{0x0064, 7}, - {0x0197, 9},{0x0196, 9},{0x032B,10},{0x0654,11}, - {0x32AE,14},{0x1955,13},{0x1954,13},{0x0000, 3}, - {0x0009, 4},{0x001C, 5},{0x0015, 5},{0x0010, 5}, - {0x000D, 4},{0x0017, 5},{0x0016, 5},{0x0033, 6} - }, - { - {0x0005, 3},{0x0006, 4},{0x003E, 6},{0x0010, 5}, - {0x0048, 7},{0x093F,12},{0x24FA,14},{0x0032, 6}, - {0x0067, 7},{0x0002, 3},{0x0001, 3},{0x001B, 5}, - {0x001E, 5},{0x0034, 6},{0x0066, 7},{0x0092, 8}, - {0x0126, 9},{0x024E,10},{0x049E,11},{0x49F7,15}, - {0x49F6,15},{0x24F9,14},{0x24F8,14},{0x0000, 3}, - {0x0007, 4},{0x0018, 5},{0x0011, 5},{0x003F, 6}, - {0x000E, 4},{0x0013, 5},{0x0035, 6},{0x0025, 6} - }, - { - {0x0005, 3},{0x0008, 4},{0x0012, 5},{0x001C, 5}, - {0x001C, 6},{0x00EA, 9},{0x1D75,14},{0x001E, 6}, - {0x0066, 7},{0x0001, 3},{0x0002, 3},{0x001B, 5}, - {0x001A, 5},{0x001F, 6},{0x003B, 7},{0x0074, 8}, - {0x01D6,10},{0x03AF,11},{0x1D74,14},{0x1D77,14}, - {0x1D76,14},{0x0EB9,13},{0x0EB8,13},{0x000F, 4}, - {0x0006, 4},{0x0013, 5},{0x003B, 6},{0x003A, 6}, - {0x0000, 3},{0x0018, 5},{0x0032, 6},{0x0067, 7} - }, - { - {0x0004, 3},{0x000A, 4},{0x001B, 5},{0x000C, 4}, - {0x000D, 5},{0x00E6, 8},{0x0684,11},{0x0072, 7}, - {0x00E7, 8},{0x0002, 3},{0x0001, 3},{0x0017, 5}, - {0x0016, 5},{0x0018, 6},{0x00D1, 8},{0x01A0, 9}, - {0x0686,11},{0x0D0F,12},{0x0D0A,12},{0x1A17,13}, - {0x1A16,13},{0x1A1D,13},{0x1A1C,13},{0x000F, 4}, - {0x001D, 5},{0x000E, 5},{0x0035, 6},{0x0038, 6}, - {0x0000, 3},{0x000F, 5},{0x0019, 6},{0x0069, 7} - }, - { - {0x0003, 3},{0x000C, 4},{0x001B, 5},{0x0000, 3}, - {0x0003, 4},{0x002E, 6},{0x0051, 
9},{0x00BC, 8}, - {0x0053, 9},{0x0004, 3},{0x0002, 3},{0x0016, 5}, - {0x0015, 5},{0x0015, 7},{0x0050, 9},{0x00A4,10}, - {0x0294,12},{0x052B,13},{0x052A,13},{0x052D,13}, - {0x052C,13},{0x052F,13},{0x052E,13},{0x000E, 4}, - {0x001A, 5},{0x0004, 5},{0x0028, 6},{0x0029, 6}, - {0x000F, 4},{0x000B, 6},{0x005F, 7},{0x00BD, 8} - }, - { - {0x0003, 4},{0x0009, 6},{0x00D0, 8},{0x01A3, 9}, - {0x0344,10},{0x0D14,12},{0x1A2B,13},{0x0004, 4}, - {0x0015, 7},{0x0000, 3},{0x000F, 4},{0x000B, 4}, - {0x000C, 4},{0x000E, 4},{0x0009, 4},{0x001B, 5}, - {0x000A, 5},{0x0014, 5},{0x000D, 5},{0x002A, 6}, - {0x0014, 7},{0x068B,11},{0x1A2A,13},{0x0008, 4}, - {0x000B, 5},{0x002B, 6},{0x000B, 6},{0x0069, 7}, - {0x0035, 6},{0x0008, 6},{0x0007, 4},{0x000C, 5} - }, - { - {0x000A, 4},{0x003C, 6},{0x0032, 7},{0x0030, 7}, - {0x00C5, 9},{0x0621,12},{0x0620,12},{0x001F, 5}, - {0x0033, 7},{0x0001, 3},{0x0000, 3},{0x000E, 4}, - {0x000D, 4},{0x000C, 4},{0x0004, 4},{0x000D, 5}, - {0x0026, 6},{0x0027, 6},{0x0014, 6},{0x0063, 8}, - {0x0189,10},{0x0623,12},{0x0622,12},{0x000B, 4}, - {0x0012, 5},{0x003D, 6},{0x0022, 6},{0x0015, 6}, - {0x000B, 5},{0x0023, 6},{0x0007, 4},{0x0010, 5} - }, - { - {0x000F, 4},{0x000C, 5},{0x0043, 7},{0x0010, 6}, - {0x0044, 8},{0x0114,10},{0x0455,12},{0x0018, 5}, - {0x0023, 7},{0x0001, 3},{0x0000, 3},{0x000E, 4}, - {0x000D, 4},{0x0009, 4},{0x0019, 5},{0x0009, 5}, - {0x0017, 6},{0x0016, 6},{0x0042, 7},{0x008B, 9}, - {0x0454,12},{0x0457,12},{0x0456,12},{0x000B, 4}, - {0x0015, 5},{0x000A, 5},{0x0029, 6},{0x0020, 6}, - {0x000D, 5},{0x0028, 6},{0x0007, 4},{0x0011, 5} - }, - { - {0x0001, 3},{0x001A, 5},{0x0029, 6},{0x002A, 6}, - {0x00A0, 8},{0x0285,10},{0x1425,13},{0x0002, 5}, - {0x0000, 7},{0x0002, 3},{0x0003, 3},{0x000C, 4}, - {0x000B, 4},{0x0008, 4},{0x0012, 5},{0x0001, 6}, - {0x0051, 7},{0x0001, 7},{0x0143, 9},{0x0508,11}, - {0x1424,13},{0x1427,13},{0x1426,13},{0x000F, 4}, - {0x001C, 5},{0x0003, 5},{0x0037, 6},{0x002B, 6}, - {0x0013, 5},{0x0036, 6},{0x001D, 5},{0x0001, 5} - }, - { - 
{0x0004, 3},{0x001F, 5},{0x003D, 6},{0x0006, 5}, - {0x0016, 7},{0x0053, 9},{0x014A,11},{0x0034, 6}, - {0x002A, 8},{0x0002, 3},{0x0003, 3},{0x000B, 4}, - {0x000C, 4},{0x001C, 5},{0x0037, 6},{0x0017, 7}, - {0x002B, 8},{0x0028, 8},{0x00A4,10},{0x052D,13}, - {0x052C,13},{0x052F,13},{0x052E,13},{0x0000, 3}, - {0x001D, 5},{0x0007, 5},{0x0004, 5},{0x0035, 6}, - {0x0014, 5},{0x0036, 6},{0x0015, 5},{0x003C, 6} - }, - { - {0x0004, 3},{0x000A, 4},{0x0007, 5},{0x001D, 5}, - {0x0009, 6},{0x01F3, 9},{0x07C7,11},{0x0008, 6}, - {0x01F0, 9},{0x0003, 3},{0x0002, 3},{0x000D, 4}, - {0x000C, 4},{0x0017, 5},{0x007D, 7},{0x01F2, 9}, - {0x07C6,11},{0x07C5,11},{0x1F12,13},{0x3E27,14}, - {0x3E26,14},{0x1F11,13},{0x1F10,13},{0x0000, 3}, - {0x001E, 5},{0x0006, 5},{0x0039, 6},{0x0038, 6}, - {0x003F, 6},{0x002C, 6},{0x0005, 5},{0x002D, 6} - }, - { - {0x0002, 3},{0x0007, 4},{0x0018, 5},{0x0003, 4}, - {0x0005, 5},{0x0035, 7},{0x004F, 9},{0x0012, 7}, - {0x04E5,13},{0x0005, 3},{0x0004, 3},{0x000D, 4}, - {0x000E, 4},{0x0033, 6},{0x0026, 8},{0x009D,10}, - {0x04E4,13},{0x04E7,13},{0x04E6,13},{0x04E1,13}, - {0x04E0,13},{0x04E3,13},{0x04E2,13},{0x0000, 3}, - {0x001F, 5},{0x000C, 5},{0x003D, 6},{0x003C, 6}, - {0x0032, 6},{0x0034, 7},{0x001B, 6},{0x0008, 6} - }, - { - {0x0000, 3},{0x0004, 4},{0x001C, 5},{0x000F, 4}, - {0x0002, 4},{0x0007, 5},{0x0075, 7},{0x00E8, 8}, - {0x1D2A,13},{0x0005, 3},{0x0004, 3},{0x000D, 4}, - {0x000C, 4},{0x0077, 7},{0x0E96,12},{0x3A57,14}, - {0x3A56,14},{0x3A5D,14},{0x3A5C,14},{0x3A5F,14}, - {0x3A5E,14},{0x1D29,13},{0x1D28,13},{0x0003, 3}, - {0x0006, 5},{0x000A, 5},{0x002C, 7},{0x0017, 6}, - {0x0076, 7},{0x01D3, 9},{0x03A4,10},{0x002D, 7} - }, - { - {0x000A, 4},{0x0024, 6},{0x00BF, 8},{0x0085, 8}, - {0x0211,10},{0x0842,12},{0x1087,13},{0x0018, 5}, - {0x0020, 6},{0x0001, 3},{0x0002, 3},{0x000E, 4}, - {0x000D, 4},{0x0007, 4},{0x0013, 5},{0x0025, 6}, - {0x005E, 7},{0x0043, 7},{0x00BE, 8},{0x0109, 9}, - {0x1086,13},{0x0841,12},{0x0840,12},{0x000F, 4}, - {0x0001, 4},{0x0011, 
5},{0x0000, 5},{0x002E, 6}, - {0x0019, 5},{0x0001, 5},{0x0006, 4},{0x0016, 5} - }, - { - {0x0002, 3},{0x000F, 5},{0x006F, 7},{0x0061, 7}, - {0x0374,10},{0x1BA8,13},{0x3753,14},{0x0012, 5}, - {0x0036, 6},{0x0000, 3},{0x0001, 3},{0x000A, 4}, - {0x000B, 4},{0x001A, 5},{0x0031, 6},{0x0060, 7}, - {0x00DC, 8},{0x01BB, 9},{0x06EB,11},{0x1BAB,13}, - {0x3752,14},{0x3755,14},{0x3754,14},{0x000E, 4}, - {0x0006, 4},{0x0013, 5},{0x000E, 5},{0x003E, 6}, - {0x0008, 4},{0x001E, 5},{0x0019, 5},{0x003F, 6} - }, - { - {0x0003, 3},{0x001C, 5},{0x0025, 6},{0x0024, 6}, - {0x01DA, 9},{0x1DBD,13},{0x3B7C,14},{0x003C, 6}, - {0x003D, 6},{0x0000, 3},{0x0001, 3},{0x000B, 4}, - {0x000A, 4},{0x000B, 5},{0x0077, 7},{0x00EC, 8}, - {0x03B6,10},{0x076E,11},{0x1DBF,13},{0x76FB,15}, - {0x76FA,15},{0x3B79,14},{0x3B78,14},{0x000D, 4}, - {0x001F, 5},{0x0013, 5},{0x000A, 5},{0x0008, 5}, - {0x000C, 4},{0x0008, 4},{0x0009, 5},{0x003A, 6} - }, - { - {0x0005, 3},{0x0003, 4},{0x0004, 5},{0x0010, 5}, - {0x008F, 8},{0x0475,11},{0x11D1,13},{0x0079, 7}, - {0x0027, 6},{0x0002, 3},{0x0003, 3},{0x0001, 4}, - {0x0000, 4},{0x0026, 6},{0x0046, 7},{0x011C, 9}, - {0x0477,11},{0x08ED,12},{0x11D0,13},{0x11D3,13}, - {0x11D2,13},{0x11D9,13},{0x11D8,13},{0x000D, 4}, - {0x001F, 5},{0x0012, 5},{0x0005, 5},{0x003D, 6}, - {0x000C, 4},{0x000E, 4},{0x0022, 6},{0x0078, 7} - }, - { - {0x0005, 3},{0x000C, 4},{0x001B, 5},{0x0000, 4}, - {0x0006, 6},{0x03E2,10},{0x3E3D,14},{0x000F, 7}, - {0x0034, 6},{0x0003, 3},{0x0002, 3},{0x001E, 5}, - {0x001D, 5},{0x007D, 7},{0x01F0, 9},{0x07C6,11}, - {0x3E3C,14},{0x3E3F,14},{0x3E3E,14},{0x3E39,14}, - {0x3E38,14},{0x3E3B,14},{0x3E3A,14},{0x0008, 4}, - {0x001C, 5},{0x0002, 5},{0x003F, 6},{0x0035, 6}, - {0x0009, 4},{0x0001, 3},{0x000E, 7},{0x00F9, 8} - }, - { - {0x0004, 3},{0x000B, 4},{0x0001, 4},{0x000A, 4}, - {0x001E, 6},{0x00E0, 9},{0x0E1E,13},{0x0071, 8}, - {0x0039, 7},{0x0007, 3},{0x0006, 3},{0x000D, 5}, - {0x000C, 5},{0x0020, 7},{0x01C2,10},{0x1C3F,14}, - 
{0x1C3E,14},{0x0E19,13},{0x0E18,13},{0x0E1B,13}, - {0x0E1A,13},{0x0E1D,13},{0x0E1C,13},{0x0000, 4}, - {0x0009, 5},{0x001D, 6},{0x001F, 6},{0x0011, 6}, - {0x0005, 4},{0x0001, 3},{0x0043, 8},{0x0042, 8} - }, - { - {0x0004, 3},{0x000D, 4},{0x0007, 4},{0x0002, 3}, - {0x0014, 5},{0x016C, 9},{0x16D1,13},{0x02DF,10}, - {0x016E, 9},{0x0000, 2},{0x0007, 3},{0x002C, 6}, - {0x002B, 6},{0x02DE,10},{0x16D0,13},{0x16D3,13}, - {0x16D2,13},{0x2DB5,14},{0x2DB4,14},{0x2DB7,14}, - {0x2DB6,14},{0x16D9,13},{0x16D8,13},{0x000C, 5}, - {0x002A, 6},{0x005A, 7},{0x001B, 6},{0x001A, 6}, - {0x0017, 5},{0x000C, 4},{0x05B7,11},{0x05B5,11} - }, - { - {0x0002, 2},{0x000F, 4},{0x001C, 5},{0x000C, 4}, - {0x003B, 6},{0x01AC, 9},{0x1AD8,13},{0x35B3,14}, - {0x35B2,14},{0x0001, 2},{0x0000, 2},{0x0069, 7}, - {0x0068, 7},{0x35BD,14},{0x35BC,14},{0x35BF,14}, - {0x35BE,14},{0x35B9,14},{0x35B8,14},{0x35BB,14}, - {0x35BA,14},{0x35B5,14},{0x35B4,14},{0x01A9, 9}, - {0x01A8, 9},{0x035A,10},{0x00D7, 8},{0x00D5, 8}, - {0x003A, 6},{0x001B, 5},{0x35B7,14},{0x35B6,14} - }, - { - {0x0000, 3},{0x0010, 5},{0x0072, 7},{0x0071, 7}, - {0x0154, 9},{0x0AAB,12},{0x0AA8,12},{0x0014, 5}, - {0x0070, 7},{0x0002, 3},{0x0003, 3},{0x000C, 4}, - {0x000B, 4},{0x0003, 4},{0x0011, 5},{0x0073, 7}, - {0x0054, 7},{0x00AB, 8},{0x02AB,10},{0x1553,13}, - {0x1552,13},{0x1555,13},{0x1554,13},{0x000D, 4}, - {0x001E, 5},{0x0012, 5},{0x003E, 6},{0x002B, 6}, - {0x0002, 4},{0x003F, 6},{0x001D, 5},{0x0013, 5} - }, - { - {0x0003, 3},{0x001F, 5},{0x0029, 6},{0x003D, 6}, - {0x000C, 7},{0x0069,10},{0x0345,13},{0x0002, 5}, - {0x0028, 6},{0x0002, 3},{0x0001, 3},{0x000E, 4}, - {0x000C, 4},{0x0015, 5},{0x0007, 6},{0x001B, 8}, - {0x006B,10},{0x006A,10},{0x0344,13},{0x0347,13}, - {0x0346,13},{0x01A1,12},{0x01A0,12},{0x000B, 4}, - {0x001A, 5},{0x0012, 5},{0x0000, 5},{0x003C, 6}, - {0x0008, 4},{0x001B, 5},{0x0013, 5},{0x0001, 5} - }, - { - {0x0004, 3},{0x0004, 4},{0x003F, 6},{0x0014, 5}, - {0x0056, 7},{0x015C, 9},{0x15D5,13},{0x003C, 6}, - {0x002A, 6},{0x0000, 
3},{0x0001, 3},{0x000E, 4}, - {0x000D, 4},{0x000C, 5},{0x00AF, 8},{0x02BB,10}, - {0x15D4,13},{0x15D7,13},{0x15D6,13},{0x15D1,13}, - {0x15D0,13},{0x15D3,13},{0x15D2,13},{0x000B, 4}, - {0x0019, 5},{0x000D, 5},{0x003E, 6},{0x0031, 6}, - {0x0007, 4},{0x0005, 4},{0x003D, 6},{0x0030, 6} - }, - { - {0x0005, 3},{0x0008, 4},{0x001A, 5},{0x0000, 4}, - {0x0036, 6},{0x0011, 8},{0x0106,12},{0x000A, 7}, - {0x006E, 7},{0x0002, 3},{0x0003, 3},{0x0003, 4}, - {0x0002, 4},{0x006F, 7},{0x0021, 9},{0x020F,13}, - {0x020E,13},{0x0101,12},{0x0100,12},{0x0103,12}, - {0x0102,12},{0x0105,12},{0x0104,12},{0x000C, 4}, - {0x001E, 5},{0x0003, 5},{0x003E, 6},{0x003F, 6}, - {0x0009, 4},{0x000E, 4},{0x000B, 7},{0x0009, 7} - }, - { - {0x0002, 3},{0x000E, 4},{0x001E, 5},{0x000C, 4}, - {0x001F, 5},{0x006E, 7},{0x00AD,10},{0x00AF,10}, - {0x0014, 7},{0x0004, 3},{0x0003, 3},{0x001A, 5}, - {0x0017, 5},{0x002A, 8},{0x0576,13},{0x0AEF,14}, - {0x0AEE,14},{0x0571,13},{0x0570,13},{0x0573,13}, - {0x0572,13},{0x0575,13},{0x0574,13},{0x0003, 4}, - {0x0016, 5},{0x0004, 5},{0x0036, 6},{0x000B, 6}, - {0x000A, 4},{0x0000, 3},{0x006F, 7},{0x00AC,10} - }, - { - {0x0004, 3},{0x0005, 4},{0x0003, 3},{0x0001, 3}, - {0x0004, 4},{0x002F, 6},{0x0526,11},{0x1495,13}, - {0x00A6, 8},{0x0007, 3},{0x0006, 3},{0x002D, 6}, - {0x002C, 6},{0x1494,13},{0x1497,13},{0x1496,13}, - {0x1491,13},{0x1490,13},{0x1493,13},{0x1492,13}, - {0x293D,14},{0x293C,14},{0x293F,14},{0x0000, 3}, - {0x0028, 6},{0x00A5, 8},{0x0148, 9},{0x00A7, 8}, - {0x002E, 6},{0x0015, 5},{0x0A4E,12},{0x293E,14} - }, - { - {0x0004, 3},{0x0005, 4},{0x0003, 3},{0x0001, 3}, - {0x0004, 4},{0x002F, 6},{0x0526,11},{0x1495,13}, - {0x00A6, 8},{0x0007, 3},{0x0006, 3},{0x002D, 6}, - {0x002C, 6},{0x1494,13},{0x1497,13},{0x1496,13}, - {0x1491,13},{0x1490,13},{0x1493,13},{0x1492,13}, - {0x293D,14},{0x293C,14},{0x293F,14},{0x0000, 3}, - {0x0028, 6},{0x00A5, 8},{0x0148, 9},{0x00A7, 8}, - {0x002E, 6},{0x0015, 5},{0x0A4E,12},{0x293E,14} - }, - { - {0x0004, 3},{0x0005, 4},{0x0003, 
3},{0x0001, 3}, - {0x0004, 4},{0x002F, 6},{0x0526,11},{0x1495,13}, - {0x00A6, 8},{0x0007, 3},{0x0006, 3},{0x002D, 6}, - {0x002C, 6},{0x1494,13},{0x1497,13},{0x1496,13}, - {0x1491,13},{0x1490,13},{0x1493,13},{0x1492,13}, - {0x293D,14},{0x293C,14},{0x293F,14},{0x0000, 3}, - {0x0028, 6},{0x00A5, 8},{0x0148, 9},{0x00A7, 8}, - {0x002E, 6},{0x0015, 5},{0x0A4E,12},{0x293E,14} - }, - { - {0x0003, 3},{0x0011, 5},{0x0020, 6},{0x0074, 7}, - {0x010D, 9},{0x0863,12},{0x0860,12},{0x000A, 5}, - {0x0075, 7},{0x0001, 3},{0x0000, 3},{0x000B, 4}, - {0x000A, 4},{0x0018, 5},{0x0038, 6},{0x0042, 7}, - {0x010F, 9},{0x010E, 9},{0x0219,10},{0x10C3,13}, - {0x10C2,13},{0x10C5,13},{0x10C4,13},{0x000F, 4}, - {0x0004, 4},{0x0019, 5},{0x000B, 5},{0x0039, 6}, - {0x0009, 4},{0x001B, 5},{0x001A, 5},{0x003B, 6} - }, - { - {0x0005, 3},{0x0001, 4},{0x003E, 6},{0x0001, 5}, - {0x00E2, 8},{0x1C6F,13},{0x38D9,14},{0x0039, 6}, - {0x001F, 6},{0x0002, 3},{0x0001, 3},{0x0009, 4}, - {0x0008, 4},{0x0000, 5},{0x0070, 7},{0x01C7, 9}, - {0x038C,10},{0x071A,11},{0x38D8,14},{0x38DB,14}, - {0x38DA,14},{0x38DD,14},{0x38DC,14},{0x000D, 4}, - {0x001D, 5},{0x000E, 5},{0x003F, 6},{0x003C, 6}, - {0x000C, 4},{0x0006, 4},{0x003D, 6},{0x001E, 6} - }, - { - {0x0006, 3},{0x000B, 4},{0x0011, 5},{0x001E, 5}, - {0x0074, 7},{0x03AA,10},{0x1D5C,13},{0x0001, 6}, - {0x0021, 6},{0x0001, 3},{0x0002, 3},{0x0007, 4}, - {0x0006, 4},{0x003E, 6},{0x00EB, 8},{0x01D4, 9}, - {0x0EAF,12},{0x3ABB,14},{0x3ABA,14},{0x1D59,13}, - {0x1D58,13},{0x1D5B,13},{0x1D5A,13},{0x000A, 4}, - {0x001C, 5},{0x0001, 5},{0x003F, 6},{0x003B, 6}, - {0x0001, 4},{0x0009, 4},{0x0020, 6},{0x0000, 6} - }, - { - {0x0004, 3},{0x000A, 4},{0x0017, 5},{0x0004, 4}, - {0x0016, 6},{0x016A, 9},{0x16B1,13},{0x0017, 7}, - {0x005B, 7},{0x0006, 3},{0x0007, 3},{0x0001, 4}, - {0x0000, 4},{0x000A, 6},{0x02D7,10},{0x0B5A,12}, - {0x16B0,13},{0x16B3,13},{0x16B2,13},{0x2D6D,14}, - {0x2D6C,14},{0x2D6F,14},{0x2D6E,14},{0x0006, 4}, - {0x000A, 5},{0x0004, 5},{0x002C, 6},{0x0017, 6}, - {0x0003, 
4},{0x0007, 4},{0x0016, 7},{0x00B4, 8} - }, - { - {0x0005, 3},{0x000D, 4},{0x0005, 4},{0x0009, 4}, - {0x0033, 6},{0x0193, 9},{0x192C,13},{0x0061, 8}, - {0x0031, 7},{0x0000, 2},{0x0007, 3},{0x0010, 5}, - {0x0011, 5},{0x00C8, 8},{0x192F,13},{0x325B,14}, - {0x325A,14},{0x1929,13},{0x1928,13},{0x192B,13}, - {0x192A,13},{0x325D,14},{0x325C,14},{0x0018, 5}, - {0x001A, 6},{0x001B, 6},{0x0065, 7},{0x0019, 6}, - {0x0004, 4},{0x0007, 4},{0x0060, 8},{0x0324,10} - }, - { - {0x0006, 3},{0x0000, 3},{0x0002, 4},{0x000F, 4}, - {0x0039, 6},{0x01D9, 9},{0x1D82,13},{0x0761,11}, - {0x03BE,10},{0x0001, 2},{0x0002, 2},{0x000F, 6}, - {0x000E, 6},{0x0762,11},{0x3B07,14},{0x3B06,14}, - {0x3B1D,14},{0x3B1C,14},{0x3B1F,14},{0x3B1E,14}, - {0x3B19,14},{0x3B18,14},{0x3B1B,14},{0x0038, 6}, - {0x01DE, 9},{0x00ED, 8},{0x03BF,10},{0x00EE, 8}, - {0x003A, 6},{0x0006, 5},{0x0EC0,12},{0x3B1A,14} - }, - { - {0x0000, 2},{0x0002, 3},{0x000F, 5},{0x0006, 4}, - {0x001C, 6},{0x01D0,10},{0x0E8C,13},{0x1D1B,14}, - {0x1D1A,14},{0x0003, 2},{0x0002, 2},{0x00EA, 9}, - {0x00E9, 9},{0x0E89,13},{0x0E88,13},{0x0E8B,13}, - {0x0E8A,13},{0x1D65,14},{0x1D64,14},{0x1D67,14}, - {0x1D66,14},{0x1D61,14},{0x1D60,14},{0x03AD,11}, - {0x1D63,14},{0x1D62,14},{0x1D1D,14},{0x1D1C,14}, - {0x003B, 7},{0x01D7,10},{0x1D1F,14},{0x1D1E,14} - }, - { - {0x0002, 2},{0x000F, 4},{0x001C, 5},{0x000C, 4}, - {0x003B, 6},{0x01AC, 9},{0x1AD8,13},{0x35B3,14}, - {0x35B2,14},{0x0001, 2},{0x0000, 2},{0x0069, 7}, - {0x0068, 7},{0x35BD,14},{0x35BC,14},{0x35BF,14}, - {0x35BE,14},{0x35B9,14},{0x35B8,14},{0x35BB,14}, - {0x35BA,14},{0x35B5,14},{0x35B4,14},{0x01A9, 9}, - {0x01A8, 9},{0x035A,10},{0x00D7, 8},{0x00D5, 8}, - {0x003A, 6},{0x001B, 5},{0x35B7,14},{0x35B6,14} - } -}; - - - -/*A description of a Huffman code value used when encoding the tree.*/ -typedef struct{ - /*The bit pattern, left-shifted so that the MSB of all patterns is - aligned.*/ - ogg_uint32_t pattern; - /*The amount the bit pattern was shifted.*/ - int shift; - /*The token this bit 
pattern represents.*/ - int token; -}oc_huff_entry; - - - -/*Compares two oc_huff_entry structures by their bit patterns. - _c1: The first entry to compare. - _c2: The second entry to compare. - Return: <0 if _c1<_c2, >0 if _c1>_c2.*/ -static int huff_entry_cmp(const void *_c1,const void *_c2){ - ogg_uint32_t b1; - ogg_uint32_t b2; - b1=((const oc_huff_entry *)_c1)->pattern; - b2=((const oc_huff_entry *)_c2)->pattern; - return b1b2?1:0; -} - -/*Encodes a description of the given Huffman tables. - Although the codes are stored in the encoder as flat arrays, in the bit - stream and in the decoder they are structured as a tree. - This function recovers the tree structure from the flat array and then - writes it out. - Note that the codes MUST form a Huffman code, and not merely a prefix-free - code, since the binary tree is assumed to be full. - _opb: The buffer to store the tree in. - _codes: The Huffman tables to pack. - Return: 0 on success, or a negative value if one of the given Huffman tables - does not form a full, prefix-free code.*/ -int oc_huff_codes_pack(oggpack_buffer *_opb, - const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]){ - int i; - for(i=0;i>1)<<(maxlen+1>>1))-1; - /*Copy over the codes into our temporary workspace. - The bit patterns are aligned, and the original entry each code is from - is stored as well.*/ - for(j=0;jentries[j].shift;bpos--)oggpackB_write(_opb,0,1); - /*Mark this as a leaf node, and write its value.*/ - oggpackB_write(_opb,1,1); - oggpackB_write(_opb,entries[j].token,5); - /*For each 1 branch we've descended, back up the tree until we reach a - 0 branch.*/ - bit=1< -#include "internal.h" -#include "dct.h" - -/*Performs an inverse 8 point Type-II DCT transform. - The output is scaled by a factor of 2 relative to the orthonormal version of - the transform. - _y: The buffer to store the result in. - Data will be placed in every 8th entry (e.g., in a column of an 8x8 - block). - _x: The input coefficients. 
- The first 8 entries are used (e.g., from a row of an 8x8 block).*/ -static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){ - ogg_int32_t t[8]; - ogg_int32_t r; - /*Stage 1:*/ - /*0-1 butterfly.*/ - t[0]=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16; - t[1]=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16; - /*2-3 rotation by 6pi/16.*/ - t[2]=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16); - t[3]=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16); - /*4-7 rotation by 7pi/16.*/ - t[4]=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16); - /*5-6 rotation by 3pi/16.*/ - t[5]=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16); - t[6]=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16); - t[7]=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16); - /*Stage 2:*/ - /*4-5 butterfly.*/ - r=t[4]+t[5]; - t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16; - t[4]=r; - /*7-6 butterfly.*/ - r=t[7]+t[6]; - t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16; - t[7]=r; - /*Stage 3:*/ - /*0-3 butterfly.*/ - r=t[0]+t[3]; - t[3]=t[0]-t[3]; - t[0]=r; - /*1-2 butterfly.*/ - r=t[1]+t[2]; - t[2]=t[1]-t[2]; - t[1]=r; - /*6-5 butterfly.*/ - r=t[6]+t[5]; - t[5]=t[6]-t[5]; - t[6]=r; - /*Stage 4:*/ - /*0-7 butterfly.*/ - _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); - /*1-6 butterfly.*/ - _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); - /*2-5 butterfly.*/ - _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); - /*3-4 butterfly.*/ - _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); - _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); - _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); - _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); - _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); -} - -/*Performs an inverse 8 point Type-II DCT transform. - The output is scaled by a factor of 2 relative to the orthonormal version of - the transform. - _y: The buffer to store the result in. - Data will be placed in every 8th entry (e.g., in a column of an 8x8 - block). - _x: The input coefficients. - Only the first 4 entries are used. 
- The other 4 are assumed to be 0.*/ -static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){ - ogg_int32_t t[8]; - ogg_int32_t r; - /*Stage 1:*/ - t[0]=OC_C4S4*_x[0]>>16; - t[2]=OC_C6S2*_x[2]>>16; - t[3]=OC_C2S6*_x[2]>>16; - t[4]=OC_C7S1*_x[1]>>16; - t[5]=-(OC_C5S3*_x[3]>>16); - t[6]=OC_C3S5*_x[3]>>16; - t[7]=OC_C1S7*_x[1]>>16; - /*Stage 2:*/ - r=t[4]+t[5]; - t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16; - t[4]=r; - r=t[7]+t[6]; - t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16; - t[7]=r; - /*Stage 3:*/ - t[1]=t[0]+t[2]; - t[2]=t[0]-t[2]; - r=t[0]+t[3]; - t[3]=t[0]-t[3]; - t[0]=r; - r=t[6]+t[5]; - t[5]=t[6]-t[5]; - t[6]=r; - /*Stage 4:*/ - _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); - _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); - _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); - _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); - _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); - _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); - _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); - _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); -} - -/*Performs an inverse 8 point Type-II DCT transform. - The output is scaled by a factor of 2 relative to the orthonormal version of - the transform. - _y: The buffer to store the result in. - Data will be placed in every 8th entry (e.g., in a column of an 8x8 - block). - _x: The input coefficients. - Only the first 3 entries are used. 
- The other 5 are assumed to be 0.*/ -static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){ - ogg_int32_t t[8]; - ogg_int32_t r; - /*Stage 1:*/ - t[0]=OC_C4S4*_x[0]>>16; - t[2]=OC_C6S2*_x[2]>>16; - t[3]=OC_C2S6*_x[2]>>16; - t[4]=OC_C7S1*_x[1]>>16; - t[7]=OC_C1S7*_x[1]>>16; - /*Stage 2:*/ - t[5]=OC_C4S4*t[4]>>16; - t[6]=OC_C4S4*t[7]>>16; - /*Stage 3:*/ - t[1]=t[0]+t[2]; - t[2]=t[0]-t[2]; - r=t[0]+t[3]; - t[3]=t[0]-t[3]; - t[0]=r; - r=t[6]+t[5]; - t[5]=t[6]-t[5]; - t[6]=r; - /*Stage 4:*/ - _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); - _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); - _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); - _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); - _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); - _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); - _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); - _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); -} - -/*Performs an inverse 8 point Type-II DCT transform. - The output is scaled by a factor of 2 relative to the orthonormal version of - the transform. - _y: The buffer to store the result in. - Data will be placed in every 8th entry (e.g., in a column of an 8x8 - block). - _x: The input coefficients. - Only the first 2 entries are used. - The other 6 are assumed to be 0.*/ -static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){ - ogg_int32_t t[8]; - ogg_int32_t r; - /*Stage 1:*/ - t[0]=OC_C4S4*_x[0]>>16; - t[4]=OC_C7S1*_x[1]>>16; - t[7]=OC_C1S7*_x[1]>>16; - /*Stage 2:*/ - t[5]=OC_C4S4*t[4]>>16; - t[6]=OC_C4S4*t[7]>>16; - /*Stage 3:*/ - r=t[6]+t[5]; - t[5]=t[6]-t[5]; - t[6]=r; - /*Stage 4:*/ - _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); - _y[1<<3]=(ogg_int16_t)(t[0]+t[6]); - _y[2<<3]=(ogg_int16_t)(t[0]+t[5]); - _y[3<<3]=(ogg_int16_t)(t[0]+t[4]); - _y[4<<3]=(ogg_int16_t)(t[0]-t[4]); - _y[5<<3]=(ogg_int16_t)(t[0]-t[5]); - _y[6<<3]=(ogg_int16_t)(t[0]-t[6]); - _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); -} - -/*Performs an inverse 8 point Type-II DCT transform. - The output is scaled by a factor of 2 relative to the orthonormal version of - the transform. - _y: The buffer to store the result in. 
- Data will be placed in every 8th entry (e.g., in a column of an 8x8 - block). - _x: The input coefficients. - Only the first entry is used. - The other 7 are assumed to be 0.*/ -static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){ - _y[0<<3]=_y[1<<3]=_y[2<<3]=_y[3<<3]= - _y[4<<3]=_y[5<<3]=_y[6<<3]=_y[7<<3]=(ogg_int16_t)(OC_C4S4*_x[0]>>16); -} - -/*Performs an inverse 8x8 Type-II DCT transform. - The input is assumed to be scaled by a factor of 4 relative to orthonormal - version of the transform. - All coefficients but the first 3 in zig-zag scan order are assumed to be 0: - x x 0 0 0 0 0 0 - x 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - _y: The buffer to store the result in. - This may be the same as _x. - _x: The input coefficients.*/ -static void oc_idct8x8_3(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ - const ogg_int16_t *in; - ogg_int16_t *end; - ogg_int16_t *out; - ogg_int16_t w[64]; - /*Transform rows of x into columns of w.*/ - idct8_2(w,_x); - idct8_1(w+1,_x+8); - /*Transform rows of w into columns of y.*/ - for(in=w,out=_y,end=out+8;out>4); -} - -/*Performs an inverse 8x8 Type-II DCT transform. - The input is assumed to be scaled by a factor of 4 relative to orthonormal - version of the transform. - All coefficients but the first 10 in zig-zag scan order are assumed to be 0: - x x x x 0 0 0 0 - x x x 0 0 0 0 0 - x x 0 0 0 0 0 0 - x 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - _y: The buffer to store the result in. - This may be the same as _x. 
- _x: The input coefficients.*/ -static void oc_idct8x8_10(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ - const ogg_int16_t *in; - ogg_int16_t *end; - ogg_int16_t *out; - ogg_int16_t w[64]; - /*Transform rows of x into columns of w.*/ - idct8_4(w,_x); - idct8_3(w+1,_x+8); - idct8_2(w+2,_x+16); - idct8_1(w+3,_x+24); - /*Transform rows of w into columns of y.*/ - for(in=w,out=_y,end=out+8;out>4); -} - -/*Performs an inverse 8x8 Type-II DCT transform. - The input is assumed to be scaled by a factor of 4 relative to orthonormal - version of the transform. - _y: The buffer to store the result in. - This may be the same as _x. - _x: The input coefficients.*/ -static void oc_idct8x8_slow(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ - const ogg_int16_t *in; - ogg_int16_t *end; - ogg_int16_t *out; - ogg_int16_t w[64]; - /*Transform rows of x into columns of w.*/ - for(in=_x,out=w,end=out+8;out>4); -} - -void oc_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64], - int _last_zzi){ - (*_state->opt_vtable.idct8x8)(_y,_last_zzi); -} - -/*Performs an inverse 8x8 Type-II DCT transform. - The input is assumed to be scaled by a factor of 4 relative to orthonormal - version of the transform.*/ -void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi){ - /*_last_zzi is subtly different from an actual count of the number of - coefficients we decoded for this block. - It contains the value of zzi BEFORE the final token in the block was - decoded. - In most cases this is an EOB token (the continuation of an EOB run from a - previous block counts), and so this is the same as the coefficient count. - However, in the case that the last token was NOT an EOB token, but filled - the block up with exactly 64 coefficients, _last_zzi will be less than 64. - Provided the last token was not a pure zero run, the minimum value it can - be is 46, and so that doesn't affect any of the cases in this routine. 
- However, if the last token WAS a pure zero run of length 63, then _last_zzi - will be 1 while the number of coefficients decoded is 64. - Thus, we will trigger the following special case, where the real - coefficient count would not. - Note also that a zero run of length 64 will give _last_zzi a value of 0, - but we still process the DC coefficient, which might have a non-zero value - due to DC prediction. - Although convoluted, this is arguably the correct behavior: it allows us to - use a smaller transform when the block ends with a long zero run instead - of a normal EOB token. - It could be smarter... multiple separate zero runs at the end of a block - will fool it, but an encoder that generates these really deserves what it - gets. - Needless to say we inherited this approach from VP3.*/ - /*Then perform the iDCT.*/ - if(_last_zzi<3)oc_idct8x8_3(_y,_y); - else if(_last_zzi<10)oc_idct8x8_10(_y,_y); - else oc_idct8x8_slow(_y,_y); -} diff --git a/drivers/theora/info.c b/drivers/theora/info.c deleted file mode 100644 index 6b9762978b..0000000000 --- a/drivers/theora/info.c +++ /dev/null @@ -1,131 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: info.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -#include -#include -#include -#include "internal.h" - - - -/*This is more or less the same as strncasecmp, but that doesn't exist - everywhere, and this is a fairly trivial function, so we include it. - Note: We take advantage of the fact that we know _n is less than or equal to - the length of at least one of the strings.*/ -static int oc_tagcompare(const char *_s1,const char *_s2,int _n){ - int c; - for(c=0;c<_n;c++){ - if(toupper(_s1[c])!=toupper(_s2[c]))return !0; - } - return _s1[c]!='='; -} - - - -void th_info_init(th_info *_info){ - memset(_info,0,sizeof(*_info)); - _info->version_major=TH_VERSION_MAJOR; - _info->version_minor=TH_VERSION_MINOR; - _info->version_subminor=TH_VERSION_SUB; - _info->keyframe_granule_shift=6; -} - -void th_info_clear(th_info *_info){ - memset(_info,0,sizeof(*_info)); -} - - - -void th_comment_init(th_comment *_tc){ - memset(_tc,0,sizeof(*_tc)); -} - -void th_comment_add(th_comment *_tc,char *_comment){ - char **user_comments; - int *comment_lengths; - int comment_len; - user_comments=_ogg_realloc(_tc->user_comments, - (_tc->comments+2)*sizeof(*_tc->user_comments)); - if(user_comments==NULL)return; - _tc->user_comments=user_comments; - comment_lengths=_ogg_realloc(_tc->comment_lengths, - (_tc->comments+2)*sizeof(*_tc->comment_lengths)); - if(comment_lengths==NULL)return; - _tc->comment_lengths=comment_lengths; - comment_len=strlen(_comment); - comment_lengths[_tc->comments]=comment_len; - user_comments[_tc->comments]=_ogg_malloc(comment_len+1); - if(user_comments[_tc->comments]==NULL)return; - memcpy(_tc->user_comments[_tc->comments],_comment,comment_len+1); - _tc->comments++; 
- _tc->user_comments[_tc->comments]=NULL; -} - -void th_comment_add_tag(th_comment *_tc,char *_tag,char *_val){ - char *comment; - int tag_len; - int val_len; - tag_len=strlen(_tag); - val_len=strlen(_val); - /*+2 for '=' and '\0'.*/ - comment=_ogg_malloc(tag_len+val_len+2); - if(comment==NULL)return; - memcpy(comment,_tag,tag_len); - comment[tag_len]='='; - memcpy(comment+tag_len+1,_val,val_len+1); - th_comment_add(_tc,comment); - _ogg_free(comment); -} - -char *th_comment_query(th_comment *_tc,char *_tag,int _count){ - long i; - int found; - int tag_len; - tag_len=strlen(_tag); - found=0; - for(i=0;i<_tc->comments;i++){ - if(!oc_tagcompare(_tc->user_comments[i],_tag,tag_len)){ - /*We return a pointer to the data, not a copy.*/ - if(_count==found++)return _tc->user_comments[i]+tag_len+1; - } - } - /*Didn't find anything.*/ - return NULL; -} - -int th_comment_query_count(th_comment *_tc,char *_tag){ - long i; - int tag_len; - int count; - tag_len=strlen(_tag); - count=0; - for(i=0;i<_tc->comments;i++){ - if(!oc_tagcompare(_tc->user_comments[i],_tag,tag_len))count++; - } - return count; -} - -void th_comment_clear(th_comment *_tc){ - if(_tc!=NULL){ - long i; - for(i=0;i<_tc->comments;i++)_ogg_free(_tc->user_comments[i]); - _ogg_free(_tc->user_comments); - _ogg_free(_tc->comment_lengths); - _ogg_free(_tc->vendor); - memset(_tc,0,sizeof(*_tc)); - } -} diff --git a/drivers/theora/internal.c b/drivers/theora/internal.c deleted file mode 100644 index 0fe4f63e72..0000000000 --- a/drivers/theora/internal.c +++ /dev/null @@ -1,262 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: internal.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -#include -#include -#include -#include "internal.h" - - - -/*A map from the index in the zig zag scan to the coefficient number in a - block. - All zig zag indices beyond 63 are sent to coefficient 64, so that zero runs - past the end of a block in bogus streams get mapped to a known location.*/ -const unsigned char OC_FZIG_ZAG[128]={ - 0, 1, 8,16, 9, 2, 3,10, - 17,24,32,25,18,11, 4, 5, - 12,19,26,33,40,48,41,34, - 27,20,13, 6, 7,14,21,28, - 35,42,49,56,57,50,43,36, - 29,22,15,23,30,37,44,51, - 58,59,52,45,38,31,39,46, - 53,60,61,54,47,55,62,63, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64 -}; - -/*A map from the coefficient number in a block to its index in the zig zag - scan.*/ -const unsigned char OC_IZIG_ZAG[64]={ - 0, 1, 5, 6,14,15,27,28, - 2, 4, 7,13,16,26,29,42, - 3, 8,12,17,25,30,41,43, - 9,11,18,24,31,40,44,53, - 10,19,23,32,39,45,52,54, - 20,22,33,38,46,51,55,60, - 21,34,37,47,50,56,59,61, - 35,36,48,49,57,58,62,63 -}; - -/*A map from physical macro block ordering to bitstream macro block - ordering within a super block.*/ -const unsigned char OC_MB_MAP[2][2]={{0,3},{1,2}}; - -/*A list of the indices in the oc_mb.map array that can be valid for each of - the various chroma decimation types.*/ -const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]={ - {0,1,2,3,4,8}, - {0,1,2,3,4,5,8,9}, - {0,1,2,3,4,6,8,10}, - {0,1,2,3,4,5,6,7,8,9,10,11} -}; - -/*The number of indices in the oc_mb.map array that can be valid for each of - the 
various chroma decimation types.*/ -const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS]={6,8,8,12}; - -/*The number of extra bits that are coded with each of the DCT tokens. - Each DCT token has some fixed number of additional bits (possibly 0) stored - after the token itself, containing, for example, coefficient magnitude, - sign bits, etc.*/ -const unsigned char OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS]={ - 0,0,0,2,3,4,12,3,6, - 0,0,0,0, - 1,1,1,1,2,3,4,5,6,10, - 1,1,1,1,1,3,4, - 2,3 -}; - - - -int oc_ilog(unsigned _v){ - int ret; - for(ret=0;_v;ret++)_v>>=1; - return ret; -} - - - -/*The function used to fill in the chroma plane motion vectors for a macro - block when 4 different motion vectors are specified in the luma plane. - This version is for use with chroma decimated in the X and Y directions - (4:2:0). - _cbmvs: The chroma block-level motion vectors to fill in. - _lbmvs: The luma block-level motion vectors.*/ -static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ - int dx; - int dy; - dx=_lbmvs[0][0]+_lbmvs[1][0]+_lbmvs[2][0]+_lbmvs[3][0]; - dy=_lbmvs[0][1]+_lbmvs[1][1]+_lbmvs[2][1]+_lbmvs[3][1]; - _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,2,2); - _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,2,2); -} - -/*The function used to fill in the chroma plane motion vectors for a macro - block when 4 different motion vectors are specified in the luma plane. - This version is for use with chroma decimated in the Y direction. - _cbmvs: The chroma block-level motion vectors to fill in. 
- _lbmvs: The luma block-level motion vectors.*/ -static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ - int dx; - int dy; - dx=_lbmvs[0][0]+_lbmvs[2][0]; - dy=_lbmvs[0][1]+_lbmvs[2][1]; - _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1); - _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1); - dx=_lbmvs[1][0]+_lbmvs[3][0]; - dy=_lbmvs[1][1]+_lbmvs[3][1]; - _cbmvs[1][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1); - _cbmvs[1][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1); -} - -/*The function used to fill in the chroma plane motion vectors for a macro - block when 4 different motion vectors are specified in the luma plane. - This version is for use with chroma decimated in the X direction (4:2:2). - _cbmvs: The chroma block-level motion vectors to fill in. - _lbmvs: The luma block-level motion vectors.*/ -static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ - int dx; - int dy; - dx=_lbmvs[0][0]+_lbmvs[1][0]; - dy=_lbmvs[0][1]+_lbmvs[1][1]; - _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1); - _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1); - dx=_lbmvs[2][0]+_lbmvs[3][0]; - dy=_lbmvs[2][1]+_lbmvs[3][1]; - _cbmvs[2][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1); - _cbmvs[2][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1); -} - -/*The function used to fill in the chroma plane motion vectors for a macro - block when 4 different motion vectors are specified in the luma plane. - This version is for use with no chroma decimation (4:4:4). - _cbmvs: The chroma block-level motion vectors to fill in. - _lmbmv: The luma macro-block level motion vector to fill in for use in - prediction. 
- _lbmvs: The luma block-level motion vectors.*/ -static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ - memcpy(_cbmvs,_lbmvs,4*sizeof(_lbmvs[0])); -} - -/*A table of functions used to fill in the chroma plane motion vectors for a - macro block when 4 different motion vectors are specified in the luma - plane.*/ -const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={ - (oc_set_chroma_mvs_func)oc_set_chroma_mvs00, - (oc_set_chroma_mvs_func)oc_set_chroma_mvs01, - (oc_set_chroma_mvs_func)oc_set_chroma_mvs10, - (oc_set_chroma_mvs_func)oc_set_chroma_mvs11 -}; - - - -void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz){ - size_t rowsz; - size_t colsz; - size_t datsz; - char *ret; - colsz=_height*sizeof(void *); - rowsz=_sz*_width; - datsz=rowsz*_height; - /*Alloc array and row pointers.*/ - ret=(char *)_ogg_malloc(datsz+colsz); - if(ret==NULL)return NULL; - /*Initialize the array.*/ - if(ret!=NULL){ - size_t i; - void **p; - char *datptr; - p=(void **)ret; - i=_height; - for(datptr=ret+colsz;i-->0;p++,datptr+=rowsz)*p=(void *)datptr; - } - return (void **)ret; -} - -void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz){ - size_t colsz; - size_t rowsz; - size_t datsz; - char *ret; - colsz=_height*sizeof(void *); - rowsz=_sz*_width; - datsz=rowsz*_height; - /*Alloc array and row pointers.*/ - ret=(char *)_ogg_calloc(datsz+colsz,1); - if(ret==NULL)return NULL; - /*Initialize the array.*/ - if(ret!=NULL){ - size_t i; - void **p; - char *datptr; - p=(void **)ret; - i=_height; - for(datptr=ret+colsz;i-->0;p++,datptr+=rowsz)*p=(void *)datptr; - } - return (void **)ret; -} - -void oc_free_2d(void *_ptr){ - _ogg_free(_ptr); -} - -/*Fills in a Y'CbCr buffer with a pointer to the image data in the first - buffer, but with the opposite vertical orientation. - _dst: The destination buffer. - This can be the same as _src. 
- _src: The source buffer.*/ -void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst, - const th_ycbcr_buffer _src){ - int pli; - for(pli=0;pli<3;pli++){ - _dst[pli].width=_src[pli].width; - _dst[pli].height=_src[pli].height; - _dst[pli].stride=-_src[pli].stride; - _dst[pli].data=_src[pli].data - +(1-_dst[pli].height)*(ptrdiff_t)_dst[pli].stride; - } -} - -const char *th_version_string(void){ - return OC_VENDOR_STRING; -} - -ogg_uint32_t th_version_number(void){ - return (TH_VERSION_MAJOR<<16)+(TH_VERSION_MINOR<<8)+TH_VERSION_SUB; -} - -/*Determines the packet type. - Note that this correctly interprets a 0-byte packet as a video data packet. - Return: 1 for a header packet, 0 for a data packet.*/ -int th_packet_isheader(ogg_packet *_op){ - return _op->bytes>0?_op->packet[0]>>7:0; -} - -/*Determines the frame type of a video data packet. - Note that this correctly interprets a 0-byte packet as a delta frame. - Return: 1 for a key frame, 0 for a delta frame, and -1 for a header - packet.*/ -int th_packet_iskeyframe(ogg_packet *_op){ - return _op->bytes<=0?0:_op->packet[0]&0x80?-1:!(_op->packet[0]&0x40); -} diff --git a/drivers/theora/internal.h b/drivers/theora/internal.h deleted file mode 100644 index d81263e13e..0000000000 --- a/drivers/theora/internal.h +++ /dev/null @@ -1,509 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: internal.h 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ -#if !defined(_internal_H) -# define _internal_H (1) -# include -# include -# if defined(HAVE_CONFIG_H) -# include -# endif -# include "theora/codec.h" -# include "theora/theora.h" - -# if defined(_MSC_VER) -/*Disable missing EMMS warnings.*/ -# pragma warning(disable:4799) -/*Thank you Microsoft, I know the order of operations.*/ -# pragma warning(disable:4554) -# endif -/*You, too, gcc.*/ -# if defined(__GNUC_PREREQ) -# if __GNUC_PREREQ(4,2) -# pragma GCC diagnostic ignored "-Wparentheses" -# endif -# endif - -# include "ocintrin.h" -# include "huffman.h" -# include "quant.h" - -/*Some assembly constructs require aligned operands.*/ -# if defined(OC_X86_ASM) -# if defined(__GNUC__) -# define OC_ALIGN8(expr) expr __attribute__((aligned(8))) -# define OC_ALIGN16(expr) expr __attribute__((aligned(16))) -# elif defined(_MSC_VER) -# define OC_ALIGN8(expr) __declspec (align(8)) expr -# define OC_ALIGN16(expr) __declspec (align(16)) expr -# endif -# endif -# if !defined(OC_ALIGN8) -# define OC_ALIGN8(expr) expr -# endif -# if !defined(OC_ALIGN16) -# define OC_ALIGN16(expr) expr -# endif - - - -typedef struct oc_sb_flags oc_sb_flags; -typedef struct oc_border_info oc_border_info; -typedef struct oc_fragment oc_fragment; -typedef struct oc_fragment_plane oc_fragment_plane; -typedef struct oc_base_opt_vtable oc_base_opt_vtable; -typedef struct oc_base_opt_data oc_base_opt_data; -typedef struct oc_state_dispatch_vtable oc_state_dispatch_vtable; -typedef struct oc_theora_state oc_theora_state; - - - -/*This library's version.*/ -# define OC_VENDOR_STRING "Xiph.Org libtheora 1.1 20090822 (Thusnelda)" - -/*Theora 
bitstream version.*/ -# define TH_VERSION_MAJOR (3) -# define TH_VERSION_MINOR (2) -# define TH_VERSION_SUB (1) -# define TH_VERSION_CHECK(_info,_maj,_min,_sub) \ - ((_info)->version_major>(_maj)||(_info)->version_major==(_maj)&& \ - ((_info)->version_minor>(_min)||(_info)->version_minor==(_min)&& \ - (_info)->version_subminor>=(_sub))) - -/*A keyframe.*/ -#define OC_INTRA_FRAME (0) -/*A predicted frame.*/ -#define OC_INTER_FRAME (1) -/*A frame of unknown type (frame type decision has not yet been made).*/ -#define OC_UNKWN_FRAME (-1) - -/*The amount of padding to add to the reconstructed frame buffers on all - sides. - This is used to allow unrestricted motion vectors without special casing. - This must be a multiple of 2.*/ -#define OC_UMV_PADDING (16) - -/*Frame classification indices.*/ -/*The previous golden frame.*/ -#define OC_FRAME_GOLD (0) -/*The previous frame.*/ -#define OC_FRAME_PREV (1) -/*The current frame.*/ -#define OC_FRAME_SELF (2) - -/*The input or output buffer.*/ -#define OC_FRAME_IO (3) - -/*Macroblock modes.*/ -/*Macro block is invalid: It is never coded.*/ -#define OC_MODE_INVALID (-1) -/*Encoded difference from the same macro block in the previous frame.*/ -#define OC_MODE_INTER_NOMV (0) -/*Encoded with no motion compensated prediction.*/ -#define OC_MODE_INTRA (1) -/*Encoded difference from the previous frame offset by the given motion - vector.*/ -#define OC_MODE_INTER_MV (2) -/*Encoded difference from the previous frame offset by the last coded motion - vector.*/ -#define OC_MODE_INTER_MV_LAST (3) -/*Encoded difference from the previous frame offset by the second to last - coded motion vector.*/ -#define OC_MODE_INTER_MV_LAST2 (4) -/*Encoded difference from the same macro block in the previous golden - frame.*/ -#define OC_MODE_GOLDEN_NOMV (5) -/*Encoded difference from the previous golden frame offset by the given motion - vector.*/ -#define OC_MODE_GOLDEN_MV (6) -/*Encoded difference from the previous frame offset by the individual 
motion - vectors given for each block.*/ -#define OC_MODE_INTER_MV_FOUR (7) -/*The number of (coded) modes.*/ -#define OC_NMODES (8) - -/*Determines the reference frame used for a given MB mode.*/ -#define OC_FRAME_FOR_MODE(_x) \ - OC_UNIBBLE_TABLE32(OC_FRAME_PREV,OC_FRAME_SELF,OC_FRAME_PREV,OC_FRAME_PREV, \ - OC_FRAME_PREV,OC_FRAME_GOLD,OC_FRAME_GOLD,OC_FRAME_PREV,(_x)) - -/*Constants for the packet state machine common between encoder and decoder.*/ - -/*Next packet to emit/read: Codec info header.*/ -#define OC_PACKET_INFO_HDR (-3) -/*Next packet to emit/read: Comment header.*/ -#define OC_PACKET_COMMENT_HDR (-2) -/*Next packet to emit/read: Codec setup header.*/ -#define OC_PACKET_SETUP_HDR (-1) -/*No more packets to emit/read.*/ -#define OC_PACKET_DONE (INT_MAX) - - - -/*Super blocks are 32x32 segments of pixels in a single color plane indexed - in image order. - Internally, super blocks are broken up into four quadrants, each of which - contains a 2x2 pattern of blocks, each of which is an 8x8 block of pixels. - Quadrants, and the blocks within them, are indexed in a special order called - a "Hilbert curve" within the super block. - - In order to differentiate between the Hilbert-curve indexing strategy and - the regular image order indexing strategy, blocks indexed in image order - are called "fragments". - Fragments are indexed in image order, left to right, then bottom to top, - from Y' plane to Cb plane to Cr plane. - - The co-located fragments in all image planes corresponding to the location - of a single quadrant of a luma plane super block form a macro block. - Thus there is only a single set of macro blocks for all planes, each of which - contains between 6 and 12 fragments, depending on the pixel format. - Therefore macro block information is kept in a separate set of arrays from - super blocks to avoid unused space in the other planes. - The lists are indexed in super block order. 
- That is, the macro block corresponding to the macro block mbi in (luma plane) - super block sbi is at index (sbi<<2|mbi). - Thus the number of macro blocks in each dimension is always twice the number - of super blocks, even when only an odd number fall inside the coded frame. - These "extra" macro blocks are just an artifact of our internal data layout, - and not part of the coded stream; they are flagged with a negative MB mode.*/ - - - -/*A single quadrant of the map from a super block to fragment numbers.*/ -typedef ptrdiff_t oc_sb_map_quad[4]; -/*A map from a super block to fragment numbers.*/ -typedef oc_sb_map_quad oc_sb_map[4]; -/*A single plane of the map from a macro block to fragment numbers.*/ -typedef ptrdiff_t oc_mb_map_plane[4]; -/*A map from a macro block to fragment numbers.*/ -typedef oc_mb_map_plane oc_mb_map[3]; -/*A motion vector.*/ -typedef signed char oc_mv[2]; - - - -/*Super block information.*/ -struct oc_sb_flags{ - unsigned char coded_fully:1; - unsigned char coded_partially:1; - unsigned char quad_valid:4; -}; - - - -/*Information about a fragment which intersects the border of the displayable - region. - This marks which pixels belong to the displayable region.*/ -struct oc_border_info{ - /*A bit mask marking which pixels are in the displayable region. - Pixel (x,y) corresponds to bit (y<<3|x).*/ - ogg_int64_t mask; - /*The number of pixels in the displayable region. - This is always positive, and always less than 64.*/ - int npixels; -}; - - - -/*Fragment information.*/ -struct oc_fragment{ - /*A flag indicating whether or not this fragment is coded.*/ - unsigned coded:1; - /*A flag indicating that this entire fragment lies outside the displayable - region of the frame. - Note the contrast with an invalid macro block, which is outside the coded - frame, not just the displayable one. 
- There are no fragments outside the coded frame by construction.*/ - unsigned invalid:1; - /*The index of the quality index used for this fragment's AC coefficients.*/ - unsigned qii:6; - /*The mode of the macroblock this fragment belongs to.*/ - unsigned mb_mode:3; - /*The index of the associated border information for fragments which lie - partially outside the displayable region. - For fragments completely inside or outside this region, this is -1. - Note that the C standard requires an explicit signed keyword for bitfield - types, since some compilers may treat them as unsigned without it.*/ - signed int borderi:5; - /*The prediction-corrected DC component. - Note that the C standard requires an explicit signed keyword for bitfield - types, since some compilers may treat them as unsigned without it.*/ - signed int dc:16; -}; - - - -/*A description of each fragment plane.*/ -struct oc_fragment_plane{ - /*The number of fragments in the horizontal direction.*/ - int nhfrags; - /*The number of fragments in the vertical direction.*/ - int nvfrags; - /*The offset of the first fragment in the plane.*/ - ptrdiff_t froffset; - /*The total number of fragments in the plane.*/ - ptrdiff_t nfrags; - /*The number of super blocks in the horizontal direction.*/ - unsigned nhsbs; - /*The number of super blocks in the vertical direction.*/ - unsigned nvsbs; - /*The offset of the first super block in the plane.*/ - unsigned sboffset; - /*The total number of super blocks in the plane.*/ - unsigned nsbs; -}; - - - -/*The shared (encoder and decoder) functions that have accelerated variants.*/ -struct oc_base_opt_vtable{ - void (*frag_copy)(unsigned char *_dst, - const unsigned char *_src,int _ystride); - void (*frag_recon_intra)(unsigned char *_dst,int _ystride, - const ogg_int16_t _residue[64]); - void (*frag_recon_inter)(unsigned char *_dst, - const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); - void (*frag_recon_inter2)(unsigned char *_dst,const unsigned 
char *_src1, - const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]); - void (*idct8x8)(ogg_int16_t _y[64],int _last_zzi); - void (*state_frag_recon)(const oc_theora_state *_state,ptrdiff_t _fragi, - int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); - void (*state_frag_copy_list)(const oc_theora_state *_state, - const ptrdiff_t *_fragis,ptrdiff_t _nfragis, - int _dst_frame,int _src_frame,int _pli); - void (*state_loop_filter_frag_rows)(const oc_theora_state *_state, - int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); - void (*restore_fpu)(void); -}; - -/*The shared (encoder and decoder) tables that vary according to which variants - of the above functions are used.*/ -struct oc_base_opt_data{ - const unsigned char *dct_fzig_zag; -}; - - -/*State information common to both the encoder and decoder.*/ -struct oc_theora_state{ - /*The stream information.*/ - th_info info; - /*Table for shared accelerated functions.*/ - oc_base_opt_vtable opt_vtable; - /*Table for shared data used by accelerated functions.*/ - oc_base_opt_data opt_data; - /*CPU flags to detect the presence of extended instruction sets.*/ - ogg_uint32_t cpu_flags; - /*The fragment plane descriptions.*/ - oc_fragment_plane fplanes[3]; - /*The list of fragments, indexed in image order.*/ - oc_fragment *frags; - /*The the offset into the reference frame buffer to the upper-left pixel of - each fragment.*/ - ptrdiff_t *frag_buf_offs; - /*The motion vector for each fragment.*/ - oc_mv *frag_mvs; - /*The total number of fragments in a single frame.*/ - ptrdiff_t nfrags; - /*The list of super block maps, indexed in image order.*/ - oc_sb_map *sb_maps; - /*The list of super block flags, indexed in image order.*/ - oc_sb_flags *sb_flags; - /*The total number of super blocks in a single frame.*/ - unsigned nsbs; - /*The fragments from each color plane that belong to each macro block. - Fragments are stored in image order (left to right then top to bottom). 
- When chroma components are decimated, the extra fragments have an index of - -1.*/ - oc_mb_map *mb_maps; - /*The list of macro block modes. - A negative number indicates the macro block lies entirely outside the - coded frame.*/ - signed char *mb_modes; - /*The number of macro blocks in the X direction.*/ - unsigned nhmbs; - /*The number of macro blocks in the Y direction.*/ - unsigned nvmbs; - /*The total number of macro blocks.*/ - size_t nmbs; - /*The list of coded fragments, in coded order. - Uncoded fragments are stored in reverse order from the end of the list.*/ - ptrdiff_t *coded_fragis; - /*The number of coded fragments in each plane.*/ - ptrdiff_t ncoded_fragis[3]; - /*The total number of coded fragments.*/ - ptrdiff_t ntotal_coded_fragis; - /*The index of the buffers being used for each OC_FRAME_* reference frame.*/ - int ref_frame_idx[4]; - /*The actual buffers used for the previously decoded frames.*/ - th_ycbcr_buffer ref_frame_bufs[4]; - /*The storage for the reference frame buffers.*/ - unsigned char *ref_frame_data[4]; - /*The strides for each plane in the reference frames.*/ - int ref_ystride[3]; - /*The number of unique border patterns.*/ - int nborders; - /*The unique border patterns for all border fragments. 
- The borderi field of fragments which straddle the border indexes this - list.*/ - oc_border_info borders[16]; - /*The frame number of the last keyframe.*/ - ogg_int64_t keyframe_num; - /*The frame number of the current frame.*/ - ogg_int64_t curframe_num; - /*The granpos of the current frame.*/ - ogg_int64_t granpos; - /*The type of the current frame.*/ - unsigned char frame_type; - /*The bias to add to the frame count when computing granule positions.*/ - unsigned char granpos_bias; - /*The number of quality indices used in the current frame.*/ - unsigned char nqis; - /*The quality indices of the current frame.*/ - unsigned char qis[3]; - /*The dequantization tables, stored in zig-zag order, and indexed by - qi, pli, qti, and zzi.*/ - ogg_uint16_t *dequant_tables[64][3][2]; - OC_ALIGN16(oc_quant_table dequant_table_data[64][3][2]); - /*Loop filter strength parameters.*/ - unsigned char loop_filter_limits[64]; -}; - - - -/*The function type used to fill in the chroma plane motion vectors for a - macro block when 4 different motion vectors are specified in the luma - plane. - _cbmvs: The chroma block-level motion vectors to fill in. - _lmbmv: The luma macro-block level motion vector to fill in for use in - prediction. 
- _lbmvs: The luma block-level motion vectors.*/ -typedef void (*oc_set_chroma_mvs_func)(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]); - - - -/*A map from the index in the zig zag scan to the coefficient number in a - block.*/ -extern const unsigned char OC_FZIG_ZAG[128]; -/*A map from the coefficient number in a block to its index in the zig zag - scan.*/ -extern const unsigned char OC_IZIG_ZAG[64]; -/*A map from physical macro block ordering to bitstream macro block - ordering within a super block.*/ -extern const unsigned char OC_MB_MAP[2][2]; -/*A list of the indices in the oc_mb_map array that can be valid for each of - the various chroma decimation types.*/ -extern const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]; -/*The number of indices in the oc_mb_map array that can be valid for each of - the various chroma decimation types.*/ -extern const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS]; -/*A table of functions used to fill in the Cb,Cr plane motion vectors for a - macro block when 4 different motion vectors are specified in the luma - plane.*/ -extern const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]; - - - -int oc_ilog(unsigned _v); -void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz); -void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz); -void oc_free_2d(void *_ptr); - -void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst, - const th_ycbcr_buffer _src); - -int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs); -void oc_state_clear(oc_theora_state *_state); -void oc_state_vtable_init_c(oc_theora_state *_state); -void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli, - int _y0,int _yend); -void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli); -void oc_state_borders_fill(oc_theora_state *_state,int _refi); -void oc_state_fill_buffer_ptrs(oc_theora_state *_state,int _buf_idx, - th_ycbcr_buffer _img); -int oc_state_mbi_for_pos(oc_theora_state *_state,int _mbx,int 
_mby); -int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2], - int _pli,int _dx,int _dy); - -int oc_state_loop_filter_init(oc_theora_state *_state,int *_bv); -void oc_state_loop_filter(oc_theora_state *_state,int _frame); -#if defined(OC_DUMP_IMAGES) -int oc_state_dump_frame(const oc_theora_state *_state,int _frame, - const char *_suf); -#endif - -/*Shared accelerated functions.*/ -void oc_frag_copy(const oc_theora_state *_state,unsigned char *_dst, - const unsigned char *_src,int _ystride); -void oc_frag_recon_intra(const oc_theora_state *_state, - unsigned char *_dst,int _dst_ystride,const ogg_int16_t _residue[64]); -void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst, - const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); -void oc_frag_recon_inter2(const oc_theora_state *_state, - unsigned char *_dst,const unsigned char *_src1,const unsigned char *_src2, - int _ystride,const ogg_int16_t _residue[64]); -void oc_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64],int _last_zzi); -void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi, - int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); -void oc_state_frag_copy_list(const oc_theora_state *_state, - const ptrdiff_t *_fragis,ptrdiff_t _nfragis, - int _dst_frame,int _src_frame,int _pli); -void oc_state_loop_filter_frag_rows(const oc_theora_state *_state, - int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); -void oc_restore_fpu(const oc_theora_state *_state); - -/*Default pure-C implementations.*/ -void oc_frag_copy_c(unsigned char *_dst, - const unsigned char *_src,int _src_ystride); -void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride, - const ogg_int16_t _residue[64]); -void oc_frag_recon_inter_c(unsigned char *_dst, - const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); -void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1, - const unsigned 
char *_src2,int _ystride,const ogg_int16_t _residue[64]); -void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi); -void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi, - int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); -void oc_state_frag_copy_list_c(const oc_theora_state *_state, - const ptrdiff_t *_fragis,ptrdiff_t _nfragis, - int _dst_frame,int _src_frame,int _pli); -void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state, - int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); -void oc_restore_fpu_c(void); - -/*We need a way to call a few encoder functions without introducing a link-time - dependency into the decoder, while still allowing the old alpha API which - does not distinguish between encoder and decoder objects to be used. - We do this by placing a function table at the start of the encoder object - which can dispatch into the encoder library. - We do a similar thing for the decoder in case we ever decide to split off a - common base library.*/ -typedef void (*oc_state_clear_func)(theora_state *_th); -typedef int (*oc_state_control_func)(theora_state *th,int _req, - void *_buf,size_t _buf_sz); -typedef ogg_int64_t (*oc_state_granule_frame_func)(theora_state *_th, - ogg_int64_t _granulepos); -typedef double (*oc_state_granule_time_func)(theora_state *_th, - ogg_int64_t _granulepos); - - -struct oc_state_dispatch_vtable{ - oc_state_clear_func clear; - oc_state_control_func control; - oc_state_granule_frame_func granule_frame; - oc_state_granule_time_func granule_time; -}; - -#endif diff --git a/drivers/theora/mathops.c b/drivers/theora/mathops.c deleted file mode 100644 index d3fb909194..0000000000 --- a/drivers/theora/mathops.c +++ /dev/null @@ -1,296 +0,0 @@ -#include "mathops.h" -#include - -/*The fastest fallback strategy for platforms with fast multiplication appears - to be based on de Bruijn sequences~\cite{LP98}. 
- Tests confirmed this to be true even on an ARM11, where it is actually faster - than using the native clz instruction. - Define OC_ILOG_NODEBRUIJN to use a simpler fallback on platforms where - multiplication or table lookups are too expensive. - - @UNPUBLISHED{LP98, - author="Charles E. Leiserson and Harald Prokop", - title="Using de {Bruijn} Sequences to Index a 1 in a Computer Word", - month=Jun, - year=1998, - note="\url{http://supertech.csail.mit.edu/papers/debruijn.pdf}" - }*/ -#if !defined(OC_ILOG_NODEBRUIJN)&& \ - !defined(OC_CLZ32)||!defined(OC_CLZ64)&&LONG_MAX<9223372036854775807LL -static const unsigned char OC_DEBRUIJN_IDX32[32]={ - 0, 1,28, 2,29,14,24, 3,30,22,20,15,25,17, 4, 8, - 31,27,13,23,21,19,16, 7,26,12,18, 6,11, 5,10, 9 -}; -#endif - -int oc_ilog32(ogg_uint32_t _v){ -#if defined(OC_CLZ32) - return (OC_CLZ32_OFFS-OC_CLZ32(_v))&-!!_v; -#else -/*On a Pentium M, this branchless version tested as the fastest version without - multiplications on 1,000,000,000 random 32-bit integers, edging out a - similar version with branches, and a 256-entry LUT version.*/ -# if defined(OC_ILOG_NODEBRUIJN) - int ret; - int m; - ret=_v>0; - m=(_v>0xFFFFU)<<4; - _v>>=m; - ret|=m; - m=(_v>0xFFU)<<3; - _v>>=m; - ret|=m; - m=(_v>0xFU)<<2; - _v>>=m; - ret|=m; - m=(_v>3)<<1; - _v>>=m; - ret|=m; - ret+=_v>1; - return ret; -/*This de Bruijn sequence version is faster if you have a fast multiplier.*/ -# else - int ret; - ret=_v>0; - _v|=_v>>1; - _v|=_v>>2; - _v|=_v>>4; - _v|=_v>>8; - _v|=_v>>16; - _v=(_v>>1)+1; - ret+=OC_DEBRUIJN_IDX32[_v*0x77CB531U>>27&0x1F]; - return ret; -# endif -#endif -} - -int oc_ilog64(ogg_int64_t _v){ -#if defined(OC_CLZ64) - return (OC_CLZ64_OFFS-OC_CLZ64(_v))&-!!_v; -#else -# if defined(OC_ILOG_NODEBRUIJN) - ogg_uint32_t v; - int ret; - int m; - ret=_v>0; - m=(_v>0xFFFFFFFFU)<<5; - v=(ogg_uint32_t)(_v>>m); - ret|=m; - m=(v>0xFFFFU)<<4; - v>>=m; - ret|=m; - m=(v>0xFFU)<<3; - v>>=m; - ret|=m; - m=(v>0xFU)<<2; - v>>=m; - ret|=m; - m=(v>3)<<1; - 
v>>=m; - ret|=m; - ret+=v>1; - return ret; -# else -/*If we don't have a 64-bit word, split it into two 32-bit halves.*/ -# if LONG_MAX<9223372036854775807LL - ogg_uint32_t v; - int ret; - int m; - ret=_v>0; - m=(_v>0xFFFFFFFFU)<<5; - v=(ogg_uint32_t)(_v>>m); - ret|=m; - v|=v>>1; - v|=v>>2; - v|=v>>4; - v|=v>>8; - v|=v>>16; - v=(v>>1)+1; - ret+=OC_DEBRUIJN_IDX32[v*0x77CB531U>>27&0x1F]; - return ret; -/*Otherwise do it in one 64-bit operation.*/ -# else - static const unsigned char OC_DEBRUIJN_IDX64[64]={ - 0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40, - 5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57, - 63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56, - 62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58 - }; - int ret; - ret=_v>0; - _v|=_v>>1; - _v|=_v>>2; - _v|=_v>>4; - _v|=_v>>8; - _v|=_v>>16; - _v|=_v>>32; - _v=(_v>>1)+1; - ret+=OC_DEBRUIJN_IDX64[_v*0x218A392CD3D5DBF>>58&0x3F]; - return ret; -# endif -# endif -#endif -} - -/*round(2**(62+i)*atanh(2**(-(i+1)))/log(2))*/ -static const ogg_int64_t OC_ATANH_LOG2[32]={ - 0x32B803473F7AD0F4LL,0x2F2A71BD4E25E916LL,0x2E68B244BB93BA06LL, - 0x2E39FB9198CE62E4LL,0x2E2E683F68565C8FLL,0x2E2B850BE2077FC1LL, - 0x2E2ACC58FE7B78DBLL,0x2E2A9E2DE52FD5F2LL,0x2E2A92A338D53EECLL, - 0x2E2A8FC08F5E19B6LL,0x2E2A8F07E51A485ELL,0x2E2A8ED9BA8AF388LL, - 0x2E2A8ECE2FE7384ALL,0x2E2A8ECB4D3E4B1ALL,0x2E2A8ECA94940FE8LL, - 0x2E2A8ECA6669811DLL,0x2E2A8ECA5ADEDD6ALL,0x2E2A8ECA57FC347ELL, - 0x2E2A8ECA57438A43LL,0x2E2A8ECA57155FB4LL,0x2E2A8ECA5709D510LL, - 0x2E2A8ECA5706F267LL,0x2E2A8ECA570639BDLL,0x2E2A8ECA57060B92LL, - 0x2E2A8ECA57060008LL,0x2E2A8ECA5705FD25LL,0x2E2A8ECA5705FC6CLL, - 0x2E2A8ECA5705FC3ELL,0x2E2A8ECA5705FC33LL,0x2E2A8ECA5705FC30LL, - 0x2E2A8ECA5705FC2FLL,0x2E2A8ECA5705FC2FLL -}; - -/*Computes the binary exponential of _z, a log base 2 in Q57 format.*/ -ogg_int64_t oc_bexp64(ogg_int64_t _z){ - ogg_int64_t w; - ogg_int64_t z; - int ipart; - ipart=(int)(_z>>57); - if(ipart<0)return 0; - if(ipart>=63)return 0x7FFFFFFFFFFFFFFFLL; - 
z=_z-OC_Q57(ipart); - if(z){ - ogg_int64_t mask; - long wlo; - int i; - /*C doesn't give us 64x64->128 muls, so we use CORDIC. - This is not particularly fast, but it's not being used in time-critical - code; it is very accurate.*/ - /*z is the fractional part of the log in Q62 format. - We need 1 bit of headroom since the magnitude can get larger than 1 - during the iteration, and a sign bit.*/ - z<<=5; - /*w is the exponential in Q61 format (since it also needs headroom and can - get as large as 2.0); we could get another bit if we dropped the sign, - but we'll recover that bit later anyway. - Ideally this should start out as - \lim_{n->\infty} 2^{61}/\product_{i=1}^n \sqrt{1-2^{-2i}} - but in order to guarantee convergence we have to repeat iterations 4, - 13 (=3*4+1), and 40 (=3*13+1, etc.), so it winds up somewhat larger.*/ - w=0x26A3D0E401DD846DLL; - for(i=0;;i++){ - mask=-(z<0); - w+=(w>>i+1)+mask^mask; - z-=OC_ATANH_LOG2[i]+mask^mask; - /*Repeat iteration 4.*/ - if(i>=3)break; - z<<=1; - } - for(;;i++){ - mask=-(z<0); - w+=(w>>i+1)+mask^mask; - z-=OC_ATANH_LOG2[i]+mask^mask; - /*Repeat iteration 13.*/ - if(i>=12)break; - z<<=1; - } - for(;i<32;i++){ - mask=-(z<0); - w+=(w>>i+1)+mask^mask; - z=z-(OC_ATANH_LOG2[i]+mask^mask)<<1; - } - wlo=0; - /*Skip the remaining iterations unless we really require that much - precision. - We could have bailed out earlier for smaller iparts, but that would - require initializing w from a table, as the limit doesn't converge to - 61-bit precision until n=30.*/ - if(ipart>30){ - /*For these iterations, we just update the low bits, as the high bits - can't possibly be affected. 
- OC_ATANH_LOG2 has also converged (it actually did so one iteration - earlier, but that's no reason for an extra special case).*/ - for(;;i++){ - mask=-(z<0); - wlo+=(w>>i)+mask^mask; - z-=OC_ATANH_LOG2[31]+mask^mask; - /*Repeat iteration 40.*/ - if(i>=39)break; - z<<=1; - } - for(;i<61;i++){ - mask=-(z<0); - wlo+=(w>>i)+mask^mask; - z=z-(OC_ATANH_LOG2[31]+mask^mask)<<1; - } - } - w=(w<<1)+wlo; - } - else w=(ogg_int64_t)1<<62; - if(ipart<62)w=(w>>61-ipart)+1>>1; - return w; -} - -/*Computes the binary logarithm of _w, returned in Q57 format.*/ -ogg_int64_t oc_blog64(ogg_int64_t _w){ - ogg_int64_t z; - int ipart; - if(_w<=0)return -1; - ipart=OC_ILOGNZ_64(_w)-1; - if(ipart>61)_w>>=ipart-61; - else _w<<=61-ipart; - z=0; - if(_w&_w-1){ - ogg_int64_t x; - ogg_int64_t y; - ogg_int64_t u; - ogg_int64_t mask; - int i; - /*C doesn't give us 64x64->128 muls, so we use CORDIC. - This is not particularly fast, but it's not being used in time-critical - code; it is very accurate.*/ - /*z is the fractional part of the log in Q61 format.*/ - /*x and y are the cosh() and sinh(), respectively, in Q61 format. 
- We are computing z=2*atanh(y/x)=2*atanh((_w-1)/(_w+1)).*/ - x=_w+((ogg_int64_t)1<<61); - y=_w-((ogg_int64_t)1<<61); - for(i=0;i<4;i++){ - mask=-(y<0); - z+=(OC_ATANH_LOG2[i]>>i)+mask^mask; - u=x>>i+1; - x-=(y>>i+1)+mask^mask; - y-=u+mask^mask; - } - /*Repeat iteration 4.*/ - for(i--;i<13;i++){ - mask=-(y<0); - z+=(OC_ATANH_LOG2[i]>>i)+mask^mask; - u=x>>i+1; - x-=(y>>i+1)+mask^mask; - y-=u+mask^mask; - } - /*Repeat iteration 13.*/ - for(i--;i<32;i++){ - mask=-(y<0); - z+=(OC_ATANH_LOG2[i]>>i)+mask^mask; - u=x>>i+1; - x-=(y>>i+1)+mask^mask; - y-=u+mask^mask; - } - /*OC_ATANH_LOG2 has converged.*/ - for(;i<40;i++){ - mask=-(y<0); - z+=(OC_ATANH_LOG2[31]>>i)+mask^mask; - u=x>>i+1; - x-=(y>>i+1)+mask^mask; - y-=u+mask^mask; - } - /*Repeat iteration 40.*/ - for(i--;i<62;i++){ - mask=-(y<0); - z+=(OC_ATANH_LOG2[31]>>i)+mask^mask; - u=x>>i+1; - x-=(y>>i+1)+mask^mask; - y-=u+mask^mask; - } - z=z+8>>4; - } - return OC_Q57(ipart)+z; -} diff --git a/drivers/theora/mathops.h b/drivers/theora/mathops.h deleted file mode 100644 index efbc5377b0..0000000000 --- a/drivers/theora/mathops.h +++ /dev/null @@ -1,141 +0,0 @@ -#if !defined(_mathops_H) -# define _mathops_H (1) -# include - -# ifdef __GNUC_PREREQ -# if __GNUC_PREREQ(3,4) -# include -/*Note the casts to (int) below: this prevents OC_CLZ{32|64}_OFFS from - "upgrading" the type of an entire expression to an (unsigned) size_t.*/ -# if INT_MAX>=2147483647 -# define OC_CLZ32_OFFS ((int)sizeof(unsigned)*CHAR_BIT) -# define OC_CLZ32(_x) (__builtin_clz(_x)) -# elif LONG_MAX>=2147483647L -# define OC_CLZ32_OFFS ((int)sizeof(unsigned long)*CHAR_BIT) -# define OC_CLZ32(_x) (__builtin_clzl(_x)) -# endif -# if INT_MAX>=9223372036854775807LL -# define OC_CLZ64_OFFS ((int)sizeof(unsigned)*CHAR_BIT) -# define OC_CLZ64(_x) (__builtin_clz(_x)) -# elif LONG_MAX>=9223372036854775807LL -# define OC_CLZ64_OFFS ((int)sizeof(unsigned long)*CHAR_BIT) -# define OC_CLZ64(_x) (__builtin_clzl(_x)) -# elif LLONG_MAX>=9223372036854775807LL|| \ - 
__LONG_LONG_MAX__>=9223372036854775807LL -# define OC_CLZ64_OFFS ((int)sizeof(unsigned long long)*CHAR_BIT) -# define OC_CLZ64(_x) (__builtin_clzll(_x)) -# endif -# endif -# endif - - - -/** - * oc_ilog32 - Integer binary logarithm of a 32-bit value. - * @_v: A 32-bit value. - * Returns floor(log2(_v))+1, or 0 if _v==0. - * This is the number of bits that would be required to represent _v in two's - * complement notation with all of the leading zeros stripped. - * The OC_ILOG_32() or OC_ILOGNZ_32() macros may be able to use a builtin - * function instead, which should be faster. - */ -int oc_ilog32(ogg_uint32_t _v); -/** - * oc_ilog64 - Integer binary logarithm of a 64-bit value. - * @_v: A 64-bit value. - * Returns floor(log2(_v))+1, or 0 if _v==0. - * This is the number of bits that would be required to represent _v in two's - * complement notation with all of the leading zeros stripped. - * The OC_ILOG_64() or OC_ILOGNZ_64() macros may be able to use a builtin - * function instead, which should be faster. - */ -int oc_ilog64(ogg_int64_t _v); - - -# if defined(OC_CLZ32) -/** - * OC_ILOGNZ_32 - Integer binary logarithm of a non-zero 32-bit value. - * @_v: A non-zero 32-bit value. - * Returns floor(log2(_v))+1. - * This is the number of bits that would be required to represent _v in two's - * complement notation with all of the leading zeros stripped. - * If _v is zero, the return value is undefined; use OC_ILOG_32() instead. - */ -# define OC_ILOGNZ_32(_v) (OC_CLZ32_OFFS-OC_CLZ32(_v)) -/** - * OC_ILOG_32 - Integer binary logarithm of a 32-bit value. - * @_v: A 32-bit value. - * Returns floor(log2(_v))+1, or 0 if _v==0. - * This is the number of bits that would be required to represent _v in two's - * complement notation with all of the leading zeros stripped. 
- */ -# define OC_ILOG_32(_v) (OC_ILOGNZ_32(_v)&-!!(_v)) -# else -# define OC_ILOGNZ_32(_v) (oc_ilog32(_v)) -# define OC_ILOG_32(_v) (oc_ilog32(_v)) -# endif - -# if defined(CLZ64) -/** - * OC_ILOGNZ_64 - Integer binary logarithm of a non-zero 64-bit value. - * @_v: A non-zero 64-bit value. - * Returns floor(log2(_v))+1. - * This is the number of bits that would be required to represent _v in two's - * complement notation with all of the leading zeros stripped. - * If _v is zero, the return value is undefined; use OC_ILOG_64() instead. - */ -# define OC_ILOGNZ_64(_v) (CLZ64_OFFS-CLZ64(_v)) -/** - * OC_ILOG_64 - Integer binary logarithm of a 64-bit value. - * @_v: A 64-bit value. - * Returns floor(log2(_v))+1, or 0 if _v==0. - * This is the number of bits that would be required to represent _v in two's - * complement notation with all of the leading zeros stripped. - */ -# define OC_ILOG_64(_v) (OC_ILOGNZ_64(_v)&-!!(_v)) -# else -# define OC_ILOGNZ_64(_v) (oc_ilog64(_v)) -# define OC_ILOG_64(_v) (oc_ilog64(_v)) -# endif - -# define OC_STATIC_ILOG0(_v) (!!(_v)) -# define OC_STATIC_ILOG1(_v) (((_v)&0x2)?2:OC_STATIC_ILOG0(_v)) -# define OC_STATIC_ILOG2(_v) \ - (((_v)&0xC)?2+OC_STATIC_ILOG1((_v)>>2):OC_STATIC_ILOG1(_v)) -# define OC_STATIC_ILOG3(_v) \ - (((_v)&0xF0)?4+OC_STATIC_ILOG2((_v)>>4):OC_STATIC_ILOG2(_v)) -# define OC_STATIC_ILOG4(_v) \ - (((_v)&0xFF00)?8+OC_STATIC_ILOG3((_v)>>8):OC_STATIC_ILOG3(_v)) -# define OC_STATIC_ILOG5(_v) \ - (((_v)&0xFFFF0000)?16+OC_STATIC_ILOG4((_v)>>16):OC_STATIC_ILOG4(_v)) -# define OC_STATIC_ILOG6(_v) \ - (((_v)&0xFFFFFFFF00000000ULL)?32+OC_STATIC_ILOG5((_v)>>32):OC_STATIC_ILOG5(_v)) -/** - * OC_STATIC_ILOG_32 - The integer logarithm of an (unsigned, 32-bit) constant. - * @_v: A non-negative 32-bit constant. - * Returns floor(log2(_v))+1, or 0 if _v==0. - * This is the number of bits that would be required to represent _v in two's - * complement notation with all of the leading zeros stripped. 
- * This macro is suitable for evaluation at compile time, but it should not be - * used on values that can change at runtime, as it operates via exhaustive - * search. - */ -# define OC_STATIC_ILOG_32(_v) (OC_STATIC_ILOG5((ogg_uint32_t)(_v))) -/** - * OC_STATIC_ILOG_64 - The integer logarithm of an (unsigned, 64-bit) constant. - * @_v: A non-negative 64-bit constant. - * Returns floor(log2(_v))+1, or 0 if _v==0. - * This is the number of bits that would be required to represent _v in two's - * complement notation with all of the leading zeros stripped. - * This macro is suitable for evaluation at compile time, but it should not be - * used on values that can change at runtime, as it operates via exhaustive - * search. - */ -# define OC_STATIC_ILOG_64(_v) (OC_STATIC_ILOG6((ogg_int64_t)(_v))) - -#define OC_Q57(_v) ((ogg_int64_t)(_v)<<57) - -ogg_int64_t oc_bexp64(ogg_int64_t _z); -ogg_int64_t oc_blog64(ogg_int64_t _w); - -#endif diff --git a/drivers/theora/mcenc.c b/drivers/theora/mcenc.c deleted file mode 100644 index 797e81f4f9..0000000000 --- a/drivers/theora/mcenc.c +++ /dev/null @@ -1,767 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id$ - - ********************************************************************/ -#include -#include -#include -#include "encint.h" - - - -typedef struct oc_mcenc_ctx oc_mcenc_ctx; - - - -/*Temporary state used for motion estimation.*/ -struct oc_mcenc_ctx{ - /*The candidate motion vectors.*/ - int candidates[13][2]; - /*The start of the Set B candidates.*/ - int setb0; - /*The total number of candidates.*/ - int ncandidates; -}; - - - -/*The maximum Y plane SAD value for accepting the median predictor.*/ -#define OC_YSAD_THRESH1 (256) -/*The amount to right shift the minimum error by when inflating it for - computing the second maximum Y plane SAD threshold.*/ -#define OC_YSAD_THRESH2_SCALE_BITS (4) -/*The amount to add to the second maximum Y plane threshold when inflating - it.*/ -#define OC_YSAD_THRESH2_OFFSET (64) - -/*The vector offsets in the X direction for each search site in the square - pattern.*/ -static const int OC_SQUARE_DX[9]={-1,0,1,-1,0,1,-1,0,1}; -/*The vector offsets in the Y direction for each search site in the square - pattern.*/ -static const int OC_SQUARE_DY[9]={-1,-1,-1,0,0,0,1,1,1}; -/*The number of sites to search for each boundary condition in the square - pattern. 
- Bit flags for the boundary conditions are as follows: - 1: -16==dx - 2: dx==15(.5) - 4: -16==dy - 8: dy==15(.5)*/ -static const int OC_SQUARE_NSITES[11]={8,5,5,0,5,3,3,0,5,3,3}; -/*The list of sites to search for each boundary condition in the square - pattern.*/ -static const int OC_SQUARE_SITES[11][8]={ - /* -15.5mb_info; - /*Skip a position to store the median predictor in.*/ - ncandidates=1; - if(embs[_mbi].ncneighbors>0){ - /*Fill in the first part of set A: the vectors from adjacent blocks.*/ - for(i=0;icandidates[ncandidates][0]=embs[nmbi].analysis_mv[0][_frame][0]; - _mcenc->candidates[ncandidates][1]=embs[nmbi].analysis_mv[0][_frame][1]; - ncandidates++; - } - } - /*Add a few additional vectors to set A: the vectors used in the previous - frames and the (0,0) vector.*/ - _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,_accum[0],31); - _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,_accum[1],31); - ncandidates++; - _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31, - embs[_mbi].analysis_mv[1][_frame][0]+_accum[0],31); - _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31, - embs[_mbi].analysis_mv[1][_frame][1]+_accum[1],31); - ncandidates++; - _mcenc->candidates[ncandidates][0]=0; - _mcenc->candidates[ncandidates][1]=0; - ncandidates++; - /*Use the first three vectors of set A to find our best predictor: their - median.*/ - memcpy(a,_mcenc->candidates+1,sizeof(a)); - OC_SORT2I(a[0][0],a[1][0]); - OC_SORT2I(a[0][1],a[1][1]); - OC_SORT2I(a[1][0],a[2][0]); - OC_SORT2I(a[1][1],a[2][1]); - OC_SORT2I(a[0][0],a[1][0]); - OC_SORT2I(a[0][1],a[1][1]); - _mcenc->candidates[0][0]=a[1][0]; - _mcenc->candidates[0][1]=a[1][1]; - /*Fill in set B: accelerated predictors for this and adjacent macro blocks.*/ - _mcenc->setb0=ncandidates; - /*The first time through the loop use the current macro block.*/ - nmbi=_mbi; - for(i=0;;i++){ - _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31, - 2*embs[_mbi].analysis_mv[1][_frame][0] - 
-embs[_mbi].analysis_mv[2][_frame][0]+_accum[0],31); - _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31, - 2*embs[_mbi].analysis_mv[1][_frame][1] - -embs[_mbi].analysis_mv[2][_frame][1]+_accum[1],31); - ncandidates++; - if(i>=embs[_mbi].npneighbors)break; - nmbi=embs[_mbi].pneighbors[i]; - } - /*Truncate to full-pel positions.*/ - for(i=0;icandidates[i][0]=OC_DIV2(_mcenc->candidates[i][0]); - _mcenc->candidates[i][1]=OC_DIV2(_mcenc->candidates[i][1]); - } - _mcenc->ncandidates=ncandidates; -} - -#if 0 -static unsigned oc_sad16_halfpel(const oc_enc_ctx *_enc, - const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4], - int _mvoffset0,int _mvoffset1,const unsigned char *_src, - const unsigned char *_ref,int _ystride,unsigned _best_err){ - unsigned err; - int bi; - err=0; - for(bi=0;bi<4;bi++){ - ptrdiff_t frag_offs; - frag_offs=_frag_buf_offs[_fragis[bi]]; - err+=oc_enc_frag_sad2_thresh(_enc,_src+frag_offs,_ref+frag_offs+_mvoffset0, - _ref+frag_offs+_mvoffset1,_ystride,_best_err-err); - } - return err; -} -#endif - -static unsigned oc_satd16_halfpel(const oc_enc_ctx *_enc, - const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4], - int _mvoffset0,int _mvoffset1,const unsigned char *_src, - const unsigned char *_ref,int _ystride,unsigned _best_err){ - unsigned err; - int bi; - err=0; - for(bi=0;bi<4;bi++){ - ptrdiff_t frag_offs; - frag_offs=_frag_buf_offs[_fragis[bi]]; - err+=oc_enc_frag_satd2_thresh(_enc,_src+frag_offs,_ref+frag_offs+_mvoffset0, - _ref+frag_offs+_mvoffset1,_ystride,_best_err-err); - } - return err; -} - -static unsigned oc_mcenc_ysad_check_mbcandidate_fullpel(const oc_enc_ctx *_enc, - const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy, - const unsigned char *_src,const unsigned char *_ref,int _ystride, - unsigned _block_err[4]){ - unsigned err; - int mvoffset; - int bi; - mvoffset=_dx+_dy*_ystride; - err=0; - for(bi=0;bi<4;bi++){ - ptrdiff_t frag_offs; - unsigned block_err; - frag_offs=_frag_buf_offs[_fragis[bi]]; - 
block_err=oc_enc_frag_sad(_enc, - _src+frag_offs,_ref+frag_offs+mvoffset,_ystride); - _block_err[bi]=block_err; - err+=block_err; - } - return err; -} - -static int oc_mcenc_ysatd_check_mbcandidate_fullpel(const oc_enc_ctx *_enc, - const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy, - const unsigned char *_src,const unsigned char *_ref,int _ystride){ - int mvoffset; - int err; - int bi; - mvoffset=_dx+_dy*_ystride; - err=0; - for(bi=0;bi<4;bi++){ - ptrdiff_t frag_offs; - frag_offs=_frag_buf_offs[_fragis[bi]]; - err+=oc_enc_frag_satd_thresh(_enc, - _src+frag_offs,_ref+frag_offs+mvoffset,_ystride,UINT_MAX); - } - return err; -} - -static unsigned oc_mcenc_ysatd_check_bcandidate_fullpel(const oc_enc_ctx *_enc, - ptrdiff_t _frag_offs,int _dx,int _dy, - const unsigned char *_src,const unsigned char *_ref,int _ystride){ - return oc_enc_frag_satd_thresh(_enc, - _src+_frag_offs,_ref+_frag_offs+_dx+_dy*_ystride,_ystride,UINT_MAX); -} - -/*Perform a motion vector search for this macro block against a single - reference frame. - As a bonus, individual block motion vectors are computed as well, as much of - the work can be shared. - The actual motion vector is stored in the appropriate place in the - oc_mb_enc_info structure. - _mcenc: The motion compensation context. - _accum: Drop frame/golden MV accumulators. - _mbi: The macro block index. - _frame: The frame to search, either OC_FRAME_PREV or OC_FRAME_GOLD.*/ -void oc_mcenc_search_frame(oc_enc_ctx *_enc,int _accum[2],int _mbi,int _frame){ - /*Note: Traditionally this search is done using a rate-distortion objective - function of the form D+lambda*R. - However, xiphmont tested this and found it produced a small degredation, - while requiring extra computation. - This is most likely due to Theora's peculiar MV encoding scheme: MVs are - not coded relative to a predictor, and the only truly cheap way to use a - MV is in the LAST or LAST2 MB modes, which are not being considered here. 
- Therefore if we use the MV found here, it's only because both LAST and - LAST2 performed poorly, and therefore the MB is not likely to be uniform - or suffer from the aperture problem. - Furthermore we would like to re-use the MV found here for as many MBs as - possible, so picking a slightly sub-optimal vector to save a bit or two - may cause increased degredation in many blocks to come. - We could artificially reduce lambda to compensate, but it's faster to just - disable it entirely, and use D (the distortion) as the sole criterion.*/ - oc_mcenc_ctx mcenc; - const ptrdiff_t *frag_buf_offs; - const ptrdiff_t *fragis; - const unsigned char *src; - const unsigned char *ref; - int ystride; - oc_mb_enc_info *embs; - ogg_int32_t hit_cache[31]; - ogg_int32_t hitbit; - unsigned best_block_err[4]; - unsigned block_err[4]; - unsigned best_err; - int best_vec[2]; - int best_block_vec[4][2]; - int candx; - int candy; - int bi; - embs=_enc->mb_info; - /*Find some candidate motion vectors.*/ - oc_mcenc_find_candidates(_enc,&mcenc,_accum,_mbi,_frame); - /*Clear the cache of locations we've examined.*/ - memset(hit_cache,0,sizeof(hit_cache)); - /*Start with the median predictor.*/ - candx=mcenc.candidates[0][0]; - candy=mcenc.candidates[0][1]; - hit_cache[candy+15]|=(ogg_int32_t)1<state.frag_buf_offs; - fragis=_enc->state.mb_maps[_mbi][0]; - src=_enc->state.ref_frame_data[OC_FRAME_IO]; - ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]]; - ystride=_enc->state.ref_ystride[0]; - /*TODO: customize error function for speed/(quality+size) tradeoff.*/ - best_err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc, - frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err); - best_vec[0]=candx; - best_vec[1]=candy; - if(_frame==OC_FRAME_PREV){ - for(bi=0;bi<4;bi++){ - best_block_err[bi]=block_err[bi]; - best_block_vec[bi][0]=candx; - best_block_vec[bi][1]=candy; - } - } - /*If this predictor fails, move on to set A.*/ - if(best_err>OC_YSAD_THRESH1){ - unsigned err; - 
unsigned t2; - int ncs; - int ci; - /*Compute the early termination threshold for set A.*/ - t2=embs[_mbi].error[_frame]; - ncs=OC_MINI(3,embs[_mbi].ncneighbors); - for(ci=0;ci>OC_YSAD_THRESH2_SCALE_BITS)+OC_YSAD_THRESH2_OFFSET; - /*Examine the candidates in set A.*/ - for(ci=1;cit2){ - /*Examine the candidates in set B.*/ - for(;cit2){ - int best_site; - int nsites; - int sitei; - int site; - int b; - /*Square pattern search.*/ - for(;;){ - best_site=4; - /*Compose the bit flags for boundary conditions.*/ - b=OC_DIV16(-best_vec[0]+1)|OC_DIV16(best_vec[0]+1)<<1| - OC_DIV16(-best_vec[1]+1)<<2|OC_DIV16(best_vec[1]+1)<<3; - nsites=OC_SQUARE_NSITES[b]; - for(sitei=0;sitei>=2; - for(bi=0;bi<4;bi++){ - if(best_block_err[bi]>t2){ - /*Square pattern search. - We do this in a slightly interesting manner. - We continue to check the SAD of all four blocks in the - macro block. - This gives us two things: - 1) We can continue to use the hit_cache to avoid duplicate - checks. - Otherwise we could continue to read it, but not write to it - without saving and restoring it for each block. - Note that we could still eliminate a large number of - duplicate checks by taking into account the site we came - from when choosing the site list. - We can still do that to avoid extra hit_cache queries, and - it might even be a speed win. - 2) It gives us a slightly better chance of escaping local - minima. - We would not be here if we weren't doing a fairly bad job - in finding a good vector, and checking these vectors can - save us from 100 to several thousand points off our SAD 1 - in 15 times. - TODO: Is this a good idea? - Who knows. 
- It needs more testing.*/ - for(;;){ - int bestx; - int besty; - int bj; - bestx=best_block_vec[bi][0]; - besty=best_block_vec[bi][1]; - /*Compose the bit flags for boundary conditions.*/ - b=OC_DIV16(-bestx+1)|OC_DIV16(bestx+1)<<1| - OC_DIV16(-besty+1)<<2|OC_DIV16(besty+1)<<3; - nsites=OC_SQUARE_NSITES[b]; - for(sitei=0;siteimb_info[_mbi].analysis_mv; - if(_enc->prevframe_dropped){ - accum_p[0]=mvs[0][OC_FRAME_PREV][0]; - accum_p[1]=mvs[0][OC_FRAME_PREV][1]; - } - else accum_p[1]=accum_p[0]=0; - accum_g[0]=mvs[2][OC_FRAME_GOLD][0]; - accum_g[1]=mvs[2][OC_FRAME_GOLD][1]; - mvs[0][OC_FRAME_PREV][0]-=mvs[2][OC_FRAME_PREV][0]; - mvs[0][OC_FRAME_PREV][1]-=mvs[2][OC_FRAME_PREV][1]; - /*Move the motion vector predictors back a frame.*/ - memmove(mvs+1,mvs,2*sizeof(*mvs)); - /*Search the last frame.*/ - oc_mcenc_search_frame(_enc,accum_p,_mbi,OC_FRAME_PREV); - mvs[2][OC_FRAME_PREV][0]=accum_p[0]; - mvs[2][OC_FRAME_PREV][1]=accum_p[1]; - /*GOLDEN MVs are different from PREV MVs in that they're each absolute - offsets from some frame in the past rather than relative offsets from the - frame before. - For predictor calculation to make sense, we need them to be in the same - form as PREV MVs.*/ - mvs[1][OC_FRAME_GOLD][0]-=mvs[2][OC_FRAME_GOLD][0]; - mvs[1][OC_FRAME_GOLD][1]-=mvs[2][OC_FRAME_GOLD][1]; - mvs[2][OC_FRAME_GOLD][0]-=accum_g[0]; - mvs[2][OC_FRAME_GOLD][1]-=accum_g[1]; - /*Search the golden frame.*/ - oc_mcenc_search_frame(_enc,accum_g,_mbi,OC_FRAME_GOLD); - /*Put GOLDEN MVs back into absolute offset form. 
- The newest MV is already an absolute offset.*/ - mvs[2][OC_FRAME_GOLD][0]+=accum_g[0]; - mvs[2][OC_FRAME_GOLD][1]+=accum_g[1]; - mvs[1][OC_FRAME_GOLD][0]+=mvs[2][OC_FRAME_GOLD][0]; - mvs[1][OC_FRAME_GOLD][1]+=mvs[2][OC_FRAME_GOLD][1]; -} - -#if 0 -static int oc_mcenc_ysad_halfpel_mbrefine(const oc_enc_ctx *_enc,int _mbi, - int _vec[2],int _best_err,int _frame){ - const unsigned char *src; - const unsigned char *ref; - const ptrdiff_t *frag_buf_offs; - const ptrdiff_t *fragis; - int offset_y[9]; - int ystride; - int mvoffset_base; - int best_site; - int sitei; - int err; - src=_enc->state.ref_frame_data[OC_FRAME_IO]; - ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_framei]]; - frag_buf_offs=_enc->state.frag_buf_offs; - fragis=_enc->state.mb_maps[_mbi][0]; - ystride=_enc->state.ref_ystride[0]; - mvoffset_base=_vec[0]+_vec[1]*ystride; - offset_y[0]=offset_y[1]=offset_y[2]=-ystride; - offset_y[3]=offset_y[5]=0; - offset_y[6]=offset_y[7]=offset_y[8]=ystride; - best_site=4; - for(sitei=0;sitei<8;sitei++){ - int site; - int xmask; - int ymask; - int dx; - int dy; - int mvoffset0; - int mvoffset1; - site=OC_SQUARE_SITES[0][sitei]; - dx=OC_SQUARE_DX[site]; - dy=OC_SQUARE_DY[site]; - /*The following code SHOULD be equivalent to - oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1, - (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0); - However, it should also be much faster, as it involves no multiplies and - doesn't have to handle chroma vectors.*/ - xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx); - ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy); - mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask); - mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask); - err=oc_sad16_halfpel(_enc,frag_buf_offs,fragis, - mvoffset0,mvoffset1,src,ref,ystride,_best_err); - if(err<_best_err){ - _best_err=err; - best_site=site; - } - } - _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site]; - _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site]; - return _best_err; -} -#endif - -static 
unsigned oc_mcenc_ysatd_halfpel_mbrefine(const oc_enc_ctx *_enc, - int _mbi,int _vec[2],unsigned _best_err,int _frame){ - const unsigned char *src; - const unsigned char *ref; - const ptrdiff_t *frag_buf_offs; - const ptrdiff_t *fragis; - int offset_y[9]; - int ystride; - int mvoffset_base; - int best_site; - int sitei; - int err; - src=_enc->state.ref_frame_data[OC_FRAME_IO]; - ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]]; - frag_buf_offs=_enc->state.frag_buf_offs; - fragis=_enc->state.mb_maps[_mbi][0]; - ystride=_enc->state.ref_ystride[0]; - mvoffset_base=_vec[0]+_vec[1]*ystride; - offset_y[0]=offset_y[1]=offset_y[2]=-ystride; - offset_y[3]=offset_y[5]=0; - offset_y[6]=offset_y[7]=offset_y[8]=ystride; - best_site=4; - for(sitei=0;sitei<8;sitei++){ - int site; - int xmask; - int ymask; - int dx; - int dy; - int mvoffset0; - int mvoffset1; - site=OC_SQUARE_SITES[0][sitei]; - dx=OC_SQUARE_DX[site]; - dy=OC_SQUARE_DY[site]; - /*The following code SHOULD be equivalent to - oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1, - (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0); - However, it should also be much faster, as it involves no multiplies and - doesn't have to handle chroma vectors.*/ - xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx); - ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy); - mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask); - mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask); - err=oc_satd16_halfpel(_enc,frag_buf_offs,fragis, - mvoffset0,mvoffset1,src,ref,ystride,_best_err); - if(err<_best_err){ - _best_err=err; - best_site=site; - } - } - _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site]; - _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site]; - return _best_err; -} - -void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame){ - oc_mb_enc_info *embs; - int vec[2]; - embs=_enc->mb_info; - vec[0]=OC_DIV2(embs[_mbi].analysis_mv[0][_frame][0]); - vec[1]=OC_DIV2(embs[_mbi].analysis_mv[0][_frame][1]); - 
embs[_mbi].satd[_frame]=oc_mcenc_ysatd_halfpel_mbrefine(_enc, - _mbi,vec,embs[_mbi].satd[_frame],_frame); - embs[_mbi].analysis_mv[0][_frame][0]=(signed char)vec[0]; - embs[_mbi].analysis_mv[0][_frame][1]=(signed char)vec[1]; -} - -#if 0 -static int oc_mcenc_ysad_halfpel_brefine(const oc_enc_ctx *_enc, - int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride, - int _offset_y[9],unsigned _best_err){ - int mvoffset_base; - int best_site; - int sitei; - mvoffset_base=_vec[0]+_vec[1]*_ystride; - best_site=4; - for(sitei=0;sitei<8;sitei++){ - unsigned err; - int site; - int xmask; - int ymask; - int dx; - int dy; - int mvoffset0; - int mvoffset1; - site=OC_SQUARE_SITES[0][sitei]; - dx=OC_SQUARE_DX[site]; - dy=OC_SQUARE_DY[site]; - /*The following code SHOULD be equivalent to - oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1, - (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0); - However, it should also be much faster, as it involves no multiplies and - doesn't have to handle chroma vectors.*/ - xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx); - ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy); - mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask); - mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask); - err=oc_enc_frag_sad2_thresh(_enc,_src, - _ref+mvoffset0,_ref+mvoffset1,ystride,_best_err); - if(err<_best_err){ - _best_err=err; - best_site=site; - } - } - _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site]; - _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site]; - return _best_err; -} -#endif - -static unsigned oc_mcenc_ysatd_halfpel_brefine(const oc_enc_ctx *_enc, - int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride, - int _offset_y[9],unsigned _best_err){ - int mvoffset_base; - int best_site; - int sitei; - mvoffset_base=_vec[0]+_vec[1]*_ystride; - best_site=4; - for(sitei=0;sitei<8;sitei++){ - unsigned err; - int site; - int xmask; - int ymask; - int dx; - int dy; - int mvoffset0; - int mvoffset1; - 
site=OC_SQUARE_SITES[0][sitei]; - dx=OC_SQUARE_DX[site]; - dy=OC_SQUARE_DY[site]; - /*The following code SHOULD be equivalent to - oc_state_get_mv_offsets(&_enc->state,&mvoffsets,0, - (_vec[0]<<1)+dx,(_vec[1]<<1)+dy); - However, it should also be much faster, as it involves no multiplies and - doesn't have to handle chroma vectors.*/ - xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx); - ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy); - mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask); - mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask); - err=oc_enc_frag_satd2_thresh(_enc,_src, - _ref+mvoffset0,_ref+mvoffset1,_ystride,_best_err); - if(err<_best_err){ - _best_err=err; - best_site=site; - } - } - _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site]; - _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site]; - return _best_err; -} - -void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi){ - oc_mb_enc_info *embs; - const ptrdiff_t *frag_buf_offs; - const ptrdiff_t *fragis; - const unsigned char *src; - const unsigned char *ref; - int offset_y[9]; - int ystride; - int bi; - ystride=_enc->state.ref_ystride[0]; - frag_buf_offs=_enc->state.frag_buf_offs; - fragis=_enc->state.mb_maps[_mbi][0]; - src=_enc->state.ref_frame_data[OC_FRAME_IO]; - ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]]; - offset_y[0]=offset_y[1]=offset_y[2]=-ystride; - offset_y[3]=offset_y[5]=0; - offset_y[6]=offset_y[7]=offset_y[8]=ystride; - embs=_enc->mb_info; - for(bi=0;bi<4;bi++){ - ptrdiff_t frag_offs; - int vec[2]; - frag_offs=frag_buf_offs[fragis[bi]]; - vec[0]=OC_DIV2(embs[_mbi].block_mv[bi][0]); - vec[1]=OC_DIV2(embs[_mbi].block_mv[bi][1]); - embs[_mbi].block_satd[bi]=oc_mcenc_ysatd_halfpel_brefine(_enc,vec, - src+frag_offs,ref+frag_offs,ystride,offset_y,embs[_mbi].block_satd[bi]); - embs[_mbi].ref_mv[bi][0]=(signed char)vec[0]; - embs[_mbi].ref_mv[bi][1]=(signed char)vec[1]; - } -} diff --git a/drivers/theora/modedec.h b/drivers/theora/modedec.h deleted file mode 100644 index 
ea12c64afd..0000000000 --- a/drivers/theora/modedec.h +++ /dev/null @@ -1,4027 +0,0 @@ -/*File generated by libtheora with OC_COLLECT_METRICS defined at compile time.*/ -#if !defined(_modedec_H) -# define _modedec_H (1) - - - -# if defined(OC_COLLECT_METRICS) -typedef struct oc_mode_metrics oc_mode_metrics; -# endif -typedef struct oc_mode_rd oc_mode_rd; - - - -/*The number of extra bits of precision at which to store rate metrics.*/ -# define OC_BIT_SCALE (6) -/*The number of extra bits of precision at which to store RMSE metrics. - This must be at least half OC_BIT_SCALE (rounded up).*/ -# define OC_RMSE_SCALE (5) -/*The number of bins to partition statistics into.*/ -# define OC_SAD_BINS (24) -/*The number of bits of precision to drop from SAD scores to assign them to a - bin.*/ -# define OC_SAD_SHIFT (9) - - - -# if defined(OC_COLLECT_METRICS) -struct oc_mode_metrics{ - double fragw; - double satd; - double rate; - double rmse; - double satd2; - double satdrate; - double rate2; - double satdrmse; - double rmse2; -}; - - -int oc_has_mode_metrics; -oc_mode_metrics OC_MODE_METRICS[64][3][2][OC_SAD_BINS]; -# endif - - - -struct oc_mode_rd{ - ogg_int16_t rate; - ogg_int16_t rmse; -}; - - -# if !defined(OC_COLLECT_METRICS) -static const -# endif -oc_mode_rd OC_MODE_RD[64][3][2][OC_SAD_BINS]={ - { - { - /*Y' qi=0 INTRA*/ - { - { 87, -66},{ 132, 1611},{ 197, 3474},{ 285, 5130}, - { 376, 6419},{ 450, 7545},{ 521, 8587},{ 600, 9587}, - { 689,10498},{ 790,11348},{ 899,12158},{ 1030,12855}, - { 1166,13459},{ 1276,14052},{ 1353,14732},{ 1444,15425}, - { 1535,16101},{ 1609,16856},{ 1697,17532},{ 1823,17995}, - { 1962,18426},{ 2085,18919},{ 2201,19503},{ 2304,20307} - }, - /*Y' qi=0 INTER*/ - { - { 32, -105},{ 40, 1268},{ 54, 2919},{ 91, 4559}, - { 118, 6244},{ 132, 7932},{ 142, 9514},{ 149,10989}, - { 155,12375},{ 161,13679},{ 168,14958},{ 176,16215}, - { 187,17431},{ 196,18623},{ 207,19790},{ 218,20941}, - { 230,22083},{ 246,23213},{ 265,24333},{ 292,25439}, - { 
328,26512},{ 372,27538},{ 427,28522},{ 494,29479} - } - }, - { - /*Cb qi=0 INTRA*/ - { - { 1, 6},{ 27, 368},{ 52, 738},{ 67, 1171}, - { 80, 1642},{ 99, 2134},{ 110, 2642},{ 112, 3144}, - { 126, 3578},{ 154, 3967},{ 167, 4387},{ 172, 4839}, - { 191, 5278},{ 208, 5666},{ 220, 6036},{ 223, 6398}, - { 227, 6814},{ 253, 7157},{ 284, 7403},{ 292, 7699}, - { 314, 7983},{ 339, 8203},{ 363, 8460},{ 399, 8919} - }, - /*Cb qi=0 INTER*/ - { - { 68, -55},{ 63, 275},{ 58, 602},{ 53, 936}, - { 50, 1290},{ 54, 1691},{ 58, 2116},{ 62, 2553}, - { 67, 2992},{ 72, 3422},{ 78, 3843},{ 84, 4253}, - { 89, 4658},{ 94, 5062},{ 98, 5455},{ 100, 5848}, - { 102, 6231},{ 104, 6604},{ 104, 6982},{ 105, 7359}, - { 105, 7733},{ 104, 8104},{ 105, 8465},{ 111, 8828} - } - }, - { - /*Cr qi=0 INTRA*/ - { - { 1, 8},{ 23, 375},{ 47, 759},{ 63, 1220}, - { 71, 1693},{ 82, 2171},{ 94, 2652},{ 109, 3103}, - { 125, 3567},{ 133, 3995},{ 151, 4375},{ 168, 4819}, - { 174, 5244},{ 190, 5635},{ 215, 6005},{ 242, 6347}, - { 257, 6758},{ 280, 7068},{ 311, 7336},{ 326, 7652}, - { 346, 7968},{ 372, 8213},{ 388, 8515},{ 408, 9060} - }, - /*Cr qi=0 INTER*/ - { - { 69, 0},{ 60, 314},{ 49, 624},{ 45, 943}, - { 45, 1285},{ 49, 1691},{ 55, 2130},{ 62, 2560}, - { 71, 2973},{ 79, 3385},{ 85, 3800},{ 89, 4207}, - { 92, 4620},{ 95, 5037},{ 96, 5436},{ 97, 5839}, - { 98, 6252},{ 99, 6653},{ 99, 7038},{ 103, 7426}, - { 107, 7810},{ 108, 8178},{ 107, 8539},{ 106, 8937} - } - } - }, - { - { - /*Y' qi=1 INTRA*/ - { - { 81, -71},{ 133, 1610},{ 203, 3460},{ 296, 5083}, - { 392, 6342},{ 467, 7454},{ 541, 8486},{ 625, 9466}, - { 716,10352},{ 823,11181},{ 940,11961},{ 1074,12643}, - { 1211,13233},{ 1324,13807},{ 1408,14489},{ 1504,15167}, - { 1598,15824},{ 1679,16544},{ 1788,17161},{ 1928,17579}, - { 2070,17991},{ 2202,18456},{ 2324,19021},{ 2425,19894} - }, - /*Y' qi=1 INTER*/ - { - { 34, 4},{ 40, 1307},{ 55, 2914},{ 93, 4555}, - { 120, 6243},{ 134, 7912},{ 144, 9468},{ 152,10918}, - { 158,12275},{ 164,13569},{ 171,14846},{ 
180,16098}, - { 191,17310},{ 204,18484},{ 216,19636},{ 228,20779}, - { 242,21912},{ 261,23036},{ 286,24146},{ 320,25221}, - { 363,26265},{ 418,27261},{ 485,28203},{ 551,29148} - } - }, - { - /*Cb qi=1 INTRA*/ - { - { 1, 6},{ 28, 367},{ 52, 738},{ 68, 1172}, - { 86, 1644},{ 106, 2135},{ 115, 2642},{ 119, 3141}, - { 132, 3569},{ 157, 3951},{ 172, 4366},{ 177, 4819}, - { 194, 5258},{ 211, 5638},{ 224, 6006},{ 233, 6367}, - { 236, 6784},{ 258, 7121},{ 299, 7357},{ 319, 7637}, - { 337, 7921},{ 358, 8141},{ 381, 8367},{ 401, 8768} - }, - /*Cb qi=1 INTER*/ - { - { 95, -31},{ 81, 295},{ 67, 614},{ 53, 953}, - { 48, 1305},{ 51, 1700},{ 56, 2125},{ 61, 2563}, - { 67, 3008},{ 73, 3435},{ 79, 3844},{ 85, 4251}, - { 90, 4663},{ 95, 5073},{ 98, 5458},{ 100, 5844}, - { 101, 6231},{ 102, 6606},{ 102, 6980},{ 103, 7347}, - { 104, 7726},{ 105, 8096},{ 105, 8453},{ 105, 8789} - } - }, - { - /*Cr qi=1 INTRA*/ - { - { 1, 8},{ 25, 375},{ 50, 759},{ 65, 1221}, - { 74, 1695},{ 86, 2172},{ 101, 2651},{ 117, 3101}, - { 129, 3561},{ 135, 3985},{ 153, 4368},{ 171, 4807}, - { 182, 5223},{ 202, 5608},{ 225, 5964},{ 251, 6300}, - { 271, 6697},{ 295, 6978},{ 324, 7235},{ 348, 7558}, - { 367, 7877},{ 394, 8101},{ 413, 8386},{ 409, 8945} - }, - /*Cr qi=1 INTER*/ - { - { 66, 11},{ 59, 323},{ 51, 631},{ 44, 949}, - { 44, 1292},{ 49, 1703},{ 56, 2140},{ 62, 2566}, - { 69, 2991},{ 77, 3397},{ 84, 3799},{ 89, 4211}, - { 93, 4634},{ 94, 5049},{ 95, 5444},{ 96, 5854}, - { 94, 6260},{ 95, 6640},{ 96, 7032},{ 101, 7423}, - { 104, 7790},{ 105, 8158},{ 109, 8527},{ 108, 8872} - } - } - }, - { - { - /*Y' qi=2 INTRA*/ - { - { 87, -72},{ 139, 1607},{ 213, 3426},{ 315, 4992}, - { 416, 6217},{ 495, 7315},{ 574, 8317},{ 666, 9265}, - { 763,10124},{ 875,10906},{ 1001,11654},{ 1147,12305}, - { 1289,12865},{ 1407,13424},{ 1503,14076},{ 1610,14724}, - { 1720,15342},{ 1815,16020},{ 1937,16579},{ 2084,16981}, - { 2236,17371},{ 2385,17779},{ 2536,18250},{ 2689,18931} - }, - /*Y' qi=2 INTER*/ - { - { 30, -2},{ 40, 1308},{ 
57, 2921},{ 96, 4567}, - { 122, 6260},{ 136, 7902},{ 148, 9418},{ 156,10826}, - { 162,12157},{ 169,13448},{ 177,14709},{ 188,15938}, - { 200,17133},{ 213,18295},{ 228,19433},{ 245,20564}, - { 264,21685},{ 289,22790},{ 323,23876},{ 368,24916}, - { 427,25906},{ 499,26837},{ 585,27700},{ 680,28514} - } - }, - { - /*Cb qi=2 INTRA*/ - { - { 1, 6},{ 30, 367},{ 58, 738},{ 77, 1172}, - { 93, 1645},{ 111, 2137},{ 123, 2642},{ 126, 3133}, - { 136, 3553},{ 162, 3934},{ 178, 4352},{ 183, 4803}, - { 199, 5231},{ 220, 5596},{ 235, 5957},{ 245, 6314}, - { 256, 6718},{ 286, 7048},{ 320, 7285},{ 336, 7568}, - { 366, 7829},{ 387, 8045},{ 405, 8261},{ 445, 8550} - }, - /*Cb qi=2 INTER*/ - { - { 115, -61},{ 93, 277},{ 71, 609},{ 54, 963}, - { 49, 1329},{ 53, 1715},{ 58, 2138},{ 63, 2583}, - { 69, 3017},{ 75, 3442},{ 81, 3857},{ 88, 4263}, - { 93, 4667},{ 96, 5065},{ 101, 5451},{ 101, 5832}, - { 102, 6213},{ 103, 6593},{ 103, 6968},{ 104, 7336}, - { 104, 7710},{ 105, 8076},{ 106, 8440},{ 106, 8822} - } - }, - { - /*Cr qi=2 INTRA*/ - { - { 1, 8},{ 27, 375},{ 54, 759},{ 70, 1222}, - { 79, 1696},{ 89, 2173},{ 106, 2652},{ 123, 3098}, - { 135, 3553},{ 143, 3972},{ 161, 4348},{ 181, 4782}, - { 194, 5189},{ 213, 5565},{ 235, 5907},{ 266, 6229}, - { 286, 6618},{ 311, 6897},{ 339, 7152},{ 362, 7454}, - { 392, 7721},{ 416, 7946},{ 429, 8227},{ 458, 8540} - }, - /*Cr qi=2 INTER*/ - { - { 74, 20},{ 63, 330},{ 51, 635},{ 44, 942}, - { 47, 1287},{ 54, 1710},{ 59, 2147},{ 65, 2571}, - { 72, 2996},{ 79, 3413},{ 86, 3820},{ 91, 4230}, - { 93, 4642},{ 95, 5046},{ 95, 5442},{ 95, 5839}, - { 96, 6243},{ 97, 6641},{ 99, 7021},{ 101, 7396}, - { 103, 7764},{ 106, 8138},{ 109, 8507},{ 114, 8851} - } - } - }, - { - { - /*Y' qi=3 INTRA*/ - { - { 91, -67},{ 141, 1606},{ 219, 3405},{ 328, 4929}, - { 433, 6122},{ 515, 7209},{ 598, 8204},{ 693, 9145}, - { 796, 9986},{ 912,10756},{ 1045,11471},{ 1200,12079}, - { 1345,12640},{ 1471,13179},{ 1571,13809},{ 1678,14450}, - { 1798,15047},{ 1905,15701},{ 2043,16205},{ 
2202,16569}, - { 2351,16971},{ 2501,17393},{ 2660,17851},{ 2825,18455} - }, - /*Y' qi=3 INTER*/ - { - { 53, -164},{ 38, 1314},{ 59, 2917},{ 99, 4563}, - { 124, 6253},{ 139, 7882},{ 150, 9375},{ 159,10749}, - { 166,12059},{ 173,13349},{ 183,14608},{ 194,15826}, - { 208,17003},{ 223,18150},{ 240,19287},{ 259,20411}, - { 284,21508},{ 317,22593},{ 359,23656},{ 414,24671}, - { 483,25634},{ 569,26519},{ 670,27332},{ 786,28072} - } - }, - { - /*Cb qi=3 INTRA*/ - { - { 1, 5},{ 31, 367},{ 58, 739},{ 78, 1173}, - { 96, 1645},{ 113, 2134},{ 125, 2638},{ 133, 3127}, - { 148, 3542},{ 171, 3915},{ 184, 4328},{ 192, 4776}, - { 209, 5197},{ 230, 5556},{ 245, 5909},{ 252, 6261}, - { 272, 6641},{ 304, 6942},{ 330, 7184},{ 342, 7477}, - { 380, 7736},{ 404, 7962},{ 428, 8151},{ 469, 8430} - }, - /*Cb qi=3 INTER*/ - { - { 86, -29},{ 72, 296},{ 58, 618},{ 46, 964}, - { 47, 1338},{ 51, 1743},{ 56, 2158},{ 63, 2594}, - { 69, 3035},{ 77, 3455},{ 84, 3859},{ 89, 4266}, - { 94, 4673},{ 98, 5074},{ 101, 5460},{ 101, 5842}, - { 101, 6217},{ 101, 6593},{ 102, 6964},{ 104, 7325}, - { 103, 7696},{ 103, 8056},{ 104, 8430},{ 103, 8792} - } - }, - { - /*Cr qi=3 INTRA*/ - { - { 1, 8},{ 27, 374},{ 56, 759},{ 74, 1221}, - { 83, 1696},{ 96, 2173},{ 113, 2650},{ 127, 3091}, - { 140, 3542},{ 151, 3960},{ 164, 4334},{ 188, 4764}, - { 208, 5144},{ 224, 5493},{ 250, 5841},{ 278, 6162}, - { 298, 6548},{ 334, 6816},{ 365, 7045},{ 388, 7343}, - { 419, 7613},{ 443, 7836},{ 455, 8105},{ 484, 8445} - }, - /*Cr qi=3 INTER*/ - { - { 76, 26},{ 65, 332},{ 53, 638},{ 45, 945}, - { 45, 1304},{ 53, 1725},{ 60, 2153},{ 68, 2584}, - { 74, 3007},{ 81, 3425},{ 87, 3844},{ 91, 4253}, - { 94, 4657},{ 95, 5061},{ 94, 5462},{ 94, 5856}, - { 95, 6250},{ 96, 6635},{ 97, 7014},{ 101, 7393}, - { 104, 7761},{ 106, 8137},{ 109, 8506},{ 111, 8823} - } - } - }, - { - { - /*Y' qi=4 INTRA*/ - { - { 80, -67},{ 143, 1603},{ 227, 3378},{ 344, 4861}, - { 454, 6026},{ 537, 7104},{ 626, 8089},{ 725, 9006}, - { 830, 9827},{ 950,10581},{ 
1089,11270},{ 1257,11826}, - { 1409,12366},{ 1535,12912},{ 1640,13528},{ 1753,14173}, - { 1884,14756},{ 2007,15368},{ 2148,15852},{ 2307,16212}, - { 2464,16591},{ 2614,17019},{ 2785,17455},{ 2970,17963} - }, - /*Y' qi=4 INTER*/ - { - { 50, -145},{ 38, 1324},{ 61, 2921},{ 102, 4566}, - { 127, 6248},{ 142, 7845},{ 154, 9300},{ 163,10656}, - { 169,11965},{ 177,13246},{ 188,14495},{ 202,15702}, - { 218,16864},{ 236,18003},{ 256,19124},{ 278,20233}, - { 307,21330},{ 347,22398},{ 398,23437},{ 463,24429}, - { 546,25343},{ 649,26170},{ 767,26935},{ 888,27674} - } - }, - { - /*Cb qi=4 INTRA*/ - { - { 1, 5},{ 33, 367},{ 61, 739},{ 80, 1173}, - { 98, 1646},{ 114, 2136},{ 126, 2639},{ 137, 3124}, - { 152, 3535},{ 176, 3903},{ 194, 4307},{ 206, 4753}, - { 222, 5165},{ 242, 5508},{ 260, 5857},{ 272, 6205}, - { 294, 6559},{ 332, 6848},{ 356, 7104},{ 364, 7389}, - { 396, 7637},{ 415, 7878},{ 446, 8064},{ 506, 8294} - }, - /*Cb qi=4 INTER*/ - { - { 86, -15},{ 73, 308},{ 60, 627},{ 46, 967}, - { 47, 1343},{ 51, 1754},{ 56, 2183},{ 63, 2615}, - { 70, 3044},{ 79, 3459},{ 85, 3866},{ 90, 4276}, - { 94, 4686},{ 97, 5088},{ 100, 5467},{ 102, 5837}, - { 102, 6205},{ 101, 6569},{ 103, 6939},{ 104, 7317}, - { 105, 7690},{ 107, 8043},{ 107, 8394},{ 111, 8736} - } - }, - { - /*Cr qi=4 INTRA*/ - { - { 1, 7},{ 28, 375},{ 57, 759},{ 79, 1221}, - { 92, 1697},{ 105, 2174},{ 122, 2648},{ 135, 3085}, - { 146, 3530},{ 157, 3947},{ 171, 4316},{ 195, 4737}, - { 218, 5117},{ 239, 5445},{ 268, 5767},{ 295, 6074}, - { 315, 6460},{ 355, 6735},{ 392, 6933},{ 418, 7218}, - { 448, 7495},{ 471, 7688},{ 481, 7954},{ 504, 8313} - }, - /*Cr qi=4 INTER*/ - { - { 68, 28},{ 57, 334},{ 47, 639},{ 43, 953}, - { 48, 1314},{ 54, 1736},{ 59, 2169},{ 69, 2592}, - { 78, 3017},{ 84, 3434},{ 88, 3850},{ 92, 4260}, - { 95, 4663},{ 96, 5068},{ 95, 5455},{ 95, 5839}, - { 96, 6243},{ 97, 6626},{ 98, 7006},{ 101, 7390}, - { 104, 7755},{ 108, 8115},{ 111, 8471},{ 110, 8825} - } - } - }, - { - { - /*Y' qi=5 INTRA*/ - { - { 84, 
-69},{ 147, 1599},{ 237, 3350},{ 360, 4796}, - { 475, 5934},{ 562, 6992},{ 657, 7953},{ 765, 8837}, - { 874, 9641},{ 998,10384},{ 1146,11047},{ 1322,11572}, - { 1484,12076},{ 1617,12609},{ 1731,13203},{ 1856,13806}, - { 1995,14367},{ 2132,14936},{ 2289,15386},{ 2460,15721}, - { 2635,16066},{ 2802,16442},{ 2980,16805},{ 3177,17272} - }, - /*Y' qi=5 INTER*/ - { - { 38, -86},{ 37, 1349},{ 64, 2920},{ 105, 4563}, - { 129, 6236},{ 145, 7809},{ 158, 9236},{ 167,10572}, - { 174,11871},{ 182,13141},{ 195,14368},{ 212,15558}, - { 230,16706},{ 250,17828},{ 274,18944},{ 303,20041}, - { 342,21116},{ 394,22152},{ 460,23144},{ 543,24073}, - { 648,24919},{ 773,25673},{ 922,26323},{ 1084,26924} - } - }, - { - /*Cb qi=5 INTRA*/ - { - { 1, 5},{ 34, 367},{ 63, 739},{ 82, 1174}, - { 102, 1647},{ 119, 2137},{ 134, 2639},{ 145, 3121}, - { 161, 3529},{ 189, 3891},{ 207, 4290},{ 216, 4721}, - { 232, 5113},{ 258, 5455},{ 277, 5798},{ 294, 6124}, - { 322, 6427},{ 352, 6697},{ 370, 6982},{ 384, 7283}, - { 423, 7529},{ 448, 7766},{ 478, 7943},{ 527, 8151} - }, - /*Cb qi=5 INTER*/ - { - { 83, -49},{ 69, 284},{ 55, 611},{ 48, 961}, - { 49, 1355},{ 52, 1769},{ 58, 2191},{ 65, 2616}, - { 73, 3041},{ 80, 3460},{ 87, 3868},{ 92, 4276}, - { 95, 4682},{ 98, 5077},{ 100, 5459},{ 102, 5827}, - { 102, 6200},{ 102, 6568},{ 103, 6930},{ 103, 7303}, - { 104, 7672},{ 106, 8032},{ 106, 8391},{ 106, 8727} - } - }, - { - /*Cr qi=5 INTRA*/ - { - { 1, 8},{ 28, 375},{ 57, 760},{ 81, 1222}, - { 99, 1696},{ 111, 2175},{ 125, 2648},{ 140, 3079}, - { 152, 3520},{ 162, 3927},{ 179, 4294},{ 203, 4714}, - { 225, 5080},{ 254, 5389},{ 286, 5703},{ 318, 5997}, - { 342, 6364},{ 380, 6640},{ 416, 6837},{ 445, 7103}, - { 473, 7370},{ 497, 7562},{ 514, 7811},{ 549, 8148} - }, - /*Cr qi=5 INTER*/ - { - { 60, 6},{ 54, 323},{ 46, 638},{ 43, 958}, - { 45, 1329},{ 54, 1749},{ 61, 2175},{ 70, 2600}, - { 79, 3021},{ 85, 3437},{ 89, 3847},{ 93, 4254}, - { 95, 4660},{ 96, 5065},{ 95, 5456},{ 95, 5849}, - { 96, 6243},{ 96, 6621},{ 97, 
6996},{ 101, 7366}, - { 104, 7722},{ 107, 8088},{ 111, 8448},{ 119, 8816} - } - } - }, - { - { - /*Y' qi=6 INTRA*/ - { - { 88, -69},{ 151, 1593},{ 251, 3294},{ 387, 4681}, - { 507, 5790},{ 601, 6837},{ 702, 7787},{ 813, 8648}, - { 927, 9427},{ 1059,10152},{ 1213,10787},{ 1399,11284}, - { 1568,11781},{ 1705,12312},{ 1823,12890},{ 1957,13482}, - { 2106,14036},{ 2249,14600},{ 2411,15042},{ 2588,15359}, - { 2772,15699},{ 2947,16062},{ 3127,16429},{ 3320,16849} - }, - /*Y' qi=6 INTER*/ - { - { 44, -80},{ 36, 1346},{ 69, 2919},{ 111, 4563}, - { 136, 6216},{ 154, 7746},{ 168, 9139},{ 178,10461}, - { 185,11747},{ 195,13007},{ 211,14229},{ 230,15408}, - { 250,16547},{ 274,17663},{ 302,18769},{ 339,19851}, - { 386,20907},{ 446,21933},{ 527,22884},{ 631,23746}, - { 760,24512},{ 914,25178},{ 1087,25758},{ 1278,26262} - } - }, - { - /*Cb qi=6 INTRA*/ - { - { 1, 4},{ 36, 367},{ 66, 739},{ 84, 1174}, - { 105, 1648},{ 126, 2139},{ 140, 2639},{ 149, 3116}, - { 164, 3523},{ 194, 3880},{ 217, 4271},{ 226, 4694}, - { 243, 5077},{ 270, 5407},{ 291, 5742},{ 310, 6061}, - { 340, 6340},{ 373, 6609},{ 394, 6890},{ 409, 7189}, - { 444, 7434},{ 469, 7652},{ 499, 7853},{ 559, 8135} - }, - /*Cb qi=6 INTER*/ - { - { 68, -46},{ 60, 291},{ 50, 623},{ 49, 971}, - { 50, 1357},{ 55, 1781},{ 61, 2211},{ 69, 2634}, - { 78, 3052},{ 86, 3466},{ 91, 3882},{ 95, 4292}, - { 98, 4691},{ 101, 5080},{ 102, 5458},{ 103, 5830}, - { 103, 6192},{ 104, 6554},{ 104, 6916},{ 106, 7278}, - { 108, 7641},{ 110, 8004},{ 112, 8371},{ 112, 8758} - } - }, - { - /*Cr qi=6 INTRA*/ - { - { 1, 8},{ 29, 375},{ 59, 760},{ 84, 1223}, - { 99, 1698},{ 112, 2176},{ 129, 2647},{ 143, 3076}, - { 156, 3510},{ 168, 3906},{ 189, 4269},{ 220, 4682}, - { 241, 5047},{ 266, 5342},{ 299, 5649},{ 331, 5954}, - { 357, 6309},{ 393, 6579},{ 431, 6765},{ 467, 6997}, - { 501, 7276},{ 520, 7488},{ 525, 7749},{ 548, 8146} - }, - /*Cr qi=6 INTER*/ - { - { 94, 31},{ 69, 335},{ 47, 641},{ 43, 967}, - { 50, 1350},{ 57, 1772},{ 65, 2197},{ 74, 2625}, - { 
83, 3043},{ 90, 3454},{ 94, 3867},{ 97, 4273}, - { 98, 4671},{ 99, 5068},{ 99, 5461},{ 98, 5857}, - { 98, 6245},{ 99, 6610},{ 103, 6975},{ 105, 7345}, - { 108, 7712},{ 111, 8073},{ 113, 8415},{ 119, 8768} - } - } - }, - { - { - /*Y' qi=7 INTRA*/ - { - { 92, -70},{ 156, 1590},{ 261, 3267},{ 403, 4618}, - { 529, 5704},{ 628, 6730},{ 736, 7657},{ 856, 8491}, - { 978, 9246},{ 1118, 9943},{ 1281,10550},{ 1472,11028}, - { 1645,11507},{ 1793,12008},{ 1924,12565},{ 2067,13130}, - { 2229,13638},{ 2388,14160},{ 2558,14584},{ 2744,14886}, - { 2932,15194},{ 3116,15531},{ 3311,15858},{ 3538,16197} - }, - /*Y' qi=7 INTER*/ - { - { 43, -8},{ 36, 1351},{ 71, 2923},{ 112, 4568}, - { 138, 6201},{ 157, 7705},{ 171, 9083},{ 181,10390}, - { 189,11664},{ 202,12910},{ 220,14121},{ 241,15281}, - { 266,16401},{ 295,17507},{ 328,18608},{ 371,19677}, - { 430,20701},{ 508,21676},{ 604,22588},{ 727,23397}, - { 878,24093},{ 1055,24690},{ 1263,25151},{ 1496,25504} - } - }, - { - /*Cb qi=7 INTRA*/ - { - { 1, 5},{ 40, 367},{ 72, 740},{ 89, 1175}, - { 108, 1649},{ 129, 2140},{ 143, 2637},{ 154, 3110}, - { 169, 3507},{ 198, 3860},{ 224, 4237},{ 235, 4652}, - { 253, 5037},{ 282, 5358},{ 307, 5674},{ 329, 5986}, - { 361, 6273},{ 393, 6527},{ 419, 6777},{ 435, 7078}, - { 467, 7342},{ 495, 7554},{ 529, 7757},{ 591, 8053} - }, - /*Cb qi=7 INTER*/ - { - { 79, -33},{ 68, 299},{ 56, 627},{ 50, 978}, - { 51, 1366},{ 55, 1786},{ 61, 2213},{ 70, 2642}, - { 80, 3062},{ 87, 3474},{ 92, 3886},{ 96, 4292}, - { 99, 4684},{ 102, 5072},{ 103, 5450},{ 104, 5814}, - { 104, 6176},{ 104, 6538},{ 107, 6905},{ 110, 7270}, - { 110, 7625},{ 110, 7978},{ 111, 8340},{ 117, 8674} - } - }, - { - /*Cr qi=7 INTRA*/ - { - { 2, 7},{ 31, 375},{ 62, 760},{ 87, 1223}, - { 103, 1698},{ 115, 2175},{ 131, 2644},{ 147, 3066}, - { 161, 3494},{ 175, 3889},{ 199, 4250},{ 229, 4653}, - { 250, 5001},{ 279, 5275},{ 311, 5577},{ 343, 5889}, - { 376, 6227},{ 417, 6486},{ 457, 6689},{ 484, 6925}, - { 518, 7174},{ 544, 7393},{ 549, 7662},{ 577, 
8050} - }, - /*Cr qi=7 INTER*/ - { - { 89, 22},{ 62, 332},{ 45, 641},{ 47, 976}, - { 52, 1363},{ 59, 1779},{ 67, 2203},{ 76, 2628}, - { 84, 3046},{ 90, 3460},{ 94, 3875},{ 98, 4272}, - { 99, 4666},{ 98, 5063},{ 98, 5459},{ 98, 5849}, - { 99, 6226},{ 101, 6594},{ 104, 6957},{ 109, 7324}, - { 109, 7686},{ 111, 8042},{ 115, 8379},{ 119, 8699} - } - } - }, - { - { - /*Y' qi=8 INTRA*/ - { - { 91, -69},{ 160, 1585},{ 274, 3226},{ 423, 4538}, - { 557, 5596},{ 664, 6595},{ 778, 7506},{ 905, 8319}, - { 1038, 9035},{ 1186, 9701},{ 1355,10292},{ 1554,10754}, - { 1739,11196},{ 1904,11639},{ 2047,12184},{ 2194,12763}, - { 2361,13256},{ 2529,13753},{ 2709,14155},{ 2902,14433}, - { 3100,14723},{ 3292,15026},{ 3489,15327},{ 3714,15705} - }, - /*Y' qi=8 INTER*/ - { - { 32, -157},{ 33, 1346},{ 74, 2914},{ 116, 4554}, - { 142, 6172},{ 162, 7648},{ 177, 9004},{ 186,10300}, - { 196,11570},{ 210,12808},{ 231,14001},{ 256,15150}, - { 285,16259},{ 319,17352},{ 359,18435},{ 415,19475}, - { 489,20470},{ 584,21400},{ 703,22246},{ 852,22968}, - { 1038,23556},{ 1253,24032},{ 1503,24367},{ 1778,24628} - } - }, - { - /*Cb qi=8 INTRA*/ - { - { 1, 4},{ 42, 367},{ 75, 740},{ 93, 1176}, - { 111, 1649},{ 128, 2139},{ 144, 2635},{ 157, 3103}, - { 174, 3494},{ 206, 3844},{ 233, 4207},{ 251, 4605}, - { 277, 4980},{ 304, 5284},{ 335, 5584},{ 359, 5888}, - { 393, 6152},{ 432, 6398},{ 455, 6656},{ 471, 6956}, - { 502, 7193},{ 528, 7405},{ 562, 7630},{ 603, 7922} - }, - /*Cb qi=8 INTER*/ - { - { 77, -37},{ 68, 299},{ 58, 632},{ 50, 991}, - { 50, 1382},{ 55, 1799},{ 62, 2226},{ 73, 2647}, - { 82, 3066},{ 90, 3480},{ 94, 3891},{ 96, 4296}, - { 98, 4687},{ 101, 5073},{ 103, 5456},{ 104, 5817}, - { 105, 6170},{ 106, 6523},{ 107, 6886},{ 108, 7250}, - { 109, 7600},{ 110, 7955},{ 111, 8305},{ 112, 8641} - } - }, - { - /*Cr qi=8 INTRA*/ - { - { 2, 7},{ 33, 375},{ 64, 760},{ 92, 1224}, - { 111, 1700},{ 122, 2173},{ 137, 2637},{ 156, 3055}, - { 172, 3476},{ 186, 3856},{ 211, 4211},{ 242, 4597}, - { 263, 4939},{ 292, 
5214},{ 335, 5489},{ 376, 5772}, - { 406, 6099},{ 440, 6378},{ 483, 6578},{ 517, 6797}, - { 550, 7049},{ 571, 7283},{ 583, 7560},{ 618, 7967} - }, - /*Cr qi=8 INTER*/ - { - { 74, 25},{ 58, 328},{ 43, 637},{ 45, 980}, - { 51, 1371},{ 59, 1788},{ 69, 2207},{ 79, 2630}, - { 86, 3051},{ 91, 3470},{ 95, 3880},{ 97, 4280}, - { 98, 4680},{ 97, 5074},{ 96, 5456},{ 97, 5839}, - { 99, 6219},{ 101, 6583},{ 103, 6945},{ 106, 7312}, - { 110, 7671},{ 114, 8009},{ 115, 8345},{ 117, 8686} - } - } - }, - { - { - /*Y' qi=9 INTRA*/ - { - { 104, -68},{ 164, 1580},{ 288, 3173},{ 448, 4439}, - { 587, 5485},{ 702, 6465},{ 824, 7351},{ 958, 8148}, - { 1096, 8845},{ 1253, 9480},{ 1432,10047},{ 1640,10494}, - { 1835,10926},{ 2015,11350},{ 2166,11871},{ 2321,12428}, - { 2508,12876},{ 2684,13345},{ 2866,13741},{ 3069,13991}, - { 3281,14243},{ 3487,14518},{ 3689,14813},{ 3911,15175} - }, - /*Y' qi=9 INTER*/ - { - { 47, -140},{ 34, 1348},{ 77, 2915},{ 119, 4552}, - { 145, 6150},{ 166, 7600},{ 182, 8936},{ 192,10221}, - { 203,11482},{ 220,12711},{ 244,13886},{ 274,15012}, - { 308,16111},{ 349,17190},{ 401,18244},{ 470,19257}, - { 561,20209},{ 680,21069},{ 830,21822},{ 1010,22463}, - { 1227,22971},{ 1482,23328},{ 1769,23544},{ 2077,23655} - } - }, - { - /*Cb qi=9 INTRA*/ - { - { 1, 5},{ 43, 367},{ 76, 740},{ 95, 1176}, - { 114, 1649},{ 135, 2138},{ 153, 2629},{ 165, 3091}, - { 184, 3481},{ 217, 3831},{ 244, 4187},{ 260, 4572}, - { 290, 4930},{ 320, 5231},{ 351, 5521},{ 379, 5812}, - { 414, 6055},{ 452, 6307},{ 483, 6564},{ 502, 6848}, - { 525, 7115},{ 554, 7321},{ 589, 7533},{ 626, 7833} - }, - /*Cb qi=9 INTER*/ - { - { 101, -43},{ 81, 298},{ 62, 637},{ 49, 989}, - { 51, 1381},{ 56, 1806},{ 65, 2231},{ 74, 2653}, - { 84, 3071},{ 91, 3482},{ 95, 3892},{ 97, 4293}, - { 99, 4684},{ 101, 5066},{ 103, 5437},{ 103, 5793}, - { 103, 6148},{ 104, 6511},{ 105, 6867},{ 107, 7221}, - { 110, 7572},{ 111, 7926},{ 112, 8283},{ 116, 8625} - } - }, - { - /*Cr qi=9 INTRA*/ - { - { 2, 7},{ 35, 375},{ 66, 761},{ 93, 
1224}, - { 112, 1700},{ 126, 2173},{ 144, 2633},{ 165, 3047}, - { 183, 3458},{ 199, 3835},{ 224, 4191},{ 257, 4558}, - { 283, 4887},{ 309, 5176},{ 351, 5446},{ 397, 5713}, - { 433, 6017},{ 469, 6283},{ 508, 6480},{ 546, 6687}, - { 579, 6945},{ 600, 7182},{ 610, 7434},{ 623, 7793} - }, - /*Cr qi=9 INTER*/ - { - { 77, 15},{ 57, 330},{ 45, 640},{ 48, 980}, - { 54, 1380},{ 61, 1802},{ 70, 2220},{ 80, 2639}, - { 87, 3057},{ 92, 3474},{ 94, 3882},{ 98, 4282}, - { 98, 4675},{ 97, 5062},{ 97, 5450},{ 98, 5829}, - { 100, 6197},{ 101, 6561},{ 104, 6927},{ 107, 7289}, - { 113, 7638},{ 117, 7978},{ 119, 8311},{ 117, 8629} - } - } - }, - { - { - /*Y' qi=10 INTRA*/ - { - { 101, -69},{ 168, 1574},{ 299, 3143},{ 465, 4386}, - { 610, 5410},{ 736, 6353},{ 866, 7207},{ 1006, 7982}, - { 1153, 8655},{ 1319, 9261},{ 1504, 9812},{ 1719,10248}, - { 1928,10653},{ 2116,11056},{ 2282,11550},{ 2458,12070}, - { 2654,12492},{ 2846,12923},{ 3043,13291},{ 3249,13537}, - { 3466,13764},{ 3682,13999},{ 3896,14268},{ 4145,14548} - }, - /*Y' qi=10 INTER*/ - { - { 48, -94},{ 34, 1355},{ 81, 2920},{ 124, 4545}, - { 151, 6113},{ 174, 7532},{ 190, 8850},{ 201,10125}, - { 214,11379},{ 235,12591},{ 264,13745},{ 299,14859}, - { 338,15948},{ 388,17008},{ 456,18029},{ 546,18988}, - { 661,19877},{ 808,20666},{ 993,21321},{ 1218,21835}, - { 1481,22203},{ 1783,22420},{ 2117,22504},{ 2469,22481} - } - }, - { - /*Cb qi=10 INTRA*/ - { - { 2, 4},{ 44, 367},{ 79, 740},{ 99, 1178}, - { 117, 1652},{ 137, 2141},{ 156, 2630},{ 170, 3089}, - { 192, 3474},{ 227, 3813},{ 259, 4157},{ 282, 4526}, - { 310, 4860},{ 342, 5140},{ 377, 5425},{ 400, 5714}, - { 436, 5952},{ 475, 6194},{ 496, 6468},{ 522, 6748}, - { 559, 6996},{ 587, 7216},{ 617, 7433},{ 673, 7678} - }, - /*Cb qi=10 INTER*/ - { - { 87, -37},{ 72, 301},{ 58, 636},{ 49, 995}, - { 51, 1394},{ 57, 1819},{ 66, 2241},{ 78, 2660}, - { 87, 3074},{ 93, 3482},{ 97, 3891},{ 99, 4294}, - { 101, 4678},{ 103, 5050},{ 105, 5414},{ 106, 5773}, - { 107, 6134},{ 108, 6485},{ 110, 
6832},{ 113, 7187}, - { 113, 7547},{ 114, 7887},{ 117, 8230},{ 112, 8590} - } - }, - { - /*Cr qi=10 INTRA*/ - { - { 2, 7},{ 38, 375},{ 69, 761},{ 96, 1224}, - { 116, 1701},{ 131, 2175},{ 148, 2634},{ 168, 3041}, - { 190, 3439},{ 211, 3802},{ 238, 4151},{ 271, 4506}, - { 297, 4824},{ 331, 5103},{ 373, 5360},{ 415, 5632}, - { 459, 5928},{ 500, 6176},{ 535, 6386},{ 573, 6586}, - { 608, 6834},{ 629, 7079},{ 642, 7337},{ 686, 7680} - }, - /*Cr qi=10 INTER*/ - { - { 81, 34},{ 63, 333},{ 50, 633},{ 48, 987}, - { 53, 1397},{ 61, 1820},{ 71, 2237},{ 83, 2651}, - { 91, 3065},{ 95, 3479},{ 98, 3882},{ 100, 4279}, - { 101, 4673},{ 101, 5054},{ 100, 5429},{ 101, 5801}, - { 102, 6173},{ 104, 6541},{ 108, 6904},{ 110, 7264}, - { 114, 7609},{ 119, 7945},{ 123, 8275},{ 128, 8615} - } - } - }, - { - { - /*Y' qi=11 INTRA*/ - { - { 110, -66},{ 176, 1564},{ 316, 3087},{ 492, 4296}, - { 645, 5299},{ 781, 6217},{ 924, 7039},{ 1075, 7776}, - { 1232, 8421},{ 1410, 9005},{ 1607, 9532},{ 1834, 9929}, - { 2053,10300},{ 2249,10697},{ 2427,11184},{ 2619,11682}, - { 2826,12083},{ 3019,12508},{ 3225,12869},{ 3452,13064}, - { 3670,13280},{ 3890,13519},{ 4123,13750},{ 4367,14059} - }, - /*Y' qi=11 INTER*/ - { - { 72, -115},{ 32, 1354},{ 83, 2911},{ 126, 4534}, - { 154, 6080},{ 178, 7475},{ 194, 8779},{ 205,10047}, - { 222,11290},{ 246,12488},{ 281,13621},{ 322,14714}, - { 372,15786},{ 436,16821},{ 519,17813},{ 628,18728}, - { 770,19549},{ 950,20254},{ 1175,20800},{ 1443,21197}, - { 1752,21446},{ 2095,21555},{ 2457,21553},{ 2808,21544} - } - }, - { - /*Cb qi=11 INTRA*/ - { - { 2, 4},{ 45, 367},{ 81, 740},{ 101, 1177}, - { 121, 1650},{ 142, 2136},{ 159, 2621},{ 174, 3075}, - { 199, 3451},{ 234, 3778},{ 265, 4117},{ 297, 4473}, - { 333, 4789},{ 367, 5054},{ 402, 5319},{ 427, 5613}, - { 462, 5871},{ 503, 6107},{ 532, 6336},{ 560, 6584}, - { 601, 6842},{ 631, 7092},{ 662, 7292},{ 721, 7497} - }, - /*Cb qi=11 INTER*/ - { - { 117, -24},{ 93, 308},{ 69, 638},{ 52, 993}, - { 52, 1395},{ 58, 1822},{ 68, 
2246},{ 80, 2665}, - { 89, 3082},{ 94, 3492},{ 96, 3900},{ 98, 4299}, - { 101, 4679},{ 103, 5047},{ 104, 5405},{ 106, 5763}, - { 106, 6120},{ 107, 6474},{ 109, 6823},{ 112, 7163}, - { 115, 7516},{ 117, 7868},{ 118, 8213},{ 119, 8561} - } - }, - { - /*Cr qi=11 INTRA*/ - { - { 2, 7},{ 40, 375},{ 75, 761},{ 100, 1224}, - { 119, 1700},{ 137, 2169},{ 154, 2622},{ 178, 3025}, - { 198, 3416},{ 220, 3770},{ 255, 4114},{ 294, 4459}, - { 323, 4756},{ 359, 5028},{ 399, 5292},{ 438, 5556}, - { 483, 5827},{ 518, 6073},{ 551, 6298},{ 598, 6501}, - { 634, 6754},{ 652, 6997},{ 670, 7211},{ 689, 7560} - }, - /*Cr qi=11 INTER*/ - { - { 75, 30},{ 61, 334},{ 51, 639},{ 49, 995}, - { 53, 1403},{ 62, 1821},{ 73, 2237},{ 84, 2654}, - { 91, 3070},{ 95, 3485},{ 96, 3890},{ 98, 4287}, - { 98, 4672},{ 99, 5050},{ 99, 5427},{ 100, 5798}, - { 103, 6169},{ 105, 6528},{ 107, 6881},{ 113, 7233}, - { 118, 7580},{ 121, 7916},{ 125, 8240},{ 130, 8551} - } - } - }, - { - { - /*Y' qi=12 INTRA*/ - { - { 104, -69},{ 182, 1557},{ 335, 3040},{ 521, 4205}, - { 684, 5178},{ 831, 6068},{ 986, 6854},{ 1151, 7559}, - { 1323, 8169},{ 1523, 8704},{ 1736, 9192},{ 1978, 9558}, - { 2213, 9908},{ 2421,10298},{ 2613,10757},{ 2822,11208}, - { 3042,11585},{ 3250,11991},{ 3474,12308},{ 3710,12480}, - { 3939,12687},{ 4174,12902},{ 4416,13102},{ 4672,13369} - }, - /*Y' qi=12 INTER*/ - { - { 52, -91},{ 34, 1355},{ 86, 2911},{ 129, 4518}, - { 159, 6037},{ 184, 7405},{ 200, 8694},{ 213, 9955}, - { 232,11185},{ 263,12360},{ 304,13479},{ 354,14555}, - { 415,15601},{ 495,16608},{ 601,17549},{ 738,18400}, - { 915,19136},{ 1139,19724},{ 1414,20150},{ 1731,20412}, - { 2090,20520},{ 2473,20509},{ 2851,20442},{ 3227,20328} - } - }, - { - /*Cb qi=12 INTRA*/ - { - { 1, 4},{ 46, 367},{ 85, 740},{ 109, 1178}, - { 126, 1650},{ 145, 2134},{ 165, 2617},{ 182, 3061}, - { 209, 3428},{ 245, 3749},{ 281, 4077},{ 316, 4417}, - { 354, 4718},{ 392, 4970},{ 430, 5217},{ 456, 5501}, - { 490, 5771},{ 534, 5996},{ 571, 6207},{ 600, 6458}, - { 644, 
6697},{ 675, 6942},{ 707, 7151},{ 766, 7342} - }, - /*Cb qi=12 INTER*/ - { - { 84, -24},{ 73, 311},{ 60, 644},{ 52, 998}, - { 53, 1398},{ 60, 1825},{ 71, 2249},{ 83, 2665}, - { 90, 3081},{ 94, 3490},{ 97, 3893},{ 99, 4286}, - { 102, 4663},{ 104, 5032},{ 105, 5393},{ 106, 5751}, - { 107, 6102},{ 108, 6445},{ 111, 6788},{ 113, 7136}, - { 114, 7483},{ 117, 7828},{ 121, 8163},{ 122, 8496} - } - }, - { - /*Cr qi=12 INTRA*/ - { - { 3, 7},{ 41, 375},{ 78, 761},{ 106, 1225}, - { 124, 1700},{ 140, 2167},{ 163, 2616},{ 188, 3010}, - { 213, 3385},{ 240, 3718},{ 271, 4062},{ 309, 4406}, - { 345, 4691},{ 387, 4956},{ 430, 5212},{ 469, 5467}, - { 513, 5729},{ 554, 5970},{ 587, 6176},{ 633, 6395}, - { 673, 6659},{ 692, 6868},{ 712, 7061},{ 758, 7259} - }, - /*Cr qi=12 INTER*/ - { - { 73, 31},{ 59, 335},{ 48, 638},{ 50, 998}, - { 56, 1410},{ 65, 1827},{ 75, 2240},{ 85, 2657}, - { 92, 3073},{ 95, 3485},{ 97, 3888},{ 99, 4279}, - { 98, 4663},{ 99, 5042},{ 101, 5412},{ 102, 5779}, - { 105, 6142},{ 107, 6498},{ 108, 6848},{ 113, 7198}, - { 118, 7540},{ 121, 7867},{ 127, 8188},{ 132, 8508} - } - } - }, - { - { - /*Y' qi=13 INTRA*/ - { - { 109, -68},{ 187, 1551},{ 347, 3010},{ 541, 4153}, - { 709, 5107},{ 864, 5975},{ 1026, 6745},{ 1194, 7433}, - { 1375, 8021},{ 1581, 8550},{ 1803, 9026},{ 2054, 9371}, - { 2301, 9713},{ 2522,10082},{ 2728,10515},{ 2949,10956}, - { 3184,11297},{ 3408,11653},{ 3643,11946},{ 3886,12100}, - { 4124,12277},{ 4377,12459},{ 4632,12635},{ 4898,12861} - }, - /*Y' qi=13 INTER*/ - { - { 48, -78},{ 35, 1357},{ 89, 2914},{ 133, 4512}, - { 164, 6004},{ 190, 7348},{ 207, 8627},{ 222, 9881}, - { 247,11096},{ 284,12251},{ 333,13350},{ 392,14407}, - { 466,15426},{ 565,16391},{ 696,17279},{ 865,18058}, - { 1085,18689},{ 1358,19156},{ 1684,19456},{ 2050,19605}, - { 2447,19614},{ 2855,19524},{ 3243,19398},{ 3611,19201} - } - }, - { - /*Cb qi=13 INTRA*/ - { - { 2, 4},{ 47, 367},{ 86, 741},{ 108, 1179}, - { 127, 1651},{ 150, 2133},{ 173, 2611},{ 194, 3050}, - { 222, 3417},{ 
262, 3733},{ 303, 4048},{ 337, 4375}, - { 378, 4657},{ 420, 4897},{ 456, 5148},{ 486, 5422}, - { 518, 5682},{ 558, 5903},{ 592, 6113},{ 623, 6372}, - { 662, 6628},{ 700, 6833},{ 751, 6989},{ 805, 7147} - }, - /*Cb qi=13 INTER*/ - { - { 94, -34},{ 78, 303},{ 60, 638},{ 51, 994}, - { 54, 1406},{ 61, 1836},{ 73, 2253},{ 84, 2668}, - { 92, 3082},{ 96, 3492},{ 99, 3894},{ 101, 4284}, - { 103, 4659},{ 105, 5023},{ 106, 5376},{ 108, 5726}, - { 109, 6070},{ 110, 6418},{ 113, 6765},{ 117, 7105}, - { 119, 7448},{ 122, 7784},{ 126, 8119},{ 131, 8463} - } - }, - { - /*Cr qi=13 INTRA*/ - { - { 3, 7},{ 43, 375},{ 80, 762},{ 110, 1226}, - { 131, 1701},{ 149, 2166},{ 172, 2610},{ 196, 2999}, - { 221, 3359},{ 254, 3679},{ 292, 4005},{ 332, 4329}, - { 369, 4612},{ 408, 4880},{ 456, 5139},{ 500, 5388}, - { 544, 5631},{ 581, 5877},{ 615, 6101},{ 660, 6316}, - { 692, 6594},{ 714, 6795},{ 736, 6997},{ 789, 7290} - }, - /*Cr qi=13 INTER*/ - { - { 73, 28},{ 61, 336},{ 46, 642},{ 50, 1003}, - { 58, 1414},{ 67, 1832},{ 79, 2245},{ 87, 2660}, - { 93, 3075},{ 97, 3484},{ 99, 3888},{ 100, 4277}, - { 100, 4651},{ 100, 5027},{ 101, 5403},{ 102, 5765}, - { 105, 6116},{ 109, 6470},{ 113, 6825},{ 119, 7163}, - { 124, 7497},{ 127, 7827},{ 131, 8137},{ 135, 8437} - } - } - }, - { - { - /*Y' qi=14 INTRA*/ - { - { 113, -68},{ 191, 1545},{ 358, 2981},{ 559, 4104}, - { 733, 5044},{ 896, 5890},{ 1066, 6636},{ 1241, 7304}, - { 1428, 7886},{ 1642, 8402},{ 1872, 8871},{ 2128, 9219}, - { 2380, 9547},{ 2609, 9908},{ 2825,10321},{ 3055,10728}, - { 3294,11076},{ 3523,11425},{ 3766,11689},{ 4013,11845}, - { 4254,12022},{ 4506,12209},{ 4759,12383},{ 5013,12637} - }, - /*Y' qi=14 INTER*/ - { - { 58, -82},{ 38, 1362},{ 93, 2914},{ 138, 4492}, - { 171, 5962},{ 198, 7289},{ 216, 8559},{ 234, 9804}, - { 263,11005},{ 306,12143},{ 363,13222},{ 434,14259}, - { 523,15255},{ 639,16188},{ 794,17021},{ 1000,17717}, - { 1262,18260},{ 1575,18645},{ 1943,18841},{ 2356,18872}, - { 2782,18802},{ 3194,18682},{ 3576,18559},{ 
3923,18447} - } - }, - { - /*Cb qi=14 INTRA*/ - { - { 2, 3},{ 50, 367},{ 91, 741},{ 114, 1180}, - { 134, 1651},{ 157, 2131},{ 181, 2601},{ 208, 3028}, - { 239, 3391},{ 279, 3706},{ 322, 4000},{ 361, 4309}, - { 406, 4587},{ 445, 4822},{ 482, 5067},{ 515, 5344}, - { 546, 5612},{ 589, 5821},{ 626, 6020},{ 655, 6276}, - { 701, 6523},{ 748, 6717},{ 796, 6876},{ 815, 7151} - }, - /*Cb qi=14 INTER*/ - { - { 80, -43},{ 68, 301},{ 56, 644},{ 50, 1004}, - { 54, 1412},{ 63, 1836},{ 75, 2253},{ 87, 2670}, - { 94, 3083},{ 98, 3487},{ 101, 3885},{ 103, 4271}, - { 106, 4645},{ 107, 5004},{ 108, 5358},{ 109, 5705}, - { 112, 6047},{ 115, 6388},{ 118, 6731},{ 121, 7081}, - { 126, 7421},{ 129, 7747},{ 132, 8076},{ 137, 8419} - } - }, - { - /*Cr qi=14 INTRA*/ - { - { 3, 6},{ 45, 375},{ 85, 762},{ 116, 1226}, - { 138, 1700},{ 158, 2163},{ 180, 2602},{ 206, 2985}, - { 236, 3333},{ 270, 3639},{ 310, 3956},{ 359, 4258}, - { 397, 4524},{ 430, 4802},{ 478, 5068},{ 527, 5316}, - { 572, 5560},{ 613, 5802},{ 654, 6012},{ 699, 6216}, - { 734, 6489},{ 755, 6707},{ 775, 6898},{ 841, 7111} - }, - /*Cr qi=14 INTER*/ - { - { 78, 0},{ 59, 322},{ 46, 649},{ 51, 1016}, - { 58, 1422},{ 68, 1839},{ 81, 2253},{ 90, 2666}, - { 95, 3080},{ 98, 3486},{ 101, 3881},{ 102, 4268}, - { 102, 4644},{ 103, 5017},{ 105, 5382},{ 106, 5743}, - { 108, 6093},{ 112, 6442},{ 118, 6791},{ 124, 7130}, - { 127, 7463},{ 133, 7784},{ 138, 8085},{ 142, 8395} - } - } - }, - { - { - /*Y' qi=15 INTRA*/ - { - { 111, -66},{ 197, 1538},{ 370, 2949},{ 579, 4050}, - { 762, 4968},{ 933, 5798},{ 1112, 6520},{ 1299, 7161}, - { 1497, 7725},{ 1723, 8219},{ 1967, 8654},{ 2234, 8990}, - { 2499, 9302},{ 2740, 9637},{ 2968,10039},{ 3215,10414}, - { 3473,10709},{ 3721,11015},{ 3971,11270},{ 4228,11402}, - { 4487,11543},{ 4752,11707},{ 5011,11871},{ 5290,12099} - }, - /*Y' qi=15 INTER*/ - { - { 59, -113},{ 37, 1349},{ 95, 2904},{ 139, 4478}, - { 174, 5929},{ 201, 7244},{ 220, 8505},{ 241, 9736}, - { 275,10922},{ 327,12040},{ 395,13097},{ 
477,14114}, - { 585,15071},{ 730,15947},{ 917,16714},{ 1162,17326}, - { 1468,17770},{ 1833,18029},{ 2251,18111},{ 2694,18068}, - { 3125,17968},{ 3529,17845},{ 3908,17713},{ 4260,17587} - } - }, - { - /*Cb qi=15 INTRA*/ - { - { 2, 3},{ 51, 367},{ 94, 741},{ 120, 1180}, - { 140, 1651},{ 160, 2129},{ 184, 2591},{ 213, 3010}, - { 246, 3371},{ 289, 3680},{ 335, 3969},{ 374, 4274}, - { 418, 4546},{ 460, 4783},{ 498, 5019},{ 532, 5280}, - { 565, 5553},{ 608, 5765},{ 647, 5958},{ 683, 6193}, - { 732, 6433},{ 782, 6620},{ 832, 6769},{ 848, 7027} - }, - /*Cb qi=15 INTER*/ - { - { 71, -52},{ 63, 296},{ 54, 644},{ 50, 1010}, - { 53, 1417},{ 64, 1837},{ 77, 2253},{ 88, 2666}, - { 95, 3079},{ 98, 3487},{ 100, 3882},{ 103, 4264}, - { 106, 4633},{ 108, 4991},{ 109, 5343},{ 109, 5693}, - { 112, 6038},{ 114, 6371},{ 119, 6709},{ 123, 7051}, - { 125, 7385},{ 130, 7716},{ 135, 8050},{ 140, 8374} - } - }, - { - /*Cr qi=15 INTRA*/ - { - { 2, 6},{ 47, 375},{ 87, 763},{ 119, 1225}, - { 143, 1699},{ 162, 2158},{ 185, 2595},{ 213, 2971}, - { 246, 3315},{ 279, 3618},{ 320, 3920},{ 372, 4210}, - { 409, 4480},{ 446, 4756},{ 496, 5017},{ 542, 5263}, - { 590, 5487},{ 639, 5721},{ 687, 5923},{ 724, 6132}, - { 753, 6417},{ 781, 6622},{ 805, 6806},{ 856, 6977} - }, - /*Cr qi=15 INTER*/ - { - { 71, 3},{ 61, 326},{ 52, 651},{ 50, 1017}, - { 58, 1422},{ 69, 1837},{ 82, 2251},{ 90, 2668}, - { 95, 3080},{ 98, 3484},{ 101, 3877},{ 102, 4257}, - { 102, 4632},{ 101, 5005},{ 103, 5370},{ 106, 5733}, - { 110, 6082},{ 116, 6424},{ 120, 6774},{ 124, 7106}, - { 130, 7427},{ 135, 7748},{ 141, 8052},{ 147, 8333} - } - } - }, - { - { - /*Y' qi=16 INTRA*/ - { - { 114, -63},{ 206, 1525},{ 396, 2887},{ 618, 3945}, - { 816, 4832},{ 1002, 5626},{ 1196, 6319},{ 1401, 6923}, - { 1616, 7458},{ 1857, 7928},{ 2121, 8334},{ 2405, 8645}, - { 2685, 8934},{ 2938, 9255},{ 3175, 9638},{ 3433, 9990}, - { 3707,10263},{ 3958,10577},{ 4218,10807},{ 4488,10906}, - { 4760,11028},{ 5037,11148},{ 5306,11286},{ 5625,11463} - }, - /*Y' 
qi=16 INTER*/ - { - { 69, -153},{ 39, 1348},{ 98, 2894},{ 144, 4448}, - { 181, 5872},{ 209, 7167},{ 228, 8422},{ 254, 9644}, - { 297,10810},{ 359,11908},{ 438,12944},{ 539,13930}, - { 672,14842},{ 850,15650},{ 1085,16318},{ 1391,16793}, - { 1769,17082},{ 2200,17198},{ 2659,17174},{ 3116,17072}, - { 3547,16948},{ 3943,16819},{ 4299,16701},{ 4611,16644} - } - }, - { - /*Cb qi=16 INTRA*/ - { - { 3, 4},{ 54, 367},{ 97, 742},{ 122, 1181}, - { 143, 1651},{ 168, 2123},{ 197, 2575},{ 226, 2985}, - { 263, 3338},{ 314, 3631},{ 367, 3903},{ 409, 4200}, - { 453, 4468},{ 491, 4703},{ 528, 4932},{ 566, 5188}, - { 601, 5459},{ 647, 5672},{ 693, 5844},{ 734, 6058}, - { 784, 6305},{ 836, 6460},{ 882, 6602},{ 905, 6891} - }, - /*Cb qi=16 INTER*/ - { - { 75, -64},{ 67, 292},{ 56, 645},{ 51, 1016}, - { 54, 1421},{ 66, 1842},{ 79, 2257},{ 89, 2670}, - { 95, 3082},{ 98, 3488},{ 101, 3879},{ 104, 4258}, - { 106, 4623},{ 108, 4974},{ 109, 5321},{ 113, 5664}, - { 116, 6001},{ 117, 6341},{ 123, 6677},{ 128, 7004}, - { 130, 7336},{ 136, 7671},{ 143, 7996},{ 148, 8310} - } - }, - { - /*Cr qi=16 INTRA*/ - { - { 4, 7},{ 50, 375},{ 90, 763},{ 124, 1225}, - { 148, 1698},{ 168, 2154},{ 195, 2582},{ 227, 2948}, - { 263, 3279},{ 302, 3575},{ 343, 3865},{ 394, 4137}, - { 439, 4402},{ 482, 4672},{ 533, 4925},{ 579, 5165}, - { 626, 5382},{ 675, 5616},{ 725, 5812},{ 769, 5991}, - { 810, 6242},{ 848, 6430},{ 868, 6615},{ 944, 6732} - }, - /*Cr qi=16 INTER*/ - { - { 78, 11},{ 62, 327},{ 49, 650},{ 50, 1025}, - { 59, 1431},{ 72, 1841},{ 83, 2253},{ 90, 2671}, - { 95, 3084},{ 98, 3487},{ 100, 3879},{ 101, 4254}, - { 102, 4625},{ 103, 4994},{ 106, 5355},{ 108, 5708}, - { 111, 6058},{ 115, 6400},{ 121, 6733},{ 128, 7058}, - { 134, 7374},{ 140, 7691},{ 146, 7993},{ 146, 8317} - } - } - }, - { - { - /*Y' qi=17 INTRA*/ - { - { 112, -59},{ 210, 1515},{ 409, 2850},{ 640, 3882}, - { 844, 4748},{ 1038, 5529},{ 1240, 6206},{ 1452, 6803}, - { 1676, 7330},{ 1925, 7792},{ 2194, 8201},{ 2483, 8512}, - { 2766, 8801},{ 
3027, 9121},{ 3279, 9482},{ 3548, 9810}, - { 3825,10069},{ 4088,10345},{ 4362,10544},{ 4638,10644}, - { 4915,10744},{ 5196,10850},{ 5471,10981},{ 5802,11136} - }, - /*Y' qi=17 INTER*/ - { - { 70, -147},{ 45, 1349},{ 106, 2894},{ 155, 4425}, - { 195, 5818},{ 225, 7099},{ 247, 8348},{ 278, 9565}, - { 328,10717},{ 399,11794},{ 491,12807},{ 609,13760}, - { 766,14623},{ 984,15349},{ 1274,15902},{ 1642,16256}, - { 2082,16411},{ 2563,16409},{ 3048,16315},{ 3508,16194}, - { 3924,16064},{ 4306,15938},{ 4656,15828},{ 4966,15733} - } - }, - { - /*Cb qi=17 INTRA*/ - { - { 3, 4},{ 57, 367},{ 101, 742},{ 126, 1182}, - { 148, 1650},{ 175, 2118},{ 207, 2565},{ 241, 2966}, - { 279, 3307},{ 331, 3588},{ 389, 3845},{ 435, 4132}, - { 474, 4408},{ 517, 4641},{ 560, 4869},{ 602, 5122}, - { 638, 5389},{ 672, 5610},{ 716, 5787},{ 758, 6002}, - { 817, 6226},{ 869, 6393},{ 916, 6530},{ 950, 6799} - }, - /*Cb qi=17 INTER*/ - { - { 105, -65},{ 86, 288},{ 66, 638},{ 54, 1014}, - { 59, 1427},{ 71, 1844},{ 86, 2257},{ 95, 2668}, - { 100, 3075},{ 103, 3476},{ 106, 3867},{ 110, 4241}, - { 112, 4598},{ 114, 4948},{ 117, 5294},{ 121, 5633}, - { 123, 5968},{ 126, 6301},{ 131, 6637},{ 136, 6968}, - { 144, 7287},{ 152, 7606},{ 158, 7931},{ 162, 8262} - } - }, - { - /*Cr qi=17 INTRA*/ - { - { 4, 6},{ 55, 376},{ 97, 765},{ 128, 1226}, - { 152, 1696},{ 175, 2144},{ 204, 2568},{ 241, 2928}, - { 282, 3250},{ 323, 3530},{ 368, 3811},{ 420, 4089}, - { 463, 4347},{ 505, 4609},{ 562, 4860},{ 609, 5094}, - { 655, 5303},{ 709, 5535},{ 759, 5740},{ 803, 5913}, - { 844, 6153},{ 879, 6350},{ 905, 6527},{ 972, 6637} - }, - /*Cr qi=17 INTER*/ - { - { 88, 8},{ 68, 330},{ 51, 653},{ 54, 1028}, - { 65, 1433},{ 77, 1845},{ 89, 2257},{ 96, 2669}, - { 100, 3081},{ 102, 3481},{ 105, 3867},{ 106, 4245}, - { 108, 4613},{ 110, 4971},{ 112, 5328},{ 115, 5679}, - { 120, 6019},{ 127, 6355},{ 133, 6686},{ 140, 7007}, - { 149, 7316},{ 158, 7618},{ 166, 7924},{ 170, 8232} - } - } - }, - { - { - /*Y' qi=18 INTRA*/ - { - { 122, -58},{ 
216, 1506},{ 425, 2815},{ 665, 3822}, - { 882, 4666},{ 1088, 5425},{ 1301, 6084},{ 1529, 6653}, - { 1766, 7162},{ 2026, 7611},{ 2312, 7987},{ 2612, 8278}, - { 2913, 8551},{ 3196, 8840},{ 3454, 9184},{ 3734, 9490}, - { 4030, 9725},{ 4305, 9973},{ 4585,10162},{ 4864,10251}, - { 5150,10324},{ 5443,10420},{ 5727,10536},{ 6053,10682} - }, - /*Y' qi=18 INTER*/ - { - { 66, -143},{ 47, 1351},{ 108, 2886},{ 158, 4401}, - { 200, 5775},{ 232, 7044},{ 256, 8288},{ 292, 9493}, - { 351,10625},{ 434,11679},{ 541,12665},{ 681,13578}, - { 875,14379},{ 1136,15025},{ 1483,15475},{ 1914,15709}, - { 2399,15767},{ 2907,15699},{ 3400,15579},{ 3852,15453}, - { 4259,15332},{ 4630,15221},{ 4976,15121},{ 5294,15061} - } - }, - { - /*Cb qi=18 INTRA*/ - { - { 2, 3},{ 61, 367},{ 107, 743},{ 131, 1182}, - { 155, 1648},{ 183, 2110},{ 220, 2542},{ 260, 2927}, - { 303, 3265},{ 359, 3540},{ 416, 3785},{ 462, 4063}, - { 506, 4334},{ 553, 4567},{ 595, 4797},{ 636, 5049}, - { 676, 5304},{ 717, 5516},{ 759, 5698},{ 801, 5904}, - { 861, 6133},{ 911, 6311},{ 962, 6443},{ 1021, 6645} - }, - /*Cb qi=18 INTER*/ - { - { 126, 5},{ 95, 326},{ 66, 643},{ 55, 1015}, - { 60, 1427},{ 73, 1843},{ 87, 2256},{ 96, 2667}, - { 101, 3073},{ 104, 3470},{ 108, 3853},{ 111, 4226}, - { 114, 4584},{ 117, 4928},{ 119, 5274},{ 122, 5612}, - { 126, 5942},{ 130, 6271},{ 136, 6606},{ 141, 6931}, - { 148, 7247},{ 156, 7568},{ 164, 7891},{ 173, 8211} - } - }, - { - /*Cr qi=18 INTRA*/ - { - { 4, 6},{ 59, 376},{ 104, 765},{ 133, 1226}, - { 156, 1692},{ 184, 2136},{ 218, 2548},{ 260, 2893}, - { 308, 3204},{ 348, 3481},{ 397, 3751},{ 448, 4024}, - { 490, 4281},{ 541, 4523},{ 593, 4776},{ 634, 5022}, - { 685, 5236},{ 748, 5455},{ 812, 5638},{ 856, 5818}, - { 891, 6048},{ 928, 6230},{ 961, 6405},{ 1055, 6449} - }, - /*Cr qi=18 INTER*/ - { - { 81, 34},{ 68, 342},{ 57, 652},{ 59, 1027}, - { 67, 1439},{ 80, 1848},{ 91, 2257},{ 97, 2670}, - { 100, 3076},{ 103, 3473},{ 106, 3857},{ 108, 4231}, - { 109, 4599},{ 110, 4958},{ 113, 5307},{ 119, 
5650}, - { 125, 5991},{ 130, 6325},{ 138, 6651},{ 147, 6971}, - { 153, 7278},{ 162, 7578},{ 172, 7874},{ 177, 8156} - } - } - }, - { - { - /*Y' qi=19 INTRA*/ - { - { 128, -55},{ 228, 1495},{ 448, 2775},{ 699, 3758}, - { 931, 4571},{ 1154, 5296},{ 1386, 5914},{ 1636, 6450}, - { 1894, 6930},{ 2177, 7342},{ 2479, 7698},{ 2792, 7976}, - { 3099, 8235},{ 3392, 8517},{ 3658, 8853},{ 3938, 9155}, - { 4242, 9371},{ 4527, 9605},{ 4810, 9781},{ 5089, 9853}, - { 5378, 9920},{ 5674,10009},{ 5972,10110},{ 6336,10196} - }, - /*Y' qi=19 INTER*/ - { - { 69, -147},{ 49, 1353},{ 111, 2883},{ 162, 4381}, - { 205, 5737},{ 237, 6996},{ 264, 8232},{ 307, 9421}, - { 376,10534},{ 472,11567},{ 596,12525},{ 761,13395}, - { 990,14130},{ 1298,14694},{ 1695,15053},{ 2172,15195}, - { 2696,15173},{ 3213,15075},{ 3696,14948},{ 4141,14829}, - { 4541,14721},{ 4910,14609},{ 5245,14506},{ 5536,14399} - } - }, - { - /*Cb qi=19 INTRA*/ - { - { 3, 3},{ 61, 367},{ 109, 743},{ 135, 1182}, - { 161, 1646},{ 191, 2101},{ 229, 2524},{ 273, 2898}, - { 318, 3221},{ 376, 3490},{ 436, 3731},{ 487, 3994}, - { 539, 4251},{ 584, 4485},{ 621, 4721},{ 664, 4967}, - { 709, 5225},{ 752, 5431},{ 801, 5595},{ 846, 5796}, - { 912, 6011},{ 959, 6193},{ 1015, 6321},{ 1121, 6504} - }, - /*Cb qi=19 INTER*/ - { - { 126, 4},{ 97, 329},{ 69, 649},{ 56, 1017}, - { 61, 1432},{ 74, 1846},{ 88, 2255},{ 98, 2663}, - { 103, 3065},{ 106, 3460},{ 110, 3844},{ 114, 4211}, - { 117, 4564},{ 120, 4911},{ 122, 5253},{ 125, 5588}, - { 129, 5916},{ 135, 6241},{ 142, 6567},{ 149, 6885}, - { 155, 7206},{ 163, 7527},{ 174, 7843},{ 188, 8145} - } - }, - { - /*Cr qi=19 INTRA*/ - { - { 5, 6},{ 61, 376},{ 106, 765},{ 135, 1225}, - { 160, 1689},{ 192, 2126},{ 229, 2531},{ 271, 2869}, - { 321, 3168},{ 370, 3433},{ 421, 3704},{ 476, 3965}, - { 520, 4212},{ 572, 4452},{ 629, 4691},{ 671, 4939}, - { 724, 5152},{ 792, 5347},{ 858, 5510},{ 895, 5696}, - { 939, 5905},{ 991, 6056},{ 1027, 6244},{ 1127, 6333} - }, - /*Cr qi=19 INTER*/ - { - { 80, 45},{ 66, 
344},{ 55, 654},{ 56, 1030}, - { 66, 1440},{ 80, 1850},{ 91, 2259},{ 98, 2668}, - { 102, 3072},{ 104, 3466},{ 107, 3845},{ 109, 4215}, - { 110, 4578},{ 112, 4933},{ 116, 5283},{ 122, 5625}, - { 129, 5963},{ 136, 6287},{ 143, 6611},{ 151, 6927}, - { 160, 7229},{ 170, 7528},{ 181, 7818},{ 191, 8092} - } - } - }, - { - { - /*Y' qi=20 INTRA*/ - { - { 129, -50},{ 238, 1481},{ 469, 2728},{ 730, 3684}, - { 974, 4473},{ 1213, 5171},{ 1463, 5763},{ 1729, 6281}, - { 2002, 6744},{ 2299, 7146},{ 2613, 7492},{ 2940, 7746}, - { 3265, 7978},{ 3571, 8228},{ 3853, 8543},{ 4156, 8815}, - { 4476, 9001},{ 4775, 9218},{ 5070, 9373},{ 5352, 9446}, - { 5649, 9510},{ 5956, 9580},{ 6268, 9660},{ 6647, 9705} - }, - /*Y' qi=20 INTER*/ - { - { 64, -93},{ 52, 1340},{ 116, 2862},{ 170, 4344}, - { 216, 5678},{ 249, 6928},{ 281, 8155},{ 333, 9326}, - { 418,10410},{ 533,11411},{ 683,12329},{ 890,13127}, - { 1183,13750},{ 1579,14162},{ 2066,14357},{ 2611,14370}, - { 3159,14284},{ 3675,14167},{ 4142,14053},{ 4568,13953}, - { 4961,13852},{ 5320,13755},{ 5649,13675},{ 5933,13610} - } - }, - { - /*Cb qi=20 INTRA*/ - { - { 3, 3},{ 62, 367},{ 112, 743},{ 140, 1183}, - { 165, 1646},{ 196, 2099},{ 235, 2517},{ 284, 2883}, - { 334, 3198},{ 393, 3460},{ 457, 3690},{ 509, 3945}, - { 560, 4198},{ 605, 4435},{ 647, 4658},{ 699, 4888}, - { 742, 5155},{ 788, 5350},{ 835, 5517},{ 880, 5730}, - { 956, 5914},{ 1007, 6060},{ 1053, 6199},{ 1158, 6358} - }, - /*Cb qi=20 INTER*/ - { - { 128, -6},{ 96, 322},{ 66, 653},{ 54, 1025}, - { 63, 1431},{ 79, 1844},{ 91, 2256},{ 99, 2665}, - { 104, 3065},{ 107, 3455},{ 111, 3831},{ 115, 4189}, - { 120, 4539},{ 123, 4885},{ 126, 5219},{ 130, 5548}, - { 135, 5876},{ 141, 6199},{ 149, 6519},{ 156, 6837}, - { 166, 7153},{ 179, 7468},{ 189, 7784},{ 194, 8102} - } - }, - { - /*Cr qi=20 INTRA*/ - { - { 4, 6},{ 63, 376},{ 109, 765},{ 139, 1225}, - { 165, 1689},{ 199, 2124},{ 239, 2523},{ 285, 2852}, - { 340, 3140},{ 388, 3398},{ 438, 3662},{ 499, 3914}, - { 547, 4155},{ 596, 4392},{ 652, 
4634},{ 699, 4877}, - { 759, 5074},{ 824, 5257},{ 883, 5428},{ 936, 5589}, - { 986, 5790},{ 1030, 5960},{ 1074, 6119},{ 1172, 6191} - }, - /*Cr qi=20 INTER*/ - { - { 92, 40},{ 70, 345},{ 55, 658},{ 57, 1034}, - { 69, 1441},{ 84, 1852},{ 94, 2261},{ 98, 2669}, - { 102, 3074},{ 105, 3465},{ 107, 3841},{ 110, 4206}, - { 112, 4562},{ 116, 4915},{ 121, 5260},{ 127, 5591}, - { 134, 5920},{ 142, 6246},{ 153, 6562},{ 163, 6870}, - { 173, 7170},{ 186, 7463},{ 198, 7746},{ 199, 8030} - } - } - }, - { - { - /*Y' qi=21 INTRA*/ - { - { 130, -51},{ 244, 1476},{ 483, 2705},{ 756, 3635}, - { 1013, 4396},{ 1266, 5070},{ 1530, 5647},{ 1806, 6153}, - { 2093, 6600},{ 2411, 6976},{ 2739, 7299},{ 3079, 7534}, - { 3422, 7744},{ 3738, 7987},{ 4032, 8274},{ 4348, 8533}, - { 4675, 8721},{ 4989, 8909},{ 5291, 9051},{ 5577, 9111}, - { 5879, 9163},{ 6190, 9228},{ 6506, 9286},{ 6899, 9295} - }, - /*Y' qi=21 INTER*/ - { - { 64, -56},{ 55, 1341},{ 119, 2859},{ 174, 4324}, - { 223, 5640},{ 258, 6880},{ 295, 8096},{ 359, 9246}, - { 460,10302},{ 595,11268},{ 778,12131},{ 1032,12857}, - { 1387,13385},{ 1850,13683},{ 2399,13774},{ 2976,13729}, - { 3527,13619},{ 4034,13504},{ 4492,13401},{ 4912,13291}, - { 5298,13209},{ 5648,13137},{ 5974,13046},{ 6308,12977} - } - }, - { - /*Cb qi=21 INTRA*/ - { - { 4, 3},{ 64, 367},{ 114, 743},{ 141, 1183}, - { 166, 1645},{ 201, 2092},{ 247, 2502},{ 299, 2856}, - { 352, 3158},{ 413, 3412},{ 480, 3642},{ 536, 3893}, - { 588, 4137},{ 637, 4367},{ 678, 4598},{ 725, 4834}, - { 774, 5083},{ 827, 5269},{ 883, 5420},{ 930, 5633}, - { 999, 5829},{ 1057, 5959},{ 1113, 6082},{ 1200, 6265} - }, - /*Cb qi=21 INTER*/ - { - { 109, -8},{ 84, 321},{ 62, 654},{ 54, 1028}, - { 64, 1434},{ 80, 1847},{ 92, 2259},{ 100, 2664}, - { 105, 3060},{ 109, 3445},{ 114, 3815},{ 118, 4172}, - { 122, 4519},{ 126, 4861},{ 128, 5194},{ 133, 5520}, - { 139, 5847},{ 146, 6169},{ 155, 6487},{ 166, 6801}, - { 177, 7114},{ 189, 7423},{ 201, 7729},{ 208, 8035} - } - }, - { - /*Cr qi=21 INTRA*/ - { - { 4, 
6},{ 64, 377},{ 111, 766},{ 144, 1225}, - { 174, 1683},{ 206, 2114},{ 248, 2506},{ 302, 2824}, - { 357, 3099},{ 404, 3357},{ 455, 3622},{ 519, 3867}, - { 573, 4098},{ 625, 4331},{ 683, 4571},{ 733, 4802}, - { 793, 4994},{ 863, 5173},{ 926, 5337},{ 978, 5492}, - { 1030, 5685},{ 1079, 5856},{ 1126, 6027},{ 1217, 6159} - }, - /*Cr qi=21 INTER*/ - { - { 82, 29},{ 67, 341},{ 55, 660},{ 58, 1038}, - { 71, 1443},{ 85, 1851},{ 95, 2258},{ 99, 2666}, - { 103, 3069},{ 107, 3456},{ 110, 3826},{ 112, 4188}, - { 114, 4544},{ 118, 4891},{ 124, 5231},{ 132, 5567}, - { 139, 5894},{ 148, 6210},{ 159, 6520},{ 171, 6822}, - { 185, 7111},{ 196, 7403},{ 209, 7691},{ 225, 7945} - } - } - }, - { - { - /*Y' qi=22 INTRA*/ - { - { 128, -45},{ 254, 1463},{ 507, 2662},{ 794, 3562}, - { 1070, 4292},{ 1340, 4941},{ 1622, 5492},{ 1920, 5968}, - { 2229, 6387},{ 2565, 6742},{ 2911, 7047},{ 3263, 7264}, - { 3615, 7464},{ 3944, 7689},{ 4258, 7950},{ 4591, 8183}, - { 4934, 8347},{ 5259, 8517},{ 5573, 8634},{ 5870, 8683}, - { 6186, 8723},{ 6508, 8762},{ 6831, 8801},{ 7232, 8830} - }, - /*Y' qi=22 INTER*/ - { - { 77, -48},{ 57, 1343},{ 122, 2853},{ 180, 4299}, - { 231, 5597},{ 269, 6826},{ 314, 8025},{ 393, 9150}, - { 512,10179},{ 673,11103},{ 894,11908},{ 1207,12542}, - { 1635,12956},{ 2166,13148},{ 2755,13167},{ 3345,13088}, - { 3895,12966},{ 4386,12848},{ 4832,12746},{ 5252,12647}, - { 5634,12563},{ 5978,12497},{ 6299,12412},{ 6633,12338} - } - }, - { - /*Cb qi=22 INTRA*/ - { - { 4, 3},{ 66, 367},{ 122, 744},{ 153, 1182}, - { 177, 1640},{ 213, 2080},{ 263, 2475},{ 323, 2811}, - { 382, 3103},{ 451, 3346},{ 522, 3568},{ 581, 3814}, - { 633, 4054},{ 674, 4288},{ 719, 4523},{ 768, 4756}, - { 823, 4979},{ 883, 5162},{ 937, 5325},{ 996, 5510}, - { 1070, 5687},{ 1129, 5807},{ 1193, 5929},{ 1311, 6099} - }, - /*Cb qi=22 INTER*/ - { - { 107, -5},{ 83, 322},{ 61, 653},{ 55, 1030}, - { 66, 1436},{ 81, 1845},{ 94, 2253},{ 102, 2656}, - { 107, 3050},{ 111, 3435},{ 115, 3804},{ 119, 4158}, - { 124, 4501},{ 128, 
4835},{ 132, 5164},{ 138, 5490}, - { 146, 5812},{ 154, 6128},{ 163, 6442},{ 174, 6754}, - { 188, 7060},{ 205, 7361},{ 219, 7662},{ 233, 7953} - } - }, - { - /*Cr qi=22 INTRA*/ - { - { 4, 6},{ 67, 378},{ 118, 767},{ 151, 1222}, - { 182, 1675},{ 221, 2097},{ 269, 2476},{ 329, 2774}, - { 389, 3039},{ 444, 3292},{ 500, 3545},{ 560, 3788}, - { 615, 4020},{ 671, 4251},{ 734, 4484},{ 781, 4712}, - { 850, 4887},{ 925, 5060},{ 981, 5229},{ 1031, 5369}, - { 1092, 5549},{ 1148, 5715},{ 1200, 5861},{ 1291, 5943} - }, - /*Cr qi=22 INTER*/ - { - { 88, 34},{ 69, 340},{ 57, 657},{ 60, 1039}, - { 73, 1445},{ 87, 1851},{ 96, 2257},{ 100, 2662}, - { 103, 3058},{ 107, 3442},{ 111, 3812},{ 115, 4172}, - { 118, 4524},{ 123, 4864},{ 129, 5199},{ 136, 5531}, - { 145, 5855},{ 156, 6168},{ 170, 6468},{ 184, 6765}, - { 193, 7066},{ 207, 7353},{ 222, 7628},{ 230, 7900} - } - } - }, - { - { - /*Y' qi=23 INTRA*/ - { - { 126, -40},{ 257, 1458},{ 521, 2636},{ 825, 3501}, - { 1111, 4207},{ 1391, 4842},{ 1684, 5385},{ 1992, 5858}, - { 2311, 6277},{ 2653, 6626},{ 3005, 6929},{ 3366, 7134}, - { 3729, 7311},{ 4071, 7526},{ 4396, 7770},{ 4734, 7986}, - { 5086, 8131},{ 5421, 8286},{ 5735, 8404},{ 6033, 8456}, - { 6357, 8486},{ 6682, 8525},{ 7003, 8573},{ 7387, 8604} - }, - /*Y' qi=23 INTER*/ - { - { 64, -57},{ 60, 1345},{ 124, 2853},{ 185, 4284}, - { 239, 5565},{ 282, 6783},{ 336, 7967},{ 429, 9069}, - { 568,10063},{ 758,10943},{ 1028,11679},{ 1407,12216}, - { 1909,12520},{ 2502,12616},{ 3126,12573},{ 3722,12461}, - { 4258,12344},{ 4742,12236},{ 5185,12136},{ 5590,12052}, - { 5970,11980},{ 6315,11901},{ 6631,11826},{ 6954,11769} - } - }, - { - /*Cb qi=23 INTRA*/ - { - { 3, 3},{ 70, 367},{ 124, 744},{ 151, 1182}, - { 181, 1637},{ 222, 2071},{ 276, 2460},{ 343, 2785}, - { 403, 3072},{ 468, 3317},{ 542, 3534},{ 605, 3773}, - { 659, 4009},{ 703, 4243},{ 747, 4479},{ 795, 4707}, - { 852, 4923},{ 908, 5105},{ 972, 5254},{ 1043, 5423}, - { 1118, 5594},{ 1172, 5731},{ 1240, 5853},{ 1365, 6005} - }, - /*Cb qi=23 
INTER*/ - { - { 109, -10},{ 87, 325},{ 63, 650},{ 57, 1031}, - { 67, 1439},{ 83, 1847},{ 96, 2253},{ 103, 2652}, - { 109, 3041},{ 114, 3421},{ 117, 3789},{ 122, 4141}, - { 128, 4480},{ 134, 4811},{ 139, 5138},{ 144, 5463}, - { 152, 5781},{ 161, 6096},{ 174, 6404},{ 185, 6714}, - { 198, 7023},{ 216, 7320},{ 233, 7621},{ 245, 7935} - } - }, - { - /*Cr qi=23 INTRA*/ - { - { 5, 6},{ 70, 379},{ 122, 768},{ 155, 1222}, - { 187, 1671},{ 231, 2088},{ 283, 2459},{ 346, 2750}, - { 411, 3009},{ 465, 3261},{ 523, 3509},{ 585, 3746}, - { 639, 3980},{ 695, 4219},{ 754, 4449},{ 803, 4671}, - { 873, 4840},{ 953, 5001},{ 1015, 5156},{ 1071, 5286}, - { 1137, 5464},{ 1191, 5629},{ 1249, 5782},{ 1359, 5885} - }, - /*Cr qi=23 INTER*/ - { - { 84, 29},{ 69, 343},{ 58, 660},{ 62, 1041}, - { 75, 1448},{ 88, 1853},{ 97, 2258},{ 102, 2659}, - { 105, 3050},{ 108, 3430},{ 113, 3799},{ 116, 4155}, - { 121, 4505},{ 126, 4845},{ 132, 5176},{ 142, 5504}, - { 153, 5826},{ 165, 6133},{ 180, 6432},{ 197, 6722}, - { 212, 7005},{ 226, 7287},{ 244, 7555},{ 258, 7828} - } - } - }, - { - { - /*Y' qi=24 INTRA*/ - { - { 125, -34},{ 268, 1444},{ 547, 2590},{ 866, 3422}, - { 1172, 4098},{ 1476, 4702},{ 1790, 5222},{ 2117, 5678}, - { 2453, 6080},{ 2811, 6418},{ 3178, 6700},{ 3552, 6895}, - { 3928, 7055},{ 4286, 7243},{ 4627, 7477},{ 4981, 7674}, - { 5344, 7802},{ 5683, 7944},{ 6009, 8043},{ 6313, 8082}, - { 6633, 8111},{ 6959, 8151},{ 7280, 8197},{ 7660, 8221} - }, - /*Y' qi=24 INTER*/ - { - { 62, -63},{ 68, 1345},{ 134, 2840},{ 199, 4245}, - { 256, 5508},{ 304, 6715},{ 371, 7880},{ 484, 8950}, - { 652, 9899},{ 892,10709},{ 1238,11334},{ 1722,11722}, - { 2326,11875},{ 2983,11864},{ 3616,11783},{ 4189,11678}, - { 4707,11570},{ 5178,11476},{ 5617,11395},{ 6017,11319}, - { 6380,11252},{ 6720,11185},{ 7044,11126},{ 7377,11118} - } - }, - { - /*Cb qi=24 INTRA*/ - { - { 4, 3},{ 75, 367},{ 132, 745},{ 159, 1182}, - { 187, 1634},{ 230, 2061},{ 289, 2439},{ 361, 2753}, - { 425, 3034},{ 492, 3278},{ 566, 3490},{ 630, 
3720}, - { 686, 3956},{ 732, 4190},{ 777, 4420},{ 829, 4637}, - { 894, 4840},{ 958, 5012},{ 1023, 5155},{ 1090, 5326}, - { 1165, 5502},{ 1226, 5622},{ 1299, 5717},{ 1408, 5887} - }, - /*Cb qi=24 INTER*/ - { - { 110, 35},{ 92, 337},{ 70, 651},{ 63, 1033}, - { 74, 1440},{ 91, 1846},{ 102, 2248},{ 109, 2644}, - { 114, 3031},{ 120, 3404},{ 127, 3762},{ 133, 4109}, - { 138, 4445},{ 144, 4772},{ 151, 5094},{ 159, 5411}, - { 168, 5728},{ 180, 6037},{ 195, 6338},{ 210, 6640}, - { 227, 6944},{ 249, 7236},{ 272, 7528},{ 299, 7809} - } - }, - { - /*Cr qi=24 INTRA*/ - { - { 5, 6},{ 72, 380},{ 124, 770},{ 158, 1222}, - { 195, 1668},{ 240, 2079},{ 297, 2438},{ 367, 2715}, - { 433, 2966},{ 488, 3218},{ 549, 3467},{ 609, 3701}, - { 664, 3935},{ 728, 4165},{ 792, 4379},{ 845, 4586}, - { 917, 4744},{ 995, 4898},{ 1063, 5049},{ 1120, 5187}, - { 1190, 5359},{ 1249, 5522},{ 1304, 5672},{ 1397, 5806} - }, - /*Cr qi=24 INTER*/ - { - { 91, 56},{ 73, 353},{ 61, 664},{ 66, 1045}, - { 80, 1449},{ 95, 1851},{ 103, 2250},{ 107, 2648}, - { 111, 3038},{ 116, 3413},{ 120, 3774},{ 124, 4128}, - { 130, 4471},{ 138, 4802},{ 145, 5130},{ 156, 5453}, - { 171, 5764},{ 187, 6061},{ 204, 6355},{ 220, 6643}, - { 238, 6923},{ 254, 7204},{ 275, 7475},{ 289, 7752} - } - } - }, - { - { - /*Y' qi=25 INTRA*/ - { - { 125, -28},{ 285, 1426},{ 582, 2540},{ 917, 3351}, - { 1244, 3997},{ 1569, 4570},{ 1903, 5071},{ 2258, 5498}, - { 2626, 5866},{ 3002, 6182},{ 3382, 6448},{ 3770, 6623}, - { 4162, 6760},{ 4528, 6934},{ 4882, 7144},{ 5249, 7328}, - { 5610, 7453},{ 5958, 7578},{ 6291, 7672},{ 6597, 7708}, - { 6928, 7715},{ 7258, 7737},{ 7575, 7781},{ 7950, 7829} - }, - /*Y' qi=25 INTER*/ - { - { 64, -16},{ 72, 1348},{ 139, 2832},{ 206, 4218}, - { 268, 5465},{ 322, 6659},{ 403, 7803},{ 540, 8838}, - { 747, 9734},{ 1044,10465},{ 1473,10981},{ 2048,11249}, - { 2717,11311},{ 3397,11257},{ 4025,11161},{ 4589,11052}, - { 5099,10947},{ 5560,10859},{ 5989,10786},{ 6389,10717}, - { 6753,10652},{ 7078,10592},{ 7389,10535},{ 
7697,10460} - } - }, - { - /*Cb qi=25 INTRA*/ - { - { 3, 3},{ 78, 368},{ 133, 745},{ 159, 1180}, - { 193, 1627},{ 242, 2046},{ 304, 2411},{ 381, 2714}, - { 456, 2983},{ 527, 3224},{ 598, 3437},{ 667, 3655}, - { 726, 3888},{ 776, 4117},{ 826, 4333},{ 883, 4543}, - { 954, 4727},{ 1019, 4878},{ 1095, 5014},{ 1171, 5187}, - { 1255, 5342},{ 1319, 5458},{ 1396, 5546},{ 1536, 5678} - }, - /*Cb qi=25 INTER*/ - { - { 117, 32},{ 89, 342},{ 67, 660},{ 64, 1037}, - { 77, 1441},{ 93, 1845},{ 105, 2243},{ 113, 2633}, - { 120, 3016},{ 125, 3387},{ 131, 3739},{ 137, 4080}, - { 144, 4416},{ 152, 4741},{ 160, 5057},{ 169, 5369}, - { 180, 5680},{ 193, 5990},{ 209, 6294},{ 227, 6594}, - { 249, 6888},{ 269, 7180},{ 294, 7467},{ 317, 7768} - } - }, - { - /*Cr qi=25 INTRA*/ - { - { 6, 6},{ 74, 380},{ 129, 770},{ 165, 1220}, - { 201, 1658},{ 253, 2061},{ 315, 2410},{ 388, 2676}, - { 462, 2920},{ 523, 3166},{ 584, 3404},{ 647, 3637}, - { 701, 3870},{ 769, 4086},{ 838, 4296},{ 898, 4491}, - { 980, 4627},{ 1065, 4759},{ 1126, 4920},{ 1187, 5058}, - { 1283, 5180},{ 1347, 5332},{ 1404, 5475},{ 1527, 5534} - }, - /*Cr qi=25 INTER*/ - { - { 92, 41},{ 75, 347},{ 64, 664},{ 70, 1045}, - { 85, 1448},{ 98, 1849},{ 105, 2245},{ 110, 2637}, - { 115, 3023},{ 120, 3395},{ 126, 3753},{ 131, 4102}, - { 136, 4439},{ 145, 4768},{ 156, 5094},{ 168, 5410}, - { 184, 5717},{ 203, 6010},{ 221, 6300},{ 239, 6577}, - { 262, 6847},{ 282, 7123},{ 303, 7390},{ 322, 7665} - } - } - }, - { - { - /*Y' qi=26 INTRA*/ - { - { 130, -24},{ 292, 1423},{ 594, 2525},{ 943, 3307}, - { 1289, 3921},{ 1633, 4467},{ 1991, 4943},{ 2368, 5348}, - { 2753, 5696},{ 3148, 5991},{ 3545, 6247},{ 3942, 6415}, - { 4342, 6535},{ 4726, 6690},{ 5093, 6883},{ 5466, 7047}, - { 5840, 7159},{ 6202, 7274},{ 6545, 7351},{ 6855, 7375}, - { 7186, 7384},{ 7517, 7416},{ 7840, 7447},{ 8238, 7450} - }, - /*Y' qi=26 INTER*/ - { - { 52, 16},{ 75, 1336},{ 143, 2815},{ 213, 4191}, - { 278, 5427},{ 339, 6611},{ 436, 7734},{ 600, 8732}, - { 843, 9579},{ 
1195,10243},{ 1702,10660},{ 2355,10825}, - { 3070,10820},{ 3755,10743},{ 4372,10643},{ 4925,10538}, - { 5426,10440},{ 5882,10354},{ 6296,10290},{ 6686,10224}, - { 7049,10163},{ 7380,10113},{ 7672,10062},{ 7937,10021} - } - }, - { - /*Cb qi=26 INTRA*/ - { - { 4, 3},{ 79, 368},{ 138, 745},{ 167, 1180}, - { 200, 1623},{ 252, 2034},{ 322, 2389},{ 403, 2682}, - { 480, 2941},{ 558, 3176},{ 631, 3393},{ 700, 3608}, - { 766, 3825},{ 819, 4046},{ 868, 4265},{ 926, 4472}, - { 1002, 4645},{ 1070, 4800},{ 1151, 4924},{ 1242, 5063}, - { 1325, 5221},{ 1393, 5338},{ 1464, 5431},{ 1595, 5559} - }, - /*Cb qi=26 INTER*/ - { - { 98, 33},{ 83, 343},{ 65, 662},{ 65, 1037}, - { 80, 1437},{ 96, 1839},{ 107, 2238},{ 115, 2628}, - { 122, 3007},{ 128, 3373},{ 134, 3722},{ 142, 4060}, - { 149, 4390},{ 158, 4713},{ 167, 5029},{ 178, 5341}, - { 191, 5647},{ 208, 5948},{ 227, 6244},{ 247, 6539}, - { 269, 6833},{ 295, 7114},{ 328, 7388},{ 369, 7658} - } - }, - { - /*Cr qi=26 INTRA*/ - { - { 5, 6},{ 75, 380},{ 133, 769},{ 172, 1217}, - { 212, 1652},{ 266, 2048},{ 333, 2384},{ 412, 2643}, - { 490, 2880},{ 552, 3124},{ 616, 3365},{ 681, 3594}, - { 739, 3816},{ 810, 4024},{ 880, 4224},{ 945, 4405}, - { 1029, 4538},{ 1114, 4674},{ 1183, 4822},{ 1254, 4946}, - { 1346, 5063},{ 1417, 5201},{ 1478, 5345},{ 1597, 5411} - }, - /*Cr qi=26 INTER*/ - { - { 97, 29},{ 75, 342},{ 62, 667},{ 70, 1047}, - { 87, 1447},{ 100, 1846},{ 107, 2242},{ 113, 2633}, - { 118, 3016},{ 123, 3382},{ 128, 3737},{ 135, 4082}, - { 142, 4417},{ 151, 4746},{ 162, 5066},{ 176, 5377}, - { 194, 5679},{ 217, 5963},{ 239, 6244},{ 260, 6522}, - { 284, 6789},{ 309, 7052},{ 335, 7313},{ 355, 7582} - } - } - }, - { - { - /*Y' qi=27 INTRA*/ - { - { 118, -10},{ 308, 1404},{ 630, 2473},{ 997, 3227}, - { 1360, 3819},{ 1719, 4354},{ 2086, 4829},{ 2470, 5233}, - { 2863, 5576},{ 3267, 5870},{ 3677, 6117},{ 4085, 6268}, - { 4499, 6376},{ 4888, 6521},{ 5257, 6705},{ 5638, 6865}, - { 6020, 6962},{ 6394, 7056},{ 6744, 7130},{ 7051, 7158}, - { 7386, 
7164},{ 7717, 7185},{ 8042, 7209},{ 8444, 7206} - }, - /*Y' qi=27 INTER*/ - { - { 54, 19},{ 77, 1333},{ 147, 2806},{ 221, 4166}, - { 290, 5390},{ 360, 6564},{ 474, 7665},{ 664, 8630}, - { 949, 9423},{ 1370,10002},{ 1958,10323},{ 2670,10414}, - { 3406,10375},{ 4086,10285},{ 4691,10182},{ 5233,10085}, - { 5724, 9994},{ 6169, 9918},{ 6582, 9863},{ 6962, 9813}, - { 7316, 9759},{ 7645, 9707},{ 7948, 9660},{ 8262, 9623} - } - }, - { - /*Cb qi=27 INTRA*/ - { - { 4, 3},{ 79, 368},{ 137, 745},{ 166, 1180}, - { 200, 1622},{ 253, 2030},{ 324, 2381},{ 407, 2671}, - { 487, 2925},{ 567, 3156},{ 640, 3372},{ 712, 3580}, - { 782, 3792},{ 833, 4015},{ 887, 4227},{ 954, 4422}, - { 1031, 4592},{ 1103, 4738},{ 1187, 4856},{ 1280, 4990}, - { 1371, 5135},{ 1442, 5244},{ 1520, 5321},{ 1684, 5398} - }, - /*Cb qi=27 INTER*/ - { - { 113, 20},{ 90, 338},{ 66, 661},{ 67, 1034}, - { 82, 1438},{ 97, 1842},{ 108, 2238},{ 115, 2624}, - { 123, 3000},{ 130, 3361},{ 138, 3708},{ 146, 4040}, - { 155, 4367},{ 164, 4688},{ 174, 4999},{ 186, 5306}, - { 203, 5609},{ 222, 5908},{ 243, 6202},{ 268, 6494}, - { 295, 6781},{ 326, 7058},{ 367, 7319},{ 420, 7551} - } - }, - { - /*Cr qi=27 INTRA*/ - { - { 5, 6},{ 75, 380},{ 133, 770},{ 173, 1217}, - { 214, 1650},{ 268, 2040},{ 337, 2375},{ 418, 2631}, - { 496, 2862},{ 558, 3104},{ 625, 3346},{ 692, 3571}, - { 753, 3786},{ 825, 3989},{ 896, 4182},{ 969, 4352}, - { 1059, 4479},{ 1144, 4614},{ 1212, 4757},{ 1284, 4871}, - { 1380, 4982},{ 1457, 5125},{ 1528, 5267},{ 1651, 5346} - }, - /*Cr qi=27 INTER*/ - { - { 92, 24},{ 74, 341},{ 61, 669},{ 71, 1049}, - { 88, 1448},{ 100, 1849},{ 107, 2243},{ 113, 2631}, - { 119, 3010},{ 125, 3373},{ 131, 3723},{ 137, 4064}, - { 146, 4396},{ 159, 4720},{ 172, 5033},{ 189, 5340}, - { 210, 5636},{ 233, 5920},{ 256, 6197},{ 282, 6465}, - { 310, 6730},{ 332, 7000},{ 359, 7259},{ 385, 7515} - } - } - }, - { - { - /*Y' qi=28 INTRA*/ - { - { 116, -8},{ 314, 1400},{ 640, 2458},{ 1013, 3197}, - { 1386, 3768},{ 1762, 4279},{ 2151, 4733},{ 
2558, 5117}, - { 2970, 5442},{ 3393, 5714},{ 3820, 5935},{ 4243, 6069}, - { 4671, 6161},{ 5074, 6289},{ 5456, 6457},{ 5849, 6598}, - { 6244, 6689},{ 6632, 6777},{ 6984, 6833},{ 7294, 6855}, - { 7625, 6862},{ 7961, 6875},{ 8302, 6890},{ 8720, 6883} - }, - /*Y' qi=28 INTER*/ - { - { 54, 8},{ 81, 1333},{ 154, 2793},{ 231, 4138}, - { 304, 5352},{ 384, 6512},{ 519, 7585},{ 743, 8508}, - { 1082, 9236},{ 1587, 9717},{ 2267, 9928},{ 3034, 9944}, - { 3775, 9878},{ 4438, 9786},{ 5031, 9686},{ 5563, 9601}, - { 6042, 9523},{ 6481, 9456},{ 6890, 9405},{ 7266, 9356}, - { 7614, 9313},{ 7933, 9265},{ 8238, 9220},{ 8545, 9193} - } - }, - { - /*Cb qi=28 INTRA*/ - { - { 3, 3},{ 80, 368},{ 138, 746},{ 168, 1179}, - { 208, 1615},{ 268, 2014},{ 345, 2354},{ 432, 2637}, - { 515, 2884},{ 595, 3108},{ 669, 3323},{ 745, 3533}, - { 818, 3740},{ 876, 3953},{ 932, 4160},{ 1003, 4349}, - { 1088, 4501},{ 1154, 4648},{ 1241, 4768},{ 1349, 4889}, - { 1441, 5023},{ 1524, 5113},{ 1611, 5187},{ 1783, 5283} - }, - /*Cb qi=28 INTER*/ - { - { 117, 29},{ 91, 341},{ 65, 663},{ 68, 1038}, - { 85, 1440},{ 100, 1841},{ 110, 2234},{ 119, 2616}, - { 127, 2985},{ 135, 3342},{ 142, 3685},{ 151, 4015}, - { 162, 4337},{ 174, 4652},{ 186, 4960},{ 201, 5264}, - { 218, 5567},{ 239, 5863},{ 266, 6149},{ 295, 6434}, - { 328, 6715},{ 371, 6976},{ 409, 7239},{ 460, 7477} - } - }, - { - /*Cr qi=28 INTRA*/ - { - { 6, 7},{ 79, 381},{ 138, 771},{ 178, 1215}, - { 222, 1644},{ 285, 2026},{ 359, 2347},{ 441, 2597}, - { 521, 2827},{ 588, 3066},{ 655, 3303},{ 725, 3523}, - { 791, 3728},{ 870, 3920},{ 950, 4103},{ 1030, 4265}, - { 1121, 4388},{ 1198, 4520},{ 1266, 4659},{ 1356, 4759}, - { 1461, 4865},{ 1540, 4993},{ 1619, 5115},{ 1786, 5160} - }, - /*Cr qi=28 INTER*/ - { - { 96, 18},{ 78, 340},{ 66, 672},{ 74, 1051}, - { 90, 1450},{ 103, 1845},{ 110, 2235},{ 116, 2619}, - { 122, 2995},{ 129, 3356},{ 137, 3702},{ 146, 4038}, - { 156, 4365},{ 168, 4684},{ 182, 4995},{ 203, 5297}, - { 227, 5588},{ 253, 5866},{ 282, 6131},{ 311, 
6394}, - { 339, 6664},{ 366, 6918},{ 400, 7171},{ 424, 7450} - } - } - }, - { - { - /*Y' qi=29 INTRA*/ - { - { 112, 7},{ 334, 1382},{ 681, 2410},{ 1081, 3112}, - { 1484, 3650},{ 1894, 4128},{ 2316, 4547},{ 2749, 4905}, - { 3188, 5208},{ 3634, 5458},{ 4079, 5666},{ 4517, 5791}, - { 4952, 5870},{ 5359, 5983},{ 5754, 6137},{ 6165, 6268}, - { 6568, 6351},{ 6958, 6423},{ 7320, 6471},{ 7638, 6490}, - { 7979, 6490},{ 8313, 6499},{ 8651, 6517},{ 9085, 6499} - }, - /*Y' qi=29 INTER*/ - { - { 55, 15},{ 85, 1336},{ 160, 2780},{ 242, 4104}, - { 323, 5302},{ 418, 6443},{ 586, 7480},{ 859, 8342}, - { 1278, 8982},{ 1888, 9347},{ 2658, 9457},{ 3457, 9425}, - { 4192, 9343},{ 4842, 9247},{ 5417, 9162},{ 5935, 9086}, - { 6404, 9011},{ 6841, 8952},{ 7241, 8907},{ 7609, 8867}, - { 7953, 8832},{ 8267, 8792},{ 8562, 8740},{ 8836, 8701} - } - }, - { - /*Cb qi=29 INTRA*/ - { - { 5, 3},{ 84, 368},{ 144, 746},{ 176, 1175}, - { 219, 1604},{ 285, 1991},{ 372, 2318},{ 462, 2591}, - { 546, 2833},{ 628, 3058},{ 704, 3274},{ 788, 3473}, - { 870, 3664},{ 935, 3865},{ 995, 4059},{ 1072, 4239}, - { 1167, 4388},{ 1248, 4518},{ 1334, 4634},{ 1429, 4765}, - { 1536, 4884},{ 1628, 4964},{ 1716, 5038},{ 1885, 5128} - }, - /*Cb qi=29 INTER*/ - { - { 126, 25},{ 95, 340},{ 69, 662},{ 71, 1039}, - { 88, 1440},{ 102, 1839},{ 113, 2227},{ 122, 2604}, - { 132, 2969},{ 141, 3320},{ 151, 3659},{ 161, 3985}, - { 172, 4301},{ 186, 4612},{ 200, 4917},{ 219, 5213}, - { 241, 5509},{ 265, 5800},{ 296, 6081},{ 329, 6360}, - { 369, 6633},{ 414, 6899},{ 465, 7148},{ 520, 7387} - } - }, - { - /*Cr qi=29 INTRA*/ - { - { 6, 7},{ 82, 382},{ 142, 772},{ 185, 1211}, - { 233, 1632},{ 303, 2000},{ 388, 2306},{ 475, 2550}, - { 556, 2779},{ 627, 3007},{ 707, 3237},{ 778, 3459}, - { 843, 3654},{ 927, 3834},{ 1012, 4012},{ 1101, 4152}, - { 1197, 4262},{ 1275, 4399},{ 1359, 4511},{ 1455, 4596}, - { 1562, 4708},{ 1644, 4833},{ 1719, 4954},{ 1888, 4988} - }, - /*Cr qi=29 INTER*/ - { - { 101, 28},{ 81, 343},{ 67, 673},{ 75, 1053}, - { 93, 
1450},{ 106, 1844},{ 113, 2230},{ 119, 2610}, - { 127, 2980},{ 135, 3334},{ 143, 3676},{ 153, 4007}, - { 165, 4330},{ 180, 4645},{ 201, 4951},{ 224, 5243}, - { 253, 5522},{ 284, 5794},{ 314, 6060},{ 345, 6322}, - { 381, 6578},{ 419, 6828},{ 455, 7073},{ 495, 7316} - } - } - }, - { - { - /*Y' qi=30 INTRA*/ - { - { 112, 8},{ 335, 1380},{ 682, 2401},{ 1083, 3093}, - { 1489, 3619},{ 1902, 4092},{ 2332, 4511},{ 2777, 4865}, - { 3231, 5156},{ 3693, 5394},{ 4153, 5585},{ 4605, 5689}, - { 5049, 5764},{ 5468, 5871},{ 5875, 6004},{ 6295, 6120}, - { 6706, 6201},{ 7099, 6273},{ 7461, 6311},{ 7785, 6320}, - { 8128, 6322},{ 8469, 6331},{ 8806, 6342},{ 9220, 6338} - }, - /*Y' qi=30 INTER*/ - { - { 58, 8},{ 90, 1340},{ 169, 2771},{ 257, 4079}, - { 345, 5266},{ 459, 6387},{ 660, 7383},{ 990, 8180}, - { 1496, 8726},{ 2203, 8992},{ 3029, 9038},{ 3833, 8984}, - { 4549, 8900},{ 5183, 8813},{ 5745, 8735},{ 6250, 8674}, - { 6715, 8619},{ 7138, 8565},{ 7529, 8528},{ 7899, 8495}, - { 8234, 8465},{ 8550, 8429},{ 8856, 8395},{ 9160, 8374} - } - }, - { - /*Cb qi=30 INTRA*/ - { - { 7, 3},{ 88, 369},{ 149, 747},{ 185, 1175}, - { 232, 1599},{ 304, 1976},{ 392, 2293},{ 486, 2557}, - { 573, 2797},{ 656, 3027},{ 735, 3243},{ 819, 3442}, - { 903, 3629},{ 966, 3828},{ 1025, 4027},{ 1105, 4204}, - { 1201, 4343},{ 1282, 4469},{ 1379, 4575},{ 1486, 4689}, - { 1588, 4813},{ 1678, 4900},{ 1767, 4969},{ 1911, 5080} - }, - /*Cb qi=30 INTER*/ - { - { 120, 23},{ 96, 336},{ 72, 661},{ 75, 1043}, - { 91, 1441},{ 105, 1837},{ 117, 2221},{ 127, 2592}, - { 137, 2953},{ 148, 3301},{ 159, 3635},{ 170, 3959}, - { 184, 4271},{ 199, 4578},{ 216, 4879},{ 238, 5175}, - { 262, 5466},{ 294, 5750},{ 332, 6027},{ 373, 6298}, - { 421, 6559},{ 473, 6805},{ 526, 7053},{ 587, 7298} - } - }, - { - /*Cr qi=30 INTRA*/ - { - { 10, 7},{ 89, 384},{ 147, 773},{ 192, 1211}, - { 245, 1627},{ 322, 1984},{ 412, 2280},{ 501, 2520}, - { 583, 2750},{ 654, 2982},{ 736, 3207},{ 810, 3419}, - { 873, 3614},{ 957, 3794},{ 1048, 3965},{ 1139, 
4102}, - { 1237, 4208},{ 1327, 4328},{ 1408, 4448},{ 1496, 4545}, - { 1604, 4652},{ 1699, 4760},{ 1780, 4877},{ 1937, 4942} - }, - /*Cr qi=30 INTER*/ - { - { 115, 26},{ 89, 342},{ 70, 672},{ 79, 1055}, - { 96, 1451},{ 108, 1841},{ 116, 2222},{ 124, 2599}, - { 132, 2965},{ 141, 3316},{ 151, 3655},{ 163, 3984}, - { 178, 4301},{ 197, 4609},{ 219, 4909},{ 247, 5195}, - { 280, 5469},{ 317, 5734},{ 351, 5991},{ 383, 6248}, - { 423, 6500},{ 467, 6744},{ 502, 6995},{ 558, 7226} - } - } - }, - { - { - /*Y' qi=31 INTRA*/ - { - { 116, 20},{ 359, 1361},{ 732, 2350},{ 1162, 3010}, - { 1597, 3507},{ 2042, 3950},{ 2503, 4339},{ 2974, 4670}, - { 3446, 4951},{ 3922, 5179},{ 4394, 5357},{ 4858, 5454}, - { 5313, 5519},{ 5734, 5626},{ 6154, 5755},{ 6585, 5859}, - { 7004, 5928},{ 7408, 5998},{ 7775, 6039},{ 8102, 6048}, - { 8442, 6051},{ 8790, 6054},{ 9136, 6057},{ 9554, 6041} - }, - /*Y' qi=31 INTER*/ - { - { 53, 12},{ 90, 1340},{ 169, 2765},{ 259, 4062}, - { 353, 5236},{ 483, 6340},{ 713, 7305},{ 1086, 8059}, - { 1651, 8548},{ 2423, 8751},{ 3288, 8754},{ 4106, 8674}, - { 4827, 8572},{ 5451, 8482},{ 6007, 8407},{ 6514, 8344}, - { 6970, 8282},{ 7397, 8225},{ 7795, 8193},{ 8159, 8161}, - { 8498, 8120},{ 8814, 8093},{ 9127, 8066},{ 9432, 8040} - } - }, - { - /*Cb qi=31 INTRA*/ - { - { 7, 3},{ 88, 369},{ 149, 746},{ 185, 1173}, - { 234, 1595},{ 308, 1967},{ 399, 2278},{ 494, 2537}, - { 583, 2774},{ 669, 2997},{ 755, 3204},{ 847, 3390}, - { 936, 3569},{ 1008, 3759},{ 1078, 3942},{ 1162, 4104}, - { 1262, 4238},{ 1352, 4364},{ 1442, 4470},{ 1557, 4567}, - { 1676, 4674},{ 1759, 4781},{ 1850, 4853},{ 2043, 4897} - }, - /*Cb qi=31 INTER*/ - { - { 121, 23},{ 96, 335},{ 72, 660},{ 74, 1043}, - { 90, 1440},{ 105, 1834},{ 116, 2217},{ 127, 2586}, - { 138, 2945},{ 148, 3293},{ 159, 3626},{ 172, 3945}, - { 185, 4256},{ 202, 4559},{ 223, 4856},{ 245, 5150}, - { 272, 5440},{ 306, 5719},{ 346, 5989},{ 391, 6253}, - { 443, 6511},{ 510, 6743},{ 583, 6965},{ 651, 7182} - } - }, - { - /*Cr qi=31 INTRA*/ - { 
- { 10, 7},{ 88, 384},{ 147, 773},{ 192, 1209}, - { 247, 1622},{ 326, 1974},{ 417, 2262},{ 509, 2500}, - { 596, 2726},{ 670, 2949},{ 754, 3170},{ 836, 3370}, - { 912, 3548},{ 999, 3724},{ 1093, 3888},{ 1198, 4000}, - { 1304, 4095},{ 1384, 4230},{ 1470, 4347},{ 1577, 4422}, - { 1696, 4513},{ 1798, 4620},{ 1869, 4746},{ 1991, 4798} - }, - /*Cr qi=31 INTER*/ - { - { 113, 32},{ 88, 345},{ 69, 674},{ 79, 1055}, - { 96, 1451},{ 108, 1839},{ 115, 2218},{ 123, 2592}, - { 132, 2957},{ 141, 3308},{ 151, 3643},{ 163, 3968}, - { 179, 4285},{ 200, 4590},{ 225, 4886},{ 254, 5169}, - { 291, 5436},{ 330, 5696},{ 368, 5951},{ 409, 6200}, - { 452, 6448},{ 493, 6695},{ 536, 6940},{ 571, 7204} - } - } - }, - { - { - /*Y' qi=32 INTRA*/ - { - { 123, 26},{ 370, 1356},{ 756, 2321},{ 1211, 2944}, - { 1674, 3408},{ 2148, 3826},{ 2639, 4193},{ 3138, 4504}, - { 3634, 4765},{ 4133, 4973},{ 4625, 5137},{ 5101, 5225}, - { 5567, 5274},{ 6002, 5363},{ 6437, 5482},{ 6885, 5566}, - { 7312, 5625},{ 7723, 5686},{ 8101, 5721},{ 8429, 5732}, - { 8769, 5728},{ 9120, 5726},{ 9472, 5723},{ 9918, 5700} - }, - /*Y' qi=32 INTER*/ - { - { 54, -3},{ 95, 1343},{ 179, 2750},{ 276, 4027}, - { 382, 5185},{ 543, 6256},{ 830, 7161},{ 1301, 7815}, - { 2003, 8172},{ 2883, 8266},{ 3779, 8217},{ 4578, 8127}, - { 5274, 8035},{ 5886, 7952},{ 6430, 7887},{ 6929, 7835}, - { 7380, 7779},{ 7796, 7737},{ 8190, 7705},{ 8552, 7672}, - { 8896, 7640},{ 9210, 7612},{ 9510, 7589},{ 9746, 7552} - } - }, - { - /*Cb qi=32 INTRA*/ - { - { 6, 3},{ 89, 369},{ 153, 746},{ 193, 1167}, - { 247, 1577},{ 330, 1935},{ 429, 2236},{ 528, 2494}, - { 620, 2732},{ 712, 2948},{ 801, 3146},{ 898, 3325}, - { 999, 3489},{ 1078, 3664},{ 1155, 3832},{ 1251, 3985}, - { 1360, 4115},{ 1451, 4236},{ 1549, 4338},{ 1667, 4433}, - { 1797, 4522},{ 1891, 4613},{ 1989, 4687},{ 2162, 4776} - }, - /*Cb qi=32 INTER*/ - { - { 116, -1},{ 98, 321},{ 80, 656},{ 80, 1042}, - { 96, 1438},{ 110, 1827},{ 122, 2205},{ 133, 2570}, - { 144, 2925},{ 157, 3268},{ 170, 3597},{ 185, 
3911}, - { 202, 4216},{ 221, 4516},{ 244, 4809},{ 273, 5096}, - { 308, 5376},{ 350, 5644},{ 401, 5907},{ 459, 6160}, - { 520, 6401},{ 592, 6630},{ 676, 6837},{ 758, 7050} - } - }, - { - /*Cr qi=32 INTRA*/ - { - { 12, 7},{ 91, 386},{ 152, 773},{ 201, 1202}, - { 261, 1603},{ 347, 1942},{ 447, 2223},{ 540, 2460}, - { 626, 2684},{ 711, 2901},{ 801, 3115},{ 887, 3312}, - { 969, 3480},{ 1068, 3633},{ 1176, 3779},{ 1283, 3885}, - { 1392, 3969},{ 1485, 4090},{ 1573, 4206},{ 1686, 4274}, - { 1813, 4354},{ 1911, 4459},{ 2004, 4563},{ 2162, 4590} - }, - /*Cr qi=32 INTER*/ - { - { 129, 5},{ 98, 334},{ 75, 673},{ 84, 1055}, - { 101, 1448},{ 113, 1832},{ 121, 2206},{ 129, 2577}, - { 140, 2937},{ 151, 3282},{ 163, 3614},{ 179, 3932}, - { 198, 4240},{ 221, 4542},{ 252, 4830},{ 290, 5102}, - { 329, 5364},{ 373, 5618},{ 420, 5864},{ 468, 6105}, - { 513, 6351},{ 564, 6587},{ 624, 6810},{ 697, 7017} - } - } - }, - { - { - /*Y' qi=33 INTRA*/ - { - { 115, 36},{ 388, 1338},{ 791, 2289},{ 1258, 2899}, - { 1732, 3352},{ 2220, 3760},{ 2730, 4117},{ 3244, 4415}, - { 3751, 4662},{ 4261, 4858},{ 4766, 5012},{ 5249, 5094}, - { 5719, 5141},{ 6159, 5225},{ 6597, 5333},{ 7044, 5416}, - { 7474, 5472},{ 7893, 5531},{ 8268, 5570},{ 8591, 5580}, - { 8931, 5578},{ 9283, 5579},{ 9634, 5582},{10067, 5560} - }, - /*Y' qi=33 INTER*/ - { - { 65, -14},{ 102, 1345},{ 190, 2736},{ 294, 3999}, - { 411, 5146},{ 597, 6192},{ 934, 7045},{ 1488, 7622}, - { 2281, 7895},{ 3213, 7937},{ 4108, 7871},{ 4883, 7784}, - { 5556, 7709},{ 6150, 7643},{ 6685, 7585},{ 7176, 7539}, - { 7620, 7502},{ 8034, 7466},{ 8427, 7435},{ 8793, 7409}, - { 9136, 7386},{ 9446, 7364},{ 9743, 7339},{10025, 7303} - } - }, - { - /*Cb qi=33 INTRA*/ - { - { 5, 3},{ 92, 369},{ 159, 746},{ 203, 1163}, - { 263, 1564},{ 353, 1911},{ 458, 2204},{ 557, 2460}, - { 650, 2697},{ 744, 2913},{ 836, 3110},{ 934, 3292}, - { 1036, 3454},{ 1125, 3616},{ 1204, 3781},{ 1298, 3932}, - { 1410, 4058},{ 1507, 4170},{ 1606, 4265},{ 1725, 4358}, - { 1853, 4445},{ 1955, 
4535},{ 2067, 4597},{ 2258, 4663} - }, - /*Cb qi=33 INTER*/ - { - { 109, 37},{ 94, 343},{ 81, 662},{ 85, 1042}, - { 102, 1436},{ 116, 1823},{ 128, 2195},{ 141, 2554}, - { 154, 2906},{ 167, 3246},{ 183, 3570},{ 202, 3881}, - { 220, 4185},{ 241, 4482},{ 268, 4772},{ 302, 5053}, - { 341, 5328},{ 388, 5592},{ 446, 5846},{ 507, 6096}, - { 581, 6328},{ 670, 6534},{ 762, 6731},{ 842, 6922} - } - }, - { - /*Cr qi=33 INTRA*/ - { - { 11, 7},{ 93, 387},{ 158, 774},{ 211, 1197}, - { 278, 1589},{ 372, 1917},{ 475, 2191},{ 569, 2429}, - { 658, 2655},{ 744, 2868},{ 835, 3083},{ 926, 3271}, - { 1010, 3430},{ 1110, 3586},{ 1224, 3724},{ 1336, 3826}, - { 1449, 3908},{ 1547, 4021},{ 1636, 4136},{ 1751, 4200}, - { 1886, 4277},{ 1977, 4384},{ 2070, 4474},{ 2232, 4510} - }, - /*Cr qi=33 INTER*/ - { - { 77, 9},{ 90, 347},{ 80, 674},{ 91, 1053}, - { 107, 1444},{ 119, 1825},{ 127, 2196},{ 137, 2563}, - { 149, 2919},{ 161, 3259},{ 176, 3588},{ 194, 3905}, - { 217, 4209},{ 246, 4504},{ 280, 4786},{ 320, 5055}, - { 364, 5316},{ 409, 5565},{ 460, 5804},{ 517, 6039}, - { 578, 6264},{ 640, 6489},{ 701, 6721},{ 772, 6948} - } - } - }, - { - { - /*Y' qi=34 INTRA*/ - { - { 124, 40},{ 401, 1333},{ 823, 2262},{ 1318, 2842}, - { 1823, 3265},{ 2339, 3650},{ 2872, 3991},{ 3405, 4274}, - { 3926, 4513},{ 4448, 4704},{ 4961, 4845},{ 5450, 4921}, - { 5925, 4971},{ 6372, 5053},{ 6813, 5160},{ 7264, 5242}, - { 7704, 5291},{ 8124, 5346},{ 8500, 5382},{ 8831, 5384}, - { 9178, 5380},{ 9525, 5387},{ 9869, 5389},{10310, 5356} - }, - /*Y' qi=34 INTER*/ - { - { 64, -17},{ 101, 1344},{ 190, 2730},{ 299, 3981}, - { 430, 5110},{ 648, 6127},{ 1036, 6933},{ 1664, 7445}, - { 2535, 7652},{ 3504, 7653},{ 4402, 7572},{ 5173, 7479}, - { 5843, 7400},{ 6441, 7334},{ 6976, 7280},{ 7464, 7231}, - { 7910, 7189},{ 8332, 7157},{ 8730, 7125},{ 9091, 7103}, - { 9422, 7086},{ 9753, 7061},{10067, 7036},{10316, 7029} - } - }, - { - /*Cb qi=34 INTRA*/ - { - { 5, 3},{ 91, 369},{ 158, 746},{ 204, 1162}, - { 266, 1561},{ 358, 1903},{ 466, 
2189},{ 570, 2439}, - { 665, 2671},{ 765, 2880},{ 864, 3069},{ 970, 3238}, - { 1079, 3392},{ 1174, 3545},{ 1265, 3693},{ 1360, 3841}, - { 1471, 3968},{ 1572, 4083},{ 1675, 4181},{ 1804, 4255}, - { 1939, 4332},{ 2048, 4411},{ 2155, 4484},{ 2339, 4584} - }, - /*Cb qi=34 INTER*/ - { - { 99, 44},{ 92, 345},{ 82, 661},{ 86, 1043}, - { 101, 1436},{ 116, 1821},{ 128, 2191},{ 140, 2549}, - { 154, 2898},{ 168, 3235},{ 185, 3556},{ 203, 3865}, - { 224, 4166},{ 248, 4457},{ 278, 4741},{ 315, 5021}, - { 361, 5289},{ 416, 5546},{ 483, 5792},{ 559, 6025}, - { 651, 6237},{ 752, 6432},{ 849, 6626},{ 967, 6790} - } - }, - { - /*Cr qi=34 INTRA*/ - { - { 11, 7},{ 93, 387},{ 158, 773},{ 212, 1195}, - { 282, 1584},{ 378, 1909},{ 483, 2179},{ 578, 2414}, - { 671, 2633},{ 766, 2837},{ 866, 3038},{ 960, 3223}, - { 1049, 3376},{ 1158, 3520},{ 1285, 3644},{ 1400, 3740}, - { 1505, 3828},{ 1616, 3928},{ 1713, 4030},{ 1820, 4104}, - { 1957, 4185},{ 2063, 4280},{ 2160, 4355},{ 2320, 4341} - }, - /*Cr qi=34 INTER*/ - { - { 78, 11},{ 89, 347},{ 79, 674},{ 90, 1053}, - { 106, 1444},{ 117, 1823},{ 127, 2192},{ 137, 2558}, - { 149, 2912},{ 163, 3249},{ 178, 3574},{ 197, 3888}, - { 222, 4189},{ 252, 4481},{ 293, 4755},{ 341, 5013}, - { 386, 5268},{ 436, 5512},{ 498, 5743},{ 563, 5970}, - { 622, 6200},{ 694, 6415},{ 776, 6622},{ 871, 6818} - } - } - }, - { - { - /*Y' qi=35 INTRA*/ - { - { 116, 51},{ 433, 1312},{ 881, 2221},{ 1406, 2771}, - { 1948, 3156},{ 2511, 3501},{ 3085, 3811},{ 3654, 4066}, - { 4212, 4273},{ 4763, 4444},{ 5298, 4572},{ 5799, 4638}, - { 6285, 4678},{ 6747, 4746},{ 7203, 4838},{ 7673, 4905}, - { 8124, 4950},{ 8552, 5003},{ 8938, 5027},{ 9275, 5026}, - { 9628, 5019},{ 9981, 5024},{10331, 5030},{10795, 5000} - }, - /*Y' qi=35 INTER*/ - { - { 71, -10},{ 108, 1348},{ 203, 2710},{ 325, 3938}, - { 485, 5040},{ 766, 6000},{ 1267, 6706},{ 2048, 7089}, - { 3037, 7191},{ 4032, 7146},{ 4903, 7061},{ 5648, 6977}, - { 6301, 6912},{ 6884, 6857},{ 7413, 6812},{ 7898, 6775}, - { 8342, 6739},{ 
8764, 6710},{ 9160, 6688},{ 9519, 6668}, - { 9859, 6646},{10190, 6625},{10492, 6612},{10755, 6595} - } - }, - { - /*Cb qi=35 INTRA*/ - { - { 6, 3},{ 95, 369},{ 164, 746},{ 214, 1156}, - { 287, 1542},{ 390, 1869},{ 504, 2143},{ 611, 2388}, - { 712, 2613},{ 822, 2811},{ 937, 2987},{ 1055, 3147}, - { 1174, 3285},{ 1286, 3420},{ 1386, 3560},{ 1488, 3698}, - { 1604, 3814},{ 1714, 3916},{ 1825, 4008},{ 1958, 4088}, - { 2101, 4159},{ 2224, 4226},{ 2339, 4292},{ 2538, 4383} - }, - /*Cb qi=35 INTER*/ - { - { 98, 41},{ 90, 348},{ 86, 665},{ 92, 1042}, - { 108, 1432},{ 122, 1812},{ 136, 2175},{ 151, 2528}, - { 165, 2872},{ 182, 3202},{ 202, 3516},{ 225, 3819}, - { 251, 4112},{ 281, 4398},{ 320, 4675},{ 367, 4944}, - { 421, 5204},{ 493, 5450},{ 579, 5679},{ 672, 5892}, - { 785, 6082},{ 906, 6258},{ 1026, 6432},{ 1153, 6592} - } - }, - { - /*Cr qi=35 INTRA*/ - { - { 12, 7},{ 98, 388},{ 166, 773},{ 226, 1187}, - { 306, 1563},{ 411, 1874},{ 524, 2134},{ 622, 2365}, - { 721, 2577},{ 826, 2768},{ 947, 2946},{ 1066, 3106}, - { 1163, 3250},{ 1274, 3395},{ 1417, 3508},{ 1539, 3590}, - { 1639, 3671},{ 1754, 3765},{ 1865, 3855},{ 1979, 3921}, - { 2127, 3998},{ 2249, 4085},{ 2346, 4172},{ 2473, 4210} - }, - /*Cr qi=35 INTER*/ - { - { 86, 12},{ 94, 354},{ 85, 677},{ 96, 1052}, - { 113, 1439},{ 125, 1811},{ 135, 2177},{ 147, 2537}, - { 160, 2884},{ 177, 3215},{ 195, 3535},{ 219, 3842}, - { 252, 4133},{ 292, 4413},{ 339, 4680},{ 396, 4928}, - { 455, 5169},{ 514, 5408},{ 588, 5626},{ 672, 5835}, - { 750, 6051},{ 837, 6257},{ 943, 6442},{ 1073, 6595} - } - } - }, - { - { - /*Y' qi=36 INTRA*/ - { - { 116, 52},{ 432, 1312},{ 881, 2215},{ 1407, 2759}, - { 1948, 3140},{ 2511, 3484},{ 3090, 3789},{ 3672, 4036}, - { 4243, 4236},{ 4803, 4397},{ 5346, 4517},{ 5856, 4581}, - { 6350, 4614},{ 6821, 4675},{ 7286, 4763},{ 7754, 4832}, - { 8201, 4875},{ 8631, 4922},{ 9015, 4948},{ 9351, 4945}, - { 9706, 4941},{10061, 4948},{10408, 4949},{10878, 4923} - }, - /*Y' qi=36 INTER*/ - { - { 63, -16},{ 114, 
1332},{ 216, 2690},{ 343, 3914}, - { 515, 5009},{ 829, 5939},{ 1399, 6586},{ 2263, 6901}, - { 3290, 6967},{ 4272, 6920},{ 5115, 6847},{ 5839, 6779}, - { 6478, 6726},{ 7051, 6685},{ 7571, 6649},{ 8050, 6614}, - { 8495, 6587},{ 8908, 6567},{ 9298, 6550},{ 9673, 6530}, - {10005, 6512},{10324, 6499},{10640, 6483},{10936, 6487} - } - }, - { - /*Cb qi=36 INTRA*/ - { - { 6, 3},{ 98, 370},{ 170, 746},{ 225, 1150}, - { 306, 1527},{ 416, 1845},{ 534, 2116},{ 642, 2363}, - { 743, 2591},{ 851, 2794},{ 964, 2972},{ 1081, 3133}, - { 1198, 3275},{ 1311, 3410},{ 1411, 3547},{ 1519, 3680}, - { 1642, 3789},{ 1750, 3892},{ 1860, 3982},{ 1998, 4054}, - { 2141, 4129},{ 2256, 4204},{ 2372, 4278},{ 2567, 4356} - }, - /*Cb qi=36 INTER*/ - { - { 107, 30},{ 96, 346},{ 88, 667},{ 100, 1039}, - { 115, 1426},{ 128, 1804},{ 142, 2164},{ 158, 2512}, - { 176, 2851},{ 195, 3178},{ 218, 3491},{ 243, 3791}, - { 270, 4084},{ 307, 4365},{ 348, 4638},{ 397, 4908}, - { 464, 5157},{ 545, 5392},{ 635, 5620},{ 734, 5831}, - { 854, 6015},{ 993, 6170},{ 1124, 6327},{ 1234, 6502} - } - }, - { - /*Cr qi=36 INTRA*/ - { - { 12, 7},{ 102, 388},{ 172, 773},{ 239, 1182}, - { 328, 1546},{ 439, 1848},{ 554, 2106},{ 651, 2341}, - { 747, 2561},{ 850, 2757},{ 972, 2934},{ 1086, 3097}, - { 1182, 3245},{ 1302, 3382},{ 1447, 3491},{ 1572, 3567}, - { 1677, 3641},{ 1793, 3733},{ 1899, 3828},{ 2013, 3894}, - { 2163, 3967},{ 2283, 4059},{ 2387, 4142},{ 2559, 4145} - }, - /*Cr qi=36 INTER*/ - { - { 98, -10},{ 96, 347},{ 89, 676},{ 102, 1048}, - { 118, 1433},{ 130, 1804},{ 141, 2167},{ 154, 2523}, - { 171, 2866},{ 190, 3194},{ 212, 3508},{ 240, 3809}, - { 276, 4099},{ 320, 4377},{ 372, 4638},{ 428, 4887}, - { 492, 5122},{ 560, 5353},{ 638, 5572},{ 725, 5779}, - { 814, 5985},{ 902, 6192},{ 1013, 6377},{ 1155, 6527} - } - } - }, - { - { - /*Y' qi=37 INTRA*/ - { - { 109, 58},{ 445, 1302},{ 927, 2177},{ 1489, 2689}, - { 2053, 3052},{ 2632, 3387},{ 3230, 3683},{ 3830, 3922}, - { 4417, 4114},{ 4992, 4266},{ 5546, 4375},{ 6067, 4430}, 
- { 6571, 4459},{ 7046, 4516},{ 7513, 4599},{ 7991, 4663}, - { 8445, 4706},{ 8883, 4749},{ 9273, 4771},{ 9612, 4770}, - { 9970, 4765},{10325, 4773},{10672, 4778},{11106, 4758} - }, - /*Y' qi=37 INTER*/ - { - { 56, -14},{ 114, 1333},{ 218, 2683},{ 354, 3894}, - { 550, 4966},{ 916, 5854},{ 1569, 6437},{ 2520, 6685}, - { 3596, 6704},{ 4585, 6635},{ 5424, 6556},{ 6147, 6489}, - { 6787, 6437},{ 7358, 6395},{ 7876, 6358},{ 8361, 6325}, - { 8807, 6294},{ 9229, 6271},{ 9631, 6253},{10002, 6238}, - {10356, 6228},{10678, 6212},{10975, 6197},{11274, 6185} - } - }, - { - /*Cb qi=37 INTRA*/ - { - { 6, 3},{ 99, 370},{ 171, 746},{ 227, 1149}, - { 309, 1522},{ 421, 1836},{ 541, 2104},{ 652, 2347}, - { 757, 2572},{ 871, 2768},{ 989, 2936},{ 1111, 3087}, - { 1238, 3223},{ 1357, 3352},{ 1465, 3486},{ 1576, 3612}, - { 1709, 3705},{ 1828, 3801},{ 1937, 3895},{ 2076, 3967}, - { 2220, 4035},{ 2345, 4104},{ 2466, 4173},{ 2680, 4265} - }, - /*Cb qi=37 INTER*/ - { - { 111, 27},{ 97, 344},{ 87, 667},{ 99, 1038}, - { 115, 1425},{ 128, 1802},{ 143, 2160},{ 159, 2506}, - { 176, 2843},{ 198, 3167},{ 220, 3477},{ 247, 3774}, - { 280, 4061},{ 321, 4338},{ 368, 4608},{ 427, 4867}, - { 501, 5109},{ 595, 5332},{ 701, 5544},{ 818, 5738}, - { 956, 5905},{ 1105, 6066},{ 1248, 6217},{ 1381, 6353} - } - }, - { - /*Cr qi=37 INTRA*/ - { - { 12, 7},{ 102, 388},{ 173, 773},{ 242, 1180}, - { 331, 1541},{ 444, 1839},{ 562, 2095},{ 662, 2326}, - { 763, 2540},{ 871, 2728},{ 1003, 2892},{ 1130, 3045}, - { 1230, 3188},{ 1350, 3321},{ 1503, 3418},{ 1634, 3492}, - { 1737, 3568},{ 1856, 3653},{ 1970, 3744},{ 2091, 3802}, - { 2247, 3871},{ 2371, 3962},{ 2477, 4041},{ 2655, 4052} - }, - /*Cr qi=37 INTER*/ - { - { 89, -9},{ 97, 347},{ 88, 677},{ 102, 1048}, - { 118, 1432},{ 130, 1802},{ 141, 2163},{ 154, 2517}, - { 172, 2857},{ 192, 3181},{ 216, 3494},{ 246, 3793}, - { 286, 4074},{ 337, 4343},{ 395, 4600},{ 464, 4837}, - { 534, 5066},{ 608, 5289},{ 694, 5501},{ 788, 5704}, - { 893, 5901},{ 1010, 6088},{ 1151, 6249},{ 
1331, 6374} - } - } - }, - { - { - /*Y' qi=38 INTRA*/ - { - { 107, 65},{ 476, 1286},{ 968, 2148},{ 1548, 2641}, - { 2141, 2979},{ 2757, 3289},{ 3390, 3564},{ 4020, 3784}, - { 4632, 3957},{ 5224, 4097},{ 5794, 4201},{ 6326, 4250}, - { 6828, 4274},{ 7309, 4322},{ 7790, 4401},{ 8271, 4463}, - { 8729, 4498},{ 9165, 4540},{ 9552, 4566},{ 9901, 4560}, - {10266, 4552},{10617, 4563},{10964, 4572},{11393, 4567} - }, - /*Y' qi=38 INTER*/ - { - { 57, -13},{ 118, 1332},{ 233, 2665},{ 386, 3856}, - { 620, 4899},{ 1070, 5722},{ 1849, 6211},{ 2898, 6384}, - { 3989, 6376},{ 4947, 6311},{ 5754, 6249},{ 6454, 6199}, - { 7077, 6161},{ 7640, 6132},{ 8159, 6101},{ 8639, 6076}, - { 9081, 6054},{ 9502, 6037},{ 9900, 6027},{10274, 6012}, - {10621, 5999},{10938, 5991},{11237, 5977},{11557, 5966} - } - }, - { - /*Cb qi=38 INTRA*/ - { - { 8, 3},{ 104, 370},{ 179, 744},{ 243, 1139}, - { 338, 1498},{ 458, 1801},{ 584, 2060},{ 700, 2297}, - { 812, 2514},{ 935, 2699},{ 1061, 2858},{ 1189, 3007}, - { 1321, 3141},{ 1446, 3266},{ 1563, 3388},{ 1684, 3512}, - { 1816, 3614},{ 1942, 3702},{ 2055, 3793},{ 2201, 3857}, - { 2357, 3923},{ 2477, 3994},{ 2593, 4061},{ 2768, 4178} - }, - /*Cb qi=38 INTER*/ - { - { 118, 24},{ 102, 342},{ 91, 663},{ 101, 1040}, - { 116, 1427},{ 131, 1799},{ 147, 2152},{ 168, 2491}, - { 191, 2822},{ 215, 3139},{ 244, 3441},{ 276, 3731}, - { 316, 4013},{ 363, 4286},{ 423, 4546},{ 495, 4795}, - { 584, 5028},{ 691, 5242},{ 814, 5439},{ 959, 5608}, - { 1119, 5759},{ 1277, 5906},{ 1449, 6035},{ 1655, 6144} - } - }, - { - /*Cr qi=38 INTRA*/ - { - { 12, 6},{ 106, 387},{ 182, 771},{ 261, 1168}, - { 364, 1514},{ 483, 1802},{ 603, 2053},{ 707, 2282}, - { 817, 2489},{ 933, 2670},{ 1074, 2825},{ 1210, 2967}, - { 1320, 3104},{ 1444, 3229},{ 1599, 3324},{ 1735, 3396}, - { 1846, 3464},{ 1971, 3547},{ 2086, 3646},{ 2206, 3711}, - { 2366, 3773},{ 2499, 3859},{ 2603, 3945},{ 2766, 3952} - }, - /*Cr qi=38 INTER*/ - { - { 86, -9},{ 91, 352},{ 85, 680},{ 102, 1053}, - { 119, 1435},{ 132, 1799},{ 
146, 2153},{ 162, 2501}, - { 183, 2835},{ 209, 3154},{ 240, 3458},{ 278, 3751}, - { 327, 4025},{ 388, 4284},{ 455, 4532},{ 529, 4766}, - { 616, 4980},{ 711, 5188},{ 815, 5386},{ 920, 5583}, - { 1042, 5770},{ 1186, 5936},{ 1348, 6080},{ 1542, 6196} - } - } - }, - { - { - /*Y' qi=39 INTRA*/ - { - { 103, 66},{ 479, 1283},{ 998, 2125},{ 1610, 2591}, - { 2223, 2913},{ 2855, 3214},{ 3501, 3482},{ 4146, 3698}, - { 4772, 3868},{ 5376, 3999},{ 5956, 4095},{ 6496, 4140}, - { 7008, 4162},{ 7499, 4209},{ 7987, 4282},{ 8478, 4338}, - { 8947, 4374},{ 9385, 4417},{ 9783, 4437},{10143, 4433}, - {10504, 4424},{10866, 4435},{11225, 4444},{11665, 4430} - }, - /*Y' qi=39 INTER*/ - { - { 56, 2},{ 118, 1332},{ 235, 2660},{ 395, 3843}, - { 653, 4867},{ 1153, 5652},{ 2003, 6089},{ 3113, 6214}, - { 4228, 6178},{ 5189, 6102},{ 6002, 6031},{ 6707, 5976}, - { 7336, 5936},{ 7901, 5900},{ 8424, 5870},{ 8915, 5844}, - { 9361, 5822},{ 9784, 5807},{10187, 5794},{10571, 5778}, - {10931, 5763},{11264, 5751},{11582, 5742},{11916, 5730} - } - }, - { - /*Cb qi=39 INTRA*/ - { - { 8, 3},{ 104, 370},{ 179, 744},{ 244, 1138}, - { 340, 1496},{ 461, 1796},{ 588, 2053},{ 705, 2288}, - { 820, 2503},{ 945, 2684},{ 1073, 2840},{ 1210, 2981}, - { 1352, 3106},{ 1480, 3225},{ 1603, 3342},{ 1728, 3464}, - { 1865, 3559},{ 1990, 3645},{ 2106, 3734},{ 2258, 3796}, - { 2413, 3856},{ 2540, 3920},{ 2667, 3986},{ 2887, 4060} - }, - /*Cb qi=39 INTER*/ - { - { 119, 19},{ 103, 340},{ 90, 664},{ 100, 1040}, - { 115, 1426},{ 131, 1797},{ 148, 2148},{ 169, 2486}, - { 192, 2816},{ 217, 3131},{ 247, 3432},{ 282, 3721}, - { 324, 3999},{ 374, 4268},{ 435, 4526},{ 520, 4766}, - { 621, 4990},{ 738, 5194},{ 878, 5376},{ 1035, 5543}, - { 1202, 5686},{ 1374, 5819},{ 1545, 5950},{ 1729, 6064} - } - }, - { - /*Cr qi=39 INTRA*/ - { - { 12, 6},{ 106, 387},{ 182, 771},{ 262, 1167}, - { 365, 1512},{ 486, 1798},{ 608, 2047},{ 713, 2274}, - { 824, 2479},{ 945, 2655},{ 1091, 2804},{ 1231, 2941}, - { 1346, 3073},{ 1475, 3194},{ 1633, 3282},{ 1778, 
3345}, - { 1891, 3414},{ 2013, 3501},{ 2138, 3584},{ 2266, 3640}, - { 2428, 3701},{ 2568, 3782},{ 2674, 3863},{ 2816, 3894} - }, - /*Cr qi=39 INTER*/ - { - { 88, -7},{ 92, 352},{ 85, 680},{ 102, 1053}, - { 119, 1434},{ 132, 1797},{ 146, 2151},{ 163, 2498}, - { 185, 2830},{ 211, 3147},{ 243, 3451},{ 285, 3735}, - { 337, 4005},{ 401, 4260},{ 477, 4499},{ 565, 4721}, - { 655, 4937},{ 749, 5148},{ 858, 5344},{ 979, 5529}, - { 1110, 5710},{ 1264, 5871},{ 1460, 5990},{ 1677, 6086} - } - } - }, - { - { - /*Y' qi=40 INTRA*/ - { - { 98, 71},{ 491, 1274},{ 1023, 2103},{ 1641, 2559}, - { 2257, 2877},{ 2898, 3171},{ 3566, 3429},{ 4233, 3629}, - { 4881, 3784},{ 5499, 3906},{ 6088, 3997},{ 6631, 4040}, - { 7145, 4060},{ 7640, 4107},{ 8128, 4178},{ 8618, 4233}, - { 9077, 4267},{ 9514, 4304},{ 9919, 4324},{10277, 4317}, - {10635, 4312},{10985, 4324},{11338, 4331},{11792, 4334} - }, - /*Y' qi=40 INTER*/ - { - { 63, -26},{ 125, 1331},{ 256, 2640},{ 439, 3801}, - { 757, 4782},{ 1391, 5474},{ 2399, 5805},{ 3582, 5870}, - { 4678, 5824},{ 5600, 5763},{ 6386, 5710},{ 7076, 5667}, - { 7693, 5637},{ 8252, 5610},{ 8775, 5586},{ 9255, 5571}, - { 9694, 5556},{10115, 5541},{10530, 5530},{10903, 5522}, - {11242, 5515},{11596, 5501},{11904, 5482},{12205, 5475} - } - }, - { - /*Cb qi=40 INTRA*/ - { - { 8, 3},{ 108, 371},{ 189, 743},{ 265, 1128}, - { 371, 1475},{ 499, 1767},{ 628, 2022},{ 746, 2256}, - { 864, 2467},{ 991, 2647},{ 1124, 2801},{ 1270, 2933}, - { 1412, 3054},{ 1547, 3165},{ 1677, 3277},{ 1804, 3393}, - { 1946, 3483},{ 2078, 3569},{ 2201, 3651},{ 2352, 3711}, - { 2513, 3766},{ 2643, 3826},{ 2775, 3880},{ 3025, 3919} - }, - /*Cb qi=40 INTER*/ - { - { 114, 35},{ 104, 349},{ 96, 667},{ 106, 1040}, - { 121, 1423},{ 138, 1789},{ 158, 2132},{ 184, 2464}, - { 212, 2787},{ 242, 3095},{ 279, 3389},{ 321, 3671}, - { 374, 3941},{ 438, 4199},{ 517, 4446},{ 617, 4673}, - { 740, 4881},{ 891, 5064},{ 1058, 5225},{ 1239, 5372}, - { 1441, 5499},{ 1638, 5610},{ 1840, 5719},{ 2076, 5814} - } - }, - { - 
/*Cr qi=40 INTRA*/ - { - { 14, 7},{ 114, 389},{ 193, 771},{ 283, 1156}, - { 399, 1488},{ 523, 1768},{ 643, 2018},{ 752, 2245}, - { 865, 2450},{ 984, 2626},{ 1139, 2763},{ 1290, 2887}, - { 1413, 3014},{ 1550, 3128},{ 1711, 3211},{ 1865, 3268}, - { 1981, 3334},{ 2103, 3415},{ 2237, 3486},{ 2365, 3543}, - { 2529, 3610},{ 2666, 3700},{ 2775, 3779},{ 2929, 3803} - }, - /*Cr qi=40 INTER*/ - { - { 89, -8},{ 95, 353},{ 90, 681},{ 107, 1053}, - { 124, 1430},{ 139, 1787},{ 156, 2136},{ 177, 2477}, - { 203, 2803},{ 237, 3112},{ 276, 3406},{ 329, 3683}, - { 395, 3942},{ 475, 4182},{ 567, 4407},{ 665, 4624}, - { 767, 4834},{ 879, 5032},{ 1011, 5213},{ 1169, 5375}, - { 1348, 5525},{ 1547, 5654},{ 1785, 5743},{ 2066, 5787} - } - } - }, - { - { - /*Y' qi=41 INTRA*/ - { - { 98, 71},{ 495, 1272},{ 1040, 2090},{ 1675, 2533}, - { 2302, 2842},{ 2953, 3132},{ 3631, 3381},{ 4309, 3574}, - { 4966, 3726},{ 5593, 3846},{ 6189, 3934},{ 6738, 3972}, - { 7256, 3991},{ 7754, 4036},{ 8250, 4099},{ 8747, 4150}, - { 9207, 4185},{ 9650, 4222},{10057, 4242},{10411, 4237}, - {10771, 4230},{11127, 4244},{11486, 4254},{11933, 4252} - }, - /*Y' qi=41 INTER*/ - { - { 65, -25},{ 125, 1331},{ 260, 2633},{ 457, 3782}, - { 807, 4740},{ 1499, 5397},{ 2562, 5693},{ 3766, 5743}, - { 4859, 5695},{ 5776, 5638},{ 6556, 5590},{ 7243, 5554}, - { 7859, 5529},{ 8417, 5506},{ 8935, 5486},{ 9419, 5473}, - { 9869, 5460},{10296, 5446},{10711, 5436},{11089, 5430}, - {11445, 5421},{11802, 5412},{12129, 5404},{12465, 5393} - } - }, - { - /*Cb qi=41 INTRA*/ - { - { 8, 3},{ 108, 371},{ 189, 743},{ 267, 1126}, - { 374, 1471},{ 504, 1760},{ 635, 2011},{ 758, 2241}, - { 881, 2447},{ 1013, 2621},{ 1147, 2773},{ 1293, 2906}, - { 1441, 3023},{ 1580, 3131},{ 1712, 3243},{ 1844, 3360}, - { 1985, 3451},{ 2114, 3532},{ 2240, 3613},{ 2390, 3680}, - { 2550, 3740},{ 2687, 3800},{ 2825, 3862},{ 3052, 3944} - }, - /*Cb qi=41 INTER*/ - { - { 104, 39},{ 100, 350},{ 95, 667},{ 105, 1040}, - { 121, 1422},{ 137, 1787},{ 159, 2129},{ 185, 2459}, - 
{ 216, 2778},{ 249, 3083},{ 287, 3374},{ 335, 3653}, - { 393, 3920},{ 462, 4175},{ 549, 4414},{ 660, 4636}, - { 791, 4839},{ 952, 5014},{ 1135, 5166},{ 1337, 5297}, - { 1552, 5411},{ 1752, 5530},{ 1972, 5634},{ 2224, 5724} - } - }, - { - /*Cr qi=41 INTRA*/ - { - { 15, 7},{ 115, 389},{ 193, 770},{ 284, 1154}, - { 401, 1484},{ 528, 1761},{ 652, 2005},{ 764, 2228}, - { 882, 2427},{ 1008, 2599},{ 1167, 2734},{ 1320, 2859}, - { 1443, 2990},{ 1580, 3103},{ 1743, 3181},{ 1894, 3241}, - { 2012, 3309},{ 2141, 3385},{ 2272, 3459},{ 2398, 3519}, - { 2566, 3584},{ 2707, 3680},{ 2816, 3762},{ 2991, 3770} - }, - /*Cr qi=41 INTER*/ - { - { 92, -9},{ 98, 354},{ 90, 682},{ 107, 1052}, - { 124, 1429},{ 139, 1786},{ 156, 2132},{ 178, 2471}, - { 207, 2794},{ 241, 3100},{ 285, 3391},{ 345, 3662}, - { 417, 3915},{ 503, 4151},{ 600, 4375},{ 703, 4589}, - { 815, 4791},{ 942, 4981},{ 1088, 5155},{ 1250, 5316}, - { 1432, 5462},{ 1653, 5575},{ 1930, 5639},{ 2250, 5655} - } - } - }, - { - { - /*Y' qi=42 INTRA*/ - { - { 109, 75},{ 534, 1257},{ 1114, 2047},{ 1793, 2456}, - { 2461, 2735},{ 3157, 2994},{ 3879, 3221},{ 4595, 3396}, - { 5282, 3531},{ 5931, 3638},{ 6546, 3714},{ 7105, 3749}, - { 7633, 3766},{ 8147, 3803},{ 8652, 3865},{ 9148, 3915}, - { 9613, 3946},{10075, 3976},{10489, 3997},{10835, 3994}, - {11195, 3985},{11553, 3997},{11909, 4004},{12369, 3990} - }, - /*Y' qi=42 INTER*/ - { - { 69, -23},{ 134, 1332},{ 287, 2611},{ 521, 3730}, - { 970, 4624},{ 1827, 5176},{ 3028, 5382},{ 4262, 5389}, - { 5325, 5338},{ 6214, 5291},{ 6976, 5255},{ 7651, 5228}, - { 8260, 5206},{ 8821, 5190},{ 9343, 5177},{ 9823, 5165}, - {10273, 5152},{10709, 5143},{11121, 5136},{11502, 5129}, - {11857, 5125},{12193, 5115},{12520, 5107},{12802, 5097} - } - }, - { - /*Cb qi=42 INTRA*/ - { - { 9, 3},{ 113, 371},{ 199, 743},{ 279, 1123}, - { 390, 1462},{ 525, 1743},{ 662, 1986},{ 789, 2208}, - { 916, 2406},{ 1057, 2571},{ 1204, 2712},{ 1362, 2835}, - { 1524, 2943},{ 1676, 3040},{ 1815, 3145},{ 1959, 3249}, - { 2117, 
3325},{ 2249, 3406},{ 2377, 3488},{ 2537, 3547}, - { 2706, 3597},{ 2854, 3646},{ 2999, 3705},{ 3236, 3759} - }, - /*Cb qi=42 INTER*/ - { - { 114, 44},{ 107, 353},{ 101, 670},{ 111, 1041}, - { 129, 1418},{ 148, 1775},{ 174, 2110},{ 208, 2432}, - { 244, 2746},{ 283, 3046},{ 330, 3330},{ 388, 3602}, - { 460, 3858},{ 546, 4101},{ 655, 4326},{ 793, 4530}, - { 966, 4703},{ 1165, 4851},{ 1388, 4980},{ 1630, 5088}, - { 1869, 5189},{ 2122, 5268},{ 2403, 5328},{ 2667, 5417} - } - }, - { - /*Cr qi=42 INTRA*/ - { - { 15, 7},{ 120, 390},{ 202, 771},{ 298, 1150}, - { 421, 1473},{ 553, 1743},{ 681, 1982},{ 796, 2199}, - { 923, 2388},{ 1062, 2547},{ 1225, 2678},{ 1392, 2792}, - { 1531, 2907},{ 1682, 3007},{ 1856, 3074},{ 2009, 3134}, - { 2138, 3192},{ 2274, 3257},{ 2407, 3333},{ 2536, 3393}, - { 2711, 3455},{ 2875, 3531},{ 3000, 3598},{ 3186, 3599} - }, - /*Cr qi=42 INTER*/ - { - { 87, -4},{ 95, 358},{ 97, 683},{ 113, 1052}, - { 131, 1423},{ 148, 1774},{ 170, 2116},{ 198, 2448}, - { 234, 2762},{ 276, 3062},{ 331, 3343},{ 404, 3603}, - { 494, 3844},{ 598, 4067},{ 715, 4276},{ 842, 4471}, - { 977, 4661},{ 1128, 4840},{ 1311, 4991},{ 1516, 5127}, - { 1759, 5233},{ 2050, 5300},{ 2377, 5323},{ 2710, 5304} - } - } - }, - { - { - /*Y' qi=43 INTRA*/ - { - { 99, 79},{ 557, 1244},{ 1175, 2016},{ 1882, 2408}, - { 2570, 2677},{ 3288, 2926},{ 4030, 3141},{ 4760, 3307}, - { 5458, 3435},{ 6115, 3537},{ 6743, 3608},{ 7312, 3636}, - { 7841, 3652},{ 8357, 3687},{ 8870, 3742},{ 9376, 3788}, - { 9850, 3821},{10315, 3853},{10734, 3873},{11084, 3870}, - {11442, 3862},{11800, 3874},{12160, 3879},{12618, 3876} - }, - /*Y' qi=43 INTER*/ - { - { 69, -22},{ 134, 1331},{ 294, 2601},{ 551, 3703}, - { 1056, 4563},{ 2003, 5061},{ 3276, 5215},{ 4534, 5194}, - { 5599, 5133},{ 6488, 5083},{ 7257, 5044},{ 7938, 5014}, - { 8556, 4992},{ 9124, 4975},{ 9648, 4960},{10138, 4948}, - {10594, 4939},{11039, 4926},{11462, 4919},{11847, 4912}, - {12216, 4904},{12570, 4896},{12883, 4889},{13189, 4879} - } - }, - { - /*Cb 
qi=43 INTRA*/ - { - { 9, 3},{ 114, 371},{ 202, 740},{ 294, 1110}, - { 417, 1440},{ 558, 1716},{ 700, 1956},{ 833, 2172}, - { 966, 2365},{ 1116, 2524},{ 1269, 2661},{ 1431, 2781}, - { 1599, 2885},{ 1756, 2980},{ 1902, 3082},{ 2051, 3185}, - { 2209, 3261},{ 2337, 3342},{ 2464, 3420},{ 2633, 3475}, - { 2809, 3525},{ 2948, 3579},{ 3094, 3633},{ 3347, 3678} - }, - /*Cb qi=43 INTER*/ - { - { 111, 44},{ 106, 353},{ 102, 670},{ 112, 1040}, - { 128, 1416},{ 148, 1771},{ 176, 2104},{ 211, 2424}, - { 250, 2734},{ 293, 3030},{ 347, 3309},{ 411, 3575}, - { 490, 3828},{ 589, 4064},{ 716, 4278},{ 869, 4472}, - { 1050, 4640},{ 1264, 4781},{ 1512, 4895},{ 1775, 4991}, - { 2042, 5069},{ 2310, 5141},{ 2593, 5207},{ 2912, 5239} - } - }, - { - /*Cr qi=43 INTRA*/ - { - { 15, 7},{ 121, 390},{ 208, 767},{ 315, 1135}, - { 449, 1449},{ 586, 1715},{ 718, 1950},{ 843, 2158}, - { 977, 2342},{ 1120, 2501},{ 1290, 2632},{ 1466, 2739}, - { 1613, 2845},{ 1763, 2945},{ 1937, 3015},{ 2093, 3070}, - { 2225, 3126},{ 2366, 3194},{ 2501, 3267},{ 2634, 3324}, - { 2815, 3385},{ 2964, 3466},{ 3087, 3538},{ 3263, 3555} - }, - /*Cr qi=43 INTER*/ - { - { 84, -4},{ 93, 358},{ 95, 683},{ 113, 1052}, - { 131, 1421},{ 148, 1770},{ 171, 2110},{ 201, 2439}, - { 240, 2750},{ 287, 3046},{ 348, 3322},{ 429, 3576}, - { 527, 3811},{ 641, 4029},{ 767, 4230},{ 904, 4422}, - { 1053, 4603},{ 1225, 4765},{ 1433, 4903},{ 1661, 5030}, - { 1928, 5121},{ 2252, 5160},{ 2604, 5164},{ 2979, 5125} - } - } - }, - { - { - /*Y' qi=44 INTRA*/ - { - { 103, 80},{ 560, 1244},{ 1183, 2009},{ 1891, 2391}, - { 2586, 2649},{ 3324, 2884},{ 4093, 3089},{ 4850, 3243}, - { 5575, 3358},{ 6252, 3452},{ 6886, 3518},{ 7459, 3546}, - { 7993, 3562},{ 8515, 3594},{ 9030, 3645},{ 9534, 3691}, - {10004, 3723},{10469, 3750},{10887, 3765},{11236, 3766}, - {11596, 3762},{11960, 3775},{12317, 3784},{12766, 3789} - }, - /*Y' qi=44 INTER*/ - { - { 77, -24},{ 145, 1332},{ 332, 2580},{ 642, 3649}, - { 1270, 4438},{ 2360, 4860},{ 3685, 4982},{ 4910, 4966}, - { 
5929, 4928},{ 6785, 4900},{ 7529, 4880},{ 8198, 4863}, - { 8804, 4850},{ 9361, 4842},{ 9882, 4836},{10371, 4830}, - {10827, 4822},{11262, 4816},{11672, 4811},{12052, 4807}, - {12431, 4806},{12780, 4798},{13095, 4792},{13401, 4791} - } - }, - { - /*Cb qi=44 INTRA*/ - { - { 9, 2},{ 122, 371},{ 214, 741},{ 307, 1109}, - { 433, 1432},{ 576, 1704},{ 718, 1939},{ 855, 2152}, - { 991, 2340},{ 1141, 2497},{ 1298, 2632},{ 1463, 2749}, - { 1636, 2851},{ 1796, 2944},{ 1947, 3041},{ 2101, 3140}, - { 2260, 3219},{ 2392, 3297},{ 2527, 3366},{ 2693, 3424}, - { 2872, 3477},{ 3025, 3525},{ 3175, 3584},{ 3451, 3626} - }, - /*Cb qi=44 INTER*/ - { - { 111, 14},{ 110, 339},{ 109, 671},{ 120, 1040}, - { 139, 1410},{ 162, 1758},{ 197, 2084},{ 243, 2397}, - { 291, 2702},{ 342, 2992},{ 405, 3265},{ 484, 3521}, - { 584, 3760},{ 705, 3983},{ 855, 4185},{ 1048, 4356}, - { 1274, 4500},{ 1531, 4617},{ 1816, 4707},{ 2111, 4783}, - { 2409, 4846},{ 2720, 4901},{ 3044, 4957},{ 3391, 4985} - } - }, - { - /*Cr qi=44 INTRA*/ - { - { 17, 7},{ 128, 392},{ 219, 770},{ 329, 1135}, - { 465, 1442},{ 601, 1703},{ 734, 1935},{ 862, 2142}, - { 998, 2325},{ 1147, 2482},{ 1321, 2606},{ 1496, 2710}, - { 1649, 2813},{ 1809, 2908},{ 1984, 2977},{ 2143, 3032}, - { 2279, 3087},{ 2423, 3152},{ 2559, 3225},{ 2684, 3288}, - { 2866, 3351},{ 3025, 3426},{ 3161, 3492},{ 3372, 3500} - }, - /*Cr qi=44 INTER*/ - { - { 89, 0},{ 101, 352},{ 104, 683},{ 121, 1051}, - { 141, 1414},{ 163, 1757},{ 192, 2092},{ 231, 2415}, - { 278, 2720},{ 336, 3007},{ 412, 3273},{ 510, 3516}, - { 633, 3733},{ 769, 3936},{ 914, 4130},{ 1076, 4307}, - { 1256, 4472},{ 1469, 4617},{ 1723, 4732},{ 2012, 4822}, - { 2347, 4871},{ 2716, 4875},{ 3082, 4866},{ 3422, 4826} - } - } - }, - { - { - /*Y' qi=45 INTRA*/ - { - { 119, 78},{ 610, 1226},{ 1271, 1965},{ 2026, 2319}, - { 2768, 2550},{ 3556, 2757},{ 4369, 2938},{ 5157, 3076}, - { 5901, 3182},{ 6598, 3268},{ 7253, 3326},{ 7844, 3343}, - { 8392, 3356},{ 8922, 3386},{ 9453, 3433},{ 9973, 3474}, - {10457, 
3503},{10929, 3530},{11351, 3543},{11709, 3541}, - {12068, 3537},{12434, 3547},{12805, 3555},{13268, 3563} - }, - /*Y' qi=45 INTER*/ - { - { 77, -20},{ 146, 1330},{ 342, 2566},{ 699, 3604}, - { 1439, 4332},{ 2669, 4672},{ 4075, 4727},{ 5318, 4679}, - { 6345, 4630},{ 7209, 4595},{ 7963, 4570},{ 8644, 4551}, - { 9262, 4535},{ 9831, 4525},{10370, 4515},{10872, 4506}, - {11334, 4500},{11783, 4492},{12219, 4489},{12617, 4483}, - {12995, 4477},{13350, 4472},{13674, 4466},{13968, 4468} - } - }, - { - /*Cb qi=45 INTRA*/ - { - { 9, 2},{ 122, 370},{ 219, 735},{ 324, 1096}, - { 465, 1414},{ 619, 1679},{ 771, 1905},{ 920, 2103}, - { 1070, 2276},{ 1236, 2419},{ 1410, 2539},{ 1595, 2644}, - { 1784, 2736},{ 1949, 2831},{ 2104, 2931},{ 2275, 3021}, - { 2443, 3092},{ 2586, 3166},{ 2735, 3234},{ 2904, 3288}, - { 3093, 3338},{ 3262, 3382},{ 3419, 3427},{ 3708, 3456} - }, - /*Cb qi=45 INTER*/ - { - { 103, 0},{ 109, 339},{ 109, 670},{ 119, 1039}, - { 137, 1408},{ 162, 1754},{ 199, 2076},{ 248, 2386}, - { 301, 2684},{ 360, 2967},{ 433, 3234},{ 525, 3481}, - { 640, 3713},{ 780, 3924},{ 956, 4110},{ 1176, 4266}, - { 1438, 4390},{ 1736, 4481},{ 2057, 4553},{ 2385, 4613}, - { 2718, 4656},{ 3056, 4698},{ 3416, 4733},{ 3799, 4755} - } - }, - { - /*Cr qi=45 INTRA*/ - { - { 16, 7},{ 128, 391},{ 225, 763},{ 350, 1120}, - { 500, 1420},{ 649, 1673},{ 792, 1893},{ 929, 2089}, - { 1084, 2257},{ 1250, 2401},{ 1440, 2518},{ 1633, 2614}, - { 1799, 2708},{ 1968, 2798},{ 2151, 2863},{ 2314, 2914}, - { 2453, 2968},{ 2611, 3025},{ 2759, 3095},{ 2887, 3160}, - { 3082, 3210},{ 3259, 3278},{ 3403, 3342},{ 3593, 3354} - }, - /*Cr qi=45 INTER*/ - { - { 92, 0},{ 101, 352},{ 103, 682},{ 120, 1049}, - { 140, 1412},{ 163, 1752},{ 193, 2083},{ 234, 2402}, - { 287, 2702},{ 353, 2983},{ 442, 3240},{ 557, 3471}, - { 694, 3680},{ 846, 3873},{ 1014, 4056},{ 1200, 4224}, - { 1414, 4369},{ 1664, 4495},{ 1946, 4595},{ 2278, 4654}, - { 2654, 4673},{ 3047, 4658},{ 3438, 4627},{ 3825, 4585} - } - } - }, - { - { - /*Y' qi=46 
INTRA*/ - { - { 119, 78},{ 610, 1227},{ 1277, 1960},{ 2043, 2309}, - { 2805, 2529},{ 3618, 2719},{ 4452, 2887},{ 5257, 3016}, - { 6017, 3115},{ 6727, 3195},{ 7392, 3248},{ 7984, 3267}, - { 8528, 3281},{ 9059, 3310},{ 9593, 3354},{10119, 3395}, - {10599, 3425},{11064, 3450},{11493, 3464},{11850, 3466}, - {12207, 3462},{12578, 3471},{12948, 3480},{13407, 3487} - }, - /*Y' qi=46 INTER*/ - { - { 74, -14},{ 149, 1326},{ 382, 2538},{ 807, 3541}, - { 1670, 4211},{ 3000, 4499},{ 4416, 4533},{ 5628, 4490}, - { 6628, 4453},{ 7479, 4425},{ 8228, 4406},{ 8902, 4393}, - { 9521, 4380},{10090, 4371},{10623, 4364},{11124, 4356}, - {11586, 4351},{12043, 4344},{12476, 4341},{12863, 4340}, - {13244, 4337},{13610, 4329},{13936, 4324},{14246, 4329} - } - }, - { - /*Cb qi=46 INTRA*/ - { - { 11, 2},{ 132, 371},{ 234, 737},{ 340, 1094}, - { 481, 1405},{ 637, 1667},{ 791, 1891},{ 944, 2084}, - { 1099, 2253},{ 1268, 2392},{ 1444, 2507},{ 1633, 2610}, - { 1825, 2700},{ 1990, 2794},{ 2147, 2895},{ 2321, 2984}, - { 2493, 3053},{ 2640, 3126},{ 2787, 3198},{ 2954, 3253}, - { 3146, 3297},{ 3313, 3344},{ 3473, 3393},{ 3757, 3434} - }, - /*Cb qi=46 INTER*/ - { - { 97, 0},{ 109, 339},{ 108, 669},{ 120, 1035}, - { 142, 1398},{ 173, 1737},{ 221, 2052},{ 281, 2353}, - { 345, 2646},{ 415, 2924},{ 504, 3183},{ 616, 3421}, - { 749, 3643},{ 914, 3842},{ 1123, 4012},{ 1379, 4150}, - { 1685, 4250},{ 2014, 4327},{ 2366, 4382},{ 2731, 4426}, - { 3083, 4470},{ 3445, 4490},{ 3805, 4511},{ 4146, 4539} - } - }, - { - /*Cr qi=46 INTRA*/ - { - { 19, 7},{ 137, 393},{ 237, 765},{ 364, 1116}, - { 516, 1411},{ 665, 1662},{ 809, 1880},{ 951, 2072}, - { 1109, 2236},{ 1278, 2378},{ 1474, 2491},{ 1669, 2584}, - { 1835, 2678},{ 2014, 2766},{ 2203, 2828},{ 2366, 2880}, - { 2506, 2933},{ 2661, 2988},{ 2810, 3053},{ 2941, 3116}, - { 3131, 3175},{ 3310, 3243},{ 3461, 3303},{ 3656, 3321} - }, - /*Cr qi=46 INTER*/ - { - { 91, 1},{ 103, 351},{ 104, 681},{ 121, 1046}, - { 144, 1401},{ 173, 1736},{ 213, 2060},{ 265, 2373}, - { 330, 
2666},{ 410, 2938},{ 517, 3185},{ 655, 3404}, - { 815, 3601},{ 989, 3784},{ 1183, 3951},{ 1400, 4104}, - { 1649, 4241},{ 1933, 4352},{ 2261, 4427},{ 2646, 4458}, - { 3057, 4446},{ 3453, 4418},{ 3820, 4385},{ 4171, 4352} - } - } - }, - { - { - /*Y' qi=47 INTRA*/ - { - { 117, 83},{ 670, 1205},{ 1408, 1904},{ 2239, 2219}, - { 3049, 2414},{ 3905, 2584},{ 4775, 2734},{ 5610, 2852}, - { 6393, 2944},{ 7121, 3017},{ 7804, 3066},{ 8407, 3081}, - { 8957, 3093},{ 9498, 3119},{10043, 3160},{10582, 3199}, - {11083, 3226},{11561, 3250},{11993, 3263},{12352, 3264}, - {12711, 3259},{13092, 3266},{13463, 3271},{13918, 3275} - }, - /*Y' qi=47 INTER*/ - { - { 74, -11},{ 148, 1325},{ 404, 2518},{ 910, 3478}, - { 1916, 4080},{ 3369, 4298},{ 4823, 4292},{ 6035, 4238}, - { 7037, 4197},{ 7894, 4168},{ 8650, 4146},{ 9337, 4129}, - { 9968, 4116},{10549, 4105},{11096, 4096},{11605, 4089}, - {12081, 4083},{12547, 4076},{12990, 4070},{13399, 4070}, - {13776, 4065},{14133, 4059},{14486, 4057},{14842, 4053} - } - }, - { - /*Cb qi=47 INTRA*/ - { - { 11, 2},{ 133, 370},{ 242, 731},{ 367, 1077}, - { 524, 1378},{ 692, 1630},{ 860, 1844},{ 1028, 2024}, - { 1203, 2178},{ 1393, 2305},{ 1582, 2413},{ 1787, 2507}, - { 1992, 2590},{ 2175, 2676},{ 2351, 2767},{ 2534, 2851}, - { 2707, 2923},{ 2862, 2994},{ 3021, 3060},{ 3193, 3111}, - { 3396, 3147},{ 3573, 3184},{ 3752, 3220},{ 4038, 3255} - }, - /*Cb qi=47 INTER*/ - { - { 101, 0},{ 107, 339},{ 108, 667},{ 120, 1033}, - { 142, 1394},{ 175, 1729},{ 227, 2040},{ 295, 2335}, - { 369, 2619},{ 452, 2888},{ 556, 3138},{ 686, 3368}, - { 850, 3574},{ 1050, 3758},{ 1299, 3910},{ 1605, 4024}, - { 1950, 4104},{ 2317, 4163},{ 2689, 4210},{ 3077, 4239}, - { 3466, 4258},{ 3840, 4278},{ 4205, 4298},{ 4515, 4340} - } - }, - { - /*Cr qi=47 INTRA*/ - { - { 19, 7},{ 138, 392},{ 248, 758},{ 396, 1094}, - { 563, 1378},{ 723, 1621},{ 881, 1829},{ 1037, 2011}, - { 1214, 2165},{ 1410, 2290},{ 1623, 2393},{ 1834, 2480}, - { 2016, 2564},{ 2203, 2647},{ 2405, 2707},{ 2569, 2757}, - { 
2709, 2810},{ 2871, 2860},{ 3027, 2924},{ 3178, 2980}, - { 3375, 3034},{ 3563, 3097},{ 3724, 3151},{ 3952, 3153} - }, - /*Cr qi=47 INTER*/ - { - { 91, 1},{ 100, 351},{ 102, 681},{ 120, 1043}, - { 144, 1397},{ 175, 1729},{ 219, 2049},{ 277, 2356}, - { 353, 2640},{ 451, 2902},{ 579, 3136},{ 739, 3342}, - { 926, 3525},{ 1125, 3698},{ 1343, 3859},{ 1595, 3998}, - { 1881, 4113},{ 2208, 4205},{ 2589, 4253},{ 3014, 4250}, - { 3444, 4220},{ 3838, 4183},{ 4196, 4147},{ 4521, 4116} - } - } - }, - { - { - /*Y' qi=48 INTRA*/ - { - { 107, 87},{ 681, 1200},{ 1456, 1883},{ 2306, 2193}, - { 3122, 2386},{ 3984, 2548},{ 4862, 2693},{ 5704, 2808}, - { 6495, 2899},{ 7232, 2970},{ 7915, 3018},{ 8524, 3034}, - { 9085, 3043},{ 9635, 3068},{10192, 3108},{10735, 3145}, - {11237, 3171},{11719, 3194},{12153, 3207},{12516, 3206}, - {12888, 3202},{13266, 3210},{13637, 3218},{14101, 3219} - }, - /*Y' qi=48 INTER*/ - { - { 83, -18},{ 147, 1328},{ 398, 2519},{ 923, 3468}, - { 1979, 4047},{ 3472, 4246},{ 4936, 4232},{ 6148, 4178}, - { 7150, 4139},{ 8007, 4111},{ 8765, 4091},{ 9458, 4076}, - {10090, 4063},{10676, 4054},{11226, 4045},{11742, 4038}, - {12223, 4033},{12686, 4029},{13127, 4022},{13527, 4015}, - {13915, 4012},{14277, 4007},{14619, 4004},{14966, 4001} - } - }, - { - /*Cb qi=48 INTRA*/ - { - { 11, 2},{ 134, 369},{ 245, 730},{ 373, 1075}, - { 531, 1374},{ 698, 1625},{ 865, 1839},{ 1033, 2019}, - { 1207, 2173},{ 1397, 2300},{ 1588, 2408},{ 1795, 2501}, - { 2003, 2581},{ 2187, 2666},{ 2362, 2757},{ 2548, 2841}, - { 2719, 2912},{ 2876, 2983},{ 3034, 3047},{ 3209, 3097}, - { 3409, 3137},{ 3589, 3178},{ 3762, 3216},{ 4004, 3252} - }, - /*Cb qi=48 INTER*/ - { - { 113, 26},{ 112, 344},{ 111, 668},{ 120, 1032}, - { 141, 1392},{ 173, 1727},{ 224, 2036},{ 290, 2330}, - { 363, 2612},{ 447, 2880},{ 551, 3130},{ 685, 3358}, - { 852, 3563},{ 1061, 3742},{ 1332, 3884},{ 1654, 3993}, - { 2011, 4068},{ 2394, 4120},{ 2782, 4160},{ 3172, 4186}, - { 3557, 4209},{ 3932, 4228},{ 4306, 4237},{ 4675, 4236} - } - 
}, - { - /*Cr qi=48 INTRA*/ - { - { 18, 7},{ 139, 389},{ 252, 755},{ 404, 1090}, - { 573, 1372},{ 732, 1615},{ 889, 1823},{ 1045, 2005}, - { 1222, 2159},{ 1417, 2285},{ 1631, 2387},{ 1843, 2474}, - { 2027, 2558},{ 2212, 2639},{ 2413, 2697},{ 2578, 2746}, - { 2720, 2798},{ 2887, 2852},{ 3040, 2913},{ 3181, 2970}, - { 3381, 3024},{ 3581, 3081},{ 3743, 3130},{ 3948, 3133} - }, - /*Cr qi=48 INTER*/ - { - { 89, 0},{ 106, 352},{ 105, 682},{ 120, 1044}, - { 144, 1395},{ 174, 1724},{ 215, 2044},{ 270, 2350}, - { 343, 2635},{ 441, 2895},{ 571, 3129},{ 735, 3334}, - { 926, 3518},{ 1139, 3684},{ 1371, 3836},{ 1628, 3977}, - { 1933, 4089},{ 2279, 4164},{ 2672, 4204},{ 3105, 4205}, - { 3533, 4176},{ 3931, 4135},{ 4290, 4089},{ 4624, 4057} - } - } - }, - { - { - /*Y' qi=49 INTRA*/ - { - { 120, 85},{ 706, 1194},{ 1485, 1875},{ 2348, 2187}, - { 3190, 2372},{ 4076, 2521},{ 4967, 2658},{ 5819, 2771}, - { 6611, 2861},{ 7345, 2936},{ 8026, 2990},{ 8626, 3013}, - { 9182, 3030},{ 9723, 3059},{10266, 3100},{10802, 3143}, - {11293, 3179},{11768, 3206},{12201, 3221},{12556, 3225}, - {12914, 3226},{13281, 3237},{13639, 3247},{14089, 3257} - }, - /*Y' qi=49 INTER*/ - { - { 72, -11},{ 155, 1320},{ 458, 2485},{ 1090, 3386}, - { 2284, 3907},{ 3835, 4075},{ 5272, 4064},{ 6449, 4026}, - { 7426, 4003},{ 8267, 3987},{ 9017, 3976},{ 9698, 3967}, - {10328, 3962},{10913, 3959},{11452, 3954},{11961, 3950}, - {12442, 3947},{12904, 3946},{13347, 3945},{13749, 3943}, - {14123, 3941},{14490, 3941},{14826, 3939},{15153, 3937} - } - }, - { - /*Cb qi=49 INTRA*/ - { - { 11, 2},{ 145, 369},{ 262, 729},{ 393, 1070}, - { 557, 1363},{ 731, 1607},{ 907, 1811},{ 1085, 1983}, - { 1268, 2130},{ 1465, 2251},{ 1658, 2359},{ 1868, 2454}, - { 2079, 2534},{ 2264, 2621},{ 2440, 2717},{ 2625, 2802}, - { 2792, 2878},{ 2945, 2954},{ 3106, 3021},{ 3277, 3075}, - { 3466, 3119},{ 3638, 3170},{ 3824, 3213},{ 4100, 3243} - }, - /*Cb qi=49 INTER*/ - { - { 98, -6},{ 113, 343},{ 110, 669},{ 122, 1029}, - { 149, 1380},{ 192, 1706},{ 
258, 2007},{ 340, 2293}, - { 426, 2569},{ 525, 2831},{ 653, 3071},{ 814, 3287}, - { 1013, 3478},{ 1262, 3637},{ 1575, 3761},{ 1936, 3851}, - { 2328, 3910},{ 2741, 3949},{ 3163, 3970},{ 3559, 3994}, - { 3936, 4025},{ 4300, 4050},{ 4655, 4060},{ 4962, 4062} - } - }, - { - /*Cr qi=49 INTRA*/ - { - { 19, 7},{ 151, 389},{ 270, 753},{ 427, 1084}, - { 602, 1360},{ 767, 1595},{ 933, 1794},{ 1098, 1968}, - { 1285, 2115},{ 1489, 2237},{ 1699, 2342},{ 1912, 2435}, - { 2101, 2519},{ 2288, 2601},{ 2486, 2663},{ 2651, 2715}, - { 2799, 2769},{ 2958, 2825},{ 3106, 2890},{ 3257, 2948}, - { 3452, 3007},{ 3634, 3075},{ 3786, 3136},{ 3959, 3164} - }, - /*Cr qi=49 INTER*/ - { - { 85, 1},{ 103, 352},{ 104, 681},{ 121, 1039}, - { 152, 1382},{ 195, 1702},{ 248, 2015},{ 316, 2316}, - { 403, 2595},{ 520, 2847},{ 676, 3068},{ 870, 3258}, - { 1091, 3429},{ 1329, 3585},{ 1597, 3725},{ 1894, 3849}, - { 2242, 3940},{ 2656, 3984},{ 3098, 3992},{ 3531, 3981}, - { 3936, 3950},{ 4304, 3915},{ 4646, 3879},{ 4915, 3861} - } - } - }, - { - { - /*Y' qi=50 INTRA*/ - { - { 122, 89},{ 798, 1170},{ 1682, 1812},{ 2613, 2096}, - { 3501, 2260},{ 4430, 2388},{ 5352, 2510},{ 6228, 2613}, - { 7043, 2698},{ 7793, 2770},{ 8486, 2823},{ 9092, 2846}, - { 9652, 2865},{10210, 2895},{10773, 2936},{11315, 2979}, - {11817, 3014},{12297, 3041},{12734, 3057},{13097, 3064}, - {13443, 3067},{13813, 3078},{14190, 3088},{14646, 3103} - }, - /*Y' qi=50 INTER*/ - { - { 73, -11},{ 154, 1318},{ 501, 2457},{ 1281, 3291}, - { 2685, 3719},{ 4356, 3810},{ 5811, 3769},{ 6988, 3726}, - { 7976, 3700},{ 8835, 3682},{ 9606, 3669},{10307, 3659}, - {10953, 3652},{11556, 3645},{12115, 3643},{12641, 3640}, - {13138, 3636},{13613, 3634},{14068, 3629},{14488, 3627}, - {14876, 3625},{15237, 3621},{15585, 3623},{15922, 3629} - } - }, - { - /*Cb qi=50 INTRA*/ - { - { 11, 2},{ 148, 368},{ 278, 724},{ 431, 1052}, - { 613, 1334},{ 806, 1567},{ 1004, 1756},{ 1203, 1915}, - { 1405, 2051},{ 1621, 2163},{ 1833, 2262},{ 2059, 2347}, - { 2280, 2424},{ 2476, 
2512},{ 2670, 2598},{ 2864, 2679}, - { 3037, 2754},{ 3201, 2826},{ 3376, 2887},{ 3562, 2936}, - { 3756, 2976},{ 3932, 3022},{ 4117, 3065},{ 4385, 3094} - }, - /*Cb qi=50 INTER*/ - { - { 92, -3},{ 112, 343},{ 109, 669},{ 121, 1027}, - { 149, 1375},{ 196, 1697},{ 270, 1992},{ 366, 2267}, - { 471, 2532},{ 594, 2782},{ 747, 3011},{ 942, 3212}, - { 1189, 3384},{ 1497, 3521},{ 1875, 3613},{ 2297, 3673}, - { 2739, 3710},{ 3195, 3725},{ 3644, 3737},{ 4057, 3751}, - { 4445, 3763},{ 4841, 3769},{ 5211, 3779},{ 5568, 3769} - } - }, - { - /*Cr qi=50 INTRA*/ - { - { 19, 7},{ 155, 388},{ 290, 744},{ 474, 1060}, - { 666, 1324},{ 847, 1549},{ 1033, 1737},{ 1219, 1898}, - { 1428, 2034},{ 1653, 2147},{ 1885, 2245},{ 2115, 2329}, - { 2316, 2410},{ 2517, 2486},{ 2730, 2539},{ 2901, 2586}, - { 3042, 2638},{ 3199, 2693},{ 3366, 2755},{ 3534, 2805}, - { 3738, 2858},{ 3934, 2916},{ 4079, 2975},{ 4257, 2992} - }, - /*Cr qi=50 INTER*/ - { - { 87, 1},{ 102, 353},{ 103, 680},{ 121, 1036}, - { 153, 1377},{ 199, 1694},{ 260, 1999},{ 339, 2291}, - { 446, 2559},{ 590, 2797},{ 780, 3003},{ 1010, 3176}, - { 1267, 3331},{ 1547, 3474},{ 1874, 3594},{ 2245, 3688}, - { 2666, 3742},{ 3130, 3758},{ 3594, 3748},{ 4028, 3711}, - { 4415, 3674},{ 4771, 3641},{ 5122, 3605},{ 5482, 3569} - } - } - }, - { - { - /*Y' qi=51 INTRA*/ - { - { 115, 93},{ 819, 1164},{ 1739, 1806},{ 2695, 2101}, - { 3612, 2257},{ 4552, 2374},{ 5479, 2490},{ 6352, 2593}, - { 7158, 2683},{ 7898, 2761},{ 8580, 2823},{ 9177, 2854}, - { 9728, 2880},{10268, 2917},{10816, 2966},{11350, 3016}, - {11834, 3058},{12311, 3089},{12741, 3109},{13092, 3119}, - {13434, 3126},{13791, 3142},{14156, 3155},{14590, 3171} - }, - /*Y' qi=51 INTER*/ - { - { 58, 0},{ 171, 1307},{ 610, 2407},{ 1563, 3175}, - { 3116, 3545},{ 4789, 3624},{ 6185, 3602},{ 7320, 3583}, - { 8282, 3574},{ 9124, 3569},{ 9878, 3567},{10569, 3565}, - {11207, 3563},{11801, 3564},{12359, 3566},{12884, 3567}, - {13373, 3568},{13841, 3567},{14289, 3566},{14699, 3568}, - {15086, 3568},{15446, 
3566},{15788, 3564},{16103, 3568} - } - }, - { - /*Cb qi=51 INTRA*/ - { - { 14, 3},{ 161, 369},{ 297, 722},{ 454, 1047}, - { 639, 1325},{ 833, 1554},{ 1033, 1742},{ 1236, 1897}, - { 1440, 2032},{ 1653, 2148},{ 1860, 2253},{ 2077, 2347}, - { 2288, 2432},{ 2476, 2525},{ 2661, 2621},{ 2841, 2714}, - { 3010, 2797},{ 3170, 2876},{ 3333, 2945},{ 3510, 3000}, - { 3696, 3054},{ 3865, 3114},{ 4046, 3164},{ 4317, 3200} - }, - /*Cb qi=51 INTER*/ - { - { 88, -11},{ 109, 341},{ 109, 668},{ 126, 1019}, - { 168, 1358},{ 233, 1670},{ 329, 1955},{ 451, 2219}, - { 584, 2472},{ 736, 2711},{ 931, 2923},{ 1179, 3104}, - { 1480, 3254},{ 1846, 3368},{ 2265, 3448},{ 2714, 3501}, - { 3180, 3524},{ 3638, 3529},{ 4074, 3543},{ 4485, 3560}, - { 4868, 3571},{ 5238, 3581},{ 5597, 3594},{ 5953, 3591} - } - }, - { - /*Cr qi=51 INTRA*/ - { - { 24, 7},{ 168, 388},{ 309, 742},{ 496, 1054}, - { 688, 1316},{ 873, 1538},{ 1063, 1723},{ 1252, 1882}, - { 1460, 2018},{ 1682, 2134},{ 1907, 2238},{ 2125, 2332}, - { 2317, 2422},{ 2507, 2510},{ 2705, 2575},{ 2869, 2630}, - { 3015, 2684},{ 3178, 2744},{ 3329, 2815},{ 3477, 2878}, - { 3667, 2945},{ 3848, 3016},{ 3997, 3082},{ 4174, 3121} - }, - /*Cr qi=51 INTER*/ - { - { 83, -2},{ 102, 351},{ 102, 680},{ 126, 1029}, - { 172, 1359},{ 238, 1665},{ 321, 1962},{ 422, 2246}, - { 552, 2505},{ 733, 2728},{ 970, 2912},{ 1247, 3069}, - { 1552, 3209},{ 1876, 3338},{ 2251, 3440},{ 2692, 3502}, - { 3161, 3529},{ 3637, 3525},{ 4084, 3509},{ 4487, 3479}, - { 4850, 3444},{ 5181, 3419},{ 5507, 3406},{ 5786, 3398} - } - } - }, - { - { - /*Y' qi=52 INTRA*/ - { - { 117, 93},{ 814, 1168},{ 1729, 1822},{ 2706, 2119}, - { 3655, 2262},{ 4604, 2374},{ 5528, 2490},{ 6394, 2596}, - { 7189, 2691},{ 7921, 2777},{ 8596, 2846},{ 9184, 2885}, - { 9728, 2918},{10260, 2961},{10796, 3014},{11316, 3069}, - {11793, 3115},{12267, 3150},{12692, 3172},{13037, 3185}, - {13367, 3196},{13717, 3214},{14087, 3227},{14521, 3249} - }, - /*Y' qi=52 INTER*/ - { - { 52, 0},{ 169, 1308},{ 668, 2382},{ 1735, 
3112}, - { 3384, 3451},{ 5077, 3519},{ 6461, 3506},{ 7587, 3496}, - { 8545, 3494},{ 9384, 3494},{10142, 3498},{10838, 3501}, - {11475, 3503},{12078, 3508},{12640, 3511},{13162, 3513}, - {13654, 3517},{14130, 3521},{14576, 3522},{14980, 3523}, - {15369, 3523},{15737, 3522},{16071, 3521},{16382, 3516} - } - }, - { - /*Cb qi=52 INTRA*/ - { - { 14, 3},{ 163, 369},{ 299, 722},{ 457, 1044}, - { 645, 1319},{ 843, 1545},{ 1050, 1728},{ 1261, 1879}, - { 1468, 2013},{ 1678, 2132},{ 1883, 2240},{ 2093, 2338}, - { 2301, 2428},{ 2488, 2523},{ 2667, 2619},{ 2843, 2718}, - { 3010, 2805},{ 3163, 2887},{ 3323, 2963},{ 3490, 3028}, - { 3665, 3087},{ 3841, 3145},{ 4011, 3197},{ 4289, 3230} - }, - /*Cb qi=52 INTER*/ - { - { 98, -7},{ 109, 342},{ 109, 668},{ 126, 1018}, - { 170, 1355},{ 242, 1663},{ 352, 1941},{ 490, 2195}, - { 642, 2439},{ 823, 2666},{ 1052, 2868},{ 1333, 3039}, - { 1670, 3178},{ 2074, 3280},{ 2524, 3348},{ 2996, 3390}, - { 3469, 3410},{ 3923, 3420},{ 4355, 3434},{ 4771, 3451}, - { 5166, 3468},{ 5532, 3483},{ 5885, 3499},{ 6263, 3501} - } - }, - { - /*Cr qi=52 INTRA*/ - { - { 25, 7},{ 170, 388},{ 312, 741},{ 500, 1051}, - { 694, 1310},{ 883, 1529},{ 1082, 1709},{ 1280, 1864}, - { 1491, 1998},{ 1710, 2117},{ 1932, 2225},{ 2143, 2324}, - { 2328, 2418},{ 2516, 2506},{ 2708, 2578},{ 2870, 2637}, - { 3017, 2693},{ 3170, 2758},{ 3312, 2835},{ 3455, 2901}, - { 3644, 2972},{ 3827, 3049},{ 3968, 3121},{ 4115, 3166} - }, - /*Cr qi=52 INTER*/ - { - { 86, -2},{ 101, 352},{ 100, 680},{ 126, 1028}, - { 175, 1356},{ 247, 1657},{ 341, 1948},{ 458, 2224}, - { 615, 2471},{ 828, 2681},{ 1091, 2857},{ 1395, 3008}, - { 1732, 3140},{ 2095, 3257},{ 2502, 3348},{ 2968, 3402}, - { 3457, 3420},{ 3926, 3413},{ 4360, 3388},{ 4759, 3357}, - { 5128, 3329},{ 5449, 3306},{ 5741, 3295},{ 6071, 3296} - } - } - }, - { - { - /*Y' qi=53 INTRA*/ - { - { 138, 93},{ 850, 1161},{ 1773, 1810},{ 2763, 2103}, - { 3722, 2245},{ 4675, 2360},{ 5600, 2483},{ 6464, 2597}, - { 7255, 2700},{ 7982, 2792},{ 8652, 
2867},{ 9237, 2913}, - { 9775, 2950},{10302, 2998},{10834, 3058},{11347, 3121}, - {11826, 3169},{12299, 3207},{12713, 3235},{13054, 3250}, - {13387, 3265},{13744, 3286},{14110, 3302},{14515, 3323} - }, - /*Y' qi=53 INTER*/ - { - { 52, 2},{ 169, 1308},{ 680, 2377},{ 1763, 3103}, - { 3410, 3450},{ 5094, 3531},{ 6469, 3526},{ 7590, 3525}, - { 8547, 3530},{ 9385, 3534},{10139, 3540},{10835, 3548}, - {11479, 3553},{12075, 3559},{12634, 3565},{13159, 3570}, - {13650, 3573},{14124, 3576},{14575, 3580},{14993, 3583}, - {15375, 3584},{15744, 3584},{16091, 3583},{16421, 3586} - } - }, - { - /*Cb qi=53 INTRA*/ - { - { 14, 3},{ 167, 367},{ 317, 717},{ 492, 1033}, - { 687, 1306},{ 887, 1531},{ 1095, 1715},{ 1309, 1866}, - { 1517, 2000},{ 1729, 2119},{ 1932, 2227},{ 2146, 2325}, - { 2358, 2414},{ 2544, 2511},{ 2724, 2611},{ 2902, 2711}, - { 3070, 2800},{ 3227, 2878},{ 3381, 2954},{ 3548, 3021}, - { 3724, 3077},{ 3888, 3140},{ 4065, 3196},{ 4359, 3225} - }, - /*Cb qi=53 INTER*/ - { - { 93, -8},{ 110, 342},{ 108, 668},{ 125, 1018}, - { 170, 1355},{ 242, 1663},{ 353, 1939},{ 494, 2192}, - { 651, 2433},{ 838, 2658},{ 1076, 2856},{ 1368, 3022}, - { 1716, 3158},{ 2123, 3260},{ 2575, 3330},{ 3042, 3373}, - { 3507, 3396},{ 3962, 3413},{ 4394, 3430},{ 4797, 3452}, - { 5169, 3476},{ 5547, 3496},{ 5914, 3510},{ 6235, 3525} - } - }, - { - /*Cr qi=53 INTRA*/ - { - { 25, 7},{ 175, 386},{ 335, 734},{ 541, 1037}, - { 737, 1296},{ 926, 1516},{ 1125, 1696},{ 1324, 1851}, - { 1540, 1984},{ 1763, 2102},{ 1989, 2210},{ 2202, 2310}, - { 2386, 2404},{ 2572, 2495},{ 2768, 2569},{ 2929, 2627}, - { 3071, 2684},{ 3231, 2749},{ 3374, 2825},{ 3514, 2894}, - { 3703, 2963},{ 3882, 3040},{ 4024, 3111},{ 4190, 3150} - }, - /*Cr qi=53 INTER*/ - { - { 87, -1},{ 99, 352},{ 100, 680},{ 125, 1027}, - { 175, 1355},{ 249, 1657},{ 343, 1946},{ 462, 2220}, - { 624, 2465},{ 844, 2671},{ 1122, 2841},{ 1435, 2989}, - { 1768, 3125},{ 2134, 3243},{ 2545, 3334},{ 3002, 3393}, - { 3490, 3412},{ 3965, 3405},{ 4401, 3384},{ 
4797, 3359}, - { 5156, 3328},{ 5482, 3297},{ 5800, 3292},{ 6135, 3293} - } - } - }, - { - { - /*Y' qi=54 INTRA*/ - { - { 184, 94},{ 902, 1151},{ 1876, 1776},{ 2881, 2057}, - { 3832, 2200},{ 4785, 2315},{ 5709, 2442},{ 6570, 2562}, - { 7362, 2672},{ 8092, 2771},{ 8760, 2852},{ 9337, 2901}, - { 9874, 2943},{10402, 2995},{10928, 3059},{11443, 3126}, - {11926, 3178},{12396, 3220},{12805, 3251},{13139, 3266}, - {13466, 3280},{13822, 3304},{14184, 3322},{14585, 3342} - }, - /*Y' qi=54 INTER*/ - { - { 60, 5},{ 169, 1308},{ 683, 2375},{ 1791, 3090}, - { 3478, 3412},{ 5184, 3470},{ 6568, 3455},{ 7697, 3446}, - { 8659, 3446},{ 9503, 3447},{10266, 3450},{10971, 3454}, - {11619, 3458},{12223, 3462},{12789, 3467},{13315, 3471}, - {13811, 3475},{14291, 3479},{14743, 3479},{15148, 3481}, - {15535, 3483},{15913, 3481},{16252, 3479},{16569, 3472} - } - }, - { - /*Cb qi=54 INTRA*/ - { - { 13, 2},{ 165, 367},{ 318, 715},{ 498, 1030}, - { 698, 1301},{ 906, 1523},{ 1121, 1703},{ 1336, 1853}, - { 1549, 1984},{ 1765, 2100},{ 1974, 2207},{ 2192, 2306}, - { 2402, 2396},{ 2587, 2493},{ 2773, 2591},{ 2953, 2691}, - { 3119, 2778},{ 3277, 2858},{ 3430, 2940},{ 3603, 3004}, - { 3788, 3059},{ 3950, 3121},{ 4128, 3173},{ 4398, 3215} - }, - /*Cb qi=54 INTER*/ - { - { 100, -3},{ 109, 343},{ 107, 668},{ 125, 1018}, - { 169, 1354},{ 241, 1662},{ 353, 1938},{ 496, 2190}, - { 655, 2431},{ 843, 2655},{ 1082, 2851},{ 1381, 3015}, - { 1739, 3146},{ 2154, 3243},{ 2610, 3310},{ 3094, 3344}, - { 3581, 3358},{ 4034, 3371},{ 4457, 3384},{ 4867, 3399}, - { 5255, 3413},{ 5630, 3425},{ 6003, 3440},{ 6346, 3440} - } - }, - { - /*Cr qi=54 INTRA*/ - { - { 23, 7},{ 174, 386},{ 338, 732},{ 549, 1034}, - { 751, 1289},{ 947, 1506},{ 1150, 1685},{ 1353, 1837}, - { 1572, 1969},{ 1800, 2087},{ 2031, 2192},{ 2248, 2291}, - { 2434, 2387},{ 2622, 2477},{ 2815, 2549},{ 2976, 2607}, - { 3126, 2663},{ 3286, 2727},{ 3427, 2807},{ 3569, 2877}, - { 3761, 2941},{ 3942, 3016},{ 4084, 3093},{ 4226, 3131} - }, - /*Cr qi=54 INTER*/ - { 
- { 88, -2},{ 99, 351},{ 100, 680},{ 125, 1027}, - { 175, 1354},{ 248, 1656},{ 343, 1945},{ 463, 2219}, - { 626, 2463},{ 850, 2668},{ 1128, 2837},{ 1445, 2983}, - { 1791, 3111},{ 2168, 3224},{ 2597, 3309},{ 3075, 3351}, - { 3560, 3364},{ 4029, 3356},{ 4464, 3335},{ 4858, 3307}, - { 5218, 3275},{ 5547, 3256},{ 5850, 3247},{ 6171, 3214} - } - } - }, - { - { - /*Y' qi=55 INTRA*/ - { - { 178, 95},{ 968, 1137},{ 2000, 1747},{ 3013, 2027}, - { 3966, 2173},{ 4920, 2294},{ 5842, 2427},{ 6702, 2553}, - { 7489, 2668},{ 8213, 2773},{ 8875, 2858},{ 9452, 2913}, - { 9986, 2959},{10504, 3016},{11023, 3085},{11530, 3157}, - {12011, 3213},{12480, 3257},{12882, 3291},{13214, 3310}, - {13542, 3325},{13890, 3350},{14248, 3371},{14671, 3398} - }, - /*Y' qi=55 INTER*/ - { - { 59, 5},{ 170, 1307},{ 725, 2358},{ 1886, 3058}, - { 3589, 3385},{ 5284, 3459},{ 6654, 3458},{ 7771, 3461}, - { 8727, 3470},{ 9564, 3478},{10322, 3488},{11019, 3497}, - {11658, 3505},{12258, 3513},{12819, 3520},{13344, 3527}, - {13840, 3533},{14314, 3537},{14755, 3541},{15161, 3544}, - {15552, 3548},{15916, 3548},{16257, 3548},{16576, 3540} - } - }, - { - /*Cb qi=55 INTRA*/ - { - { 13, 2},{ 167, 366},{ 322, 714},{ 508, 1026}, - { 716, 1292},{ 930, 1511},{ 1148, 1690},{ 1366, 1839}, - { 1578, 1972},{ 1793, 2090},{ 2001, 2199},{ 2217, 2300}, - { 2427, 2393},{ 2609, 2495},{ 2784, 2600},{ 2961, 2704}, - { 3121, 2797},{ 3268, 2884},{ 3423, 2965},{ 3590, 3032}, - { 3764, 3096},{ 3926, 3165},{ 4101, 3223},{ 4405, 3258} - }, - /*Cb qi=55 INTER*/ - { - { 90, -4},{ 109, 344},{ 107, 668},{ 126, 1017}, - { 172, 1351},{ 249, 1657},{ 370, 1928},{ 527, 2174}, - { 702, 2407},{ 909, 2624},{ 1170, 2814},{ 1493, 2970}, - { 1869, 3097},{ 2292, 3192},{ 2752, 3258},{ 3232, 3295}, - { 3709, 3314},{ 4156, 3335},{ 4592, 3355},{ 5004, 3373}, - { 5377, 3389},{ 5737, 3411},{ 6092, 3432},{ 6473, 3423} - } - }, - { - /*Cr qi=55 INTRA*/ - { - { 23, 7},{ 175, 385},{ 342, 730},{ 561, 1028}, - { 771, 1279},{ 973, 1493},{ 1181, 1669},{ 1384, 1822}, 
- { 1602, 1956},{ 1830, 2076},{ 2057, 2184},{ 2270, 2288}, - { 2452, 2389},{ 2637, 2484},{ 2823, 2559},{ 2983, 2621}, - { 3129, 2682},{ 3280, 2753},{ 3417, 2833},{ 3554, 2904}, - { 3743, 2977},{ 3921, 3060},{ 4055, 3137},{ 4185, 3186} - }, - /*Cr qi=55 INTER*/ - { - { 85, 0},{ 99, 352},{ 100, 679},{ 126, 1025}, - { 178, 1351},{ 256, 1650},{ 359, 1935},{ 493, 2202}, - { 675, 2439},{ 921, 2636},{ 1220, 2799},{ 1552, 2941}, - { 1910, 3068},{ 2303, 3177},{ 2735, 3262},{ 3206, 3311}, - { 3689, 3333},{ 4152, 3327},{ 4588, 3299},{ 4978, 3272}, - { 5325, 3243},{ 5651, 3221},{ 5969, 3210},{ 6218, 3185} - } - } - }, - { - { - /*Y' qi=56 INTRA*/ - { - { 137, 104},{ 1048, 1128},{ 2147, 1760},{ 3261, 2029}, - { 4319, 2131},{ 5310, 2234},{ 6245, 2351},{ 7101, 2464}, - { 7886, 2572},{ 8610, 2675},{ 9270, 2762},{ 9840, 2818}, - {10365, 2869},{10875, 2928},{11393, 2997},{11900, 3071}, - {12371, 3128},{12834, 3172},{13233, 3208},{13562, 3228}, - {13878, 3245},{14221, 3271},{14584, 3292},{15008, 3320} - }, - /*Y' qi=56 INTER*/ - { - { 19, 21},{ 207, 1292},{ 1031, 2252},{ 2553, 2846}, - { 4463, 3085},{ 6137, 3131},{ 7441, 3151},{ 8526, 3172}, - { 9468, 3193},{10301, 3209},{11059, 3224},{11760, 3237}, - {12405, 3249},{13008, 3261},{13570, 3270},{14100, 3278}, - {14597, 3284},{15074, 3289},{15524, 3297},{15929, 3302}, - {16314, 3306},{16675, 3307},{17004, 3305},{17288, 3301} - } - }, - { - /*Cb qi=56 INTRA*/ - { - { 16, 3},{ 188, 367},{ 353, 712},{ 546, 1017}, - { 765, 1275},{ 989, 1484},{ 1221, 1653},{ 1459, 1791}, - { 1681, 1920},{ 1893, 2046},{ 2102, 2160},{ 2323, 2257}, - { 2534, 2347},{ 2720, 2447},{ 2902, 2549},{ 3075, 2654}, - { 3239, 2749},{ 3392, 2835},{ 3544, 2920},{ 3712, 2988}, - { 3882, 3052},{ 4052, 3123},{ 4227, 3181},{ 4483, 3213} - }, - /*Cb qi=56 INTER*/ - { - { 92, -1},{ 111, 343},{ 114, 665},{ 148, 1003}, - { 224, 1321},{ 345, 1609},{ 526, 1858},{ 754, 2077}, - { 1009, 2281},{ 1319, 2464},{ 1702, 2614},{ 2145, 2732}, - { 2625, 2824},{ 3123, 2890},{ 3634, 2933},{ 
4137, 2954}, - { 4614, 2965},{ 5052, 2988},{ 5468, 3015},{ 5852, 3035}, - { 6213, 3060},{ 6557, 3081},{ 6906, 3094},{ 7243, 3112} - } - }, - { - /*Cr qi=56 INTRA*/ - { - { 28, 8},{ 195, 385},{ 373, 727},{ 598, 1019}, - { 816, 1263},{ 1033, 1465},{ 1260, 1630},{ 1482, 1773}, - { 1717, 1900},{ 1949, 2018},{ 2178, 2128},{ 2393, 2233}, - { 2570, 2338},{ 2749, 2435},{ 2937, 2514},{ 3097, 2577}, - { 3240, 2638},{ 3398, 2709},{ 3540, 2791},{ 3673, 2865}, - { 3869, 2938},{ 4049, 3019},{ 4179, 3095},{ 4330, 3137} - }, - /*Cr qi=56 INTER*/ - { - { 83, 0},{ 99, 353},{ 103, 676},{ 146, 1010}, - { 232, 1320},{ 355, 1601},{ 512, 1866},{ 713, 2109}, - { 988, 2312},{ 1344, 2471},{ 1750, 2602},{ 2180, 2719}, - { 2642, 2819},{ 3141, 2892},{ 3653, 2939},{ 4159, 2961}, - { 4636, 2961},{ 5072, 2945},{ 5464, 2917},{ 5813, 2895}, - { 6134, 2890},{ 6458, 2883},{ 6735, 2881},{ 6953, 2902} - } - } - }, - { - { - /*Y' qi=57 INTRA*/ - { - { 170, 106},{ 1106, 1120},{ 2246, 1740},{ 3399, 1993}, - { 4482, 2077},{ 5492, 2167},{ 6446, 2273},{ 7324, 2379}, - { 8130, 2482},{ 8866, 2578},{ 9537, 2661},{10119, 2715}, - {10646, 2762},{11161, 2820},{11694, 2886},{12214, 2957}, - {12693, 3013},{13166, 3053},{13569, 3087},{13897, 3106}, - {14224, 3122},{14568, 3148},{14931, 3167},{15390, 3192} - }, - /*Y' qi=57 INTER*/ - { - { 19, 20},{ 205, 1292},{ 1096, 2229},{ 2775, 2766}, - { 4811, 2943},{ 6512, 2964},{ 7832, 2976},{ 8940, 2990}, - { 9903, 3004},{10755, 3017},{11532, 3029},{12243, 3039}, - {12891, 3047},{13502, 3058},{14073, 3065},{14603, 3071}, - {15097, 3078},{15581, 3083},{16036, 3086},{16452, 3090}, - {16855, 3093},{17222, 3094},{17552, 3092},{17851, 3098} - } - }, - { - /*Cb qi=57 INTRA*/ - { - { 16, 3},{ 197, 365},{ 384, 704},{ 603, 1001}, - { 837, 1252},{ 1077, 1455},{ 1326, 1618},{ 1581, 1748}, - { 1819, 1871},{ 2042, 1993},{ 2264, 2104},{ 2500, 2196}, - { 2722, 2280},{ 2916, 2375},{ 3103, 2473},{ 3290, 2575}, - { 3456, 2667},{ 3612, 2748},{ 3775, 2829},{ 3958, 2896}, - { 4145, 2947},{ 4307, 
3012},{ 4476, 3070},{ 4733, 3110} - }, - /*Cb qi=57 INTER*/ - { - { 94, -1},{ 111, 344},{ 112, 665},{ 147, 1002}, - { 227, 1319},{ 353, 1604},{ 543, 1849},{ 785, 2062}, - { 1066, 2257},{ 1408, 2430},{ 1827, 2568},{ 2320, 2670}, - { 2848, 2743},{ 3386, 2791},{ 3934, 2812},{ 4453, 2820}, - { 4929, 2830},{ 5368, 2842},{ 5787, 2856},{ 6190, 2875}, - { 6554, 2896},{ 6895, 2913},{ 7229, 2927},{ 7572, 2932} - } - }, - { - /*Cr qi=57 INTRA*/ - { - { 28, 8},{ 207, 383},{ 413, 716},{ 661, 999}, - { 889, 1237},{ 1123, 1433},{ 1365, 1592},{ 1603, 1731}, - { 1853, 1852},{ 2103, 1965},{ 2345, 2072},{ 2571, 2173}, - { 2763, 2271},{ 2949, 2364},{ 3146, 2438},{ 3315, 2497}, - { 3459, 2552},{ 3618, 2616},{ 3767, 2697},{ 3906, 2773}, - { 4099, 2841},{ 4281, 2916},{ 4429, 2987},{ 4569, 3030} - }, - /*Cr qi=57 INTER*/ - { - { 85, 0},{ 99, 352},{ 102, 675},{ 147, 1008}, - { 235, 1317},{ 363, 1597},{ 529, 1858},{ 748, 2094}, - { 1050, 2287},{ 1439, 2436},{ 1877, 2557},{ 2352, 2660}, - { 2869, 2740},{ 3413, 2791},{ 3962, 2815},{ 4485, 2819}, - { 4955, 2816},{ 5382, 2800},{ 5769, 2772},{ 6107, 2748}, - { 6443, 2740},{ 6754, 2739},{ 7029, 2737},{ 7284, 2745} - } - } - }, - { - { - /*Y' qi=58 INTRA*/ - { - { 164, 109},{ 1198, 1111},{ 2396, 1737},{ 3606, 1978}, - { 4727, 2048},{ 5749, 2138},{ 6708, 2243},{ 7584, 2347}, - { 8388, 2449},{ 9122, 2549},{ 9784, 2635},{10354, 2691}, - {10876, 2740},{11385, 2800},{11912, 2869},{12429, 2941}, - {12902, 2997},{13375, 3040},{13779, 3075},{14103, 3096}, - {14435, 3112},{14783, 3140},{15141, 3160},{15599, 3186} - }, - /*Y' qi=58 INTER*/ - { - { 14, 23},{ 210, 1290},{ 1277, 2178},{ 3118, 2677}, - { 5207, 2834},{ 6902, 2857},{ 8218, 2878},{ 9323, 2900}, - {10285, 2919},{11132, 2934},{11899, 2949},{12599, 2961}, - {13235, 2971},{13835, 2982},{14394, 2991},{14917, 2997}, - {15412, 3005},{15882, 3009},{16325, 3013},{16735, 3016}, - {17131, 3018},{17501, 3021},{17824, 3021},{18125, 3016} - } - }, - { - /*Cb qi=58 INTRA*/ - { - { 17, 3},{ 200, 365},{ 389, 
703},{ 613, 996}, - { 853, 1243},{ 1095, 1445},{ 1349, 1604},{ 1613, 1731}, - { 1853, 1853},{ 2074, 1978},{ 2292, 2091},{ 2526, 2184}, - { 2750, 2266},{ 2945, 2360},{ 3134, 2458},{ 3320, 2561}, - { 3482, 2654},{ 3641, 2737},{ 3804, 2818},{ 3985, 2881}, - { 4168, 2935},{ 4331, 3003},{ 4499, 3060},{ 4751, 3100} - }, - /*Cb qi=58 INTER*/ - { - { 94, -1},{ 112, 345},{ 112, 665},{ 152, 998}, - { 247, 1307},{ 406, 1580},{ 644, 1810},{ 938, 2007}, - { 1271, 2189},{ 1668, 2348},{ 2151, 2470},{ 2691, 2558}, - { 3249, 2619},{ 3798, 2659},{ 4334, 2682},{ 4849, 2692}, - { 5314, 2700},{ 5747, 2721},{ 6167, 2742},{ 6547, 2765}, - { 6902, 2790},{ 7251, 2804},{ 7583, 2819},{ 7924, 2833} - } - }, - { - /*Cr qi=58 INTRA*/ - { - { 29, 8},{ 210, 382},{ 419, 714},{ 671, 993}, - { 903, 1229},{ 1141, 1422},{ 1390, 1578},{ 1635, 1713}, - { 1889, 1833},{ 2140, 1946},{ 2379, 2055},{ 2604, 2157}, - { 2794, 2256},{ 2977, 2349},{ 3174, 2422},{ 3339, 2482}, - { 3483, 2537},{ 3643, 2604},{ 3790, 2684},{ 3927, 2757}, - { 4112, 2826},{ 4294, 2900},{ 4451, 2975},{ 4600, 3011} - }, - /*Cr qi=58 INTER*/ - { - { 86, 0},{ 99, 352},{ 103, 675},{ 151, 1004}, - { 256, 1306},{ 417, 1573},{ 628, 1819},{ 901, 2040}, - { 1262, 2217},{ 1705, 2353},{ 2191, 2466},{ 2713, 2556}, - { 3268, 2622},{ 3831, 2664},{ 4374, 2682},{ 4881, 2686}, - { 5339, 2685},{ 5747, 2668},{ 6123, 2646},{ 6465, 2630}, - { 6783, 2618},{ 7082, 2623},{ 7366, 2632},{ 7673, 2654} - } - } - }, - { - { - /*Y' qi=59 INTRA*/ - { - { 142, 112},{ 1259, 1100},{ 2552, 1711},{ 3815, 1933}, - { 4955, 1987},{ 5983, 2068},{ 6949, 2165},{ 7832, 2263}, - { 8645, 2359},{ 9392, 2454},{10066, 2536},{10643, 2589}, - {11174, 2636},{11696, 2693},{12230, 2758},{12752, 2826}, - {13239, 2883},{13721, 2926},{14139, 2959},{14479, 2978}, - {14811, 2993},{15166, 3020},{15532, 3039},{16000, 3062} - }, - /*Y' qi=59 INTER*/ - { - { 8, 25},{ 211, 1289},{ 1394, 2144},{ 3421, 2580}, - { 5611, 2689},{ 7316, 2701},{ 8643, 2717},{ 9762, 2734}, - {10735, 2750},{11587, 
2763},{12353, 2775},{13056, 2785}, - {13693, 2793},{14288, 2805},{14843, 2814},{15361, 2821}, - {15857, 2827},{16328, 2831},{16763, 2834},{17171, 2838}, - {17568, 2840},{17941, 2842},{18285, 2843},{18586, 2839} - } - }, - { - /*Cb qi=59 INTRA*/ - { - { 17, 3},{ 224, 363},{ 441, 696},{ 689, 982}, - { 945, 1222},{ 1204, 1416},{ 1474, 1571},{ 1751, 1695}, - { 2001, 1816},{ 2228, 1941},{ 2453, 2055},{ 2693, 2147}, - { 2924, 2227},{ 3125, 2321},{ 3321, 2416},{ 3510, 2520}, - { 3676, 2616},{ 3839, 2699},{ 4008, 2778},{ 4193, 2842}, - { 4371, 2898},{ 4535, 2965},{ 4710, 3023},{ 4921, 3068} - }, - /*Cb qi=59 INTER*/ - { - { 95, -5},{ 111, 343},{ 112, 664},{ 157, 995}, - { 258, 1302},{ 429, 1569},{ 691, 1790},{ 1017, 1977}, - { 1387, 2148},{ 1832, 2294},{ 2368, 2401},{ 2961, 2472}, - { 3553, 2518},{ 4133, 2545},{ 4688, 2557},{ 5198, 2563}, - { 5663, 2574},{ 6100, 2590},{ 6511, 2608},{ 6898, 2621}, - { 7274, 2634},{ 7631, 2655},{ 7984, 2669},{ 8361, 2669} - } - }, - { - /*Cr qi=59 INTRA*/ - { - { 31, 8},{ 240, 379},{ 480, 706},{ 748, 978}, - { 993, 1208},{ 1250, 1394},{ 1519, 1543},{ 1779, 1674}, - { 2047, 1792},{ 2307, 1904},{ 2552, 2013},{ 2780, 2116}, - { 2973, 2216},{ 3165, 2309},{ 3362, 2383},{ 3528, 2444}, - { 3677, 2499},{ 3841, 2566},{ 3995, 2646},{ 4139, 2720}, - { 4324, 2793},{ 4504, 2867},{ 4658, 2939},{ 4806, 2975} - }, - /*Cr qi=59 INTER*/ - { - { 89, -3},{ 98, 352},{ 103, 674},{ 156, 1002}, - { 268, 1300},{ 441, 1562},{ 673, 1801},{ 980, 2010}, - { 1385, 2175},{ 1868, 2301},{ 2401, 2402},{ 2984, 2474}, - { 3591, 2520},{ 4179, 2545},{ 4729, 2555},{ 5232, 2553}, - { 5679, 2545},{ 6081, 2530},{ 6447, 2510},{ 6791, 2496}, - { 7101, 2487},{ 7393, 2489},{ 7684, 2499},{ 7950, 2501} - } - } - }, - { - { - /*Y' qi=60 INTRA*/ - { - { 92, 116},{ 1361, 1085},{ 2746, 1686},{ 4050, 1895}, - { 5209, 1939},{ 6244, 2012},{ 7213, 2103},{ 8105, 2197}, - { 8928, 2290},{ 9685, 2381},{10371, 2460},{10952, 2511}, - {11487, 2556},{12026, 2611},{12574, 2674},{13102, 2739}, - {13597, 
2793},{14092, 2831},{14523, 2862},{14862, 2881}, - {15198, 2897},{15568, 2923},{15949, 2941},{16416, 2964} - }, - /*Y' qi=60 INTER*/ - { - { 4, 30},{ 215, 1287},{ 1547, 2104},{ 3729, 2491}, - { 5973, 2568},{ 7672, 2577},{ 9001, 2591},{10123, 2606}, - {11094, 2620},{11943, 2632},{12709, 2643},{13409, 2652}, - {14044, 2660},{14641, 2669},{15193, 2677},{15709, 2684}, - {16201, 2689},{16675, 2693},{17118, 2696},{17522, 2701}, - {17920, 2704},{18293, 2706},{18620, 2702},{18923, 2700} - } - }, - { - /*Cb qi=60 INTRA*/ - { - { 18, 3},{ 227, 362},{ 447, 694},{ 708, 974}, - { 981, 1207},{ 1252, 1397},{ 1532, 1547},{ 1822, 1663}, - { 2082, 1780},{ 2316, 1903},{ 2548, 2013},{ 2794, 2101}, - { 3029, 2178},{ 3242, 2266},{ 3445, 2360},{ 3638, 2459}, - { 3816, 2547},{ 3980, 2628},{ 4146, 2708},{ 4344, 2766}, - { 4546, 2812},{ 4725, 2872},{ 4880, 2930},{ 5054, 2966} - }, - /*Cb qi=60 INTER*/ - { - { 97, -4},{ 112, 343},{ 114, 664},{ 162, 993}, - { 273, 1294},{ 472, 1553},{ 774, 1762},{ 1138, 1939}, - { 1543, 2102},{ 2034, 2236},{ 2620, 2329},{ 3244, 2389}, - { 3860, 2423},{ 4443, 2440},{ 4997, 2449},{ 5502, 2455}, - { 5962, 2458},{ 6413, 2466},{ 6836, 2485},{ 7217, 2506}, - { 7592, 2518},{ 7957, 2533},{ 8291, 2543},{ 8574, 2545} - } - }, - { - /*Cr qi=60 INTRA*/ - { - { 32, 8},{ 243, 379},{ 488, 702},{ 771, 968}, - { 1030, 1192},{ 1300, 1373},{ 1581, 1517},{ 1854, 1643}, - { 2127, 1757},{ 2393, 1864},{ 2645, 1968},{ 2879, 2068}, - { 3078, 2166},{ 3277, 2256},{ 3484, 2325},{ 3660, 2381}, - { 3808, 2433},{ 3970, 2496},{ 4138, 2571},{ 4288, 2643}, - { 4475, 2710},{ 4655, 2778},{ 4810, 2843},{ 4959, 2879} - }, - /*Cr qi=60 INTER*/ - { - { 86, -2},{ 99, 352},{ 103, 673},{ 160, 998}, - { 284, 1292},{ 484, 1546},{ 753, 1774},{ 1100, 1973}, - { 1546, 2129},{ 2072, 2246},{ 2652, 2334},{ 3279, 2392}, - { 3911, 2425},{ 4504, 2440},{ 5044, 2443},{ 5536, 2440}, - { 5979, 2430},{ 6381, 2413},{ 6735, 2397},{ 7062, 2382}, - { 7383, 2376},{ 7680, 2375},{ 7962, 2373},{ 8203, 2379} - } - } - }, - { 
- { - /*Y' qi=61 INTRA*/ - { - { 54, 121},{ 1477, 1069},{ 3061, 1638},{ 4465, 1808}, - { 5649, 1827},{ 6710, 1884},{ 7716, 1958},{ 8648, 2037}, - { 9514, 2116},{10311, 2192},{11033, 2261},{11641, 2305}, - {12202, 2342},{12771, 2387},{13356, 2440},{13924, 2493}, - {14444, 2541},{14951, 2576},{15409, 2600},{15779, 2615}, - {16131, 2626},{16521, 2648},{16921, 2663},{17409, 2694} - }, - /*Y' qi=61 INTER*/ - { - { -1, 32},{ 216, 1286},{ 1806, 2036},{ 4279, 2327}, - { 6629, 2352},{ 8347, 2352},{ 9707, 2357},{10860, 2364}, - {11857, 2372},{12726, 2377},{13508, 2382},{14225, 2387}, - {14877, 2392},{15484, 2398},{16048, 2401},{16581, 2405}, - {17092, 2409},{17573, 2409},{18016, 2410},{18427, 2413}, - {18829, 2415},{19221, 2415},{19578, 2415},{19980, 2413} - } - }, - { - /*Cb qi=61 INTRA*/ - { - { 19, 3},{ 231, 362},{ 456, 693},{ 733, 965}, - { 1032, 1188},{ 1330, 1369},{ 1637, 1508},{ 1956, 1612}, - { 2241, 1718},{ 2496, 1832},{ 2750, 1932},{ 3019, 2007}, - { 3274, 2074},{ 3505, 2154},{ 3725, 2236},{ 3943, 2323}, - { 4138, 2403},{ 4323, 2476},{ 4505, 2543},{ 4706, 2592}, - { 4909, 2630},{ 5109, 2675},{ 5292, 2724},{ 5495, 2768} - }, - /*Cb qi=61 INTER*/ - { - { 91, -2},{ 111, 344},{ 114, 663},{ 166, 989}, - { 291, 1285},{ 522, 1534},{ 875, 1729},{ 1302, 1889}, - { 1786, 2031},{ 2368, 2141},{ 3042, 2207},{ 3734, 2243}, - { 4388, 2259},{ 4982, 2264},{ 5533, 2265},{ 6043, 2262}, - { 6524, 2264},{ 6982, 2274},{ 7422, 2283},{ 7831, 2295}, - { 8198, 2308},{ 8593, 2319},{ 8965, 2329},{ 9258, 2340} - } - }, - { - /*Cr qi=61 INTRA*/ - { - { 33, 9},{ 245, 378},{ 497, 699},{ 801, 958}, - { 1087, 1171},{ 1384, 1342},{ 1692, 1474},{ 1992, 1589}, - { 2290, 1692},{ 2576, 1789},{ 2852, 1884},{ 3109, 1973}, - { 3324, 2061},{ 3544, 2142},{ 3763, 2199},{ 3945, 2244}, - { 4103, 2292},{ 4283, 2349},{ 4469, 2413},{ 4635, 2476}, - { 4836, 2534},{ 5038, 2592},{ 5210, 2649},{ 5358, 2682} - }, - /*Cr qi=61 INTER*/ - { - { 82, 0},{ 97, 353},{ 104, 672},{ 165, 995}, - { 303, 1284},{ 532, 1529},{ 852, 
1742},{ 1273, 1921}, - { 1798, 2057},{ 2409, 2154},{ 3090, 2212},{ 3794, 2240}, - { 4460, 2251},{ 5057, 2249},{ 5596, 2249},{ 6085, 2245}, - { 6519, 2234},{ 6908, 2220},{ 7269, 2203},{ 7618, 2196}, - { 7949, 2198},{ 8269, 2195},{ 8554, 2196},{ 8928, 2217} - } - } - }, - { - { - /*Y' qi=62 INTRA*/ - { - { 29, 124},{ 1527, 1067},{ 3221, 1618},{ 4703, 1751}, - { 5909, 1744},{ 7001, 1779},{ 8057, 1829},{ 9049, 1885}, - { 9968, 1943},{10813, 1999},{11572, 2050},{12206, 2082}, - {12801, 2107},{13402, 2140},{14020, 2180},{14625, 2223}, - {15179, 2260},{15718, 2288},{16196, 2305},{16581, 2313}, - {16963, 2324},{17382, 2341},{17800, 2351},{18318, 2376} - }, - /*Y' qi=62 INTER*/ - { - { -8, 36},{ 218, 1284},{ 2073, 1965},{ 4814, 2159}, - { 7237, 2138},{ 8979, 2124},{10378, 2115},{11570, 2109}, - {12601, 2106},{13503, 2103},{14320, 2103},{15064, 2103}, - {15746, 2103},{16384, 2104},{16975, 2105},{17534, 2105}, - {18062, 2106},{18564, 2107},{19035, 2106},{19471, 2107}, - {19890, 2107},{20288, 2107},{20651, 2107},{21012, 2108} - } - }, - { - /*Cb qi=62 INTRA*/ - { - { 21, 3},{ 283, 360},{ 565, 683},{ 907, 938}, - { 1269, 1143},{ 1611, 1311},{ 1949, 1441},{ 2290, 1535}, - { 2596, 1632},{ 2877, 1738},{ 3162, 1828},{ 3458, 1893}, - { 3745, 1948},{ 4011, 2016},{ 4253, 2089},{ 4506, 2164}, - { 4734, 2233},{ 4943, 2294},{ 5162, 2353},{ 5381, 2393}, - { 5593, 2420},{ 5807, 2454},{ 6003, 2496},{ 6210, 2543} - }, - /*Cb qi=62 INTER*/ - { - { 91, -1},{ 110, 344},{ 113, 663},{ 169, 987}, - { 306, 1279},{ 562, 1519},{ 961, 1701},{ 1450, 1845}, - { 2013, 1967},{ 2686, 2053},{ 3437, 2095},{ 4171, 2109}, - { 4841, 2109},{ 5441, 2105},{ 6002, 2097},{ 6542, 2089}, - { 7028, 2087},{ 7491, 2088},{ 7949, 2090},{ 8377, 2089}, - { 8789, 2095},{ 9195, 2103},{ 9569, 2104},{ 9937, 2102} - } - }, - { - /*Cr qi=62 INTRA*/ - { - { 38, 8},{ 308, 374},{ 619, 685},{ 984, 925}, - { 1326, 1126},{ 1662, 1285},{ 1999, 1407},{ 2328, 1512}, - { 2659, 1604},{ 2976, 1691},{ 3285, 1774},{ 3570, 1853}, - { 3815, 
1931},{ 4068, 1998},{ 4304, 2044},{ 4491, 2082}, - { 4666, 2124},{ 4870, 2174},{ 5078, 2231},{ 5262, 2285}, - { 5480, 2335},{ 5703, 2378},{ 5905, 2423},{ 6075, 2454} - }, - /*Cr qi=62 INTER*/ - { - { 79, 1},{ 95, 353},{ 102, 671},{ 169, 992}, - { 318, 1277},{ 569, 1515},{ 936, 1716},{ 1428, 1876}, - { 2034, 1993},{ 2738, 2067},{ 3511, 2095},{ 4268, 2094}, - { 4943, 2087},{ 5543, 2079},{ 6074, 2074},{ 6552, 2069}, - { 6985, 2057},{ 7366, 2043},{ 7728, 2030},{ 8086, 2021}, - { 8423, 2017},{ 8752, 2016},{ 9057, 2014},{ 9376, 2008} - } - } - }, - { - { - /*Y' qi=63 INTRA*/ - { - { -59, 134},{ 1734, 1036},{ 3743, 1521},{ 5309, 1618}, - { 6520, 1597},{ 7664, 1609},{ 8809, 1630},{ 9894, 1657}, - {10907, 1687},{11838, 1717},{12673, 1744},{13379, 1758}, - {14038, 1767},{14698, 1784},{15379, 1806},{16062, 1831}, - {16694, 1852},{17300, 1867},{17827, 1878},{18250, 1881}, - {18702, 1884},{19199, 1892},{19665, 1896},{20273, 1908} - }, - /*Y' qi=63 INTER*/ - { - { -7, 33},{ 209, 1285},{ 2309, 1904},{ 5274, 2025}, - { 7801, 1966},{ 9637, 1924},{11126, 1892},{12403, 1868}, - {13515, 1849},{14491, 1834},{15380, 1822},{16197, 1814}, - {16944, 1806},{17645, 1799},{18303, 1794},{18916, 1789}, - {19494, 1785},{20056, 1782},{20568, 1779},{21047, 1776}, - {21508, 1775},{21925, 1772},{22327, 1770},{22678, 1771} - } - }, - { - /*Cb qi=63 INTRA*/ - { - { 20, 3},{ 294, 357},{ 608, 673},{ 1047, 908}, - { 1501, 1090},{ 1898, 1240},{ 2275, 1353},{ 2654, 1427}, - { 3014, 1502},{ 3366, 1579},{ 3726, 1637},{ 4084, 1674}, - { 4425, 1703},{ 4752, 1743},{ 5058, 1791},{ 5377, 1838}, - { 5676, 1877},{ 5946, 1912},{ 6213, 1945},{ 6458, 1969}, - { 6704, 1982},{ 6969, 1997},{ 7210, 2017},{ 7439, 2037} - }, - /*Cb qi=63 INTER*/ - { - { 86, 1},{ 108, 345},{ 111, 663},{ 168, 985}, - { 307, 1276},{ 577, 1513},{ 1007, 1688},{ 1550, 1819}, - { 2189, 1921},{ 2938, 1981},{ 3744, 2002},{ 4512, 2002}, - { 5199, 1996},{ 5824, 1986},{ 6419, 1971},{ 6978, 1954}, - { 7507, 1940},{ 8015, 1932},{ 8502, 1928},{ 8978, 
1920}, - { 9410, 1915},{ 9842, 1910},{10262, 1901},{10634, 1896} - } - }, - { - /*Cr qi=63 INTRA*/ - { - { 38, 7},{ 324, 367},{ 677, 670},{ 1136, 892}, - { 1562, 1070},{ 1951, 1209},{ 2326, 1313},{ 2694, 1399}, - { 3074, 1471},{ 3460, 1531},{ 3850, 1575},{ 4214, 1622}, - { 4522, 1679},{ 4819, 1723},{ 5089, 1749},{ 5315, 1769}, - { 5530, 1792},{ 5756, 1825},{ 6006, 1860},{ 6244, 1889}, - { 6514, 1924},{ 6792, 1946},{ 7026, 1962},{ 7191, 1971} - }, - /*Cr qi=63 INTER*/ - { - { 80, 2},{ 95, 354},{ 101, 671},{ 167, 990}, - { 321, 1274},{ 585, 1509},{ 984, 1702},{ 1534, 1849}, - { 2217, 1947},{ 3005, 1995},{ 3839, 1999},{ 4619, 1986}, - { 5310, 1973},{ 5933, 1961},{ 6486, 1952},{ 6988, 1942}, - { 7435, 1927},{ 7817, 1911},{ 8198, 1900},{ 8552, 1895}, - { 8881, 1890},{ 9253, 1883},{ 9598, 1876},{ 9923, 1859} - } - } - } -}; - -#endif diff --git a/drivers/theora/ocintrin.h b/drivers/theora/ocintrin.h deleted file mode 100644 index d49ebb2159..0000000000 --- a/drivers/theora/ocintrin.h +++ /dev/null @@ -1,128 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: ocintrin.h 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -/*Some common macros for potential platform-specific optimization.*/ -#include -#if !defined(_ocintrin_H) -# define _ocintrin_H (1) - -/*Some specific platforms may have optimized intrinsic or inline assembly - versions of these functions which can substantially improve performance. - We define macros for them to allow easy incorporation of these non-ANSI - features.*/ - -/*Note that we do not provide a macro for abs(), because it is provided as a - library function, which we assume is translated into an intrinsic to avoid - the function call overhead and then implemented in the smartest way for the - target platform. - With modern gcc (4.x), this is true: it uses cmov instructions if the - architecture supports it and branchless bit-twiddling if it does not (the - speed difference between the two approaches is not measurable). - Interestingly, the bit-twiddling method was patented in 2000 (US 6,073,150) - by Sun Microsystems, despite prior art dating back to at least 1996: - http://web.archive.org/web/19961201174141/www.x86.org/ftp/articles/pentopt/PENTOPT.TXT - On gcc 3.x, however, our assumption is not true, as abs() is translated to a - conditional jump, which is horrible on deeply piplined architectures (e.g., - all consumer architectures for the past decade or more). - Also be warned that -C*abs(x) where C is a constant is mis-optimized as - abs(C*x) on every gcc release before 4.2.3. 
- See bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34130 */ - -/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if - given an appropriate architecture, but the branchless bit-twiddling versions - are just as fast, and do not require any special target architecture. - Earlier gcc versions (3.x) compiled both code to the same assembly - instructions, because of the way they represented ((_b)>(_a)) internally.*/ -#define OC_MAXI(_a,_b) ((_a)-((_a)-(_b)&-((_b)>(_a)))) -#define OC_MINI(_a,_b) ((_a)+((_b)-(_a)&-((_b)<(_a)))) -/*Clamps an integer into the given range. - If _a>_c, then the lower bound _a is respected over the upper bound _c (this - behavior is required to meet our documented API behavior). - _a: The lower bound. - _b: The value to clamp. - _c: The upper boud.*/ -#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c))) -#define OC_CLAMP255(_x) ((unsigned char)((((_x)<0)-1)&((_x)|-((_x)>255)))) -/*This has a chance of compiling branchless, and is just as fast as the - bit-twiddling method, which is slightly less portable, since it relies on a - sign-extended rightshift, which is not guaranteed by ANSI (but present on - every relevant platform).*/ -#define OC_SIGNI(_a) (((_a)>0)-((_a)<0)) -/*Slightly more portable than relying on a sign-extended right-shift (which is - not guaranteed by ANSI), and just as fast, since gcc (3.x and 4.x both) - compile it into the right-shift anyway.*/ -#define OC_SIGNMASK(_a) (-((_a)<0)) -/*Divides an integer by a power of two, truncating towards 0. - _dividend: The integer to divide. - _shift: The non-negative power of two to divide by. 
- _rmask: (1<<_shift)-1*/ -#define OC_DIV_POW2(_dividend,_shift,_rmask)\ - ((_dividend)+(OC_SIGNMASK(_dividend)&(_rmask))>>(_shift)) -/*Divides _x by 65536, truncating towards 0.*/ -#define OC_DIV2_16(_x) OC_DIV_POW2(_x,16,0xFFFF) -/*Divides _x by 2, truncating towards 0.*/ -#define OC_DIV2(_x) OC_DIV_POW2(_x,1,0x1) -/*Divides _x by 8, truncating towards 0.*/ -#define OC_DIV8(_x) OC_DIV_POW2(_x,3,0x7) -/*Divides _x by 16, truncating towards 0.*/ -#define OC_DIV16(_x) OC_DIV_POW2(_x,4,0xF) -/*Right shifts _dividend by _shift, adding _rval, and subtracting one for - negative dividends first. - When _rval is (1<<_shift-1), this is equivalent to division with rounding - ties away from zero.*/ -#define OC_DIV_ROUND_POW2(_dividend,_shift,_rval)\ - ((_dividend)+OC_SIGNMASK(_dividend)+(_rval)>>(_shift)) -/*Divides a _x by 2, rounding towards even numbers.*/ -#define OC_DIV2_RE(_x) ((_x)+((_x)>>1&1)>>1) -/*Divides a _x by (1<<(_shift)), rounding towards even numbers.*/ -#define OC_DIV_POW2_RE(_x,_shift) \ - ((_x)+((_x)>>(_shift)&1)+((1<<(_shift))-1>>1)>>(_shift)) -/*Swaps two integers _a and _b if _a>_b.*/ -#define OC_SORT2I(_a,_b) \ - do{ \ - int t__; \ - t__=((_a)^(_b))&-((_b)<(_a)); \ - (_a)^=t__; \ - (_b)^=t__; \ - } \ - while(0) - -/*Accesses one of four (signed) bytes given an index. - This can be used to avoid small lookup tables.*/ -#define OC_BYTE_TABLE32(_a,_b,_c,_d,_i) \ - ((signed char) \ - (((_a)&0xFF|((_b)&0xFF)<<8|((_c)&0xFF)<<16|((_d)&0xFF)<<24)>>(_i)*8)) -/*Accesses one of eight (unsigned) nibbles given an index. 
- This can be used to avoid small lookup tables.*/ -#define OC_UNIBBLE_TABLE32(_a,_b,_c,_d,_e,_f,_g,_h,_i) \ - ((((_a)&0xF|((_b)&0xF)<<4|((_c)&0xF)<<8|((_d)&0xF)<<12| \ - ((_e)&0xF)<<16|((_f)&0xF)<<20|((_g)&0xF)<<24|((_h)&0xF)<<28)>>(_i)*4)&0xF) - - - -/*All of these macros should expect floats as arguments.*/ -#define OC_MAXF(_a,_b) ((_a)<(_b)?(_b):(_a)) -#define OC_MINF(_a,_b) ((_a)>(_b)?(_b):(_a)) -#define OC_CLAMPF(_a,_b,_c) (OC_MINF(_a,OC_MAXF(_b,_c))) -#define OC_FABSF(_f) ((float)fabs(_f)) -#define OC_SQRTF(_f) ((float)sqrt(_f)) -#define OC_POWF(_b,_e) ((float)pow(_b,_e)) -#define OC_LOGF(_f) ((float)log(_f)) -#define OC_IFLOORF(_f) ((int)floor(_f)) -#define OC_ICEILF(_f) ((int)ceil(_f)) - -#endif diff --git a/drivers/theora/quant.c b/drivers/theora/quant.c deleted file mode 100644 index 8359f5abea..0000000000 --- a/drivers/theora/quant.c +++ /dev/null @@ -1,119 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: quant.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -#include -#include -#include -#include "quant.h" -#include "decint.h" - -static const unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2}; -static const unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2}; - -/*Initializes the dequantization tables from a set of quantizer info. 
- Currently the dequantizer (and elsewhere enquantizer) tables are expected to - be initialized as pointing to the storage reserved for them in the - oc_theora_state (resp. oc_enc_ctx) structure. - If some tables are duplicates of others, the pointers will be adjusted to - point to a single copy of the tables, but the storage for them will not be - freed. - If you're concerned about the memory footprint, the obvious thing to do is - to move the storage out of its fixed place in the structures and allocate - it on demand. - However, a much, much better option is to only store the quantization - matrices being used for the current frame, and to recalculate these as the - qi values change between frames (this is what VP3 did).*/ -void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2], - int _pp_dc_scale[64],const th_quant_info *_qinfo){ - /*Coding mode: intra or inter.*/ - int qti; - /*Y', C_b, C_r*/ - int pli; - for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ - /*Quality index.*/ - int qi; - /*Range iterator.*/ - int qri; - for(qi=0,qri=0;qri<=_qinfo->qi_ranges[qti][pli].nranges;qri++){ - th_quant_base base; - ogg_uint32_t q; - int qi_start; - int qi_end; - memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri], - sizeof(base)); - qi_start=qi; - if(qri==_qinfo->qi_ranges[qti][pli].nranges)qi_end=qi+1; - else qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri]; - /*Iterate over quality indicies in this range.*/ - for(;;){ - ogg_uint32_t qfac; - int zzi; - int ci; - /*In the original VP3.2 code, the rounding offset and the size of the - dead zone around 0 were controlled by a "sharpness" parameter. - The size of our dead zone is now controlled by the per-coefficient - quality thresholds returned by our HVS module. - We round down from a more accurate value when the quality of the - reconstruction does not fall below our threshold and it saves bits. 
- Hence, all of that VP3.2 code is gone from here, and the remaining - floating point code has been implemented as equivalent integer code - with exact precision.*/ - qfac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0]; - /*For postprocessing, not dequantization.*/ - if(_pp_dc_scale!=NULL)_pp_dc_scale[qi]=(int)(qfac/160); - /*Scale DC the coefficient from the proper table.*/ - q=(qfac/100)<<2; - q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX); - _dequant[qi][pli][qti][0]=(ogg_uint16_t)q; - /*Now scale AC coefficients from the proper table.*/ - for(zzi=1;zzi<64;zzi++){ - q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[OC_FZIG_ZAG[zzi]]/100)<<2; - q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX); - _dequant[qi][pli][qti][zzi]=(ogg_uint16_t)q; - } - /*If this is a duplicate of a previous matrix, use that instead. - This simple check helps us improve cache coherency later.*/ - { - int dupe; - int qtj; - int plj; - dupe=0; - for(qtj=0;qtj<=qti;qtj++){ - for(plj=0;plj<(qtj=qi_end)break; - /*Interpolate the next base matrix.*/ - for(ci=0;ci<64;ci++){ - base[ci]=(unsigned char)( - (2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+ - (qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci]) - +_qinfo->qi_ranges[qti][pli].sizes[qri])/ - (2*_qinfo->qi_ranges[qti][pli].sizes[qri])); - } - } - } - } -} diff --git a/drivers/theora/quant.h b/drivers/theora/quant.h deleted file mode 100644 index 49ce13a65c..0000000000 --- a/drivers/theora/quant.h +++ /dev/null @@ -1,33 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: quant.h 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -#if !defined(_quant_H) -# define _quant_H (1) -# include "theora/codec.h" -# include "ocintrin.h" - -typedef ogg_uint16_t oc_quant_table[64]; - - -/*Maximum scaled quantizer value.*/ -#define OC_QUANT_MAX (1024<<2) - - -void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2], - int _pp_dc_scale[64],const th_quant_info *_qinfo); - -#endif diff --git a/drivers/theora/rate.c b/drivers/theora/rate.c deleted file mode 100644 index 4f43bb2e5f..0000000000 --- a/drivers/theora/rate.c +++ /dev/null @@ -1,1137 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: rate.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ -#include -#include -#include "encint.h" - -/*A rough lookup table for tan(x), 0<=x>24; - if(i>=17)i=16; - t0=OC_ROUGH_TAN_LOOKUP[i]; - t1=OC_ROUGH_TAN_LOOKUP[i+1]; - d=_alpha*36-(i<<24); - return (int)(((ogg_int64_t)t0<<32)+(t1-t0<<8)*(ogg_int64_t)d>>32); -} - -/*Re-initialize the Bessel filter coefficients with the specified delay. - This does not alter the x/y state, but changes the reaction time of the - filter. 
- Altering the time constant of a reactive filter without alterning internal - state is something that has to be done carefuly, but our design operates at - high enough delays and with small enough time constant changes to make it - safe.*/ -static void oc_iir_filter_reinit(oc_iir_filter *_f,int _delay){ - int alpha; - ogg_int64_t one48; - ogg_int64_t warp; - ogg_int64_t k1; - ogg_int64_t k2; - ogg_int64_t d; - ogg_int64_t a; - ogg_int64_t ik2; - ogg_int64_t b1; - ogg_int64_t b2; - /*This borrows some code from an unreleased version of Postfish. - See the recipe at http://unicorn.us.com/alex/2polefilters.html for details - on deriving the filter coefficients.*/ - /*alpha is Q24*/ - alpha=(1<<24)/_delay; - one48=(ogg_int64_t)1<<48; - /*warp is 7.12*/ - warp=OC_MAXI(oc_warp_alpha(alpha),1); - /*k1 is 9.12*/ - k1=3*warp; - /*k2 is 16.24.*/ - k2=k1*warp; - /*d is 16.15.*/ - d=((1<<12)+k1<<12)+k2+256>>9; - /*a is 0.32, since d is larger than both 1.0 and k2.*/ - a=(k2<<23)/d; - /*ik2 is 25.24.*/ - ik2=one48/k2; - /*b1 is Q56; in practice, the integer ranges between -2 and 2.*/ - b1=2*a*(ik2-(1<<24)); - /*b2 is Q56; in practice, the integer ranges between -2 and 2.*/ - b2=(one48<<8)-(4*a<<24)-b1; - /*All of the filter parameters are Q24.*/ - _f->c[0]=(ogg_int32_t)(b1+((ogg_int64_t)1<<31)>>32); - _f->c[1]=(ogg_int32_t)(b2+((ogg_int64_t)1<<31)>>32); - _f->g=(ogg_int32_t)(a+128>>8); -} - -/*Initialize a 2nd order low-pass Bessel filter with the corresponding delay - and initial value. 
- _value is Q24.*/ -static void oc_iir_filter_init(oc_iir_filter *_f,int _delay,ogg_int32_t _value){ - oc_iir_filter_reinit(_f,_delay); - _f->y[1]=_f->y[0]=_f->x[1]=_f->x[0]=_value; -} - -static ogg_int64_t oc_iir_filter_update(oc_iir_filter *_f,ogg_int32_t _x){ - ogg_int64_t c0; - ogg_int64_t c1; - ogg_int64_t g; - ogg_int64_t x0; - ogg_int64_t x1; - ogg_int64_t y0; - ogg_int64_t y1; - ogg_int64_t ya; - c0=_f->c[0]; - c1=_f->c[1]; - g=_f->g; - x0=_f->x[0]; - x1=_f->x[1]; - y0=_f->y[0]; - y1=_f->y[1]; - ya=(_x+x0*2+x1)*g+y0*c0+y1*c1+(1<<23)>>24; - _f->x[1]=(ogg_int32_t)x0; - _f->x[0]=_x; - _f->y[1]=(ogg_int32_t)y0; - _f->y[0]=(ogg_int32_t)ya; - return ya; -} - - - -/*Search for the quantizer that matches the target most closely. - We don't assume a linear ordering, but when there are ties we pick the - quantizer closest to the old one.*/ -static int oc_enc_find_qi_for_target(oc_enc_ctx *_enc,int _qti,int _qi_old, - int _qi_min,ogg_int64_t _log_qtarget){ - ogg_int64_t best_qdiff; - int best_qi; - int qi; - best_qi=_qi_min; - best_qdiff=_enc->log_qavg[_qti][best_qi]-_log_qtarget; - best_qdiff=best_qdiff+OC_SIGNMASK(best_qdiff)^OC_SIGNMASK(best_qdiff); - for(qi=_qi_min+1;qi<64;qi++){ - ogg_int64_t qdiff; - qdiff=_enc->log_qavg[_qti][qi]-_log_qtarget; - qdiff=qdiff+OC_SIGNMASK(qdiff)^OC_SIGNMASK(qdiff); - if(qdiffstate.qis[0]; - /*If rate control is active, use the lambda for the _target_ quantizer. - This allows us to scale to rates slightly lower than we'd normally be able - to reach, and give the rate control a semblance of "fractional qi" - precision. - TODO: Add API for changing QI, and allow extra precision.*/ - if(_enc->state.info.target_bitrate>0)lq=_enc->rc.log_qtarget; - else lq=_enc->log_qavg[_qti][qi]; - /*The resulting lambda value is less than 0x500000.*/ - _enc->lambda=(int)oc_bexp64(2*lq-0x4780BD468D6B62BLL); - /*Select additional quantizers. 
- The R-D optimal block AC quantizer statistics suggest that the distribution - is roughly Gaussian-like with a slight positive skew. - K-means clustering on log_qavg to select 3 quantizers produces cluster - centers of {log_qavg-0.6,log_qavg,log_qavg+0.7}. - Experiments confirm these are relatively good choices. - - Although we do greedy R-D optimization of the qii flags to avoid switching - too frequently, this becomes ineffective at low rates, either because we - do a poor job of predicting the actual R-D cost, or the greedy - optimization is not sufficient. - Therefore adaptive quantization is disabled above an (experimentally - suggested) threshold of log_qavg=7.00 (e.g., below INTRA qi=12 or - INTER qi=20 with current matrices). - This may need to be revised if the R-D cost estimation or qii flag - optimization strategies change.*/ - nqis=1; - if(lq<(OC_Q57(56)>>3)&&!_enc->vp3_compatible){ - qi1=oc_enc_find_qi_for_target(_enc,_qti,OC_MAXI(qi-1,0),0, - lq+(OC_Q57(7)+5)/10); - if(qi1!=qi)_enc->state.qis[nqis++]=qi1; - qi1=oc_enc_find_qi_for_target(_enc,_qti,OC_MINI(qi+1,63),0, - lq-(OC_Q57(6)+5)/10); - if(qi1!=qi&&qi1!=_enc->state.qis[nqis-1])_enc->state.qis[nqis++]=qi1; - } - _enc->state.nqis=nqis; -} - -/*Binary exponential of _log_scale with 24-bit fractional precision and - saturation. - _log_scale: A binary logarithm in Q24 format. - Return: The binary exponential in Q24 format, saturated to 2**47-1 if - _log_scale was too large.*/ -static ogg_int64_t oc_bexp_q24(ogg_int32_t _log_scale){ - if(_log_scale<(ogg_int32_t)23<<24){ - ogg_int64_t ret; - ret=oc_bexp64(((ogg_int64_t)_log_scale<<33)+OC_Q57(24)); - return ret<0x7FFFFFFFFFFFLL?ret:0x7FFFFFFFFFFFLL; - } - return 0x7FFFFFFFFFFFLL; -} - -/*Convenience function converts Q57 value to a clamped 32-bit Q24 value - _in: input in Q57 format. 
- Return: same number in Q24 */ -static ogg_int32_t oc_q57_to_q24(ogg_int64_t _in){ - ogg_int64_t ret; - ret=_in+((ogg_int64_t)1<<32)>>33; - /*0x80000000 is automatically converted to unsigned on 32-bit systems. - -0x7FFFFFFF-1 is needed to avoid "promoting" the whole expression to - unsigned.*/ - return (ogg_int32_t)OC_CLAMPI(-0x7FFFFFFF-1,ret,0x7FFFFFFF); -} - -/*Binary exponential of _log_scale with 24-bit fractional precision and - saturation. - _log_scale: A binary logarithm in Q57 format. - Return: The binary exponential in Q24 format, saturated to 2**31-1 if - _log_scale was too large.*/ -static ogg_int32_t oc_bexp64_q24(ogg_int64_t _log_scale){ - if(_log_scalerc.bits_per_frame=(_enc->state.info.target_bitrate* - (ogg_int64_t)_enc->state.info.fps_denominator)/ - _enc->state.info.fps_numerator; - /*Insane framerates or frame sizes mean insane bitrates. - Let's not get carried away.*/ - if(_enc->rc.bits_per_frame>0x400000000000LL){ - _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL; - } - else if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32; - _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12); - _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay; - /*Start with a buffer fullness of 50% plus 25% of the amount we plan to spend - on a single keyframe interval. 
- We can require fully half the bits in an interval for a keyframe, so this - initial level gives us maximum flexibility for over/under-shooting in - subsequent frames.*/ - _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)* - OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay); - _enc->rc.fullness=_enc->rc.target; - /*Pick exponents and initial scales for quantizer selection.*/ - npixels=_enc->state.info.frame_width* - (ogg_int64_t)_enc->state.info.frame_height; - _enc->rc.log_npixels=oc_blog64(npixels); - ibpp=npixels/_enc->rc.bits_per_frame; - if(ibpp<1){ - _enc->rc.exp[0]=59; - _enc->rc.log_scale[0]=oc_blog64(1997)-OC_Q57(8); - } - else if(ibpp<2){ - _enc->rc.exp[0]=55; - _enc->rc.log_scale[0]=oc_blog64(1604)-OC_Q57(8); - } - else{ - _enc->rc.exp[0]=48; - _enc->rc.log_scale[0]=oc_blog64(834)-OC_Q57(8); - } - if(ibpp<4){ - _enc->rc.exp[1]=100; - _enc->rc.log_scale[1]=oc_blog64(2249)-OC_Q57(8); - } - else if(ibpp<8){ - _enc->rc.exp[1]=95; - _enc->rc.log_scale[1]=oc_blog64(1751)-OC_Q57(8); - } - else{ - _enc->rc.exp[1]=73; - _enc->rc.log_scale[1]=oc_blog64(1260)-OC_Q57(8); - } - _enc->rc.prev_drop_count=0; - _enc->rc.log_drop_scale=OC_Q57(0); - /*Set up second order followers, initialized according to corresponding - time constants.*/ - oc_iir_filter_init(&_enc->rc.scalefilter[0],4, - oc_q57_to_q24(_enc->rc.log_scale[0])); - inter_delay=(_enc->rc.twopass? - OC_MAXI(_enc->keyframe_frequency_force,12):_enc->rc.buf_delay)>>1; - _enc->rc.inter_count=0; - /*We clamp the actual inter_delay to a minimum of 10 to work within the range - of values where later incrementing the delay works as designed. 
- 10 is not an exact choice, but rather a good working trade-off.*/ - _enc->rc.inter_delay=10; - _enc->rc.inter_delay_target=inter_delay; - oc_iir_filter_init(&_enc->rc.scalefilter[1],_enc->rc.inter_delay, - oc_q57_to_q24(_enc->rc.log_scale[1])); - oc_iir_filter_init(&_enc->rc.vfrfilter,4, - oc_bexp64_q24(_enc->rc.log_drop_scale)); -} - -void oc_rc_state_init(oc_rc_state *_rc,oc_enc_ctx *_enc){ - _rc->twopass=0; - _rc->twopass_buffer_bytes=0; - _rc->twopass_force_kf=0; - _rc->frame_metrics=NULL; - _rc->rate_bias=0; - if(_enc->state.info.target_bitrate>0){ - /*The buffer size is set equal to the keyframe interval, clamped to the - range [12,256] frames. - The 12 frame minimum gives us some chance to distribute bit estimation - errors. - The 256 frame maximum means we'll require 8-10 seconds of pre-buffering - at 24-30 fps, which is not unreasonable.*/ - _rc->buf_delay=_enc->keyframe_frequency_force>256? - 256:_enc->keyframe_frequency_force; - /*By default, enforce all buffer constraints.*/ - _rc->drop_frames=1; - _rc->cap_overflow=1; - _rc->cap_underflow=0; - oc_enc_rc_reset(_enc); - } -} - -void oc_rc_state_clear(oc_rc_state *_rc){ - _ogg_free(_rc->frame_metrics); -} - -void oc_enc_rc_resize(oc_enc_ctx *_enc){ - /*If encoding has not yet begun, reset the buffer state.*/ - if(_enc->state.curframe_num<0)oc_enc_rc_reset(_enc); - else{ - int idt; - /*Otherwise, update the bounds on the buffer, but not the current - fullness.*/ - _enc->rc.bits_per_frame=(_enc->state.info.target_bitrate* - (ogg_int64_t)_enc->state.info.fps_denominator)/ - _enc->state.info.fps_numerator; - /*Insane framerates or frame sizes mean insane bitrates. 
- Let's not get carried away.*/ - if(_enc->rc.bits_per_frame>0x400000000000LL){ - _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL; - } - else if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32; - _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12); - _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay; - _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)* - OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay); - /*Update the INTER-frame scale filter delay. - We jump to it immediately if we've already seen enough frames; otherwise - it is simply set as the new target.*/ - _enc->rc.inter_delay_target=idt=OC_MAXI(_enc->rc.buf_delay>>1,10); - if(idtrc.inter_delay,_enc->rc.inter_count)){ - oc_iir_filter_init(&_enc->rc.scalefilter[1],idt, - _enc->rc.scalefilter[1].y[0]); - _enc->rc.inter_delay=idt; - } - } - /*If we're in pass-2 mode, make sure the frame metrics array is big enough - to hold frame statistics for the full buffer.*/ - if(_enc->rc.twopass==2){ - int cfm; - int buf_delay; - int reset_window; - buf_delay=_enc->rc.buf_delay; - reset_window=_enc->rc.frame_metrics==NULL&&(_enc->rc.frames_total[0]==0|| - buf_delay<_enc->rc.frames_total[0]+_enc->rc.frames_total[1] - +_enc->rc.frames_total[2]); - cfm=_enc->rc.cframe_metrics; - /*Only try to resize the frame metrics buffer if a) it's too small and - b) we were using a finite buffer, or are about to start.*/ - if(cfmrc.frame_metrics!=NULL||reset_window)){ - oc_frame_metrics *fm; - int nfm; - int fmh; - fm=(oc_frame_metrics *)_ogg_realloc(_enc->rc.frame_metrics, - buf_delay*sizeof(*_enc->rc.frame_metrics)); - if(fm==NULL){ - /*We failed to allocate a finite buffer.*/ - /*If we don't have a valid 2-pass header yet, just return; we'll reset - the buffer size when we read the header.*/ - if(_enc->rc.frames_total[0]==0)return; - /*Otherwise revert to the largest finite buffer previously set, or to - whole-file buffering if we were still using that.*/ - 
_enc->rc.buf_delay=_enc->rc.frame_metrics!=NULL? - cfm:_enc->rc.frames_total[0]+_enc->rc.frames_total[1] - +_enc->rc.frames_total[2]; - oc_enc_rc_resize(_enc); - return; - } - _enc->rc.frame_metrics=fm; - _enc->rc.cframe_metrics=buf_delay; - /*Re-organize the circular buffer.*/ - fmh=_enc->rc.frame_metrics_head; - nfm=_enc->rc.nframe_metrics; - if(fmh+nfm>cfm){ - int shift; - shift=OC_MINI(fmh+nfm-cfm,buf_delay-cfm); - memcpy(fm+cfm,fm,OC_MINI(fmh+nfm-cfm,buf_delay-cfm)*sizeof(*fm)); - if(fmh+nfm>buf_delay)memmove(fm,fm+shift,fmh+nfm-buf_delay); - } - } - /*We were using whole-file buffering; now we're not.*/ - if(reset_window){ - _enc->rc.nframes[0]=_enc->rc.nframes[1]=_enc->rc.nframes[2]=0; - _enc->rc.scale_sum[0]=_enc->rc.scale_sum[1]=0; - _enc->rc.scale_window_end=_enc->rc.scale_window0= - _enc->state.curframe_num+_enc->prev_dup_count+1; - if(_enc->rc.twopass_buffer_bytes){ - int qti; - /*We already read the metrics for the first frame in the window.*/ - *(_enc->rc.frame_metrics)=*&_enc->rc.cur_metrics; - _enc->rc.nframe_metrics++; - qti=_enc->rc.cur_metrics.frame_type; - _enc->rc.nframes[qti]++; - _enc->rc.nframes[2]+=_enc->rc.cur_metrics.dup_count; - _enc->rc.scale_sum[qti]+=oc_bexp_q24(_enc->rc.cur_metrics.log_scale); - _enc->rc.scale_window_end+=_enc->rc.cur_metrics.dup_count+1; - if(_enc->rc.scale_window_end-_enc->rc.scale_window0rc.twopass_buffer_bytes=0; - } - } - } - /*Otherwise, we could shrink the size of the current window, if necessary, - but leaving it like it is lets us adapt to the new buffer size more - gracefully.*/ - } -} - -/*Scale the number of frames by the number of expected drops/duplicates.*/ -static int oc_rc_scale_drop(oc_rc_state *_rc,int _nframes){ - if(_rc->prev_drop_count>0||_rc->log_drop_scale>OC_Q57(0)){ - ogg_int64_t dup_scale; - dup_scale=oc_bexp64((_rc->log_drop_scale - +oc_blog64(_rc->prev_drop_count+1)>>1)+OC_Q57(8)); - if(dup_scale<_nframes<<8){ - int dup_scalei; - dup_scalei=(int)dup_scale; - 
if(dup_scalei>0)_nframes=((_nframes<<8)+dup_scalei-1)/dup_scalei; - } - else _nframes=!!_nframes; - } - return _nframes; -} - -int oc_enc_select_qi(oc_enc_ctx *_enc,int _qti,int _clamp){ - ogg_int64_t rate_total; - ogg_int64_t rate_bias; - int nframes[2]; - int buf_delay; - int buf_pad; - ogg_int64_t log_qtarget; - ogg_int64_t log_scale0; - ogg_int64_t log_cur_scale; - ogg_int64_t log_qexp; - int exp0; - int old_qi; - int qi; - /*Figure out how to re-distribute bits so that we hit our fullness target - before the last keyframe in our current buffer window (after the current - frame), or the end of the buffer window, whichever comes first.*/ - log_cur_scale=(ogg_int64_t)_enc->rc.scalefilter[_qti].y[0]<<33; - buf_pad=0; - switch(_enc->rc.twopass){ - default:{ - ogg_uint32_t next_key_frame; - /*Single pass mode: assume only forced keyframes and attempt to estimate - the drop count for VFR content.*/ - next_key_frame=_qti?_enc->keyframe_frequency_force - -(_enc->state.curframe_num-_enc->state.keyframe_num):0; - nframes[0]=(_enc->rc.buf_delay-OC_MINI(next_key_frame,_enc->rc.buf_delay) - +_enc->keyframe_frequency_force-1)/_enc->keyframe_frequency_force; - if(nframes[0]+_qti>1){ - nframes[0]--; - buf_delay=next_key_frame+nframes[0]*_enc->keyframe_frequency_force; - } - else buf_delay=_enc->rc.buf_delay; - nframes[1]=buf_delay-nframes[0]; - /*Downgrade the delta frame rate to correspond to the recent drop count - history.*/ - nframes[1]=oc_rc_scale_drop(&_enc->rc,nframes[1]); - }break; - case 1:{ - /*Pass 1 mode: use a fixed qi value.*/ - qi=_enc->state.qis[0]; - _enc->rc.log_qtarget=_enc->log_qavg[_qti][qi]; - return qi; - }break; - case 2:{ - ogg_int64_t scale_sum[2]; - int qti; - /*Pass 2 mode: we know exactly how much of each frame type there is in - the current buffer window, and have estimates for the scales.*/ - nframes[0]=_enc->rc.nframes[0]; - nframes[1]=_enc->rc.nframes[1]; - scale_sum[0]=_enc->rc.scale_sum[0]; - scale_sum[1]=_enc->rc.scale_sum[1]; - /*The window 
size can be slightly larger than the buffer window for VFR - content; clamp it down, if appropriate (the excess will all be dup - frames).*/ - buf_delay=OC_MINI(_enc->rc.scale_window_end-_enc->rc.scale_window0, - _enc->rc.buf_delay); - /*If we're approaching the end of the file, add some slack to keep us - from slamming into a rail. - Our rate accuracy goes down, but it keeps the result sensible. - We position the target where the first forced keyframe beyond the end - of the file would be (for consistency with 1-pass mode).*/ - buf_pad=OC_MINI(_enc->rc.buf_delay,_enc->state.keyframe_num - +_enc->keyframe_frequency_force-_enc->rc.scale_window0); - if(buf_delayrc.frame_metrics!=NULL){ - int fmi; - int fm_tail; - fm_tail=_enc->rc.frame_metrics_head+_enc->rc.nframe_metrics; - if(fm_tail>=_enc->rc.cframe_metrics)fm_tail-=_enc->rc.cframe_metrics; - for(fmi=fm_tail;;){ - oc_frame_metrics *m; - fmi--; - if(fmi<0)fmi+=_enc->rc.cframe_metrics; - /*Stop before we remove the first frame.*/ - if(fmi==_enc->rc.frame_metrics_head)break; - m=_enc->rc.frame_metrics+fmi; - /*If we find a keyframe, remove it and everything past it.*/ - if(m->frame_type==OC_INTRA_FRAME){ - do{ - qti=m->frame_type; - nframes[qti]--; - scale_sum[qti]-=oc_bexp_q24(m->log_scale); - buf_delay-=m->dup_count+1; - fmi++; - if(fmi>=_enc->rc.cframe_metrics)fmi=0; - m=_enc->rc.frame_metrics+fmi; - } - while(fmi!=fm_tail); - /*And stop scanning backwards.*/ - break; - } - } - } - } - /*If we're not using the same frame type as in pass 1 (because someone - changed the keyframe interval), remove that scale estimate. - We'll add in a replacement for the correct frame type below.*/ - qti=_enc->rc.cur_metrics.frame_type; - if(qti!=_qti){ - nframes[qti]--; - scale_sum[qti]-=oc_bexp_q24(_enc->rc.cur_metrics.log_scale); - } - /*Compute log_scale estimates for each frame type from the pass-1 scales - we measured in the current window.*/ - for(qti=0;qti<2;qti++){ - _enc->rc.log_scale[qti]=nframes[qti]>0? 
- oc_blog64(scale_sum[qti])-oc_blog64(nframes[qti])-OC_Q57(24): - -_enc->rc.log_npixels; - } - /*If we're not using the same frame type as in pass 1, add a scale - estimate for the corresponding frame using the current low-pass - filter value. - This is mostly to ensure we have a valid estimate even when pass 1 had - no frames of this type in the buffer window. - TODO: We could also plan ahead and figure out how many keyframes we'll - be forced to add in the current buffer window.*/ - qti=_enc->rc.cur_metrics.frame_type; - if(qti!=_qti){ - ogg_int64_t scale; - scale=_enc->rc.log_scale[_qti]rc.log_scale[_qti]+OC_Q57(24)):0x7FFFFFFFFFFFLL; - scale*=nframes[_qti]; - nframes[_qti]++; - scale+=oc_bexp_q24(log_cur_scale>>33); - _enc->rc.log_scale[_qti]=oc_blog64(scale) - -oc_blog64(nframes[qti])-OC_Q57(24); - } - else log_cur_scale=(ogg_int64_t)_enc->rc.cur_metrics.log_scale<<33; - /*Add the padding from above. - This basically reverts to 1-pass estimations in the last keyframe - interval.*/ - if(buf_pad>0){ - ogg_int64_t scale; - int nextra_frames; - /*Extend the buffer.*/ - buf_delay+=buf_pad; - /*Add virtual delta frames according to the estimated drop count.*/ - nextra_frames=oc_rc_scale_drop(&_enc->rc,buf_pad); - /*And blend in the low-pass filtered scale according to how many frames - we added.*/ - scale= - oc_bexp64(_enc->rc.log_scale[1]+OC_Q57(24))*(ogg_int64_t)nframes[1] - +oc_bexp_q24(_enc->rc.scalefilter[1].y[0])*(ogg_int64_t)nextra_frames; - nframes[1]+=nextra_frames; - _enc->rc.log_scale[1]=oc_blog64(scale)-oc_blog64(nframes[1])-OC_Q57(24); - } - }break; - } - /*If we've been missing our target, add a penalty term.*/ - rate_bias=(_enc->rc.rate_bias/(_enc->state.curframe_num+1000))* - (buf_delay-buf_pad); - /*rate_total is the total bits available over the next buf_delay frames.*/ - rate_total=_enc->rc.fullness-_enc->rc.target+rate_bias - +buf_delay*_enc->rc.bits_per_frame; - log_scale0=_enc->rc.log_scale[_qti]+_enc->rc.log_npixels; - /*If there aren't enough 
bits to achieve our desired fullness level, use the - minimum quality permitted.*/ - if(rate_total<=buf_delay)log_qtarget=OC_QUANT_MAX_LOG; - else{ - static const ogg_int64_t LOG_KEY_RATIO=0x0137222BB70747BALL; - ogg_int64_t log_scale1; - ogg_int64_t rlo; - ogg_int64_t rhi; - log_scale1=_enc->rc.log_scale[1-_qti]+_enc->rc.log_npixels; - rlo=0; - rhi=(rate_total+nframes[_qti]-1)/nframes[_qti]; - while(rlo>1; - log_rpow=oc_blog64(curr)-log_scale0; - log_rpow=(log_rpow+(_enc->rc.exp[_qti]>>1))/_enc->rc.exp[_qti]; - if(_qti)log_rpow+=LOG_KEY_RATIO>>6; - else log_rpow-=LOG_KEY_RATIO>>6; - log_rpow*=_enc->rc.exp[1-_qti]; - rscale=nframes[1-_qti]*oc_bexp64(log_scale1+log_rpow); - rdiff=nframes[_qti]*curr+rscale-rate_total; - if(rdiff<0)rlo=curr+1; - else if(rdiff>0)rhi=curr-1; - else break; - } - log_qtarget=OC_Q57(2)-((oc_blog64(rlo)-log_scale0+(_enc->rc.exp[_qti]>>1))/ - _enc->rc.exp[_qti]<<6); - log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG); - } - /*The above allocation looks only at the total rate we'll accumulate in the - next buf_delay frames. - However, we could overflow the buffer on the very next frame, so check for - that here, if we're not using a soft target.*/ - exp0=_enc->rc.exp[_qti]; - if(_enc->rc.cap_overflow){ - ogg_int64_t margin; - ogg_int64_t soft_limit; - ogg_int64_t log_soft_limit; - /*Allow 3% of the buffer for prediction error. 
- This should be plenty, and we don't mind if we go a bit over; we only - want to keep these bits from being completely wasted.*/ - margin=_enc->rc.max+31>>5; - /*We want to use at least this many bits next frame.*/ - soft_limit=_enc->rc.fullness+_enc->rc.bits_per_frame-(_enc->rc.max-margin); - log_soft_limit=oc_blog64(soft_limit); - /*If we're predicting we won't use that many...*/ - log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0; - if(log_scale0-log_qexp>32)* - ((OC_MINI(margin,soft_limit)<<32)/margin); - log_qtarget=((log_qexp+(exp0>>1))/exp0<<6)+OC_Q57(2); - } - } - /*If this was not one of the initial frames, limit the change in quality.*/ - old_qi=_enc->state.qis[0]; - if(_clamp){ - ogg_int64_t log_qmin; - ogg_int64_t log_qmax; - /*Clamp the target quantizer to within [0.8*Q,1.2*Q], where Q is the - current quantizer. - TODO: With user-specified quant matrices, we need to enlarge these limits - if they don't actually let us change qi values.*/ - log_qmin=_enc->log_qavg[_qti][old_qi]-0x00A4D3C25E68DC58LL; - log_qmax=_enc->log_qavg[_qti][old_qi]+0x00A4D3C25E68DC58LL; - log_qtarget=OC_CLAMPI(log_qmin,log_qtarget,log_qmax); - } - /*The above allocation looks only at the total rate we'll accumulate in the - next buf_delay frames. - However, we could bust the budget on the very next frame, so check for that - here, if we're not using a soft target.*/ - /* Disabled when our minimum qi > 0; if we saturate log_qtarget to - to the maximum possible size when we have a minimum qi, the - resulting lambda will interact very strangely with SKIP. The - resulting artifacts look like waterfalls. */ - if(_enc->state.info.quality==0){ - ogg_int64_t log_hard_limit; - /*Compute the maximum number of bits we can use in the next frame. - Allow 50% of the rate for a single frame for prediction error. 
- This may not be enough for keyframes or sudden changes in complexity.*/ - log_hard_limit=oc_blog64(_enc->rc.fullness+(_enc->rc.bits_per_frame>>1)); - /*If we're predicting we'll use more than this...*/ - log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0; - if(log_scale0-log_qexp>log_hard_limit){ - /*Force the target to hit our limit exactly.*/ - log_qexp=log_scale0-log_hard_limit; - log_qtarget=((log_qexp+(exp0>>1))/exp0<<6)+OC_Q57(2); - /*If that target is unreasonable, oh well; we'll have to drop.*/ - log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG); - } - } - /*Compute a final estimate of the number of bits we plan to use.*/ - log_qexp=(log_qtarget-OC_Q57(2)>>6)*_enc->rc.exp[_qti]; - _enc->rc.rate_bias+=oc_bexp64(log_cur_scale+_enc->rc.log_npixels-log_qexp); - qi=oc_enc_find_qi_for_target(_enc,_qti,old_qi, - _enc->state.info.quality,log_qtarget); - /*Save the quantizer target for lambda calculations.*/ - _enc->rc.log_qtarget=log_qtarget; - return qi; -} - -int oc_enc_update_rc_state(oc_enc_ctx *_enc, - long _bits,int _qti,int _qi,int _trial,int _droppable){ - ogg_int64_t buf_delta; - ogg_int64_t log_scale; - int dropped; - dropped=0; - /* Drop frames also disabled for now in the case of infinite-buffer - two-pass mode */ - if(!_enc->rc.drop_frames||_enc->rc.twopass&&_enc->rc.frame_metrics==NULL){ - _droppable=0; - } - buf_delta=_enc->rc.bits_per_frame*(1+_enc->dup_count); - if(_bits<=0){ - /*We didn't code any blocks in this frame.*/ - log_scale=OC_Q57(-64); - _bits=0; - } - else{ - ogg_int64_t log_bits; - ogg_int64_t log_qexp; - /*Compute the estimated scale factor for this frame type.*/ - log_bits=oc_blog64(_bits); - log_qexp=_enc->rc.log_qtarget-OC_Q57(2); - log_qexp=(log_qexp>>6)*(_enc->rc.exp[_qti]); - log_scale=OC_MINI(log_bits-_enc->rc.log_npixels+log_qexp,OC_Q57(16)); - } - /*Special two-pass processing.*/ - switch(_enc->rc.twopass){ - case 1:{ - /*Pass 1 mode: save the metrics for this frame.*/ - _enc->rc.cur_metrics.log_scale=oc_q57_to_q24(log_scale); - 
_enc->rc.cur_metrics.dup_count=_enc->dup_count; - _enc->rc.cur_metrics.frame_type=_enc->state.frame_type; - _enc->rc.twopass_buffer_bytes=0; - }break; - case 2:{ - /*Pass 2 mode:*/ - if(!_trial){ - ogg_int64_t next_frame_num; - int qti; - /*Move the current metrics back one frame.*/ - *&_enc->rc.prev_metrics=*&_enc->rc.cur_metrics; - next_frame_num=_enc->state.curframe_num+_enc->dup_count+1; - /*Back out the last frame's statistics from the sliding window.*/ - qti=_enc->rc.prev_metrics.frame_type; - _enc->rc.frames_left[qti]--; - _enc->rc.frames_left[2]-=_enc->rc.prev_metrics.dup_count; - _enc->rc.nframes[qti]--; - _enc->rc.nframes[2]-=_enc->rc.prev_metrics.dup_count; - _enc->rc.scale_sum[qti]-=oc_bexp_q24(_enc->rc.prev_metrics.log_scale); - _enc->rc.scale_window0=(int)next_frame_num; - /*Free the corresponding entry in the circular buffer.*/ - if(_enc->rc.frame_metrics!=NULL){ - _enc->rc.nframe_metrics--; - _enc->rc.frame_metrics_head++; - if(_enc->rc.frame_metrics_head>=_enc->rc.cframe_metrics){ - _enc->rc.frame_metrics_head=0; - } - } - /*Mark us ready for the next 2-pass packet.*/ - _enc->rc.twopass_buffer_bytes=0; - /*Update state, so the user doesn't have to keep calling 2pass_in after - they've fed in all the data when we're using a finite buffer.*/ - _enc->prev_dup_count=_enc->dup_count; - oc_enc_rc_2pass_in(_enc,NULL,0); - } - }break; - } - /*Common to all passes:*/ - if(_bits>0){ - if(_trial){ - oc_iir_filter *f; - /*Use the estimated scale factor directly if this was a trial.*/ - f=_enc->rc.scalefilter+_qti; - f->y[1]=f->y[0]=f->x[1]=f->x[0]=oc_q57_to_q24(log_scale); - _enc->rc.log_scale[_qti]=log_scale; - } - else{ - /*Lengthen the time constant for the INTER filter as we collect more - frame statistics, until we reach our target.*/ - if(_enc->rc.inter_delay<_enc->rc.inter_delay_target&& - _enc->rc.inter_count>=_enc->rc.inter_delay&&_qti==OC_INTER_FRAME){ - oc_iir_filter_reinit(&_enc->rc.scalefilter[1],++_enc->rc.inter_delay); - } - /*Otherwise update 
the low-pass scale filter for this frame type, - regardless of whether or not we dropped this frame.*/ - _enc->rc.log_scale[_qti]=oc_iir_filter_update( - _enc->rc.scalefilter+_qti,oc_q57_to_q24(log_scale))<<33; - /*If this frame busts our budget, it must be dropped.*/ - if(_droppable&&_enc->rc.fullness+buf_delta<_bits){ - _enc->rc.prev_drop_count+=1+_enc->dup_count; - _bits=0; - dropped=1; - } - else{ - ogg_uint32_t drop_count; - /*Update a low-pass filter to estimate the "real" frame rate taking - drops and duplicates into account. - This is only done if the frame is coded, as it needs the final - count of dropped frames.*/ - drop_count=_enc->rc.prev_drop_count+1; - if(drop_count>0x7F)drop_count=0x7FFFFFFF; - else drop_count<<=24; - _enc->rc.log_drop_scale=oc_blog64(oc_iir_filter_update( - &_enc->rc.vfrfilter,drop_count))-OC_Q57(24); - /*Initialize the drop count for this frame to the user-requested dup - count. - It will be increased if we drop more frames.*/ - _enc->rc.prev_drop_count=_enc->dup_count; - } - } - /*Increment the INTER frame count, for filter adaptation purposes.*/ - if(_enc->rc.inter_countrc.inter_count+=_qti; - } - /*Increase the drop count.*/ - else _enc->rc.prev_drop_count+=1+_enc->dup_count; - /*And update the buffer fullness level.*/ - if(!_trial){ - _enc->rc.fullness+=buf_delta-_bits; - /*If we're too quick filling the buffer and overflow is capped, - that rate is lost forever.*/ - if(_enc->rc.cap_overflow&&_enc->rc.fullness>_enc->rc.max){ - _enc->rc.fullness=_enc->rc.max; - } - /*If we're too quick draining the buffer and underflow is capped, - don't try to make up that rate later.*/ - if(_enc->rc.cap_underflow&&_enc->rc.fullness<0){ - _enc->rc.fullness=0; - } - /*Adjust the bias for the real bits we've used.*/ - _enc->rc.rate_bias-=_bits; - } - return dropped; -} - -#define OC_RC_2PASS_VERSION (1) -#define OC_RC_2PASS_HDR_SZ (38) -#define OC_RC_2PASS_PACKET_SZ (8) - -static void oc_rc_buffer_val(oc_rc_state *_rc,ogg_int64_t _val,int 
_bytes){ - while(_bytes-->0){ - _rc->twopass_buffer[_rc->twopass_buffer_bytes++]=(unsigned char)(_val&0xFF); - _val>>=8; - } -} - -int oc_enc_rc_2pass_out(oc_enc_ctx *_enc,unsigned char **_buf){ - if(_enc->rc.twopass_buffer_bytes==0){ - if(_enc->rc.twopass==0){ - int qi; - /*Pick first-pass qi for scale calculations.*/ - qi=oc_enc_select_qi(_enc,0,0); - _enc->state.nqis=1; - _enc->state.qis[0]=qi; - _enc->rc.twopass=1; - _enc->rc.frames_total[0]=_enc->rc.frames_total[1]= - _enc->rc.frames_total[2]=0; - _enc->rc.scale_sum[0]=_enc->rc.scale_sum[1]=0; - /*Fill in dummy summary values.*/ - oc_rc_buffer_val(&_enc->rc,0x5032544F,4); - oc_rc_buffer_val(&_enc->rc,OC_RC_2PASS_VERSION,4); - oc_rc_buffer_val(&_enc->rc,0,OC_RC_2PASS_HDR_SZ-8); - } - else{ - int qti; - qti=_enc->rc.cur_metrics.frame_type; - _enc->rc.scale_sum[qti]+=oc_bexp_q24(_enc->rc.cur_metrics.log_scale); - _enc->rc.frames_total[qti]++; - _enc->rc.frames_total[2]+=_enc->rc.cur_metrics.dup_count; - oc_rc_buffer_val(&_enc->rc, - _enc->rc.cur_metrics.dup_count|_enc->rc.cur_metrics.frame_type<<31,4); - oc_rc_buffer_val(&_enc->rc,_enc->rc.cur_metrics.log_scale,4); - } - } - else if(_enc->packet_state==OC_PACKET_DONE&& - _enc->rc.twopass_buffer_bytes!=OC_RC_2PASS_HDR_SZ){ - _enc->rc.twopass_buffer_bytes=0; - oc_rc_buffer_val(&_enc->rc,0x5032544F,4); - oc_rc_buffer_val(&_enc->rc,OC_RC_2PASS_VERSION,4); - oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[0],4); - oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[1],4); - oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[2],4); - oc_rc_buffer_val(&_enc->rc,_enc->rc.exp[0],1); - oc_rc_buffer_val(&_enc->rc,_enc->rc.exp[1],1); - oc_rc_buffer_val(&_enc->rc,_enc->rc.scale_sum[0],8); - oc_rc_buffer_val(&_enc->rc,_enc->rc.scale_sum[1],8); - } - else{ - /*The data for this frame has already been retrieved.*/ - *_buf=NULL; - return 0; - } - *_buf=_enc->rc.twopass_buffer; - return _enc->rc.twopass_buffer_bytes; -} - -static size_t oc_rc_buffer_fill(oc_rc_state *_rc, - unsigned 
char *_buf,size_t _bytes,size_t _consumed,size_t _goal){ - while(_rc->twopass_buffer_fill<_goal&&_consumed<_bytes){ - _rc->twopass_buffer[_rc->twopass_buffer_fill++]=_buf[_consumed++]; - } - return _consumed; -} - -static ogg_int64_t oc_rc_unbuffer_val(oc_rc_state *_rc,int _bytes){ - ogg_int64_t ret; - int shift; - ret=0; - shift=0; - while(_bytes-->0){ - ret|=((ogg_int64_t)_rc->twopass_buffer[_rc->twopass_buffer_bytes++])<rc.twopass==0){ - _enc->rc.twopass=2; - _enc->rc.twopass_buffer_fill=0; - _enc->rc.frames_total[0]=0; - _enc->rc.nframe_metrics=0; - _enc->rc.cframe_metrics=0; - _enc->rc.frame_metrics_head=0; - _enc->rc.scale_window0=0; - _enc->rc.scale_window_end=0; - } - /*If we haven't got a valid summary header yet, try to parse one.*/ - if(_enc->rc.frames_total[0]==0){ - if(!_buf){ - int frames_needed; - /*If we're using a whole-file buffer, we just need the first frame. - Otherwise, we may need as many as one per buffer slot.*/ - frames_needed=_enc->rc.frame_metrics==NULL?1:_enc->rc.buf_delay; - return OC_RC_2PASS_HDR_SZ+frames_needed*OC_RC_2PASS_PACKET_SZ - -_enc->rc.twopass_buffer_fill; - } - consumed=oc_rc_buffer_fill(&_enc->rc, - _buf,_bytes,consumed,OC_RC_2PASS_HDR_SZ); - if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_HDR_SZ){ - ogg_int64_t scale_sum[2]; - int exp[2]; - int buf_delay; - /*Read the summary header data.*/ - /*Check the magic value and version number.*/ - if(oc_rc_unbuffer_val(&_enc->rc,4)!=0x5032544F|| - oc_rc_unbuffer_val(&_enc->rc,4)!=OC_RC_2PASS_VERSION){ - _enc->rc.twopass_buffer_bytes=0; - return TH_ENOTFORMAT; - } - _enc->rc.frames_total[0]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4); - _enc->rc.frames_total[1]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4); - _enc->rc.frames_total[2]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4); - exp[0]=(int)oc_rc_unbuffer_val(&_enc->rc,1); - exp[1]=(int)oc_rc_unbuffer_val(&_enc->rc,1); - scale_sum[0]=oc_rc_unbuffer_val(&_enc->rc,8); - scale_sum[1]=oc_rc_unbuffer_val(&_enc->rc,8); - /*Make sure 
the file claims to have at least one frame. - Otherwise we probably got the placeholder data from an aborted pass 1. - Also make sure the total frame count doesn't overflow an integer.*/ - buf_delay=_enc->rc.frames_total[0]+_enc->rc.frames_total[1] - +_enc->rc.frames_total[2]; - if(_enc->rc.frames_total[0]==0||buf_delay<0|| - (ogg_uint32_t)buf_delay<_enc->rc.frames_total[0]|| - (ogg_uint32_t)buf_delay<_enc->rc.frames_total[1]){ - _enc->rc.frames_total[0]=0; - _enc->rc.twopass_buffer_bytes=0; - return TH_EBADHEADER; - } - /*Got a valid header; set up pass 2.*/ - _enc->rc.frames_left[0]=_enc->rc.frames_total[0]; - _enc->rc.frames_left[1]=_enc->rc.frames_total[1]; - _enc->rc.frames_left[2]=_enc->rc.frames_total[2]; - /*If the user hasn't specified a buffer size, use the whole file.*/ - if(_enc->rc.frame_metrics==NULL){ - _enc->rc.buf_delay=buf_delay; - _enc->rc.nframes[0]=_enc->rc.frames_total[0]; - _enc->rc.nframes[1]=_enc->rc.frames_total[1]; - _enc->rc.nframes[2]=_enc->rc.frames_total[2]; - _enc->rc.scale_sum[0]=scale_sum[0]; - _enc->rc.scale_sum[1]=scale_sum[1]; - _enc->rc.scale_window_end=buf_delay; - oc_enc_rc_reset(_enc); - } - _enc->rc.exp[0]=exp[0]; - _enc->rc.exp[1]=exp[1]; - /*Clear the header data from the buffer to make room for packet data.*/ - _enc->rc.twopass_buffer_fill=0; - _enc->rc.twopass_buffer_bytes=0; - } - } - if(_enc->rc.frames_total[0]!=0){ - ogg_int64_t curframe_num; - int nframes_total; - curframe_num=_enc->state.curframe_num; - if(curframe_num>=0){ - /*We just encoded a frame; make sure things matched.*/ - if(_enc->rc.prev_metrics.dup_count!=_enc->prev_dup_count){ - _enc->rc.twopass_buffer_bytes=0; - return TH_EINVAL; - } - } - curframe_num+=_enc->prev_dup_count+1; - nframes_total=_enc->rc.frames_total[0]+_enc->rc.frames_total[1] - +_enc->rc.frames_total[2]; - if(curframe_num>=nframes_total){ - /*We don't want any more data after the last frame, and we don't want to - allow any more frames to be encoded.*/ - 
_enc->rc.twopass_buffer_bytes=0; - } - else if(_enc->rc.twopass_buffer_bytes==0){ - if(_enc->rc.frame_metrics==NULL){ - /*We're using a whole-file buffer:*/ - if(!_buf)return OC_RC_2PASS_PACKET_SZ-_enc->rc.twopass_buffer_fill; - consumed=oc_rc_buffer_fill(&_enc->rc, - _buf,_bytes,consumed,OC_RC_2PASS_PACKET_SZ); - if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_PACKET_SZ){ - ogg_uint32_t dup_count; - ogg_int32_t log_scale; - int qti; - int arg; - /*Read the metrics for the next frame.*/ - dup_count=oc_rc_unbuffer_val(&_enc->rc,4); - log_scale=oc_rc_unbuffer_val(&_enc->rc,4); - _enc->rc.cur_metrics.log_scale=log_scale; - qti=(dup_count&0x80000000)>>31; - _enc->rc.cur_metrics.dup_count=dup_count&0x7FFFFFFF; - _enc->rc.cur_metrics.frame_type=qti; - _enc->rc.twopass_force_kf=qti==OC_INTRA_FRAME; - /*"Helpfully" set the dup count back to what it was in pass 1.*/ - arg=_enc->rc.cur_metrics.dup_count; - th_encode_ctl(_enc,TH_ENCCTL_SET_DUP_COUNT,&arg,sizeof(arg)); - /*Clear the buffer for the next frame.*/ - _enc->rc.twopass_buffer_fill=0; - } - } - else{ - int frames_needed; - /*We're using a finite buffer:*/ - frames_needed=OC_CLAMPI(0,_enc->rc.buf_delay - -(_enc->rc.scale_window_end-_enc->rc.scale_window0), - _enc->rc.frames_left[0]+_enc->rc.frames_left[1] - -_enc->rc.nframes[0]-_enc->rc.nframes[1]); - while(frames_needed>0){ - if(!_buf){ - return OC_RC_2PASS_PACKET_SZ*frames_needed - -_enc->rc.twopass_buffer_fill; - } - consumed=oc_rc_buffer_fill(&_enc->rc, - _buf,_bytes,consumed,OC_RC_2PASS_PACKET_SZ); - if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_PACKET_SZ){ - oc_frame_metrics *m; - int fmi; - ogg_uint32_t dup_count; - ogg_int32_t log_scale; - int qti; - /*Read the metrics for the next frame.*/ - dup_count=oc_rc_unbuffer_val(&_enc->rc,4); - log_scale=oc_rc_unbuffer_val(&_enc->rc,4); - /*Add the to the circular buffer.*/ - fmi=_enc->rc.frame_metrics_head+_enc->rc.nframe_metrics++; - if(fmi>=_enc->rc.cframe_metrics)fmi-=_enc->rc.cframe_metrics; - 
m=_enc->rc.frame_metrics+fmi; - m->log_scale=log_scale; - qti=(dup_count&0x80000000)>>31; - m->dup_count=dup_count&0x7FFFFFFF; - m->frame_type=qti; - /*And accumulate the statistics over the window.*/ - _enc->rc.nframes[qti]++; - _enc->rc.nframes[2]+=m->dup_count; - _enc->rc.scale_sum[qti]+=oc_bexp_q24(m->log_scale); - _enc->rc.scale_window_end+=m->dup_count+1; - /*Compute an upper bound on the number of remaining packets needed - for the current window.*/ - frames_needed=OC_CLAMPI(0,_enc->rc.buf_delay - -(_enc->rc.scale_window_end-_enc->rc.scale_window0), - _enc->rc.frames_left[0]+_enc->rc.frames_left[1] - -_enc->rc.nframes[0]-_enc->rc.nframes[1]); - /*Clear the buffer for the next frame.*/ - _enc->rc.twopass_buffer_fill=0; - _enc->rc.twopass_buffer_bytes=0; - } - /*Go back for more data.*/ - else break; - } - /*If we've got all the frames we need, fill in the current metrics. - We're ready to go.*/ - if(frames_needed<=0){ - int arg; - *&_enc->rc.cur_metrics= - *(_enc->rc.frame_metrics+_enc->rc.frame_metrics_head); - _enc->rc.twopass_force_kf= - _enc->rc.cur_metrics.frame_type==OC_INTRA_FRAME; - /*"Helpfully" set the dup count back to what it was in pass 1.*/ - arg=_enc->rc.cur_metrics.dup_count; - th_encode_ctl(_enc,TH_ENCCTL_SET_DUP_COUNT,&arg,sizeof(arg)); - /*Mark us ready for the next frame.*/ - _enc->rc.twopass_buffer_bytes=1; - } - } - } - } - return (int)consumed; -} diff --git a/drivers/theora/state.c b/drivers/theora/state.c deleted file mode 100644 index 42ed33a9a3..0000000000 --- a/drivers/theora/state.c +++ /dev/null @@ -1,1227 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: state.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -#include -#include -#include "internal.h" -#if defined(OC_X86_ASM) -#if defined(_MSC_VER) -# include "x86_vc/x86int.h" -#else -# include "x86/x86int.h" -#endif -#endif -#if defined(OC_DUMP_IMAGES) -# include -# include "png.h" -#endif - -/*Returns the fragment index of the top-left block in a macro block. - This can be used to test whether or not the whole macro block is valid. - _sb_map: The super block map. - _quadi: The quadrant number. - Return: The index of the fragment of the upper left block in the macro - block, or -1 if the block lies outside the coded frame.*/ -static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){ - /*It so happens that under the Hilbert curve ordering described below, the - upper-left block in each macro block is at index 0, except in macro block - 3, where it is at index 2.*/ - return _sb_map[_quadi][_quadi&_quadi<<1]; -} - -/*Fills in the mapping from block positions to fragment numbers for a single - color plane. - This function also fills in the "valid" flag of each quadrant in the super - block flags. - _sb_maps: The array of super block maps for the color plane. - _sb_flags: The array of super block flags for the color plane. - _frag0: The index of the first fragment in the plane. - _hfrags: The number of horizontal fragments in a coded frame. - _vfrags: The number of vertical fragments in a coded frame.*/ -static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[], - oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){ - /*Contains the (macro_block,block) indices for a 4x4 grid of - fragments. 
- The pattern is a 4x4 Hilbert space-filling curve. - A Hilbert curve has the nice property that as the curve grows larger, its - fractal dimension approaches 2. - The intuition is that nearby blocks in the curve are also close spatially, - with the previous element always an immediate neighbor, so that runs of - blocks should be well correlated.*/ - static const int SB_MAP[4][4][2]={ - {{0,0},{0,1},{3,2},{3,3}}, - {{0,3},{0,2},{3,1},{3,0}}, - {{1,0},{1,3},{2,0},{2,3}}, - {{1,1},{1,2},{2,1},{2,2}} - }; - ptrdiff_t yfrag; - unsigned sbi; - int y; - sbi=0; - yfrag=_frag0; - for(y=0;;y+=4){ - int imax; - int x; - /*Figure out how many columns of blocks in this super block lie within the - image.*/ - imax=_vfrags-y; - if(imax>4)imax=4; - else if(imax<=0)break; - for(x=0;;x+=4,sbi++){ - ptrdiff_t xfrag; - int jmax; - int quadi; - int i; - /*Figure out how many rows of blocks in this super block lie within the - image.*/ - jmax=_hfrags-x; - if(jmax>4)jmax=4; - else if(jmax<=0)break; - /*By default, set all fragment indices to -1.*/ - memset(_sb_maps[sbi][0],0xFF,sizeof(_sb_maps[sbi])); - /*Fill in the fragment map for this super block.*/ - xfrag=yfrag+x; - for(i=0;i=0)<nhfrags+_xfrag0+j; - } -} - -/*Fills in the chroma plane fragment maps for a macro block. - This version is for use with chroma decimated in the X and Y directions - (4:2:0). - _mb_map: The macro block map to fill. - _fplanes: The descriptions of the fragment planes. - _xfrag0: The X location of the upper-left hand fragment in the luma plane. - _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ -static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3], - const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ - ptrdiff_t fragi; - _xfrag0>>=1; - _yfrag0>>=1; - fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; - _mb_map[1][0]=fragi+_fplanes[1].froffset; - _mb_map[2][0]=fragi+_fplanes[2].froffset; -} - -/*Fills in the chroma plane fragment maps for a macro block. 
- This version is for use with chroma decimated in the Y direction. - _mb_map: The macro block map to fill. - _fplanes: The descriptions of the fragment planes. - _xfrag0: The X location of the upper-left hand fragment in the luma plane. - _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ -static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3], - const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ - ptrdiff_t fragi; - int j; - _yfrag0>>=1; - fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; - for(j=0;j<2;j++){ - _mb_map[1][j]=fragi+_fplanes[1].froffset; - _mb_map[2][j]=fragi+_fplanes[2].froffset; - fragi++; - } -} - -/*Fills in the chroma plane fragment maps for a macro block. - This version is for use with chroma decimated in the X direction (4:2:2). - _mb_map: The macro block map to fill. - _fplanes: The descriptions of the fragment planes. - _xfrag0: The X location of the upper-left hand fragment in the luma plane. - _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ -static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3], - const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ - ptrdiff_t fragi; - int i; - _xfrag0>>=1; - fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; - for(i=0;i<2;i++){ - _mb_map[1][i<<1]=fragi+_fplanes[1].froffset; - _mb_map[2][i<<1]=fragi+_fplanes[2].froffset; - fragi+=_fplanes[1].nhfrags; - } -} - -/*Fills in the chroma plane fragment maps for a macro block. - This version is for use with no chroma decimation (4:4:4). - This uses the already filled-in luma plane values. - _mb_map: The macro block map to fill. 
- _fplanes: The descriptions of the fragment planes.*/ -static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3], - const oc_fragment_plane _fplanes[3]){ - int k; - for(k=0;k<4;k++){ - _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset; - _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset; - } -} - -/*The function type used to fill in the chroma plane fragment maps for a - macro block. - _mb_map: The macro block map to fill. - _fplanes: The descriptions of the fragment planes. - _xfrag0: The X location of the upper-left hand fragment in the luma plane. - _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ -typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3], - const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0); - -/*A table of functions used to fill in the chroma plane fragment maps for a - macro block for each type of chrominance decimation.*/ -static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={ - oc_mb_fill_cmapping00, - oc_mb_fill_cmapping01, - oc_mb_fill_cmapping10, - (oc_mb_fill_cmapping_func)oc_mb_fill_cmapping11 -}; - -/*Fills in the mapping from macro blocks to their corresponding fragment - numbers in each plane. - _mb_maps: The list of macro block maps. - _mb_modes: The list of macro block modes; macro blocks completely outside - the coded region are marked invalid. - _fplanes: The descriptions of the fragment planes. 
- _pixel_fmt: The chroma decimation type.*/ -static void oc_mb_create_mapping(oc_mb_map _mb_maps[], - signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){ - oc_mb_fill_cmapping_func mb_fill_cmapping; - unsigned sbi; - int y; - mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt]; - /*Loop through the luma plane super blocks.*/ - for(sbi=y=0;y<_fplanes[0].nvfrags;y+=4){ - int x; - for(x=0;x<_fplanes[0].nhfrags;x+=4,sbi++){ - int ymb; - /*Loop through the macro blocks in each super block in display order.*/ - for(ymb=0;ymb<2;ymb++){ - int xmb; - for(xmb=0;xmb<2;xmb++){ - unsigned mbi; - int mbx; - int mby; - mbi=sbi<<2|OC_MB_MAP[ymb][xmb]; - mbx=x|xmb<<1; - mby=y|ymb<<1; - /*Initialize fragment indices to -1.*/ - memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi])); - /*Make sure this macro block is within the encoded region.*/ - if(mbx>=_fplanes[0].nhfrags||mby>=_fplanes[0].nvfrags){ - _mb_modes[mbi]=OC_MODE_INVALID; - continue; - } - /*Fill in the fragment indices for the luma plane.*/ - oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby); - /*Fill in the fragment indices for the chroma planes.*/ - (*mb_fill_cmapping)(_mb_maps[mbi],_fplanes,mbx,mby); - } - } - } - } -} - -/*Marks the fragments which fall all or partially outside the displayable - region of the frame. - _state: The Theora state containing the fragments to be marked.*/ -static void oc_state_border_init(oc_theora_state *_state){ - oc_fragment *frag; - oc_fragment *yfrag_end; - oc_fragment *xfrag_end; - oc_fragment_plane *fplane; - int crop_x0; - int crop_y0; - int crop_xf; - int crop_yf; - int pli; - int y; - int x; - /*The method we use here is slow, but the code is dead simple and handles - all the special cases easily. 
- We only ever need to do it once.*/ - /*Loop through the fragments, marking those completely outside the - displayable region and constructing a border mask for those that straddle - the border.*/ - _state->nborders=0; - yfrag_end=frag=_state->frags; - for(pli=0;pli<3;pli++){ - fplane=_state->fplanes+pli; - /*Set up the cropping rectangle for this plane.*/ - crop_x0=_state->info.pic_x; - crop_xf=_state->info.pic_x+_state->info.pic_width; - crop_y0=_state->info.pic_y; - crop_yf=_state->info.pic_y+_state->info.pic_height; - if(pli>0){ - if(!(_state->info.pixel_fmt&1)){ - crop_x0=crop_x0>>1; - crop_xf=crop_xf+1>>1; - } - if(!(_state->info.pixel_fmt&2)){ - crop_y0=crop_y0>>1; - crop_yf=crop_yf+1>>1; - } - } - y=0; - for(yfrag_end+=fplane->nfrags;fragnhfrags;frag=crop_xf||crop_y0>=crop_yf){ - frag->invalid=1; - } - /*Otherwise, check to see if it straddles the border.*/ - else if(x=crop_x0&&x+j=crop_y0&&y+i=_state->nborders){ - _state->nborders++; - _state->borders[i].mask=mask; - _state->borders[i].npixels=npixels; - } - else if(_state->borders[i].mask!=mask)continue; - frag->borderi=i; - break; - } - } - else frag->borderi=-1; - } - } - } -} - -static int oc_state_frarray_init(oc_theora_state *_state){ - int yhfrags; - int yvfrags; - int chfrags; - int cvfrags; - ptrdiff_t yfrags; - ptrdiff_t cfrags; - ptrdiff_t nfrags; - unsigned yhsbs; - unsigned yvsbs; - unsigned chsbs; - unsigned cvsbs; - unsigned ysbs; - unsigned csbs; - unsigned nsbs; - size_t nmbs; - int hdec; - int vdec; - int pli; - /*Figure out the number of fragments in each plane.*/ - /*These parameters have already been validated to be multiples of 16.*/ - yhfrags=_state->info.frame_width>>3; - yvfrags=_state->info.frame_height>>3; - hdec=!(_state->info.pixel_fmt&1); - vdec=!(_state->info.pixel_fmt&2); - chfrags=yhfrags+hdec>>hdec; - cvfrags=yvfrags+vdec>>vdec; - yfrags=yhfrags*(ptrdiff_t)yvfrags; - cfrags=chfrags*(ptrdiff_t)cvfrags; - nfrags=yfrags+2*cfrags; - /*Figure out the number of super blocks in 
each plane.*/ - yhsbs=yhfrags+3>>2; - yvsbs=yvfrags+3>>2; - chsbs=chfrags+3>>2; - cvsbs=cvfrags+3>>2; - ysbs=yhsbs*yvsbs; - csbs=chsbs*cvsbs; - nsbs=ysbs+2*csbs; - nmbs=(size_t)ysbs<<2; - /*Check for overflow. - We support the ridiculous upper limits of the specification (1048560 by - 1048560, or 3 TB frames) if the target architecture has 64-bit pointers, - but for those with 32-bit pointers (or smaller!) we have to check. - If the caller wants to prevent denial-of-service by imposing a more - reasonable upper limit on the size of attempted allocations, they must do - so themselves; we have no platform independent way to determine how much - system memory there is nor an application-independent way to decide what a - "reasonable" allocation is.*/ - if(yfrags/yhfrags!=yvfrags||2*cfrags>2!=ysbs){ - return TH_EIMPL; - } - /*Initialize the fragment array.*/ - _state->fplanes[0].nhfrags=yhfrags; - _state->fplanes[0].nvfrags=yvfrags; - _state->fplanes[0].froffset=0; - _state->fplanes[0].nfrags=yfrags; - _state->fplanes[0].nhsbs=yhsbs; - _state->fplanes[0].nvsbs=yvsbs; - _state->fplanes[0].sboffset=0; - _state->fplanes[0].nsbs=ysbs; - _state->fplanes[1].nhfrags=_state->fplanes[2].nhfrags=chfrags; - _state->fplanes[1].nvfrags=_state->fplanes[2].nvfrags=cvfrags; - _state->fplanes[1].froffset=yfrags; - _state->fplanes[2].froffset=yfrags+cfrags; - _state->fplanes[1].nfrags=_state->fplanes[2].nfrags=cfrags; - _state->fplanes[1].nhsbs=_state->fplanes[2].nhsbs=chsbs; - _state->fplanes[1].nvsbs=_state->fplanes[2].nvsbs=cvsbs; - _state->fplanes[1].sboffset=ysbs; - _state->fplanes[2].sboffset=ysbs+csbs; - _state->fplanes[1].nsbs=_state->fplanes[2].nsbs=csbs; - _state->nfrags=nfrags; - _state->frags=_ogg_calloc(nfrags,sizeof(*_state->frags)); - _state->frag_mvs=_ogg_malloc(nfrags*sizeof(*_state->frag_mvs)); - _state->nsbs=nsbs; - _state->sb_maps=_ogg_malloc(nsbs*sizeof(*_state->sb_maps)); - _state->sb_flags=_ogg_calloc(nsbs,sizeof(*_state->sb_flags)); - _state->nhmbs=yhsbs<<1; - 
_state->nvmbs=yvsbs<<1; - _state->nmbs=nmbs; - _state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps)); - _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes)); - _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis)); - if(_state->frags==NULL||_state->frag_mvs==NULL||_state->sb_maps==NULL|| - _state->sb_flags==NULL||_state->mb_maps==NULL||_state->mb_modes==NULL|| - _state->coded_fragis==NULL){ - return TH_EFAULT; - } - /*Create the mapping from super blocks to fragments.*/ - for(pli=0;pli<3;pli++){ - oc_fragment_plane *fplane; - fplane=_state->fplanes+pli; - oc_sb_create_plane_mapping(_state->sb_maps+fplane->sboffset, - _state->sb_flags+fplane->sboffset,fplane->froffset, - fplane->nhfrags,fplane->nvfrags); - } - /*Create the mapping from macro blocks to fragments.*/ - oc_mb_create_mapping(_state->mb_maps,_state->mb_modes, - _state->fplanes,_state->info.pixel_fmt); - /*Initialize the invalid and borderi fields of each fragment.*/ - oc_state_border_init(_state); - return 0; -} - -static void oc_state_frarray_clear(oc_theora_state *_state){ - _ogg_free(_state->coded_fragis); - _ogg_free(_state->mb_modes); - _ogg_free(_state->mb_maps); - _ogg_free(_state->sb_flags); - _ogg_free(_state->sb_maps); - _ogg_free(_state->frag_mvs); - _ogg_free(_state->frags); -} - - -/*Initializes the buffers used for reconstructed frames. - These buffers are padded with 16 extra pixels on each side, to allow - unrestricted motion vectors without special casing the boundary. - If chroma is decimated in either direction, the padding is reduced by a - factor of 2 on the appropriate sides. 
- _nrefs: The number of reference buffers to init; must be 3 or 4.*/ -static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){ - th_info *info; - unsigned char *ref_frame_data; - size_t ref_frame_data_sz; - size_t ref_frame_sz; - size_t yplane_sz; - size_t cplane_sz; - int yhstride; - int yheight; - int chstride; - int cheight; - ptrdiff_t yoffset; - ptrdiff_t coffset; - ptrdiff_t *frag_buf_offs; - ptrdiff_t fragi; - int hdec; - int vdec; - int rfi; - int pli; - if(_nrefs<3||_nrefs>4)return TH_EINVAL; - info=&_state->info; - /*Compute the image buffer parameters for each plane.*/ - hdec=!(info->pixel_fmt&1); - vdec=!(info->pixel_fmt&2); - yhstride=info->frame_width+2*OC_UMV_PADDING; - yheight=info->frame_height+2*OC_UMV_PADDING; - chstride=yhstride>>hdec; - cheight=yheight>>vdec; - yplane_sz=yhstride*(size_t)yheight; - cplane_sz=chstride*(size_t)cheight; - yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride; - coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride; - ref_frame_sz=yplane_sz+2*cplane_sz; - ref_frame_data_sz=_nrefs*ref_frame_sz; - /*Check for overflow. 
- The same caveats apply as for oc_state_frarray_init().*/ - if(yplane_sz/yhstride!=yheight||2*cplane_szfrag_buf_offs= - _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs)); - if(ref_frame_data==NULL||frag_buf_offs==NULL){ - _ogg_free(frag_buf_offs); - _ogg_free(ref_frame_data); - return TH_EFAULT; - } - /*Set up the width, height and stride for the image buffers.*/ - _state->ref_frame_bufs[0][0].width=info->frame_width; - _state->ref_frame_bufs[0][0].height=info->frame_height; - _state->ref_frame_bufs[0][0].stride=yhstride; - _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width= - info->frame_width>>hdec; - _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height= - info->frame_height>>vdec; - _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride= - chstride; - for(rfi=1;rfi<_nrefs;rfi++){ - memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0], - sizeof(_state->ref_frame_bufs[0])); - } - /*Set up the data pointers for the image buffers.*/ - for(rfi=0;rfi<_nrefs;rfi++){ - _state->ref_frame_data[rfi]=ref_frame_data; - _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset; - ref_frame_data+=yplane_sz; - _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset; - ref_frame_data+=cplane_sz; - _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset; - ref_frame_data+=cplane_sz; - /*Flip the buffer upside down. 
- This allows us to decode Theora's bottom-up frames in their natural - order, yet return a top-down buffer with a positive stride to the user.*/ - oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi], - _state->ref_frame_bufs[rfi]); - } - _state->ref_ystride[0]=-yhstride; - _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride; - /*Initialize the fragment buffer offsets.*/ - ref_frame_data=_state->ref_frame_data[0]; - fragi=0; - for(pli=0;pli<3;pli++){ - th_img_plane *iplane; - oc_fragment_plane *fplane; - unsigned char *vpix; - ptrdiff_t stride; - ptrdiff_t vfragi_end; - int nhfrags; - iplane=_state->ref_frame_bufs[0]+pli; - fplane=_state->fplanes+pli; - vpix=iplane->data; - vfragi_end=fplane->froffset+fplane->nfrags; - nhfrags=fplane->nhfrags; - stride=iplane->stride; - while(fragiref_frame_idx[OC_FRAME_GOLD]= - _state->ref_frame_idx[OC_FRAME_PREV]= - _state->ref_frame_idx[OC_FRAME_SELF]=-1; - _state->ref_frame_idx[OC_FRAME_IO]=_nrefs>3?3:-1; - return 0; -} - -static void oc_state_ref_bufs_clear(oc_theora_state *_state){ - _ogg_free(_state->frag_buf_offs); - _ogg_free(_state->ref_frame_data[0]); -} - - -void oc_state_vtable_init_c(oc_theora_state *_state){ - _state->opt_vtable.frag_copy=oc_frag_copy_c; - _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c; - _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c; - _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c; - _state->opt_vtable.idct8x8=oc_idct8x8_c; - _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c; - _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_c; - _state->opt_vtable.state_loop_filter_frag_rows= - oc_state_loop_filter_frag_rows_c; - _state->opt_vtable.restore_fpu=oc_restore_fpu_c; - _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG; -} - -/*Initialize the accelerated function pointers.*/ -void oc_state_vtable_init(oc_theora_state *_state){ -#if defined(OC_X86_ASM) - oc_state_vtable_init_x86(_state); -#else - oc_state_vtable_init_c(_state); -#endif -} - - -int 
oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){ - int ret; - /*First validate the parameters.*/ - if(_info==NULL)return TH_EFAULT; - /*The width and height of the encoded frame must be multiples of 16. - They must also, when divided by 16, fit into a 16-bit unsigned integer. - The displayable frame offset coordinates must fit into an 8-bit unsigned - integer. - Note that the offset Y in the API is specified on the opposite side from - how it is specified in the bitstream, because the Y axis is flipped in - the bitstream. - The displayable frame must fit inside the encoded frame. - The color space must be one known by the encoder.*/ - if((_info->frame_width&0xF)||(_info->frame_height&0xF)|| - _info->frame_width<=0||_info->frame_width>=0x100000|| - _info->frame_height<=0||_info->frame_height>=0x100000|| - _info->pic_x+_info->pic_width>_info->frame_width|| - _info->pic_y+_info->pic_height>_info->frame_height|| - _info->pic_x>255||_info->frame_height-_info->pic_height-_info->pic_y>255|| - /*Note: the following <0 comparisons may generate spurious warnings on - platforms where enums are unsigned. - We could cast them to unsigned and just use the following >= comparison, - but there are a number of compilers which will mis-optimize this. 
- It's better to live with the spurious warnings.*/ - _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES|| - _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS){ - return TH_EINVAL; - } - memset(_state,0,sizeof(*_state)); - memcpy(&_state->info,_info,sizeof(*_info)); - /*Invert the sense of pic_y to match Theora's right-handed coordinate - system.*/ - _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y; - _state->frame_type=OC_UNKWN_FRAME; - oc_state_vtable_init(_state); - ret=oc_state_frarray_init(_state); - if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs); - if(ret<0){ - oc_state_frarray_clear(_state); - return ret; - } - /*If the keyframe_granule_shift is out of range, use the maximum allowable - value.*/ - if(_info->keyframe_granule_shift<0||_info->keyframe_granule_shift>31){ - _state->info.keyframe_granule_shift=31; - } - _state->keyframe_num=0; - _state->curframe_num=-1; - /*3.2.0 streams mark the frame index instead of the frame count. - This was changed with stream version 3.2.1 to conform to other Ogg - codecs. - We add an extra bias when computing granule positions for new streams.*/ - _state->granpos_bias=TH_VERSION_CHECK(_info,3,2,1); - return 0; -} - -void oc_state_clear(oc_theora_state *_state){ - oc_state_ref_bufs_clear(_state); - oc_state_frarray_clear(_state); -} - - -/*Duplicates the pixels on the border of the image plane out into the - surrounding padding for use by unrestricted motion vectors. - This function only adds the left and right borders, and only for the fragment - rows specified. - _refi: The index of the reference buffer to pad. - _pli: The color plane. - _y0: The Y coordinate of the first row to pad. 
- _yend: The Y coordinate of the row to stop padding at.*/ -void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli, - int _y0,int _yend){ - th_img_plane *iplane; - unsigned char *apix; - unsigned char *bpix; - unsigned char *epix; - int stride; - int hpadding; - hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1)); - iplane=_state->ref_frame_bufs[_refi]+_pli; - stride=iplane->stride; - apix=iplane->data+_y0*(ptrdiff_t)stride; - bpix=apix+iplane->width-1; - epix=iplane->data+_yend*(ptrdiff_t)stride; - /*Note the use of != instead of <, which allows the stride to be negative.*/ - while(apix!=epix){ - memset(apix-hpadding,apix[0],hpadding); - memset(bpix+1,bpix[0],hpadding); - apix+=stride; - bpix+=stride; - } -} - -/*Duplicates the pixels on the border of the image plane out into the - surrounding padding for use by unrestricted motion vectors. - This function only adds the top and bottom borders, and must be called after - the left and right borders are added. - _refi: The index of the reference buffer to pad. - _pli: The color plane.*/ -void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){ - th_img_plane *iplane; - unsigned char *apix; - unsigned char *bpix; - unsigned char *epix; - int stride; - int hpadding; - int vpadding; - int fullw; - hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1)); - vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2)); - iplane=_state->ref_frame_bufs[_refi]+_pli; - stride=iplane->stride; - fullw=iplane->width+(hpadding<<1); - apix=iplane->data-hpadding; - bpix=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding; - epix=apix-stride*(ptrdiff_t)vpadding; - while(apix!=epix){ - memcpy(apix-stride,apix,fullw); - memcpy(bpix+stride,bpix,fullw); - apix-=stride; - bpix+=stride; - } -} - -/*Duplicates the pixels on the border of the given reference image out into - the surrounding padding for use by unrestricted motion vectors. 
- _state: The context containing the reference buffers. - _refi: The index of the reference buffer to pad.*/ -void oc_state_borders_fill(oc_theora_state *_state,int _refi){ - int pli; - for(pli=0;pli<3;pli++){ - oc_state_borders_fill_rows(_state,_refi,pli,0, - _state->ref_frame_bufs[_refi][pli].height); - oc_state_borders_fill_caps(_state,_refi,pli); - } -} - -/*Determines the offsets in an image buffer to use for motion compensation. - _state: The Theora state the offsets are to be computed with. - _offsets: Returns the offset for the buffer(s). - _offsets[0] is always set. - _offsets[1] is set if the motion vector has non-zero fractional - components. - _pli: The color plane index. - _dx: The X component of the motion vector. - _dy: The Y component of the motion vector. - Return: The number of offsets returned: 1 or 2.*/ -int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2], - int _pli,int _dx,int _dy){ - /*Here is a brief description of how Theora handles motion vectors: - Motion vector components are specified to half-pixel accuracy in - undecimated directions of each plane, and quarter-pixel accuracy in - decimated directions. - Integer parts are extracted by dividing (not shifting) by the - appropriate amount, with truncation towards zero. - These integer values are used to calculate the first offset. - - If either of the fractional parts are non-zero, then a second offset is - computed. - No third or fourth offsets are computed, even if both components have - non-zero fractional parts. 
- The second offset is computed by dividing (not shifting) by the - appropriate amount, always truncating _away_ from zero.*/ -#if 0 - /*This version of the code doesn't use any tables, but is slower.*/ - int ystride; - int xprec; - int yprec; - int xfrac; - int yfrac; - int offs; - ystride=_state->ref_ystride[_pli]; - /*These two variables decide whether we are in half- or quarter-pixel - precision in each component.*/ - xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1)); - yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2)); - /*These two variables are either 0 if all the fractional bits are zero or -1 - if any of them are non-zero.*/ - xfrac=OC_SIGNMASK(-(_dx&(xprec|1))); - yfrac=OC_SIGNMASK(-(_dy&(yprec|1))); - offs=(_dx>>xprec)+(_dy>>yprec)*ystride; - if(xfrac||yfrac){ - int xmask; - int ymask; - xmask=OC_SIGNMASK(_dx); - ymask=OC_SIGNMASK(_dy); - yfrac&=ystride; - _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask); - _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask); - return 2; - } - else{ - _offsets[0]=offs; - return 1; - } -#else - /*Using tables simplifies the code, and there's enough arithmetic to hide the - latencies of the memory references.*/ - static const signed char OC_MVMAP[2][64]={ - { - -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8, - -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, - 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, - 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15 - }, - { - -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, -4, -4, - -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1, 0, 0, 0, - 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, - 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7 - } - }; - static const signed char OC_MVMAP2[2][64]={ - { - -1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, - 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 - }, - { - -1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, - 
0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, - 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, - 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 - } - }; - int ystride; - int qpx; - int qpy; - int mx; - int my; - int mx2; - int my2; - int offs; - ystride=_state->ref_ystride[_pli]; - qpy=_pli!=0&&!(_state->info.pixel_fmt&2); - my=OC_MVMAP[qpy][_dy+31]; - my2=OC_MVMAP2[qpy][_dy+31]; - qpx=_pli!=0&&!(_state->info.pixel_fmt&1); - mx=OC_MVMAP[qpx][_dx+31]; - mx2=OC_MVMAP2[qpx][_dx+31]; - offs=my*ystride+mx; - if(mx2||my2){ - _offsets[1]=offs+my2*ystride+mx2; - _offsets[0]=offs; - return 2; - } - _offsets[0]=offs; - return 1; -#endif -} - -void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi, - int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){ - _state->opt_vtable.state_frag_recon(_state,_fragi,_pli,_dct_coeffs, - _last_zzi,_dc_quant); -} - -void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi, - int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){ - unsigned char *dst; - ptrdiff_t frag_buf_off; - int ystride; - int mb_mode; - /*Apply the inverse transform.*/ - /*Special case only having a DC component.*/ - if(_last_zzi<2){ - ogg_int16_t p; - int ci; - /*We round this dequant product (and not any of the others) because there's - no iDCT rounding.*/ - p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); - /*LOOP VECTORIZES.*/ - for(ci=0;ci<64;ci++)_dct_coeffs[ci]=p; - } - else{ - /*First, dequantize the DC coefficient.*/ - _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); - oc_idct8x8(_state,_dct_coeffs,_last_zzi); - } - /*Fill in the target buffer.*/ - frag_buf_off=_state->frag_buf_offs[_fragi]; - mb_mode=_state->frags[_fragi].mb_mode; - ystride=_state->ref_ystride[_pli]; - dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off; - if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra(_state,dst,ystride,_dct_coeffs); - else{ - const unsigned char 
*ref; - int mvoffsets[2]; - ref= - _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]] - +frag_buf_off; - if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, - _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){ - oc_frag_recon_inter2(_state, - dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,_dct_coeffs); - } - else oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,_dct_coeffs); - } -} - -/*Copies the fragments specified by the lists of fragment indices from one - frame to another. - _fragis: A pointer to a list of fragment indices. - _nfragis: The number of fragment indices to copy. - _dst_frame: The reference frame to copy to. - _src_frame: The reference frame to copy from. - _pli: The color plane the fragments lie in.*/ -void oc_state_frag_copy_list(const oc_theora_state *_state, - const ptrdiff_t *_fragis,ptrdiff_t _nfragis, - int _dst_frame,int _src_frame,int _pli){ - _state->opt_vtable.state_frag_copy_list(_state,_fragis,_nfragis,_dst_frame, - _src_frame,_pli); -} - -void oc_state_frag_copy_list_c(const oc_theora_state *_state, - const ptrdiff_t *_fragis,ptrdiff_t _nfragis, - int _dst_frame,int _src_frame,int _pli){ - const ptrdiff_t *frag_buf_offs; - const unsigned char *src_frame_data; - unsigned char *dst_frame_data; - ptrdiff_t fragii; - int ystride; - dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]]; - src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]]; - ystride=_state->ref_ystride[_pli]; - frag_buf_offs=_state->frag_buf_offs; - for(fragii=0;fragii<_nfragis;fragii++){ - ptrdiff_t frag_buf_off; - frag_buf_off=frag_buf_offs[_fragis[fragii]]; - oc_frag_copy(_state,dst_frame_data+frag_buf_off, - src_frame_data+frag_buf_off,ystride); - } -} - -static void loop_filter_h(unsigned char *_pix,int _ystride,int *_bv){ - int y; - _pix-=2; - for(y=0;y<8;y++){ - int f; - f=_pix[0]-_pix[3]+3*(_pix[2]-_pix[1]); - /*The _bv array is used to compute the function - 
f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0)); - where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/ - f=*(_bv+(f+4>>3)); - _pix[1]=OC_CLAMP255(_pix[1]+f); - _pix[2]=OC_CLAMP255(_pix[2]-f); - _pix+=_ystride; - } -} - -static void loop_filter_v(unsigned char *_pix,int _ystride,int *_bv){ - int x; - _pix-=_ystride*2; - for(x=0;x<8;x++){ - int f; - f=_pix[x]-_pix[_ystride*3+x]+3*(_pix[_ystride*2+x]-_pix[_ystride+x]); - /*The _bv array is used to compute the function - f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0)); - where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/ - f=*(_bv+(f+4>>3)); - _pix[_ystride+x]=OC_CLAMP255(_pix[_ystride+x]+f); - _pix[_ystride*2+x]=OC_CLAMP255(_pix[_ystride*2+x]-f); - } -} - -/*Initialize the bounding values array used by the loop filter. - _bv: Storage for the array. - Return: 0 on success, or a non-zero value if no filtering need be applied.*/ -int oc_state_loop_filter_init(oc_theora_state *_state,int _bv[256]){ - int flimit; - int i; - flimit=_state->loop_filter_limits[_state->qis[0]]; - if(flimit==0)return 1; - memset(_bv,0,sizeof(_bv[0])*256); - for(i=0;i=0)_bv[127-i-flimit]=i-flimit; - _bv[127-i]=-i; - _bv[127+i]=i; - if(127+i+flimit<256)_bv[127+i+flimit]=flimit-i; - } - return 0; -} - -/*Apply the loop filter to a given set of fragment rows in the given plane. - The filter may be run on the bottom edge, affecting pixels in the next row of - fragments, so this row also needs to be available. - _bv: The bounding values array. - _refi: The index of the frame buffer to filter. - _pli: The color plane to filter. - _fragy0: The Y coordinate of the first fragment row to filter. 
- _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ -void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,int _bv[256], - int _refi,int _pli,int _fragy0,int _fragy_end){ - _state->opt_vtable.state_loop_filter_frag_rows(_state,_bv,_refi,_pli, - _fragy0,_fragy_end); -} - -void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,int *_bv, - int _refi,int _pli,int _fragy0,int _fragy_end){ - const oc_fragment_plane *fplane; - const oc_fragment *frags; - const ptrdiff_t *frag_buf_offs; - unsigned char *ref_frame_data; - ptrdiff_t fragi_top; - ptrdiff_t fragi_bot; - ptrdiff_t fragi0; - ptrdiff_t fragi0_end; - int ystride; - int nhfrags; - _bv+=127; - fplane=_state->fplanes+_pli; - nhfrags=fplane->nhfrags; - fragi_top=fplane->froffset; - fragi_bot=fragi_top+fplane->nfrags; - fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags; - fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags; - ystride=_state->ref_ystride[_pli]; - frags=_state->frags; - frag_buf_offs=_state->frag_buf_offs; - ref_frame_data=_state->ref_frame_data[_refi]; - /*The following loops are constructed somewhat non-intuitively on purpose. - The main idea is: if a block boundary has at least one coded fragment on - it, the filter is applied to it. 
- However, the order that the filters are applied in matters, and VP3 chose - the somewhat strange ordering used below.*/ - while(fragi0fragi0)loop_filter_h(ref,ystride,_bv); - if(fragi0>fragi_top)loop_filter_v(ref,ystride,_bv); - if(fragi+1info.frame_width; - height=_state->info.frame_height; - iframe=_state->granpos>>_state->info.keyframe_granule_shift; - pframe=_state->granpos-(iframe<<_state->info.keyframe_granule_shift); - sprintf(fname,"%08i%s.png",(int)(iframe+pframe),_suf); - fp=fopen(fname,"wb"); - if(fp==NULL)return TH_EFAULT; - image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image)); - if(image==NULL){ - fclose(fp); - return TH_EFAULT; - } - png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL); - if(png==NULL){ - oc_free_2d(image); - fclose(fp); - return TH_EFAULT; - } - info=png_create_info_struct(png); - if(info==NULL){ - png_destroy_write_struct(&png,NULL); - oc_free_2d(image); - fclose(fp); - return TH_EFAULT; - } - if(setjmp(png_jmpbuf(png))){ - png_destroy_write_struct(&png,&info); - oc_free_2d(image); - fclose(fp); - return TH_EFAULT; - } - framei=_state->ref_frame_idx[_frame]; - y_row=_state->ref_frame_bufs[framei][0].data; - u_row=_state->ref_frame_bufs[framei][1].data; - v_row=_state->ref_frame_bufs[framei][2].data; - y_stride=_state->ref_frame_bufs[framei][0].stride; - u_stride=_state->ref_frame_bufs[framei][1].stride; - v_stride=_state->ref_frame_bufs[framei][2].stride; - /*Chroma up-sampling is just done with a box filter. - This is very likely what will actually be used in practice on a real - display, and also removes one more layer to search in for the source of - artifacts. 
- As an added bonus, it's dead simple.*/ - for(imgi=height;imgi-->0;){ - int dc; - y=y_row; - u=u_row; - v=v_row; - for(imgj=0;imgj<6*width;){ - float yval; - float uval; - float vval; - unsigned rval; - unsigned gval; - unsigned bval; - /*This is intentionally slow and very accurate.*/ - yval=(*y-16)*(1.0F/219); - uval=(*u-128)*(2*(1-0.114F)/224); - vval=(*v-128)*(2*(1-0.299F)/224); - rval=OC_CLAMPI(0,(int)(65535*(yval+vval)+0.5F),65535); - gval=OC_CLAMPI(0,(int)(65535*( - yval-uval*(0.114F/0.587F)-vval*(0.299F/0.587F))+0.5F),65535); - bval=OC_CLAMPI(0,(int)(65535*(yval+uval)+0.5F),65535); - image[imgi][imgj++]=(unsigned char)(rval>>8); - image[imgi][imgj++]=(unsigned char)(rval&0xFF); - image[imgi][imgj++]=(unsigned char)(gval>>8); - image[imgi][imgj++]=(unsigned char)(gval&0xFF); - image[imgi][imgj++]=(unsigned char)(bval>>8); - image[imgi][imgj++]=(unsigned char)(bval&0xFF); - dc=(y-y_row&1)|(_state->info.pixel_fmt&1); - y++; - u+=dc; - v+=dc; - } - dc=-((height-1-imgi&1)|_state->info.pixel_fmt>>1); - y_row+=y_stride; - u_row+=dc&u_stride; - v_row+=dc&v_stride; - } - png_init_io(png,fp); - png_set_compression_level(png,Z_BEST_COMPRESSION); - png_set_IHDR(png,info,width,height,16,PNG_COLOR_TYPE_RGB, - PNG_INTERLACE_NONE,PNG_COMPRESSION_TYPE_DEFAULT,PNG_FILTER_TYPE_DEFAULT); - switch(_state->info.colorspace){ - case TH_CS_ITU_REC_470M:{ - png_set_gAMA(png,info,2.2); - png_set_cHRM_fixed(png,info,31006,31616, - 67000,32000,21000,71000,14000,8000); - }break; - case TH_CS_ITU_REC_470BG:{ - png_set_gAMA(png,info,2.67); - png_set_cHRM_fixed(png,info,31271,32902, - 64000,33000,29000,60000,15000,6000); - }break; - default:break; - } - png_set_pHYs(png,info,_state->info.aspect_numerator, - _state->info.aspect_denominator,0); - png_set_rows(png,info,image); - png_write_png(png,info,PNG_TRANSFORM_IDENTITY,NULL); - png_write_end(png,info); - png_destroy_write_struct(&png,&info); - oc_free_2d(image); - fclose(fp); - return 0; -} -#endif - - - -ogg_int64_t 
th_granule_frame(void *_encdec,ogg_int64_t _granpos){ - oc_theora_state *state; - state=(oc_theora_state *)_encdec; - if(_granpos>=0){ - ogg_int64_t iframe; - ogg_int64_t pframe; - iframe=_granpos>>state->info.keyframe_granule_shift; - pframe=_granpos-(iframe<info.keyframe_granule_shift); - /*3.2.0 streams store the frame index in the granule position. - 3.2.1 and later store the frame count. - We return the index, so adjust the value if we have a 3.2.1 or later - stream.*/ - return iframe+pframe-TH_VERSION_CHECK(&state->info,3,2,1); - } - return -1; -} - -double th_granule_time(void *_encdec,ogg_int64_t _granpos){ - oc_theora_state *state; - state=(oc_theora_state *)_encdec; - if(_granpos>=0){ - return (th_granule_frame(_encdec, _granpos)+1)*( - (double)state->info.fps_denominator/state->info.fps_numerator); - } - return -1; -} diff --git a/drivers/theora/theora.exp b/drivers/theora/theora.exp deleted file mode 100644 index b4e0225f1e..0000000000 --- a/drivers/theora/theora.exp +++ /dev/null @@ -1,55 +0,0 @@ -# export list for libtheora -_theora_version_string -_theora_version_number -_theora_encode_init -_theora_encode_YUVin -_theora_encode_packetout -_theora_encode_header -_theora_encode_comment -_theora_encode_tables -_theora_decode_header -_theora_decode_init -_theora_decode_packetin -_theora_decode_YUVout -_theora_control -_theora_packet_isheader -_theora_packet_iskeyframe -_theora_granule_shift -_theora_granule_frame -_theora_granule_time -_theora_info_init -_theora_info_clear -_theora_clear -_theora_comment_init -_theora_comment_add -_theora_comment_add_tag -_theora_comment_query -_theora_comment_query_count -_theora_comment_clear -_th_version_string -_th_version_number -_th_decode_headerin -_th_decode_alloc -_th_setup_free -_th_decode_ctl -_th_decode_packetin -_th_decode_ycbcr_out -_th_decode_free -_th_packet_isheader -_th_packet_iskeyframe -_th_granule_frame -_th_granule_time -_th_info_init -_th_info_clear -_th_comment_init -_th_comment_add 
-_th_comment_add_tag -_th_comment_query -_th_comment_query_count -_th_comment_clear -_th_encode_alloc -_th_encode_ctl -_th_encode_flushheader -_th_encode_packetout -_th_encode_ycbcr_in -_th_encode_free diff --git a/drivers/theora/theora.h b/drivers/theora/theora.h deleted file mode 100644 index af6eb6f380..0000000000 --- a/drivers/theora/theora.h +++ /dev/null @@ -1,784 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: theora.h,v 1.17 2003/12/06 18:06:19 arc Exp $ - - ********************************************************************/ - -#ifndef _O_THEORA_H_ -#define _O_THEORA_H_ - -#ifdef __cplusplus -extern "C" -{ -#endif /* __cplusplus */ - -#include /* for size_t */ - -#include - -/** \file - * The libtheora pre-1.0 legacy C API. - * - * \ingroup oldfuncs - * - * \section intro Introduction - * - * This is the documentation for the libtheora legacy C API, declared in - * the theora.h header, which describes the old interface used before - * the 1.0 release. This API was widely deployed for several years and - * remains supported, but for new code we recommend the cleaner API - * declared in theoradec.h and theoraenc.h. - * - * libtheora is the reference implementation for - * Theora, a free video codec. - * Theora is derived from On2's VP3 codec with improved integration with - * Ogg multimedia formats by Xiph.Org. 
- * - * \section overview Overview - * - * This library will both decode and encode theora packets to/from raw YUV - * frames. In either case, the packets will most likely either come from or - * need to be embedded in an Ogg stream. Use - * libogg or - * liboggz - * to extract/package these packets. - * - * \section decoding Decoding Process - * - * Decoding can be separated into the following steps: - * -# initialise theora_info and theora_comment structures using - * theora_info_init() and theora_comment_init(): - \verbatim - theora_info info; - theora_comment comment; - - theora_info_init(&info); - theora_comment_init(&comment); - \endverbatim - * -# retrieve header packets from Ogg stream (there should be 3) and decode - * into theora_info and theora_comment structures using - * theora_decode_header(). See \ref identification for more information on - * identifying which packets are theora packets. - \verbatim - int i; - for (i = 0; i < 3; i++) - { - (get a theora packet "op" from the Ogg stream) - theora_decode_header(&info, &comment, op); - } - \endverbatim - * -# initialise the decoder based on the information retrieved into the - * theora_info struct by theora_decode_header(). You will need a - * theora_state struct. - \verbatim - theora_state state; - - theora_decode_init(&state, &info); - \endverbatim - * -# pass in packets and retrieve decoded frames! See the yuv_buffer - * documentation for information on how to retrieve raw YUV data. - \verbatim - yuf_buffer buffer; - while (last packet was not e_o_s) { - (get a theora packet "op" from the Ogg stream) - theora_decode_packetin(&state, op); - theora_decode_YUVout(&state, &buffer); - } - \endverbatim - * - * - * \subsection identification Identifying Theora Packets - * - * All streams inside an Ogg file have a unique serial_no attached to the - * stream. 
Typically, you will want to - * - retrieve the serial_no for each b_o_s (beginning of stream) page - * encountered within the Ogg file; - * - test the first (only) packet on that page to determine if it is a theora - * packet; - * - once you have found a theora b_o_s page then use the retrieved serial_no - * to identify future packets belonging to the same theora stream. - * - * Note that you \e cannot use theora_packet_isheader() to determine if a - * packet is a theora packet or not, as this function does not perform any - * checking beyond whether a header bit is present. Instead, use the - * theora_decode_header() function and check the return value; or examine the - * header bytes at the beginning of the Ogg page. - */ - - -/** \defgroup oldfuncs Legacy pre-1.0 C API */ -/* @{ */ - -/** - * A YUV buffer for passing uncompressed frames to and from the codec. - * This holds a Y'CbCr frame in planar format. The CbCr planes can be - * subsampled and have their own separate dimensions and row stride - * offsets. Note that the strides may be negative in some - * configurations. For theora the width and height of the largest plane - * must be a multiple of 16. The actual meaningful picture size and - * offset are stored in the theora_info structure; frames returned by - * the decoder may need to be cropped for display. - * - * All samples are 8 bits. Within each plane samples are ordered by - * row from the top of the frame to the bottom. Within each row samples - * are ordered from left to right. - * - * During decode, the yuv_buffer struct is allocated by the user, but all - * fields (including luma and chroma pointers) are filled by the library. - * These pointers address library-internal memory and their contents should - * not be modified. - * - * Conversely, during encode the user allocates the struct and fills out all - * fields. The user also manages the data addressed by the luma and chroma - * pointers. 
See the encoder_example.c and dump_video.c example files in - * theora/examples/ for more information. - */ -typedef struct { - int y_width; /**< Width of the Y' luminance plane */ - int y_height; /**< Height of the luminance plane */ - int y_stride; /**< Offset in bytes between successive rows */ - - int uv_width; /**< Width of the Cb and Cr chroma planes */ - int uv_height; /**< Height of the chroma planes */ - int uv_stride; /**< Offset between successive chroma rows */ - unsigned char *y; /**< Pointer to start of luminance data */ - unsigned char *u; /**< Pointer to start of Cb data */ - unsigned char *v; /**< Pointer to start of Cr data */ - -} yuv_buffer; - -/** - * A Colorspace. - */ -typedef enum { - OC_CS_UNSPECIFIED, /**< The colorspace is unknown or unspecified */ - OC_CS_ITU_REC_470M, /**< This is the best option for 'NTSC' content */ - OC_CS_ITU_REC_470BG, /**< This is the best option for 'PAL' content */ - OC_CS_NSPACES /**< This marks the end of the defined colorspaces */ -} theora_colorspace; - -/** - * A Chroma subsampling - * - * These enumerate the available chroma subsampling options supported - * by the theora format. See Section 4.4 of the specification for - * exact definitions. - */ -typedef enum { - OC_PF_420, /**< Chroma subsampling by 2 in each direction (4:2:0) */ - OC_PF_RSVD, /**< Reserved value */ - OC_PF_422, /**< Horizonatal chroma subsampling by 2 (4:2:2) */ - OC_PF_444, /**< No chroma subsampling at all (4:4:4) */ -} theora_pixelformat; - -/** - * Theora bitstream info. - * Contains the basic playback parameters for a stream, - * corresponding to the initial 'info' header packet. - * - * Encoded theora frames must be a multiple of 16 in width and height. - * To handle other frame sizes, a crop rectangle is specified in - * frame_height and frame_width, offset_x and * offset_y. The offset - * and size should still be a multiple of 2 to avoid chroma sampling - * shifts. 
Offset values in this structure are measured from the - * upper left of the image. - * - * Frame rate, in frames per second, is stored as a rational - * fraction. Aspect ratio is also stored as a rational fraction, and - * refers to the aspect ratio of the frame pixels, not of the - * overall frame itself. - * - * See - * examples/encoder_example.c for usage examples of the - * other paramters and good default settings for the encoder parameters. - */ -typedef struct { - ogg_uint32_t width; /**< encoded frame width */ - ogg_uint32_t height; /**< encoded frame height */ - ogg_uint32_t frame_width; /**< display frame width */ - ogg_uint32_t frame_height; /**< display frame height */ - ogg_uint32_t offset_x; /**< horizontal offset of the displayed frame */ - ogg_uint32_t offset_y; /**< vertical offset of the displayed frame */ - ogg_uint32_t fps_numerator; /**< frame rate numerator **/ - ogg_uint32_t fps_denominator; /**< frame rate denominator **/ - ogg_uint32_t aspect_numerator; /**< pixel aspect ratio numerator */ - ogg_uint32_t aspect_denominator; /**< pixel aspect ratio denominator */ - theora_colorspace colorspace; /**< colorspace */ - int target_bitrate; /**< nominal bitrate in bits per second */ - int quality; /**< Nominal quality setting, 0-63 */ - int quick_p; /**< Quick encode/decode */ - - /* decode only */ - unsigned char version_major; - unsigned char version_minor; - unsigned char version_subminor; - - void *codec_setup; - - /* encode only */ - int dropframes_p; - int keyframe_auto_p; - ogg_uint32_t keyframe_frequency; - ogg_uint32_t keyframe_frequency_force; /* also used for decode init to - get granpos shift correct */ - ogg_uint32_t keyframe_data_target_bitrate; - ogg_int32_t keyframe_auto_threshold; - ogg_uint32_t keyframe_mindistance; - ogg_int32_t noise_sensitivity; - ogg_int32_t sharpness; - - theora_pixelformat pixelformat; /**< chroma subsampling mode to expect */ - -} theora_info; - -/** Codec internal state and context. 
- */ -typedef struct{ - theora_info *i; - ogg_int64_t granulepos; - - void *internal_encode; - void *internal_decode; - -} theora_state; - -/** - * Comment header metadata. - * - * This structure holds the in-stream metadata corresponding to - * the 'comment' header packet. - * - * Meta data is stored as a series of (tag, value) pairs, in - * length-encoded string vectors. The first occurence of the - * '=' character delimits the tag and value. A particular tag - * may occur more than once. The character set encoding for - * the strings is always UTF-8, but the tag names are limited - * to case-insensitive ASCII. See the spec for details. - * - * In filling in this structure, theora_decode_header() will - * null-terminate the user_comment strings for safety. However, - * the bitstream format itself treats them as 8-bit clean, - * and so the length array should be treated as authoritative - * for their length. - */ -typedef struct theora_comment{ - char **user_comments; /**< An array of comment string vectors */ - int *comment_lengths; /**< An array of corresponding string vector lengths in bytes */ - int comments; /**< The total number of comment string vectors */ - char *vendor; /**< The vendor string identifying the encoder, null terminated */ - -} theora_comment; - - -/**\name theora_control() codes */ -/* \anchor decctlcodes_old - * These are the available request codes for theora_control() - * when called with a decoder instance. - * By convention decoder control codes are odd, to distinguish - * them from \ref encctlcodes_old "encoder control codes" which - * are even. - * - * Note that since the 1.0 release, both the legacy and the final - * implementation accept all the same control codes, but only the - * final API declares the newer codes. - * - * Keep any experimental or vendor-specific values above \c 0x8000.*/ - -/*@{*/ - -/**Get the maximum post-processing level. 
- * The decoder supports a post-processing filter that can improve - * the appearance of the decoded images. This returns the highest - * level setting for this post-processor, corresponding to maximum - * improvement and computational expense. - */ -#define TH_DECCTL_GET_PPLEVEL_MAX (1) - -/**Set the post-processing level. - * Sets the level of post-processing to use when decoding the - * compressed stream. This must be a value between zero (off) - * and the maximum returned by TH_DECCTL_GET_PPLEVEL_MAX. - */ -#define TH_DECCTL_SET_PPLEVEL (3) - -/**Sets the maximum distance between key frames. - * This can be changed during an encode, but will be bounded by - * 1<. - * If it is set before encoding begins, th_info#keyframe_granule_shift will - * be enlarged appropriately. - * - * \param[in] buf ogg_uint32_t: The maximum distance between key - * frames. - * \param[out] buf ogg_uint32_t: The actual maximum distance set. - * \retval OC_FAULT \a theora_state or \a buf is NULL. - * \retval OC_EINVAL \a buf_sz is not sizeof(ogg_uint32_t). - * \retval OC_IMPL Not supported by this implementation.*/ -#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4) - -/**Set the granule position. - * Call this after a seek, to update the internal granulepos - * in the decoder, to insure that subsequent frames are marked - * properly. If you track timestamps yourself and do not use - * the granule postion returned by the decoder, then you do - * not need to use this control. - */ -#define TH_DECCTL_SET_GRANPOS (5) - -/**\anchor encctlcodes_old */ - -/**Sets the quantization parameters to use. - * The parameters are copied, not stored by reference, so they can be freed - * after this call. - * NULL may be specified to revert to the default parameters. - * - * \param[in] buf #th_quant_info - * \retval OC_FAULT \a theora_state is NULL. 
- * \retval OC_EINVAL Encoding has already begun, the quantization parameters - * are not acceptable to this version of the encoder, - * \a buf is NULL and \a buf_sz is not zero, - * or \a buf is non-NULL and \a buf_sz is - * not sizeof(#th_quant_info). - * \retval OC_IMPL Not supported by this implementation.*/ -#define TH_ENCCTL_SET_QUANT_PARAMS (2) - -/**Disables any encoder features that would prevent lossless transcoding back - * to VP3. - * This primarily means disabling block-level QI values and not using 4MV mode - * when any of the luma blocks in a macro block are not coded. - * It also includes using the VP3 quantization tables and Huffman codes; if you - * set them explicitly after calling this function, the resulting stream will - * not be VP3-compatible. - * If you enable VP3-compatibility when encoding 4:2:2 or 4:4:4 source - * material, or when using a picture region smaller than the full frame (e.g. - * a non-multiple-of-16 width or height), then non-VP3 bitstream features will - * still be disabled, but the stream will still not be VP3-compatible, as VP3 - * was not capable of encoding such formats. - * If you call this after encoding has already begun, then the quantization - * tables and codebooks cannot be changed, but the frame-level features will - * be enabled or disabled as requested. - * - * \param[in] buf int: a non-zero value to enable VP3 compatibility, - * or 0 to disable it (the default). - * \param[out] buf int: 1 if all bitstream features required for - * VP3-compatibility could be set, and 0 otherwise. - * The latter will be returned if the pixel format is not - * 4:2:0, the picture region is smaller than the full frame, - * or if encoding has begun, preventing the quantization - * tables and codebooks from being set. - * \retval OC_FAULT \a theora_state or \a buf is NULL. - * \retval OC_EINVAL \a buf_sz is not sizeof(int). 
- * \retval OC_IMPL Not supported by this implementation.*/ -#define TH_ENCCTL_SET_VP3_COMPATIBLE (10) - -/**Gets the maximum speed level. - * Higher speed levels favor quicker encoding over better quality per bit. - * Depending on the encoding mode, and the internal algorithms used, quality - * may actually improve, but in this case bitrate will also likely increase. - * In any case, overall rate/distortion performance will probably decrease. - * The maximum value, and the meaning of each value, may change depending on - * the current encoding mode (VBR vs. CQI, etc.). - * - * \param[out] buf int: The maximum encoding speed level. - * \retval OC_FAULT \a theora_state or \a buf is NULL. - * \retval OC_EINVAL \a buf_sz is not sizeof(int). - * \retval OC_IMPL Not supported by this implementation in the current - * encoding mode.*/ -#define TH_ENCCTL_GET_SPLEVEL_MAX (12) - -/**Sets the speed level. - * By default a speed value of 1 is used. - * - * \param[in] buf int: The new encoding speed level. - * 0 is slowest, larger values use less CPU. - * \retval OC_FAULT \a theora_state or \a buf is NULL. - * \retval OC_EINVAL \a buf_sz is not sizeof(int), or the - * encoding speed level is out of bounds. - * The maximum encoding speed level may be - * implementation- and encoding mode-specific, and can be - * obtained via #TH_ENCCTL_GET_SPLEVEL_MAX. 
- * \retval OC_IMPL Not supported by this implementation in the current - * encoding mode.*/ -#define TH_ENCCTL_SET_SPLEVEL (14) - -/*@}*/ - -#define OC_FAULT -1 /**< General failure */ -#define OC_EINVAL -10 /**< Library encountered invalid internal data */ -#define OC_DISABLED -11 /**< Requested action is disabled */ -#define OC_BADHEADER -20 /**< Header packet was corrupt/invalid */ -#define OC_NOTFORMAT -21 /**< Packet is not a theora packet */ -#define OC_VERSION -22 /**< Bitstream version is not handled */ -#define OC_IMPL -23 /**< Feature or action not implemented */ -#define OC_BADPACKET -24 /**< Packet is corrupt */ -#define OC_NEWPACKET -25 /**< Packet is an (ignorable) unhandled extension */ -#define OC_DUPFRAME 1 /**< Packet is a dropped frame */ - -/** - * Retrieve a human-readable string to identify the encoder vendor and version. - * \returns A version string. - */ -extern const char *theora_version_string(void); - -/** - * Retrieve a 32-bit version number. - * This number is composed of a 16-bit major version, 8-bit minor version - * and 8 bit sub-version, composed as follows: -
-   (VERSION_MAJOR<<16) + (VERSION_MINOR<<8) + (VERSION_SUB)
-
-* \returns The version number. -*/ -extern ogg_uint32_t theora_version_number(void); - -/** - * Initialize the theora encoder. - * \param th The theora_state handle to initialize for encoding. - * \param ti A theora_info struct filled with the desired encoding parameters. - * \retval 0 Success - */ -extern int theora_encode_init(theora_state *th, theora_info *ti); - -/** - * Submit a YUV buffer to the theora encoder. - * \param t A theora_state handle previously initialized for encoding. - * \param yuv A buffer of YUV data to encode. Note that both the yuv_buffer - * struct and the luma/chroma buffers within should be allocated by - * the user. - * \retval OC_EINVAL Encoder is not ready, or is finished. - * \retval -1 The size of the given frame differs from those previously input - * \retval 0 Success - */ -extern int theora_encode_YUVin(theora_state *t, yuv_buffer *yuv); - -/** - * Request the next packet of encoded video. - * The encoded data is placed in a user-provided ogg_packet structure. - * \param t A theora_state handle previously initialized for encoding. - * \param last_p whether this is the last packet the encoder should produce. - * \param op An ogg_packet structure to fill. libtheora will set all - * elements of this structure, including a pointer to encoded - * data. The memory for the encoded data is owned by libtheora. - * \retval 0 No internal storage exists OR no packet is ready - * \retval -1 The encoding process has completed - * \retval 1 Success - */ -extern int theora_encode_packetout( theora_state *t, int last_p, - ogg_packet *op); - -/** - * Request a packet containing the initial header. - * A pointer to the header data is placed in a user-provided ogg_packet - * structure. - * \param t A theora_state handle previously initialized for encoding. - * \param op An ogg_packet structure to fill. libtheora will set all - * elements of this structure, including a pointer to the header - * data. 
The memory for the header data is owned by libtheora. - * \retval 0 Success - */ -extern int theora_encode_header(theora_state *t, ogg_packet *op); - -/** - * Request a comment header packet from provided metadata. - * A pointer to the comment data is placed in a user-provided ogg_packet - * structure. - * \param tc A theora_comment structure filled with the desired metadata - * \param op An ogg_packet structure to fill. libtheora will set all - * elements of this structure, including a pointer to the encoded - * comment data. The memory for the comment data is owned by - * libtheora. - * \retval 0 Success - */ -extern int theora_encode_comment(theora_comment *tc, ogg_packet *op); - -/** - * Request a packet containing the codebook tables for the stream. - * A pointer to the codebook data is placed in a user-provided ogg_packet - * structure. - * \param t A theora_state handle previously initialized for encoding. - * \param op An ogg_packet structure to fill. libtheora will set all - * elements of this structure, including a pointer to the codebook - * data. The memory for the header data is owned by libtheora. - * \retval 0 Success - */ -extern int theora_encode_tables(theora_state *t, ogg_packet *op); - -/** - * Decode an Ogg packet, with the expectation that the packet contains - * an initial header, comment data or codebook tables. - * - * \param ci A theora_info structure to fill. This must have been previously - * initialized with theora_info_init(). If \a op contains an initial - * header, theora_decode_header() will fill \a ci with the - * parsed header values. If \a op contains codebook tables, - * theora_decode_header() will parse these and attach an internal - * representation to \a ci->codec_setup. - * \param cc A theora_comment structure to fill. If \a op contains comment - * data, theora_decode_header() will fill \a cc with the parsed - * comments. 
- * \param op An ogg_packet structure which you expect contains an initial - * header, comment data or codebook tables. - * - * \retval OC_BADHEADER \a op is NULL; OR the first byte of \a op->packet - * has the signature of an initial packet, but op is - * not a b_o_s packet; OR this packet has the signature - * of an initial header packet, but an initial header - * packet has already been seen; OR this packet has the - * signature of a comment packet, but the initial header - * has not yet been seen; OR this packet has the signature - * of a comment packet, but contains invalid data; OR - * this packet has the signature of codebook tables, - * but the initial header or comments have not yet - * been seen; OR this packet has the signature of codebook - * tables, but contains invalid data; - * OR the stream being decoded has a compatible version - * but this packet does not have the signature of a - * theora initial header, comments, or codebook packet - * \retval OC_VERSION The packet data of \a op is an initial header with - * a version which is incompatible with this version of - * libtheora. - * \retval OC_NEWPACKET the stream being decoded has an incompatible (future) - * version and contains an unknown signature. - * \retval 0 Success - * - * \note The normal usage is that theora_decode_header() be called on the - * first three packets of a theora logical bitstream in succession. - */ -extern int theora_decode_header(theora_info *ci, theora_comment *cc, - ogg_packet *op); - -/** - * Initialize a theora_state handle for decoding. - * \param th The theora_state handle to initialize. - * \param c A theora_info struct filled with the desired decoding parameters. - * This is of course usually obtained from a previous call to - * theora_decode_header(). - * \retval 0 Success - */ -extern int theora_decode_init(theora_state *th, theora_info *c); - -/** - * Input a packet containing encoded data into the theora decoder. 
- * \param th A theora_state handle previously initialized for decoding. - * \param op An ogg_packet containing encoded theora data. - * \retval 0 Success - * \retval OC_BADPACKET \a op does not contain encoded video data - */ -extern int theora_decode_packetin(theora_state *th,ogg_packet *op); - -/** - * Output the next available frame of decoded YUV data. - * \param th A theora_state handle previously initialized for decoding. - * \param yuv A yuv_buffer in which libtheora should place the decoded data. - * Note that the buffer struct itself is allocated by the user, but - * that the luma and chroma pointers will be filled in by the - * library. Also note that these luma and chroma regions should be - * considered read-only by the user. - * \retval 0 Success - */ -extern int theora_decode_YUVout(theora_state *th,yuv_buffer *yuv); - -/** - * Report whether a theora packet is a header or not - * This function does no verification beyond checking the header - * flag bit so it should not be used for bitstream identification; - * use theora_decode_header() for that. - * - * \param op An ogg_packet containing encoded theora data. - * \retval 1 The packet is a header packet - * \retval 0 The packet is not a header packet (and so contains frame data) - * - * Thus function was added in the 1.0alpha4 release. - */ -extern int theora_packet_isheader(ogg_packet *op); - -/** - * Report whether a theora packet is a keyframe or not - * - * \param op An ogg_packet containing encoded theora data. - * \retval 1 The packet contains a keyframe image - * \retval 0 The packet is contains an interframe delta - * \retval -1 The packet is not an image data packet at all - * - * Thus function was added in the 1.0alpha4 release. - */ -extern int theora_packet_iskeyframe(ogg_packet *op); - -/** - * Report the granulepos shift radix - * - * When embedded in Ogg, Theora uses a two-part granulepos, - * splitting the 64-bit field into two pieces. 
The more-significant - * section represents the frame count at the last keyframe, - * and the less-significant section represents the count of - * frames since the last keyframe. In this way the overall - * field is still non-decreasing with time, but usefully encodes - * a pointer to the last keyframe, which is necessary for - * correctly restarting decode after a seek. - * - * This function reports the number of bits used to represent - * the distance to the last keyframe, and thus how the granulepos - * field must be shifted or masked to obtain the two parts. - * - * Since libtheora returns compressed data in an ogg_packet - * structure, this may be generally useful even if the Theora - * packets are not being used in an Ogg container. - * - * \param ti A previously initialized theora_info struct - * \returns The bit shift dividing the two granulepos fields - * - * This function was added in the 1.0alpha5 release. - */ -int theora_granule_shift(theora_info *ti); - -/** - * Convert a granulepos to an absolute frame index, starting at 0. - * The granulepos is interpreted in the context of a given theora_state handle. - * - * Note that while the granulepos encodes the frame count (i.e. starting - * from 1) this call returns the frame index, starting from zero. Thus - * One can calculate the presentation time by multiplying the index by - * the rate. - * - * \param th A previously initialized theora_state handle (encode or decode) - * \param granulepos The granulepos to convert. - * \returns The frame index corresponding to \a granulepos. - * \retval -1 The given granulepos is undefined (i.e. negative) - * - * Thus function was added in the 1.0alpha4 release. - */ -extern ogg_int64_t theora_granule_frame(theora_state *th,ogg_int64_t granulepos); - -/** - * Convert a granulepos to absolute time in seconds. The granulepos is - * interpreted in the context of a given theora_state handle, and gives - * the end time of a frame's presentation as used in Ogg mux ordering. 
- * - * \param th A previously initialized theora_state handle (encode or decode) - * \param granulepos The granulepos to convert. - * \returns The absolute time in seconds corresponding to \a granulepos. - * This is the "end time" for the frame, or the latest time it should - * be displayed. - * It is not the presentation time. - * \retval -1. The given granulepos is undefined (i.e. negative), or - * \retval -1. The function has been disabled because floating - * point support is not available. - */ -extern double theora_granule_time(theora_state *th,ogg_int64_t granulepos); - -/** - * Initialize a theora_info structure. All values within the given theora_info - * structure are initialized, and space is allocated within libtheora for - * internal codec setup data. - * \param c A theora_info struct to initialize. - */ -extern void theora_info_init(theora_info *c); - -/** - * Clear a theora_info structure. All values within the given theora_info - * structure are cleared, and associated internal codec setup data is freed. - * \param c A theora_info struct to initialize. - */ -extern void theora_info_clear(theora_info *c); - -/** - * Free all internal data associated with a theora_state handle. - * \param t A theora_state handle. - */ -extern void theora_clear(theora_state *t); - -/** - * Initialize an allocated theora_comment structure - * \param tc An allocated theora_comment structure - **/ -extern void theora_comment_init(theora_comment *tc); - -/** - * Add a comment to an initialized theora_comment structure - * \param tc A previously initialized theora comment structure - * \param comment A null-terminated string encoding the comment in the form - * "TAG=the value" - * - * Neither theora_comment_add() nor theora_comment_add_tag() support - * comments containing null values, although the bitstream format - * supports this. To add such comments you will need to manipulate - * the theora_comment structure directly. 
- **/ - -extern void theora_comment_add(theora_comment *tc, char *comment); - -/** - * Add a comment to an initialized theora_comment structure. - * \param tc A previously initialized theora comment structure - * \param tag A null-terminated string containing the tag - * associated with the comment. - * \param value The corresponding value as a null-terminated string - * - * Neither theora_comment_add() nor theora_comment_add_tag() support - * comments containing null values, although the bitstream format - * supports this. To add such comments you will need to manipulate - * the theora_comment structure directly. - **/ -extern void theora_comment_add_tag(theora_comment *tc, - char *tag, char *value); - -/** - * Look up a comment value by tag. - * \param tc Tn initialized theora_comment structure - * \param tag The tag to look up - * \param count The instance of the tag. The same tag can appear multiple - * times, each with a distinct and ordered value, so an index - * is required to retrieve them all. - * \returns A pointer to the queried tag's value - * \retval NULL No matching tag is found - * - * \note Use theora_comment_query_count() to get the legal range for the - * count parameter. - **/ - -extern char *theora_comment_query(theora_comment *tc, char *tag, int count); - -/** Look up the number of instances of a tag. - * \param tc An initialized theora_comment structure - * \param tag The tag to look up - * \returns The number on instances of a particular tag. - * - * Call this first when querying for a specific tag and then interate - * over the number of instances with separate calls to - * theora_comment_query() to retrieve all instances in order. - **/ -extern int theora_comment_query_count(theora_comment *tc, char *tag); - -/** - * Clear an allocated theora_comment struct so that it can be freed. - * \param tc An allocated theora_comment structure. - **/ -extern void theora_comment_clear(theora_comment *tc); - -/**Encoder control function. 
- * This is used to provide advanced control the encoding process. - * \param th A #theora_state handle. - * \param req The control code to process. - * See \ref encctlcodes_old "the list of available - * control codes" for details. - * \param buf The parameters for this control code. - * \param buf_sz The size of the parameter buffer.*/ -extern int theora_control(theora_state *th,int req,void *buf,size_t buf_sz); - -/* @} */ /* end oldfuncs doxygen group */ - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* _O_THEORA_H_ */ diff --git a/drivers/theora/theoradec.h b/drivers/theora/theoradec.h deleted file mode 100644 index b20f0e3a64..0000000000 --- a/drivers/theora/theoradec.h +++ /dev/null @@ -1,325 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $ - - ********************************************************************/ - -/**\file - * The libtheoradec C decoding API.*/ - -#if !defined(_O_THEORA_THEORADEC_H_) -# define _O_THEORA_THEORADEC_H_ (1) -# include -# include -# include "codec.h" - -#if defined(__cplusplus) -extern "C" { -#endif - - - -/**\name th_decode_ctl() codes - * \anchor decctlcodes - * These are the available request codes for th_decode_ctl(). - * By convention, these are odd, to distinguish them from the - * \ref encctlcodes "encoder control codes". 
- * Keep any experimental or vendor-specific values above \c 0x8000.*/ -/*@{*/ -/**Gets the maximum post-processing level. - * The decoder supports a post-processing filter that can improve - * the appearance of the decoded images. This returns the highest - * level setting for this post-processor, corresponding to maximum - * improvement and computational expense. - * - * \param[out] _buf int: The maximum post-processing level. - * \retval TH_EFAULT \a _dec_ctx or \a _buf is NULL. - * \retval TH_EINVAL \a _buf_sz is not sizeof(int). - * \retval TH_EIMPL Not supported by this implementation.*/ -#define TH_DECCTL_GET_PPLEVEL_MAX (1) -/**Sets the post-processing level. - * By default, post-processing is disabled. - * - * Sets the level of post-processing to use when decoding the - * compressed stream. This must be a value between zero (off) - * and the maximum returned by TH_DECCTL_GET_PPLEVEL_MAX. - * - * \param[in] _buf int: The new post-processing level. - * 0 to disable; larger values use more CPU. - * \retval TH_EFAULT \a _dec_ctx or \a _buf is NULL. - * \retval TH_EINVAL \a _buf_sz is not sizeof(int), or the - * post-processing level is out of bounds. - * The maximum post-processing level may be - * implementation-specific, and can be obtained via - * #TH_DECCTL_GET_PPLEVEL_MAX. - * \retval TH_EIMPL Not supported by this implementation.*/ -#define TH_DECCTL_SET_PPLEVEL (3) -/**Sets the granule position. - * Call this after a seek, before decoding the first frame, to ensure that the - * proper granule position is returned for all subsequent frames. - * If you track timestamps yourself and do not use the granule position - * returned by the decoder, then you need not call this function. - * - * \param[in] _buf ogg_int64_t: The granule position of the next - * frame. - * \retval TH_EFAULT \a _dec_ctx or \a _buf is NULL. 
- * \retval TH_EINVAL \a _buf_sz is not sizeof(ogg_int64_t), or the - * granule position is negative.*/ -#define TH_DECCTL_SET_GRANPOS (5) -/**Sets the striped decode callback function. - * If set, this function will be called as each piece of a frame is fully - * decoded in th_decode_packetin(). - * You can pass in a #th_stripe_callback with - * th_stripe_callback#stripe_decoded set to NULL to disable the - * callbacks at any point. - * Enabling striped decode does not prevent you from calling - * th_decode_ycbcr_out() after the frame is fully decoded. - * - * \param[in] _buf #th_stripe_callback: The callback parameters. - * \retval TH_EFAULT \a _dec_ctx or \a _buf is NULL. - * \retval TH_EINVAL \a _buf_sz is not - * sizeof(th_stripe_callback).*/ -#define TH_DECCTL_SET_STRIPE_CB (7) - -/**Enables telemetry and sets the macroblock display mode */ -#define TH_DECCTL_SET_TELEMETRY_MBMODE (9) -/**Enables telemetry and sets the motion vector display mode */ -#define TH_DECCTL_SET_TELEMETRY_MV (11) -/**Enables telemetry and sets the adaptive quantization display mode */ -#define TH_DECCTL_SET_TELEMETRY_QI (13) -/**Enables telemetry and sets the bitstream breakdown visualization mode */ -#define TH_DECCTL_SET_TELEMETRY_BITS (15) -/*@}*/ - - - -/**A callback function for striped decode. - * This is a function pointer to an application-provided function that will be - * called each time a section of the image is fully decoded in - * th_decode_packetin(). - * This allows the application to process the section immediately, while it is - * still in cache. - * Note that the frame is decoded bottom to top, so \a _yfrag0 will steadily - * decrease with each call until it reaches 0, at which point the full frame - * is decoded. - * The number of fragment rows made available in each call depends on the pixel - * format and the number of post-processing filters enabled, and may not even - * be constant for the entire frame. 
- * If a non-NULL \a _granpos pointer is passed to - * th_decode_packetin(), the granule position for the frame will be stored - * in it before the first callback is made. - * If an entire frame is dropped (a 0-byte packet), then no callbacks will be - * made at all for that frame. - * \param _ctx An application-provided context pointer. - * \param _buf The image buffer for the decoded frame. - * \param _yfrag0 The Y coordinate of the first row of 8x8 fragments - * decoded. - * Multiply this by 8 to obtain the pixel row number in the - * luma plane. - * If the chroma planes are subsampled in the Y direction, - * this will always be divisible by two. - * \param _yfrag_end The Y coordinate of the first row of 8x8 fragments past - * the newly decoded section. - * If the chroma planes are subsampled in the Y direction, - * this will always be divisible by two. - * I.e., this section contains fragment rows - * \a _yfrag0 ...\a _yfrag_end -1.*/ -typedef void (*th_stripe_decoded_func)(void *_ctx,th_ycbcr_buffer _buf, - int _yfrag0,int _yfrag_end); - -/**The striped decode callback data to pass to #TH_DECCTL_SET_STRIPE_CB.*/ -typedef struct{ - /**An application-provided context pointer. - * This will be passed back verbatim to the application.*/ - void *ctx; - /**The callback function pointer.*/ - th_stripe_decoded_func stripe_decoded; -}th_stripe_callback; - - - -/**\name Decoder state - The following data structures are opaque, and their contents are not - publicly defined by this API. - Referring to their internals directly is unsupported, and may break without - warning.*/ -/*@{*/ -/**The decoder context.*/ -typedef struct th_dec_ctx th_dec_ctx; -/**Setup information. - This contains auxiliary information (Huffman tables and quantization - parameters) decoded from the setup header by th_decode_headerin() to be - passed to th_decode_alloc(). 
- It can be re-used to initialize any number of decoders, and can be freed - via th_setup_free() at any time.*/ -typedef struct th_setup_info th_setup_info; -/*@}*/ - - - -/**\defgroup decfuncs Functions for Decoding*/ -/*@{*/ -/**\name Functions for decoding - * You must link to libtheoradec if you use any of the - * functions in this section. - * - * The functions are listed in the order they are used in a typical decode. - * The basic steps are: - * - Parse the header packets by repeatedly calling th_decode_headerin(). - * - Allocate a #th_dec_ctx handle with th_decode_alloc(). - * - Call th_setup_free() to free any memory used for codec setup - * information. - * - Perform any additional decoder configuration with th_decode_ctl(). - * - For each video data packet: - * - Submit the packet to the decoder via th_decode_packetin(). - * - Retrieve the uncompressed video data via th_decode_ycbcr_out(). - * - Call th_decode_free() to release all decoder memory.*/ -/*@{*/ -/**Decodes the header packets of a Theora stream. - * This should be called on the initial packets of the stream, in succession, - * until it returns 0, indicating that all headers have been - * processed, or an error is encountered. - * At least three header packets are required, and additional optional header - * packets may follow. - * This can be used on the first packet of any logical stream to determine if - * that stream is a Theora stream. - * \param _info A #th_info structure to fill in. - * This must have been previously initialized with - * th_info_init(). - * The application may immediately begin using the contents of - * this structure after the first header is decoded, though it - * must continue to be passed in on all subsequent calls. - * \param _tc A #th_comment structure to fill in. - * The application may immediately begin using the contents of - * this structure after the second header is decoded, though it - * must continue to be passed in on all subsequent calls. 
- * \param _setup Returns a pointer to additional, private setup information - * needed by the decoder. - * The contents of this pointer must be initialized to - * NULL on the first call, and the returned value must - * continue to be passed in on all subsequent calls. - * \param _op An ogg_packet structure which contains one of the - * initial packets of an Ogg logical stream. - * \return A positive value indicates that a Theora header was successfully - * processed. - * \retval 0 The first video data packet was encountered after all - * required header packets were parsed. - * The packet just passed in on this call should be saved - * and fed to th_decode_packetin() to begin decoding - * video data. - * \retval TH_EFAULT One of \a _info, \a _tc, or \a _setup was - * NULL. - * \retval TH_EBADHEADER \a _op was NULL, the packet was not the next - * header packet in the expected sequence, or the format - * of the header data was invalid. - * \retval TH_EVERSION The packet data was a Theora info header, but for a - * bitstream version not decodable with this version of - * libtheoradec. - * \retval TH_ENOTFORMAT The packet was not a Theora header. - */ -extern int th_decode_headerin(th_info *_info,th_comment *_tc, - th_setup_info **_setup,ogg_packet *_op); -/**Allocates a decoder instance. - * - * Security Warning: The Theora format supports very large frame sizes, - * potentially even larger than the address space of a 32-bit machine, and - * creating a decoder context allocates the space for several frames of data. - * If the allocation fails here, your program will crash, possibly at some - * future point because the OS kernel returned a valid memory range and will - * only fail when it tries to map the pages in it the first time they are - * used. - * Even if it succeeds, you may experience a denial of service if the frame - * size is large enough to cause excessive paging. 
- * If you are integrating libtheora in a larger application where such things - * are undesirable, it is highly recommended that you check the frame size in - * \a _info before calling this function and refuse to decode streams where it - * is larger than some reasonable maximum. - * libtheora will not check this for you, because there may be machines that - * can handle such streams and applications that wish to. - * \param _info A #th_info struct filled via th_decode_headerin(). - * \param _setup A #th_setup_info handle returned via - * th_decode_headerin(). - * \return The initialized #th_dec_ctx handle. - * \retval NULL If the decoding parameters were invalid.*/ -extern th_dec_ctx *th_decode_alloc(const th_info *_info, - const th_setup_info *_setup); -/**Releases all storage used for the decoder setup information. - * This should be called after you no longer want to create any decoders for - * a stream whose headers you have parsed with th_decode_headerin(). - * \param _setup The setup information to free. - * This can safely be NULL.*/ -extern void th_setup_free(th_setup_info *_setup); -/**Decoder control function. - * This is used to provide advanced control of the decoding process. - * \param _dec A #th_dec_ctx handle. - * \param _req The control code to process. - * See \ref decctlcodes "the list of available control codes" - * for details. - * \param _buf The parameters for this control code. - * \param _buf_sz The size of the parameter buffer.*/ -extern int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf, - size_t _buf_sz); -/**Submits a packet containing encoded video data to the decoder. - * \param _dec A #th_dec_ctx handle. - * \param _op An ogg_packet containing encoded video data. - * \param _granpos Returns the granule position of the decoded packet. - * If non-NULL, the granule position for this specific - * packet is stored in this location. - * This is computed incrementally from previously decoded - * packets. 
- * After a seek, the correct granule position must be set via - * #TH_DECCTL_SET_GRANPOS for this to work properly. - * \retval 0 Success. - * A new decoded frame can be retrieved by calling - * th_decode_ycbcr_out(). - * \retval TH_DUPFRAME The packet represented a dropped (0-byte) frame. - * The player can skip the call to th_decode_ycbcr_out(), - * as the contents of the decoded frame buffer have not - * changed. - * \retval TH_EFAULT \a _dec or \a _op was NULL. - * \retval TH_EBADPACKET \a _op does not contain encoded video data. - * \retval TH_EIMPL The video data uses bitstream features which this - * library does not support.*/ -extern int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op, - ogg_int64_t *_granpos); -/**Outputs the next available frame of decoded Y'CbCr data. - * If a striped decode callback has been set with #TH_DECCTL_SET_STRIPE_CB, - * then the application does not need to call this function. - * \param _dec A #th_dec_ctx handle. - * \param _ycbcr A video buffer structure to fill in. - * libtheoradec will fill in all the members of this - * structure, including the pointers to the uncompressed video - * data. - * The memory for this video data is owned by - * libtheoradec. - * It may be freed or overwritten without notification when - * subsequent frames are decoded. - * \retval 0 Success - * \retval TH_EFAULT \a _dec or \a _ycbcr was NULL. - */ -extern int th_decode_ycbcr_out(th_dec_ctx *_dec, - th_ycbcr_buffer _ycbcr); -/**Frees an allocated decoder instance. 
- * \param _dec A #th_dec_ctx handle.*/ -extern void th_decode_free(th_dec_ctx *_dec); -/*@}*/ -/*@}*/ - - - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/drivers/theora/theoraenc.h b/drivers/theora/theoraenc.h deleted file mode 100644 index fdf2ab21e2..0000000000 --- a/drivers/theora/theoraenc.h +++ /dev/null @@ -1,486 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $ - - ********************************************************************/ - -/**\file - * The libtheoraenc C encoding API.*/ - -#if !defined(_O_THEORA_THEORAENC_H_) -# define _O_THEORA_THEORAENC_H_ (1) -# include -# include -# include "codec.h" - -#if defined(__cplusplus) -extern "C" { -#endif - - - -/**\name th_encode_ctl() codes - * \anchor encctlcodes - * These are the available request codes for th_encode_ctl(). - * By convention, these are even, to distinguish them from the - * \ref decctlcodes "decoder control codes". - * Keep any experimental or vendor-specific values above \c 0x8000.*/ -/*@{*/ -/**Sets the Huffman tables to use. - * The tables are copied, not stored by reference, so they can be freed after - * this call. - * NULL may be specified to revert to the default tables. - * - * \param[in] _buf #th_huff_code[#TH_NHUFFMAN_TABLES][#TH_NDCT_TOKENS] - * \retval TH_EFAULT \a _enc_ctx is NULL. 
- * \retval TH_EINVAL Encoding has already begun or one or more of the given - * tables is not full or prefix-free, \a _buf is - * NULL and \a _buf_sz is not zero, or \a _buf is - * non-NULL and \a _buf_sz is not - * sizeof(#th_huff_code)*#TH_NHUFFMAN_TABLES*#TH_NDCT_TOKENS. - * \retval TH_EIMPL Not supported by this implementation.*/ -#define TH_ENCCTL_SET_HUFFMAN_CODES (0) -/**Sets the quantization parameters to use. - * The parameters are copied, not stored by reference, so they can be freed - * after this call. - * NULL may be specified to revert to the default parameters. - * - * \param[in] _buf #th_quant_info - * \retval TH_EFAULT \a _enc_ctx is NULL. - * \retval TH_EINVAL Encoding has already begun, \a _buf is - * NULL and \a _buf_sz is not zero, - * or \a _buf is non-NULL and - * \a _buf_sz is not sizeof(#th_quant_info). - * \retval TH_EIMPL Not supported by this implementation.*/ -#define TH_ENCCTL_SET_QUANT_PARAMS (2) -/**Sets the maximum distance between key frames. - * This can be changed during an encode, but will be bounded by - * 1<. - * If it is set before encoding begins, th_info#keyframe_granule_shift will - * be enlarged appropriately. - * - * \param[in] _buf ogg_uint32_t: The maximum distance between key - * frames. - * \param[out] _buf ogg_uint32_t: The actual maximum distance set. - * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. - * \retval TH_EINVAL \a _buf_sz is not sizeof(ogg_uint32_t). - * \retval TH_EIMPL Not supported by this implementation.*/ -#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4) -/**Disables any encoder features that would prevent lossless transcoding back - * to VP3. - * This primarily means disabling block-adaptive quantization and always coding - * all four luma blocks in a macro block when 4MV is used. - * It also includes using the VP3 quantization tables and Huffman codes; if you - * set them explicitly after calling this function, the resulting stream will - * not be VP3-compatible. 
- * If you enable VP3-compatibility when encoding 4:2:2 or 4:4:4 source - * material, or when using a picture region smaller than the full frame (e.g. - * a non-multiple-of-16 width or height), then non-VP3 bitstream features will - * still be disabled, but the stream will still not be VP3-compatible, as VP3 - * was not capable of encoding such formats. - * If you call this after encoding has already begun, then the quantization - * tables and codebooks cannot be changed, but the frame-level features will - * be enabled or disabled as requested. - * - * \param[in] _buf int: a non-zero value to enable VP3 compatibility, - * or 0 to disable it (the default). - * \param[out] _buf int: 1 if all bitstream features required for - * VP3-compatibility could be set, and 0 otherwise. - * The latter will be returned if the pixel format is not - * 4:2:0, the picture region is smaller than the full frame, - * or if encoding has begun, preventing the quantization - * tables and codebooks from being set. - * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. - * \retval TH_EINVAL \a _buf_sz is not sizeof(int). - * \retval TH_EIMPL Not supported by this implementation.*/ -#define TH_ENCCTL_SET_VP3_COMPATIBLE (10) -/**Gets the maximum speed level. - * Higher speed levels favor quicker encoding over better quality per bit. - * Depending on the encoding mode, and the internal algorithms used, quality - * may actually improve, but in this case bitrate will also likely increase. - * In any case, overall rate/distortion performance will probably decrease. - * The maximum value, and the meaning of each value, may change depending on - * the current encoding mode (VBR vs. constant quality, etc.). - * - * \param[out] _buf int: The maximum encoding speed level. - * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. - * \retval TH_EINVAL \a _buf_sz is not sizeof(int). 
- * \retval TH_EIMPL Not supported by this implementation in the current - * encoding mode.*/ -#define TH_ENCCTL_GET_SPLEVEL_MAX (12) -/**Sets the speed level. - * The current speed level may be retrieved using #TH_ENCCTL_GET_SPLEVEL. - * - * \param[in] _buf int: The new encoding speed level. - * 0 is slowest, larger values use less CPU. - * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. - * \retval TH_EINVAL \a _buf_sz is not sizeof(int), or the - * encoding speed level is out of bounds. - * The maximum encoding speed level may be - * implementation- and encoding mode-specific, and can be - * obtained via #TH_ENCCTL_GET_SPLEVEL_MAX. - * \retval TH_EIMPL Not supported by this implementation in the current - * encoding mode.*/ -#define TH_ENCCTL_SET_SPLEVEL (14) -/**Gets the current speed level. - * The default speed level may vary according to encoder implementation, but if - * this control code is not supported (it returns #TH_EIMPL), the default may - * be assumed to be the slowest available speed (0). - * The maximum encoding speed level may be implementation- and encoding - * mode-specific, and can be obtained via #TH_ENCCTL_GET_SPLEVEL_MAX. - * - * \param[out] _buf int: The current encoding speed level. - * 0 is slowest, larger values use less CPU. - * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. - * \retval TH_EINVAL \a _buf_sz is not sizeof(int). - * \retval TH_EIMPL Not supported by this implementation in the current - * encoding mode.*/ -#define TH_ENCCTL_GET_SPLEVEL (16) -/**Sets the number of duplicates of the next frame to produce. - * Although libtheora can encode duplicate frames very cheaply, it costs some - * amount of CPU to detect them, and a run of duplicates cannot span a - * keyframe boundary. - * This control code tells the encoder to produce the specified number of extra - * duplicates of the next frame. 
- * This allows the encoder to make smarter keyframe placement decisions and - * rate control decisions, and reduces CPU usage as well, when compared to - * just submitting the same frame for encoding multiple times. - * This setting only applies to the next frame submitted for encoding. - * You MUST call th_encode_packetout() repeatedly until it returns 0, or the - * extra duplicate frames will be lost. - * - * \param[in] _buf int: The number of duplicates to produce. - * If this is negative or zero, no duplicates will be produced. - * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. - * \retval TH_EINVAL \a _buf_sz is not sizeof(int), or the - * number of duplicates is greater than or equal to the - * maximum keyframe interval. - * In the latter case, NO duplicate frames will be produced. - * You must ensure that the maximum keyframe interval is set - * larger than the maximum number of duplicates you will - * ever wish to insert prior to encoding. - * \retval TH_EIMPL Not supported by this implementation in the current - * encoding mode.*/ -#define TH_ENCCTL_SET_DUP_COUNT (18) -/**Modifies the default bitrate management behavior. - * Use to allow or disallow frame dropping, and to enable or disable capping - * bit reservoir overflows and underflows. - * See \ref encctlcodes "the list of available flags". - * The flags are set by default to - * #TH_RATECTL_DROP_FRAMES|#TH_RATECTL_CAP_OVERFLOW. - * - * \param[in] _buf int: Any combination of - * \ref ratectlflags "the available flags": - * - #TH_RATECTL_DROP_FRAMES: Enable frame dropping. - * - #TH_RATECTL_CAP_OVERFLOW: Don't bank excess bits for later - * use. - * - #TH_RATECTL_CAP_UNDERFLOW: Don't try to make up shortfalls - * later. - * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. - * \retval TH_EINVAL \a _buf_sz is not sizeof(int) or rate control - * is not enabled. 
- * \retval TH_EIMPL Not supported by this implementation in the current - * encoding mode.*/ -#define TH_ENCCTL_SET_RATE_FLAGS (20) -/**Sets the size of the bitrate management bit reservoir as a function - * of number of frames. - * The reservoir size affects how quickly bitrate management reacts to - * instantaneous changes in the video complexity. - * Larger reservoirs react more slowly, and provide better overall quality, but - * require more buffering by a client, adding more latency to live streams. - * By default, libtheora sets the reservoir to the maximum distance between - * keyframes, subject to a minimum and maximum limit. - * This call may be used to increase or decrease the reservoir, increasing or - * decreasing the allowed temporary variance in bitrate. - * An implementation may impose some limits on the size of a reservoir it can - * handle, in which case the actual reservoir size may not be exactly what was - * requested. - * The actual value set will be returned. - * - * \param[in] _buf int: Requested size of the reservoir measured in - * frames. - * \param[out] _buf int: The actual size of the reservoir set. - * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. - * \retval TH_EINVAL \a _buf_sz is not sizeof(int), or rate control - * is not enabled. The buffer has an implementation - * defined minimum and maximum size and the value in _buf - * will be adjusted to match the actual value set. - * \retval TH_EIMPL Not supported by this implementation in the current - * encoding mode.*/ -#define TH_ENCCTL_SET_RATE_BUFFER (22) -/**Enable pass 1 of two-pass encoding mode and retrieve the first pass metrics. - * Pass 1 mode must be enabled before the first frame is encoded, and a target - * bitrate must have already been specified to the encoder. - * Although this does not have to be the exact rate that will be used in the - * second pass, closer values may produce better results. 
- * The first call returns the size of the two-pass header data, along with some - * placeholder content, and sets the encoder into pass 1 mode implicitly. - * This call sets the encoder to pass 1 mode implicitly. - * Then, a subsequent call must be made after each call to - * th_encode_ycbcr_in() to retrieve the metrics for that frame. - * An additional, final call must be made to retrieve the summary data, - * containing such information as the total number of frames, etc. - * This must be stored in place of the placeholder data that was returned - * in the first call, before the frame metrics data. - * All of this data must be presented back to the encoder during pass 2 using - * #TH_ENCCTL_2PASS_IN. - * - * \param[out] char *_buf: Returns a pointer to internal storage - * containing the two pass metrics data. - * This storage is only valid until the next call, or until the - * encoder context is freed, and must be copied by the - * application. - * \retval >=0 The number of bytes of metric data available in the - * returned buffer. - * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. - * \retval TH_EINVAL \a _buf_sz is not sizeof(char *), no target - * bitrate has been set, or the first call was made after - * the first frame was submitted for encoding. - * \retval TH_EIMPL Not supported by this implementation.*/ -#define TH_ENCCTL_2PASS_OUT (24) -/**Submits two-pass encoding metric data collected the first encoding pass to - * the second pass. - * The first call must be made before the first frame is encoded, and a target - * bitrate must have already been specified to the encoder. - * It sets the encoder to pass 2 mode implicitly; this cannot be disabled. - * The encoder may require reading data from some or all of the frames in - * advance, depending on, e.g., the reservoir size used in the second pass. 
- * You must call this function repeatedly before each frame to provide data - * until either a) it fails to consume all of the data presented or b) all of - * the pass 1 data has been consumed. - * In the first case, you must save the remaining data to be presented after - * the next frame. - * You can call this function with a NULL argument to get an upper bound on - * the number of bytes that will be required before the next frame. - * - * When pass 2 is first enabled, the default bit reservoir is set to the entire - * file; this gives maximum flexibility but can lead to very high peak rates. - * You can subsequently set it to another value with #TH_ENCCTL_SET_RATE_BUFFER - * (e.g., to set it to the keyframe interval for non-live streaming), however, - * you may then need to provide more data before the next frame. - * - * \param[in] _buf char[]: A buffer containing the data returned by - * #TH_ENCCTL_2PASS_OUT in pass 1. - * You may pass NULL for \a _buf to return an upper - * bound on the number of additional bytes needed before the - * next frame. - * The summary data returned at the end of pass 1 must be at - * the head of the buffer on the first call with a - * non-NULL \a _buf, and the placeholder data - * returned at the start of pass 1 should be omitted. - * After each call you should advance this buffer by the number - * of bytes consumed. - * \retval >0 The number of bytes of metric data required/consumed. - * \retval 0 No more data is required before the next frame. - * \retval TH_EFAULT \a _enc_ctx is NULL. - * \retval TH_EINVAL No target bitrate has been set, or the first call was - * made after the first frame was submitted for - * encoding. - * \retval TH_ENOTFORMAT The data did not appear to be pass 1 from a compatible - * implementation of this library. - * \retval TH_EBADHEADER The data was invalid; this may be returned when - * attempting to read an aborted pass 1 file that still - * has the placeholder data in place of the summary - * data. 
- * \retval TH_EIMPL Not supported by this implementation.*/ -#define TH_ENCCTL_2PASS_IN (26) -/**Sets the current encoding quality. - * This is only valid so long as no bitrate has been specified, either through - * the #th_info struct used to initialize the encoder or through - * #TH_ENCCTL_SET_BITRATE (this restriction may be relaxed in a future - * version). - * If it is set before the headers are emitted, the target quality encoded in - * them will be updated. - * - * \param[in] _buf int: The new target quality, in the range 0...63, - * inclusive. - * \retval 0 Success. - * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. - * \retval TH_EINVAL A target bitrate has already been specified, or the - * quality index was not in the range 0...63. - * \retval TH_EIMPL Not supported by this implementation.*/ -#define TH_ENCCTL_SET_QUALITY (28) -/**Sets the current encoding bitrate. - * Once a bitrate is set, the encoder must use a rate-controlled mode for all - * future frames (this restriction may be relaxed in a future version). - * If it is set before the headers are emitted, the target bitrate encoded in - * them will be updated. - * Due to the buffer delay, the exact bitrate of each section of the encode is - * not guaranteed. - * The encoder may have already used more bits than allowed for the frames it - * has encoded, expecting to make them up in future frames, or it may have - * used fewer, holding the excess in reserve. - * The exact transition between the two bitrates is not well-defined by this - * API, but may be affected by flags set with #TH_ENCCTL_SET_RATE_FLAGS. - * After a number of frames equal to the buffer delay, one may expect further - * output to average at the target bitrate. - * - * \param[in] _buf long: The new target bitrate, in bits per second. - * \retval 0 Success. - * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. - * \retval TH_EINVAL The target bitrate was not positive. 
- * \retval TH_EIMPL Not supported by this implementation.*/ -#define TH_ENCCTL_SET_BITRATE (30) - -/*@}*/ - - -/**\name TH_ENCCTL_SET_RATE_FLAGS flags - * \anchor ratectlflags - * These are the flags available for use with #TH_ENCCTL_SET_RATE_FLAGS.*/ -/*@{*/ -/**Drop frames to keep within bitrate buffer constraints. - * This can have a severe impact on quality, but is the only way to ensure that - * bitrate targets are met at low rates during sudden bursts of activity.*/ -#define TH_RATECTL_DROP_FRAMES (0x1) -/**Ignore bitrate buffer overflows. - * If the encoder uses so few bits that the reservoir of available bits - * overflows, ignore the excess. - * The encoder will not try to use these extra bits in future frames. - * At high rates this may cause the result to be undersized, but allows a - * client to play the stream using a finite buffer; it should normally be - * enabled.*/ -#define TH_RATECTL_CAP_OVERFLOW (0x2) -/**Ignore bitrate buffer underflows. - * If the encoder uses so many bits that the reservoir of available bits - * underflows, ignore the deficit. - * The encoder will not try to make up these extra bits in future frames. - * At low rates this may cause the result to be oversized; it should normally - * be disabled.*/ -#define TH_RATECTL_CAP_UNDERFLOW (0x4) -/*@}*/ - - - -/**The quantization parameters used by VP3.*/ -extern const th_quant_info TH_VP31_QUANT_INFO; - -/**The Huffman tables used by VP3.*/ -extern const th_huff_code - TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]; - - - -/**\name Encoder state - The following data structure is opaque, and its contents are not publicly - defined by this API. 
- Referring to its internals directly is unsupported, and may break without - warning.*/ -/*@{*/ -/**The encoder context.*/ -typedef struct th_enc_ctx th_enc_ctx; -/*@}*/ - - - -/**\defgroup encfuncs Functions for Encoding*/ -/*@{*/ -/**\name Functions for encoding - * You must link to libtheoraenc and libtheoradec - * if you use any of the functions in this section. - * - * The functions are listed in the order they are used in a typical encode. - * The basic steps are: - * - Fill in a #th_info structure with details on the format of the video you - * wish to encode. - * - Allocate a #th_enc_ctx handle with th_encode_alloc(). - * - Perform any additional encoder configuration required with - * th_encode_ctl(). - * - Repeatedly call th_encode_flushheader() to retrieve all the header - * packets. - * - For each uncompressed frame: - * - Submit the uncompressed frame via th_encode_ycbcr_in() - * - Repeatedly call th_encode_packetout() to retrieve any video data packets - * that are ready. - * - Call th_encode_free() to release all encoder memory.*/ -/*@{*/ -/**Allocates an encoder instance. - * \param _info A #th_info struct filled with the desired encoding parameters. - * \return The initialized #th_enc_ctx handle. - * \retval NULL If the encoding parameters were invalid.*/ -extern th_enc_ctx *th_encode_alloc(const th_info *_info); -/**Encoder control function. - * This is used to provide advanced control the encoding process. - * \param _enc A #th_enc_ctx handle. - * \param _req The control code to process. - * See \ref encctlcodes "the list of available control codes" - * for details. - * \param _buf The parameters for this control code. - * \param _buf_sz The size of the parameter buffer.*/ -extern int th_encode_ctl(th_enc_ctx *_enc,int _req,void *_buf,size_t _buf_sz); -/**Outputs the next header packet. 
- * This should be called repeatedly after encoder initialization until it - * returns 0 in order to get all of the header packets, in order, before - * encoding actual video data. - * \param _enc A #th_enc_ctx handle. - * \param _comments The metadata to place in the comment header, when it is - * encoded. - * \param _op An ogg_packet structure to fill. - * All of the elements of this structure will be set, - * including a pointer to the header data. - * The memory for the header data is owned by - * libtheoraenc, and may be invalidated when the - * next encoder function is called. - * \return A positive value indicates that a header packet was successfully - * produced. - * \retval 0 No packet was produced, and no more header packets remain. - * \retval TH_EFAULT \a _enc, \a _comments, or \a _op was NULL.*/ -extern int th_encode_flushheader(th_enc_ctx *_enc, - th_comment *_comments,ogg_packet *_op); -/**Submits an uncompressed frame to the encoder. - * \param _enc A #th_enc_ctx handle. - * \param _ycbcr A buffer of Y'CbCr data to encode. - * \retval 0 Success. - * \retval TH_EFAULT \a _enc or \a _ycbcr is NULL. - * \retval TH_EINVAL The buffer size does not match the frame size the encoder - * was initialized with, or encoding has already - * completed.*/ -extern int th_encode_ycbcr_in(th_enc_ctx *_enc,th_ycbcr_buffer _ycbcr); -/**Retrieves encoded video data packets. - * This should be called repeatedly after each frame is submitted to flush any - * encoded packets, until it returns 0. - * The encoder will not buffer these packets as subsequent frames are - * compressed, so a failure to do so will result in lost video data. - * \note Currently the encoder operates in a one-frame-in, one-packet-out - * manner. - * However, this may be changed in the future. - * \param _enc A #th_enc_ctx handle. - * \param _last Set this flag to a non-zero value if no more uncompressed - * frames will be submitted. - * This ensures that a proper EOS flag is set on the last packet. 
- * \param _op An ogg_packet structure to fill. - * All of the elements of this structure will be set, including a - * pointer to the video data. - * The memory for the video data is owned by - * libtheoraenc, and may be invalidated when the next - * encoder function is called. - * \return A positive value indicates that a video data packet was successfully - * produced. - * \retval 0 No packet was produced, and no more encoded video data - * remains. - * \retval TH_EFAULT \a _enc or \a _op was NULL.*/ -extern int th_encode_packetout(th_enc_ctx *_enc,int _last,ogg_packet *_op); -/**Frees an allocated encoder instance. - * \param _enc A #th_enc_ctx handle.*/ -extern void th_encode_free(th_enc_ctx *_enc); -/*@}*/ -/*@}*/ - - - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/drivers/theora/tokenize.c b/drivers/theora/tokenize.c deleted file mode 100644 index 60574c3594..0000000000 --- a/drivers/theora/tokenize.c +++ /dev/null @@ -1,1072 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: tokenize.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ -#include -#include -#include "encint.h" - - - -static int oc_make_eob_token(int _run_count){ - if(_run_count<4)return OC_DCT_EOB1_TOKEN+_run_count-1; - else{ - int cat; - cat=OC_ILOGNZ_32(_run_count)-3; - cat=OC_MINI(cat,3); - return OC_DCT_REPEAT_RUN0_TOKEN+cat; - } -} - -static int oc_make_eob_token_full(int _run_count,int *_eb){ - if(_run_count<4){ - *_eb=0; - return OC_DCT_EOB1_TOKEN+_run_count-1; - } - else{ - int cat; - cat=OC_ILOGNZ_32(_run_count)-3; - cat=OC_MINI(cat,3); - *_eb=_run_count-OC_BYTE_TABLE32(4,8,16,0,cat); - return OC_DCT_REPEAT_RUN0_TOKEN+cat; - } -} - -/*Returns the number of blocks ended by an EOB token.*/ -static int oc_decode_eob_token(int _token,int _eb){ - return (0x20820C41U>>_token*5&0x1F)+_eb; -} - -/*TODO: This is now only used during DCT tokenization, and never for runs; it - should be simplified.*/ -static int oc_make_dct_token_full(int _zzi,int _zzj,int _val,int *_eb){ - int neg; - int zero_run; - int token; - int eb; - neg=_val<0; - _val=abs(_val); - zero_run=_zzj-_zzi; - if(zero_run>0){ - int adj; - /*Implement a minor restriction on stack 1 so that we know during DC fixups - that extending a dctrun token from stack 1 will never overflow.*/ - adj=_zzi!=1; - if(_val<2&&zero_run<17+adj){ - if(zero_run<6){ - token=OC_DCT_RUN_CAT1A+zero_run-1; - eb=neg; - } - else if(zero_run<10){ - token=OC_DCT_RUN_CAT1B; - eb=zero_run-6+(neg<<2); - } - else{ - token=OC_DCT_RUN_CAT1C; - eb=zero_run-10+(neg<<3); - } - } - else if(_val<4&&zero_run<3+adj){ - if(zero_run<2){ - token=OC_DCT_RUN_CAT2A; - eb=_val-2+(neg<<1); - } - else{ - token=OC_DCT_RUN_CAT2B; - eb=zero_run-2+(_val-2<<1)+(neg<<2); - } - } - else{ 
- if(zero_run<9)token=OC_DCT_SHORT_ZRL_TOKEN; - else token=OC_DCT_ZRL_TOKEN; - eb=zero_run-1; - } - } - else if(_val<3){ - token=OC_ONE_TOKEN+(_val-1<<1)+neg; - eb=0; - } - else if(_val<7){ - token=OC_DCT_VAL_CAT2+_val-3; - eb=neg; - } - else if(_val<9){ - token=OC_DCT_VAL_CAT3; - eb=_val-7+(neg<<1); - } - else if(_val<13){ - token=OC_DCT_VAL_CAT4; - eb=_val-9+(neg<<2); - } - else if(_val<21){ - token=OC_DCT_VAL_CAT5; - eb=_val-13+(neg<<3); - } - else if(_val<37){ - token=OC_DCT_VAL_CAT6; - eb=_val-21+(neg<<4); - } - else if(_val<69){ - token=OC_DCT_VAL_CAT7; - eb=_val-37+(neg<<5); - } - else{ - token=OC_DCT_VAL_CAT8; - eb=_val-69+(neg<<9); - } - *_eb=eb; - return token; -} - -/*Token logging to allow a few fragments of efficient rollback. - Late SKIP analysis is tied up in the tokenization process, so we need to be - able to undo a fragment's tokens on a whim.*/ - -static const unsigned char OC_ZZI_HUFF_OFFSET[64]={ - 0,16,16,16,16,16,32,32, - 32,32,32,32,32,32,32,48, - 48,48,48,48,48,48,48,48, - 48,48,48,48,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64 -}; - -static int oc_token_bits(oc_enc_ctx *_enc,int _huffi,int _zzi,int _token){ - return _enc->huff_codes[_huffi+OC_ZZI_HUFF_OFFSET[_zzi]][_token].nbits - +OC_DCT_TOKEN_EXTRA_BITS[_token]; -} - -static void oc_enc_tokenlog_checkpoint(oc_enc_ctx *_enc, - oc_token_checkpoint *_cp,int _pli,int _zzi){ - _cp->pli=_pli; - _cp->zzi=_zzi; - _cp->eob_run=_enc->eob_run[_pli][_zzi]; - _cp->ndct_tokens=_enc->ndct_tokens[_pli][_zzi]; -} - -void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc, - const oc_token_checkpoint *_stack,int _n){ - int i; - for(i=_n;i-->0;){ - int pli; - int zzi; - pli=_stack[i].pli; - zzi=_stack[i].zzi; - _enc->eob_run[pli][zzi]=_stack[i].eob_run; - _enc->ndct_tokens[pli][zzi]=_stack[i].ndct_tokens; - } -} - -static void oc_enc_token_log(oc_enc_ctx *_enc, - int _pli,int _zzi,int _token,int _eb){ - ptrdiff_t ti; - ti=_enc->ndct_tokens[_pli][_zzi]++; - 
_enc->dct_tokens[_pli][_zzi][ti]=(unsigned char)_token; - _enc->extra_bits[_pli][_zzi][ti]=(ogg_uint16_t)_eb; -} - -static void oc_enc_eob_log(oc_enc_ctx *_enc, - int _pli,int _zzi,int _run_count){ - int token; - int eb; - token=oc_make_eob_token_full(_run_count,&eb); - oc_enc_token_log(_enc,_pli,_zzi,token,eb); -} - - -void oc_enc_tokenize_start(oc_enc_ctx *_enc){ - memset(_enc->ndct_tokens,0,sizeof(_enc->ndct_tokens)); - memset(_enc->eob_run,0,sizeof(_enc->eob_run)); - memset(_enc->dct_token_offs,0,sizeof(_enc->dct_token_offs)); - memset(_enc->dc_pred_last,0,sizeof(_enc->dc_pred_last)); -} - -typedef struct oc_quant_token oc_quant_token; - -/*A single node in the Viterbi trellis. - We maintain up to 2 of these per coefficient: - - A token to code if the value is zero (EOB, zero run, or combo token). - - A token to code if the value is not zero (DCT value token).*/ -struct oc_quant_token{ - unsigned char next; - signed char token; - ogg_int16_t eb; - ogg_uint32_t cost; - int bits; - int qc; -}; - -/*Tokenizes the AC coefficients, possibly adjusting the quantization, and then - dequantizes and de-zig-zags the result. 
- The DC coefficient is not preserved; it should be restored by the caller.*/ -int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi, - ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct, - int _zzi,oc_token_checkpoint **_stack,int _acmin){ - oc_token_checkpoint *stack; - ogg_int64_t zflags; - ogg_int64_t nzflags; - ogg_int64_t best_flags; - ogg_uint32_t d2_accum[64]; - oc_quant_token tokens[64][2]; - ogg_uint16_t *eob_run; - const unsigned char *dct_fzig_zag; - ogg_uint32_t cost; - int bits; - int eob; - int token; - int eb; - int next; - int huffi; - int zzi; - int ti; - int zzj; - int qc; - huffi=_enc->huff_idxs[_enc->state.frame_type][1][_pli+1>>1]; - eob_run=_enc->eob_run[_pli]; - memset(tokens[0],0,sizeof(tokens[0])); - best_flags=nzflags=0; - zflags=1; - d2_accum[0]=0; - zzj=64; - for(zzi=OC_MINI(_zzi,63);zzi>0;zzi--){ - ogg_int32_t lambda; - ogg_uint32_t best_cost; - int best_bits=best_bits; - int best_next=best_next; - int best_token=best_token; - int best_eb=best_eb; - int best_qc=best_qc; - int flush_bits; - ogg_uint32_t d2; - int dq; - int e; - int c; - int s; - int tj; - lambda=_enc->lambda; - qc=_qdct[zzi]; - s=-(qc<0); - qc=qc+s^s; - c=_dct[OC_FZIG_ZAG[zzi]]; - if(qc<=1){ - ogg_uint32_t sum_d2; - int nzeros; - int dc_reserve; - /*The hard case: try a zero run.*/ - if(!qc){ - /*Skip runs that are already quantized to zeros. 
- If we considered each zero coefficient in turn, we might - theoretically find a better way to partition long zero runs (e.g., - a run of > 17 zeros followed by a 1 might be better coded as a short - zero run followed by a combo token, rather than the longer zero - token followed by a 1 value token), but zeros are so common that - this becomes very computationally expensive (quadratic instead of - linear in the number of coefficients), for a marginal gain.*/ - while(zzi>1&&!_qdct[zzi-1])zzi--; - /*The distortion of coefficients originally quantized to zero is - treated as zero (since we'll never quantize them to anything else).*/ - d2=0; - } - else{ - c=c+s^s; - d2=c*(ogg_int32_t)c; - } - eob=eob_run[zzi]; - nzeros=zzj-zzi; - zzj&=63; - sum_d2=d2+d2_accum[zzj]; - d2_accum[zzi]=sum_d2; - flush_bits=eob>0?oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob)):0; - /*We reserve 1 spot for combo run tokens that start in the 1st AC stack - to ensure they can be extended to include the DC coefficient if - necessary; this greatly simplifies stack-rewriting later on.*/ - dc_reserve=zzi+62>>6; - best_cost=0xFFFFFFFF; - for(;;){ - if(nzflags>>zzj&1){ - int cat; - int val; - int val_s; - int zzk; - int tk; - next=tokens[zzj][1].next; - tk=next&1; - zzk=next>>1; - /*Try a pure zero run to this point.*/ - cat=nzeros+55>>6; - token=OC_DCT_SHORT_ZRL_TOKEN+cat; - bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); - d2=sum_d2-d2_accum[zzj]; - cost=d2+lambda*bits+tokens[zzj][1].cost; - if(cost<=best_cost){ - best_next=(zzj<<1)+1; - best_token=token; - best_eb=nzeros-1; - best_cost=cost; - best_bits=bits+tokens[zzj][1].bits; - best_qc=0; - } - if(nzeros<16+dc_reserve){ - val=_qdct[zzj]; - val_s=-(val<0); - val=val+val_s^val_s; - if(val<=2){ - /*Try a +/- 1 combo token.*/ - if(nzeros<6){ - token=OC_DCT_RUN_CAT1A+nzeros-1; - eb=-val_s; - } - else{ - cat=nzeros+54>>6; - token=OC_DCT_RUN_CAT1B+cat; - eb=(-val_s<>1; - token=OC_DCT_RUN_CAT2A+cat; - 
bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); - val=2+((val+val_s^val_s)>2); - e=(_dct[OC_FZIG_ZAG[zzj]]+val_s^val_s)-_dequant[zzj]*val; - d2=e*(ogg_int32_t)e+sum_d2-d2_accum[zzj]; - cost=d2+lambda*bits+tokens[zzk][tk].cost; - if(cost<=best_cost){ - best_cost=cost; - best_bits=bits+tokens[zzk][tk].bits; - best_next=next; - best_token=token; - best_eb=(-val_s<<1+cat)+(val-2<>1); - best_qc=val+val_s^val_s; - } - } - } - /*zzj can't be coded as a zero, so stop trying to extend the run.*/ - if(!(zflags>>zzj&1))break; - } - /*We could try to consider _all_ potentially non-zero coefficients, but - if we already found a bunch of them not worth coding, it's fairly - unlikely they would now be worth coding from this position; skipping - them saves a lot of work.*/ - zzj=(tokens[zzj][0].next>>1)-(tokens[zzj][0].qc!=0)&63; - if(zzj==0){ - /*We made it all the way to the end of the block; try an EOB token.*/ - if(eob<4095){ - bits=oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob+1)) - -flush_bits; - } - else bits=oc_token_bits(_enc,huffi,zzi,OC_DCT_EOB1_TOKEN); - cost=sum_d2+bits*lambda; - /*If the best route so far is still a pure zero run to the end of the - block, force coding it as an EOB. 
- Even if it's not optimal for this block, it has a good chance of - getting combined with an EOB token from subsequent blocks, saving - bits overall.*/ - if(cost<=best_cost||best_token<=OC_DCT_ZRL_TOKEN&&zzi+best_eb==63){ - best_next=0; - /*This token is just a marker; in reality we may not emit any - tokens, but update eob_run[] instead.*/ - best_token=OC_DCT_EOB1_TOKEN; - best_eb=0; - best_cost=cost; - best_bits=bits; - best_qc=0; - } - break; - } - nzeros=zzj-zzi; - } - tokens[zzi][0].next=(unsigned char)best_next; - tokens[zzi][0].token=(signed char)best_token; - tokens[zzi][0].eb=(ogg_int16_t)best_eb; - tokens[zzi][0].cost=best_cost; - tokens[zzi][0].bits=best_bits; - tokens[zzi][0].qc=best_qc; - zflags|=(ogg_int64_t)1<>zzj&1; - next=(zzj<<1)+tj; - tokens[zzi][1].next=(unsigned char)next; - tokens[zzi][1].token=(signed char)token; - tokens[zzi][1].eb=0; - tokens[zzi][1].cost=d2+lambda*bits+tokens[zzj][tj].cost; - tokens[zzi][1].bits=bits+tokens[zzj][tj].bits; - tokens[zzi][1].qc=1+s^s; - nzflags|=(ogg_int64_t)1<0?oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob)):0; - if(qc<=2){ - e=2*dq-c; - d2=e*(ogg_int32_t)e; - best_token=OC_TWO_TOKEN-s; - best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); - best_cost=d2+lambda*best_bits; - e-=dq; - d2=e*(ogg_int32_t)e; - token=OC_ONE_TOKEN-s; - bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); - cost=d2+lambda*bits; - if(cost<=best_cost){ - best_token=token; - best_bits=bits; - best_cost=cost; - qc--; - } - best_eb=0; - } - else if(qc<=3){ - e=3*dq-c; - d2=e*(ogg_int32_t)e; - best_token=OC_DCT_VAL_CAT2; - best_eb=-s; - best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); - best_cost=d2+lambda*best_bits; - e-=dq; - d2=e*(ogg_int32_t)e; - token=OC_TWO_TOKEN-s; - bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); - cost=d2+lambda*bits; - if(cost<=best_cost){ - best_token=token; - best_eb=0; - best_bits=bits; - best_cost=cost; - qc--; - } - } - else if(qc<=6){ - e=qc*dq-c; - d2=e*(ogg_int32_t)e; 
- best_token=OC_DCT_VAL_CAT2+qc-3; - best_eb=-s; - best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); - best_cost=d2+lambda*best_bits; - e-=dq; - d2=e*(ogg_int32_t)e; - token=best_token-1; - bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); - cost=d2+lambda*bits; - if(cost<=best_cost){ - best_token=token; - best_bits=bits; - best_cost=cost; - qc--; - } - } - else if(qc<=8){ - e=qc*dq-c; - d2=e*(ogg_int32_t)e; - best_token=OC_DCT_VAL_CAT3; - best_eb=(-s<<1)+qc-7; - best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); - best_cost=d2+lambda*best_bits; - e=6*dq-c; - d2=e*(ogg_int32_t)e; - token=OC_DCT_VAL_CAT2+3; - bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); - cost=d2+lambda*bits; - if(cost<=best_cost){ - best_token=token; - best_eb=-s; - best_bits=bits; - best_cost=cost; - qc=6; - } - } - else if(qc<=12){ - e=qc*dq-c; - d2=e*(ogg_int32_t)e; - best_token=OC_DCT_VAL_CAT4; - best_eb=(-s<<2)+qc-9; - best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); - best_cost=d2+lambda*best_bits; - e=8*dq-c; - d2=e*(ogg_int32_t)e; - token=best_token-1; - bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); - cost=d2+lambda*bits; - if(cost<=best_cost){ - best_token=token; - best_eb=(-s<<1)+1; - best_bits=bits; - best_cost=cost; - qc=8; - } - } - else if(qc<=20){ - e=qc*dq-c; - d2=e*(ogg_int32_t)e; - best_token=OC_DCT_VAL_CAT5; - best_eb=(-s<<3)+qc-13; - best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); - best_cost=d2+lambda*best_bits; - e=12*dq-c; - d2=e*(ogg_int32_t)e; - token=best_token-1; - bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); - cost=d2+lambda*bits; - if(cost<=best_cost){ - best_token=token; - best_eb=(-s<<2)+3; - best_bits=bits; - best_cost=cost; - qc=12; - } - } - else if(qc<=36){ - e=qc*dq-c; - d2=e*(ogg_int32_t)e; - best_token=OC_DCT_VAL_CAT6; - best_eb=(-s<<4)+qc-21; - best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); - best_cost=d2+lambda*best_bits; - e=20*dq-c; - d2=e*(ogg_int32_t)e; - 
token=best_token-1; - bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); - cost=d2+lambda*bits; - if(cost<=best_cost){ - best_token=token; - best_eb=(-s<<3)+7; - best_bits=bits; - best_cost=cost; - qc=20; - } - } - else if(qc<=68){ - e=qc*dq-c; - d2=e*(ogg_int32_t)e; - best_token=OC_DCT_VAL_CAT7; - best_eb=(-s<<5)+qc-37; - best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); - best_cost=d2+lambda*best_bits; - e=36*dq-c; - d2=e*(ogg_int32_t)e; - token=best_token-1; - bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); - cost=d2+lambda*bits; - if(cost>zzj&1; - next=(zzj<<1)+tj; - tokens[zzi][1].next=(unsigned char)next; - tokens[zzi][1].token=(signed char)best_token; - tokens[zzi][1].eb=best_eb; - tokens[zzi][1].cost=best_cost+tokens[zzj][tj].cost; - tokens[zzi][1].bits=best_bits+tokens[zzj][tj].bits; - tokens[zzi][1].qc=qc+s^s; - nzflags|=(ogg_int64_t)1<state.opt_data.dct_fzig_zag; - zzi=1; - ti=best_flags>>1&1; - bits=tokens[zzi][ti].bits; - do{ - oc_enc_tokenlog_checkpoint(_enc,stack++,_pli,zzi); - eob=eob_run[zzi]; - if(tokens[zzi][ti].token=4095){ - oc_enc_eob_log(_enc,_pli,zzi,eob); - eob=0; - } - eob_run[zzi]=eob; - /*We don't include the actual EOB cost for this block in the return value. 
- It will be paid for by the fragment that terminates the EOB run.*/ - bits-=tokens[zzi][ti].bits; - zzi=_zzi; - break; - } - /*Emit pending EOB run if any.*/ - if(eob>0){ - oc_enc_eob_log(_enc,_pli,zzi,eob); - eob_run[zzi]=0; - } - oc_enc_token_log(_enc,_pli,zzi,tokens[zzi][ti].token,tokens[zzi][ti].eb); - next=tokens[zzi][ti].next; - qc=tokens[zzi][ti].qc; - zzj=(next>>1)-1&63; - /*TODO: It may be worth saving the dequantized coefficient in the trellis - above; we had to compute it to measure the error anyway.*/ - _qdct[dct_fzig_zag[zzj]]=(ogg_int16_t)(qc*(int)_dequant[zzj]); - zzi=next>>1; - ti=next&1; - } - while(zzi); - *_stack=stack; - return bits; -} - -void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc, - int _pli,int _fragy0,int _frag_yend){ - const oc_fragment_plane *fplane; - const oc_fragment *frags; - ogg_int16_t *frag_dc; - ptrdiff_t fragi; - int *pred_last; - int nhfrags; - int fragx; - int fragy; - fplane=_enc->state.fplanes+_pli; - frags=_enc->state.frags; - frag_dc=_enc->frag_dc; - pred_last=_enc->dc_pred_last[_pli]; - nhfrags=fplane->nhfrags; - fragi=fplane->froffset+_fragy0*nhfrags; - for(fragy=_fragy0;fragy<_frag_yend;fragy++){ - if(fragy==0){ - /*For the first row, all of the cases reduce to just using the previous - predictor for the same reference frame.*/ - for(fragx=0;fragx=nhfrags)ur_ref=-1; - else{ - ur_ref=u_frags[fragi+1].coded? - OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1; - } - if(frags[fragi].coded){ - int pred; - int ref; - ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode); - /*We break out a separate case based on which of our neighbors use - the same reference frames. 
- This is somewhat faster than trying to make a generic case which - handles all of them, since it reduces lots of poorly predicted - jumps to one switch statement, and also lets a number of the - multiplications be optimized out by strength reduction.*/ - switch((l_ref==ref)|(ul_ref==ref)<<1| - (u_ref==ref)<<2|(ur_ref==ref)<<3){ - default:pred=pred_last[ref];break; - case 1: - case 3:pred=frags[fragi-1].dc;break; - case 2:pred=u_frags[fragi-1].dc;break; - case 4: - case 6: - case 12:pred=u_frags[fragi].dc;break; - case 5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break; - case 8:pred=u_frags[fragi+1].dc;break; - case 9: - case 11: - case 13:{ - pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128; - }break; - case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break; - case 14:{ - pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc) - +10*u_frags[fragi].dc)/16; - }break; - case 7: - case 15:{ - int p0; - int p1; - int p2; - p0=frags[fragi-1].dc; - p1=u_frags[fragi-1].dc; - p2=u_frags[fragi].dc; - pred=(29*(p0+p2)-26*p1)/32; - if(abs(pred-p2)>128)pred=p2; - else if(abs(pred-p0)>128)pred=p0; - else if(abs(pred-p1)>128)pred=p1; - }break; - } - frag_dc[fragi]=(ogg_int16_t)(frags[fragi].dc-pred); - pred_last[ref]=frags[fragi].dc; - l_ref=ref; - } - else l_ref=-1; - ul_ref=u_ref; - u_ref=ur_ref; - } - } - } -} - -void oc_enc_tokenize_dc_frag_list(oc_enc_ctx *_enc,int _pli, - const ptrdiff_t *_coded_fragis,ptrdiff_t _ncoded_fragis, - int _prev_ndct_tokens1,int _prev_eob_run1){ - const ogg_int16_t *frag_dc; - ptrdiff_t fragii; - unsigned char *dct_tokens0; - unsigned char *dct_tokens1; - ogg_uint16_t *extra_bits0; - ogg_uint16_t *extra_bits1; - ptrdiff_t ti0; - ptrdiff_t ti1r; - ptrdiff_t ti1w; - int eob_run0; - int eob_run1; - int neobs1; - int token; - int eb; - int token1=token1; - int eb1=eb1; - /*Return immediately if there are no coded fragments; otherwise we'd flush - any trailing EOB run into the AC 1 list and never read it back out.*/ - 
if(_ncoded_fragis<=0)return; - frag_dc=_enc->frag_dc; - dct_tokens0=_enc->dct_tokens[_pli][0]; - dct_tokens1=_enc->dct_tokens[_pli][1]; - extra_bits0=_enc->extra_bits[_pli][0]; - extra_bits1=_enc->extra_bits[_pli][1]; - ti0=_enc->ndct_tokens[_pli][0]; - ti1w=ti1r=_prev_ndct_tokens1; - eob_run0=_enc->eob_run[_pli][0]; - /*Flush any trailing EOB run for the 1st AC coefficient. - This is needed to allow us to track tokens to the end of the list.*/ - eob_run1=_enc->eob_run[_pli][1]; - if(eob_run1>0)oc_enc_eob_log(_enc,_pli,1,eob_run1); - /*If there was an active EOB run at the start of the 1st AC stack, read it - in and decode it.*/ - if(_prev_eob_run1>0){ - token1=dct_tokens1[ti1r]; - eb1=extra_bits1[ti1r]; - ti1r++; - eob_run1=oc_decode_eob_token(token1,eb1); - /*Consume the portion of the run that came before these fragments.*/ - neobs1=eob_run1-_prev_eob_run1; - } - else eob_run1=neobs1=0; - for(fragii=0;fragii<_ncoded_fragis;fragii++){ - int val; - /*All tokens in the 1st AC coefficient stack are regenerated as the DC - coefficients are produced. - This can be done in-place; stack 1 cannot get larger.*/ - if(!neobs1){ - /*There's no active EOB run in stack 1; read the next token.*/ - token1=dct_tokens1[ti1r]; - eb1=extra_bits1[ti1r]; - ti1r++; - if(token10){ - token=oc_make_eob_token_full(eob_run0,&eb); - dct_tokens0[ti0]=(unsigned char)token; - extra_bits0[ti0]=(ogg_uint16_t)eb; - ti0++; - eob_run0=0; - } - token=oc_make_dct_token_full(0,0,val,&eb); - dct_tokens0[ti0]=(unsigned char)token; - extra_bits0[ti0]=(ogg_uint16_t)eb; - ti0++; - } - else{ - /*Zero DC value; that means the entry in stack 1 might need to be coded - from stack 0. - This requires a stack 1 fixup.*/ - if(neobs1>0){ - /*We're in the middle of an active EOB run in stack 1. 
- Move it to stack 0.*/ - if(++eob_run0>=4095){ - token=oc_make_eob_token_full(eob_run0,&eb); - dct_tokens0[ti0]=(unsigned char)token; - extra_bits0[ti0]=(ogg_uint16_t)eb; - ti0++; - eob_run0=0; - } - eob_run1--; - } - else{ - /*No active EOB run in stack 1, so we can't extend one in stack 0. - Flush it if we've got it.*/ - if(eob_run0>0){ - token=oc_make_eob_token_full(eob_run0,&eb); - dct_tokens0[ti0]=(unsigned char)token; - extra_bits0[ti0]=(ogg_uint16_t)eb; - ti0++; - eob_run0=0; - } - /*Stack 1 token is one of: a pure zero run token, a single - coefficient token, or a zero run/coefficient combo token. - A zero run token is expanded and moved to token stack 0, and the - stack 1 entry dropped. - A single coefficient value may be transformed into combo token that - is moved to stack 0, or if it cannot be combined, it is left alone - and a single length-1 zero run is emitted in stack 0. - A combo token is extended and moved to stack 0. - During AC coding, we restrict the run lengths on combo tokens for - stack 1 to guarantee we can extend them.*/ - switch(token1){ - case OC_DCT_SHORT_ZRL_TOKEN:{ - if(eb1<7){ - dct_tokens0[ti0]=OC_DCT_SHORT_ZRL_TOKEN; - extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); - ti0++; - /*Don't write the AC coefficient back out.*/ - continue; - } - /*Fall through.*/ - } - case OC_DCT_ZRL_TOKEN:{ - dct_tokens0[ti0]=OC_DCT_ZRL_TOKEN; - extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); - ti0++; - /*Don't write the AC coefficient back out.*/ - }continue; - case OC_ONE_TOKEN: - case OC_MINUS_ONE_TOKEN:{ - dct_tokens0[ti0]=OC_DCT_RUN_CAT1A; - extra_bits0[ti0]=(ogg_uint16_t)(token1-OC_ONE_TOKEN); - ti0++; - /*Don't write the AC coefficient back out.*/ - }continue; - case OC_TWO_TOKEN: - case OC_MINUS_TWO_TOKEN:{ - dct_tokens0[ti0]=OC_DCT_RUN_CAT2A; - extra_bits0[ti0]=(ogg_uint16_t)(token1-OC_TWO_TOKEN<<1); - ti0++; - /*Don't write the AC coefficient back out.*/ - }continue; - case OC_DCT_VAL_CAT2:{ - dct_tokens0[ti0]=OC_DCT_RUN_CAT2A; - 
extra_bits0[ti0]=(ogg_uint16_t)((eb1<<1)+1); - ti0++; - /*Don't write the AC coefficient back out.*/ - }continue; - case OC_DCT_RUN_CAT1A: - case OC_DCT_RUN_CAT1A+1: - case OC_DCT_RUN_CAT1A+2: - case OC_DCT_RUN_CAT1A+3:{ - dct_tokens0[ti0]=(unsigned char)(token1+1); - extra_bits0[ti0]=(ogg_uint16_t)eb1; - ti0++; - /*Don't write the AC coefficient back out.*/ - }continue; - case OC_DCT_RUN_CAT1A+4:{ - dct_tokens0[ti0]=OC_DCT_RUN_CAT1B; - extra_bits0[ti0]=(ogg_uint16_t)(eb1<<2); - ti0++; - /*Don't write the AC coefficient back out.*/ - }continue; - case OC_DCT_RUN_CAT1B:{ - if((eb1&3)<3){ - dct_tokens0[ti0]=OC_DCT_RUN_CAT1B; - extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); - ti0++; - /*Don't write the AC coefficient back out.*/ - continue; - } - eb1=((eb1&4)<<1)-1; - /*Fall through.*/ - } - case OC_DCT_RUN_CAT1C:{ - dct_tokens0[ti0]=OC_DCT_RUN_CAT1C; - extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); - ti0++; - /*Don't write the AC coefficient back out.*/ - }continue; - case OC_DCT_RUN_CAT2A:{ - eb1=(eb1<<1)-1; - /*Fall through.*/ - } - case OC_DCT_RUN_CAT2B:{ - dct_tokens0[ti0]=OC_DCT_RUN_CAT2B; - extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); - ti0++; - /*Don't write the AC coefficient back out.*/ - }continue; - } - /*We can't merge tokens, write a short zero run and keep going.*/ - dct_tokens0[ti0]=OC_DCT_SHORT_ZRL_TOKEN; - extra_bits0[ti0]=0; - ti0++; - } - } - if(!neobs1){ - /*Flush any (inactive) EOB run.*/ - if(eob_run1>0){ - token=oc_make_eob_token_full(eob_run1,&eb); - dct_tokens1[ti1w]=(unsigned char)token; - extra_bits1[ti1w]=(ogg_uint16_t)eb; - ti1w++; - eob_run1=0; - } - /*There's no active EOB run, so log the current token.*/ - dct_tokens1[ti1w]=(unsigned char)token1; - extra_bits1[ti1w]=(ogg_uint16_t)eb1; - ti1w++; - } - else{ - /*Otherwise consume one EOB from the current run.*/ - neobs1--; - /*If we have more than 4095 EOBs outstanding in stack1, flush the run.*/ - if(eob_run1-neobs1>=4095){ - token=oc_make_eob_token_full(4095,&eb); - dct_tokens1[ti1w]=(unsigned 
char)token; - extra_bits1[ti1w]=(ogg_uint16_t)eb; - ti1w++; - eob_run1-=4095; - } - } - } - /*Save the current state.*/ - _enc->ndct_tokens[_pli][0]=ti0; - _enc->ndct_tokens[_pli][1]=ti1w; - _enc->eob_run[_pli][0]=eob_run0; - _enc->eob_run[_pli][1]=eob_run1; -} - -/*Final EOB run welding.*/ -void oc_enc_tokenize_finish(oc_enc_ctx *_enc){ - int pli; - int zzi; - /*Emit final EOB runs.*/ - for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){ - int eob_run; - eob_run=_enc->eob_run[pli][zzi]; - if(eob_run>0)oc_enc_eob_log(_enc,pli,zzi,eob_run); - } - /*Merge the final EOB run of one token list with the start of the next, if - possible.*/ - for(zzi=0;zzi<64;zzi++)for(pli=0;pli<3;pli++){ - int old_tok1; - int old_tok2; - int old_eb1; - int old_eb2; - int new_tok; - int new_eb; - int zzj; - int plj; - ptrdiff_t ti=ti; - int run_count; - /*Make sure this coefficient has tokens at all.*/ - if(_enc->ndct_tokens[pli][zzi]<=0)continue; - /*Ensure the first token is an EOB run.*/ - old_tok2=_enc->dct_tokens[pli][zzi][0]; - if(old_tok2>=OC_NDCT_EOB_TOKEN_MAX)continue; - /*Search for a previous coefficient that has any tokens at all.*/ - old_tok1=OC_NDCT_EOB_TOKEN_MAX; - for(zzj=zzi,plj=pli;zzj>=0;zzj--){ - while(plj-->0){ - ti=_enc->ndct_tokens[plj][zzj]-1; - if(ti>=_enc->dct_token_offs[plj][zzj]){ - old_tok1=_enc->dct_tokens[plj][zzj][ti]; - break; - } - } - if(plj>=0)break; - plj=3; - } - /*Ensure its last token was an EOB run.*/ - if(old_tok1>=OC_NDCT_EOB_TOKEN_MAX)continue; - /*Pull off the associated extra bits, if any, and decode the runs.*/ - old_eb1=_enc->extra_bits[plj][zzj][ti]; - old_eb2=_enc->extra_bits[pli][zzi][0]; - run_count=oc_decode_eob_token(old_tok1,old_eb1) - +oc_decode_eob_token(old_tok2,old_eb2); - /*We can't possibly combine these into one run. 
- It might be possible to split them more optimally, but we'll just leave - them as-is.*/ - if(run_count>=4096)continue; - /*We CAN combine them into one run.*/ - new_tok=oc_make_eob_token_full(run_count,&new_eb); - _enc->dct_tokens[plj][zzj][ti]=(unsigned char)new_tok; - _enc->extra_bits[plj][zzj][ti]=(ogg_uint16_t)new_eb; - _enc->dct_token_offs[pli][zzi]++; - } -} diff --git a/drivers/theora/video_stream_theora.cpp b/drivers/theora/video_stream_theora.cpp deleted file mode 100644 index 1f3832ec16..0000000000 --- a/drivers/theora/video_stream_theora.cpp +++ /dev/null @@ -1,945 +0,0 @@ -/*************************************************************************/ -/* video_stream_theora.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* http://www.godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. 
*/ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -#ifdef THEORA_ENABLED - -#include "video_stream_theora.h" -#include "os/os.h" -#include "yuv2rgb.h" -#include "globals.h" - - -int VideoStreamPlaybackTheora:: buffer_data() { - - char *buffer=ogg_sync_buffer(&oy,4096); - -#ifdef THEORA_USE_THREAD_STREAMING - - int read; - - do { - thread_sem->post(); - read = MIN(ring_buffer.data_left(),4096); - if (read) { - ring_buffer.read((uint8_t*)buffer,read); - ogg_sync_wrote(&oy,read); - } else { - OS::get_singleton()->delay_usec(100); - } - - } while(read==0); - - return read; - -#else - - int bytes=file->get_buffer((uint8_t*)buffer, 4096); - ogg_sync_wrote(&oy,bytes); - return(bytes); - -#endif -} - -int VideoStreamPlaybackTheora::queue_page(ogg_page *page){ - if(theora_p) { - ogg_stream_pagein(&to,page); - if (to.e_o_s) - theora_eos=true; - } - if(vorbis_p) { - ogg_stream_pagein(&vo,page); - if (vo.e_o_s) - vorbis_eos=true; - } - return 0; -} - -void VideoStreamPlaybackTheora::video_write(void){ - th_ycbcr_buffer yuv; - th_decode_ycbcr_out(td,yuv); - - /* - int y_offset, uv_offset; - y_offset=(ti.pic_x&~1)+yuv[0].stride*(ti.pic_y&~1); - - { - int pixels = size.x * size.y; - frame_data.resize(pixels * 4); - DVector::Write w = frame_data.write(); - char* dst = (char*)w.ptr(); - int p = 0; - for (int i=0; i::Write w = frame_data.write(); - char* dst = (char*)w.ptr(); - - //uv_offset=(ti.pic_x/2)+(yuv[1].stride)*(ti.pic_y/2); - - if 
(px_fmt == TH_PF_444) { - - yuv444_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0); - - } else if (px_fmt == TH_PF_422) { - - yuv422_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0); - - } else if (px_fmt == TH_PF_420) { - - yuv420_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[2].data, (uint8_t*)yuv[1].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0); - }; - - format = Image::FORMAT_RGBA; - } - - Image img(size.x,size.y,0,Image::FORMAT_RGBA,frame_data); //zero copy image creation - - texture->set_data(img); //zero copy send to visual server - - /* - - if (px_fmt == TH_PF_444) { - - int pitch = 3; - frame_data.resize(size.x * size.y * pitch); - DVector::Write w = frame_data.write(); - char* dst = (char*)w.ptr(); - - for(int i=0;i::Write w = frame_data.write(); - char* dst = (char*)w.ptr(); - - uv_offset=(ti.pic_x/2)+(yuv[1].stride)*(ti.pic_y / div); - for(int i=0;i::Write w = frame_data.write(); - char* dst = (char*)w.ptr(); - - uv_offset=(ti.pic_x/2)+(yuv[1].stride)*(ti.pic_y / div); - for(int i=0;i>1;j++) { - out[j*4+1] = in_u[j]; - out[j*4+3] = in_v[j]; - } - } - - format = Image::FORMAT_YUV_422; - }; - }; - // */ - - frames_pending = 1; -} - -void VideoStreamPlaybackTheora::clear() { - - if (!file) - return; - - if(vorbis_p){ - ogg_stream_clear(&vo); - if (vorbis_p >= 3) { - vorbis_block_clear(&vb); - vorbis_dsp_clear(&vd); - }; - vorbis_comment_clear(&vc); - vorbis_info_clear(&vi); - vorbis_p = 0; - } - if(theora_p){ - ogg_stream_clear(&to); - th_decode_free(td); - th_comment_clear(&tc); - th_info_clear(&ti); - theora_p = 0; - } - ogg_sync_clear(&oy); - -#ifdef THEORA_USE_THREAD_STREAMING - thread_exit=true; - thread_sem->post(); //just in case - Thread::wait_to_finish(thread); - memdelete(thread); - thread=NULL; - 
ring_buffer.clear(); -#endif - //file_name = ""; - - theora_p = 0; - vorbis_p = 0; - videobuf_ready = 0; - frames_pending = 0; - videobuf_time = 0; - theora_eos=false; - vorbis_eos=false; - - if (file) { - memdelete(file); - } - file=NULL; - playing = false; -}; - -void VideoStreamPlaybackTheora::set_file(const String& p_file) { - - ERR_FAIL_COND(playing); - ogg_packet op; - th_setup_info *ts = NULL; - - file_name = p_file; - if (file) { - memdelete(file); - } - file = FileAccess::open(p_file, FileAccess::READ); - ERR_FAIL_COND(!file); - -#ifdef THEORA_USE_THREAD_STREAMING - thread_exit=false; - thread_eof=false; - //pre-fill buffer - int to_read = ring_buffer.space_left(); - int read = file->get_buffer(read_buffer.ptr(),to_read); - ring_buffer.write(read_buffer.ptr(),read); - - thread=Thread::create(_streaming_thread,this); - -#endif - - ogg_sync_init(&oy); - - /* init supporting Vorbis structures needed in header parsing */ - vorbis_info_init(&vi); - vorbis_comment_init(&vc); - - /* init supporting Theora structures needed in header parsing */ - th_comment_init(&tc); - th_info_init(&ti); - - theora_eos=false; - vorbis_eos=false; - - /* Ogg file open; parse the headers */ - /* Only interested in Vorbis/Theora streams */ - int stateflag = 0; - - int audio_track_skip=audio_track; - - - while(!stateflag){ - int ret=buffer_data(); - if(ret==0)break; - while(ogg_sync_pageout(&oy,&og)>0){ - ogg_stream_state test; - - /* is this a mandated initial header? 
If not, stop parsing */ - if(!ogg_page_bos(&og)){ - /* don't leak the page; get it into the appropriate stream */ - queue_page(&og); - stateflag=1; - break; - } - - ogg_stream_init(&test,ogg_page_serialno(&og)); - ogg_stream_pagein(&test,&og); - ogg_stream_packetout(&test,&op); - - - /* identify the codec: try theora */ - if(!theora_p && th_decode_headerin(&ti,&tc,&ts,&op)>=0){ - /* it is theora */ - copymem(&to,&test,sizeof(test)); - theora_p=1; - }else if(!vorbis_p && vorbis_synthesis_headerin(&vi,&vc,&op)>=0){ - - - /* it is vorbis */ - if (audio_track_skip) { - vorbis_info_clear(&vi); - vorbis_comment_clear(&vc); - ogg_stream_clear(&test); - vorbis_info_init(&vi); - vorbis_comment_init(&vc); - - audio_track_skip--; - } else { - copymem(&vo,&test,sizeof(test)); - vorbis_p=1; - } - }else{ - /* whatever it is, we don't care about it */ - ogg_stream_clear(&test); - } - } - /* fall through to non-bos page parsing */ - } - - /* we're expecting more header packets. */ - while((theora_p && theora_p<3) || (vorbis_p && vorbis_p<3)){ - int ret; - - /* look for further theora headers */ - while(theora_p && (theora_p<3) && (ret=ogg_stream_packetout(&to,&op))){ - if(ret<0){ - fprintf(stderr,"Error parsing Theora stream headers; " - "corrupt stream?\n"); - clear(); - return; - } - if(!th_decode_headerin(&ti,&tc,&ts,&op)){ - fprintf(stderr,"Error parsing Theora stream headers; " - "corrupt stream?\n"); - clear(); - return; - } - theora_p++; - } - - /* look for more vorbis header packets */ - while(vorbis_p && (vorbis_p<3) && (ret=ogg_stream_packetout(&vo,&op))){ - if(ret<0){ - fprintf(stderr,"Error parsing Vorbis stream headers; corrupt stream?\n"); - clear(); - return; - } - ret = vorbis_synthesis_headerin(&vi,&vc,&op); - if(ret){ - fprintf(stderr,"Error parsing Vorbis stream headers; corrupt stream?\n"); - clear(); - return; - } - vorbis_p++; - if(vorbis_p==3)break; - } - - /* The header pages/packets will arrive before anything else we - care about, or the stream is not 
obeying spec */ - - if(ogg_sync_pageout(&oy,&og)>0){ - queue_page(&og); /* demux into the appropriate stream */ - }else{ - int ret=buffer_data(); /* someone needs more data */ - if(ret==0){ - fprintf(stderr,"End of file while searching for codec headers.\n"); - clear(); - return; - } - } - } - - /* and now we have it all. initialize decoders */ - if(theora_p){ - td=th_decode_alloc(&ti,ts); - printf("Ogg logical stream %lx is Theora %dx%d %.02f fps", - to.serialno,ti.pic_width,ti.pic_height, - (double)ti.fps_numerator/ti.fps_denominator); - px_fmt=ti.pixel_fmt; - switch(ti.pixel_fmt){ - case TH_PF_420: printf(" 4:2:0 video\n"); break; - case TH_PF_422: printf(" 4:2:2 video\n"); break; - case TH_PF_444: printf(" 4:4:4 video\n"); break; - case TH_PF_RSVD: - default: - printf(" video\n (UNKNOWN Chroma sampling!)\n"); - break; - } - if(ti.pic_width!=ti.frame_width || ti.pic_height!=ti.frame_height) - printf(" Frame content is %dx%d with offset (%d,%d).\n", - ti.frame_width, ti.frame_height, ti.pic_x, ti.pic_y); - th_decode_ctl(td,TH_DECCTL_GET_PPLEVEL_MAX,&pp_level_max, - sizeof(pp_level_max)); - pp_level=pp_level_max; - pp_level=0; - th_decode_ctl(td,TH_DECCTL_SET_PPLEVEL,&pp_level,sizeof(pp_level)); - pp_inc=0; - - /*{ - int arg = 0xffff; - th_decode_ctl(td,TH_DECCTL_SET_TELEMETRY_MBMODE,&arg,sizeof(arg)); - th_decode_ctl(td,TH_DECCTL_SET_TELEMETRY_MV,&arg,sizeof(arg)); - th_decode_ctl(td,TH_DECCTL_SET_TELEMETRY_QI,&arg,sizeof(arg)); - arg=10; - th_decode_ctl(td,TH_DECCTL_SET_TELEMETRY_BITS,&arg,sizeof(arg)); - }*/ - - int w; - int h; - w=(ti.pic_x+ti.frame_width+1&~1)-(ti.pic_x&~1); - h=(ti.pic_y+ti.frame_height+1&~1)-(ti.pic_y&~1); - size.x = w; - size.y = h; - - texture->create(w,h,Image::FORMAT_RGBA,Texture::FLAG_FILTER|Texture::FLAG_VIDEO_SURFACE); - - }else{ - /* tear down the partial theora setup */ - th_info_clear(&ti); - th_comment_clear(&tc); - } - - th_setup_free(ts); - - if(vorbis_p){ - vorbis_synthesis_init(&vd,&vi); - vorbis_block_init(&vd,&vb); - 
fprintf(stderr,"Ogg logical stream %lx is Vorbis %d channel %ld Hz audio.\n", - vo.serialno,vi.channels,vi.rate); - //_setup(vi.channels, vi.rate); - - }else{ - /* tear down the partial vorbis setup */ - vorbis_info_clear(&vi); - vorbis_comment_clear(&vc); - } - - playing = false; - buffering=true; - time=0; - audio_frames_wrote=0; - - -}; - -float VideoStreamPlaybackTheora::get_time() const { - - //print_line("total: "+itos(get_total())+" todo: "+itos(get_todo())); - //return MAX(0,time-((get_total())/(float)vi.rate)); - return time-AudioServer::get_singleton()->get_output_delay()-delay_compensation;//-((get_total())/(float)vi.rate); -}; - -Ref VideoStreamPlaybackTheora::get_texture() { - - return texture; -} - -void VideoStreamPlaybackTheora::update(float p_delta) { - - if (!file) - return; - - if (!playing || paused) { - //printf("not playing\n"); - return; - }; - - - -#ifdef THEORA_USE_THREAD_STREAMING - thread_sem->post(); -#endif - - //double ctime =AudioServer::get_singleton()->get_mix_time(); - - //print_line("play "+rtos(p_delta)); - time+=p_delta; - - if (videobuf_time>get_time()) { - return; //no new frames need to be produced - } - - bool frame_done=false; - bool audio_done=!vorbis_p; - - while (!frame_done || (!audio_done && !vorbis_eos)) { - //a frame needs to be produced - - ogg_packet op; - bool no_theora=false; - - - while (vorbis_p) { - int ret; - float **pcm; - - bool buffer_full=false; - - /* if there's pending, decoded audio, grab it */ - if ((ret=vorbis_synthesis_pcmout(&vd,&pcm))>0) { - - - - const int AUXBUF_LEN=4096; - int to_read = ret; - int16_t aux_buffer[AUXBUF_LEN]; - - while(to_read) { - - int m = MIN(AUXBUF_LEN/vi.channels,to_read); - - int count = 0; - - for(int j=0;j32767)val=32767; - if(val<-32768)val=-32768; - aux_buffer[count++] = val; - } - } - - if (mix_callback) { - int mixed = mix_callback(mix_udata,aux_buffer,m); - to_read-=mixed; - if (mixed!=m) { //could mix no more - buffer_full=true; - break; - } - } else { - 
to_read-=m; //just pretend we sent the audio - } - - - } - - - int tr = vorbis_synthesis_read(&vd, ret-to_read); - - - if (vd.granulepos>=0) { - // print_line("wrote: "+itos(audio_frames_wrote)+" gpos: "+itos(vd.granulepos)); - } - - //print_line("mix audio!"); - - audio_frames_wrote+=ret-to_read; - - //print_line("AGP: "+itos(vd.granulepos)+" added "+itos(ret-to_read)); - - - } else { - - /* no pending audio; is there a pending packet to decode? */ - if (ogg_stream_packetout(&vo,&op)>0){ - if(vorbis_synthesis(&vb,&op)==0) { /* test for success! */ - vorbis_synthesis_blockin(&vd,&vb); - } - } else { /* we need more data; break out to suck in another page */ - //printf("need moar data\n"); - break; - }; - } - - - audio_done = videobuf_time < (audio_frames_wrote/float(vi.rate)); - - if (buffer_full) - break; - } - - while(theora_p && !frame_done){ - /* theora is one in, one out... */ - if(ogg_stream_packetout(&to,&op)>0){ - - - if(false && pp_inc){ - pp_level+=pp_inc; - th_decode_ctl(td,TH_DECCTL_SET_PPLEVEL,&pp_level, - sizeof(pp_level)); - pp_inc=0; - } - /*HACK: This should be set after a seek or a gap, but we might not have - a granulepos for the first packet (we only have them for the last - packet on a page), so we just set it as often as we get it. - To do this right, we should back-track from the last packet on the - page and compute the correct granulepos for the first packet after - a seek or a gap.*/ - if(op.granulepos>=0){ - th_decode_ctl(td,TH_DECCTL_SET_GRANPOS,&op.granulepos, - sizeof(op.granulepos)); - } - ogg_int64_t videobuf_granulepos; - if(th_decode_packetin(td,&op,&videobuf_granulepos)==0){ - videobuf_time=th_granule_time(td,videobuf_granulepos); - - //printf("frame time %f, play time %f, ready %i\n", (float)videobuf_time, get_time(), videobuf_ready); - - /* is it already too old to be useful? This is only actually - useful cosmetically after a SIGSTOP. 
Note that we have to - decode the frame even if we don't show it (for now) due to - keyframing. Soon enough libtheora will be able to deal - with non-keyframe seeks. */ - - if(videobuf_time>=get_time()) { - frame_done=true; - } else{ - /*If we are too slow, reduce the pp level.*/ - pp_inc=pp_level>0?-1:0; - } - } else { - - } - - } else { - no_theora=true; - break; - } - } - - - //print_line("no theora: "+itos(no_theora)+" theora eos: "+itos(theora_eos)+" frame done "+itos(frame_done)); - -#ifdef THEORA_USE_THREAD_STREAMING - if (file && thread_eof && no_theora && theora_eos && ring_buffer.data_left()==0) { -#else - if (file && /*!videobuf_ready && */ no_theora && theora_eos) { -#endif - printf("video done, stopping\n"); - stop(); - return; - }; - #if 0 - if (!videobuf_ready || audio_todo > 0){ - /* no data yet for somebody. Grab another page */ - - buffer_data(); - while(ogg_sync_pageout(&oy,&og)>0){ - queue_page(&og); - } - } - #else - - - if (!frame_done || !audio_done){ - //what's the point of waiting for audio to grab a page? - - buffer_data(); - while(ogg_sync_pageout(&oy,&og)>0){ - queue_page(&og); - } - } - #endif - /* If playback has begun, top audio buffer off immediately. */ - //if(stateflag) audio_write_nonblocking(); - - /* are we at or past time for this video frame? 
*/ - if(videobuf_ready && videobuf_time<=get_time()){ - - //video_write(); - //videobuf_ready=0; - } else { - //printf("frame at %f not ready (time %f), ready %i\n", (float)videobuf_time, get_time(), videobuf_ready); - } - - float tdiff=videobuf_time-get_time(); - /*If we have lots of extra time, increase the post-processing level.*/ - if(tdiff>ti.fps_denominator*0.25/ti.fps_numerator){ - pp_inc=pp_level0?-1:0; - } - - } - - video_write(); - -}; - - -void VideoStreamPlaybackTheora::play() { - - if (!playing) - time=0; - else { - stop(); - } - - playing = true; - delay_compensation=Globals::get_singleton()->get("audio/video_delay_compensation_ms"); - delay_compensation/=1000.0; - - -}; - -void VideoStreamPlaybackTheora::stop() { - - if (playing) { - - clear(); - set_file(file_name); //reset - } - playing = false; - time=0; -}; - -bool VideoStreamPlaybackTheora::is_playing() const { - - return playing; -}; - -void VideoStreamPlaybackTheora::set_paused(bool p_paused) { - - paused=p_paused; - //pau = !p_paused; -}; - -bool VideoStreamPlaybackTheora::is_paused(bool p_paused) const { - - return paused; -}; - -void VideoStreamPlaybackTheora::set_loop(bool p_enable) { - -}; - -bool VideoStreamPlaybackTheora::has_loop() const { - - return false; -}; - -float VideoStreamPlaybackTheora::get_length() const { - - return 0; -}; - -String VideoStreamPlaybackTheora::get_stream_name() const { - - return ""; -}; - -int VideoStreamPlaybackTheora::get_loop_count() const { - - return 0; -}; - -float VideoStreamPlaybackTheora::get_pos() const { - - return get_time(); -}; - -void VideoStreamPlaybackTheora::seek_pos(float p_time) { - - // no -}; - -void VideoStreamPlaybackTheora::set_mix_callback(AudioMixCallback p_callback,void *p_userdata) { - - mix_callback=p_callback; - mix_udata=p_userdata; -} - -int VideoStreamPlaybackTheora::get_channels() const{ - - return vi.channels; -} - -void VideoStreamPlaybackTheora::set_audio_track(int p_idx) { - - audio_track=p_idx; -} - -int 
VideoStreamPlaybackTheora::get_mix_rate() const{ - - return vi.rate; -} - -#ifdef THEORA_USE_THREAD_STREAMING - - -void VideoStreamPlaybackTheora::_streaming_thread(void *ud) { - - VideoStreamPlaybackTheora *vs=(VideoStreamPlaybackTheora*)ud; - - while(!vs->thread_exit) { - - //just fill back the buffer - if (!vs->thread_eof) { - - int to_read = vs->ring_buffer.space_left(); - if (to_read) { - int read = vs->file->get_buffer(vs->read_buffer.ptr(),to_read); - vs->ring_buffer.write(vs->read_buffer.ptr(),read); - vs->thread_eof=vs->file->eof_reached(); - } - - - } - - vs->thread_sem->wait(); - } -} - -#endif - -VideoStreamPlaybackTheora::VideoStreamPlaybackTheora() { - - file = NULL; - theora_p = 0; - vorbis_p = 0; - videobuf_ready = 0; - playing = false; - frames_pending = 0; - videobuf_time = 0; - paused=false; - - buffering=false; - texture = Ref( memnew(ImageTexture )); - mix_callback=NULL; - mix_udata=NULL; - audio_track=0; - delay_compensation=0; - audio_frames_wrote=0; - -#ifdef THEORA_USE_THREAD_STREAMING - int rb_power = nearest_shift(RB_SIZE_KB*1024); - ring_buffer.resize(rb_power); - read_buffer.resize(RB_SIZE_KB*1024); - thread_sem=Semaphore::create(); - thread=NULL; - thread_exit=false; - thread_eof=false; - -#endif -}; - -VideoStreamPlaybackTheora::~VideoStreamPlaybackTheora() { - -#ifdef THEORA_USE_THREAD_STREAMING - - memdelete(thread_sem); -#endif - clear(); - - if (file) - memdelete(file); - - -}; - - -RES ResourceFormatLoaderVideoStreamTheora::load(const String &p_path,const String& p_original_path, Error *r_error) { - if (r_error) - *r_error=ERR_FILE_CANT_OPEN; - - VideoStreamTheora *stream = memnew(VideoStreamTheora); - stream->set_file(p_path); - - if (r_error) - *r_error=OK; - - return Ref(stream); -} - -void ResourceFormatLoaderVideoStreamTheora::get_recognized_extensions(List *p_extensions) const { - - p_extensions->push_back("ogm"); - p_extensions->push_back("ogv"); -} -bool ResourceFormatLoaderVideoStreamTheora::handles_type(const String& 
p_type) const { - return (p_type=="VideoStream" || p_type=="VideoStreamTheora"); -} - -String ResourceFormatLoaderVideoStreamTheora::get_resource_type(const String &p_path) const { - - String exl=p_path.extension().to_lower(); - if (exl=="ogm" || exl=="ogv") - return "VideoStreamTheora"; - return ""; -} - -#endif - diff --git a/drivers/theora/video_stream_theora.h b/drivers/theora/video_stream_theora.h deleted file mode 100644 index 5484815844..0000000000 --- a/drivers/theora/video_stream_theora.h +++ /dev/null @@ -1,204 +0,0 @@ -/*************************************************************************/ -/* video_stream_theora.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* http://www.godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. 
*/ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -#ifndef VIDEO_STREAM_THEORA_H -#define VIDEO_STREAM_THEORA_H - -#ifdef THEORA_ENABLED - -#include "theora/theoradec.h" -#include "vorbis/codec.h" -#include "os/file_access.h" -#include "ring_buffer.h" -#include "io/resource_loader.h" -#include "scene/resources/video_stream.h" -#include "os/thread.h" -#include "os/semaphore.h" - -//#define THEORA_USE_THREAD_STREAMING - -class VideoStreamPlaybackTheora : public VideoStreamPlayback { - - OBJ_TYPE(VideoStreamPlaybackTheora, VideoStreamPlayback); - - enum { - MAX_FRAMES = 4, - }; - - //Image frames[MAX_FRAMES]; - Image::Format format; - DVector frame_data; - int frames_pending; - FileAccess* file; - String file_name; - int audio_frames_wrote; - Point2i size; - - int buffer_data(); - int queue_page(ogg_page *page); - void video_write(void); - float get_time() const; - - bool theora_eos; - bool vorbis_eos; - - ogg_sync_state oy; - ogg_page og; - ogg_stream_state vo; - ogg_stream_state to; - th_info ti; - th_comment tc; - th_dec_ctx *td; - vorbis_info vi; - vorbis_dsp_state vd; - vorbis_block vb; - vorbis_comment vc; - th_pixel_fmt px_fmt; - double videobuf_time; - int pp_inc; - - int theora_p; - int vorbis_p; - int pp_level_max; - int pp_level; - int videobuf_ready; - - bool playing; - bool buffering; - - double last_update_time; - double time; - double delay_compensation; - - Ref texture; - - AudioMixCallback mix_callback; - void* 
mix_udata; - bool paused; - -#ifdef THEORA_USE_THREAD_STREAMING - - enum { - RB_SIZE_KB=1024 - }; - - RingBuffer ring_buffer; - Vector read_buffer; - bool thread_eof; - Semaphore *thread_sem; - Thread *thread; - volatile bool thread_exit; - - static void _streaming_thread(void *ud); - -#endif - - - int audio_track; - -protected: - - void clear(); - -public: - - virtual void play(); - virtual void stop(); - virtual bool is_playing() const; - - virtual void set_paused(bool p_paused); - virtual bool is_paused(bool p_paused) const; - - virtual void set_loop(bool p_enable); - virtual bool has_loop() const; - - virtual float get_length() const; - - virtual String get_stream_name() const; - - virtual int get_loop_count() const; - - virtual float get_pos() const; - virtual void seek_pos(float p_time); - - - void set_file(const String& p_file); - - virtual Ref get_texture(); - virtual void update(float p_delta); - - virtual void set_mix_callback(AudioMixCallback p_callback,void *p_userdata); - virtual int get_channels() const; - virtual int get_mix_rate() const; - - virtual void set_audio_track(int p_idx); - - VideoStreamPlaybackTheora(); - ~VideoStreamPlaybackTheora(); -}; - - - -class VideoStreamTheora : public VideoStream { - - OBJ_TYPE(VideoStreamTheora,VideoStream); - - String file; - int audio_track; - - -public: - - Ref instance_playback() { - Ref pb = memnew( VideoStreamPlaybackTheora ); - pb->set_audio_track(audio_track); - pb->set_file(file); - return pb; - } - - void set_file(const String& p_file) { file=p_file; } - void set_audio_track(int p_track) { audio_track=p_track; } - - VideoStreamTheora() { audio_track=0; } - -}; - -class ResourceFormatLoaderVideoStreamTheora : public ResourceFormatLoader { -public: - virtual RES load(const String &p_path,const String& p_original_path="",Error *r_error=NULL); - virtual void get_recognized_extensions(List *p_extensions) const; - virtual bool handles_type(const String& p_type) const; - virtual String 
get_resource_type(const String &p_path) const; - -}; - - - -#endif - -#endif diff --git a/drivers/theora/x86/mmxencfrag.c b/drivers/theora/x86/mmxencfrag.c deleted file mode 100644 index c79ff01fcc..0000000000 --- a/drivers/theora/x86/mmxencfrag.c +++ /dev/null @@ -1,900 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: dsp_mmx.c 14579 2008-03-12 06:42:40Z xiphmont $ - - ********************************************************************/ -#include -#include "x86enc.h" - -#if defined(OC_X86_ASM) - -unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src, - const unsigned char *_ref,int _ystride){ - ptrdiff_t ystride3; - ptrdiff_t ret; - __asm__ __volatile__( - /*Load the first 4 rows of each block.*/ - "movq (%[src]),%%mm0\n\t" - "movq (%[ref]),%%mm1\n\t" - "movq (%[src],%[ystride]),%%mm2\n\t" - "movq (%[ref],%[ystride]),%%mm3\n\t" - "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" - "movq (%[src],%[ystride],2),%%mm4\n\t" - "movq (%[ref],%[ystride],2),%%mm5\n\t" - "movq (%[src],%[ystride3]),%%mm6\n\t" - "movq (%[ref],%[ystride3]),%%mm7\n\t" - /*Compute their SADs and add them in %%mm0*/ - "psadbw %%mm1,%%mm0\n\t" - "psadbw %%mm3,%%mm2\n\t" - "lea (%[src],%[ystride],4),%[src]\n\t" - "paddw %%mm2,%%mm0\n\t" - "lea (%[ref],%[ystride],4),%[ref]\n\t" - /*Load the next 3 rows as registers become available.*/ - "movq (%[src]),%%mm2\n\t" - "movq (%[ref]),%%mm3\n\t" - "psadbw %%mm5,%%mm4\n\t" - "psadbw %%mm7,%%mm6\n\t" - "paddw %%mm4,%%mm0\n\t" - 
"movq (%[ref],%[ystride]),%%mm5\n\t" - "movq (%[src],%[ystride]),%%mm4\n\t" - "paddw %%mm6,%%mm0\n\t" - "movq (%[ref],%[ystride],2),%%mm7\n\t" - "movq (%[src],%[ystride],2),%%mm6\n\t" - /*Start adding their SADs to %%mm0*/ - "psadbw %%mm3,%%mm2\n\t" - "psadbw %%mm5,%%mm4\n\t" - "paddw %%mm2,%%mm0\n\t" - "psadbw %%mm7,%%mm6\n\t" - /*Load last row as registers become available.*/ - "movq (%[src],%[ystride3]),%%mm2\n\t" - "movq (%[ref],%[ystride3]),%%mm3\n\t" - /*And finish adding up their SADs.*/ - "paddw %%mm4,%%mm0\n\t" - "psadbw %%mm3,%%mm2\n\t" - "paddw %%mm6,%%mm0\n\t" - "paddw %%mm2,%%mm0\n\t" - "movd %%mm0,%[ret]\n\t" - :[ret]"=a"(ret),[src]"+%r"(_src),[ref]"+r"(_ref),[ystride3]"=&r"(ystride3) - :[ystride]"r"((ptrdiff_t)_ystride) - ); - return (unsigned)ret; -} - -unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src, - const unsigned char *_ref,int _ystride,unsigned _thresh){ - /*Early termination is for suckers.*/ - return oc_enc_frag_sad_mmxext(_src,_ref,_ystride); -} - -/*Assumes the first two rows of %[ref1] and %[ref2] are in %%mm0...%%mm3, the - first two rows of %[src] are in %%mm4,%%mm5, and {1}x8 is in %%mm7. - We pre-load the next two rows of data as registers become available.*/ -#define OC_SAD2_LOOP \ - "#OC_SAD2_LOOP\n\t" \ - /*We want to compute (%%mm0+%%mm1>>1) on unsigned bytes without overflow, but \ - pavgb computes (%%mm0+%%mm1+1>>1). \ - The latter is exactly 1 too large when the low bit of two corresponding \ - bytes is only set in one of them. 
\ - Therefore we pxor the operands, pand to mask out the low bits, and psubb to \ - correct the output of pavgb.*/ \ - "movq %%mm0,%%mm6\n\t" \ - "lea (%[ref1],%[ystride],2),%[ref1]\n\t" \ - "pxor %%mm1,%%mm0\n\t" \ - "pavgb %%mm1,%%mm6\n\t" \ - "lea (%[ref2],%[ystride],2),%[ref2]\n\t" \ - "movq %%mm2,%%mm1\n\t" \ - "pand %%mm7,%%mm0\n\t" \ - "pavgb %%mm3,%%mm2\n\t" \ - "pxor %%mm3,%%mm1\n\t" \ - "movq (%[ref2],%[ystride]),%%mm3\n\t" \ - "psubb %%mm0,%%mm6\n\t" \ - "movq (%[ref1]),%%mm0\n\t" \ - "pand %%mm7,%%mm1\n\t" \ - "psadbw %%mm6,%%mm4\n\t" \ - "movd %[ret],%%mm6\n\t" \ - "psubb %%mm1,%%mm2\n\t" \ - "movq (%[ref2]),%%mm1\n\t" \ - "lea (%[src],%[ystride],2),%[src]\n\t" \ - "psadbw %%mm2,%%mm5\n\t" \ - "movq (%[ref1],%[ystride]),%%mm2\n\t" \ - "paddw %%mm4,%%mm5\n\t" \ - "movq (%[src]),%%mm4\n\t" \ - "paddw %%mm5,%%mm6\n\t" \ - "movq (%[src],%[ystride]),%%mm5\n\t" \ - "movd %%mm6,%[ret]\n\t" \ - -/*Same as above, but does not pre-load the next two rows.*/ -#define OC_SAD2_TAIL \ - "#OC_SAD2_TAIL\n\t" \ - "movq %%mm0,%%mm6\n\t" \ - "pavgb %%mm1,%%mm0\n\t" \ - "pxor %%mm1,%%mm6\n\t" \ - "movq %%mm2,%%mm1\n\t" \ - "pand %%mm7,%%mm6\n\t" \ - "pavgb %%mm3,%%mm2\n\t" \ - "pxor %%mm3,%%mm1\n\t" \ - "psubb %%mm6,%%mm0\n\t" \ - "pand %%mm7,%%mm1\n\t" \ - "psadbw %%mm0,%%mm4\n\t" \ - "psubb %%mm1,%%mm2\n\t" \ - "movd %[ret],%%mm6\n\t" \ - "psadbw %%mm2,%%mm5\n\t" \ - "paddw %%mm4,%%mm5\n\t" \ - "paddw %%mm5,%%mm6\n\t" \ - "movd %%mm6,%[ret]\n\t" \ - -unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src, - const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, - unsigned _thresh){ - ptrdiff_t ret; - __asm__ __volatile__( - "movq (%[ref1]),%%mm0\n\t" - "movq (%[ref2]),%%mm1\n\t" - "movq (%[ref1],%[ystride]),%%mm2\n\t" - "movq (%[ref2],%[ystride]),%%mm3\n\t" - "xor %[ret],%[ret]\n\t" - "movq (%[src]),%%mm4\n\t" - "pxor %%mm7,%%mm7\n\t" - "pcmpeqb %%mm6,%%mm6\n\t" - "movq (%[src],%[ystride]),%%mm5\n\t" - "psubb %%mm6,%%mm7\n\t" - OC_SAD2_LOOP - 
OC_SAD2_LOOP - OC_SAD2_LOOP - OC_SAD2_TAIL - :[ret]"=&a"(ret),[src]"+r"(_src),[ref1]"+%r"(_ref1),[ref2]"+r"(_ref2) - :[ystride]"r"((ptrdiff_t)_ystride) - ); - return (unsigned)ret; -} - -/*Load an 8x4 array of pixel values from %[src] and %[ref] and compute their - 16-bit difference in %%mm0...%%mm7.*/ -#define OC_LOAD_SUB_8x4(_off) \ - "#OC_LOAD_SUB_8x4\n\t" \ - "movd "_off"(%[src]),%%mm0\n\t" \ - "movd "_off"(%[ref]),%%mm4\n\t" \ - "movd "_off"(%[src],%[src_ystride]),%%mm1\n\t" \ - "lea (%[src],%[src_ystride],2),%[src]\n\t" \ - "movd "_off"(%[ref],%[ref_ystride]),%%mm5\n\t" \ - "lea (%[ref],%[ref_ystride],2),%[ref]\n\t" \ - "movd "_off"(%[src]),%%mm2\n\t" \ - "movd "_off"(%[ref]),%%mm7\n\t" \ - "movd "_off"(%[src],%[src_ystride]),%%mm3\n\t" \ - "movd "_off"(%[ref],%[ref_ystride]),%%mm6\n\t" \ - "punpcklbw %%mm4,%%mm0\n\t" \ - "lea (%[src],%[src_ystride],2),%[src]\n\t" \ - "punpcklbw %%mm4,%%mm4\n\t" \ - "lea (%[ref],%[ref_ystride],2),%[ref]\n\t" \ - "psubw %%mm4,%%mm0\n\t" \ - "movd "_off"(%[src]),%%mm4\n\t" \ - "movq %%mm0,"_off"*2(%[buf])\n\t" \ - "movd "_off"(%[ref]),%%mm0\n\t" \ - "punpcklbw %%mm5,%%mm1\n\t" \ - "punpcklbw %%mm5,%%mm5\n\t" \ - "psubw %%mm5,%%mm1\n\t" \ - "movd "_off"(%[src],%[src_ystride]),%%mm5\n\t" \ - "punpcklbw %%mm7,%%mm2\n\t" \ - "punpcklbw %%mm7,%%mm7\n\t" \ - "psubw %%mm7,%%mm2\n\t" \ - "movd "_off"(%[ref],%[ref_ystride]),%%mm7\n\t" \ - "punpcklbw %%mm6,%%mm3\n\t" \ - "lea (%[src],%[src_ystride],2),%[src]\n\t" \ - "punpcklbw %%mm6,%%mm6\n\t" \ - "psubw %%mm6,%%mm3\n\t" \ - "movd "_off"(%[src]),%%mm6\n\t" \ - "punpcklbw %%mm0,%%mm4\n\t" \ - "lea (%[ref],%[ref_ystride],2),%[ref]\n\t" \ - "punpcklbw %%mm0,%%mm0\n\t" \ - "lea (%[src],%[src_ystride],2),%[src]\n\t" \ - "psubw %%mm0,%%mm4\n\t" \ - "movd "_off"(%[ref]),%%mm0\n\t" \ - "punpcklbw %%mm7,%%mm5\n\t" \ - "neg %[src_ystride]\n\t" \ - "punpcklbw %%mm7,%%mm7\n\t" \ - "psubw %%mm7,%%mm5\n\t" \ - "movd "_off"(%[src],%[src_ystride]),%%mm7\n\t" \ - "punpcklbw %%mm0,%%mm6\n\t" \ - "lea 
(%[ref],%[ref_ystride],2),%[ref]\n\t" \ - "punpcklbw %%mm0,%%mm0\n\t" \ - "neg %[ref_ystride]\n\t" \ - "psubw %%mm0,%%mm6\n\t" \ - "movd "_off"(%[ref],%[ref_ystride]),%%mm0\n\t" \ - "lea (%[src],%[src_ystride],8),%[src]\n\t" \ - "punpcklbw %%mm0,%%mm7\n\t" \ - "neg %[src_ystride]\n\t" \ - "punpcklbw %%mm0,%%mm0\n\t" \ - "lea (%[ref],%[ref_ystride],8),%[ref]\n\t" \ - "psubw %%mm0,%%mm7\n\t" \ - "neg %[ref_ystride]\n\t" \ - "movq "_off"*2(%[buf]),%%mm0\n\t" \ - -/*Load an 8x4 array of pixel values from %[src] into %%mm0...%%mm7.*/ -#define OC_LOAD_8x4(_off) \ - "#OC_LOAD_8x4\n\t" \ - "movd "_off"(%[src]),%%mm0\n\t" \ - "movd "_off"(%[src],%[ystride]),%%mm1\n\t" \ - "movd "_off"(%[src],%[ystride],2),%%mm2\n\t" \ - "pxor %%mm7,%%mm7\n\t" \ - "movd "_off"(%[src],%[ystride3]),%%mm3\n\t" \ - "punpcklbw %%mm7,%%mm0\n\t" \ - "movd "_off"(%[src4]),%%mm4\n\t" \ - "punpcklbw %%mm7,%%mm1\n\t" \ - "movd "_off"(%[src4],%[ystride]),%%mm5\n\t" \ - "punpcklbw %%mm7,%%mm2\n\t" \ - "movd "_off"(%[src4],%[ystride],2),%%mm6\n\t" \ - "punpcklbw %%mm7,%%mm3\n\t" \ - "movd "_off"(%[src4],%[ystride3]),%%mm7\n\t" \ - "punpcklbw %%mm4,%%mm4\n\t" \ - "punpcklbw %%mm5,%%mm5\n\t" \ - "psrlw $8,%%mm4\n\t" \ - "psrlw $8,%%mm5\n\t" \ - "punpcklbw %%mm6,%%mm6\n\t" \ - "punpcklbw %%mm7,%%mm7\n\t" \ - "psrlw $8,%%mm6\n\t" \ - "psrlw $8,%%mm7\n\t" \ - -/*Performs the first two stages of an 8-point 1-D Hadamard transform. - The transform is performed in place, except that outputs 0-3 are swapped with - outputs 4-7. 
- Outputs 2, 3, 6 and 7 from the second stage are negated (which allows us to - perform this stage in place with no temporary registers).*/ -#define OC_HADAMARD_AB_8x4 \ - "#OC_HADAMARD_AB_8x4\n\t" \ - /*Stage A: \ - Outputs 0-3 are swapped with 4-7 here.*/ \ - "paddw %%mm1,%%mm5\n\t" \ - "paddw %%mm2,%%mm6\n\t" \ - "paddw %%mm1,%%mm1\n\t" \ - "paddw %%mm2,%%mm2\n\t" \ - "psubw %%mm5,%%mm1\n\t" \ - "psubw %%mm6,%%mm2\n\t" \ - "paddw %%mm3,%%mm7\n\t" \ - "paddw %%mm0,%%mm4\n\t" \ - "paddw %%mm3,%%mm3\n\t" \ - "paddw %%mm0,%%mm0\n\t" \ - "psubw %%mm7,%%mm3\n\t" \ - "psubw %%mm4,%%mm0\n\t" \ - /*Stage B:*/ \ - "paddw %%mm2,%%mm0\n\t" \ - "paddw %%mm3,%%mm1\n\t" \ - "paddw %%mm6,%%mm4\n\t" \ - "paddw %%mm7,%%mm5\n\t" \ - "paddw %%mm2,%%mm2\n\t" \ - "paddw %%mm3,%%mm3\n\t" \ - "paddw %%mm6,%%mm6\n\t" \ - "paddw %%mm7,%%mm7\n\t" \ - "psubw %%mm0,%%mm2\n\t" \ - "psubw %%mm1,%%mm3\n\t" \ - "psubw %%mm4,%%mm6\n\t" \ - "psubw %%mm5,%%mm7\n\t" \ - -/*Performs the last stage of an 8-point 1-D Hadamard transform in place. - Ouputs 1, 3, 5, and 7 are negated (which allows us to perform this stage in - place with no temporary registers).*/ -#define OC_HADAMARD_C_8x4 \ - "#OC_HADAMARD_C_8x4\n\t" \ - /*Stage C:*/ \ - "paddw %%mm1,%%mm0\n\t" \ - "paddw %%mm3,%%mm2\n\t" \ - "paddw %%mm5,%%mm4\n\t" \ - "paddw %%mm7,%%mm6\n\t" \ - "paddw %%mm1,%%mm1\n\t" \ - "paddw %%mm3,%%mm3\n\t" \ - "paddw %%mm5,%%mm5\n\t" \ - "paddw %%mm7,%%mm7\n\t" \ - "psubw %%mm0,%%mm1\n\t" \ - "psubw %%mm2,%%mm3\n\t" \ - "psubw %%mm4,%%mm5\n\t" \ - "psubw %%mm6,%%mm7\n\t" \ - -/*Performs an 8-point 1-D Hadamard transform. - The transform is performed in place, except that outputs 0-3 are swapped with - outputs 4-7. - Outputs 1, 2, 5 and 6 are negated (which allows us to perform the transform - in place with no temporary registers).*/ -#define OC_HADAMARD_8x4 \ - OC_HADAMARD_AB_8x4 \ - OC_HADAMARD_C_8x4 \ - -/*Performs the first part of the final stage of the Hadamard transform and - summing of absolute values. 
- At the end of this part, %%mm1 will contain the DC coefficient of the - transform.*/ -#define OC_HADAMARD_C_ABS_ACCUM_A_8x4(_r6,_r7) \ - /*We use the fact that \ - (abs(a+b)+abs(a-b))/2=max(abs(a),abs(b)) \ - to merge the final butterfly with the abs and the first stage of \ - accumulation. \ - Thus we can avoid using pabsw, which is not available until SSSE3. \ - Emulating pabsw takes 3 instructions, so the straightforward MMXEXT \ - implementation would be (3+3)*8+7=55 instructions (+4 for spilling \ - registers). \ - Even with pabsw, it would be (3+1)*8+7=39 instructions (with no spills). \ - This implementation is only 26 (+4 for spilling registers).*/ \ - "#OC_HADAMARD_C_ABS_ACCUM_A_8x4\n\t" \ - "movq %%mm7,"_r7"(%[buf])\n\t" \ - "movq %%mm6,"_r6"(%[buf])\n\t" \ - /*mm7={0x7FFF}x4 \ - mm0=max(abs(mm0),abs(mm1))-0x7FFF*/ \ - "pcmpeqb %%mm7,%%mm7\n\t" \ - "movq %%mm0,%%mm6\n\t" \ - "psrlw $1,%%mm7\n\t" \ - "paddw %%mm1,%%mm6\n\t" \ - "pmaxsw %%mm1,%%mm0\n\t" \ - "paddsw %%mm7,%%mm6\n\t" \ - "psubw %%mm6,%%mm0\n\t" \ - /*mm2=max(abs(mm2),abs(mm3))-0x7FFF \ - mm4=max(abs(mm4),abs(mm5))-0x7FFF*/ \ - "movq %%mm2,%%mm6\n\t" \ - "movq %%mm4,%%mm1\n\t" \ - "pmaxsw %%mm3,%%mm2\n\t" \ - "pmaxsw %%mm5,%%mm4\n\t" \ - "paddw %%mm3,%%mm6\n\t" \ - "paddw %%mm5,%%mm1\n\t" \ - "movq "_r7"(%[buf]),%%mm3\n\t" \ - -/*Performs the second part of the final stage of the Hadamard transform and - summing of absolute values.*/ -#define OC_HADAMARD_C_ABS_ACCUM_B_8x4(_r6,_r7) \ - "#OC_HADAMARD_C_ABS_ACCUM_B_8x4\n\t" \ - "paddsw %%mm7,%%mm6\n\t" \ - "movq "_r6"(%[buf]),%%mm5\n\t" \ - "paddsw %%mm7,%%mm1\n\t" \ - "psubw %%mm6,%%mm2\n\t" \ - "psubw %%mm1,%%mm4\n\t" \ - /*mm7={1}x4 (needed for the horizontal add that follows) \ - mm0+=mm2+mm4+max(abs(mm3),abs(mm5))-0x7FFF*/ \ - "movq %%mm3,%%mm6\n\t" \ - "pmaxsw %%mm5,%%mm3\n\t" \ - "paddw %%mm2,%%mm0\n\t" \ - "paddw %%mm5,%%mm6\n\t" \ - "paddw %%mm4,%%mm0\n\t" \ - "paddsw %%mm7,%%mm6\n\t" \ - "paddw %%mm3,%%mm0\n\t" \ - "psrlw 
$14,%%mm7\n\t" \ - "psubw %%mm6,%%mm0\n\t" \ - -/*Performs the last stage of an 8-point 1-D Hadamard transform, takes the - absolute value of each component, and accumulates everything into mm0. - This is the only portion of SATD which requires MMXEXT (we could use plain - MMX, but it takes 4 instructions and an extra register to work around the - lack of a pmaxsw, which is a pretty serious penalty).*/ -#define OC_HADAMARD_C_ABS_ACCUM_8x4(_r6,_r7) \ - OC_HADAMARD_C_ABS_ACCUM_A_8x4(_r6,_r7) \ - OC_HADAMARD_C_ABS_ACCUM_B_8x4(_r6,_r7) \ - -/*Performs an 8-point 1-D Hadamard transform, takes the absolute value of each - component, and accumulates everything into mm0. - Note that mm0 will have an extra 4 added to each column, and that after - removing this value, the remainder will be half the conventional value.*/ -#define OC_HADAMARD_ABS_ACCUM_8x4(_r6,_r7) \ - OC_HADAMARD_AB_8x4 \ - OC_HADAMARD_C_ABS_ACCUM_8x4(_r6,_r7) - -/*Performs two 4x4 transposes (mostly) in place. - On input, {mm0,mm1,mm2,mm3} contains rows {e,f,g,h}, and {mm4,mm5,mm6,mm7} - contains rows {a,b,c,d}. 
- On output, {0x40,0x50,0x60,0x70}+_off(%[buf]) contains {e,f,g,h}^T, and - {mm4,mm5,mm6,mm7} contains the transposed rows {a,b,c,d}^T.*/ -#define OC_TRANSPOSE_4x4x2(_off) \ - "#OC_TRANSPOSE_4x4x2\n\t" \ - /*First 4x4 transpose:*/ \ - "movq %%mm5,0x10+"_off"(%[buf])\n\t" \ - /*mm0 = e3 e2 e1 e0 \ - mm1 = f3 f2 f1 f0 \ - mm2 = g3 g2 g1 g0 \ - mm3 = h3 h2 h1 h0*/ \ - "movq %%mm2,%%mm5\n\t" \ - "punpcklwd %%mm3,%%mm2\n\t" \ - "punpckhwd %%mm3,%%mm5\n\t" \ - "movq %%mm0,%%mm3\n\t" \ - "punpcklwd %%mm1,%%mm0\n\t" \ - "punpckhwd %%mm1,%%mm3\n\t" \ - /*mm0 = f1 e1 f0 e0 \ - mm3 = f3 e3 f2 e2 \ - mm2 = h1 g1 h0 g0 \ - mm5 = h3 g3 h2 g2*/ \ - "movq %%mm0,%%mm1\n\t" \ - "punpckldq %%mm2,%%mm0\n\t" \ - "punpckhdq %%mm2,%%mm1\n\t" \ - "movq %%mm3,%%mm2\n\t" \ - "punpckhdq %%mm5,%%mm3\n\t" \ - "movq %%mm0,0x40+"_off"(%[buf])\n\t" \ - "punpckldq %%mm5,%%mm2\n\t" \ - /*mm0 = h0 g0 f0 e0 \ - mm1 = h1 g1 f1 e1 \ - mm2 = h2 g2 f2 e2 \ - mm3 = h3 g3 f3 e3*/ \ - "movq 0x10+"_off"(%[buf]),%%mm5\n\t" \ - /*Second 4x4 transpose:*/ \ - /*mm4 = a3 a2 a1 a0 \ - mm5 = b3 b2 b1 b0 \ - mm6 = c3 c2 c1 c0 \ - mm7 = d3 d2 d1 d0*/ \ - "movq %%mm6,%%mm0\n\t" \ - "punpcklwd %%mm7,%%mm6\n\t" \ - "movq %%mm1,0x50+"_off"(%[buf])\n\t" \ - "punpckhwd %%mm7,%%mm0\n\t" \ - "movq %%mm4,%%mm7\n\t" \ - "punpcklwd %%mm5,%%mm4\n\t" \ - "movq %%mm2,0x60+"_off"(%[buf])\n\t" \ - "punpckhwd %%mm5,%%mm7\n\t" \ - /*mm4 = b1 a1 b0 a0 \ - mm7 = b3 a3 b2 a2 \ - mm6 = d1 c1 d0 c0 \ - mm0 = d3 c3 d2 c2*/ \ - "movq %%mm4,%%mm5\n\t" \ - "punpckldq %%mm6,%%mm4\n\t" \ - "movq %%mm3,0x70+"_off"(%[buf])\n\t" \ - "punpckhdq %%mm6,%%mm5\n\t" \ - "movq %%mm7,%%mm6\n\t" \ - "punpckhdq %%mm0,%%mm7\n\t" \ - "punpckldq %%mm0,%%mm6\n\t" \ - /*mm4 = d0 c0 b0 a0 \ - mm5 = d1 c1 b1 a1 \ - mm6 = d2 c2 b2 a2 \ - mm7 = d3 c3 b3 a3*/ \ - -static unsigned oc_int_frag_satd_thresh_mmxext(const unsigned char *_src, - int _src_ystride,const unsigned char *_ref,int _ref_ystride,unsigned _thresh){ - OC_ALIGN8(ogg_int16_t buf[64]); - ogg_int16_t 
*bufp; - unsigned ret; - unsigned ret2; - bufp=buf; - __asm__ __volatile__( - OC_LOAD_SUB_8x4("0x00") - OC_HADAMARD_8x4 - OC_TRANSPOSE_4x4x2("0x00") - /*Finish swapping out this 8x4 block to make room for the next one. - mm0...mm3 have been swapped out already.*/ - "movq %%mm4,0x00(%[buf])\n\t" - "movq %%mm5,0x10(%[buf])\n\t" - "movq %%mm6,0x20(%[buf])\n\t" - "movq %%mm7,0x30(%[buf])\n\t" - OC_LOAD_SUB_8x4("0x04") - OC_HADAMARD_8x4 - OC_TRANSPOSE_4x4x2("0x08") - /*Here the first 4x4 block of output from the last transpose is the second - 4x4 block of input for the next transform. - We have cleverly arranged that it already be in the appropriate place, so - we only have to do half the loads.*/ - "movq 0x10(%[buf]),%%mm1\n\t" - "movq 0x20(%[buf]),%%mm2\n\t" - "movq 0x30(%[buf]),%%mm3\n\t" - "movq 0x00(%[buf]),%%mm0\n\t" - OC_HADAMARD_ABS_ACCUM_8x4("0x28","0x38") - /*Up to this point, everything fit in 16 bits (8 input + 1 for the - difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1 - for the factor of two we dropped + 3 for the vertical accumulation). - Now we finally have to promote things to dwords. 
- We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long - latency of pmaddwd by starting the next series of loads now.*/ - "mov %[thresh],%[ret2]\n\t" - "pmaddwd %%mm7,%%mm0\n\t" - "movq 0x50(%[buf]),%%mm1\n\t" - "movq 0x58(%[buf]),%%mm5\n\t" - "movq %%mm0,%%mm4\n\t" - "movq 0x60(%[buf]),%%mm2\n\t" - "punpckhdq %%mm0,%%mm0\n\t" - "movq 0x68(%[buf]),%%mm6\n\t" - "paddd %%mm0,%%mm4\n\t" - "movq 0x70(%[buf]),%%mm3\n\t" - "movd %%mm4,%[ret]\n\t" - "movq 0x78(%[buf]),%%mm7\n\t" - /*The sums produced by OC_HADAMARD_ABS_ACCUM_8x4 each have an extra 4 - added to them, and a factor of two removed; correct the final sum here.*/ - "lea -32(%[ret],%[ret]),%[ret]\n\t" - "movq 0x40(%[buf]),%%mm0\n\t" - "cmp %[ret2],%[ret]\n\t" - "movq 0x48(%[buf]),%%mm4\n\t" - "jae 1f\n\t" - OC_HADAMARD_ABS_ACCUM_8x4("0x68","0x78") - "pmaddwd %%mm7,%%mm0\n\t" - /*There isn't much to stick in here to hide the latency this time, but the - alternative to pmaddwd is movq->punpcklwd->punpckhwd->paddd, whose - latency is even worse.*/ - "sub $32,%[ret]\n\t" - "movq %%mm0,%%mm4\n\t" - "punpckhdq %%mm0,%%mm0\n\t" - "paddd %%mm0,%%mm4\n\t" - "movd %%mm4,%[ret2]\n\t" - "lea (%[ret],%[ret2],2),%[ret]\n\t" - ".p2align 4,,15\n\t" - "1:\n\t" - /*Although it looks like we're using 7 registers here, gcc can alias %[ret] - and %[ret2] with some of the inputs, since for once we don't write to - them until after we're done using everything but %[buf] (which is also - listed as an output to ensure gcc _doesn't_ alias them against it).*/ - /*Note that _src_ystride and _ref_ystride must be given non-overlapping - constraints, otherewise if gcc can prove they're equal it will allocate - them to the same register (which is bad); _src and _ref face a similar - problem, though those are never actually the same.*/ - :[ret]"=a"(ret),[ret2]"=r"(ret2),[buf]"+r"(bufp) - :[src]"r"(_src),[src_ystride]"c"((ptrdiff_t)_src_ystride), - [ref]"r"(_ref),[ref_ystride]"d"((ptrdiff_t)_ref_ystride), - [thresh]"m"(_thresh) - 
/*We have to use neg, so we actually clobber the condition codes for once - (not to mention cmp, sub, and add).*/ - :"cc" - ); - return ret; -} - -unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src, - const unsigned char *_ref,int _ystride,unsigned _thresh){ - return oc_int_frag_satd_thresh_mmxext(_src,_ystride,_ref,_ystride,_thresh); -} - -/*Our internal implementation of frag_copy2 takes an extra stride parameter so - we can share code with oc_enc_frag_satd2_thresh_mmxext().*/ -static void oc_int_frag_copy2_mmxext(unsigned char *_dst,int _dst_ystride, - const unsigned char *_src1,const unsigned char *_src2,int _src_ystride){ - __asm__ __volatile__( - /*Load the first 3 rows.*/ - "movq (%[src1]),%%mm0\n\t" - "movq (%[src2]),%%mm1\n\t" - "movq (%[src1],%[src_ystride]),%%mm2\n\t" - "lea (%[src1],%[src_ystride],2),%[src1]\n\t" - "movq (%[src2],%[src_ystride]),%%mm3\n\t" - "lea (%[src2],%[src_ystride],2),%[src2]\n\t" - "pxor %%mm7,%%mm7\n\t" - "movq (%[src1]),%%mm4\n\t" - "pcmpeqb %%mm6,%%mm6\n\t" - "movq (%[src2]),%%mm5\n\t" - /*mm7={1}x8.*/ - "psubb %%mm6,%%mm7\n\t" - /*Start averaging %%mm0 and %%mm1 into %%mm6.*/ - "movq %%mm0,%%mm6\n\t" - "pxor %%mm1,%%mm0\n\t" - "pavgb %%mm1,%%mm6\n\t" - /*%%mm1 is free, start averaging %%mm3 into %%mm2 using %%mm1.*/ - "movq %%mm2,%%mm1\n\t" - "pand %%mm7,%%mm0\n\t" - "pavgb %%mm3,%%mm2\n\t" - "pxor %%mm3,%%mm1\n\t" - /*%%mm3 is free.*/ - "psubb %%mm0,%%mm6\n\t" - /*%%mm0 is free, start loading the next row.*/ - "movq (%[src1],%[src_ystride]),%%mm0\n\t" - /*Start averaging %%mm5 and %%mm4 using %%mm3.*/ - "movq %%mm4,%%mm3\n\t" - /*%%mm6 (row 0) is done; write it out.*/ - "movq %%mm6,(%[dst])\n\t" - "pand %%mm7,%%mm1\n\t" - "pavgb %%mm5,%%mm4\n\t" - "psubb %%mm1,%%mm2\n\t" - /*%%mm1 is free, continue loading the next row.*/ - "movq (%[src2],%[src_ystride]),%%mm1\n\t" - "pxor %%mm5,%%mm3\n\t" - "lea (%[src1],%[src_ystride],2),%[src1]\n\t" - /*%%mm2 (row 1) is done; write it out.*/ - "movq 
%%mm2,(%[dst],%[dst_ystride])\n\t" - "pand %%mm7,%%mm3\n\t" - /*Start loading the next row.*/ - "movq (%[src1]),%%mm2\n\t" - "lea (%[dst],%[dst_ystride],2),%[dst]\n\t" - "psubb %%mm3,%%mm4\n\t" - "lea (%[src2],%[src_ystride],2),%[src2]\n\t" - /*%%mm4 (row 2) is done; write it out.*/ - "movq %%mm4,(%[dst])\n\t" - /*Continue loading the next row.*/ - "movq (%[src2]),%%mm3\n\t" - /*Start averaging %%mm0 and %%mm1 into %%mm6.*/ - "movq %%mm0,%%mm6\n\t" - "pxor %%mm1,%%mm0\n\t" - /*Start loading the next row.*/ - "movq (%[src1],%[src_ystride]),%%mm4\n\t" - "pavgb %%mm1,%%mm6\n\t" - /*%%mm1 is free; start averaging %%mm3 into %%mm2 using %%mm1.*/ - "movq %%mm2,%%mm1\n\t" - "pand %%mm7,%%mm0\n\t" - /*Continue loading the next row.*/ - "movq (%[src2],%[src_ystride]),%%mm5\n\t" - "pavgb %%mm3,%%mm2\n\t" - "lea (%[src1],%[src_ystride],2),%[src1]\n\t" - "pxor %%mm3,%%mm1\n\t" - /*%%mm3 is free.*/ - "psubb %%mm0,%%mm6\n\t" - /*%%mm0 is free, start loading the next row.*/ - "movq (%[src1]),%%mm0\n\t" - /*Start averaging %%mm5 into %%mm4 using %%mm3.*/ - "movq %%mm4,%%mm3\n\t" - /*%%mm6 (row 3) is done; write it out.*/ - "movq %%mm6,(%[dst],%[dst_ystride])\n\t" - "pand %%mm7,%%mm1\n\t" - "lea (%[src2],%[src_ystride],2),%[src2]\n\t" - "pavgb %%mm5,%%mm4\n\t" - "lea (%[dst],%[dst_ystride],2),%[dst]\n\t" - "psubb %%mm1,%%mm2\n\t" - /*%%mm1 is free; continue loading the next row.*/ - "movq (%[src2]),%%mm1\n\t" - "pxor %%mm5,%%mm3\n\t" - /*%%mm2 (row 4) is done; write it out.*/ - "movq %%mm2,(%[dst])\n\t" - "pand %%mm7,%%mm3\n\t" - /*Start loading the next row.*/ - "movq (%[src1],%[src_ystride]),%%mm2\n\t" - "psubb %%mm3,%%mm4\n\t" - /*Start averaging %%mm0 and %%mm1 into %%mm6.*/ - "movq %%mm0,%%mm6\n\t" - /*Continue loading the next row.*/ - "movq (%[src2],%[src_ystride]),%%mm3\n\t" - /*%%mm4 (row 5) is done; write it out.*/ - "movq %%mm4,(%[dst],%[dst_ystride])\n\t" - "pxor %%mm1,%%mm0\n\t" - "pavgb %%mm1,%%mm6\n\t" - /*%%mm4 is free; start averaging %%mm3 into %%mm2 using 
%%mm4.*/ - "movq %%mm2,%%mm4\n\t" - "pand %%mm7,%%mm0\n\t" - "pavgb %%mm3,%%mm2\n\t" - "pxor %%mm3,%%mm4\n\t" - "lea (%[dst],%[dst_ystride],2),%[dst]\n\t" - "psubb %%mm0,%%mm6\n\t" - "pand %%mm7,%%mm4\n\t" - /*%%mm6 (row 6) is done, write it out.*/ - "movq %%mm6,(%[dst])\n\t" - "psubb %%mm4,%%mm2\n\t" - /*%%mm2 (row 7) is done, write it out.*/ - "movq %%mm2,(%[dst],%[dst_ystride])\n\t" - :[dst]"+r"(_dst),[src1]"+%r"(_src1),[src2]"+r"(_src2) - :[dst_ystride]"r"((ptrdiff_t)_dst_ystride), - [src_ystride]"r"((ptrdiff_t)_src_ystride) - :"memory" - ); -} - -unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src, - const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, - unsigned _thresh){ - OC_ALIGN8(unsigned char ref[64]); - oc_int_frag_copy2_mmxext(ref,8,_ref1,_ref2,_ystride); - return oc_int_frag_satd_thresh_mmxext(_src,_ystride,ref,8,_thresh); -} - -unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src, - int _ystride){ - OC_ALIGN8(ogg_int16_t buf[64]); - ogg_int16_t *bufp; - unsigned ret; - unsigned ret2; - bufp=buf; - __asm__ __volatile__( - OC_LOAD_8x4("0x00") - OC_HADAMARD_8x4 - OC_TRANSPOSE_4x4x2("0x00") - /*Finish swapping out this 8x4 block to make room for the next one. - mm0...mm3 have been swapped out already.*/ - "movq %%mm4,0x00(%[buf])\n\t" - "movq %%mm5,0x10(%[buf])\n\t" - "movq %%mm6,0x20(%[buf])\n\t" - "movq %%mm7,0x30(%[buf])\n\t" - OC_LOAD_8x4("0x04") - OC_HADAMARD_8x4 - OC_TRANSPOSE_4x4x2("0x08") - /*Here the first 4x4 block of output from the last transpose is the second - 4x4 block of input for the next transform. 
- We have cleverly arranged that it already be in the appropriate place, so - we only have to do half the loads.*/ - "movq 0x10(%[buf]),%%mm1\n\t" - "movq 0x20(%[buf]),%%mm2\n\t" - "movq 0x30(%[buf]),%%mm3\n\t" - "movq 0x00(%[buf]),%%mm0\n\t" - /*We split out the stages here so we can save the DC coefficient in the - middle.*/ - OC_HADAMARD_AB_8x4 - OC_HADAMARD_C_ABS_ACCUM_A_8x4("0x28","0x38") - "movd %%mm1,%[ret]\n\t" - OC_HADAMARD_C_ABS_ACCUM_B_8x4("0x28","0x38") - /*Up to this point, everything fit in 16 bits (8 input + 1 for the - difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1 - for the factor of two we dropped + 3 for the vertical accumulation). - Now we finally have to promote things to dwords. - We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long - latency of pmaddwd by starting the next series of loads now.*/ - "pmaddwd %%mm7,%%mm0\n\t" - "movq 0x50(%[buf]),%%mm1\n\t" - "movq 0x58(%[buf]),%%mm5\n\t" - "movq 0x60(%[buf]),%%mm2\n\t" - "movq %%mm0,%%mm4\n\t" - "movq 0x68(%[buf]),%%mm6\n\t" - "punpckhdq %%mm0,%%mm0\n\t" - "movq 0x70(%[buf]),%%mm3\n\t" - "paddd %%mm0,%%mm4\n\t" - "movq 0x78(%[buf]),%%mm7\n\t" - "movd %%mm4,%[ret2]\n\t" - "movq 0x40(%[buf]),%%mm0\n\t" - "movq 0x48(%[buf]),%%mm4\n\t" - OC_HADAMARD_ABS_ACCUM_8x4("0x68","0x78") - "pmaddwd %%mm7,%%mm0\n\t" - /*We assume that the DC coefficient is always positive (which is true, - because the input to the INTRA transform was not a difference).*/ - "movzx %w[ret],%[ret]\n\t" - "add %[ret2],%[ret2]\n\t" - "sub %[ret],%[ret2]\n\t" - "movq %%mm0,%%mm4\n\t" - "punpckhdq %%mm0,%%mm0\n\t" - "paddd %%mm0,%%mm4\n\t" - "movd %%mm4,%[ret]\n\t" - "lea -64(%[ret2],%[ret],2),%[ret]\n\t" - /*Although it looks like we're using 7 registers here, gcc can alias %[ret] - and %[ret2] with some of the inputs, since for once we don't write to - them until after we're done using everything but %[buf] (which is also - listed as an output to ensure gcc _doesn't_ alias them against 
it).*/ - :[ret]"=a"(ret),[ret2]"=r"(ret2),[buf]"+r"(bufp) - :[src]"r"(_src),[src4]"r"(_src+4*_ystride), - [ystride]"r"((ptrdiff_t)_ystride),[ystride3]"r"((ptrdiff_t)3*_ystride) - /*We have to use sub, so we actually clobber the condition codes for once - (not to mention add).*/ - :"cc" - ); - return ret; -} - -void oc_enc_frag_sub_mmx(ogg_int16_t _residue[64], - const unsigned char *_src,const unsigned char *_ref,int _ystride){ - int i; - __asm__ __volatile__("pxor %%mm7,%%mm7\n\t"::); - for(i=4;i-->0;){ - __asm__ __volatile__( - /*mm0=[src]*/ - "movq (%[src]),%%mm0\n\t" - /*mm1=[ref]*/ - "movq (%[ref]),%%mm1\n\t" - /*mm4=[src+ystride]*/ - "movq (%[src],%[ystride]),%%mm4\n\t" - /*mm5=[ref+ystride]*/ - "movq (%[ref],%[ystride]),%%mm5\n\t" - /*Compute [src]-[ref].*/ - "movq %%mm0,%%mm2\n\t" - "punpcklbw %%mm7,%%mm0\n\t" - "movq %%mm1,%%mm3\n\t" - "punpckhbw %%mm7,%%mm2\n\t" - "punpcklbw %%mm7,%%mm1\n\t" - "punpckhbw %%mm7,%%mm3\n\t" - "psubw %%mm1,%%mm0\n\t" - "psubw %%mm3,%%mm2\n\t" - /*Compute [src+ystride]-[ref+ystride].*/ - "movq %%mm4,%%mm1\n\t" - "punpcklbw %%mm7,%%mm4\n\t" - "movq %%mm5,%%mm3\n\t" - "punpckhbw %%mm7,%%mm1\n\t" - "lea (%[src],%[ystride],2),%[src]\n\t" - "punpcklbw %%mm7,%%mm5\n\t" - "lea (%[ref],%[ystride],2),%[ref]\n\t" - "punpckhbw %%mm7,%%mm3\n\t" - "psubw %%mm5,%%mm4\n\t" - "psubw %%mm3,%%mm1\n\t" - /*Write the answer out.*/ - "movq %%mm0,0x00(%[residue])\n\t" - "movq %%mm2,0x08(%[residue])\n\t" - "movq %%mm4,0x10(%[residue])\n\t" - "movq %%mm1,0x18(%[residue])\n\t" - "lea 0x20(%[residue]),%[residue]\n\t" - :[residue]"+r"(_residue),[src]"+r"(_src),[ref]"+r"(_ref) - :[ystride]"r"((ptrdiff_t)_ystride) - :"memory" - ); - } -} - -void oc_enc_frag_sub_128_mmx(ogg_int16_t _residue[64], - const unsigned char *_src,int _ystride){ - ptrdiff_t ystride3; - __asm__ __volatile__( - /*mm0=[src]*/ - "movq (%[src]),%%mm0\n\t" - /*mm1=[src+ystride]*/ - "movq (%[src],%[ystride]),%%mm1\n\t" - /*mm6={-1}x4*/ - "pcmpeqw %%mm6,%%mm6\n\t" - 
/*mm2=[src+2*ystride]*/ - "movq (%[src],%[ystride],2),%%mm2\n\t" - /*[ystride3]=3*[ystride]*/ - "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" - /*mm6={1}x4*/ - "psllw $15,%%mm6\n\t" - /*mm3=[src+3*ystride]*/ - "movq (%[src],%[ystride3]),%%mm3\n\t" - /*mm6={128}x4*/ - "psrlw $8,%%mm6\n\t" - /*mm7=0*/ - "pxor %%mm7,%%mm7\n\t" - /*[src]=[src]+4*[ystride]*/ - "lea (%[src],%[ystride],4),%[src]\n\t" - /*Compute [src]-128 and [src+ystride]-128*/ - "movq %%mm0,%%mm4\n\t" - "punpcklbw %%mm7,%%mm0\n\t" - "movq %%mm1,%%mm5\n\t" - "punpckhbw %%mm7,%%mm4\n\t" - "psubw %%mm6,%%mm0\n\t" - "punpcklbw %%mm7,%%mm1\n\t" - "psubw %%mm6,%%mm4\n\t" - "punpckhbw %%mm7,%%mm5\n\t" - "psubw %%mm6,%%mm1\n\t" - "psubw %%mm6,%%mm5\n\t" - /*Write the answer out.*/ - "movq %%mm0,0x00(%[residue])\n\t" - "movq %%mm4,0x08(%[residue])\n\t" - "movq %%mm1,0x10(%[residue])\n\t" - "movq %%mm5,0x18(%[residue])\n\t" - /*mm0=[src+4*ystride]*/ - "movq (%[src]),%%mm0\n\t" - /*mm1=[src+5*ystride]*/ - "movq (%[src],%[ystride]),%%mm1\n\t" - /*Compute [src+2*ystride]-128 and [src+3*ystride]-128*/ - "movq %%mm2,%%mm4\n\t" - "punpcklbw %%mm7,%%mm2\n\t" - "movq %%mm3,%%mm5\n\t" - "punpckhbw %%mm7,%%mm4\n\t" - "psubw %%mm6,%%mm2\n\t" - "punpcklbw %%mm7,%%mm3\n\t" - "psubw %%mm6,%%mm4\n\t" - "punpckhbw %%mm7,%%mm5\n\t" - "psubw %%mm6,%%mm3\n\t" - "psubw %%mm6,%%mm5\n\t" - /*Write the answer out.*/ - "movq %%mm2,0x20(%[residue])\n\t" - "movq %%mm4,0x28(%[residue])\n\t" - "movq %%mm3,0x30(%[residue])\n\t" - "movq %%mm5,0x38(%[residue])\n\t" - /*mm2=[src+6*ystride]*/ - "movq (%[src],%[ystride],2),%%mm2\n\t" - /*mm3=[src+7*ystride]*/ - "movq (%[src],%[ystride3]),%%mm3\n\t" - /*Compute [src+4*ystride]-128 and [src+5*ystride]-128*/ - "movq %%mm0,%%mm4\n\t" - "punpcklbw %%mm7,%%mm0\n\t" - "movq %%mm1,%%mm5\n\t" - "punpckhbw %%mm7,%%mm4\n\t" - "psubw %%mm6,%%mm0\n\t" - "punpcklbw %%mm7,%%mm1\n\t" - "psubw %%mm6,%%mm4\n\t" - "punpckhbw %%mm7,%%mm5\n\t" - "psubw %%mm6,%%mm1\n\t" - "psubw %%mm6,%%mm5\n\t" - /*Write the answer 
out.*/ - "movq %%mm0,0x40(%[residue])\n\t" - "movq %%mm4,0x48(%[residue])\n\t" - "movq %%mm1,0x50(%[residue])\n\t" - "movq %%mm5,0x58(%[residue])\n\t" - /*Compute [src+6*ystride]-128 and [src+7*ystride]-128*/ - "movq %%mm2,%%mm4\n\t" - "punpcklbw %%mm7,%%mm2\n\t" - "movq %%mm3,%%mm5\n\t" - "punpckhbw %%mm7,%%mm4\n\t" - "psubw %%mm6,%%mm2\n\t" - "punpcklbw %%mm7,%%mm3\n\t" - "psubw %%mm6,%%mm4\n\t" - "punpckhbw %%mm7,%%mm5\n\t" - "psubw %%mm6,%%mm3\n\t" - "psubw %%mm6,%%mm5\n\t" - /*Write the answer out.*/ - "movq %%mm2,0x60(%[residue])\n\t" - "movq %%mm4,0x68(%[residue])\n\t" - "movq %%mm3,0x70(%[residue])\n\t" - "movq %%mm5,0x78(%[residue])\n\t" - :[src]"+r"(_src),[ystride3]"=&r"(ystride3) - :[residue]"r"(_residue),[ystride]"r"((ptrdiff_t)_ystride) - :"memory" - ); -} - -void oc_enc_frag_copy2_mmxext(unsigned char *_dst, - const unsigned char *_src1,const unsigned char *_src2,int _ystride){ - oc_int_frag_copy2_mmxext(_dst,_ystride,_src1,_src2,_ystride); -} - -#endif diff --git a/drivers/theora/x86/mmxfdct.c b/drivers/theora/x86/mmxfdct.c deleted file mode 100644 index 211875255e..0000000000 --- a/drivers/theora/x86/mmxfdct.c +++ /dev/null @@ -1,665 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 1999-2006 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ********************************************************************/ -/*MMX fDCT implementation for x86_32*/ -/*$Id: fdct_ses2.c 14579 2008-03-12 06:42:40Z xiphmont $*/ -#include "x86enc.h" - -#if defined(OC_X86_ASM) - -# define OC_FDCT_STAGE1_8x4 \ - "#OC_FDCT_STAGE1_8x4\n\t" \ - /*Stage 1:*/ \ - /*mm0=t7'=t0-t7*/ \ - "psubw %%mm7,%%mm0\n\t" \ - "paddw %%mm7,%%mm7\n\t" \ - /*mm1=t6'=t1-t6*/ \ - "psubw %%mm6,%%mm1\n\t" \ - "paddw %%mm6,%%mm6\n\t" \ - /*mm2=t5'=t2-t5*/ \ - "psubw %%mm5,%%mm2\n\t" \ - "paddw %%mm5,%%mm5\n\t" \ - /*mm3=t4'=t3-t4*/ \ - "psubw %%mm4,%%mm3\n\t" \ - "paddw %%mm4,%%mm4\n\t" \ - /*mm7=t0'=t0+t7*/ \ - "paddw %%mm0,%%mm7\n\t" \ - /*mm6=t1'=t1+t6*/ \ - "paddw %%mm1,%%mm6\n\t" \ - /*mm5=t2'=t2+t5*/ \ - "paddw %%mm2,%%mm5\n\t" \ - /*mm4=t3'=t3+t4*/ \ - "paddw %%mm3,%%mm4\n\t" \ - -# define OC_FDCT8x4(_r0,_r1,_r2,_r3,_r4,_r5,_r6,_r7) \ - "#OC_FDCT8x4\n\t" \ - /*Stage 2:*/ \ - /*mm7=t3''=t0'-t3'*/ \ - "psubw %%mm4,%%mm7\n\t" \ - "paddw %%mm4,%%mm4\n\t" \ - /*mm6=t2''=t1'-t2'*/ \ - "psubw %%mm5,%%mm6\n\t" \ - "movq %%mm7,"_r6"(%[y])\n\t" \ - "paddw %%mm5,%%mm5\n\t" \ - /*mm1=t5''=t6'-t5'*/ \ - "psubw %%mm2,%%mm1\n\t" \ - "movq %%mm6,"_r2"(%[y])\n\t" \ - /*mm4=t0''=t0'+t3'*/ \ - "paddw %%mm7,%%mm4\n\t" \ - "paddw %%mm2,%%mm2\n\t" \ - /*mm5=t1''=t1'+t2'*/ \ - "movq %%mm4,"_r0"(%[y])\n\t" \ - "paddw %%mm6,%%mm5\n\t" \ - /*mm2=t6''=t6'+t5'*/ \ - "paddw %%mm1,%%mm2\n\t" \ - "movq %%mm5,"_r4"(%[y])\n\t" \ - /*mm0=t7', mm1=t5'', mm2=t6'', mm3=t4'.*/ \ - /*mm4, mm5, mm6, mm7 are free.*/ \ - /*Stage 3:*/ \ - /*mm6={2}x4, mm7={27146,0xB500>>1}x2*/ \ - "mov $0x5A806A0A,%[a]\n\t" \ - "pcmpeqb %%mm6,%%mm6\n\t" \ - "movd %[a],%%mm7\n\t" \ - "psrlw $15,%%mm6\n\t" \ - "punpckldq %%mm7,%%mm7\n\t" \ - "paddw %%mm6,%%mm6\n\t" \ - /*mm0=0, m2={-1}x4 \ - mm5:mm4=t5''*27146+0xB500*/ \ - "movq %%mm1,%%mm4\n\t" \ - "movq %%mm1,%%mm5\n\t" \ - 
"punpcklwd %%mm6,%%mm4\n\t" \ - "movq %%mm2,"_r3"(%[y])\n\t" \ - "pmaddwd %%mm7,%%mm4\n\t" \ - "movq %%mm0,"_r7"(%[y])\n\t" \ - "punpckhwd %%mm6,%%mm5\n\t" \ - "pxor %%mm0,%%mm0\n\t" \ - "pmaddwd %%mm7,%%mm5\n\t" \ - "pcmpeqb %%mm2,%%mm2\n\t" \ - /*mm2=t6'', mm1=t5''+(t5''!=0) \ - mm4=(t5''*27146+0xB500>>16)*/ \ - "pcmpeqw %%mm1,%%mm0\n\t" \ - "psrad $16,%%mm4\n\t" \ - "psubw %%mm2,%%mm0\n\t" \ - "movq "_r3"(%[y]),%%mm2\n\t" \ - "psrad $16,%%mm5\n\t" \ - "paddw %%mm0,%%mm1\n\t" \ - "packssdw %%mm5,%%mm4\n\t" \ - /*mm4=s=(t5''*27146+0xB500>>16)+t5''+(t5''!=0)>>1*/ \ - "paddw %%mm1,%%mm4\n\t" \ - "movq "_r7"(%[y]),%%mm0\n\t" \ - "psraw $1,%%mm4\n\t" \ - "movq %%mm3,%%mm1\n\t" \ - /*mm3=t4''=t4'+s*/ \ - "paddw %%mm4,%%mm3\n\t" \ - /*mm1=t5'''=t4'-s*/ \ - "psubw %%mm4,%%mm1\n\t" \ - /*mm1=0, mm3={-1}x4 \ - mm5:mm4=t6''*27146+0xB500*/ \ - "movq %%mm2,%%mm4\n\t" \ - "movq %%mm2,%%mm5\n\t" \ - "punpcklwd %%mm6,%%mm4\n\t" \ - "movq %%mm1,"_r5"(%[y])\n\t" \ - "pmaddwd %%mm7,%%mm4\n\t" \ - "movq %%mm3,"_r1"(%[y])\n\t" \ - "punpckhwd %%mm6,%%mm5\n\t" \ - "pxor %%mm1,%%mm1\n\t" \ - "pmaddwd %%mm7,%%mm5\n\t" \ - "pcmpeqb %%mm3,%%mm3\n\t" \ - /*mm2=t6''+(t6''!=0), mm4=(t6''*27146+0xB500>>16)*/ \ - "psrad $16,%%mm4\n\t" \ - "pcmpeqw %%mm2,%%mm1\n\t" \ - "psrad $16,%%mm5\n\t" \ - "psubw %%mm3,%%mm1\n\t" \ - "packssdw %%mm5,%%mm4\n\t" \ - "paddw %%mm1,%%mm2\n\t" \ - /*mm1=t1'' \ - mm4=s=(t6''*27146+0xB500>>16)+t6''+(t6''!=0)>>1*/ \ - "paddw %%mm2,%%mm4\n\t" \ - "movq "_r4"(%[y]),%%mm1\n\t" \ - "psraw $1,%%mm4\n\t" \ - "movq %%mm0,%%mm2\n\t" \ - /*mm7={54491-0x7FFF,0x7FFF}x2 \ - mm0=t7''=t7'+s*/ \ - "paddw %%mm4,%%mm0\n\t" \ - /*mm2=t6'''=t7'-s*/ \ - "psubw %%mm4,%%mm2\n\t" \ - /*Stage 4:*/ \ - /*mm0=0, mm2=t0'' \ - mm5:mm4=t1''*27146+0xB500*/ \ - "movq %%mm1,%%mm4\n\t" \ - "movq %%mm1,%%mm5\n\t" \ - "punpcklwd %%mm6,%%mm4\n\t" \ - "movq %%mm2,"_r3"(%[y])\n\t" \ - "pmaddwd %%mm7,%%mm4\n\t" \ - "movq "_r0"(%[y]),%%mm2\n\t" \ - "punpckhwd %%mm6,%%mm5\n\t" \ - "movq 
%%mm0,"_r7"(%[y])\n\t" \ - "pmaddwd %%mm7,%%mm5\n\t" \ - "pxor %%mm0,%%mm0\n\t" \ - /*mm7={27146,0x4000>>1}x2 \ - mm0=s=(t1''*27146+0xB500>>16)+t1''+(t1''!=0)*/ \ - "psrad $16,%%mm4\n\t" \ - "mov $0x20006A0A,%[a]\n\t" \ - "pcmpeqw %%mm1,%%mm0\n\t" \ - "movd %[a],%%mm7\n\t" \ - "psrad $16,%%mm5\n\t" \ - "psubw %%mm3,%%mm0\n\t" \ - "packssdw %%mm5,%%mm4\n\t" \ - "paddw %%mm1,%%mm0\n\t" \ - "punpckldq %%mm7,%%mm7\n\t" \ - "paddw %%mm4,%%mm0\n\t" \ - /*mm6={0x00000E3D}x2 \ - mm1=-(t0''==0), mm5:mm4=t0''*27146+0x4000*/ \ - "movq %%mm2,%%mm4\n\t" \ - "movq %%mm2,%%mm5\n\t" \ - "punpcklwd %%mm6,%%mm4\n\t" \ - "mov $0x0E3D,%[a]\n\t" \ - "pmaddwd %%mm7,%%mm4\n\t" \ - "punpckhwd %%mm6,%%mm5\n\t" \ - "movd %[a],%%mm6\n\t" \ - "pmaddwd %%mm7,%%mm5\n\t" \ - "pxor %%mm1,%%mm1\n\t" \ - "punpckldq %%mm6,%%mm6\n\t" \ - "pcmpeqw %%mm2,%%mm1\n\t" \ - /*mm4=r=(t0''*27146+0x4000>>16)+t0''+(t0''!=0)*/ \ - "psrad $16,%%mm4\n\t" \ - "psubw %%mm3,%%mm1\n\t" \ - "psrad $16,%%mm5\n\t" \ - "paddw %%mm1,%%mm2\n\t" \ - "packssdw %%mm5,%%mm4\n\t" \ - "movq "_r5"(%[y]),%%mm1\n\t" \ - "paddw %%mm2,%%mm4\n\t" \ - /*mm2=t6'', mm0=_y[0]=u=r+s>>1 \ - The naive implementation could cause overflow, so we use \ - u=(r&s)+((r^s)>>1).*/ \ - "movq "_r3"(%[y]),%%mm2\n\t" \ - "movq %%mm0,%%mm7\n\t" \ - "pxor %%mm4,%%mm0\n\t" \ - "pand %%mm4,%%mm7\n\t" \ - "psraw $1,%%mm0\n\t" \ - "mov $0x7FFF54DC,%[a]\n\t" \ - "paddw %%mm7,%%mm0\n\t" \ - "movd %[a],%%mm7\n\t" \ - /*mm7={54491-0x7FFF,0x7FFF}x2 \ - mm4=_y[4]=v=r-u*/ \ - "psubw %%mm0,%%mm4\n\t" \ - "punpckldq %%mm7,%%mm7\n\t" \ - "movq %%mm4,"_r4"(%[y])\n\t" \ - /*mm0=0, mm7={36410}x4 \ - mm1=(t5'''!=0), mm5:mm4=54491*t5'''+0x0E3D*/ \ - "movq %%mm1,%%mm4\n\t" \ - "movq %%mm1,%%mm5\n\t" \ - "punpcklwd %%mm1,%%mm4\n\t" \ - "mov $0x8E3A8E3A,%[a]\n\t" \ - "pmaddwd %%mm7,%%mm4\n\t" \ - "movq %%mm0,"_r0"(%[y])\n\t" \ - "punpckhwd %%mm1,%%mm5\n\t" \ - "pxor %%mm0,%%mm0\n\t" \ - "pmaddwd %%mm7,%%mm5\n\t" \ - "pcmpeqw %%mm0,%%mm1\n\t" \ - "movd %[a],%%mm7\n\t" \ - "psubw 
%%mm3,%%mm1\n\t" \ - "punpckldq %%mm7,%%mm7\n\t" \ - "paddd %%mm6,%%mm4\n\t" \ - "paddd %%mm6,%%mm5\n\t" \ - /*mm0=0 \ - mm3:mm1=36410*t6'''+((t5'''!=0)<<16)*/ \ - "movq %%mm2,%%mm6\n\t" \ - "movq %%mm2,%%mm3\n\t" \ - "pmulhw %%mm7,%%mm6\n\t" \ - "paddw %%mm2,%%mm1\n\t" \ - "pmullw %%mm7,%%mm3\n\t" \ - "pxor %%mm0,%%mm0\n\t" \ - "paddw %%mm1,%%mm6\n\t" \ - "movq %%mm3,%%mm1\n\t" \ - "punpckhwd %%mm6,%%mm3\n\t" \ - "punpcklwd %%mm6,%%mm1\n\t" \ - /*mm3={-1}x4, mm6={1}x4 \ - mm4=_y[5]=u=(54491*t5'''+36410*t6'''+0x0E3D>>16)+(t5'''!=0)*/ \ - "paddd %%mm3,%%mm5\n\t" \ - "paddd %%mm1,%%mm4\n\t" \ - "psrad $16,%%mm5\n\t" \ - "pxor %%mm6,%%mm6\n\t" \ - "psrad $16,%%mm4\n\t" \ - "pcmpeqb %%mm3,%%mm3\n\t" \ - "packssdw %%mm5,%%mm4\n\t" \ - "psubw %%mm3,%%mm6\n\t" \ - /*mm1=t7'', mm7={26568,0x3400}x2 \ - mm2=s=t6'''-(36410*u>>16)*/ \ - "movq %%mm4,%%mm1\n\t" \ - "mov $0x340067C8,%[a]\n\t" \ - "pmulhw %%mm7,%%mm4\n\t" \ - "movd %[a],%%mm7\n\t" \ - "movq %%mm1,"_r5"(%[y])\n\t" \ - "punpckldq %%mm7,%%mm7\n\t" \ - "paddw %%mm1,%%mm4\n\t" \ - "movq "_r7"(%[y]),%%mm1\n\t" \ - "psubw %%mm4,%%mm2\n\t" \ - /*mm6={0x00007B1B}x2 \ - mm0=(s!=0), mm5:mm4=s*26568+0x3400*/ \ - "movq %%mm2,%%mm4\n\t" \ - "movq %%mm2,%%mm5\n\t" \ - "punpcklwd %%mm6,%%mm4\n\t" \ - "pcmpeqw %%mm2,%%mm0\n\t" \ - "pmaddwd %%mm7,%%mm4\n\t" \ - "mov $0x7B1B,%[a]\n\t" \ - "punpckhwd %%mm6,%%mm5\n\t" \ - "movd %[a],%%mm6\n\t" \ - "pmaddwd %%mm7,%%mm5\n\t" \ - "psubw %%mm3,%%mm0\n\t" \ - "punpckldq %%mm6,%%mm6\n\t" \ - /*mm7={64277-0x7FFF,0x7FFF}x2 \ - mm2=_y[3]=v=(s*26568+0x3400>>17)+s+(s!=0)*/ \ - "psrad $17,%%mm4\n\t" \ - "paddw %%mm0,%%mm2\n\t" \ - "psrad $17,%%mm5\n\t" \ - "mov $0x7FFF7B16,%[a]\n\t" \ - "packssdw %%mm5,%%mm4\n\t" \ - "movd %[a],%%mm7\n\t" \ - "paddw %%mm4,%%mm2\n\t" \ - "punpckldq %%mm7,%%mm7\n\t" \ - /*mm0=0, mm7={12785}x4 \ - mm1=(t7''!=0), mm2=t4'', mm5:mm4=64277*t7''+0x7B1B*/ \ - "movq %%mm1,%%mm4\n\t" \ - "movq %%mm1,%%mm5\n\t" \ - "movq %%mm2,"_r3"(%[y])\n\t" \ - "punpcklwd %%mm1,%%mm4\n\t" 
\ - "movq "_r1"(%[y]),%%mm2\n\t" \ - "pmaddwd %%mm7,%%mm4\n\t" \ - "mov $0x31F131F1,%[a]\n\t" \ - "punpckhwd %%mm1,%%mm5\n\t" \ - "pxor %%mm0,%%mm0\n\t" \ - "pmaddwd %%mm7,%%mm5\n\t" \ - "pcmpeqw %%mm0,%%mm1\n\t" \ - "movd %[a],%%mm7\n\t" \ - "psubw %%mm3,%%mm1\n\t" \ - "punpckldq %%mm7,%%mm7\n\t" \ - "paddd %%mm6,%%mm4\n\t" \ - "paddd %%mm6,%%mm5\n\t" \ - /*mm3:mm1=12785*t4'''+((t7''!=0)<<16)*/ \ - "movq %%mm2,%%mm6\n\t" \ - "movq %%mm2,%%mm3\n\t" \ - "pmulhw %%mm7,%%mm6\n\t" \ - "pmullw %%mm7,%%mm3\n\t" \ - "paddw %%mm1,%%mm6\n\t" \ - "movq %%mm3,%%mm1\n\t" \ - "punpckhwd %%mm6,%%mm3\n\t" \ - "punpcklwd %%mm6,%%mm1\n\t" \ - /*mm3={-1}x4, mm6={1}x4 \ - mm4=_y[1]=u=(12785*t4'''+64277*t7''+0x7B1B>>16)+(t7''!=0)*/ \ - "paddd %%mm3,%%mm5\n\t" \ - "paddd %%mm1,%%mm4\n\t" \ - "psrad $16,%%mm5\n\t" \ - "pxor %%mm6,%%mm6\n\t" \ - "psrad $16,%%mm4\n\t" \ - "pcmpeqb %%mm3,%%mm3\n\t" \ - "packssdw %%mm5,%%mm4\n\t" \ - "psubw %%mm3,%%mm6\n\t" \ - /*mm1=t3'', mm7={20539,0x3000}x2 \ - mm4=s=(12785*u>>16)-t4''*/ \ - "movq %%mm4,"_r1"(%[y])\n\t" \ - "pmulhw %%mm7,%%mm4\n\t" \ - "mov $0x3000503B,%[a]\n\t" \ - "movq "_r6"(%[y]),%%mm1\n\t" \ - "movd %[a],%%mm7\n\t" \ - "psubw %%mm2,%%mm4\n\t" \ - "punpckldq %%mm7,%%mm7\n\t" \ - /*mm6={0x00006CB7}x2 \ - mm0=(s!=0), mm5:mm4=s*20539+0x3000*/ \ - "movq %%mm4,%%mm5\n\t" \ - "movq %%mm4,%%mm2\n\t" \ - "punpcklwd %%mm6,%%mm4\n\t" \ - "pcmpeqw %%mm2,%%mm0\n\t" \ - "pmaddwd %%mm7,%%mm4\n\t" \ - "mov $0x6CB7,%[a]\n\t" \ - "punpckhwd %%mm6,%%mm5\n\t" \ - "movd %[a],%%mm6\n\t" \ - "pmaddwd %%mm7,%%mm5\n\t" \ - "psubw %%mm3,%%mm0\n\t" \ - "punpckldq %%mm6,%%mm6\n\t" \ - /*mm7={60547-0x7FFF,0x7FFF}x2 \ - mm2=_y[7]=v=(s*20539+0x3000>>20)+s+(s!=0)*/ \ - "psrad $20,%%mm4\n\t" \ - "paddw %%mm0,%%mm2\n\t" \ - "psrad $20,%%mm5\n\t" \ - "mov $0x7FFF6C84,%[a]\n\t" \ - "packssdw %%mm5,%%mm4\n\t" \ - "movd %[a],%%mm7\n\t" \ - "paddw %%mm4,%%mm2\n\t" \ - "punpckldq %%mm7,%%mm7\n\t" \ - /*mm0=0, mm7={25080}x4 \ - mm2=t2'', mm5:mm4=60547*t3''+0x6CB7*/ \ - 
"movq %%mm1,%%mm4\n\t" \ - "movq %%mm1,%%mm5\n\t" \ - "movq %%mm2,"_r7"(%[y])\n\t" \ - "punpcklwd %%mm1,%%mm4\n\t" \ - "movq "_r2"(%[y]),%%mm2\n\t" \ - "pmaddwd %%mm7,%%mm4\n\t" \ - "mov $0x61F861F8,%[a]\n\t" \ - "punpckhwd %%mm1,%%mm5\n\t" \ - "pxor %%mm0,%%mm0\n\t" \ - "pmaddwd %%mm7,%%mm5\n\t" \ - "movd %[a],%%mm7\n\t" \ - "pcmpeqw %%mm0,%%mm1\n\t" \ - "psubw %%mm3,%%mm1\n\t" \ - "punpckldq %%mm7,%%mm7\n\t" \ - "paddd %%mm6,%%mm4\n\t" \ - "paddd %%mm6,%%mm5\n\t" \ - /*mm3:mm1=25080*t2''+((t3''!=0)<<16)*/ \ - "movq %%mm2,%%mm6\n\t" \ - "movq %%mm2,%%mm3\n\t" \ - "pmulhw %%mm7,%%mm6\n\t" \ - "pmullw %%mm7,%%mm3\n\t" \ - "paddw %%mm1,%%mm6\n\t" \ - "movq %%mm3,%%mm1\n\t" \ - "punpckhwd %%mm6,%%mm3\n\t" \ - "punpcklwd %%mm6,%%mm1\n\t" \ - /*mm1={-1}x4 \ - mm4=u=(25080*t2''+60547*t3''+0x6CB7>>16)+(t3''!=0)*/ \ - "paddd %%mm3,%%mm5\n\t" \ - "paddd %%mm1,%%mm4\n\t" \ - "psrad $16,%%mm5\n\t" \ - "mov $0x28005460,%[a]\n\t" \ - "psrad $16,%%mm4\n\t" \ - "pcmpeqb %%mm1,%%mm1\n\t" \ - "packssdw %%mm5,%%mm4\n\t" \ - /*mm5={1}x4, mm6=_y[2]=u, mm7={21600,0x2800}x2 \ - mm4=s=(25080*u>>16)-t2''*/ \ - "movq %%mm4,%%mm6\n\t" \ - "pmulhw %%mm7,%%mm4\n\t" \ - "pxor %%mm5,%%mm5\n\t" \ - "movd %[a],%%mm7\n\t" \ - "psubw %%mm1,%%mm5\n\t" \ - "punpckldq %%mm7,%%mm7\n\t" \ - "psubw %%mm2,%%mm4\n\t" \ - /*mm2=s+(s!=0) \ - mm4:mm3=s*21600+0x2800*/ \ - "movq %%mm4,%%mm3\n\t" \ - "movq %%mm4,%%mm2\n\t" \ - "punpckhwd %%mm5,%%mm4\n\t" \ - "pcmpeqw %%mm2,%%mm0\n\t" \ - "pmaddwd %%mm7,%%mm4\n\t" \ - "psubw %%mm1,%%mm0\n\t" \ - "punpcklwd %%mm5,%%mm3\n\t" \ - "paddw %%mm0,%%mm2\n\t" \ - "pmaddwd %%mm7,%%mm3\n\t" \ - /*mm0=_y[4], mm1=_y[7], mm4=_y[0], mm5=_y[5] \ - mm3=_y[6]=v=(s*21600+0x2800>>18)+s+(s!=0)*/ \ - "movq "_r4"(%[y]),%%mm0\n\t" \ - "psrad $18,%%mm4\n\t" \ - "movq "_r5"(%[y]),%%mm5\n\t" \ - "psrad $18,%%mm3\n\t" \ - "movq "_r7"(%[y]),%%mm1\n\t" \ - "packssdw %%mm4,%%mm3\n\t" \ - "movq "_r0"(%[y]),%%mm4\n\t" \ - "paddw %%mm2,%%mm3\n\t" \ - -/*On input, mm4=_y[0], mm6=_y[2], mm0=_y[4], 
mm5=_y[5], mm3=_y[6], mm1=_y[7]. - On output, {_y[4],mm1,mm2,mm3} contains the transpose of _y[4...7] and - {mm4,mm5,mm6,mm7} contains the transpose of _y[0...3].*/ -# define OC_TRANSPOSE8x4(_r0,_r1,_r2,_r3,_r4,_r5,_r6,_r7) \ - "#OC_TRANSPOSE8x4\n\t" \ - /*First 4x4 transpose:*/ \ - /*mm0 = e3 e2 e1 e0 \ - mm5 = f3 f2 f1 f0 \ - mm3 = g3 g2 g1 g0 \ - mm1 = h3 h2 h1 h0*/ \ - "movq %%mm0,%%mm2\n\t" \ - "punpcklwd %%mm5,%%mm0\n\t" \ - "punpckhwd %%mm5,%%mm2\n\t" \ - "movq %%mm3,%%mm5\n\t" \ - "punpcklwd %%mm1,%%mm3\n\t" \ - "punpckhwd %%mm1,%%mm5\n\t" \ - /*mm0 = f1 e1 f0 e0 \ - mm2 = f3 e3 f2 e2 \ - mm3 = h1 g1 h0 g0 \ - mm5 = h3 g3 h2 g2*/ \ - "movq %%mm0,%%mm1\n\t" \ - "punpckldq %%mm3,%%mm0\n\t" \ - "movq %%mm0,"_r4"(%[y])\n\t" \ - "punpckhdq %%mm3,%%mm1\n\t" \ - "movq "_r1"(%[y]),%%mm0\n\t" \ - "movq %%mm2,%%mm3\n\t" \ - "punpckldq %%mm5,%%mm2\n\t" \ - "punpckhdq %%mm5,%%mm3\n\t" \ - "movq "_r3"(%[y]),%%mm5\n\t" \ - /*_y[4] = h0 g0 f0 e0 \ - mm1 = h1 g1 f1 e1 \ - mm2 = h2 g2 f2 e2 \ - mm3 = h3 g3 f3 e3*/ \ - /*Second 4x4 transpose:*/ \ - /*mm4 = a3 a2 a1 a0 \ - mm0 = b3 b2 b1 b0 \ - mm6 = c3 c2 c1 c0 \ - mm5 = d3 d2 d1 d0*/ \ - "movq %%mm4,%%mm7\n\t" \ - "punpcklwd %%mm0,%%mm4\n\t" \ - "punpckhwd %%mm0,%%mm7\n\t" \ - "movq %%mm6,%%mm0\n\t" \ - "punpcklwd %%mm5,%%mm6\n\t" \ - "punpckhwd %%mm5,%%mm0\n\t" \ - /*mm4 = b1 a1 b0 a0 \ - mm7 = b3 a3 b2 a2 \ - mm6 = d1 c1 d0 c0 \ - mm0 = d3 c3 d2 c2*/ \ - "movq %%mm4,%%mm5\n\t" \ - "punpckldq %%mm6,%%mm4\n\t" \ - "punpckhdq %%mm6,%%mm5\n\t" \ - "movq %%mm7,%%mm6\n\t" \ - "punpckhdq %%mm0,%%mm7\n\t" \ - "punpckldq %%mm0,%%mm6\n\t" \ - /*mm4 = d0 c0 b0 a0 \ - mm5 = d1 c1 b1 a1 \ - mm6 = d2 c2 b2 a2 \ - mm7 = d3 c3 b3 a3*/ \ - -/*MMX implementation of the fDCT.*/ -void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ - ptrdiff_t a; - __asm__ __volatile__( - /*Add two extra bits of working precision to improve accuracy; any more and - we could overflow.*/ - /*We also add biases to correct for some systematic 
error that remains in - the full fDCT->iDCT round trip.*/ - "movq 0x00(%[x]),%%mm0\n\t" - "movq 0x10(%[x]),%%mm1\n\t" - "movq 0x20(%[x]),%%mm2\n\t" - "movq 0x30(%[x]),%%mm3\n\t" - "pcmpeqb %%mm4,%%mm4\n\t" - "pxor %%mm7,%%mm7\n\t" - "movq %%mm0,%%mm5\n\t" - "psllw $2,%%mm0\n\t" - "pcmpeqw %%mm7,%%mm5\n\t" - "movq 0x70(%[x]),%%mm7\n\t" - "psllw $2,%%mm1\n\t" - "psubw %%mm4,%%mm5\n\t" - "psllw $2,%%mm2\n\t" - "mov $1,%[a]\n\t" - "pslld $16,%%mm5\n\t" - "movd %[a],%%mm6\n\t" - "psllq $16,%%mm5\n\t" - "mov $0x10001,%[a]\n\t" - "psllw $2,%%mm3\n\t" - "movd %[a],%%mm4\n\t" - "punpckhwd %%mm6,%%mm5\n\t" - "psubw %%mm6,%%mm1\n\t" - "movq 0x60(%[x]),%%mm6\n\t" - "paddw %%mm5,%%mm0\n\t" - "movq 0x50(%[x]),%%mm5\n\t" - "paddw %%mm4,%%mm0\n\t" - "movq 0x40(%[x]),%%mm4\n\t" - /*We inline stage1 of the transform here so we can get better instruction - scheduling with the shifts.*/ - /*mm0=t7'=t0-t7*/ - "psllw $2,%%mm7\n\t" - "psubw %%mm7,%%mm0\n\t" - "psllw $2,%%mm6\n\t" - "paddw %%mm7,%%mm7\n\t" - /*mm1=t6'=t1-t6*/ - "psllw $2,%%mm5\n\t" - "psubw %%mm6,%%mm1\n\t" - "psllw $2,%%mm4\n\t" - "paddw %%mm6,%%mm6\n\t" - /*mm2=t5'=t2-t5*/ - "psubw %%mm5,%%mm2\n\t" - "paddw %%mm5,%%mm5\n\t" - /*mm3=t4'=t3-t4*/ - "psubw %%mm4,%%mm3\n\t" - "paddw %%mm4,%%mm4\n\t" - /*mm7=t0'=t0+t7*/ - "paddw %%mm0,%%mm7\n\t" - /*mm6=t1'=t1+t6*/ - "paddw %%mm1,%%mm6\n\t" - /*mm5=t2'=t2+t5*/ - "paddw %%mm2,%%mm5\n\t" - /*mm4=t3'=t3+t4*/ - "paddw %%mm3,%%mm4\n\t" - OC_FDCT8x4("0x00","0x10","0x20","0x30","0x40","0x50","0x60","0x70") - OC_TRANSPOSE8x4("0x00","0x10","0x20","0x30","0x40","0x50","0x60","0x70") - /*Swap out this 8x4 block for the next one.*/ - "movq 0x08(%[x]),%%mm0\n\t" - "movq %%mm7,0x30(%[y])\n\t" - "movq 0x78(%[x]),%%mm7\n\t" - "movq %%mm1,0x50(%[y])\n\t" - "movq 0x18(%[x]),%%mm1\n\t" - "movq %%mm6,0x20(%[y])\n\t" - "movq 0x68(%[x]),%%mm6\n\t" - "movq %%mm2,0x60(%[y])\n\t" - "movq 0x28(%[x]),%%mm2\n\t" - "movq %%mm5,0x10(%[y])\n\t" - "movq 0x58(%[x]),%%mm5\n\t" - "movq %%mm3,0x70(%[y])\n\t" - 
"movq 0x38(%[x]),%%mm3\n\t" - /*And increase its working precision, too.*/ - "psllw $2,%%mm0\n\t" - "movq %%mm4,0x00(%[y])\n\t" - "psllw $2,%%mm7\n\t" - "movq 0x48(%[x]),%%mm4\n\t" - /*We inline stage1 of the transform here so we can get better instruction - scheduling with the shifts.*/ - /*mm0=t7'=t0-t7*/ - "psubw %%mm7,%%mm0\n\t" - "psllw $2,%%mm1\n\t" - "paddw %%mm7,%%mm7\n\t" - "psllw $2,%%mm6\n\t" - /*mm1=t6'=t1-t6*/ - "psubw %%mm6,%%mm1\n\t" - "psllw $2,%%mm2\n\t" - "paddw %%mm6,%%mm6\n\t" - "psllw $2,%%mm5\n\t" - /*mm2=t5'=t2-t5*/ - "psubw %%mm5,%%mm2\n\t" - "psllw $2,%%mm3\n\t" - "paddw %%mm5,%%mm5\n\t" - "psllw $2,%%mm4\n\t" - /*mm3=t4'=t3-t4*/ - "psubw %%mm4,%%mm3\n\t" - "paddw %%mm4,%%mm4\n\t" - /*mm7=t0'=t0+t7*/ - "paddw %%mm0,%%mm7\n\t" - /*mm6=t1'=t1+t6*/ - "paddw %%mm1,%%mm6\n\t" - /*mm5=t2'=t2+t5*/ - "paddw %%mm2,%%mm5\n\t" - /*mm4=t3'=t3+t4*/ - "paddw %%mm3,%%mm4\n\t" - OC_FDCT8x4("0x08","0x18","0x28","0x38","0x48","0x58","0x68","0x78") - OC_TRANSPOSE8x4("0x08","0x18","0x28","0x38","0x48","0x58","0x68","0x78") - /*Here the first 4x4 block of output from the last transpose is the second - 4x4 block of input for the next transform. 
- We have cleverly arranged that it already be in the appropriate place, - so we only have to do half the stores and loads.*/ - "movq 0x00(%[y]),%%mm0\n\t" - "movq %%mm1,0x58(%[y])\n\t" - "movq 0x10(%[y]),%%mm1\n\t" - "movq %%mm2,0x68(%[y])\n\t" - "movq 0x20(%[y]),%%mm2\n\t" - "movq %%mm3,0x78(%[y])\n\t" - "movq 0x30(%[y]),%%mm3\n\t" - OC_FDCT_STAGE1_8x4 - OC_FDCT8x4("0x00","0x10","0x20","0x30","0x08","0x18","0x28","0x38") - OC_TRANSPOSE8x4("0x00","0x10","0x20","0x30","0x08","0x18","0x28","0x38") - /*mm0={-2}x4*/ - "pcmpeqw %%mm0,%%mm0\n\t" - "paddw %%mm0,%%mm0\n\t" - /*Round the results.*/ - "psubw %%mm0,%%mm1\n\t" - "psubw %%mm0,%%mm2\n\t" - "psraw $2,%%mm1\n\t" - "psubw %%mm0,%%mm3\n\t" - "movq %%mm1,0x18(%[y])\n\t" - "psraw $2,%%mm2\n\t" - "psubw %%mm0,%%mm4\n\t" - "movq 0x08(%[y]),%%mm1\n\t" - "psraw $2,%%mm3\n\t" - "psubw %%mm0,%%mm5\n\t" - "psraw $2,%%mm4\n\t" - "psubw %%mm0,%%mm6\n\t" - "psraw $2,%%mm5\n\t" - "psubw %%mm0,%%mm7\n\t" - "psraw $2,%%mm6\n\t" - "psubw %%mm0,%%mm1\n\t" - "psraw $2,%%mm7\n\t" - "movq 0x40(%[y]),%%mm0\n\t" - "psraw $2,%%mm1\n\t" - "movq %%mm7,0x30(%[y])\n\t" - "movq 0x78(%[y]),%%mm7\n\t" - "movq %%mm1,0x08(%[y])\n\t" - "movq 0x50(%[y]),%%mm1\n\t" - "movq %%mm6,0x20(%[y])\n\t" - "movq 0x68(%[y]),%%mm6\n\t" - "movq %%mm2,0x28(%[y])\n\t" - "movq 0x60(%[y]),%%mm2\n\t" - "movq %%mm5,0x10(%[y])\n\t" - "movq 0x58(%[y]),%%mm5\n\t" - "movq %%mm3,0x38(%[y])\n\t" - "movq 0x70(%[y]),%%mm3\n\t" - "movq %%mm4,0x00(%[y])\n\t" - "movq 0x48(%[y]),%%mm4\n\t" - OC_FDCT_STAGE1_8x4 - OC_FDCT8x4("0x40","0x50","0x60","0x70","0x48","0x58","0x68","0x78") - OC_TRANSPOSE8x4("0x40","0x50","0x60","0x70","0x48","0x58","0x68","0x78") - /*mm0={-2}x4*/ - "pcmpeqw %%mm0,%%mm0\n\t" - "paddw %%mm0,%%mm0\n\t" - /*Round the results.*/ - "psubw %%mm0,%%mm1\n\t" - "psubw %%mm0,%%mm2\n\t" - "psraw $2,%%mm1\n\t" - "psubw %%mm0,%%mm3\n\t" - "movq %%mm1,0x58(%[y])\n\t" - "psraw $2,%%mm2\n\t" - "psubw %%mm0,%%mm4\n\t" - "movq 0x48(%[y]),%%mm1\n\t" - "psraw $2,%%mm3\n\t" - 
"psubw %%mm0,%%mm5\n\t" - "movq %%mm2,0x68(%[y])\n\t" - "psraw $2,%%mm4\n\t" - "psubw %%mm0,%%mm6\n\t" - "movq %%mm3,0x78(%[y])\n\t" - "psraw $2,%%mm5\n\t" - "psubw %%mm0,%%mm7\n\t" - "movq %%mm4,0x40(%[y])\n\t" - "psraw $2,%%mm6\n\t" - "psubw %%mm0,%%mm1\n\t" - "movq %%mm5,0x50(%[y])\n\t" - "psraw $2,%%mm7\n\t" - "movq %%mm6,0x60(%[y])\n\t" - "psraw $2,%%mm1\n\t" - "movq %%mm7,0x70(%[y])\n\t" - "movq %%mm1,0x48(%[y])\n\t" - :[a]"=&r"(a) - :[y]"r"(_y),[x]"r"(_x) - :"memory" - ); -} - -#endif diff --git a/drivers/theora/x86/mmxfrag.c b/drivers/theora/x86/mmxfrag.c deleted file mode 100644 index 2c732939c3..0000000000 --- a/drivers/theora/x86/mmxfrag.c +++ /dev/null @@ -1,293 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: mmxfrag.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -/*MMX acceleration of fragment reconstruction for motion compensation. - Originally written by Rudolf Marek. - Additional optimization by Nils Pipenbrinck. - Note: Loops are unrolled for best performance. 
- The iteration each instruction belongs to is marked in the comments as #i.*/ -#include -#include "x86int.h" -#include "mmxfrag.h" - -#if defined(OC_X86_ASM) - -/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes - between rows.*/ -void oc_frag_copy_mmx(unsigned char *_dst, - const unsigned char *_src,int _ystride){ - OC_FRAG_COPY_MMX(_dst,_src,_ystride); -} - -void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride, - const ogg_int16_t *_residue){ - __asm__ __volatile__( - /*Set mm0 to 0xFFFFFFFFFFFFFFFF.*/ - "pcmpeqw %%mm0,%%mm0\n\t" - /*#0 Load low residue.*/ - "movq 0*8(%[residue]),%%mm1\n\t" - /*#0 Load high residue.*/ - "movq 1*8(%[residue]),%%mm2\n\t" - /*Set mm0 to 0x8000800080008000.*/ - "psllw $15,%%mm0\n\t" - /*#1 Load low residue.*/ - "movq 2*8(%[residue]),%%mm3\n\t" - /*#1 Load high residue.*/ - "movq 3*8(%[residue]),%%mm4\n\t" - /*Set mm0 to 0x0080008000800080.*/ - "psrlw $8,%%mm0\n\t" - /*#2 Load low residue.*/ - "movq 4*8(%[residue]),%%mm5\n\t" - /*#2 Load high residue.*/ - "movq 5*8(%[residue]),%%mm6\n\t" - /*#0 Bias low residue.*/ - "paddsw %%mm0,%%mm1\n\t" - /*#0 Bias high residue.*/ - "paddsw %%mm0,%%mm2\n\t" - /*#0 Pack to byte.*/ - "packuswb %%mm2,%%mm1\n\t" - /*#1 Bias low residue.*/ - "paddsw %%mm0,%%mm3\n\t" - /*#1 Bias high residue.*/ - "paddsw %%mm0,%%mm4\n\t" - /*#1 Pack to byte.*/ - "packuswb %%mm4,%%mm3\n\t" - /*#2 Bias low residue.*/ - "paddsw %%mm0,%%mm5\n\t" - /*#2 Bias high residue.*/ - "paddsw %%mm0,%%mm6\n\t" - /*#2 Pack to byte.*/ - "packuswb %%mm6,%%mm5\n\t" - /*#0 Write row.*/ - "movq %%mm1,(%[dst])\n\t" - /*#1 Write row.*/ - "movq %%mm3,(%[dst],%[ystride])\n\t" - /*#2 Write row.*/ - "movq %%mm5,(%[dst],%[ystride],2)\n\t" - /*#3 Load low residue.*/ - "movq 6*8(%[residue]),%%mm1\n\t" - /*#3 Load high residue.*/ - "movq 7*8(%[residue]),%%mm2\n\t" - /*#4 Load high residue.*/ - "movq 8*8(%[residue]),%%mm3\n\t" - /*#4 Load high residue.*/ - "movq 9*8(%[residue]),%%mm4\n\t" - /*#5 Load high residue.*/ - 
"movq 10*8(%[residue]),%%mm5\n\t" - /*#5 Load high residue.*/ - "movq 11*8(%[residue]),%%mm6\n\t" - /*#3 Bias low residue.*/ - "paddsw %%mm0,%%mm1\n\t" - /*#3 Bias high residue.*/ - "paddsw %%mm0,%%mm2\n\t" - /*#3 Pack to byte.*/ - "packuswb %%mm2,%%mm1\n\t" - /*#4 Bias low residue.*/ - "paddsw %%mm0,%%mm3\n\t" - /*#4 Bias high residue.*/ - "paddsw %%mm0,%%mm4\n\t" - /*#4 Pack to byte.*/ - "packuswb %%mm4,%%mm3\n\t" - /*#5 Bias low residue.*/ - "paddsw %%mm0,%%mm5\n\t" - /*#5 Bias high residue.*/ - "paddsw %%mm0,%%mm6\n\t" - /*#5 Pack to byte.*/ - "packuswb %%mm6,%%mm5\n\t" - /*#3 Write row.*/ - "movq %%mm1,(%[dst],%[ystride3])\n\t" - /*#4 Write row.*/ - "movq %%mm3,(%[dst4])\n\t" - /*#5 Write row.*/ - "movq %%mm5,(%[dst4],%[ystride])\n\t" - /*#6 Load low residue.*/ - "movq 12*8(%[residue]),%%mm1\n\t" - /*#6 Load high residue.*/ - "movq 13*8(%[residue]),%%mm2\n\t" - /*#7 Load low residue.*/ - "movq 14*8(%[residue]),%%mm3\n\t" - /*#7 Load high residue.*/ - "movq 15*8(%[residue]),%%mm4\n\t" - /*#6 Bias low residue.*/ - "paddsw %%mm0,%%mm1\n\t" - /*#6 Bias high residue.*/ - "paddsw %%mm0,%%mm2\n\t" - /*#6 Pack to byte.*/ - "packuswb %%mm2,%%mm1\n\t" - /*#7 Bias low residue.*/ - "paddsw %%mm0,%%mm3\n\t" - /*#7 Bias high residue.*/ - "paddsw %%mm0,%%mm4\n\t" - /*#7 Pack to byte.*/ - "packuswb %%mm4,%%mm3\n\t" - /*#6 Write row.*/ - "movq %%mm1,(%[dst4],%[ystride],2)\n\t" - /*#7 Write row.*/ - "movq %%mm3,(%[dst4],%[ystride3])\n\t" - : - :[residue]"r"(_residue), - [dst]"r"(_dst), - [dst4]"r"(_dst+(_ystride<<2)), - [ystride]"r"((ptrdiff_t)_ystride), - [ystride3]"r"((ptrdiff_t)_ystride*3) - :"memory" - ); -} - -void oc_frag_recon_inter_mmx(unsigned char *_dst,const unsigned char *_src, - int _ystride,const ogg_int16_t *_residue){ - int i; - /*Zero mm0.*/ - __asm__ __volatile__("pxor %%mm0,%%mm0\n\t"::); - for(i=4;i-->0;){ - __asm__ __volatile__( - /*#0 Load source.*/ - "movq (%[src]),%%mm3\n\t" - /*#1 Load source.*/ - "movq (%[src],%[ystride]),%%mm7\n\t" - /*#0 Get copy of 
src.*/ - "movq %%mm3,%%mm4\n\t" - /*#0 Expand high source.*/ - "punpckhbw %%mm0,%%mm4\n\t" - /*#0 Expand low source.*/ - "punpcklbw %%mm0,%%mm3\n\t" - /*#0 Add residue high.*/ - "paddsw 8(%[residue]),%%mm4\n\t" - /*#1 Get copy of src.*/ - "movq %%mm7,%%mm2\n\t" - /*#0 Add residue low.*/ - "paddsw (%[residue]), %%mm3\n\t" - /*#1 Expand high source.*/ - "punpckhbw %%mm0,%%mm2\n\t" - /*#0 Pack final row pixels.*/ - "packuswb %%mm4,%%mm3\n\t" - /*#1 Expand low source.*/ - "punpcklbw %%mm0,%%mm7\n\t" - /*#1 Add residue low.*/ - "paddsw 16(%[residue]),%%mm7\n\t" - /*#1 Add residue high.*/ - "paddsw 24(%[residue]),%%mm2\n\t" - /*Advance residue.*/ - "lea 32(%[residue]),%[residue]\n\t" - /*#1 Pack final row pixels.*/ - "packuswb %%mm2,%%mm7\n\t" - /*Advance src.*/ - "lea (%[src],%[ystride],2),%[src]\n\t" - /*#0 Write row.*/ - "movq %%mm3,(%[dst])\n\t" - /*#1 Write row.*/ - "movq %%mm7,(%[dst],%[ystride])\n\t" - /*Advance dst.*/ - "lea (%[dst],%[ystride],2),%[dst]\n\t" - :[residue]"+r"(_residue),[dst]"+r"(_dst),[src]"+r"(_src) - :[ystride]"r"((ptrdiff_t)_ystride) - :"memory" - ); - } -} - -void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1, - const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue){ - int i; - /*Zero mm7.*/ - __asm__ __volatile__("pxor %%mm7,%%mm7\n\t"::); - for(i=4;i-->0;){ - __asm__ __volatile__( - /*#0 Load src1.*/ - "movq (%[src1]),%%mm0\n\t" - /*#0 Load src2.*/ - "movq (%[src2]),%%mm2\n\t" - /*#0 Copy src1.*/ - "movq %%mm0,%%mm1\n\t" - /*#0 Copy src2.*/ - "movq %%mm2,%%mm3\n\t" - /*#1 Load src1.*/ - "movq (%[src1],%[ystride]),%%mm4\n\t" - /*#0 Unpack lower src1.*/ - "punpcklbw %%mm7,%%mm0\n\t" - /*#1 Load src2.*/ - "movq (%[src2],%[ystride]),%%mm5\n\t" - /*#0 Unpack higher src1.*/ - "punpckhbw %%mm7,%%mm1\n\t" - /*#0 Unpack lower src2.*/ - "punpcklbw %%mm7,%%mm2\n\t" - /*#0 Unpack higher src2.*/ - "punpckhbw %%mm7,%%mm3\n\t" - /*Advance src1 ptr.*/ - "lea (%[src1],%[ystride],2),%[src1]\n\t" - /*Advance src2 ptr.*/ 
- "lea (%[src2],%[ystride],2),%[src2]\n\t" - /*#0 Lower src1+src2.*/ - "paddsw %%mm2,%%mm0\n\t" - /*#0 Higher src1+src2.*/ - "paddsw %%mm3,%%mm1\n\t" - /*#1 Copy src1.*/ - "movq %%mm4,%%mm2\n\t" - /*#0 Build lo average.*/ - "psraw $1,%%mm0\n\t" - /*#1 Copy src2.*/ - "movq %%mm5,%%mm3\n\t" - /*#1 Unpack lower src1.*/ - "punpcklbw %%mm7,%%mm4\n\t" - /*#0 Build hi average.*/ - "psraw $1,%%mm1\n\t" - /*#1 Unpack higher src1.*/ - "punpckhbw %%mm7,%%mm2\n\t" - /*#0 low+=residue.*/ - "paddsw (%[residue]),%%mm0\n\t" - /*#1 Unpack lower src2.*/ - "punpcklbw %%mm7,%%mm5\n\t" - /*#0 high+=residue.*/ - "paddsw 8(%[residue]),%%mm1\n\t" - /*#1 Unpack higher src2.*/ - "punpckhbw %%mm7,%%mm3\n\t" - /*#1 Lower src1+src2.*/ - "paddsw %%mm4,%%mm5\n\t" - /*#0 Pack and saturate.*/ - "packuswb %%mm1,%%mm0\n\t" - /*#1 Higher src1+src2.*/ - "paddsw %%mm2,%%mm3\n\t" - /*#0 Write row.*/ - "movq %%mm0,(%[dst])\n\t" - /*#1 Build lo average.*/ - "psraw $1,%%mm5\n\t" - /*#1 Build hi average.*/ - "psraw $1,%%mm3\n\t" - /*#1 low+=residue.*/ - "paddsw 16(%[residue]),%%mm5\n\t" - /*#1 high+=residue.*/ - "paddsw 24(%[residue]),%%mm3\n\t" - /*#1 Pack and saturate.*/ - "packuswb %%mm3,%%mm5\n\t" - /*#1 Write row ptr.*/ - "movq %%mm5,(%[dst],%[ystride])\n\t" - /*Advance residue ptr.*/ - "add $32,%[residue]\n\t" - /*Advance dest ptr.*/ - "lea (%[dst],%[ystride],2),%[dst]\n\t" - :[dst]"+r"(_dst),[residue]"+r"(_residue), - [src1]"+%r"(_src1),[src2]"+r"(_src2) - :[ystride]"r"((ptrdiff_t)_ystride) - :"memory" - ); - } -} - -void oc_restore_fpu_mmx(void){ - __asm__ __volatile__("emms\n\t"); -} -#endif diff --git a/drivers/theora/x86/mmxfrag.h b/drivers/theora/x86/mmxfrag.h deleted file mode 100644 index a398427629..0000000000 --- a/drivers/theora/x86/mmxfrag.h +++ /dev/null @@ -1,64 +0,0 @@ -#if !defined(_x86_mmxfrag_H) -# define _x86_mmxfrag_H (1) -# include -# include "x86int.h" - -#if defined(OC_X86_ASM) - -/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes - between rows.*/ 
-#define OC_FRAG_COPY_MMX(_dst,_src,_ystride) \ - do{ \ - const unsigned char *src; \ - unsigned char *dst; \ - ptrdiff_t ystride3; \ - src=(_src); \ - dst=(_dst); \ - __asm__ __volatile__( \ - /*src+0*ystride*/ \ - "movq (%[src]),%%mm0\n\t" \ - /*src+1*ystride*/ \ - "movq (%[src],%[ystride]),%%mm1\n\t" \ - /*ystride3=ystride*3*/ \ - "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" \ - /*src+2*ystride*/ \ - "movq (%[src],%[ystride],2),%%mm2\n\t" \ - /*src+3*ystride*/ \ - "movq (%[src],%[ystride3]),%%mm3\n\t" \ - /*dst+0*ystride*/ \ - "movq %%mm0,(%[dst])\n\t" \ - /*dst+1*ystride*/ \ - "movq %%mm1,(%[dst],%[ystride])\n\t" \ - /*Pointer to next 4.*/ \ - "lea (%[src],%[ystride],4),%[src]\n\t" \ - /*dst+2*ystride*/ \ - "movq %%mm2,(%[dst],%[ystride],2)\n\t" \ - /*dst+3*ystride*/ \ - "movq %%mm3,(%[dst],%[ystride3])\n\t" \ - /*Pointer to next 4.*/ \ - "lea (%[dst],%[ystride],4),%[dst]\n\t" \ - /*src+0*ystride*/ \ - "movq (%[src]),%%mm0\n\t" \ - /*src+1*ystride*/ \ - "movq (%[src],%[ystride]),%%mm1\n\t" \ - /*src+2*ystride*/ \ - "movq (%[src],%[ystride],2),%%mm2\n\t" \ - /*src+3*ystride*/ \ - "movq (%[src],%[ystride3]),%%mm3\n\t" \ - /*dst+0*ystride*/ \ - "movq %%mm0,(%[dst])\n\t" \ - /*dst+1*ystride*/ \ - "movq %%mm1,(%[dst],%[ystride])\n\t" \ - /*dst+2*ystride*/ \ - "movq %%mm2,(%[dst],%[ystride],2)\n\t" \ - /*dst+3*ystride*/ \ - "movq %%mm3,(%[dst],%[ystride3])\n\t" \ - :[dst]"+r"(dst),[src]"+r"(src),[ystride3]"=&r"(ystride3) \ - :[ystride]"r"((ptrdiff_t)(_ystride)) \ - :"memory" \ - ); \ - } \ - while(0) - -# endif -#endif diff --git a/drivers/theora/x86/mmxidct.c b/drivers/theora/x86/mmxidct.c deleted file mode 100644 index 76424e6364..0000000000 --- a/drivers/theora/x86/mmxidct.c +++ /dev/null @@ -1,564 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. 
* - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: mmxidct.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -/*MMX acceleration of Theora's iDCT. - Originally written by Rudolf Marek, based on code from On2's VP3.*/ -#include "x86int.h" -#include "../dct.h" - -#if defined(OC_X86_ASM) - -/*These are offsets into the table of constants below.*/ -/*7 rows of cosines, in order: pi/16 * (1 ... 7).*/ -#define OC_COSINE_OFFSET (0) -/*A row of 8's.*/ -#define OC_EIGHT_OFFSET (56) - - - -/*A table of constants used by the MMX routines.*/ -static const ogg_uint16_t __attribute__((aligned(8),used)) - OC_IDCT_CONSTS[(7+1)*4]={ - (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7, - (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7, - (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6, - (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6, - (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5, - (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5, - (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4, - (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4, - (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3, - (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3, - (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2, - (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2, - (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1, - (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1, - 8, 8, 8, 8 -}; - -/*Converts the expression in the argument to a string.*/ -#define OC_M2STR(_s) #_s - -/*38 cycles*/ -#define OC_IDCT_BEGIN \ - "#OC_IDCT_BEGIN\n\t" \ - "movq "OC_I(3)",%%mm2\n\t" \ - "movq "OC_C(3)",%%mm6\n\t" \ - "movq %%mm2,%%mm4\n\t" \ 
- "movq "OC_J(5)",%%mm7\n\t" \ - "pmulhw %%mm6,%%mm4\n\t" \ - "movq "OC_C(5)",%%mm1\n\t" \ - "pmulhw %%mm7,%%mm6\n\t" \ - "movq %%mm1,%%mm5\n\t" \ - "pmulhw %%mm2,%%mm1\n\t" \ - "movq "OC_I(1)",%%mm3\n\t" \ - "pmulhw %%mm7,%%mm5\n\t" \ - "movq "OC_C(1)",%%mm0\n\t" \ - "paddw %%mm2,%%mm4\n\t" \ - "paddw %%mm7,%%mm6\n\t" \ - "paddw %%mm1,%%mm2\n\t" \ - "movq "OC_J(7)",%%mm1\n\t" \ - "paddw %%mm5,%%mm7\n\t" \ - "movq %%mm0,%%mm5\n\t" \ - "pmulhw %%mm3,%%mm0\n\t" \ - "paddw %%mm7,%%mm4\n\t" \ - "pmulhw %%mm1,%%mm5\n\t" \ - "movq "OC_C(7)",%%mm7\n\t" \ - "psubw %%mm2,%%mm6\n\t" \ - "paddw %%mm3,%%mm0\n\t" \ - "pmulhw %%mm7,%%mm3\n\t" \ - "movq "OC_I(2)",%%mm2\n\t" \ - "pmulhw %%mm1,%%mm7\n\t" \ - "paddw %%mm1,%%mm5\n\t" \ - "movq %%mm2,%%mm1\n\t" \ - "pmulhw "OC_C(2)",%%mm2\n\t" \ - "psubw %%mm5,%%mm3\n\t" \ - "movq "OC_J(6)",%%mm5\n\t" \ - "paddw %%mm7,%%mm0\n\t" \ - "movq %%mm5,%%mm7\n\t" \ - "psubw %%mm4,%%mm0\n\t" \ - "pmulhw "OC_C(2)",%%mm5\n\t" \ - "paddw %%mm1,%%mm2\n\t" \ - "pmulhw "OC_C(6)",%%mm1\n\t" \ - "paddw %%mm4,%%mm4\n\t" \ - "paddw %%mm0,%%mm4\n\t" \ - "psubw %%mm6,%%mm3\n\t" \ - "paddw %%mm7,%%mm5\n\t" \ - "paddw %%mm6,%%mm6\n\t" \ - "pmulhw "OC_C(6)",%%mm7\n\t" \ - "paddw %%mm3,%%mm6\n\t" \ - "movq %%mm4,"OC_I(1)"\n\t" \ - "psubw %%mm5,%%mm1\n\t" \ - "movq "OC_C(4)",%%mm4\n\t" \ - "movq %%mm3,%%mm5\n\t" \ - "pmulhw %%mm4,%%mm3\n\t" \ - "paddw %%mm2,%%mm7\n\t" \ - "movq %%mm6,"OC_I(2)"\n\t" \ - "movq %%mm0,%%mm2\n\t" \ - "movq "OC_I(0)",%%mm6\n\t" \ - "pmulhw %%mm4,%%mm0\n\t" \ - "paddw %%mm3,%%mm5\n\t" \ - "movq "OC_J(4)",%%mm3\n\t" \ - "psubw %%mm1,%%mm5\n\t" \ - "paddw %%mm0,%%mm2\n\t" \ - "psubw %%mm3,%%mm6\n\t" \ - "movq %%mm6,%%mm0\n\t" \ - "pmulhw %%mm4,%%mm6\n\t" \ - "paddw %%mm3,%%mm3\n\t" \ - "paddw %%mm1,%%mm1\n\t" \ - "paddw %%mm0,%%mm3\n\t" \ - "paddw %%mm5,%%mm1\n\t" \ - "pmulhw %%mm3,%%mm4\n\t" \ - "paddw %%mm0,%%mm6\n\t" \ - "psubw %%mm2,%%mm6\n\t" \ - "paddw %%mm2,%%mm2\n\t" \ - "movq "OC_I(1)",%%mm0\n\t" \ - "paddw %%mm6,%%mm2\n\t" \ 
- "paddw %%mm3,%%mm4\n\t" \ - "psubw %%mm1,%%mm2\n\t" \ - "#end OC_IDCT_BEGIN\n\t" \ - -/*38+8=46 cycles.*/ -#define OC_ROW_IDCT \ - "#OC_ROW_IDCT\n" \ - OC_IDCT_BEGIN \ - /*r3=D'*/ \ - "movq "OC_I(2)",%%mm3\n\t" \ - /*r4=E'=E-G*/ \ - "psubw %%mm7,%%mm4\n\t" \ - /*r1=H'+H'*/ \ - "paddw %%mm1,%%mm1\n\t" \ - /*r7=G+G*/ \ - "paddw %%mm7,%%mm7\n\t" \ - /*r1=R1=A''+H'*/ \ - "paddw %%mm2,%%mm1\n\t" \ - /*r7=G'=E+G*/ \ - "paddw %%mm4,%%mm7\n\t" \ - /*r4=R4=E'-D'*/ \ - "psubw %%mm3,%%mm4\n\t" \ - "paddw %%mm3,%%mm3\n\t" \ - /*r6=R6=F'-B''*/ \ - "psubw %%mm5,%%mm6\n\t" \ - "paddw %%mm5,%%mm5\n\t" \ - /*r3=R3=E'+D'*/ \ - "paddw %%mm4,%%mm3\n\t" \ - /*r5=R5=F'+B''*/ \ - "paddw %%mm6,%%mm5\n\t" \ - /*r7=R7=G'-C'*/ \ - "psubw %%mm0,%%mm7\n\t" \ - "paddw %%mm0,%%mm0\n\t" \ - /*Save R1.*/ \ - "movq %%mm1,"OC_I(1)"\n\t" \ - /*r0=R0=G.+C.*/ \ - "paddw %%mm7,%%mm0\n\t" \ - "#end OC_ROW_IDCT\n\t" \ - -/*The following macro does two 4x4 transposes in place. - At entry, we assume: - r0 = a3 a2 a1 a0 - I(1) = b3 b2 b1 b0 - r2 = c3 c2 c1 c0 - r3 = d3 d2 d1 d0 - - r4 = e3 e2 e1 e0 - r5 = f3 f2 f1 f0 - r6 = g3 g2 g1 g0 - r7 = h3 h2 h1 h0 - - At exit, we have: - I(0) = d0 c0 b0 a0 - I(1) = d1 c1 b1 a1 - I(2) = d2 c2 b2 a2 - I(3) = d3 c3 b3 a3 - - J(4) = h0 g0 f0 e0 - J(5) = h1 g1 f1 e1 - J(6) = h2 g2 f2 e2 - J(7) = h3 g3 f3 e3 - - I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3. - J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7. 
- - Since r1 is free at entry, we calculate the Js first.*/ -/*19 cycles.*/ -#define OC_TRANSPOSE \ - "#OC_TRANSPOSE\n\t" \ - "movq %%mm4,%%mm1\n\t" \ - "punpcklwd %%mm5,%%mm4\n\t" \ - "movq %%mm0,"OC_I(0)"\n\t" \ - "punpckhwd %%mm5,%%mm1\n\t" \ - "movq %%mm6,%%mm0\n\t" \ - "punpcklwd %%mm7,%%mm6\n\t" \ - "movq %%mm4,%%mm5\n\t" \ - "punpckldq %%mm6,%%mm4\n\t" \ - "punpckhdq %%mm6,%%mm5\n\t" \ - "movq %%mm1,%%mm6\n\t" \ - "movq %%mm4,"OC_J(4)"\n\t" \ - "punpckhwd %%mm7,%%mm0\n\t" \ - "movq %%mm5,"OC_J(5)"\n\t" \ - "punpckhdq %%mm0,%%mm6\n\t" \ - "movq "OC_I(0)",%%mm4\n\t" \ - "punpckldq %%mm0,%%mm1\n\t" \ - "movq "OC_I(1)",%%mm5\n\t" \ - "movq %%mm4,%%mm0\n\t" \ - "movq %%mm6,"OC_J(7)"\n\t" \ - "punpcklwd %%mm5,%%mm0\n\t" \ - "movq %%mm1,"OC_J(6)"\n\t" \ - "punpckhwd %%mm5,%%mm4\n\t" \ - "movq %%mm2,%%mm5\n\t" \ - "punpcklwd %%mm3,%%mm2\n\t" \ - "movq %%mm0,%%mm1\n\t" \ - "punpckldq %%mm2,%%mm0\n\t" \ - "punpckhdq %%mm2,%%mm1\n\t" \ - "movq %%mm4,%%mm2\n\t" \ - "movq %%mm0,"OC_I(0)"\n\t" \ - "punpckhwd %%mm3,%%mm5\n\t" \ - "movq %%mm1,"OC_I(1)"\n\t" \ - "punpckhdq %%mm5,%%mm4\n\t" \ - "punpckldq %%mm5,%%mm2\n\t" \ - "movq %%mm4,"OC_I(3)"\n\t" \ - "movq %%mm2,"OC_I(2)"\n\t" \ - "#end OC_TRANSPOSE\n\t" \ - -/*38+19=57 cycles.*/ -#define OC_COLUMN_IDCT \ - "#OC_COLUMN_IDCT\n" \ - OC_IDCT_BEGIN \ - "paddw "OC_8",%%mm2\n\t" \ - /*r1=H'+H'*/ \ - "paddw %%mm1,%%mm1\n\t" \ - /*r1=R1=A''+H'*/ \ - "paddw %%mm2,%%mm1\n\t" \ - /*r2=NR2*/ \ - "psraw $4,%%mm2\n\t" \ - /*r4=E'=E-G*/ \ - "psubw %%mm7,%%mm4\n\t" \ - /*r1=NR1*/ \ - "psraw $4,%%mm1\n\t" \ - /*r3=D'*/ \ - "movq "OC_I(2)",%%mm3\n\t" \ - /*r7=G+G*/ \ - "paddw %%mm7,%%mm7\n\t" \ - /*Store NR2 at I(2).*/ \ - "movq %%mm2,"OC_I(2)"\n\t" \ - /*r7=G'=E+G*/ \ - "paddw %%mm4,%%mm7\n\t" \ - /*Store NR1 at I(1).*/ \ - "movq %%mm1,"OC_I(1)"\n\t" \ - /*r4=R4=E'-D'*/ \ - "psubw %%mm3,%%mm4\n\t" \ - "paddw "OC_8",%%mm4\n\t" \ - /*r3=D'+D'*/ \ - "paddw %%mm3,%%mm3\n\t" \ - /*r3=R3=E'+D'*/ \ - "paddw %%mm4,%%mm3\n\t" \ - /*r4=NR4*/ \ - 
"psraw $4,%%mm4\n\t" \ - /*r6=R6=F'-B''*/ \ - "psubw %%mm5,%%mm6\n\t" \ - /*r3=NR3*/ \ - "psraw $4,%%mm3\n\t" \ - "paddw "OC_8",%%mm6\n\t" \ - /*r5=B''+B''*/ \ - "paddw %%mm5,%%mm5\n\t" \ - /*r5=R5=F'+B''*/ \ - "paddw %%mm6,%%mm5\n\t" \ - /*r6=NR6*/ \ - "psraw $4,%%mm6\n\t" \ - /*Store NR4 at J(4).*/ \ - "movq %%mm4,"OC_J(4)"\n\t" \ - /*r5=NR5*/ \ - "psraw $4,%%mm5\n\t" \ - /*Store NR3 at I(3).*/ \ - "movq %%mm3,"OC_I(3)"\n\t" \ - /*r7=R7=G'-C'*/ \ - "psubw %%mm0,%%mm7\n\t" \ - "paddw "OC_8",%%mm7\n\t" \ - /*r0=C'+C'*/ \ - "paddw %%mm0,%%mm0\n\t" \ - /*r0=R0=G'+C'*/ \ - "paddw %%mm7,%%mm0\n\t" \ - /*r7=NR7*/ \ - "psraw $4,%%mm7\n\t" \ - /*Store NR6 at J(6).*/ \ - "movq %%mm6,"OC_J(6)"\n\t" \ - /*r0=NR0*/ \ - "psraw $4,%%mm0\n\t" \ - /*Store NR5 at J(5).*/ \ - "movq %%mm5,"OC_J(5)"\n\t" \ - /*Store NR7 at J(7).*/ \ - "movq %%mm7,"OC_J(7)"\n\t" \ - /*Store NR0 at I(0).*/ \ - "movq %%mm0,"OC_I(0)"\n\t" \ - "#end OC_COLUMN_IDCT\n\t" \ - -#define OC_MID(_m,_i) OC_M2STR(_m+(_i)*8)"(%[c])" -#define OC_C(_i) OC_MID(OC_COSINE_OFFSET,_i-1) -#define OC_8 OC_MID(OC_EIGHT_OFFSET,0) - -static void oc_idct8x8_slow(ogg_int16_t _y[64]){ - /*This routine accepts an 8x8 matrix, but in partially transposed form. 
- Every 4x4 block is transposed.*/ - __asm__ __volatile__( -#define OC_I(_k) OC_M2STR((_k*16))"(%[y])" -#define OC_J(_k) OC_M2STR(((_k-4)*16)+8)"(%[y])" - OC_ROW_IDCT - OC_TRANSPOSE -#undef OC_I -#undef OC_J -#define OC_I(_k) OC_M2STR((_k*16)+64)"(%[y])" -#define OC_J(_k) OC_M2STR(((_k-4)*16)+72)"(%[y])" - OC_ROW_IDCT - OC_TRANSPOSE -#undef OC_I -#undef OC_J -#define OC_I(_k) OC_M2STR((_k*16))"(%[y])" -#define OC_J(_k) OC_I(_k) - OC_COLUMN_IDCT -#undef OC_I -#undef OC_J -#define OC_I(_k) OC_M2STR((_k*16)+8)"(%[y])" -#define OC_J(_k) OC_I(_k) - OC_COLUMN_IDCT -#undef OC_I -#undef OC_J - : - :[y]"r"(_y),[c]"r"(OC_IDCT_CONSTS) - ); -} - -/*25 cycles.*/ -#define OC_IDCT_BEGIN_10 \ - "#OC_IDCT_BEGIN_10\n\t" \ - "movq "OC_I(3)",%%mm2\n\t" \ - "nop\n\t" \ - "movq "OC_C(3)",%%mm6\n\t" \ - "movq %%mm2,%%mm4\n\t" \ - "movq "OC_C(5)",%%mm1\n\t" \ - "pmulhw %%mm6,%%mm4\n\t" \ - "movq "OC_I(1)",%%mm3\n\t" \ - "pmulhw %%mm2,%%mm1\n\t" \ - "movq "OC_C(1)",%%mm0\n\t" \ - "paddw %%mm2,%%mm4\n\t" \ - "pxor %%mm6,%%mm6\n\t" \ - "paddw %%mm1,%%mm2\n\t" \ - "movq "OC_I(2)",%%mm5\n\t" \ - "pmulhw %%mm3,%%mm0\n\t" \ - "movq %%mm5,%%mm1\n\t" \ - "paddw %%mm3,%%mm0\n\t" \ - "pmulhw "OC_C(7)",%%mm3\n\t" \ - "psubw %%mm2,%%mm6\n\t" \ - "pmulhw "OC_C(2)",%%mm5\n\t" \ - "psubw %%mm4,%%mm0\n\t" \ - "movq "OC_I(2)",%%mm7\n\t" \ - "paddw %%mm4,%%mm4\n\t" \ - "paddw %%mm5,%%mm7\n\t" \ - "paddw %%mm0,%%mm4\n\t" \ - "pmulhw "OC_C(6)",%%mm1\n\t" \ - "psubw %%mm6,%%mm3\n\t" \ - "movq %%mm4,"OC_I(1)"\n\t" \ - "paddw %%mm6,%%mm6\n\t" \ - "movq "OC_C(4)",%%mm4\n\t" \ - "paddw %%mm3,%%mm6\n\t" \ - "movq %%mm3,%%mm5\n\t" \ - "pmulhw %%mm4,%%mm3\n\t" \ - "movq %%mm6,"OC_I(2)"\n\t" \ - "movq %%mm0,%%mm2\n\t" \ - "movq "OC_I(0)",%%mm6\n\t" \ - "pmulhw %%mm4,%%mm0\n\t" \ - "paddw %%mm3,%%mm5\n\t" \ - "paddw %%mm0,%%mm2\n\t" \ - "psubw %%mm1,%%mm5\n\t" \ - "pmulhw %%mm4,%%mm6\n\t" \ - "paddw "OC_I(0)",%%mm6\n\t" \ - "paddw %%mm1,%%mm1\n\t" \ - "movq %%mm6,%%mm4\n\t" \ - "paddw %%mm5,%%mm1\n\t" \ - "psubw 
%%mm2,%%mm6\n\t" \ - "paddw %%mm2,%%mm2\n\t" \ - "movq "OC_I(1)",%%mm0\n\t" \ - "paddw %%mm6,%%mm2\n\t" \ - "psubw %%mm1,%%mm2\n\t" \ - "nop\n\t" \ - "#end OC_IDCT_BEGIN_10\n\t" \ - -/*25+8=33 cycles.*/ -#define OC_ROW_IDCT_10 \ - "#OC_ROW_IDCT_10\n\t" \ - OC_IDCT_BEGIN_10 \ - /*r3=D'*/ \ - "movq "OC_I(2)",%%mm3\n\t" \ - /*r4=E'=E-G*/ \ - "psubw %%mm7,%%mm4\n\t" \ - /*r1=H'+H'*/ \ - "paddw %%mm1,%%mm1\n\t" \ - /*r7=G+G*/ \ - "paddw %%mm7,%%mm7\n\t" \ - /*r1=R1=A''+H'*/ \ - "paddw %%mm2,%%mm1\n\t" \ - /*r7=G'=E+G*/ \ - "paddw %%mm4,%%mm7\n\t" \ - /*r4=R4=E'-D'*/ \ - "psubw %%mm3,%%mm4\n\t" \ - "paddw %%mm3,%%mm3\n\t" \ - /*r6=R6=F'-B''*/ \ - "psubw %%mm5,%%mm6\n\t" \ - "paddw %%mm5,%%mm5\n\t" \ - /*r3=R3=E'+D'*/ \ - "paddw %%mm4,%%mm3\n\t" \ - /*r5=R5=F'+B''*/ \ - "paddw %%mm6,%%mm5\n\t" \ - /*r7=R7=G'-C'*/ \ - "psubw %%mm0,%%mm7\n\t" \ - "paddw %%mm0,%%mm0\n\t" \ - /*Save R1.*/ \ - "movq %%mm1,"OC_I(1)"\n\t" \ - /*r0=R0=G'+C'*/ \ - "paddw %%mm7,%%mm0\n\t" \ - "#end OC_ROW_IDCT_10\n\t" \ - -/*25+19=44 cycles'*/ -#define OC_COLUMN_IDCT_10 \ - "#OC_COLUMN_IDCT_10\n\t" \ - OC_IDCT_BEGIN_10 \ - "paddw "OC_8",%%mm2\n\t" \ - /*r1=H'+H'*/ \ - "paddw %%mm1,%%mm1\n\t" \ - /*r1=R1=A''+H'*/ \ - "paddw %%mm2,%%mm1\n\t" \ - /*r2=NR2*/ \ - "psraw $4,%%mm2\n\t" \ - /*r4=E'=E-G*/ \ - "psubw %%mm7,%%mm4\n\t" \ - /*r1=NR1*/ \ - "psraw $4,%%mm1\n\t" \ - /*r3=D'*/ \ - "movq "OC_I(2)",%%mm3\n\t" \ - /*r7=G+G*/ \ - "paddw %%mm7,%%mm7\n\t" \ - /*Store NR2 at I(2).*/ \ - "movq %%mm2,"OC_I(2)"\n\t" \ - /*r7=G'=E+G*/ \ - "paddw %%mm4,%%mm7\n\t" \ - /*Store NR1 at I(1).*/ \ - "movq %%mm1,"OC_I(1)"\n\t" \ - /*r4=R4=E'-D'*/ \ - "psubw %%mm3,%%mm4\n\t" \ - "paddw "OC_8",%%mm4\n\t" \ - /*r3=D'+D'*/ \ - "paddw %%mm3,%%mm3\n\t" \ - /*r3=R3=E'+D'*/ \ - "paddw %%mm4,%%mm3\n\t" \ - /*r4=NR4*/ \ - "psraw $4,%%mm4\n\t" \ - /*r6=R6=F'-B''*/ \ - "psubw %%mm5,%%mm6\n\t" \ - /*r3=NR3*/ \ - "psraw $4,%%mm3\n\t" \ - "paddw "OC_8",%%mm6\n\t" \ - /*r5=B''+B''*/ \ - "paddw %%mm5,%%mm5\n\t" \ - /*r5=R5=F'+B''*/ \ 
- "paddw %%mm6,%%mm5\n\t" \ - /*r6=NR6*/ \ - "psraw $4,%%mm6\n\t" \ - /*Store NR4 at J(4).*/ \ - "movq %%mm4,"OC_J(4)"\n\t" \ - /*r5=NR5*/ \ - "psraw $4,%%mm5\n\t" \ - /*Store NR3 at I(3).*/ \ - "movq %%mm3,"OC_I(3)"\n\t" \ - /*r7=R7=G'-C'*/ \ - "psubw %%mm0,%%mm7\n\t" \ - "paddw "OC_8",%%mm7\n\t" \ - /*r0=C'+C'*/ \ - "paddw %%mm0,%%mm0\n\t" \ - /*r0=R0=G'+C'*/ \ - "paddw %%mm7,%%mm0\n\t" \ - /*r7=NR7*/ \ - "psraw $4,%%mm7\n\t" \ - /*Store NR6 at J(6).*/ \ - "movq %%mm6,"OC_J(6)"\n\t" \ - /*r0=NR0*/ \ - "psraw $4,%%mm0\n\t" \ - /*Store NR5 at J(5).*/ \ - "movq %%mm5,"OC_J(5)"\n\t" \ - /*Store NR7 at J(7).*/ \ - "movq %%mm7,"OC_J(7)"\n\t" \ - /*Store NR0 at I(0).*/ \ - "movq %%mm0,"OC_I(0)"\n\t" \ - "#end OC_COLUMN_IDCT_10\n\t" \ - -static void oc_idct8x8_10(ogg_int16_t _y[64]){ - __asm__ __volatile__( -#define OC_I(_k) OC_M2STR((_k*16))"(%[y])" -#define OC_J(_k) OC_M2STR(((_k-4)*16)+8)"(%[y])" - /*Done with dequant, descramble, and partial transpose. - Now do the iDCT itself.*/ - OC_ROW_IDCT_10 - OC_TRANSPOSE -#undef OC_I -#undef OC_J -#define OC_I(_k) OC_M2STR((_k*16))"(%[y])" -#define OC_J(_k) OC_I(_k) - OC_COLUMN_IDCT_10 -#undef OC_I -#undef OC_J -#define OC_I(_k) OC_M2STR((_k*16)+8)"(%[y])" -#define OC_J(_k) OC_I(_k) - OC_COLUMN_IDCT_10 -#undef OC_I -#undef OC_J - : - :[y]"r"(_y),[c]"r"(OC_IDCT_CONSTS) - ); -} - -/*Performs an inverse 8x8 Type-II DCT transform. - The input is assumed to be scaled by a factor of 4 relative to orthonormal - version of the transform.*/ -void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi){ - /*_last_zzi is subtly different from an actual count of the number of - coefficients we decoded for this block. - It contains the value of zzi BEFORE the final token in the block was - decoded. - In most cases this is an EOB token (the continuation of an EOB run from a - previous block counts), and so this is the same as the coefficient count. 
- However, in the case that the last token was NOT an EOB token, but filled - the block up with exactly 64 coefficients, _last_zzi will be less than 64. - Provided the last token was not a pure zero run, the minimum value it can - be is 46, and so that doesn't affect any of the cases in this routine. - However, if the last token WAS a pure zero run of length 63, then _last_zzi - will be 1 while the number of coefficients decoded is 64. - Thus, we will trigger the following special case, where the real - coefficient count would not. - Note also that a zero run of length 64 will give _last_zzi a value of 0, - but we still process the DC coefficient, which might have a non-zero value - due to DC prediction. - Although convoluted, this is arguably the correct behavior: it allows us to - use a smaller transform when the block ends with a long zero run instead - of a normal EOB token. - It could be smarter... multiple separate zero runs at the end of a block - will fool it, but an encoder that generates these really deserves what it - gets. - Needless to say we inherited this approach from VP3.*/ - /*Then perform the iDCT.*/ - if(_last_zzi<10)oc_idct8x8_10(_y); - else oc_idct8x8_slow(_y); -} - -#endif diff --git a/drivers/theora/x86/mmxloop.h b/drivers/theora/x86/mmxloop.h deleted file mode 100644 index 2e870c795d..0000000000 --- a/drivers/theora/x86/mmxloop.h +++ /dev/null @@ -1,215 +0,0 @@ -#if !defined(_x86_mmxloop_H) -# define _x86_mmxloop_H (1) -# include -# include "x86int.h" - -#if defined(OC_X86_ASM) - -/*On entry, mm0={a0,...,a7}, mm1={b0,...,b7}, mm2={c0,...,c7}, mm3={d0,...d7}. 
- On exit, mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)} and - mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}; mm0 and mm3 are clobbered.*/ -#define OC_LOOP_FILTER8_MMX \ - "#OC_LOOP_FILTER8_MMX\n\t" \ - /*mm7=0*/ \ - "pxor %%mm7,%%mm7\n\t" \ - /*mm6:mm0={a0,...,a7}*/ \ - "movq %%mm0,%%mm6\n\t" \ - "punpcklbw %%mm7,%%mm0\n\t" \ - "punpckhbw %%mm7,%%mm6\n\t" \ - /*mm3:mm5={d0,...,d7}*/ \ - "movq %%mm3,%%mm5\n\t" \ - "punpcklbw %%mm7,%%mm3\n\t" \ - "punpckhbw %%mm7,%%mm5\n\t" \ - /*mm6:mm0={a0-d0,...,a7-d7}*/ \ - "psubw %%mm3,%%mm0\n\t" \ - "psubw %%mm5,%%mm6\n\t" \ - /*mm3:mm1={b0,...,b7}*/ \ - "movq %%mm1,%%mm3\n\t" \ - "punpcklbw %%mm7,%%mm1\n\t" \ - "movq %%mm2,%%mm4\n\t" \ - "punpckhbw %%mm7,%%mm3\n\t" \ - /*mm5:mm4={c0,...,c7}*/ \ - "movq %%mm2,%%mm5\n\t" \ - "punpcklbw %%mm7,%%mm4\n\t" \ - "punpckhbw %%mm7,%%mm5\n\t" \ - /*mm7={3}x4 \ - mm5:mm4={c0-b0,...,c7-b7}*/ \ - "pcmpeqw %%mm7,%%mm7\n\t" \ - "psubw %%mm1,%%mm4\n\t" \ - "psrlw $14,%%mm7\n\t" \ - "psubw %%mm3,%%mm5\n\t" \ - /*Scale by 3.*/ \ - "pmullw %%mm7,%%mm4\n\t" \ - "pmullw %%mm7,%%mm5\n\t" \ - /*mm7={4}x4 \ - mm5:mm4=f={a0-d0+3*(c0-b0),...,a7-d7+3*(c7-b7)}*/ \ - "psrlw $1,%%mm7\n\t" \ - "paddw %%mm0,%%mm4\n\t" \ - "psllw $2,%%mm7\n\t" \ - "movq (%[ll]),%%mm0\n\t" \ - "paddw %%mm6,%%mm5\n\t" \ - /*R_i has the range [-127,128], so we compute -R_i instead. \ - mm4=-R_i=-(f+4>>3)=0xFF^(f-4>>3)*/ \ - "psubw %%mm7,%%mm4\n\t" \ - "psubw %%mm7,%%mm5\n\t" \ - "psraw $3,%%mm4\n\t" \ - "psraw $3,%%mm5\n\t" \ - "pcmpeqb %%mm7,%%mm7\n\t" \ - "packsswb %%mm5,%%mm4\n\t" \ - "pxor %%mm6,%%mm6\n\t" \ - "pxor %%mm7,%%mm4\n\t" \ - "packuswb %%mm3,%%mm1\n\t" \ - /*Now compute lflim of -mm4 cf. Section 7.10 of the sepc.*/ \ - /*There's no unsigned byte+signed byte with unsigned saturation op code, so \ - we have to split things by sign (the other option is to work in 16 bits, \ - but working in 8 bits gives much better parallelism). \ - We compute abs(R_i), but save a mask of which terms were negative in mm6. 
\ - Then we compute mm4=abs(lflim(R_i,L))=min(abs(R_i),max(2*L-abs(R_i),0)). \ - Finally, we split mm4 into positive and negative pieces using the mask in \ - mm6, and add and subtract them as appropriate.*/ \ - /*mm4=abs(-R_i)*/ \ - /*mm7=255-2*L*/ \ - "pcmpgtb %%mm4,%%mm6\n\t" \ - "psubb %%mm0,%%mm7\n\t" \ - "pxor %%mm6,%%mm4\n\t" \ - "psubb %%mm0,%%mm7\n\t" \ - "psubb %%mm6,%%mm4\n\t" \ - /*mm7=255-max(2*L-abs(R_i),0)*/ \ - "paddusb %%mm4,%%mm7\n\t" \ - /*mm4=min(abs(R_i),max(2*L-abs(R_i),0))*/ \ - "paddusb %%mm7,%%mm4\n\t" \ - "psubusb %%mm7,%%mm4\n\t" \ - /*Now split mm4 by the original sign of -R_i.*/ \ - "movq %%mm4,%%mm5\n\t" \ - "pand %%mm6,%%mm4\n\t" \ - "pandn %%mm5,%%mm6\n\t" \ - /*mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)}*/ \ - /*mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}*/ \ - "paddusb %%mm4,%%mm1\n\t" \ - "psubusb %%mm4,%%mm2\n\t" \ - "psubusb %%mm6,%%mm1\n\t" \ - "paddusb %%mm6,%%mm2\n\t" \ - -#define OC_LOOP_FILTER_V_MMX(_pix,_ystride,_ll) \ - do{ \ - ptrdiff_t ystride3__; \ - __asm__ __volatile__( \ - /*mm0={a0,...,a7}*/ \ - "movq (%[pix]),%%mm0\n\t" \ - /*ystride3=_ystride*3*/ \ - "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" \ - /*mm3={d0,...,d7}*/ \ - "movq (%[pix],%[ystride3]),%%mm3\n\t" \ - /*mm1={b0,...,b7}*/ \ - "movq (%[pix],%[ystride]),%%mm1\n\t" \ - /*mm2={c0,...,c7}*/ \ - "movq (%[pix],%[ystride],2),%%mm2\n\t" \ - OC_LOOP_FILTER8_MMX \ - /*Write it back out.*/ \ - "movq %%mm1,(%[pix],%[ystride])\n\t" \ - "movq %%mm2,(%[pix],%[ystride],2)\n\t" \ - :[ystride3]"=&r"(ystride3__) \ - :[pix]"r"(_pix-_ystride*2),[ystride]"r"((ptrdiff_t)(_ystride)), \ - [ll]"r"(_ll) \ - :"memory" \ - ); \ - } \ - while(0) - -#define OC_LOOP_FILTER_H_MMX(_pix,_ystride,_ll) \ - do{ \ - unsigned char *pix__; \ - ptrdiff_t ystride3__; \ - ptrdiff_t d__; \ - pix__=(_pix)-2; \ - __asm__ __volatile__( \ - /*x x x x d0 c0 b0 a0*/ \ - "movd (%[pix]),%%mm0\n\t" \ - /*x x x x d1 c1 b1 a1*/ \ - "movd (%[pix],%[ystride]),%%mm1\n\t" \ - /*ystride3=_ystride*3*/ \ - "lea 
(%[ystride],%[ystride],2),%[ystride3]\n\t" \ - /*x x x x d2 c2 b2 a2*/ \ - "movd (%[pix],%[ystride],2),%%mm2\n\t" \ - /*x x x x d3 c3 b3 a3*/ \ - "lea (%[pix],%[ystride],4),%[d]\n\t" \ - "movd (%[pix],%[ystride3]),%%mm3\n\t" \ - /*x x x x d4 c4 b4 a4*/ \ - "movd (%[d]),%%mm4\n\t" \ - /*x x x x d5 c5 b5 a5*/ \ - "movd (%[d],%[ystride]),%%mm5\n\t" \ - /*x x x x d6 c6 b6 a6*/ \ - "movd (%[d],%[ystride],2),%%mm6\n\t" \ - /*x x x x d7 c7 b7 a7*/ \ - "movd (%[d],%[ystride3]),%%mm7\n\t" \ - /*mm0=d1 d0 c1 c0 b1 b0 a1 a0*/ \ - "punpcklbw %%mm1,%%mm0\n\t" \ - /*mm2=d3 d2 c3 c2 b3 b2 a3 a2*/ \ - "punpcklbw %%mm3,%%mm2\n\t" \ - /*mm3=d1 d0 c1 c0 b1 b0 a1 a0*/ \ - "movq %%mm0,%%mm3\n\t" \ - /*mm0=b3 b2 b1 b0 a3 a2 a1 a0*/ \ - "punpcklwd %%mm2,%%mm0\n\t" \ - /*mm3=d3 d2 d1 d0 c3 c2 c1 c0*/ \ - "punpckhwd %%mm2,%%mm3\n\t" \ - /*mm1=b3 b2 b1 b0 a3 a2 a1 a0*/ \ - "movq %%mm0,%%mm1\n\t" \ - /*mm4=d5 d4 c5 c4 b5 b4 a5 a4*/ \ - "punpcklbw %%mm5,%%mm4\n\t" \ - /*mm6=d7 d6 c7 c6 b7 b6 a7 a6*/ \ - "punpcklbw %%mm7,%%mm6\n\t" \ - /*mm5=d5 d4 c5 c4 b5 b4 a5 a4*/ \ - "movq %%mm4,%%mm5\n\t" \ - /*mm4=b7 b6 b5 b4 a7 a6 a5 a4*/ \ - "punpcklwd %%mm6,%%mm4\n\t" \ - /*mm5=d7 d6 d5 d4 c7 c6 c5 c4*/ \ - "punpckhwd %%mm6,%%mm5\n\t" \ - /*mm2=d3 d2 d1 d0 c3 c2 c1 c0*/ \ - "movq %%mm3,%%mm2\n\t" \ - /*mm0=a7 a6 a5 a4 a3 a2 a1 a0*/ \ - "punpckldq %%mm4,%%mm0\n\t" \ - /*mm1=b7 b6 b5 b4 b3 b2 b1 b0*/ \ - "punpckhdq %%mm4,%%mm1\n\t" \ - /*mm2=c7 c6 c5 c4 c3 c2 c1 c0*/ \ - "punpckldq %%mm5,%%mm2\n\t" \ - /*mm3=d7 d6 d5 d4 d3 d2 d1 d0*/ \ - "punpckhdq %%mm5,%%mm3\n\t" \ - OC_LOOP_FILTER8_MMX \ - /*mm2={b0+R_0'',...,b7+R_7''}*/ \ - "movq %%mm1,%%mm0\n\t" \ - /*mm1={b0+R_0'',c0-R_0'',...,b3+R_3'',c3-R_3''}*/ \ - "punpcklbw %%mm2,%%mm1\n\t" \ - /*mm2={b4+R_4'',c4-R_4'',...,b7+R_7'',c7-R_7''}*/ \ - "punpckhbw %%mm2,%%mm0\n\t" \ - /*[d]=c1 b1 c0 b0*/ \ - "movd %%mm1,%[d]\n\t" \ - "movw %w[d],1(%[pix])\n\t" \ - "psrlq $32,%%mm1\n\t" \ - "shr $16,%[d]\n\t" \ - "movw %w[d],1(%[pix],%[ystride])\n\t" \ - /*[d]=c3 b3 
c2 b2*/ \ - "movd %%mm1,%[d]\n\t" \ - "movw %w[d],1(%[pix],%[ystride],2)\n\t" \ - "shr $16,%[d]\n\t" \ - "movw %w[d],1(%[pix],%[ystride3])\n\t" \ - "lea (%[pix],%[ystride],4),%[pix]\n\t" \ - /*[d]=c5 b5 c4 b4*/ \ - "movd %%mm0,%[d]\n\t" \ - "movw %w[d],1(%[pix])\n\t" \ - "psrlq $32,%%mm0\n\t" \ - "shr $16,%[d]\n\t" \ - "movw %w[d],1(%[pix],%[ystride])\n\t" \ - /*[d]=c7 b7 c6 b6*/ \ - "movd %%mm0,%[d]\n\t" \ - "movw %w[d],1(%[pix],%[ystride],2)\n\t" \ - "shr $16,%[d]\n\t" \ - "movw %w[d],1(%[pix],%[ystride3])\n\t" \ - :[pix]"+r"(pix__),[ystride3]"=&r"(ystride3__),[d]"=&r"(d__) \ - :[ystride]"r"((ptrdiff_t)(_ystride)),[ll]"r"(_ll) \ - :"memory" \ - ); \ - } \ - while(0) - -# endif -#endif diff --git a/drivers/theora/x86/mmxstate.c b/drivers/theora/x86/mmxstate.c deleted file mode 100644 index 808b0a789b..0000000000 --- a/drivers/theora/x86/mmxstate.c +++ /dev/null @@ -1,188 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: mmxstate.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -/*MMX acceleration of complete fragment reconstruction algorithm. 
- Originally written by Rudolf Marek.*/ -#include -#include "x86int.h" -#include "mmxfrag.h" -#include "mmxloop.h" - -#if defined(OC_X86_ASM) - -void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi, - int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){ - unsigned char *dst; - ptrdiff_t frag_buf_off; - int ystride; - int mb_mode; - /*Apply the inverse transform.*/ - /*Special case only having a DC component.*/ - if(_last_zzi<2){ - /*Note that this value must be unsigned, to keep the __asm__ block from - sign-extending it when it puts it in a register.*/ - ogg_uint16_t p; - /*We round this dequant product (and not any of the others) because there's - no iDCT rounding.*/ - p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); - /*Fill _dct_coeffs with p.*/ - __asm__ __volatile__( - /*mm0=0000 0000 0000 AAAA*/ - "movd %[p],%%mm0\n\t" - /*mm0=0000 0000 AAAA AAAA*/ - "punpcklwd %%mm0,%%mm0\n\t" - /*mm0=AAAA AAAA AAAA AAAA*/ - "punpckldq %%mm0,%%mm0\n\t" - "movq %%mm0,(%[y])\n\t" - "movq %%mm0,8(%[y])\n\t" - "movq %%mm0,16(%[y])\n\t" - "movq %%mm0,24(%[y])\n\t" - "movq %%mm0,32(%[y])\n\t" - "movq %%mm0,40(%[y])\n\t" - "movq %%mm0,48(%[y])\n\t" - "movq %%mm0,56(%[y])\n\t" - "movq %%mm0,64(%[y])\n\t" - "movq %%mm0,72(%[y])\n\t" - "movq %%mm0,80(%[y])\n\t" - "movq %%mm0,88(%[y])\n\t" - "movq %%mm0,96(%[y])\n\t" - "movq %%mm0,104(%[y])\n\t" - "movq %%mm0,112(%[y])\n\t" - "movq %%mm0,120(%[y])\n\t" - : - :[y]"r"(_dct_coeffs),[p]"r"((unsigned)p) - :"memory" - ); - } - else{ - /*Dequantize the DC coefficient.*/ - _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); - oc_idct8x8_mmx(_dct_coeffs,_last_zzi); - } - /*Fill in the target buffer.*/ - frag_buf_off=_state->frag_buf_offs[_fragi]; - mb_mode=_state->frags[_fragi].mb_mode; - ystride=_state->ref_ystride[_pli]; - dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off; - 
if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs); - else{ - const unsigned char *ref; - int mvoffsets[2]; - ref= - _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]] - +frag_buf_off; - if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, - _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){ - oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, - _dct_coeffs); - } - else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs); - } -} - -/*We copy these entire function to inline the actual MMX routines so that we - use only a single indirect call.*/ - -/*Copies the fragments specified by the lists of fragment indices from one - frame to another. - _fragis: A pointer to a list of fragment indices. - _nfragis: The number of fragment indices to copy. - _dst_frame: The reference frame to copy to. - _src_frame: The reference frame to copy from. - _pli: The color plane the fragments lie in.*/ -void oc_state_frag_copy_list_mmx(const oc_theora_state *_state, - const ptrdiff_t *_fragis,ptrdiff_t _nfragis, - int _dst_frame,int _src_frame,int _pli){ - const ptrdiff_t *frag_buf_offs; - const unsigned char *src_frame_data; - unsigned char *dst_frame_data; - ptrdiff_t fragii; - int ystride; - dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]]; - src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]]; - ystride=_state->ref_ystride[_pli]; - frag_buf_offs=_state->frag_buf_offs; - for(fragii=0;fragii<_nfragis;fragii++){ - ptrdiff_t frag_buf_off; - frag_buf_off=frag_buf_offs[_fragis[fragii]]; - OC_FRAG_COPY_MMX(dst_frame_data+frag_buf_off, - src_frame_data+frag_buf_off,ystride); - } -} - -/*Apply the loop filter to a given set of fragment rows in the given plane. - The filter may be run on the bottom edge, affecting pixels in the next row of - fragments, so this row also needs to be available. - _bv: The bounding values array. 
- _refi: The index of the frame buffer to filter. - _pli: The color plane to filter. - _fragy0: The Y coordinate of the first fragment row to filter. - _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ -void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state, - int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){ - OC_ALIGN8(unsigned char ll[8]); - const oc_fragment_plane *fplane; - const oc_fragment *frags; - const ptrdiff_t *frag_buf_offs; - unsigned char *ref_frame_data; - ptrdiff_t fragi_top; - ptrdiff_t fragi_bot; - ptrdiff_t fragi0; - ptrdiff_t fragi0_end; - int ystride; - int nhfrags; - memset(ll,_state->loop_filter_limits[_state->qis[0]],sizeof(ll)); - fplane=_state->fplanes+_pli; - nhfrags=fplane->nhfrags; - fragi_top=fplane->froffset; - fragi_bot=fragi_top+fplane->nfrags; - fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags; - fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags; - ystride=_state->ref_ystride[_pli]; - frags=_state->frags; - frag_buf_offs=_state->frag_buf_offs; - ref_frame_data=_state->ref_frame_data[_refi]; - /*The following loops are constructed somewhat non-intuitively on purpose. - The main idea is: if a block boundary has at least one coded fragment on - it, the filter is applied to it. 
- However, the order that the filters are applied in matters, and VP3 chose - the somewhat strange ordering used below.*/ - while(fragi0fragi0)OC_LOOP_FILTER_H_MMX(ref,ystride,ll); - if(fragi0>fragi_top)OC_LOOP_FILTER_V_MMX(ref,ystride,ll); - if(fragi+1 -#include "x86enc.h" - -#if defined(OC_X86_64_ASM) - -# define OC_FDCT8x8 \ - /*Note: xmm15={0}x8 and xmm14={-1}x8.*/ \ - "#OC_FDCT8x8\n\t" \ - /*Stage 1:*/ \ - "movdqa %%xmm0,%%xmm11\n\t" \ - "movdqa %%xmm1,%%xmm10\n\t" \ - "movdqa %%xmm2,%%xmm9\n\t" \ - "movdqa %%xmm3,%%xmm8\n\t" \ - /*xmm11=t7'=t0-t7*/ \ - "psubw %%xmm7,%%xmm11\n\t" \ - /*xmm10=t6'=t1-t6*/ \ - "psubw %%xmm6,%%xmm10\n\t" \ - /*xmm9=t5'=t2-t5*/ \ - "psubw %%xmm5,%%xmm9\n\t" \ - /*xmm8=t4'=t3-t4*/ \ - "psubw %%xmm4,%%xmm8\n\t" \ - /*xmm0=t0'=t0+t7*/ \ - "paddw %%xmm7,%%xmm0\n\t" \ - /*xmm1=t1'=t1+t6*/ \ - "paddw %%xmm6,%%xmm1\n\t" \ - /*xmm5=t2'=t2+t5*/ \ - "paddw %%xmm2,%%xmm5\n\t" \ - /*xmm4=t3'=t3+t4*/ \ - "paddw %%xmm3,%%xmm4\n\t" \ - /*xmm2,3,6,7 are now free.*/ \ - /*Stage 2:*/ \ - "movdqa %%xmm0,%%xmm3\n\t" \ - "mov $0x5A806A0A,%[a]\n\t" \ - "movdqa %%xmm1,%%xmm2\n\t" \ - "movd %[a],%%xmm13\n\t" \ - "movdqa %%xmm10,%%xmm6\n\t" \ - "pshufd $00,%%xmm13,%%xmm13\n\t" \ - /*xmm2=t2''=t1'-t2'*/ \ - "psubw %%xmm5,%%xmm2\n\t" \ - "pxor %%xmm12,%%xmm12\n\t" \ - /*xmm3=t3''=t0'-t3'*/ \ - "psubw %%xmm4,%%xmm3\n\t" \ - "psubw %%xmm14,%%xmm12\n\t" \ - /*xmm10=t5''=t6'-t5'*/ \ - "psubw %%xmm9,%%xmm10\n\t" \ - "paddw %%xmm12,%%xmm12\n\t" \ - /*xmm4=t0''=t0'+t3'*/ \ - "paddw %%xmm0,%%xmm4\n\t" \ - /*xmm1=t1''=t1'+t2'*/ \ - "paddw %%xmm5,%%xmm1\n\t" \ - /*xmm6=t6''=t6'+t5'*/ \ - "paddw %%xmm9,%%xmm6\n\t" \ - /*xmm0,xmm5,xmm9 are now free.*/ \ - /*Stage 3:*/ \ - /*xmm10:xmm5=t5''*27146+0xB500 \ - xmm0=t5''*/ \ - "movdqa %%xmm10,%%xmm5\n\t" \ - "movdqa %%xmm10,%%xmm0\n\t" \ - "punpckhwd %%xmm12,%%xmm10\n\t" \ - "pmaddwd %%xmm13,%%xmm10\n\t" \ - "punpcklwd %%xmm12,%%xmm5\n\t" \ - "pmaddwd %%xmm13,%%xmm5\n\t" \ - /*xmm5=(t5''*27146+0xB500>>16)+t5''*/ \ - "psrad 
$16,%%xmm10\n\t" \ - "psrad $16,%%xmm5\n\t" \ - "packssdw %%xmm10,%%xmm5\n\t" \ - "paddw %%xmm0,%%xmm5\n\t" \ - /*xmm0=s=(t5''*27146+0xB500>>16)+t5''+(t5''!=0)>>1*/ \ - "pcmpeqw %%xmm15,%%xmm0\n\t" \ - "psubw %%xmm14,%%xmm0\n\t" \ - "paddw %%xmm5,%%xmm0\n\t" \ - "movdqa %%xmm8,%%xmm5\n\t" \ - "psraw $1,%%xmm0\n\t" \ - /*xmm5=t5'''=t4'-s*/ \ - "psubw %%xmm0,%%xmm5\n\t" \ - /*xmm8=t4''=t4'+s*/ \ - "paddw %%xmm0,%%xmm8\n\t" \ - /*xmm0,xmm7,xmm9,xmm10 are free.*/ \ - /*xmm7:xmm9=t6''*27146+0xB500*/ \ - "movdqa %%xmm6,%%xmm7\n\t" \ - "movdqa %%xmm6,%%xmm9\n\t" \ - "punpckhwd %%xmm12,%%xmm7\n\t" \ - "pmaddwd %%xmm13,%%xmm7\n\t" \ - "punpcklwd %%xmm12,%%xmm9\n\t" \ - "pmaddwd %%xmm13,%%xmm9\n\t" \ - /*xmm9=(t6''*27146+0xB500>>16)+t6''*/ \ - "psrad $16,%%xmm7\n\t" \ - "psrad $16,%%xmm9\n\t" \ - "packssdw %%xmm7,%%xmm9\n\t" \ - "paddw %%xmm6,%%xmm9\n\t" \ - /*xmm9=s=(t6''*27146+0xB500>>16)+t6''+(t6''!=0)>>1*/ \ - "pcmpeqw %%xmm15,%%xmm6\n\t" \ - "psubw %%xmm14,%%xmm6\n\t" \ - "paddw %%xmm6,%%xmm9\n\t" \ - "movdqa %%xmm11,%%xmm7\n\t" \ - "psraw $1,%%xmm9\n\t" \ - /*xmm7=t6'''=t7'-s*/ \ - "psubw %%xmm9,%%xmm7\n\t" \ - /*xmm9=t7''=t7'+s*/ \ - "paddw %%xmm11,%%xmm9\n\t" \ - /*xmm0,xmm6,xmm10,xmm11 are free.*/ \ - /*Stage 4:*/ \ - /*xmm10:xmm0=t1''*27146+0xB500*/ \ - "movdqa %%xmm1,%%xmm0\n\t" \ - "movdqa %%xmm1,%%xmm10\n\t" \ - "punpcklwd %%xmm12,%%xmm0\n\t" \ - "pmaddwd %%xmm13,%%xmm0\n\t" \ - "punpckhwd %%xmm12,%%xmm10\n\t" \ - "pmaddwd %%xmm13,%%xmm10\n\t" \ - /*xmm0=(t1''*27146+0xB500>>16)+t1''*/ \ - "psrad $16,%%xmm0\n\t" \ - "psrad $16,%%xmm10\n\t" \ - "mov $0x20006A0A,%[a]\n\t" \ - "packssdw %%xmm10,%%xmm0\n\t" \ - "movd %[a],%%xmm13\n\t" \ - "paddw %%xmm1,%%xmm0\n\t" \ - /*xmm0=s=(t1''*27146+0xB500>>16)+t1''+(t1''!=0)*/ \ - "pcmpeqw %%xmm15,%%xmm1\n\t" \ - "pshufd $00,%%xmm13,%%xmm13\n\t" \ - "psubw %%xmm14,%%xmm1\n\t" \ - "paddw %%xmm1,%%xmm0\n\t" \ - /*xmm10:xmm4=t0''*27146+0x4000*/ \ - "movdqa %%xmm4,%%xmm1\n\t" \ - "movdqa %%xmm4,%%xmm10\n\t" \ - "punpcklwd 
%%xmm12,%%xmm4\n\t" \ - "pmaddwd %%xmm13,%%xmm4\n\t" \ - "punpckhwd %%xmm12,%%xmm10\n\t" \ - "pmaddwd %%xmm13,%%xmm10\n\t" \ - /*xmm4=(t0''*27146+0x4000>>16)+t0''*/ \ - "psrad $16,%%xmm4\n\t" \ - "psrad $16,%%xmm10\n\t" \ - "mov $0x6CB7,%[a]\n\t" \ - "packssdw %%xmm10,%%xmm4\n\t" \ - "movd %[a],%%xmm12\n\t" \ - "paddw %%xmm1,%%xmm4\n\t" \ - /*xmm4=r=(t0''*27146+0x4000>>16)+t0''+(t0''!=0)*/ \ - "pcmpeqw %%xmm15,%%xmm1\n\t" \ - "pshufd $00,%%xmm12,%%xmm12\n\t" \ - "psubw %%xmm14,%%xmm1\n\t" \ - "mov $0x7FFF6C84,%[a]\n\t" \ - "paddw %%xmm1,%%xmm4\n\t" \ - /*xmm0=_y[0]=u=r+s>>1 \ - The naive implementation could cause overflow, so we use \ - u=(r&s)+((r^s)>>1).*/ \ - "movdqa %%xmm0,%%xmm6\n\t" \ - "pxor %%xmm4,%%xmm0\n\t" \ - "pand %%xmm4,%%xmm6\n\t" \ - "psraw $1,%%xmm0\n\t" \ - "movd %[a],%%xmm13\n\t" \ - "paddw %%xmm6,%%xmm0\n\t" \ - /*xmm4=_y[4]=v=r-u*/ \ - "pshufd $00,%%xmm13,%%xmm13\n\t" \ - "psubw %%xmm0,%%xmm4\n\t" \ - /*xmm1,xmm6,xmm10,xmm11 are free.*/ \ - /*xmm6:xmm10=60547*t3''+0x6CB7*/ \ - "movdqa %%xmm3,%%xmm10\n\t" \ - "movdqa %%xmm3,%%xmm6\n\t" \ - "punpcklwd %%xmm3,%%xmm10\n\t" \ - "pmaddwd %%xmm13,%%xmm10\n\t" \ - "mov $0x61F861F8,%[a]\n\t" \ - "punpckhwd %%xmm3,%%xmm6\n\t" \ - "pmaddwd %%xmm13,%%xmm6\n\t" \ - "movd %[a],%%xmm13\n\t" \ - "paddd %%xmm12,%%xmm10\n\t" \ - "pshufd $00,%%xmm13,%%xmm13\n\t" \ - "paddd %%xmm12,%%xmm6\n\t" \ - /*xmm1:xmm2=25080*t2'' \ - xmm12=t2''*/ \ - "movdqa %%xmm2,%%xmm11\n\t" \ - "movdqa %%xmm2,%%xmm12\n\t" \ - "pmullw %%xmm13,%%xmm2\n\t" \ - "pmulhw %%xmm13,%%xmm11\n\t" \ - "movdqa %%xmm2,%%xmm1\n\t" \ - "punpcklwd %%xmm11,%%xmm2\n\t" \ - "punpckhwd %%xmm11,%%xmm1\n\t" \ - /*xmm10=u=(25080*t2''+60547*t3''+0x6CB7>>16)+(t3''!=0)*/ \ - "paddd %%xmm2,%%xmm10\n\t" \ - "paddd %%xmm1,%%xmm6\n\t" \ - "psrad $16,%%xmm10\n\t" \ - "pcmpeqw %%xmm15,%%xmm3\n\t" \ - "psrad $16,%%xmm6\n\t" \ - "psubw %%xmm14,%%xmm3\n\t" \ - "packssdw %%xmm6,%%xmm10\n\t" \ - "paddw %%xmm3,%%xmm10\n\t" \ - /*xmm2=_y[2]=u \ - xmm10=s=(25080*u>>16)-t2''*/ 
\ - "movdqa %%xmm10,%%xmm2\n\t" \ - "pmulhw %%xmm13,%%xmm10\n\t" \ - "psubw %%xmm12,%%xmm10\n\t" \ - /*xmm1:xmm6=s*21600+0x2800*/ \ - "pxor %%xmm12,%%xmm12\n\t" \ - "psubw %%xmm14,%%xmm12\n\t" \ - "mov $0x28005460,%[a]\n\t" \ - "movd %[a],%%xmm13\n\t" \ - "pshufd $00,%%xmm13,%%xmm13\n\t" \ - "movdqa %%xmm10,%%xmm6\n\t" \ - "movdqa %%xmm10,%%xmm1\n\t" \ - "punpcklwd %%xmm12,%%xmm6\n\t" \ - "pmaddwd %%xmm13,%%xmm6\n\t" \ - "mov $0x0E3D,%[a]\n\t" \ - "punpckhwd %%xmm12,%%xmm1\n\t" \ - "pmaddwd %%xmm13,%%xmm1\n\t" \ - /*xmm6=(s*21600+0x2800>>18)+s*/ \ - "psrad $18,%%xmm6\n\t" \ - "psrad $18,%%xmm1\n\t" \ - "movd %[a],%%xmm12\n\t" \ - "packssdw %%xmm1,%%xmm6\n\t" \ - "pshufd $00,%%xmm12,%%xmm12\n\t" \ - "paddw %%xmm10,%%xmm6\n\t" \ - /*xmm6=_y[6]=v=(s*21600+0x2800>>18)+s+(s!=0)*/ \ - "mov $0x7FFF54DC,%[a]\n\t" \ - "pcmpeqw %%xmm15,%%xmm10\n\t" \ - "movd %[a],%%xmm13\n\t" \ - "psubw %%xmm14,%%xmm10\n\t" \ - "pshufd $00,%%xmm13,%%xmm13\n\t" \ - "paddw %%xmm10,%%xmm6\n\t " \ - /*xmm1,xmm3,xmm10,xmm11 are free.*/ \ - /*xmm11:xmm10=54491*t5'''+0x0E3D*/ \ - "movdqa %%xmm5,%%xmm10\n\t" \ - "movdqa %%xmm5,%%xmm11\n\t" \ - "punpcklwd %%xmm5,%%xmm10\n\t" \ - "pmaddwd %%xmm13,%%xmm10\n\t" \ - "mov $0x8E3A8E3A,%[a]\n\t" \ - "punpckhwd %%xmm5,%%xmm11\n\t" \ - "pmaddwd %%xmm13,%%xmm11\n\t" \ - "movd %[a],%%xmm13\n\t" \ - "paddd %%xmm12,%%xmm10\n\t" \ - "pshufd $00,%%xmm13,%%xmm13\n\t" \ - "paddd %%xmm12,%%xmm11\n\t" \ - /*xmm7:xmm12=36410*t6''' \ - xmm1=t6'''*/ \ - "movdqa %%xmm7,%%xmm3\n\t" \ - "movdqa %%xmm7,%%xmm1\n\t" \ - "pmulhw %%xmm13,%%xmm3\n\t" \ - "pmullw %%xmm13,%%xmm7\n\t" \ - "paddw %%xmm1,%%xmm3\n\t" \ - "movdqa %%xmm7,%%xmm12\n\t" \ - "punpckhwd %%xmm3,%%xmm7\n\t" \ - "punpcklwd %%xmm3,%%xmm12\n\t" \ - /*xmm10=u=(54491*t5'''+36410*t6'''+0x0E3D>>16)+(t5'''!=0)*/ \ - "paddd %%xmm12,%%xmm10\n\t" \ - "paddd %%xmm7,%%xmm11\n\t" \ - "psrad $16,%%xmm10\n\t" \ - "pcmpeqw %%xmm15,%%xmm5\n\t" \ - "psrad $16,%%xmm11\n\t" \ - "psubw %%xmm14,%%xmm5\n\t" \ - "packssdw 
%%xmm11,%%xmm10\n\t" \ - "pxor %%xmm12,%%xmm12\n\t" \ - "paddw %%xmm5,%%xmm10\n\t" \ - /*xmm5=_y[5]=u \ - xmm1=s=t6'''-(36410*u>>16)*/ \ - "psubw %%xmm14,%%xmm12\n\t" \ - "movdqa %%xmm10,%%xmm5\n\t" \ - "mov $0x340067C8,%[a]\n\t" \ - "pmulhw %%xmm13,%%xmm10\n\t" \ - "movd %[a],%%xmm13\n\t" \ - "paddw %%xmm5,%%xmm10\n\t" \ - "pshufd $00,%%xmm13,%%xmm13\n\t" \ - "psubw %%xmm10,%%xmm1\n\t" \ - /*xmm11:xmm3=s*26568+0x3400*/ \ - "movdqa %%xmm1,%%xmm3\n\t" \ - "movdqa %%xmm1,%%xmm11\n\t" \ - "punpcklwd %%xmm12,%%xmm3\n\t" \ - "pmaddwd %%xmm13,%%xmm3\n\t" \ - "mov $0x7B1B,%[a]\n\t" \ - "punpckhwd %%xmm12,%%xmm11\n\t" \ - "pmaddwd %%xmm13,%%xmm11\n\t" \ - /*xmm3=(s*26568+0x3400>>17)+s*/ \ - "psrad $17,%%xmm3\n\t" \ - "psrad $17,%%xmm11\n\t" \ - "movd %[a],%%xmm12\n\t" \ - "packssdw %%xmm11,%%xmm3\n\t" \ - "pshufd $00,%%xmm12,%%xmm12\n\t" \ - "paddw %%xmm1,%%xmm3\n\t" \ - /*xmm3=_y[3]=v=(s*26568+0x3400>>17)+s+(s!=0)*/ \ - "mov $0x7FFF7B16,%[a]\n\t" \ - "pcmpeqw %%xmm15,%%xmm1\n\t" \ - "movd %[a],%%xmm13\n\t" \ - "psubw %%xmm14,%%xmm1\n\t" \ - "pshufd $00,%%xmm13,%%xmm13\n\t" \ - "paddw %%xmm1,%%xmm3\n\t " \ - /*xmm1,xmm7,xmm10,xmm11 are free.*/ \ - /*xmm11:xmm10=64277*t7''+0x7B1B*/ \ - "movdqa %%xmm9,%%xmm10\n\t" \ - "movdqa %%xmm9,%%xmm11\n\t" \ - "punpcklwd %%xmm9,%%xmm10\n\t" \ - "pmaddwd %%xmm13,%%xmm10\n\t" \ - "mov $0x31F131F1,%[a]\n\t" \ - "punpckhwd %%xmm9,%%xmm11\n\t" \ - "pmaddwd %%xmm13,%%xmm11\n\t" \ - "movd %[a],%%xmm13\n\t" \ - "paddd %%xmm12,%%xmm10\n\t" \ - "pshufd $00,%%xmm13,%%xmm13\n\t" \ - "paddd %%xmm12,%%xmm11\n\t" \ - /*xmm12:xmm7=12785*t4''*/ \ - "movdqa %%xmm8,%%xmm7\n\t" \ - "movdqa %%xmm8,%%xmm1\n\t" \ - "pmullw %%xmm13,%%xmm7\n\t" \ - "pmulhw %%xmm13,%%xmm1\n\t" \ - "movdqa %%xmm7,%%xmm12\n\t" \ - "punpcklwd %%xmm1,%%xmm7\n\t" \ - "punpckhwd %%xmm1,%%xmm12\n\t" \ - /*xmm10=u=(12785*t4''+64277*t7''+0x7B1B>>16)+(t7''!=0)*/ \ - "paddd %%xmm7,%%xmm10\n\t" \ - "paddd %%xmm12,%%xmm11\n\t" \ - "psrad $16,%%xmm10\n\t" \ - "pcmpeqw %%xmm15,%%xmm9\n\t" \ - 
"psrad $16,%%xmm11\n\t" \ - "psubw %%xmm14,%%xmm9\n\t" \ - "packssdw %%xmm11,%%xmm10\n\t" \ - "pxor %%xmm12,%%xmm12\n\t" \ - "paddw %%xmm9,%%xmm10\n\t" \ - /*xmm1=_y[1]=u \ - xmm10=s=(12785*u>>16)-t4''*/ \ - "psubw %%xmm14,%%xmm12\n\t" \ - "movdqa %%xmm10,%%xmm1\n\t" \ - "mov $0x3000503B,%[a]\n\t" \ - "pmulhw %%xmm13,%%xmm10\n\t" \ - "movd %[a],%%xmm13\n\t" \ - "psubw %%xmm8,%%xmm10\n\t" \ - "pshufd $00,%%xmm13,%%xmm13\n\t" \ - /*xmm8:xmm7=s*20539+0x3000*/ \ - "movdqa %%xmm10,%%xmm7\n\t" \ - "movdqa %%xmm10,%%xmm8\n\t" \ - "punpcklwd %%xmm12,%%xmm7\n\t" \ - "pmaddwd %%xmm13,%%xmm7\n\t" \ - "punpckhwd %%xmm12,%%xmm8\n\t" \ - "pmaddwd %%xmm13,%%xmm8\n\t" \ - /*xmm7=(s*20539+0x3000>>20)+s*/ \ - "psrad $20,%%xmm7\n\t" \ - "psrad $20,%%xmm8\n\t" \ - "packssdw %%xmm8,%%xmm7\n\t" \ - "paddw %%xmm10,%%xmm7\n\t" \ - /*xmm7=_y[7]=v=(s*20539+0x3000>>20)+s+(s!=0)*/ \ - "pcmpeqw %%xmm15,%%xmm10\n\t" \ - "psubw %%xmm14,%%xmm10\n\t" \ - "paddw %%xmm10,%%xmm7\n\t " \ - -# define OC_TRANSPOSE8x8 \ - "#OC_TRANSPOSE8x8\n\t" \ - "movdqa %%xmm4,%%xmm8\n\t" \ - /*xmm4 = f3 e3 f2 e2 f1 e1 f0 e0*/ \ - "punpcklwd %%xmm5,%%xmm4\n\t" \ - /*xmm8 = f7 e7 f6 e6 f5 e5 f4 e4*/ \ - "punpckhwd %%xmm5,%%xmm8\n\t" \ - /*xmm5 is free.*/ \ - "movdqa %%xmm0,%%xmm5\n\t" \ - /*xmm0 = b3 a3 b2 a2 b1 a1 b0 a0*/ \ - "punpcklwd %%xmm1,%%xmm0\n\t" \ - /*xmm5 = b7 a7 b6 a6 b5 a5 b4 a4*/ \ - "punpckhwd %%xmm1,%%xmm5\n\t" \ - /*xmm1 is free.*/ \ - "movdqa %%xmm6,%%xmm1\n\t" \ - /*xmm6 = h3 g3 h2 g2 h1 g1 h0 g0*/ \ - "punpcklwd %%xmm7,%%xmm6\n\t" \ - /*xmm1 = h7 g7 h6 g6 h5 g5 h4 g4*/ \ - "punpckhwd %%xmm7,%%xmm1\n\t" \ - /*xmm7 is free.*/ \ - "movdqa %%xmm2,%%xmm7\n\t" \ - /*xmm7 = d3 c3 d2 c2 d1 c1 d0 c0*/ \ - "punpcklwd %%xmm3,%%xmm7\n\t" \ - /*xmm2 = d7 c7 d6 c6 d5 c5 d4 c4*/ \ - "punpckhwd %%xmm3,%%xmm2\n\t" \ - /*xmm3 is free.*/ \ - "movdqa %%xmm0,%%xmm3\n\t" \ - /*xmm0 = d1 c1 b1 a1 d0 c0 b0 a0*/ \ - "punpckldq %%xmm7,%%xmm0\n\t" \ - /*xmm3 = d3 c3 b3 a3 d2 c2 b2 a2*/ \ - "punpckhdq %%xmm7,%%xmm3\n\t" \ - 
/*xmm7 is free.*/ \ - "movdqa %%xmm5,%%xmm7\n\t" \ - /*xmm5 = d5 c5 b5 a5 d4 c4 b4 a4*/ \ - "punpckldq %%xmm2,%%xmm5\n\t" \ - /*xmm7 = d7 c7 b7 a7 d6 c6 b6 a6*/ \ - "punpckhdq %%xmm2,%%xmm7\n\t" \ - /*xmm2 is free.*/ \ - "movdqa %%xmm4,%%xmm2\n\t" \ - /*xmm2 = h1 g1 f1 e1 h0 g0 f0 e0*/ \ - "punpckldq %%xmm6,%%xmm2\n\t" \ - /*xmm4 = h3 g3 f3 e3 h2 g2 f2 e2*/ \ - "punpckhdq %%xmm6,%%xmm4\n\t" \ - /*xmm6 is free.*/ \ - "movdqa %%xmm8,%%xmm6\n\t" \ - /*xmm6 = h5 g5 f5 e5 h4 g4 f4 e4*/ \ - "punpckldq %%xmm1,%%xmm6\n\t" \ - /*xmm8 = h7 g7 f7 e7 h6 g6 f6 e6*/ \ - "punpckhdq %%xmm1,%%xmm8\n\t" \ - /*xmm1 is free.*/ \ - "movdqa %%xmm0,%%xmm1\n\t" \ - /*xmm0 = h0 g0 f0 e0 d0 c0 b0 a0*/ \ - "punpcklqdq %%xmm2,%%xmm0\n\t" \ - /*xmm1 = h1 g1 f1 e1 d1 c1 b1 a1*/ \ - "punpckhqdq %%xmm2,%%xmm1\n\t" \ - /*xmm2 is free.*/ \ - "movdqa %%xmm3,%%xmm2\n\t" \ - /*xmm2 = h2 g2 f2 e2 d2 c2 b2 a2*/ \ - "punpcklqdq %%xmm4,%%xmm2\n\t" \ - /*xmm3 = h3 g3 f3 e3 d3 c3 b3 a3*/ \ - "punpckhqdq %%xmm4,%%xmm3\n\t" \ - /*xmm4 is free.*/ \ - "movdqa %%xmm5,%%xmm4\n\t" \ - /*xmm4 = h4 g4 f4 e4 d4 c4 b4 a4*/ \ - "punpcklqdq %%xmm6,%%xmm4\n\t" \ - /*xmm5 = h5 g5 f5 e5 d5 c5 b5 a5*/ \ - "punpckhqdq %%xmm6,%%xmm5\n\t" \ - /*xmm6 is free.*/ \ - "movdqa %%xmm7,%%xmm6\n\t" \ - /*xmm6 = h6 g6 f6 e6 d6 c6 b6 a6*/ \ - "punpcklqdq %%xmm8,%%xmm6\n\t" \ - /*xmm7 = h7 g7 f7 e7 d7 c7 b7 a7*/ \ - "punpckhqdq %%xmm8,%%xmm7\n\t" \ - /*xmm8 is free.*/ \ - -/*SSE2 implementation of the fDCT for x86-64 only. 
- Because of the 8 extra XMM registers on x86-64, this version can operate - without any temporary stack access at all.*/ -void oc_enc_fdct8x8_x86_64sse2(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ - ptrdiff_t a; - __asm__ __volatile__( - /*Load the input.*/ - "movdqa 0x00(%[x]),%%xmm0\n\t" - "movdqa 0x10(%[x]),%%xmm1\n\t" - "movdqa 0x20(%[x]),%%xmm2\n\t" - "movdqa 0x30(%[x]),%%xmm3\n\t" - "movdqa 0x40(%[x]),%%xmm4\n\t" - "movdqa 0x50(%[x]),%%xmm5\n\t" - "movdqa 0x60(%[x]),%%xmm6\n\t" - "movdqa 0x70(%[x]),%%xmm7\n\t" - /*Add two extra bits of working precision to improve accuracy; any more and - we could overflow.*/ - /*We also add a few biases to correct for some systematic error that - remains in the full fDCT->iDCT round trip.*/ - /*xmm15={0}x8*/ - "pxor %%xmm15,%%xmm15\n\t" - /*xmm14={-1}x8*/ - "pcmpeqb %%xmm14,%%xmm14\n\t" - "psllw $2,%%xmm0\n\t" - /*xmm8=xmm0*/ - "movdqa %%xmm0,%%xmm8\n\t" - "psllw $2,%%xmm1\n\t" - /*xmm8={_x[7...0]==0}*/ - "pcmpeqw %%xmm15,%%xmm8\n\t" - "psllw $2,%%xmm2\n\t" - /*xmm8={_x[7...0]!=0}*/ - "psubw %%xmm14,%%xmm8\n\t" - "psllw $2,%%xmm3\n\t" - /*%[a]=1*/ - "mov $1,%[a]\n\t" - /*xmm8={_x[6]!=0,0,_x[4]!=0,0,_x[2]!=0,0,_x[0]!=0,0}*/ - "pslld $16,%%xmm8\n\t" - "psllw $2,%%xmm4\n\t" - /*xmm9={0,0,0,0,0,0,0,1}*/ - "movd %[a],%%xmm9\n\t" - /*xmm8={0,0,_x[2]!=0,0,_x[0]!=0,0}*/ - "pshufhw $0x00,%%xmm8,%%xmm8\n\t" - "psllw $2,%%xmm5\n\t" - /*%[a]={1}x2*/ - "mov $0x10001,%[a]\n\t" - /*xmm8={0,0,0,0,0,0,0,_x[0]!=0}*/ - "pshuflw $0x01,%%xmm8,%%xmm8\n\t" - "psllw $2,%%xmm6\n\t" - /*xmm10={0,0,0,0,0,0,1,1}*/ - "movd %[a],%%xmm10\n\t" - /*xmm0=_x[7...0]+{0,0,0,0,0,0,0,_x[0]!=0}*/ - "paddw %%xmm8,%%xmm0\n\t" - "psllw $2,%%xmm7\n\t" - /*xmm0=_x[7...0]+{0,0,0,0,0,0,1,(_x[0]!=0)+1}*/ - "paddw %%xmm10,%%xmm0\n\t" - /*xmm1=_x[15...8]-{0,0,0,0,0,0,0,1}*/ - "psubw %%xmm9,%%xmm1\n\t" - /*Transform columns.*/ - OC_FDCT8x8 - /*Transform rows.*/ - OC_TRANSPOSE8x8 - OC_FDCT8x8 - /*TODO: zig-zag ordering?*/ - OC_TRANSPOSE8x8 - 
/*xmm14={-2,-2,-2,-2,-2,-2,-2,-2}*/ - "paddw %%xmm14,%%xmm14\n\t" - "psubw %%xmm14,%%xmm0\n\t" - "psubw %%xmm14,%%xmm1\n\t" - "psraw $2,%%xmm0\n\t" - "psubw %%xmm14,%%xmm2\n\t" - "psraw $2,%%xmm1\n\t" - "psubw %%xmm14,%%xmm3\n\t" - "psraw $2,%%xmm2\n\t" - "psubw %%xmm14,%%xmm4\n\t" - "psraw $2,%%xmm3\n\t" - "psubw %%xmm14,%%xmm5\n\t" - "psraw $2,%%xmm4\n\t" - "psubw %%xmm14,%%xmm6\n\t" - "psraw $2,%%xmm5\n\t" - "psubw %%xmm14,%%xmm7\n\t" - "psraw $2,%%xmm6\n\t" - "psraw $2,%%xmm7\n\t" - /*Store the result.*/ - "movdqa %%xmm0,0x00(%[y])\n\t" - "movdqa %%xmm1,0x10(%[y])\n\t" - "movdqa %%xmm2,0x20(%[y])\n\t" - "movdqa %%xmm3,0x30(%[y])\n\t" - "movdqa %%xmm4,0x40(%[y])\n\t" - "movdqa %%xmm5,0x50(%[y])\n\t" - "movdqa %%xmm6,0x60(%[y])\n\t" - "movdqa %%xmm7,0x70(%[y])\n\t" - :[a]"=&r"(a) - :[y]"r"(_y),[x]"r"(_x) - :"memory" - ); -} -#endif diff --git a/drivers/theora/x86/x86enc.c b/drivers/theora/x86/x86enc.c deleted file mode 100644 index 43b7be3ea3..0000000000 --- a/drivers/theora/x86/x86enc.c +++ /dev/null @@ -1,49 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: x86state.c 15675 2009-02-06 09:43:27Z tterribe $ - - ********************************************************************/ -#include "x86enc.h" - -#if defined(OC_X86_ASM) - -#include "../cpu.c" - -void oc_enc_vtable_init_x86(oc_enc_ctx *_enc){ - ogg_uint32_t cpu_flags; - cpu_flags=oc_cpu_flags_get(); - oc_enc_vtable_init_c(_enc); - if(cpu_flags&OC_CPU_X86_MMX){ - _enc->opt_vtable.frag_sub=oc_enc_frag_sub_mmx; - _enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_mmx; - _enc->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx; - _enc->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx; - _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_mmx; - } - if(cpu_flags&OC_CPU_X86_MMXEXT){ - _enc->opt_vtable.frag_sad=oc_enc_frag_sad_mmxext; - _enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_mmxext; - _enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_mmxext; - _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_mmxext; - _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_mmxext; - _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_mmxext; - _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_mmxext; - } - if(cpu_flags&OC_CPU_X86_SSE2){ -# if defined(OC_X86_64_ASM) - /*_enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_x86_64sse2;*/ -# endif - } -} -#endif diff --git a/drivers/theora/x86/x86enc.h b/drivers/theora/x86/x86enc.h deleted file mode 100644 index 06c3908bcd..0000000000 --- a/drivers/theora/x86/x86enc.h +++ /dev/null @@ -1,47 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. 
* - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: x86int.h 15675 2009-02-06 09:43:27Z tterribe $ - - ********************************************************************/ - -#if !defined(_x86_x86enc_H) -# define _x86_x86enc_H (1) -# include "../encint.h" -# include "x86int.h" - -void oc_enc_vtable_init_x86(oc_enc_ctx *_enc); - -unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src, - const unsigned char *_ref,int _ystride); -unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src, - const unsigned char *_ref,int _ystride,unsigned _thresh); -unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src, - const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, - unsigned _thresh); -unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src, - const unsigned char *_ref,int _ystride,unsigned _thresh); -unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src, - const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, - unsigned _thresh); -unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,int _ystride); -void oc_enc_frag_sub_mmx(ogg_int16_t _diff[64], - const unsigned char *_x,const unsigned char *_y,int _stride); -void oc_enc_frag_sub_128_mmx(ogg_int16_t _diff[64], - const unsigned char *_x,int _stride); -void oc_enc_frag_copy2_mmxext(unsigned char *_dst, - const unsigned char *_src1,const unsigned char *_src2,int _ystride); -void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]); -void oc_enc_fdct8x8_x86_64sse2(ogg_int16_t _y[64],const ogg_int16_t _x[64]); - -#endif diff --git 
a/drivers/theora/x86/x86int.h b/drivers/theora/x86/x86int.h deleted file mode 100644 index ede724f5aa..0000000000 --- a/drivers/theora/x86/x86int.h +++ /dev/null @@ -1,42 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: x86int.h 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -#if !defined(_x86_x86int_H) -# define _x86_x86int_H (1) -# include "../internal.h" - -void oc_state_vtable_init_x86(oc_theora_state *_state); - -void oc_frag_copy_mmx(unsigned char *_dst, - const unsigned char *_src,int _ystride); -void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride, - const ogg_int16_t *_residue); -void oc_frag_recon_inter_mmx(unsigned char *_dst, - const unsigned char *_src,int _ystride,const ogg_int16_t *_residue); -void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1, - const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue); -void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi); -void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi, - int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); -void oc_state_frag_copy_list_mmx(const oc_theora_state *_state, - const ptrdiff_t *_fragis,ptrdiff_t _nfragis, - int _dst_frame,int _src_frame,int _pli); -void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state, - int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); -void 
oc_restore_fpu_mmx(void); - -#endif diff --git a/drivers/theora/x86/x86state.c b/drivers/theora/x86/x86state.c deleted file mode 100644 index a786bec284..0000000000 --- a/drivers/theora/x86/x86state.c +++ /dev/null @@ -1,62 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: x86state.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -#include "x86int.h" - -#if defined(OC_X86_ASM) - -#include "../cpu.c" - -/*This table has been modified from OC_FZIG_ZAG by baking a 4x4 transpose into - each quadrant of the destination.*/ -static const unsigned char OC_FZIG_ZAG_MMX[128]={ - 0, 8, 1, 2, 9,16,24,17, - 10, 3,32,11,18,25, 4,12, - 5,26,19,40,33,34,41,48, - 27, 6,13,20,28,21,14, 7, - 56,49,42,35,43,50,57,36, - 15,22,29,30,23,44,37,58, - 51,59,38,45,52,31,60,53, - 46,39,47,54,61,62,55,63, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, -}; - -void oc_state_vtable_init_x86(oc_theora_state *_state){ - _state->cpu_flags=oc_cpu_flags_get(); - if(_state->cpu_flags&OC_CPU_X86_MMX){ - _state->opt_vtable.frag_copy=oc_frag_copy_mmx; - _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx; - _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx; - _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_mmx; 
- _state->opt_vtable.idct8x8=oc_idct8x8_mmx; - _state->opt_vtable.state_frag_recon=oc_state_frag_recon_mmx; - _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_mmx; - _state->opt_vtable.state_loop_filter_frag_rows= - oc_state_loop_filter_frag_rows_mmx; - _state->opt_vtable.restore_fpu=oc_restore_fpu_mmx; - _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_MMX; - } - else oc_state_vtable_init_c(_state); -} -#endif diff --git a/drivers/theora/x86_vc/mmxencfrag.c b/drivers/theora/x86_vc/mmxencfrag.c deleted file mode 100644 index ac9dacf377..0000000000 --- a/drivers/theora/x86_vc/mmxencfrag.c +++ /dev/null @@ -1,969 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: dsp_mmx.c 14579 2008-03-12 06:42:40Z xiphmont $ - - ********************************************************************/ -#include -#include "x86enc.h" - -#if defined(OC_X86_ASM) - -unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src, - const unsigned char *_ref,int _ystride){ - ptrdiff_t ret; - __asm{ -#define SRC esi -#define REF edx -#define YSTRIDE ecx -#define YSTRIDE3 edi - mov YSTRIDE,_ystride - mov SRC,_src - mov REF,_ref - /*Load the first 4 rows of each block.*/ - movq mm0,[SRC] - movq mm1,[REF] - movq mm2,[SRC][YSTRIDE] - movq mm3,[REF][YSTRIDE] - lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] - movq mm4,[SRC+YSTRIDE*2] - movq mm5,[REF+YSTRIDE*2] - movq mm6,[SRC+YSTRIDE3] - movq mm7,[REF+YSTRIDE3] - /*Compute their SADs and add them in mm0*/ - psadbw mm0,mm1 - psadbw mm2,mm3 - 
lea SRC,[SRC+YSTRIDE*4] - paddw mm0,mm2 - lea REF,[REF+YSTRIDE*4] - /*Load the next 3 rows as registers become available.*/ - movq mm2,[SRC] - movq mm3,[REF] - psadbw mm4,mm5 - psadbw mm6,mm7 - paddw mm0,mm4 - movq mm5,[REF+YSTRIDE] - movq mm4,[SRC+YSTRIDE] - paddw mm0,mm6 - movq mm7,[REF+YSTRIDE*2] - movq mm6,[SRC+YSTRIDE*2] - /*Start adding their SADs to mm0*/ - psadbw mm2,mm3 - psadbw mm4,mm5 - paddw mm0,mm2 - psadbw mm6,mm7 - /*Load last row as registers become available.*/ - movq mm2,[SRC+YSTRIDE3] - movq mm3,[REF+YSTRIDE3] - /*And finish adding up their SADs.*/ - paddw mm0,mm4 - psadbw mm2,mm3 - paddw mm0,mm6 - paddw mm0,mm2 - movd [ret],mm0 -#undef SRC -#undef REF -#undef YSTRIDE -#undef YSTRIDE3 - } - return (unsigned)ret; -} - -unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src, - const unsigned char *_ref,int _ystride,unsigned _thresh){ - /*Early termination is for suckers.*/ - return oc_enc_frag_sad_mmxext(_src,_ref,_ystride); -} - -#define OC_SAD2_LOOP __asm{ \ - /*We want to compute (mm0+mm1>>1) on unsigned bytes without overflow, but \ - pavgb computes (mm0+mm1+1>>1). \ - The latter is exactly 1 too large when the low bit of two corresponding \ - bytes is only set in one of them. 
\ - Therefore we pxor the operands, pand to mask out the low bits, and psubb to \ - correct the output of pavgb.*/ \ - __asm movq mm6,mm0 \ - __asm lea REF1,[REF1+YSTRIDE*2] \ - __asm pxor mm0,mm1 \ - __asm pavgb mm6,mm1 \ - __asm lea REF2,[REF2+YSTRIDE*2] \ - __asm movq mm1,mm2 \ - __asm pand mm0,mm7 \ - __asm pavgb mm2,mm3 \ - __asm pxor mm1,mm3 \ - __asm movq mm3,[REF2+YSTRIDE] \ - __asm psubb mm6,mm0 \ - __asm movq mm0,[REF1] \ - __asm pand mm1,mm7 \ - __asm psadbw mm4,mm6 \ - __asm movd mm6,RET \ - __asm psubb mm2,mm1 \ - __asm movq mm1,[REF2] \ - __asm lea SRC,[SRC+YSTRIDE*2] \ - __asm psadbw mm5,mm2 \ - __asm movq mm2,[REF1+YSTRIDE] \ - __asm paddw mm5,mm4 \ - __asm movq mm4,[SRC] \ - __asm paddw mm6,mm5 \ - __asm movq mm5,[SRC+YSTRIDE] \ - __asm movd RET,mm6 \ -} - -/*Same as above, but does not pre-load the next two rows.*/ -#define OC_SAD2_TAIL __asm{ \ - __asm movq mm6,mm0 \ - __asm pavgb mm0,mm1 \ - __asm pxor mm6,mm1 \ - __asm movq mm1,mm2 \ - __asm pand mm6,mm7 \ - __asm pavgb mm2,mm3 \ - __asm pxor mm1,mm3 \ - __asm psubb mm0,mm6 \ - __asm pand mm1,mm7 \ - __asm psadbw mm4,mm0 \ - __asm psubb mm2,mm1 \ - __asm movd mm6,RET \ - __asm psadbw mm5,mm2 \ - __asm paddw mm5,mm4 \ - __asm paddw mm6,mm5 \ - __asm movd RET,mm6 \ -} - -unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src, - const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, - unsigned _thresh){ - ptrdiff_t ret; - __asm{ -#define REF1 ecx -#define REF2 edi -#define YSTRIDE esi -#define SRC edx -#define RET eax - mov YSTRIDE,_ystride - mov SRC,_src - mov REF1,_ref1 - mov REF2,_ref2 - movq mm0,[REF1] - movq mm1,[REF2] - movq mm2,[REF1+YSTRIDE] - movq mm3,[REF2+YSTRIDE] - xor RET,RET - movq mm4,[SRC] - pxor mm7,mm7 - pcmpeqb mm6,mm6 - movq mm5,[SRC+YSTRIDE] - psubb mm7,mm6 - OC_SAD2_LOOP - OC_SAD2_LOOP - OC_SAD2_LOOP - OC_SAD2_TAIL - mov [ret],RET -#undef REF1 -#undef REF2 -#undef YSTRIDE -#undef SRC -#undef RET - } - return (unsigned)ret; -} - -/*Load an 8x4 array of 
pixel values from %[src] and %[ref] and compute their - 16-bit difference in mm0...mm7.*/ -#define OC_LOAD_SUB_8x4(_off) __asm{ \ - __asm movd mm0,[_off+SRC] \ - __asm movd mm4,[_off+REF] \ - __asm movd mm1,[_off+SRC+SRC_YSTRIDE] \ - __asm lea SRC,[SRC+SRC_YSTRIDE*2] \ - __asm movd mm5,[_off+REF+REF_YSTRIDE] \ - __asm lea REF,[REF+REF_YSTRIDE*2] \ - __asm movd mm2,[_off+SRC] \ - __asm movd mm7,[_off+REF] \ - __asm movd mm3,[_off+SRC+SRC_YSTRIDE] \ - __asm movd mm6,[_off+REF+REF_YSTRIDE] \ - __asm punpcklbw mm0,mm4 \ - __asm lea SRC,[SRC+SRC_YSTRIDE*2] \ - __asm punpcklbw mm4,mm4 \ - __asm lea REF,[REF+REF_YSTRIDE*2] \ - __asm psubw mm0,mm4 \ - __asm movd mm4,[_off+SRC] \ - __asm movq [_off*2+BUF],mm0 \ - __asm movd mm0,[_off+REF] \ - __asm punpcklbw mm1,mm5 \ - __asm punpcklbw mm5,mm5 \ - __asm psubw mm1,mm5 \ - __asm movd mm5,[_off+SRC+SRC_YSTRIDE] \ - __asm punpcklbw mm2,mm7 \ - __asm punpcklbw mm7,mm7 \ - __asm psubw mm2,mm7 \ - __asm movd mm7,[_off+REF+REF_YSTRIDE] \ - __asm punpcklbw mm3,mm6 \ - __asm lea SRC,[SRC+SRC_YSTRIDE*2] \ - __asm punpcklbw mm6,mm6 \ - __asm psubw mm3,mm6 \ - __asm movd mm6,[_off+SRC] \ - __asm punpcklbw mm4,mm0 \ - __asm lea REF,[REF+REF_YSTRIDE*2] \ - __asm punpcklbw mm0,mm0 \ - __asm lea SRC,[SRC+SRC_YSTRIDE*2] \ - __asm psubw mm4,mm0 \ - __asm movd mm0,[_off+REF] \ - __asm punpcklbw mm5,mm7 \ - __asm neg SRC_YSTRIDE \ - __asm punpcklbw mm7,mm7 \ - __asm psubw mm5,mm7 \ - __asm movd mm7,[_off+SRC+SRC_YSTRIDE] \ - __asm punpcklbw mm6,mm0 \ - __asm lea REF,[REF+REF_YSTRIDE*2] \ - __asm punpcklbw mm0,mm0 \ - __asm neg REF_YSTRIDE \ - __asm psubw mm6,mm0 \ - __asm movd mm0,[_off+REF+REF_YSTRIDE] \ - __asm lea SRC,[SRC+SRC_YSTRIDE*8] \ - __asm punpcklbw mm7,mm0 \ - __asm neg SRC_YSTRIDE \ - __asm punpcklbw mm0,mm0 \ - __asm lea REF,[REF+REF_YSTRIDE*8] \ - __asm psubw mm7,mm0 \ - __asm neg REF_YSTRIDE \ - __asm movq mm0,[_off*2+BUF] \ -} - -/*Load an 8x4 array of pixel values from %[src] into %%mm0...%%mm7.*/ -#define OC_LOAD_8x4(_off) 
__asm{ \ - __asm movd mm0,[_off+SRC] \ - __asm movd mm1,[_off+SRC+YSTRIDE] \ - __asm movd mm2,[_off+SRC+YSTRIDE*2] \ - __asm pxor mm7,mm7 \ - __asm movd mm3,[_off+SRC+YSTRIDE3] \ - __asm punpcklbw mm0,mm7 \ - __asm movd mm4,[_off+SRC4] \ - __asm punpcklbw mm1,mm7 \ - __asm movd mm5,[_off+SRC4+YSTRIDE] \ - __asm punpcklbw mm2,mm7 \ - __asm movd mm6,[_off+SRC4+YSTRIDE*2] \ - __asm punpcklbw mm3,mm7 \ - __asm movd mm7,[_off+SRC4+YSTRIDE3] \ - __asm punpcklbw mm4,mm4 \ - __asm punpcklbw mm5,mm5 \ - __asm psrlw mm4,8 \ - __asm psrlw mm5,8 \ - __asm punpcklbw mm6,mm6 \ - __asm punpcklbw mm7,mm7 \ - __asm psrlw mm6,8 \ - __asm psrlw mm7,8 \ -} - -/*Performs the first two stages of an 8-point 1-D Hadamard transform. - The transform is performed in place, except that outputs 0-3 are swapped with - outputs 4-7. - Outputs 2, 3, 6 and 7 from the second stage are negated (which allows us to - perform this stage in place with no temporary registers).*/ -#define OC_HADAMARD_AB_8x4 __asm{ \ - /*Stage A: \ - Outputs 0-3 are swapped with 4-7 here.*/ \ - __asm paddw mm5,mm1 \ - __asm paddw mm6,mm2 \ - __asm paddw mm1,mm1 \ - __asm paddw mm2,mm2 \ - __asm psubw mm1,mm5 \ - __asm psubw mm2,mm6 \ - __asm paddw mm7,mm3 \ - __asm paddw mm4,mm0 \ - __asm paddw mm3,mm3 \ - __asm paddw mm0,mm0 \ - __asm psubw mm3,mm7 \ - __asm psubw mm0,mm4 \ - /*Stage B:*/ \ - __asm paddw mm0,mm2 \ - __asm paddw mm1,mm3 \ - __asm paddw mm4,mm6 \ - __asm paddw mm5,mm7 \ - __asm paddw mm2,mm2 \ - __asm paddw mm3,mm3 \ - __asm paddw mm6,mm6 \ - __asm paddw mm7,mm7 \ - __asm psubw mm2,mm0 \ - __asm psubw mm3,mm1 \ - __asm psubw mm6,mm4 \ - __asm psubw mm7,mm5 \ -} - -/*Performs the last stage of an 8-point 1-D Hadamard transform in place. 
- Ouputs 1, 3, 5, and 7 are negated (which allows us to perform this stage in - place with no temporary registers).*/ -#define OC_HADAMARD_C_8x4 __asm{ \ - /*Stage C:*/ \ - __asm paddw mm0,mm1 \ - __asm paddw mm2,mm3 \ - __asm paddw mm4,mm5 \ - __asm paddw mm6,mm7 \ - __asm paddw mm1,mm1 \ - __asm paddw mm3,mm3 \ - __asm paddw mm5,mm5 \ - __asm paddw mm7,mm7 \ - __asm psubw mm1,mm0 \ - __asm psubw mm3,mm2 \ - __asm psubw mm5,mm4 \ - __asm psubw mm7,mm6 \ -} - -/*Performs an 8-point 1-D Hadamard transform. - The transform is performed in place, except that outputs 0-3 are swapped with - outputs 4-7. - Outputs 1, 2, 5 and 6 are negated (which allows us to perform the transform - in place with no temporary registers).*/ -#define OC_HADAMARD_8x4 __asm{ \ - OC_HADAMARD_AB_8x4 \ - OC_HADAMARD_C_8x4 \ -} - -/*Performs the first part of the final stage of the Hadamard transform and - summing of absolute values. - At the end of this part, mm1 will contain the DC coefficient of the - transform.*/ -#define OC_HADAMARD_C_ABS_ACCUM_A_8x4(_r6,_r7) __asm{ \ - /*We use the fact that \ - (abs(a+b)+abs(a-b))/2=max(abs(a),abs(b)) \ - to merge the final butterfly with the abs and the first stage of \ - accumulation. \ - Thus we can avoid using pabsw, which is not available until SSSE3. \ - Emulating pabsw takes 3 instructions, so the straightforward MMXEXT \ - implementation would be (3+3)*8+7=55 instructions (+4 for spilling \ - registers). \ - Even with pabsw, it would be (3+1)*8+7=39 instructions (with no spills). 
\ - This implementation is only 26 (+4 for spilling registers).*/ \ - __asm movq [_r7+BUF],mm7 \ - __asm movq [_r6+BUF],mm6 \ - /*mm7={0x7FFF}x4 \ - mm0=max(abs(mm0),abs(mm1))-0x7FFF*/ \ - __asm pcmpeqb mm7,mm7 \ - __asm movq mm6,mm0 \ - __asm psrlw mm7,1 \ - __asm paddw mm6,mm1 \ - __asm pmaxsw mm0,mm1 \ - __asm paddsw mm6,mm7 \ - __asm psubw mm0,mm6 \ - /*mm2=max(abs(mm2),abs(mm3))-0x7FFF \ - mm4=max(abs(mm4),abs(mm5))-0x7FFF*/ \ - __asm movq mm6,mm2 \ - __asm movq mm1,mm4 \ - __asm pmaxsw mm2,mm3 \ - __asm pmaxsw mm4,mm5 \ - __asm paddw mm6,mm3 \ - __asm paddw mm1,mm5 \ - __asm movq mm3,[_r7+BUF] \ -} - -/*Performs the second part of the final stage of the Hadamard transform and - summing of absolute values.*/ -#define OC_HADAMARD_C_ABS_ACCUM_B_8x4(_r6,_r7) __asm{ \ - __asm paddsw mm6,mm7 \ - __asm movq mm5,[_r6+BUF] \ - __asm paddsw mm1,mm7 \ - __asm psubw mm2,mm6 \ - __asm psubw mm4,mm1 \ - /*mm7={1}x4 (needed for the horizontal add that follows) \ - mm0+=mm2+mm4+max(abs(mm3),abs(mm5))-0x7FFF*/ \ - __asm movq mm6,mm3 \ - __asm pmaxsw mm3,mm5 \ - __asm paddw mm0,mm2 \ - __asm paddw mm6,mm5 \ - __asm paddw mm0,mm4 \ - __asm paddsw mm6,mm7 \ - __asm paddw mm0,mm3 \ - __asm psrlw mm7,14 \ - __asm psubw mm0,mm6 \ -} - -/*Performs the last stage of an 8-point 1-D Hadamard transform, takes the - absolute value of each component, and accumulates everything into mm0. - This is the only portion of SATD which requires MMXEXT (we could use plain - MMX, but it takes 4 instructions and an extra register to work around the - lack of a pmaxsw, which is a pretty serious penalty).*/ -#define OC_HADAMARD_C_ABS_ACCUM_8x4(_r6,_r7) __asm{ \ - OC_HADAMARD_C_ABS_ACCUM_A_8x4(_r6,_r7) \ - OC_HADAMARD_C_ABS_ACCUM_B_8x4(_r6,_r7) \ -} - -/*Performs an 8-point 1-D Hadamard transform, takes the absolute value of each - component, and accumulates everything into mm0. 
- Note that mm0 will have an extra 4 added to each column, and that after - removing this value, the remainder will be half the conventional value.*/ -#define OC_HADAMARD_ABS_ACCUM_8x4(_r6,_r7) __asm{ \ - OC_HADAMARD_AB_8x4 \ - OC_HADAMARD_C_ABS_ACCUM_8x4(_r6,_r7) \ -} - -/*Performs two 4x4 transposes (mostly) in place. - On input, {mm0,mm1,mm2,mm3} contains rows {e,f,g,h}, and {mm4,mm5,mm6,mm7} - contains rows {a,b,c,d}. - On output, {0x40,0x50,0x60,0x70}+_off+BUF contains {e,f,g,h}^T, and - {mm4,mm5,mm6,mm7} contains the transposed rows {a,b,c,d}^T.*/ -#define OC_TRANSPOSE_4x4x2(_off) __asm{ \ - /*First 4x4 transpose:*/ \ - __asm movq [0x10+_off+BUF],mm5 \ - /*mm0 = e3 e2 e1 e0 \ - mm1 = f3 f2 f1 f0 \ - mm2 = g3 g2 g1 g0 \ - mm3 = h3 h2 h1 h0*/ \ - __asm movq mm5,mm2 \ - __asm punpcklwd mm2,mm3 \ - __asm punpckhwd mm5,mm3 \ - __asm movq mm3,mm0 \ - __asm punpcklwd mm0,mm1 \ - __asm punpckhwd mm3,mm1 \ - /*mm0 = f1 e1 f0 e0 \ - mm3 = f3 e3 f2 e2 \ - mm2 = h1 g1 h0 g0 \ - mm5 = h3 g3 h2 g2*/ \ - __asm movq mm1,mm0 \ - __asm punpckldq mm0,mm2 \ - __asm punpckhdq mm1,mm2 \ - __asm movq mm2,mm3 \ - __asm punpckhdq mm3,mm5 \ - __asm movq [0x40+_off+BUF],mm0 \ - __asm punpckldq mm2,mm5 \ - /*mm0 = h0 g0 f0 e0 \ - mm1 = h1 g1 f1 e1 \ - mm2 = h2 g2 f2 e2 \ - mm3 = h3 g3 f3 e3*/ \ - __asm movq mm5,[0x10+_off+BUF] \ - /*Second 4x4 transpose:*/ \ - /*mm4 = a3 a2 a1 a0 \ - mm5 = b3 b2 b1 b0 \ - mm6 = c3 c2 c1 c0 \ - mm7 = d3 d2 d1 d0*/ \ - __asm movq mm0,mm6 \ - __asm punpcklwd mm6,mm7 \ - __asm movq [0x50+_off+BUF],mm1 \ - __asm punpckhwd mm0,mm7 \ - __asm movq mm7,mm4 \ - __asm punpcklwd mm4,mm5 \ - __asm movq [0x60+_off+BUF],mm2 \ - __asm punpckhwd mm7,mm5 \ - /*mm4 = b1 a1 b0 a0 \ - mm7 = b3 a3 b2 a2 \ - mm6 = d1 c1 d0 c0 \ - mm0 = d3 c3 d2 c2*/ \ - __asm movq mm5,mm4 \ - __asm punpckldq mm4,mm6 \ - __asm movq [0x70+_off+BUF],mm3 \ - __asm punpckhdq mm5,mm6 \ - __asm movq mm6,mm7 \ - __asm punpckhdq mm7,mm0 \ - __asm punpckldq mm6,mm0 \ - /*mm4 = d0 c0 b0 a0 \ - mm5 = d1 
c1 b1 a1 \ - mm6 = d2 c2 b2 a2 \ - mm7 = d3 c3 b3 a3*/ \ -} - -static unsigned oc_int_frag_satd_thresh_mmxext(const unsigned char *_src, - int _src_ystride,const unsigned char *_ref,int _ref_ystride,unsigned _thresh){ - OC_ALIGN8(ogg_int16_t buf[64]); - ogg_int16_t *bufp; - unsigned ret1; - unsigned ret2; - bufp=buf; - __asm{ -#define SRC esi -#define REF eax -#define SRC_YSTRIDE ecx -#define REF_YSTRIDE edx -#define BUF edi -#define RET eax -#define RET2 edx - mov SRC,_src - mov SRC_YSTRIDE,_src_ystride - mov REF,_ref - mov REF_YSTRIDE,_ref_ystride - mov BUF,bufp - OC_LOAD_SUB_8x4(0x00) - OC_HADAMARD_8x4 - OC_TRANSPOSE_4x4x2(0x00) - /*Finish swapping out this 8x4 block to make room for the next one. - mm0...mm3 have been swapped out already.*/ - movq [0x00+BUF],mm4 - movq [0x10+BUF],mm5 - movq [0x20+BUF],mm6 - movq [0x30+BUF],mm7 - OC_LOAD_SUB_8x4(0x04) - OC_HADAMARD_8x4 - OC_TRANSPOSE_4x4x2(0x08) - /*Here the first 4x4 block of output from the last transpose is the second - 4x4 block of input for the next transform. - We have cleverly arranged that it already be in the appropriate place, so - we only have to do half the loads.*/ - movq mm1,[0x10+BUF] - movq mm2,[0x20+BUF] - movq mm3,[0x30+BUF] - movq mm0,[0x00+BUF] - OC_HADAMARD_ABS_ACCUM_8x4(0x28,0x38) - /*Up to this point, everything fit in 16 bits (8 input + 1 for the - difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1 - for the factor of two we dropped + 3 for the vertical accumulation). - Now we finally have to promote things to dwords. 
- We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long - latency of pmaddwd by starting the next series of loads now.*/ - mov RET2,_thresh - pmaddwd mm0,mm7 - movq mm1,[0x50+BUF] - movq mm5,[0x58+BUF] - movq mm4,mm0 - movq mm2,[0x60+BUF] - punpckhdq mm0,mm0 - movq mm6,[0x68+BUF] - paddd mm4,mm0 - movq mm3,[0x70+BUF] - movd RET,mm4 - movq mm7,[0x78+BUF] - /*The sums produced by OC_HADAMARD_ABS_ACCUM_8x4 each have an extra 4 - added to them, and a factor of two removed; correct the final sum here.*/ - lea RET,[RET+RET-32] - movq mm0,[0x40+BUF] - cmp RET,RET2 - movq mm4,[0x48+BUF] - jae at_end - OC_HADAMARD_ABS_ACCUM_8x4(0x68,0x78) - pmaddwd mm0,mm7 - /*There isn't much to stick in here to hide the latency this time, but the - alternative to pmaddwd is movq->punpcklwd->punpckhwd->paddd, whose - latency is even worse.*/ - sub RET,32 - movq mm4,mm0 - punpckhdq mm0,mm0 - paddd mm4,mm0 - movd RET2,mm4 - lea RET,[RET+RET2*2] - align 16 -at_end: - mov ret1,RET -#undef SRC -#undef REF -#undef SRC_YSTRIDE -#undef REF_YSTRIDE -#undef BUF -#undef RET -#undef RET2 - } - return ret1; -} - -unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src, - const unsigned char *_ref,int _ystride,unsigned _thresh){ - return oc_int_frag_satd_thresh_mmxext(_src,_ystride,_ref,_ystride,_thresh); -} - - -/*Our internal implementation of frag_copy2 takes an extra stride parameter so - we can share code with oc_enc_frag_satd2_thresh_mmxext().*/ -static void oc_int_frag_copy2_mmxext(unsigned char *_dst,int _dst_ystride, - const unsigned char *_src1,const unsigned char *_src2,int _src_ystride){ - __asm{ - /*Load the first 3 rows.*/ -#define DST_YSTRIDE edi -#define SRC_YSTRIDE esi -#define DST eax -#define SRC1 edx -#define SRC2 ecx - mov DST_YSTRIDE,_dst_ystride - mov SRC_YSTRIDE,_src_ystride - mov DST,_dst - mov SRC1,_src1 - mov SRC2,_src2 - movq mm0,[SRC1] - movq mm1,[SRC2] - movq mm2,[SRC1+SRC_YSTRIDE] - lea SRC1,[SRC1+SRC_YSTRIDE*2] - movq mm3,[SRC2+SRC_YSTRIDE] - lea 
SRC2,[SRC2+SRC_YSTRIDE*2] - pxor mm7,mm7 - movq mm4,[SRC1] - pcmpeqb mm6,mm6 - movq mm5,[SRC2] - /*mm7={1}x8.*/ - psubb mm7,mm6 - /*Start averaging mm0 and mm1 into mm6.*/ - movq mm6,mm0 - pxor mm0,mm1 - pavgb mm6,mm1 - /*mm1 is free, start averaging mm3 into mm2 using mm1.*/ - movq mm1,mm2 - pand mm0,mm7 - pavgb mm2,mm3 - pxor mm1,mm3 - /*mm3 is free.*/ - psubb mm6,mm0 - /*mm0 is free, start loading the next row.*/ - movq mm0,[SRC1+SRC_YSTRIDE] - /*Start averaging mm5 and mm4 using mm3.*/ - movq mm3,mm4 - /*mm6 [row 0] is done; write it out.*/ - movq [DST],mm6 - pand mm1,mm7 - pavgb mm4,mm5 - psubb mm2,mm1 - /*mm1 is free, continue loading the next row.*/ - movq mm1,[SRC2+SRC_YSTRIDE] - pxor mm3,mm5 - lea SRC1,[SRC1+SRC_YSTRIDE*2] - /*mm2 [row 1] is done; write it out.*/ - movq [DST+DST_YSTRIDE],mm2 - pand mm3,mm7 - /*Start loading the next row.*/ - movq mm2,[SRC1] - lea DST,[DST+DST_YSTRIDE*2] - psubb mm4,mm3 - lea SRC2,[SRC2+SRC_YSTRIDE*2] - /*mm4 [row 2] is done; write it out.*/ - movq [DST],mm4 - /*Continue loading the next row.*/ - movq mm3,[SRC2] - /*Start averaging mm0 and mm1 into mm6.*/ - movq mm6,mm0 - pxor mm0,mm1 - /*Start loading the next row.*/ - movq mm4,[SRC1+SRC_YSTRIDE] - pavgb mm6,mm1 - /*mm1 is free; start averaging mm3 into mm2 using mm1.*/ - movq mm1,mm2 - pand mm0,mm7 - /*Continue loading the next row.*/ - movq mm5,[SRC2+SRC_YSTRIDE] - pavgb mm2,mm3 - lea SRC1,[SRC1+SRC_YSTRIDE*2] - pxor mm1,mm3 - /*mm3 is free.*/ - psubb mm6,mm0 - /*mm0 is free, start loading the next row.*/ - movq mm0,[SRC1] - /*Start averaging mm5 into mm4 using mm3.*/ - movq mm3,mm4 - /*mm6 [row 3] is done; write it out.*/ - movq [DST+DST_YSTRIDE],mm6 - pand mm1,mm7 - lea SRC2,[SRC2+SRC_YSTRIDE*2] - pavgb mm4,mm5 - lea DST,[DST+DST_YSTRIDE*2] - psubb mm2,mm1 - /*mm1 is free; continue loading the next row.*/ - movq mm1,[SRC2] - pxor mm3,mm5 - /*mm2 [row 4] is done; write it out.*/ - movq [DST],mm2 - pand mm3,mm7 - /*Start loading the next row.*/ - movq 
mm2,[SRC1+SRC_YSTRIDE] - psubb mm4,mm3 - /*Start averaging mm0 and mm1 into mm6.*/ - movq mm6,mm0 - /*Continue loading the next row.*/ - movq mm3,[SRC2+SRC_YSTRIDE] - /*mm4 [row 5] is done; write it out.*/ - movq [DST+DST_YSTRIDE],mm4 - pxor mm0,mm1 - pavgb mm6,mm1 - /*mm4 is free; start averaging mm3 into mm2 using mm4.*/ - movq mm4,mm2 - pand mm0,mm7 - pavgb mm2,mm3 - pxor mm4,mm3 - lea DST,[DST+DST_YSTRIDE*2] - psubb mm6,mm0 - pand mm4,mm7 - /*mm6 [row 6] is done, write it out.*/ - movq [DST],mm6 - psubb mm2,mm4 - /*mm2 [row 7] is done, write it out.*/ - movq [DST+DST_YSTRIDE],mm2 -#undef SRC1 -#undef SRC2 -#undef SRC_YSTRIDE -#undef DST_YSTRIDE -#undef DST - } -} - -unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src, - const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, - unsigned _thresh){ - OC_ALIGN8(unsigned char ref[64]); - oc_int_frag_copy2_mmxext(ref,8,_ref1,_ref2,_ystride); - return oc_int_frag_satd_thresh_mmxext(_src,_ystride,ref,8,_thresh); -} - -unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src, - int _ystride){ - OC_ALIGN8(ogg_int16_t buf[64]); - ogg_int16_t *bufp; - unsigned ret1; - unsigned ret2; - bufp=buf; - __asm{ -#define SRC eax -#define SRC4 esi -#define BUF edi -#define RET eax -#define RET_WORD ax -#define RET2 ecx -#define YSTRIDE edx -#define YSTRIDE3 ecx - mov SRC,_src - mov BUF,bufp - mov YSTRIDE,_ystride - /* src4 = src+4*ystride */ - lea SRC4,[SRC+YSTRIDE*4] - /* ystride3 = 3*ystride */ - lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] - OC_LOAD_8x4(0x00) - OC_HADAMARD_8x4 - OC_TRANSPOSE_4x4x2(0x00) - /*Finish swapping out this 8x4 block to make room for the next one. - mm0...mm3 have been swapped out already.*/ - movq [0x00+BUF],mm4 - movq [0x10+BUF],mm5 - movq [0x20+BUF],mm6 - movq [0x30+BUF],mm7 - OC_LOAD_8x4(0x04) - OC_HADAMARD_8x4 - OC_TRANSPOSE_4x4x2(0x08) - /*Here the first 4x4 block of output from the last transpose is the second - 4x4 block of input for the next transform. 
- We have cleverly arranged that it already be in the appropriate place, so - we only have to do half the loads.*/ - movq mm1,[0x10+BUF] - movq mm2,[0x20+BUF] - movq mm3,[0x30+BUF] - movq mm0,[0x00+BUF] - /*We split out the stages here so we can save the DC coefficient in the - middle.*/ - OC_HADAMARD_AB_8x4 - OC_HADAMARD_C_ABS_ACCUM_A_8x4(0x28,0x38) - movd RET,mm1 - OC_HADAMARD_C_ABS_ACCUM_B_8x4(0x28,0x38) - /*Up to this point, everything fit in 16 bits (8 input + 1 for the - difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1 - for the factor of two we dropped + 3 for the vertical accumulation). - Now we finally have to promote things to dwords. - We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long - latency of pmaddwd by starting the next series of loads now.*/ - pmaddwd mm0,mm7 - movq mm1,[0x50+BUF] - movq mm5,[0x58+BUF] - movq mm2,[0x60+BUF] - movq mm4,mm0 - movq mm6,[0x68+BUF] - punpckhdq mm0,mm0 - movq mm3,[0x70+BUF] - paddd mm4,mm0 - movq mm7,[0x78+BUF] - movd RET2,mm4 - movq mm0,[0x40+BUF] - movq mm4,[0x48+BUF] - OC_HADAMARD_ABS_ACCUM_8x4(0x68,0x78) - pmaddwd mm0,mm7 - /*We assume that the DC coefficient is always positive (which is true, - because the input to the INTRA transform was not a difference).*/ - movzx RET,RET_WORD - add RET2,RET2 - sub RET2,RET - movq mm4,mm0 - punpckhdq mm0,mm0 - paddd mm4,mm0 - movd RET,mm4 - lea RET,[-64+RET2+RET*2] - mov [ret1],RET -#undef SRC -#undef SRC4 -#undef BUF -#undef RET -#undef RET_WORD -#undef RET2 -#undef YSTRIDE -#undef YSTRIDE3 - } - return ret1; -} - -void oc_enc_frag_sub_mmx(ogg_int16_t _residue[64], - const unsigned char *_src, const unsigned char *_ref,int _ystride){ - int i; - __asm pxor mm7,mm7 - for(i=4;i-->0;){ - __asm{ -#define SRC edx -#define YSTRIDE esi -#define RESIDUE eax -#define REF ecx - mov YSTRIDE,_ystride - mov RESIDUE,_residue - mov SRC,_src - mov REF,_ref - /*mm0=[src]*/ - movq mm0,[SRC] - /*mm1=[ref]*/ - movq mm1,[REF] - /*mm4=[src+ystride]*/ - movq 
mm4,[SRC+YSTRIDE] - /*mm5=[ref+ystride]*/ - movq mm5,[REF+YSTRIDE] - /*Compute [src]-[ref].*/ - movq mm2,mm0 - punpcklbw mm0,mm7 - movq mm3,mm1 - punpckhbw mm2,mm7 - punpcklbw mm1,mm7 - punpckhbw mm3,mm7 - psubw mm0,mm1 - psubw mm2,mm3 - /*Compute [src+ystride]-[ref+ystride].*/ - movq mm1,mm4 - punpcklbw mm4,mm7 - movq mm3,mm5 - punpckhbw mm1,mm7 - lea SRC,[SRC+YSTRIDE*2] - punpcklbw mm5,mm7 - lea REF,[REF+YSTRIDE*2] - punpckhbw mm3,mm7 - psubw mm4,mm5 - psubw mm1,mm3 - /*Write the answer out.*/ - movq [RESIDUE+0x00],mm0 - movq [RESIDUE+0x08],mm2 - movq [RESIDUE+0x10],mm4 - movq [RESIDUE+0x18],mm1 - lea RESIDUE,[RESIDUE+0x20] - mov _residue,RESIDUE - mov _src,SRC - mov _ref,REF -#undef SRC -#undef YSTRIDE -#undef RESIDUE -#undef REF - } - } -} - -void oc_enc_frag_sub_128_mmx(ogg_int16_t _residue[64], - const unsigned char *_src,int _ystride){ - __asm{ -#define YSTRIDE edx -#define YSTRIDE3 edi -#define RESIDUE ecx -#define SRC eax - mov YSTRIDE,_ystride - mov RESIDUE,_residue - mov SRC,_src - /*mm0=[src]*/ - movq mm0,[SRC] - /*mm1=[src+ystride]*/ - movq mm1,[SRC+YSTRIDE] - /*mm6={-1}x4*/ - pcmpeqw mm6,mm6 - /*mm2=[src+2*ystride]*/ - movq mm2,[SRC+YSTRIDE*2] - /*[ystride3]=3*[ystride]*/ - lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] - /*mm6={1}x4*/ - psllw mm6,15 - /*mm3=[src+3*ystride]*/ - movq mm3,[SRC+YSTRIDE3] - /*mm6={128}x4*/ - psrlw mm6,8 - /*mm7=0*/ - pxor mm7,mm7 - /*[src]=[src]+4*[ystride]*/ - lea SRC,[SRC+YSTRIDE*4] - /*Compute [src]-128 and [src+ystride]-128*/ - movq mm4,mm0 - punpcklbw mm0,mm7 - movq mm5,mm1 - punpckhbw mm4,mm7 - psubw mm0,mm6 - punpcklbw mm1,mm7 - psubw mm4,mm6 - punpckhbw mm5,mm7 - psubw mm1,mm6 - psubw mm5,mm6 - /*Write the answer out.*/ - movq [RESIDUE+0x00],mm0 - movq [RESIDUE+0x08],mm4 - movq [RESIDUE+0x10],mm1 - movq [RESIDUE+0x18],mm5 - /*mm0=[src+4*ystride]*/ - movq mm0,[SRC] - /*mm1=[src+5*ystride]*/ - movq mm1,[SRC+YSTRIDE] - /*Compute [src+2*ystride]-128 and [src+3*ystride]-128*/ - movq mm4,mm2 - punpcklbw mm2,mm7 - movq mm5,mm3 - 
punpckhbw mm4,mm7 - psubw mm2,mm6 - punpcklbw mm3,mm7 - psubw mm4,mm6 - punpckhbw mm5,mm7 - psubw mm3,mm6 - psubw mm5,mm6 - /*Write the answer out.*/ - movq [RESIDUE+0x20],mm2 - movq [RESIDUE+0x28],mm4 - movq [RESIDUE+0x30],mm3 - movq [RESIDUE+0x38],mm5 - /*Compute [src+6*ystride]-128 and [src+7*ystride]-128*/ - movq mm2,[SRC+YSTRIDE*2] - movq mm3,[SRC+YSTRIDE3] - movq mm4,mm0 - punpcklbw mm0,mm7 - movq mm5,mm1 - punpckhbw mm4,mm7 - psubw mm0,mm6 - punpcklbw mm1,mm7 - psubw mm4,mm6 - punpckhbw mm5,mm7 - psubw mm1,mm6 - psubw mm5,mm6 - /*Write the answer out.*/ - movq [RESIDUE+0x40],mm0 - movq [RESIDUE+0x48],mm4 - movq [RESIDUE+0x50],mm1 - movq [RESIDUE+0x58],mm5 - /*Compute [src+6*ystride]-128 and [src+7*ystride]-128*/ - movq mm4,mm2 - punpcklbw mm2,mm7 - movq mm5,mm3 - punpckhbw mm4,mm7 - psubw mm2,mm6 - punpcklbw mm3,mm7 - psubw mm4,mm6 - punpckhbw mm5,mm7 - psubw mm3,mm6 - psubw mm5,mm6 - /*Write the answer out.*/ - movq [RESIDUE+0x60],mm2 - movq [RESIDUE+0x68],mm4 - movq [RESIDUE+0x70],mm3 - movq [RESIDUE+0x78],mm5 -#undef YSTRIDE -#undef YSTRIDE3 -#undef RESIDUE -#undef SRC - } -} - -void oc_enc_frag_copy2_mmxext(unsigned char *_dst, - const unsigned char *_src1,const unsigned char *_src2,int _ystride){ - oc_int_frag_copy2_mmxext(_dst,_ystride,_src1,_src2,_ystride); -} - -#endif diff --git a/drivers/theora/x86_vc/mmxfdct.c b/drivers/theora/x86_vc/mmxfdct.c deleted file mode 100644 index dcf17c9fa7..0000000000 --- a/drivers/theora/x86_vc/mmxfdct.c +++ /dev/null @@ -1,670 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 1999-2006 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ********************************************************************/ - /*MMX fDCT implementation for x86_32*/ -/*$Id: fdct_ses2.c 14579 2008-03-12 06:42:40Z xiphmont $*/ -#include "x86enc.h" - -#if defined(OC_X86_ASM) - -#define OC_FDCT_STAGE1_8x4 __asm{ \ - /*Stage 1:*/ \ - /*mm0=t7'=t0-t7*/ \ - __asm psubw mm0,mm7 \ - __asm paddw mm7,mm7 \ - /*mm1=t6'=t1-t6*/ \ - __asm psubw mm1, mm6 \ - __asm paddw mm6,mm6 \ - /*mm2=t5'=t2-t5*/ \ - __asm psubw mm2,mm5 \ - __asm paddw mm5,mm5 \ - /*mm3=t4'=t3-t4*/ \ - __asm psubw mm3,mm4 \ - __asm paddw mm4,mm4 \ - /*mm7=t0'=t0+t7*/ \ - __asm paddw mm7,mm0 \ - /*mm6=t1'=t1+t6*/ \ - __asm paddw mm6,mm1 \ - /*mm5=t2'=t2+t5*/ \ - __asm paddw mm5,mm2 \ - /*mm4=t3'=t3+t4*/ \ - __asm paddw mm4,mm3\ -} - -#define OC_FDCT8x4(_r0,_r1,_r2,_r3,_r4,_r5,_r6,_r7) __asm{ \ - /*Stage 2:*/ \ - /*mm7=t3''=t0'-t3'*/ \ - __asm psubw mm7,mm4 \ - __asm paddw mm4,mm4 \ - /*mm6=t2''=t1'-t2'*/ \ - __asm psubw mm6,mm5 \ - __asm movq [Y+_r6],mm7 \ - __asm paddw mm5,mm5 \ - /*mm1=t5''=t6'-t5'*/ \ - __asm psubw mm1,mm2 \ - __asm movq [Y+_r2],mm6 \ - /*mm4=t0''=t0'+t3'*/ \ - __asm paddw mm4,mm7 \ - __asm paddw mm2,mm2 \ - /*mm5=t1''=t1'+t2'*/ \ - __asm movq [Y+_r0],mm4 \ - __asm paddw mm5,mm6 \ - /*mm2=t6''=t6'+t5'*/ \ - __asm paddw mm2,mm1 \ - __asm movq [Y+_r4],mm5 \ - /*mm0=t7', mm1=t5'', mm2=t6'', mm3=t4'.*/ \ - /*mm4, mm5, mm6, mm7 are free.*/ \ - /*Stage 3:*/ \ - /*mm6={2}x4, mm7={27146,0xB500>>1}x2*/ \ - __asm mov A,0x5A806A0A \ - __asm pcmpeqb mm6,mm6 \ - __asm movd mm7,A \ - __asm psrlw mm6,15 \ - __asm punpckldq mm7,mm7 \ - __asm paddw mm6,mm6 \ - /*mm0=0, m2={-1}x4 \ - mm5:mm4=t5''*27146+0xB500*/ \ - __asm movq mm4,mm1 \ - __asm movq mm5,mm1 \ - __asm punpcklwd mm4,mm6 \ - __asm movq [Y+_r3],mm2 \ - __asm pmaddwd mm4,mm7 \ - __asm movq [Y+_r7],mm0 \ - __asm punpckhwd mm5,mm6 \ - __asm pxor mm0,mm0 \ - __asm pmaddwd mm5,mm7 
\ - __asm pcmpeqb mm2,mm2 \ - /*mm2=t6'', mm1=t5''+(t5''!=0) \ - mm4=(t5''*27146+0xB500>>16)*/ \ - __asm pcmpeqw mm0,mm1 \ - __asm psrad mm4,16 \ - __asm psubw mm0,mm2 \ - __asm movq mm2, [Y+_r3] \ - __asm psrad mm5,16 \ - __asm paddw mm1,mm0 \ - __asm packssdw mm4,mm5 \ - /*mm4=s=(t5''*27146+0xB500>>16)+t5''+(t5''!=0)>>1*/ \ - __asm paddw mm4,mm1 \ - __asm movq mm0, [Y+_r7] \ - __asm psraw mm4,1 \ - __asm movq mm1,mm3 \ - /*mm3=t4''=t4'+s*/ \ - __asm paddw mm3,mm4 \ - /*mm1=t5'''=t4'-s*/ \ - __asm psubw mm1,mm4 \ - /*mm1=0, mm3={-1}x4 \ - mm5:mm4=t6''*27146+0xB500*/ \ - __asm movq mm4,mm2 \ - __asm movq mm5,mm2 \ - __asm punpcklwd mm4,mm6 \ - __asm movq [Y+_r5],mm1 \ - __asm pmaddwd mm4,mm7 \ - __asm movq [Y+_r1],mm3 \ - __asm punpckhwd mm5,mm6 \ - __asm pxor mm1,mm1 \ - __asm pmaddwd mm5,mm7 \ - __asm pcmpeqb mm3,mm3 \ - /*mm2=t6''+(t6''!=0), mm4=(t6''*27146+0xB500>>16)*/ \ - __asm psrad mm4,16 \ - __asm pcmpeqw mm1,mm2 \ - __asm psrad mm5,16 \ - __asm psubw mm1,mm3 \ - __asm packssdw mm4,mm5 \ - __asm paddw mm2,mm1 \ - /*mm1=t1'' \ - mm4=s=(t6''*27146+0xB500>>16)+t6''+(t6''!=0)>>1*/ \ - __asm paddw mm4,mm2 \ - __asm movq mm1,[Y+_r4] \ - __asm psraw mm4,1 \ - __asm movq mm2,mm0 \ - /*mm7={54491-0x7FFF,0x7FFF}x2 \ - mm0=t7''=t7'+s*/ \ - __asm paddw mm0,mm4 \ - /*mm2=t6'''=t7'-s*/ \ - __asm psubw mm2,mm4 \ - /*Stage 4:*/ \ - /*mm0=0, mm2=t0'' \ - mm5:mm4=t1''*27146+0xB500*/ \ - __asm movq mm4,mm1 \ - __asm movq mm5,mm1 \ - __asm punpcklwd mm4,mm6 \ - __asm movq [Y+_r3],mm2 \ - __asm pmaddwd mm4,mm7 \ - __asm movq mm2,[Y+_r0] \ - __asm punpckhwd mm5,mm6 \ - __asm movq [Y+_r7],mm0 \ - __asm pmaddwd mm5,mm7 \ - __asm pxor mm0,mm0 \ - /*mm7={27146,0x4000>>1}x2 \ - mm0=s=(t1''*27146+0xB500>>16)+t1''+(t1''!=0)*/ \ - __asm psrad mm4,16 \ - __asm mov A,0x20006A0A \ - __asm pcmpeqw mm0,mm1 \ - __asm movd mm7,A \ - __asm psrad mm5,16 \ - __asm psubw mm0,mm3 \ - __asm packssdw mm4,mm5 \ - __asm paddw mm0,mm1 \ - __asm punpckldq mm7,mm7 \ - __asm paddw mm0,mm4 \ - 
/*mm6={0x00000E3D}x2 \ - mm1=-(t0''==0), mm5:mm4=t0''*27146+0x4000*/ \ - __asm movq mm4,mm2 \ - __asm movq mm5,mm2 \ - __asm punpcklwd mm4,mm6 \ - __asm mov A,0x0E3D \ - __asm pmaddwd mm4,mm7 \ - __asm punpckhwd mm5,mm6 \ - __asm movd mm6,A \ - __asm pmaddwd mm5,mm7 \ - __asm pxor mm1,mm1 \ - __asm punpckldq mm6,mm6 \ - __asm pcmpeqw mm1,mm2 \ - /*mm4=r=(t0''*27146+0x4000>>16)+t0''+(t0''!=0)*/ \ - __asm psrad mm4,16 \ - __asm psubw mm1,mm3 \ - __asm psrad mm5,16 \ - __asm paddw mm2,mm1 \ - __asm packssdw mm4,mm5 \ - __asm movq mm1,[Y+_r5] \ - __asm paddw mm4,mm2 \ - /*mm2=t6'', mm0=_y[0]=u=r+s>>1 \ - The naive implementation could cause overflow, so we use \ - u=(r&s)+((r^s)>>1).*/ \ - __asm movq mm2,[Y+_r3] \ - __asm movq mm7,mm0 \ - __asm pxor mm0,mm4 \ - __asm pand mm7,mm4 \ - __asm psraw mm0,1 \ - __asm mov A,0x7FFF54DC \ - __asm paddw mm0,mm7 \ - __asm movd mm7,A \ - /*mm7={54491-0x7FFF,0x7FFF}x2 \ - mm4=_y[4]=v=r-u*/ \ - __asm psubw mm4,mm0 \ - __asm punpckldq mm7,mm7 \ - __asm movq [Y+_r4],mm4 \ - /*mm0=0, mm7={36410}x4 \ - mm1=(t5'''!=0), mm5:mm4=54491*t5'''+0x0E3D*/ \ - __asm movq mm4,mm1 \ - __asm movq mm5,mm1 \ - __asm punpcklwd mm4,mm1 \ - __asm mov A,0x8E3A8E3A \ - __asm pmaddwd mm4,mm7 \ - __asm movq [Y+_r0],mm0 \ - __asm punpckhwd mm5,mm1 \ - __asm pxor mm0,mm0 \ - __asm pmaddwd mm5,mm7 \ - __asm pcmpeqw mm1,mm0 \ - __asm movd mm7,A \ - __asm psubw mm1,mm3 \ - __asm punpckldq mm7,mm7 \ - __asm paddd mm4,mm6 \ - __asm paddd mm5,mm6 \ - /*mm0=0 \ - mm3:mm1=36410*t6'''+((t5'''!=0)<<16)*/ \ - __asm movq mm6,mm2 \ - __asm movq mm3,mm2 \ - __asm pmulhw mm6,mm7 \ - __asm paddw mm1,mm2 \ - __asm pmullw mm3,mm7 \ - __asm pxor mm0,mm0 \ - __asm paddw mm6,mm1 \ - __asm movq mm1,mm3 \ - __asm punpckhwd mm3,mm6 \ - __asm punpcklwd mm1,mm6 \ - /*mm3={-1}x4, mm6={1}x4 \ - mm4=_y[5]=u=(54491*t5'''+36410*t6'''+0x0E3D>>16)+(t5'''!=0)*/ \ - __asm paddd mm5,mm3 \ - __asm paddd mm4,mm1 \ - __asm psrad mm5,16 \ - __asm pxor mm6,mm6 \ - __asm psrad mm4,16 \ - __asm pcmpeqb 
mm3,mm3 \ - __asm packssdw mm4,mm5 \ - __asm psubw mm6,mm3 \ - /*mm1=t7'', mm7={26568,0x3400}x2 \ - mm2=s=t6'''-(36410*u>>16)*/ \ - __asm movq mm1,mm4 \ - __asm mov A,0x340067C8 \ - __asm pmulhw mm4,mm7 \ - __asm movd mm7,A \ - __asm movq [Y+_r5],mm1 \ - __asm punpckldq mm7,mm7 \ - __asm paddw mm4,mm1 \ - __asm movq mm1,[Y+_r7] \ - __asm psubw mm2,mm4 \ - /*mm6={0x00007B1B}x2 \ - mm0=(s!=0), mm5:mm4=s*26568+0x3400*/ \ - __asm movq mm4,mm2 \ - __asm movq mm5,mm2 \ - __asm punpcklwd mm4,mm6 \ - __asm pcmpeqw mm0,mm2 \ - __asm pmaddwd mm4,mm7 \ - __asm mov A,0x7B1B \ - __asm punpckhwd mm5,mm6 \ - __asm movd mm6,A \ - __asm pmaddwd mm5,mm7 \ - __asm psubw mm0,mm3 \ - __asm punpckldq mm6,mm6 \ - /*mm7={64277-0x7FFF,0x7FFF}x2 \ - mm2=_y[3]=v=(s*26568+0x3400>>17)+s+(s!=0)*/ \ - __asm psrad mm4,17 \ - __asm paddw mm2,mm0 \ - __asm psrad mm5,17 \ - __asm mov A,0x7FFF7B16 \ - __asm packssdw mm4,mm5 \ - __asm movd mm7,A \ - __asm paddw mm2,mm4 \ - __asm punpckldq mm7,mm7 \ - /*mm0=0, mm7={12785}x4 \ - mm1=(t7''!=0), mm2=t4'', mm5:mm4=64277*t7''+0x7B1B*/ \ - __asm movq mm4,mm1 \ - __asm movq mm5,mm1 \ - __asm movq [Y+_r3],mm2 \ - __asm punpcklwd mm4,mm1 \ - __asm movq mm2,[Y+_r1] \ - __asm pmaddwd mm4,mm7 \ - __asm mov A,0x31F131F1 \ - __asm punpckhwd mm5,mm1 \ - __asm pxor mm0,mm0 \ - __asm pmaddwd mm5,mm7 \ - __asm pcmpeqw mm1,mm0 \ - __asm movd mm7,A \ - __asm psubw mm1,mm3 \ - __asm punpckldq mm7,mm7 \ - __asm paddd mm4,mm6 \ - __asm paddd mm5,mm6 \ - /*mm3:mm1=12785*t4'''+((t7''!=0)<<16)*/ \ - __asm movq mm6,mm2 \ - __asm movq mm3,mm2 \ - __asm pmulhw mm6,mm7 \ - __asm pmullw mm3,mm7 \ - __asm paddw mm6,mm1 \ - __asm movq mm1,mm3 \ - __asm punpckhwd mm3,mm6 \ - __asm punpcklwd mm1,mm6 \ - /*mm3={-1}x4, mm6={1}x4 \ - mm4=_y[1]=u=(12785*t4'''+64277*t7''+0x7B1B>>16)+(t7''!=0)*/ \ - __asm paddd mm5,mm3 \ - __asm paddd mm4,mm1 \ - __asm psrad mm5,16 \ - __asm pxor mm6,mm6 \ - __asm psrad mm4,16 \ - __asm pcmpeqb mm3,mm3 \ - __asm packssdw mm4,mm5 \ - __asm psubw mm6,mm3 \ - 
/*mm1=t3'', mm7={20539,0x3000}x2 \ - mm4=s=(12785*u>>16)-t4''*/ \ - __asm movq [Y+_r1],mm4 \ - __asm pmulhw mm4,mm7 \ - __asm mov A,0x3000503B \ - __asm movq mm1,[Y+_r6] \ - __asm movd mm7,A \ - __asm psubw mm4,mm2 \ - __asm punpckldq mm7,mm7 \ - /*mm6={0x00006CB7}x2 \ - mm0=(s!=0), mm5:mm4=s*20539+0x3000*/ \ - __asm movq mm5,mm4 \ - __asm movq mm2,mm4 \ - __asm punpcklwd mm4,mm6 \ - __asm pcmpeqw mm0,mm2 \ - __asm pmaddwd mm4,mm7 \ - __asm mov A,0x6CB7 \ - __asm punpckhwd mm5,mm6 \ - __asm movd mm6,A \ - __asm pmaddwd mm5,mm7 \ - __asm psubw mm0,mm3 \ - __asm punpckldq mm6,mm6 \ - /*mm7={60547-0x7FFF,0x7FFF}x2 \ - mm2=_y[7]=v=(s*20539+0x3000>>20)+s+(s!=0)*/ \ - __asm psrad mm4,20 \ - __asm paddw mm2,mm0 \ - __asm psrad mm5,20 \ - __asm mov A,0x7FFF6C84 \ - __asm packssdw mm4,mm5 \ - __asm movd mm7,A \ - __asm paddw mm2,mm4 \ - __asm punpckldq mm7,mm7 \ - /*mm0=0, mm7={25080}x4 \ - mm2=t2'', mm5:mm4=60547*t3''+0x6CB7*/ \ - __asm movq mm4,mm1 \ - __asm movq mm5,mm1 \ - __asm movq [Y+_r7],mm2 \ - __asm punpcklwd mm4,mm1 \ - __asm movq mm2,[Y+_r2] \ - __asm pmaddwd mm4,mm7 \ - __asm mov A,0x61F861F8 \ - __asm punpckhwd mm5,mm1 \ - __asm pxor mm0,mm0 \ - __asm pmaddwd mm5,mm7 \ - __asm movd mm7,A \ - __asm pcmpeqw mm1,mm0 \ - __asm psubw mm1,mm3 \ - __asm punpckldq mm7,mm7 \ - __asm paddd mm4,mm6 \ - __asm paddd mm5,mm6 \ - /*mm3:mm1=25080*t2''+((t3''!=0)<<16)*/ \ - __asm movq mm6,mm2 \ - __asm movq mm3,mm2 \ - __asm pmulhw mm6,mm7 \ - __asm pmullw mm3,mm7 \ - __asm paddw mm6,mm1 \ - __asm movq mm1,mm3 \ - __asm punpckhwd mm3,mm6 \ - __asm punpcklwd mm1,mm6 \ - /*mm1={-1}x4 \ - mm4=u=(25080*t2''+60547*t3''+0x6CB7>>16)+(t3''!=0)*/ \ - __asm paddd mm5,mm3 \ - __asm paddd mm4,mm1 \ - __asm psrad mm5,16 \ - __asm mov A,0x28005460 \ - __asm psrad mm4,16 \ - __asm pcmpeqb mm1,mm1 \ - __asm packssdw mm4,mm5 \ - /*mm5={1}x4, mm6=_y[2]=u, mm7={21600,0x2800}x2 \ - mm4=s=(25080*u>>16)-t2''*/ \ - __asm movq mm6,mm4 \ - __asm pmulhw mm4,mm7 \ - __asm pxor mm5,mm5 \ - __asm movd 
mm7,A \ - __asm psubw mm5,mm1 \ - __asm punpckldq mm7,mm7 \ - __asm psubw mm4,mm2 \ - /*mm2=s+(s!=0) \ - mm4:mm3=s*21600+0x2800*/ \ - __asm movq mm3,mm4 \ - __asm movq mm2,mm4 \ - __asm punpckhwd mm4,mm5 \ - __asm pcmpeqw mm0,mm2 \ - __asm pmaddwd mm4,mm7 \ - __asm psubw mm0,mm1 \ - __asm punpcklwd mm3,mm5 \ - __asm paddw mm2,mm0 \ - __asm pmaddwd mm3,mm7 \ - /*mm0=_y[4], mm1=_y[7], mm4=_y[0], mm5=_y[5] \ - mm3=_y[6]=v=(s*21600+0x2800>>18)+s+(s!=0)*/ \ - __asm movq mm0,[Y+_r4] \ - __asm psrad mm4,18 \ - __asm movq mm5,[Y+_r5] \ - __asm psrad mm3,18 \ - __asm movq mm1,[Y+_r7] \ - __asm packssdw mm3,mm4 \ - __asm movq mm4,[Y+_r0] \ - __asm paddw mm3,mm2 \ -} - -/*On input, mm4=_y[0], mm6=_y[2], mm0=_y[4], mm5=_y[5], mm3=_y[6], mm1=_y[7]. - On output, {_y[4],mm1,mm2,mm3} contains the transpose of _y[4...7] and - {mm4,mm5,mm6,mm7} contains the transpose of _y[0...3].*/ -#define OC_TRANSPOSE8x4(_r0,_r1,_r2,_r3,_r4,_r5,_r6,_r7) __asm{ \ - /*First 4x4 transpose:*/ \ - /*mm0 = e3 e2 e1 e0 \ - mm5 = f3 f2 f1 f0 \ - mm3 = g3 g2 g1 g0 \ - mm1 = h3 h2 h1 h0*/ \ - __asm movq mm2,mm0 \ - __asm punpcklwd mm0,mm5 \ - __asm punpckhwd mm2,mm5 \ - __asm movq mm5,mm3 \ - __asm punpcklwd mm3,mm1 \ - __asm punpckhwd mm5,mm1 \ - /*mm0 = f1 e1 f0 e0 \ - mm2 = f3 e3 f2 e2 \ - mm3 = h1 g1 h0 g0 \ - mm5 = h3 g3 h2 g2*/ \ - __asm movq mm1,mm0 \ - __asm punpckldq mm0,mm3 \ - __asm movq [Y+_r4],mm0 \ - __asm punpckhdq mm1,mm3 \ - __asm movq mm0,[Y+_r1] \ - __asm movq mm3,mm2 \ - __asm punpckldq mm2,mm5 \ - __asm punpckhdq mm3,mm5 \ - __asm movq mm5,[Y+_r3] \ - /*_y[4] = h0 g0 f0 e0 \ - mm1 = h1 g1 f1 e1 \ - mm2 = h2 g2 f2 e2 \ - mm3 = h3 g3 f3 e3*/ \ - /*Second 4x4 transpose:*/ \ - /*mm4 = a3 a2 a1 a0 \ - mm0 = b3 b2 b1 b0 \ - mm6 = c3 c2 c1 c0 \ - mm5 = d3 d2 d1 d0*/ \ - __asm movq mm7,mm4 \ - __asm punpcklwd mm4,mm0 \ - __asm punpckhwd mm7,mm0 \ - __asm movq mm0,mm6 \ - __asm punpcklwd mm6,mm5 \ - __asm punpckhwd mm0,mm5 \ - /*mm4 = b1 a1 b0 a0 \ - mm7 = b3 a3 b2 a2 \ - mm6 = d1 c1 d0 c0 \ - 
mm0 = d3 c3 d2 c2*/ \ - __asm movq mm5,mm4 \ - __asm punpckldq mm4,mm6 \ - __asm punpckhdq mm5,mm6 \ - __asm movq mm6,mm7 \ - __asm punpckhdq mm7,mm0 \ - __asm punpckldq mm6,mm0 \ - /*mm4 = d0 c0 b0 a0 \ - mm5 = d1 c1 b1 a1 \ - mm6 = d2 c2 b2 a2 \ - mm7 = d3 c3 b3 a3*/ \ -} - -/*MMX implementation of the fDCT.*/ -void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ - ptrdiff_t a; - __asm{ -#define Y eax -#define A ecx -#define X edx - /*Add two extra bits of working precision to improve accuracy; any more and - we could overflow.*/ - /*We also add biases to correct for some systematic error that remains in - the full fDCT->iDCT round trip.*/ - mov X, _x - mov Y, _y - movq mm0,[0x00+X] - movq mm1,[0x10+X] - movq mm2,[0x20+X] - movq mm3,[0x30+X] - pcmpeqb mm4,mm4 - pxor mm7,mm7 - movq mm5,mm0 - psllw mm0,2 - pcmpeqw mm5,mm7 - movq mm7,[0x70+X] - psllw mm1,2 - psubw mm5,mm4 - psllw mm2,2 - mov A,1 - pslld mm5,16 - movd mm6,A - psllq mm5,16 - mov A,0x10001 - psllw mm3,2 - movd mm4,A - punpckhwd mm5,mm6 - psubw mm1,mm6 - movq mm6,[0x60+X] - paddw mm0,mm5 - movq mm5,[0x50+X] - paddw mm0,mm4 - movq mm4,[0x40+X] - /*We inline stage1 of the transform here so we can get better instruction - scheduling with the shifts.*/ - /*mm0=t7'=t0-t7*/ - psllw mm7,2 - psubw mm0,mm7 - psllw mm6,2 - paddw mm7,mm7 - /*mm1=t6'=t1-t6*/ - psllw mm5,2 - psubw mm1,mm6 - psllw mm4,2 - paddw mm6,mm6 - /*mm2=t5'=t2-t5*/ - psubw mm2,mm5 - paddw mm5,mm5 - /*mm3=t4'=t3-t4*/ - psubw mm3,mm4 - paddw mm4,mm4 - /*mm7=t0'=t0+t7*/ - paddw mm7,mm0 - /*mm6=t1'=t1+t6*/ - paddw mm6,mm1 - /*mm5=t2'=t2+t5*/ - paddw mm5,mm2 - /*mm4=t3'=t3+t4*/ - paddw mm4,mm3 - OC_FDCT8x4(0x00,0x10,0x20,0x30,0x40,0x50,0x60,0x70) - OC_TRANSPOSE8x4(0x00,0x10,0x20,0x30,0x40,0x50,0x60,0x70) - /*Swap out this 8x4 block for the next one.*/ - movq mm0,[0x08+X] - movq [0x30+Y],mm7 - movq mm7,[0x78+X] - movq [0x50+Y],mm1 - movq mm1,[0x18+X] - movq [0x20+Y],mm6 - movq mm6,[0x68+X] - movq [0x60+Y],mm2 - movq mm2,[0x28+X] - 
movq [0x10+Y],mm5 - movq mm5,[0x58+X] - movq [0x70+Y],mm3 - movq mm3,[0x38+X] - /*And increase its working precision, too.*/ - psllw mm0,2 - movq [0x00+Y],mm4 - psllw mm7,2 - movq mm4,[0x48+X] - /*We inline stage1 of the transform here so we can get better instruction - scheduling with the shifts.*/ - /*mm0=t7'=t0-t7*/ - psubw mm0,mm7 - psllw mm1,2 - paddw mm7,mm7 - psllw mm6,2 - /*mm1=t6'=t1-t6*/ - psubw mm1,mm6 - psllw mm2,2 - paddw mm6,mm6 - psllw mm5,2 - /*mm2=t5'=t2-t5*/ - psubw mm2,mm5 - psllw mm3,2 - paddw mm5,mm5 - psllw mm4,2 - /*mm3=t4'=t3-t4*/ - psubw mm3,mm4 - paddw mm4,mm4 - /*mm7=t0'=t0+t7*/ - paddw mm7,mm0 - /*mm6=t1'=t1+t6*/ - paddw mm6,mm1 - /*mm5=t2'=t2+t5*/ - paddw mm5,mm2 - /*mm4=t3'=t3+t4*/ - paddw mm4,mm3 - OC_FDCT8x4(0x08,0x18,0x28,0x38,0x48,0x58,0x68,0x78) - OC_TRANSPOSE8x4(0x08,0x18,0x28,0x38,0x48,0x58,0x68,0x78) - /*Here the first 4x4 block of output from the last transpose is the second - 4x4 block of input for the next transform. - We have cleverly arranged that it already be in the appropriate place, - so we only have to do half the stores and loads.*/ - movq mm0,[0x00+Y] - movq [0x58+Y],mm1 - movq mm1,[0x10+Y] - movq [0x68+Y],mm2 - movq mm2,[0x20+Y] - movq [0x78+Y],mm3 - movq mm3,[0x30+Y] - OC_FDCT_STAGE1_8x4 - OC_FDCT8x4(0x00,0x10,0x20,0x30,0x08,0x18,0x28,0x38) - OC_TRANSPOSE8x4(0x00,0x10,0x20,0x30,0x08,0x18,0x28,0x38) - /*mm0={-2}x4*/ - pcmpeqw mm0,mm0 - paddw mm0,mm0 - /*Round the results.*/ - psubw mm1,mm0 - psubw mm2,mm0 - psraw mm1,2 - psubw mm3,mm0 - movq [0x18+Y],mm1 - psraw mm2,2 - psubw mm4,mm0 - movq mm1,[0x08+Y] - psraw mm3,2 - psubw mm5,mm0 - psraw mm4,2 - psubw mm6,mm0 - psraw mm5,2 - psubw mm7,mm0 - psraw mm6,2 - psubw mm1,mm0 - psraw mm7,2 - movq mm0,[0x40+Y] - psraw mm1,2 - movq [0x30+Y],mm7 - movq mm7,[0x78+Y] - movq [0x08+Y],mm1 - movq mm1,[0x50+Y] - movq [0x20+Y],mm6 - movq mm6,[0x68+Y] - movq [0x28+Y],mm2 - movq mm2,[0x60+Y] - movq [0x10+Y],mm5 - movq mm5,[0x58+Y] - movq [0x38+Y],mm3 - movq mm3,[0x70+Y] - movq 
[0x00+Y],mm4 - movq mm4,[0x48+Y] - OC_FDCT_STAGE1_8x4 - OC_FDCT8x4(0x40,0x50,0x60,0x70,0x48,0x58,0x68,0x78) - OC_TRANSPOSE8x4(0x40,0x50,0x60,0x70,0x48,0x58,0x68,0x78) - /*mm0={-2}x4*/ - pcmpeqw mm0,mm0 - paddw mm0,mm0 - /*Round the results.*/ - psubw mm1,mm0 - psubw mm2,mm0 - psraw mm1,2 - psubw mm3,mm0 - movq [0x58+Y],mm1 - psraw mm2,2 - psubw mm4,mm0 - movq mm1,[0x48+Y] - psraw mm3,2 - psubw mm5,mm0 - movq [0x68+Y],mm2 - psraw mm4,2 - psubw mm6,mm0 - movq [0x78+Y],mm3 - psraw mm5,2 - psubw mm7,mm0 - movq [0x40+Y],mm4 - psraw mm6,2 - psubw mm1,mm0 - movq [0x50+Y],mm5 - psraw mm7,2 - movq [0x60+Y],mm6 - psraw mm1,2 - movq [0x70+Y],mm7 - movq [0x48+Y],mm1 -#undef Y -#undef A -#undef X - } -} - -#endif diff --git a/drivers/theora/x86_vc/mmxfrag.c b/drivers/theora/x86_vc/mmxfrag.c deleted file mode 100644 index 4eb2084dc6..0000000000 --- a/drivers/theora/x86_vc/mmxfrag.c +++ /dev/null @@ -1,337 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: mmxfrag.c 16578 2009-09-25 19:50:48Z cristianadam $ - - ********************************************************************/ - -/*MMX acceleration of fragment reconstruction for motion compensation. - Originally written by Rudolf Marek. - Additional optimization by Nils Pipenbrinck. - Note: Loops are unrolled for best performance. 
- The iteration each instruction belongs to is marked in the comments as #i.*/ -#include -#include "x86int.h" -#include "mmxfrag.h" - -#if defined(OC_X86_ASM) - -/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes - between rows.*/ -void oc_frag_copy_mmx(unsigned char *_dst, - const unsigned char *_src,int _ystride){ -#define SRC edx -#define DST eax -#define YSTRIDE ecx -#define YSTRIDE3 esi - OC_FRAG_COPY_MMX(_dst,_src,_ystride); -#undef SRC -#undef DST -#undef YSTRIDE -#undef YSTRIDE3 -} - -void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride, - const ogg_int16_t *_residue){ - __asm{ -#define DST edx -#define DST4 esi -#define YSTRIDE eax -#define YSTRIDE3 edi -#define RESIDUE ecx - mov DST,_dst - mov YSTRIDE,_ystride - mov RESIDUE,_residue - lea DST4,[DST+YSTRIDE*4] - lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] - /*Set mm0 to 0xFFFFFFFFFFFFFFFF.*/ - pcmpeqw mm0,mm0 - /*#0 Load low residue.*/ - movq mm1,[0*8+RESIDUE] - /*#0 Load high residue.*/ - movq mm2,[1*8+RESIDUE] - /*Set mm0 to 0x8000800080008000.*/ - psllw mm0,15 - /*#1 Load low residue.*/ - movq mm3,[2*8+RESIDUE] - /*#1 Load high residue.*/ - movq mm4,[3*8+RESIDUE] - /*Set mm0 to 0x0080008000800080.*/ - psrlw mm0,8 - /*#2 Load low residue.*/ - movq mm5,[4*8+RESIDUE] - /*#2 Load high residue.*/ - movq mm6,[5*8+RESIDUE] - /*#0 Bias low residue.*/ - paddsw mm1,mm0 - /*#0 Bias high residue.*/ - paddsw mm2,mm0 - /*#0 Pack to byte.*/ - packuswb mm1,mm2 - /*#1 Bias low residue.*/ - paddsw mm3,mm0 - /*#1 Bias high residue.*/ - paddsw mm4,mm0 - /*#1 Pack to byte.*/ - packuswb mm3,mm4 - /*#2 Bias low residue.*/ - paddsw mm5,mm0 - /*#2 Bias high residue.*/ - paddsw mm6,mm0 - /*#2 Pack to byte.*/ - packuswb mm5,mm6 - /*#0 Write row.*/ - movq [DST],mm1 - /*#1 Write row.*/ - movq [DST+YSTRIDE],mm3 - /*#2 Write row.*/ - movq [DST+YSTRIDE*2],mm5 - /*#3 Load low residue.*/ - movq mm1,[6*8+RESIDUE] - /*#3 Load high residue.*/ - movq mm2,[7*8+RESIDUE] - /*#4 Load high residue.*/ - movq 
mm3,[8*8+RESIDUE] - /*#4 Load high residue.*/ - movq mm4,[9*8+RESIDUE] - /*#5 Load high residue.*/ - movq mm5,[10*8+RESIDUE] - /*#5 Load high residue.*/ - movq mm6,[11*8+RESIDUE] - /*#3 Bias low residue.*/ - paddsw mm1,mm0 - /*#3 Bias high residue.*/ - paddsw mm2,mm0 - /*#3 Pack to byte.*/ - packuswb mm1,mm2 - /*#4 Bias low residue.*/ - paddsw mm3,mm0 - /*#4 Bias high residue.*/ - paddsw mm4,mm0 - /*#4 Pack to byte.*/ - packuswb mm3,mm4 - /*#5 Bias low residue.*/ - paddsw mm5,mm0 - /*#5 Bias high residue.*/ - paddsw mm6,mm0 - /*#5 Pack to byte.*/ - packuswb mm5,mm6 - /*#3 Write row.*/ - movq [DST+YSTRIDE3],mm1 - /*#4 Write row.*/ - movq [DST4],mm3 - /*#5 Write row.*/ - movq [DST4+YSTRIDE],mm5 - /*#6 Load low residue.*/ - movq mm1,[12*8+RESIDUE] - /*#6 Load high residue.*/ - movq mm2,[13*8+RESIDUE] - /*#7 Load low residue.*/ - movq mm3,[14*8+RESIDUE] - /*#7 Load high residue.*/ - movq mm4,[15*8+RESIDUE] - /*#6 Bias low residue.*/ - paddsw mm1,mm0 - /*#6 Bias high residue.*/ - paddsw mm2,mm0 - /*#6 Pack to byte.*/ - packuswb mm1,mm2 - /*#7 Bias low residue.*/ - paddsw mm3,mm0 - /*#7 Bias high residue.*/ - paddsw mm4,mm0 - /*#7 Pack to byte.*/ - packuswb mm3,mm4 - /*#6 Write row.*/ - movq [DST4+YSTRIDE*2],mm1 - /*#7 Write row.*/ - movq [DST4+YSTRIDE3],mm3 -#undef DST -#undef DST4 -#undef YSTRIDE -#undef YSTRIDE3 -#undef RESIDUE - } -} - -void oc_frag_recon_inter_mmx(unsigned char *_dst,const unsigned char *_src, - int _ystride,const ogg_int16_t *_residue){ - int i; - /*Zero mm0.*/ - __asm pxor mm0,mm0; - for(i=4;i-->0;){ - __asm{ -#define DST edx -#define SRC ecx -#define YSTRIDE edi -#define RESIDUE eax - mov DST,_dst - mov SRC,_src - mov YSTRIDE,_ystride - mov RESIDUE,_residue - /*#0 Load source.*/ - movq mm3,[SRC] - /*#1 Load source.*/ - movq mm7,[SRC+YSTRIDE] - /*#0 Get copy of src.*/ - movq mm4,mm3 - /*#0 Expand high source.*/ - punpckhbw mm4,mm0 - /*#0 Expand low source.*/ - punpcklbw mm3,mm0 - /*#0 Add residue high.*/ - paddsw mm4,[8+RESIDUE] - /*#1 Get copy of 
src.*/ - movq mm2,mm7 - /*#0 Add residue low.*/ - paddsw mm3,[RESIDUE] - /*#1 Expand high source.*/ - punpckhbw mm2,mm0 - /*#0 Pack final row pixels.*/ - packuswb mm3,mm4 - /*#1 Expand low source.*/ - punpcklbw mm7,mm0 - /*#1 Add residue low.*/ - paddsw mm7,[16+RESIDUE] - /*#1 Add residue high.*/ - paddsw mm2,[24+RESIDUE] - /*Advance residue.*/ - lea RESIDUE,[32+RESIDUE] - /*#1 Pack final row pixels.*/ - packuswb mm7,mm2 - /*Advance src.*/ - lea SRC,[SRC+YSTRIDE*2] - /*#0 Write row.*/ - movq [DST],mm3 - /*#1 Write row.*/ - movq [DST+YSTRIDE],mm7 - /*Advance dst.*/ - lea DST,[DST+YSTRIDE*2] - mov _residue,RESIDUE - mov _dst,DST - mov _src,SRC -#undef DST -#undef SRC -#undef YSTRIDE -#undef RESIDUE - } - } -} - -void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1, - const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue){ - int i; - /*Zero mm7.*/ - __asm pxor mm7,mm7; - for(i=4;i-->0;){ - __asm{ -#define SRC1 ecx -#define SRC2 edi -#define YSTRIDE esi -#define RESIDUE edx -#define DST eax - mov YSTRIDE,_ystride - mov DST,_dst - mov RESIDUE,_residue - mov SRC1,_src1 - mov SRC2,_src2 - /*#0 Load src1.*/ - movq mm0,[SRC1] - /*#0 Load src2.*/ - movq mm2,[SRC2] - /*#0 Copy src1.*/ - movq mm1,mm0 - /*#0 Copy src2.*/ - movq mm3,mm2 - /*#1 Load src1.*/ - movq mm4,[SRC1+YSTRIDE] - /*#0 Unpack lower src1.*/ - punpcklbw mm0,mm7 - /*#1 Load src2.*/ - movq mm5,[SRC2+YSTRIDE] - /*#0 Unpack higher src1.*/ - punpckhbw mm1,mm7 - /*#0 Unpack lower src2.*/ - punpcklbw mm2,mm7 - /*#0 Unpack higher src2.*/ - punpckhbw mm3,mm7 - /*Advance src1 ptr.*/ - lea SRC1,[SRC1+YSTRIDE*2] - /*Advance src2 ptr.*/ - lea SRC2,[SRC2+YSTRIDE*2] - /*#0 Lower src1+src2.*/ - paddsw mm0,mm2 - /*#0 Higher src1+src2.*/ - paddsw mm1,mm3 - /*#1 Copy src1.*/ - movq mm2,mm4 - /*#0 Build lo average.*/ - psraw mm0,1 - /*#1 Copy src2.*/ - movq mm3,mm5 - /*#1 Unpack lower src1.*/ - punpcklbw mm4,mm7 - /*#0 Build hi average.*/ - psraw mm1,1 - /*#1 Unpack higher src1.*/ - punpckhbw 
mm2,mm7 - /*#0 low+=residue.*/ - paddsw mm0,[RESIDUE] - /*#1 Unpack lower src2.*/ - punpcklbw mm5,mm7 - /*#0 high+=residue.*/ - paddsw mm1,[8+RESIDUE] - /*#1 Unpack higher src2.*/ - punpckhbw mm3,mm7 - /*#1 Lower src1+src2.*/ - paddsw mm5,mm4 - /*#0 Pack and saturate.*/ - packuswb mm0,mm1 - /*#1 Higher src1+src2.*/ - paddsw mm3,mm2 - /*#0 Write row.*/ - movq [DST],mm0 - /*#1 Build lo average.*/ - psraw mm5,1 - /*#1 Build hi average.*/ - psraw mm3,1 - /*#1 low+=residue.*/ - paddsw mm5,[16+RESIDUE] - /*#1 high+=residue.*/ - paddsw mm3,[24+RESIDUE] - /*#1 Pack and saturate.*/ - packuswb mm5,mm3 - /*#1 Write row ptr.*/ - movq [DST+YSTRIDE],mm5 - /*Advance residue ptr.*/ - add RESIDUE,32 - /*Advance dest ptr.*/ - lea DST,[DST+YSTRIDE*2] - mov _dst,DST - mov _residue,RESIDUE - mov _src1,SRC1 - mov _src2,SRC2 -#undef SRC1 -#undef SRC2 -#undef YSTRIDE -#undef RESIDUE -#undef DST - } - } -} - -void oc_restore_fpu_mmx(void){ - __asm emms; -} - -#endif diff --git a/drivers/theora/x86_vc/mmxfrag.h b/drivers/theora/x86_vc/mmxfrag.h deleted file mode 100644 index 45ee93e777..0000000000 --- a/drivers/theora/x86_vc/mmxfrag.h +++ /dev/null @@ -1,61 +0,0 @@ -#if !defined(_x86_vc_mmxfrag_H) -# define _x86_vc_mmxfrag_H (1) -# include -# include "x86int.h" - -#if defined(OC_X86_ASM) - -/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes - between rows.*/ -#define OC_FRAG_COPY_MMX(_dst,_src,_ystride) \ - do{ \ - const unsigned char *src; \ - unsigned char *dst; \ - src=(_src); \ - dst=(_dst); \ - __asm mov SRC,src \ - __asm mov DST,dst \ - __asm mov YSTRIDE,_ystride \ - /*src+0*ystride*/ \ - __asm movq mm0,[SRC] \ - /*src+1*ystride*/ \ - __asm movq mm1,[SRC+YSTRIDE] \ - /*ystride3=ystride*3*/ \ - __asm lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] \ - /*src+2*ystride*/ \ - __asm movq mm2,[SRC+YSTRIDE*2] \ - /*src+3*ystride*/ \ - __asm movq mm3,[SRC+YSTRIDE3] \ - /*dst+0*ystride*/ \ - __asm movq [DST],mm0 \ - /*dst+1*ystride*/ \ - __asm movq [DST+YSTRIDE],mm1 \ - /*Pointer to 
next 4.*/ \ - __asm lea SRC,[SRC+YSTRIDE*4] \ - /*dst+2*ystride*/ \ - __asm movq [DST+YSTRIDE*2],mm2 \ - /*dst+3*ystride*/ \ - __asm movq [DST+YSTRIDE3],mm3 \ - /*Pointer to next 4.*/ \ - __asm lea DST,[DST+YSTRIDE*4] \ - /*src+0*ystride*/ \ - __asm movq mm0,[SRC] \ - /*src+1*ystride*/ \ - __asm movq mm1,[SRC+YSTRIDE] \ - /*src+2*ystride*/ \ - __asm movq mm2,[SRC+YSTRIDE*2] \ - /*src+3*ystride*/ \ - __asm movq mm3,[SRC+YSTRIDE3] \ - /*dst+0*ystride*/ \ - __asm movq [DST],mm0 \ - /*dst+1*ystride*/ \ - __asm movq [DST+YSTRIDE],mm1 \ - /*dst+2*ystride*/ \ - __asm movq [DST+YSTRIDE*2],mm2 \ - /*dst+3*ystride*/ \ - __asm movq [DST+YSTRIDE3],mm3 \ - } \ - while(0) - -# endif -#endif diff --git a/drivers/theora/x86_vc/mmxidct.c b/drivers/theora/x86_vc/mmxidct.c deleted file mode 100644 index 8f5ff6803c..0000000000 --- a/drivers/theora/x86_vc/mmxidct.c +++ /dev/null @@ -1,562 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: mmxidct.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -/*MMX acceleration of Theora's iDCT. - Originally written by Rudolf Marek, based on code from On2's VP3.*/ -#include "x86int.h" -#include "../dct.h" - -#if defined(OC_X86_ASM) - -/*These are offsets into the table of constants below.*/ -/*7 rows of cosines, in order: pi/16 * (1 ... 
7).*/ -#define OC_COSINE_OFFSET (0) -/*A row of 8's.*/ -#define OC_EIGHT_OFFSET (56) - - - -/*A table of constants used by the MMX routines.*/ -static const __declspec(align(16))ogg_uint16_t - OC_IDCT_CONSTS[(7+1)*4]={ - (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7, - (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7, - (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6, - (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6, - (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5, - (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5, - (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4, - (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4, - (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3, - (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3, - (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2, - (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2, - (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1, - (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1, - 8, 8, 8, 8 -}; - -/*38 cycles*/ -#define OC_IDCT_BEGIN __asm{ \ - __asm movq mm2,OC_I(3) \ - __asm movq mm6,OC_C(3) \ - __asm movq mm4,mm2 \ - __asm movq mm7,OC_J(5) \ - __asm pmulhw mm4,mm6 \ - __asm movq mm1,OC_C(5) \ - __asm pmulhw mm6,mm7 \ - __asm movq mm5,mm1 \ - __asm pmulhw mm1,mm2 \ - __asm movq mm3,OC_I(1) \ - __asm pmulhw mm5,mm7 \ - __asm movq mm0,OC_C(1) \ - __asm paddw mm4,mm2 \ - __asm paddw mm6,mm7 \ - __asm paddw mm2,mm1 \ - __asm movq mm1,OC_J(7) \ - __asm paddw mm7,mm5 \ - __asm movq mm5,mm0 \ - __asm pmulhw mm0,mm3 \ - __asm paddw mm4,mm7 \ - __asm pmulhw mm5,mm1 \ - __asm movq mm7,OC_C(7) \ - __asm psubw mm6,mm2 \ - __asm paddw mm0,mm3 \ - __asm pmulhw mm3,mm7 \ - __asm movq mm2,OC_I(2) \ - __asm pmulhw mm7,mm1 \ - __asm paddw mm5,mm1 \ - __asm movq mm1,mm2 \ - __asm pmulhw mm2,OC_C(2) \ - __asm psubw mm3,mm5 \ - __asm movq mm5,OC_J(6) \ - __asm paddw mm0,mm7 \ - __asm movq mm7,mm5 \ - __asm psubw mm0,mm4 \ - __asm pmulhw mm5,OC_C(2) \ - __asm paddw mm2,mm1 \ - __asm pmulhw mm1,OC_C(6) \ - __asm paddw mm4,mm4 \ - __asm paddw mm4,mm0 \ - __asm psubw mm3,mm6 \ - __asm paddw mm5,mm7 \ 
- __asm paddw mm6,mm6 \ - __asm pmulhw mm7,OC_C(6) \ - __asm paddw mm6,mm3 \ - __asm movq OC_I(1),mm4 \ - __asm psubw mm1,mm5 \ - __asm movq mm4,OC_C(4) \ - __asm movq mm5,mm3 \ - __asm pmulhw mm3,mm4 \ - __asm paddw mm7,mm2 \ - __asm movq OC_I(2),mm6 \ - __asm movq mm2,mm0 \ - __asm movq mm6,OC_I(0) \ - __asm pmulhw mm0,mm4 \ - __asm paddw mm5,mm3 \ - __asm movq mm3,OC_J(4) \ - __asm psubw mm5,mm1 \ - __asm paddw mm2,mm0 \ - __asm psubw mm6,mm3 \ - __asm movq mm0,mm6 \ - __asm pmulhw mm6,mm4 \ - __asm paddw mm3,mm3 \ - __asm paddw mm1,mm1 \ - __asm paddw mm3,mm0 \ - __asm paddw mm1,mm5 \ - __asm pmulhw mm4,mm3 \ - __asm paddw mm6,mm0 \ - __asm psubw mm6,mm2 \ - __asm paddw mm2,mm2 \ - __asm movq mm0,OC_I(1) \ - __asm paddw mm2,mm6 \ - __asm paddw mm4,mm3 \ - __asm psubw mm2,mm1 \ -} - -/*38+8=46 cycles.*/ -#define OC_ROW_IDCT __asm{ \ - OC_IDCT_BEGIN \ - /*r3=D'*/ \ - __asm movq mm3,OC_I(2) \ - /*r4=E'=E-G*/ \ - __asm psubw mm4,mm7 \ - /*r1=H'+H'*/ \ - __asm paddw mm1,mm1 \ - /*r7=G+G*/ \ - __asm paddw mm7,mm7 \ - /*r1=R1=A''+H'*/ \ - __asm paddw mm1,mm2 \ - /*r7=G'=E+G*/ \ - __asm paddw mm7,mm4 \ - /*r4=R4=E'-D'*/ \ - __asm psubw mm4,mm3 \ - __asm paddw mm3,mm3 \ - /*r6=R6=F'-B''*/ \ - __asm psubw mm6,mm5 \ - __asm paddw mm5,mm5 \ - /*r3=R3=E'+D'*/ \ - __asm paddw mm3,mm4 \ - /*r5=R5=F'+B''*/ \ - __asm paddw mm5,mm6 \ - /*r7=R7=G'-C'*/ \ - __asm psubw mm7,mm0 \ - __asm paddw mm0,mm0 \ - /*Save R1.*/ \ - __asm movq OC_I(1),mm1 \ - /*r0=R0=G.+C.*/ \ - __asm paddw mm0,mm7 \ -} - -/*The following macro does two 4x4 transposes in place. - At entry, we assume: - r0 = a3 a2 a1 a0 - I(1) = b3 b2 b1 b0 - r2 = c3 c2 c1 c0 - r3 = d3 d2 d1 d0 - - r4 = e3 e2 e1 e0 - r5 = f3 f2 f1 f0 - r6 = g3 g2 g1 g0 - r7 = h3 h2 h1 h0 - - At exit, we have: - I(0) = d0 c0 b0 a0 - I(1) = d1 c1 b1 a1 - I(2) = d2 c2 b2 a2 - I(3) = d3 c3 b3 a3 - - J(4) = h0 g0 f0 e0 - J(5) = h1 g1 f1 e1 - J(6) = h2 g2 f2 e2 - J(7) = h3 g3 f3 e3 - - I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3. 
- J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7. - - Since r1 is free at entry, we calculate the Js first.*/ -/*19 cycles.*/ -#define OC_TRANSPOSE __asm{ \ - __asm movq mm1,mm4 \ - __asm punpcklwd mm4,mm5 \ - __asm movq OC_I(0),mm0 \ - __asm punpckhwd mm1,mm5 \ - __asm movq mm0,mm6 \ - __asm punpcklwd mm6,mm7 \ - __asm movq mm5,mm4 \ - __asm punpckldq mm4,mm6 \ - __asm punpckhdq mm5,mm6 \ - __asm movq mm6,mm1 \ - __asm movq OC_J(4),mm4 \ - __asm punpckhwd mm0,mm7 \ - __asm movq OC_J(5),mm5 \ - __asm punpckhdq mm6,mm0 \ - __asm movq mm4,OC_I(0) \ - __asm punpckldq mm1,mm0 \ - __asm movq mm5,OC_I(1) \ - __asm movq mm0,mm4 \ - __asm movq OC_J(7),mm6 \ - __asm punpcklwd mm0,mm5 \ - __asm movq OC_J(6),mm1 \ - __asm punpckhwd mm4,mm5 \ - __asm movq mm5,mm2 \ - __asm punpcklwd mm2,mm3 \ - __asm movq mm1,mm0 \ - __asm punpckldq mm0,mm2 \ - __asm punpckhdq mm1,mm2 \ - __asm movq mm2,mm4 \ - __asm movq OC_I(0),mm0 \ - __asm punpckhwd mm5,mm3 \ - __asm movq OC_I(1),mm1 \ - __asm punpckhdq mm4,mm5 \ - __asm punpckldq mm2,mm5 \ - __asm movq OC_I(3),mm4 \ - __asm movq OC_I(2),mm2 \ -} - -/*38+19=57 cycles.*/ -#define OC_COLUMN_IDCT __asm{ \ - OC_IDCT_BEGIN \ - __asm paddw mm2,OC_8 \ - /*r1=H'+H'*/ \ - __asm paddw mm1,mm1 \ - /*r1=R1=A''+H'*/ \ - __asm paddw mm1,mm2 \ - /*r2=NR2*/ \ - __asm psraw mm2,4 \ - /*r4=E'=E-G*/ \ - __asm psubw mm4,mm7 \ - /*r1=NR1*/ \ - __asm psraw mm1,4 \ - /*r3=D'*/ \ - __asm movq mm3,OC_I(2) \ - /*r7=G+G*/ \ - __asm paddw mm7,mm7 \ - /*Store NR2 at I(2).*/ \ - __asm movq OC_I(2),mm2 \ - /*r7=G'=E+G*/ \ - __asm paddw mm7,mm4 \ - /*Store NR1 at I(1).*/ \ - __asm movq OC_I(1),mm1 \ - /*r4=R4=E'-D'*/ \ - __asm psubw mm4,mm3 \ - __asm paddw mm4,OC_8 \ - /*r3=D'+D'*/ \ - __asm paddw mm3,mm3 \ - /*r3=R3=E'+D'*/ \ - __asm paddw mm3,mm4 \ - /*r4=NR4*/ \ - __asm psraw mm4,4 \ - /*r6=R6=F'-B''*/ \ - __asm psubw mm6,mm5 \ - /*r3=NR3*/ \ - __asm psraw mm3,4 \ - __asm paddw mm6,OC_8 \ - /*r5=B''+B''*/ \ - __asm paddw mm5,mm5 \ - /*r5=R5=F'+B''*/ \ - __asm 
paddw mm5,mm6 \ - /*r6=NR6*/ \ - __asm psraw mm6,4 \ - /*Store NR4 at J(4).*/ \ - __asm movq OC_J(4),mm4 \ - /*r5=NR5*/ \ - __asm psraw mm5,4 \ - /*Store NR3 at I(3).*/ \ - __asm movq OC_I(3),mm3 \ - /*r7=R7=G'-C'*/ \ - __asm psubw mm7,mm0 \ - __asm paddw mm7,OC_8 \ - /*r0=C'+C'*/ \ - __asm paddw mm0,mm0 \ - /*r0=R0=G'+C'*/ \ - __asm paddw mm0,mm7 \ - /*r7=NR7*/ \ - __asm psraw mm7,4 \ - /*Store NR6 at J(6).*/ \ - __asm movq OC_J(6),mm6 \ - /*r0=NR0*/ \ - __asm psraw mm0,4 \ - /*Store NR5 at J(5).*/ \ - __asm movq OC_J(5),mm5 \ - /*Store NR7 at J(7).*/ \ - __asm movq OC_J(7),mm7 \ - /*Store NR0 at I(0).*/ \ - __asm movq OC_I(0),mm0 \ -} - -#define OC_MID(_m,_i) [CONSTS+_m+(_i)*8] -#define OC_C(_i) OC_MID(OC_COSINE_OFFSET,_i-1) -#define OC_8 OC_MID(OC_EIGHT_OFFSET,0) - -static void oc_idct8x8_slow(ogg_int16_t _y[64]){ - /*This routine accepts an 8x8 matrix, but in partially transposed form. - Every 4x4 block is transposed.*/ - __asm{ -#define CONSTS eax -#define Y edx - mov CONSTS,offset OC_IDCT_CONSTS - mov Y,_y -#define OC_I(_k) [Y+_k*16] -#define OC_J(_k) [Y+(_k-4)*16+8] - OC_ROW_IDCT - OC_TRANSPOSE -#undef OC_I -#undef OC_J -#define OC_I(_k) [Y+(_k*16)+64] -#define OC_J(_k) [Y+(_k-4)*16+72] - OC_ROW_IDCT - OC_TRANSPOSE -#undef OC_I -#undef OC_J -#define OC_I(_k) [Y+_k*16] -#define OC_J(_k) OC_I(_k) - OC_COLUMN_IDCT -#undef OC_I -#undef OC_J -#define OC_I(_k) [Y+_k*16+8] -#define OC_J(_k) OC_I(_k) - OC_COLUMN_IDCT -#undef OC_I -#undef OC_J -#undef CONSTS -#undef Y - } -} - -/*25 cycles.*/ -#define OC_IDCT_BEGIN_10 __asm{ \ - __asm movq mm2,OC_I(3) \ - __asm nop \ - __asm movq mm6,OC_C(3) \ - __asm movq mm4,mm2 \ - __asm movq mm1,OC_C(5) \ - __asm pmulhw mm4,mm6 \ - __asm movq mm3,OC_I(1) \ - __asm pmulhw mm1,mm2 \ - __asm movq mm0,OC_C(1) \ - __asm paddw mm4,mm2 \ - __asm pxor mm6,mm6 \ - __asm paddw mm2,mm1 \ - __asm movq mm5,OC_I(2) \ - __asm pmulhw mm0,mm3 \ - __asm movq mm1,mm5 \ - __asm paddw mm0,mm3 \ - __asm pmulhw mm3,OC_C(7) \ - __asm psubw mm6,mm2 \ - 
__asm pmulhw mm5,OC_C(2) \ - __asm psubw mm0,mm4 \ - __asm movq mm7,OC_I(2) \ - __asm paddw mm4,mm4 \ - __asm paddw mm7,mm5 \ - __asm paddw mm4,mm0 \ - __asm pmulhw mm1,OC_C(6) \ - __asm psubw mm3,mm6 \ - __asm movq OC_I(1),mm4 \ - __asm paddw mm6,mm6 \ - __asm movq mm4,OC_C(4) \ - __asm paddw mm6,mm3 \ - __asm movq mm5,mm3 \ - __asm pmulhw mm3,mm4 \ - __asm movq OC_I(2),mm6 \ - __asm movq mm2,mm0 \ - __asm movq mm6,OC_I(0) \ - __asm pmulhw mm0,mm4 \ - __asm paddw mm5,mm3 \ - __asm paddw mm2,mm0 \ - __asm psubw mm5,mm1 \ - __asm pmulhw mm6,mm4 \ - __asm paddw mm6,OC_I(0) \ - __asm paddw mm1,mm1 \ - __asm movq mm4,mm6 \ - __asm paddw mm1,mm5 \ - __asm psubw mm6,mm2 \ - __asm paddw mm2,mm2 \ - __asm movq mm0,OC_I(1) \ - __asm paddw mm2,mm6 \ - __asm psubw mm2,mm1 \ - __asm nop \ -} - -/*25+8=33 cycles.*/ -#define OC_ROW_IDCT_10 __asm{ \ - OC_IDCT_BEGIN_10 \ - /*r3=D'*/ \ - __asm movq mm3,OC_I(2) \ - /*r4=E'=E-G*/ \ - __asm psubw mm4,mm7 \ - /*r1=H'+H'*/ \ - __asm paddw mm1,mm1 \ - /*r7=G+G*/ \ - __asm paddw mm7,mm7 \ - /*r1=R1=A''+H'*/ \ - __asm paddw mm1,mm2 \ - /*r7=G'=E+G*/ \ - __asm paddw mm7,mm4 \ - /*r4=R4=E'-D'*/ \ - __asm psubw mm4,mm3 \ - __asm paddw mm3,mm3 \ - /*r6=R6=F'-B''*/ \ - __asm psubw mm6,mm5 \ - __asm paddw mm5,mm5 \ - /*r3=R3=E'+D'*/ \ - __asm paddw mm3,mm4 \ - /*r5=R5=F'+B''*/ \ - __asm paddw mm5,mm6 \ - /*r7=R7=G'-C'*/ \ - __asm psubw mm7,mm0 \ - __asm paddw mm0,mm0 \ - /*Save R1.*/ \ - __asm movq OC_I(1),mm1 \ - /*r0=R0=G'+C'*/ \ - __asm paddw mm0,mm7 \ -} - -/*25+19=44 cycles'*/ -#define OC_COLUMN_IDCT_10 __asm{ \ - OC_IDCT_BEGIN_10 \ - __asm paddw mm2,OC_8 \ - /*r1=H'+H'*/ \ - __asm paddw mm1,mm1 \ - /*r1=R1=A''+H'*/ \ - __asm paddw mm1,mm2 \ - /*r2=NR2*/ \ - __asm psraw mm2,4 \ - /*r4=E'=E-G*/ \ - __asm psubw mm4,mm7 \ - /*r1=NR1*/ \ - __asm psraw mm1,4 \ - /*r3=D'*/ \ - __asm movq mm3,OC_I(2) \ - /*r7=G+G*/ \ - __asm paddw mm7,mm7 \ - /*Store NR2 at I(2).*/ \ - __asm movq OC_I(2),mm2 \ - /*r7=G'=E+G*/ \ - __asm paddw mm7,mm4 \ - /*Store 
NR1 at I(1).*/ \ - __asm movq OC_I(1),mm1 \ - /*r4=R4=E'-D'*/ \ - __asm psubw mm4,mm3 \ - __asm paddw mm4,OC_8 \ - /*r3=D'+D'*/ \ - __asm paddw mm3,mm3 \ - /*r3=R3=E'+D'*/ \ - __asm paddw mm3,mm4 \ - /*r4=NR4*/ \ - __asm psraw mm4,4 \ - /*r6=R6=F'-B''*/ \ - __asm psubw mm6,mm5 \ - /*r3=NR3*/ \ - __asm psraw mm3,4 \ - __asm paddw mm6,OC_8 \ - /*r5=B''+B''*/ \ - __asm paddw mm5,mm5 \ - /*r5=R5=F'+B''*/ \ - __asm paddw mm5,mm6 \ - /*r6=NR6*/ \ - __asm psraw mm6,4 \ - /*Store NR4 at J(4).*/ \ - __asm movq OC_J(4),mm4 \ - /*r5=NR5*/ \ - __asm psraw mm5,4 \ - /*Store NR3 at I(3).*/ \ - __asm movq OC_I(3),mm3 \ - /*r7=R7=G'-C'*/ \ - __asm psubw mm7,mm0 \ - __asm paddw mm7,OC_8 \ - /*r0=C'+C'*/ \ - __asm paddw mm0,mm0 \ - /*r0=R0=G'+C'*/ \ - __asm paddw mm0,mm7 \ - /*r7=NR7*/ \ - __asm psraw mm7,4 \ - /*Store NR6 at J(6).*/ \ - __asm movq OC_J(6),mm6 \ - /*r0=NR0*/ \ - __asm psraw mm0,4 \ - /*Store NR5 at J(5).*/ \ - __asm movq OC_J(5),mm5 \ - /*Store NR7 at J(7).*/ \ - __asm movq OC_J(7),mm7 \ - /*Store NR0 at I(0).*/ \ - __asm movq OC_I(0),mm0 \ -} - -static void oc_idct8x8_10(ogg_int16_t _y[64]){ - __asm{ -#define CONSTS eax -#define Y edx - mov CONSTS,offset OC_IDCT_CONSTS - mov Y,_y -#define OC_I(_k) [Y+_k*16] -#define OC_J(_k) [Y+(_k-4)*16+8] - /*Done with dequant, descramble, and partial transpose. - Now do the iDCT itself.*/ - OC_ROW_IDCT_10 - OC_TRANSPOSE -#undef OC_I -#undef OC_J -#define OC_I(_k) [Y+_k*16] -#define OC_J(_k) OC_I(_k) - OC_COLUMN_IDCT_10 -#undef OC_I -#undef OC_J -#define OC_I(_k) [Y+_k*16+8] -#define OC_J(_k) OC_I(_k) - OC_COLUMN_IDCT_10 -#undef OC_I -#undef OC_J -#undef CONSTS -#undef Y - } -} - -/*Performs an inverse 8x8 Type-II DCT transform. - The input is assumed to be scaled by a factor of 4 relative to orthonormal - version of the transform.*/ -void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi){ - /*_last_zzi is subtly different from an actual count of the number of - coefficients we decoded for this block. 
- It contains the value of zzi BEFORE the final token in the block was - decoded. - In most cases this is an EOB token (the continuation of an EOB run from a - previous block counts), and so this is the same as the coefficient count. - However, in the case that the last token was NOT an EOB token, but filled - the block up with exactly 64 coefficients, _last_zzi will be less than 64. - Provided the last token was not a pure zero run, the minimum value it can - be is 46, and so that doesn't affect any of the cases in this routine. - However, if the last token WAS a pure zero run of length 63, then _last_zzi - will be 1 while the number of coefficients decoded is 64. - Thus, we will trigger the following special case, where the real - coefficient count would not. - Note also that a zero run of length 64 will give _last_zzi a value of 0, - but we still process the DC coefficient, which might have a non-zero value - due to DC prediction. - Although convoluted, this is arguably the correct behavior: it allows us to - use a smaller transform when the block ends with a long zero run instead - of a normal EOB token. - It could be smarter... multiple separate zero runs at the end of a block - will fool it, but an encoder that generates these really deserves what it - gets. - Needless to say we inherited this approach from VP3.*/ - /*Perform the iDCT.*/ - if(_last_zzi<10)oc_idct8x8_10(_y); - else oc_idct8x8_slow(_y); -} - -#endif diff --git a/drivers/theora/x86_vc/mmxloop.h b/drivers/theora/x86_vc/mmxloop.h deleted file mode 100644 index 2561fca2ae..0000000000 --- a/drivers/theora/x86_vc/mmxloop.h +++ /dev/null @@ -1,219 +0,0 @@ -#if !defined(_x86_vc_mmxloop_H) -# define _x86_vc_mmxloop_H (1) -# include -# include "x86int.h" - -#if defined(OC_X86_ASM) - -/*On entry, mm0={a0,...,a7}, mm1={b0,...,b7}, mm2={c0,...,c7}, mm3={d0,...d7}. 
- On exit, mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)} and - mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}; mm0 and mm3 are clobbered.*/ -#define OC_LOOP_FILTER8_MMX __asm{ \ - /*mm7=0*/ \ - __asm pxor mm7,mm7 \ - /*mm6:mm0={a0,...,a7}*/ \ - __asm movq mm6,mm0 \ - __asm punpcklbw mm0,mm7 \ - __asm punpckhbw mm6,mm7 \ - /*mm3:mm5={d0,...,d7}*/ \ - __asm movq mm5,mm3 \ - __asm punpcklbw mm3,mm7 \ - __asm punpckhbw mm5,mm7 \ - /*mm6:mm0={a0-d0,...,a7-d7}*/ \ - __asm psubw mm0,mm3 \ - __asm psubw mm6,mm5 \ - /*mm3:mm1={b0,...,b7}*/ \ - __asm movq mm3,mm1 \ - __asm punpcklbw mm1,mm7 \ - __asm movq mm4,mm2 \ - __asm punpckhbw mm3,mm7 \ - /*mm5:mm4={c0,...,c7}*/ \ - __asm movq mm5,mm2 \ - __asm punpcklbw mm4,mm7 \ - __asm punpckhbw mm5,mm7 \ - /*mm7={3}x4 \ - mm5:mm4={c0-b0,...,c7-b7}*/ \ - __asm pcmpeqw mm7,mm7 \ - __asm psubw mm4,mm1 \ - __asm psrlw mm7,14 \ - __asm psubw mm5,mm3 \ - /*Scale by 3.*/ \ - __asm pmullw mm4,mm7 \ - __asm pmullw mm5,mm7 \ - /*mm7={4}x4 \ - mm5:mm4=f={a0-d0+3*(c0-b0),...,a7-d7+3*(c7-b7)}*/ \ - __asm psrlw mm7,1 \ - __asm paddw mm4,mm0 \ - __asm psllw mm7,2 \ - __asm movq mm0,[LL] \ - __asm paddw mm5,mm6 \ - /*R_i has the range [-127,128], so we compute -R_i instead. \ - mm4=-R_i=-(f+4>>3)=0xFF^(f-4>>3)*/ \ - __asm psubw mm4,mm7 \ - __asm psubw mm5,mm7 \ - __asm psraw mm4,3 \ - __asm psraw mm5,3 \ - __asm pcmpeqb mm7,mm7 \ - __asm packsswb mm4,mm5 \ - __asm pxor mm6,mm6 \ - __asm pxor mm4,mm7 \ - __asm packuswb mm1,mm3 \ - /*Now compute lflim of -mm4 cf. Section 7.10 of the sepc.*/ \ - /*There's no unsigned byte+signed byte with unsigned saturation op code, so \ - we have to split things by sign (the other option is to work in 16 bits, \ - but working in 8 bits gives much better parallelism). \ - We compute abs(R_i), but save a mask of which terms were negative in mm6. \ - Then we compute mm4=abs(lflim(R_i,L))=min(abs(R_i),max(2*L-abs(R_i),0)). 
\ - Finally, we split mm4 into positive and negative pieces using the mask in \ - mm6, and add and subtract them as appropriate.*/ \ - /*mm4=abs(-R_i)*/ \ - /*mm7=255-2*L*/ \ - __asm pcmpgtb mm6,mm4 \ - __asm psubb mm7,mm0 \ - __asm pxor mm4,mm6 \ - __asm psubb mm7,mm0 \ - __asm psubb mm4,mm6 \ - /*mm7=255-max(2*L-abs(R_i),0)*/ \ - __asm paddusb mm7,mm4 \ - /*mm4=min(abs(R_i),max(2*L-abs(R_i),0))*/ \ - __asm paddusb mm4,mm7 \ - __asm psubusb mm4,mm7 \ - /*Now split mm4 by the original sign of -R_i.*/ \ - __asm movq mm5,mm4 \ - __asm pand mm4,mm6 \ - __asm pandn mm6,mm5 \ - /*mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)}*/ \ - /*mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}*/ \ - __asm paddusb mm1,mm4 \ - __asm psubusb mm2,mm4 \ - __asm psubusb mm1,mm6 \ - __asm paddusb mm2,mm6 \ -} - -#define OC_LOOP_FILTER_V_MMX(_pix,_ystride,_ll) \ - do{ \ - /*Used local variable pix__ in order to fix compilation errors like: \ - "error C2425: 'SHL' : non-constant expression in 'second operand'".*/ \ - unsigned char *pix__; \ - unsigned char *ll__; \ - ll__=(_ll); \ - pix__=(_pix); \ - __asm mov YSTRIDE,_ystride \ - __asm mov LL,ll__ \ - __asm mov PIX,pix__ \ - __asm sub PIX,YSTRIDE \ - __asm sub PIX,YSTRIDE \ - /*mm0={a0,...,a7}*/ \ - __asm movq mm0,[PIX] \ - /*ystride3=_ystride*3*/ \ - __asm lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] \ - /*mm3={d0,...,d7}*/ \ - __asm movq mm3,[PIX+YSTRIDE3] \ - /*mm1={b0,...,b7}*/ \ - __asm movq mm1,[PIX+YSTRIDE] \ - /*mm2={c0,...,c7}*/ \ - __asm movq mm2,[PIX+YSTRIDE*2] \ - OC_LOOP_FILTER8_MMX \ - /*Write it back out.*/ \ - __asm movq [PIX+YSTRIDE],mm1 \ - __asm movq [PIX+YSTRIDE*2],mm2 \ - } \ - while(0) - -#define OC_LOOP_FILTER_H_MMX(_pix,_ystride,_ll) \ - do{ \ - /*Used local variable ll__ in order to fix compilation errors like: \ - "error C2443: operand size conflict".*/ \ - unsigned char *ll__; \ - unsigned char *pix__; \ - ll__=(_ll); \ - pix__=(_pix)-2; \ - __asm mov PIX,pix__ \ - __asm mov YSTRIDE,_ystride \ - __asm mov LL,ll__ \ - /*x x x x d0 c0 b0 
a0*/ \ - __asm movd mm0,[PIX] \ - /*x x x x d1 c1 b1 a1*/ \ - __asm movd mm1,[PIX+YSTRIDE] \ - /*ystride3=_ystride*3*/ \ - __asm lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] \ - /*x x x x d2 c2 b2 a2*/ \ - __asm movd mm2,[PIX+YSTRIDE*2] \ - /*x x x x d3 c3 b3 a3*/ \ - __asm lea D,[PIX+YSTRIDE*4] \ - __asm movd mm3,[PIX+YSTRIDE3] \ - /*x x x x d4 c4 b4 a4*/ \ - __asm movd mm4,[D] \ - /*x x x x d5 c5 b5 a5*/ \ - __asm movd mm5,[D+YSTRIDE] \ - /*x x x x d6 c6 b6 a6*/ \ - __asm movd mm6,[D+YSTRIDE*2] \ - /*x x x x d7 c7 b7 a7*/ \ - __asm movd mm7,[D+YSTRIDE3] \ - /*mm0=d1 d0 c1 c0 b1 b0 a1 a0*/ \ - __asm punpcklbw mm0,mm1 \ - /*mm2=d3 d2 c3 c2 b3 b2 a3 a2*/ \ - __asm punpcklbw mm2,mm3 \ - /*mm3=d1 d0 c1 c0 b1 b0 a1 a0*/ \ - __asm movq mm3,mm0 \ - /*mm0=b3 b2 b1 b0 a3 a2 a1 a0*/ \ - __asm punpcklwd mm0,mm2 \ - /*mm3=d3 d2 d1 d0 c3 c2 c1 c0*/ \ - __asm punpckhwd mm3,mm2 \ - /*mm1=b3 b2 b1 b0 a3 a2 a1 a0*/ \ - __asm movq mm1,mm0 \ - /*mm4=d5 d4 c5 c4 b5 b4 a5 a4*/ \ - __asm punpcklbw mm4,mm5 \ - /*mm6=d7 d6 c7 c6 b7 b6 a7 a6*/ \ - __asm punpcklbw mm6,mm7 \ - /*mm5=d5 d4 c5 c4 b5 b4 a5 a4*/ \ - __asm movq mm5,mm4 \ - /*mm4=b7 b6 b5 b4 a7 a6 a5 a4*/ \ - __asm punpcklwd mm4,mm6 \ - /*mm5=d7 d6 d5 d4 c7 c6 c5 c4*/ \ - __asm punpckhwd mm5,mm6 \ - /*mm2=d3 d2 d1 d0 c3 c2 c1 c0*/ \ - __asm movq mm2,mm3 \ - /*mm0=a7 a6 a5 a4 a3 a2 a1 a0*/ \ - __asm punpckldq mm0,mm4 \ - /*mm1=b7 b6 b5 b4 b3 b2 b1 b0*/ \ - __asm punpckhdq mm1,mm4 \ - /*mm2=c7 c6 c5 c4 c3 c2 c1 c0*/ \ - __asm punpckldq mm2,mm5 \ - /*mm3=d7 d6 d5 d4 d3 d2 d1 d0*/ \ - __asm punpckhdq mm3,mm5 \ - OC_LOOP_FILTER8_MMX \ - /*mm2={b0+R_0'',...,b7+R_7''}*/ \ - __asm movq mm0,mm1 \ - /*mm1={b0+R_0'',c0-R_0'',...,b3+R_3'',c3-R_3''}*/ \ - __asm punpcklbw mm1,mm2 \ - /*mm2={b4+R_4'',c4-R_4'',...,b7+R_7'',c7-R_7''}*/ \ - __asm punpckhbw mm0,mm2 \ - /*[d]=c1 b1 c0 b0*/ \ - __asm movd D,mm1 \ - __asm mov [PIX+1],D_WORD \ - __asm psrlq mm1,32 \ - __asm shr D,16 \ - __asm mov [PIX+YSTRIDE+1],D_WORD \ - /*[d]=c3 b3 c2 b2*/ \ - __asm movd D,mm1 
\ - __asm mov [PIX+YSTRIDE*2+1],D_WORD \ - __asm shr D,16 \ - __asm mov [PIX+YSTRIDE3+1],D_WORD \ - __asm lea PIX,[PIX+YSTRIDE*4] \ - /*[d]=c5 b5 c4 b4*/ \ - __asm movd D,mm0 \ - __asm mov [PIX+1],D_WORD \ - __asm psrlq mm0,32 \ - __asm shr D,16 \ - __asm mov [PIX+YSTRIDE+1],D_WORD \ - /*[d]=c7 b7 c6 b6*/ \ - __asm movd D,mm0 \ - __asm mov [PIX+YSTRIDE*2+1],D_WORD \ - __asm shr D,16 \ - __asm mov [PIX+YSTRIDE3+1],D_WORD \ - } \ - while(0) - -# endif -#endif diff --git a/drivers/theora/x86_vc/mmxstate.c b/drivers/theora/x86_vc/mmxstate.c deleted file mode 100644 index 73bd1981cf..0000000000 --- a/drivers/theora/x86_vc/mmxstate.c +++ /dev/null @@ -1,211 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: mmxstate.c 16584 2009-09-26 19:35:55Z tterribe $ - - ********************************************************************/ - -/*MMX acceleration of complete fragment reconstruction algorithm. 
- Originally written by Rudolf Marek.*/ -#include -#include "x86int.h" -#include "mmxfrag.h" -#include "mmxloop.h" - -#if defined(OC_X86_ASM) - -void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi, - int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){ - unsigned char *dst; - ptrdiff_t frag_buf_off; - int ystride; - int mb_mode; - /*Apply the inverse transform.*/ - /*Special case only having a DC component.*/ - if(_last_zzi<2){ - /*Note that this value must be unsigned, to keep the __asm__ block from - sign-extending it when it puts it in a register.*/ - ogg_uint16_t p; - /*We round this dequant product (and not any of the others) because there's - no iDCT rounding.*/ - p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); - /*Fill _dct_coeffs with p.*/ - __asm{ -#define Y eax -#define P ecx - mov Y,_dct_coeffs - movzx P,p - /*mm0=0000 0000 0000 AAAA*/ - movd mm0,P - /*mm0=0000 0000 AAAA AAAA*/ - punpcklwd mm0,mm0 - /*mm0=AAAA AAAA AAAA AAAA*/ - punpckldq mm0,mm0 - movq [Y],mm0 - movq [8+Y],mm0 - movq [16+Y],mm0 - movq [24+Y],mm0 - movq [32+Y],mm0 - movq [40+Y],mm0 - movq [48+Y],mm0 - movq [56+Y],mm0 - movq [64+Y],mm0 - movq [72+Y],mm0 - movq [80+Y],mm0 - movq [88+Y],mm0 - movq [96+Y],mm0 - movq [104+Y],mm0 - movq [112+Y],mm0 - movq [120+Y],mm0 -#undef Y -#undef P - } - } - else{ - /*Dequantize the DC coefficient.*/ - _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); - oc_idct8x8_mmx(_dct_coeffs,_last_zzi); - } - /*Fill in the target buffer.*/ - frag_buf_off=_state->frag_buf_offs[_fragi]; - mb_mode=_state->frags[_fragi].mb_mode; - ystride=_state->ref_ystride[_pli]; - dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off; - if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs); - else{ - const unsigned char *ref; - int mvoffsets[2]; - ref= - _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]] - +frag_buf_off; - 
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, - _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){ - oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, - _dct_coeffs); - } - else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs); - } -} - -/*We copy these entire function to inline the actual MMX routines so that we - use only a single indirect call.*/ - -/*Copies the fragments specified by the lists of fragment indices from one - frame to another. - _fragis: A pointer to a list of fragment indices. - _nfragis: The number of fragment indices to copy. - _dst_frame: The reference frame to copy to. - _src_frame: The reference frame to copy from. - _pli: The color plane the fragments lie in.*/ -void oc_state_frag_copy_list_mmx(const oc_theora_state *_state, - const ptrdiff_t *_fragis,ptrdiff_t _nfragis, - int _dst_frame,int _src_frame,int _pli){ - const ptrdiff_t *frag_buf_offs; - const unsigned char *src_frame_data; - unsigned char *dst_frame_data; - ptrdiff_t fragii; - int ystride; - dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]]; - src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]]; - ystride=_state->ref_ystride[_pli]; - frag_buf_offs=_state->frag_buf_offs; - for(fragii=0;fragii<_nfragis;fragii++){ - ptrdiff_t frag_buf_off; - frag_buf_off=frag_buf_offs[_fragis[fragii]]; -#define SRC edx -#define DST eax -#define YSTRIDE ecx -#define YSTRIDE3 edi - OC_FRAG_COPY_MMX(dst_frame_data+frag_buf_off, - src_frame_data+frag_buf_off,ystride); -#undef SRC -#undef DST -#undef YSTRIDE -#undef YSTRIDE3 - } -} - -/*Apply the loop filter to a given set of fragment rows in the given plane. - The filter may be run on the bottom edge, affecting pixels in the next row of - fragments, so this row also needs to be available. - _bv: The bounding values array. - _refi: The index of the frame buffer to filter. - _pli: The color plane to filter. 
- _fragy0: The Y coordinate of the first fragment row to filter. - _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ -void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state, - int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){ - OC_ALIGN8(unsigned char ll[8]); - const oc_fragment_plane *fplane; - const oc_fragment *frags; - const ptrdiff_t *frag_buf_offs; - unsigned char *ref_frame_data; - ptrdiff_t fragi_top; - ptrdiff_t fragi_bot; - ptrdiff_t fragi0; - ptrdiff_t fragi0_end; - int ystride; - int nhfrags; - memset(ll,_state->loop_filter_limits[_state->qis[0]],sizeof(ll)); - fplane=_state->fplanes+_pli; - nhfrags=fplane->nhfrags; - fragi_top=fplane->froffset; - fragi_bot=fragi_top+fplane->nfrags; - fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags; - fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags; - ystride=_state->ref_ystride[_pli]; - frags=_state->frags; - frag_buf_offs=_state->frag_buf_offs; - ref_frame_data=_state->ref_frame_data[_refi]; - /*The following loops are constructed somewhat non-intuitively on purpose. - The main idea is: if a block boundary has at least one coded fragment on - it, the filter is applied to it. 
- However, the order that the filters are applied in matters, and VP3 chose - the somewhat strange ordering used below.*/ - while(fragi0fragi0)OC_LOOP_FILTER_H_MMX(ref,ystride,ll); - if(fragi0>fragi_top)OC_LOOP_FILTER_V_MMX(ref,ystride,ll); - if(fragi+1opt_vtable.frag_sub=oc_enc_frag_sub_mmx; - _enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_mmx; - _enc->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx; - _enc->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx; - _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_mmx; - } - if(cpu_flags&OC_CPU_X86_MMXEXT){ - _enc->opt_vtable.frag_sad=oc_enc_frag_sad_mmxext; - _enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_mmxext; - _enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_mmxext; - _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_mmxext; - _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_mmxext; - _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_mmxext; - _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_mmxext; - } - if(cpu_flags&OC_CPU_X86_SSE2){ -# if defined(OC_X86_64_ASM) - _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_x86_64sse2; -# endif - } -} -#endif diff --git a/drivers/theora/x86_vc/x86enc.h b/drivers/theora/x86_vc/x86enc.h deleted file mode 100644 index 581484641f..0000000000 --- a/drivers/theora/x86_vc/x86enc.h +++ /dev/null @@ -1,47 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: x86int.h 15675 2009-02-06 09:43:27Z tterribe $ - - ********************************************************************/ - -#if !defined(_x86_vc_x86enc_H) -# define _x86_vc_x86enc_H (1) -# include "../encint.h" -# include "x86int.h" - -void oc_enc_vtable_init_x86(oc_enc_ctx *_enc); - -unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src, - const unsigned char *_ref,int _ystride); -unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src, - const unsigned char *_ref,int _ystride,unsigned _thresh); -unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src, - const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, - unsigned _thresh); -unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src, - const unsigned char *_ref,int _ystride,unsigned _thresh); -unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src, - const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, - unsigned _thresh); -unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,int _ystride); -void oc_enc_frag_sub_mmx(ogg_int16_t _diff[64], - const unsigned char *_x,const unsigned char *_y,int _stride); -void oc_enc_frag_sub_128_mmx(ogg_int16_t _diff[64], - const unsigned char *_x,int _stride); -void oc_enc_frag_copy2_mmxext(unsigned char *_dst, - const unsigned char *_src1,const unsigned char *_src2,int _ystride); -void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]); -void oc_enc_fdct8x8_x86_64sse2(ogg_int16_t _y[64],const ogg_int16_t _x[64]); - -#endif diff --git a/drivers/theora/x86_vc/x86int.h b/drivers/theora/x86_vc/x86int.h deleted file mode 100644 index 4cca485311..0000000000 --- a/drivers/theora/x86_vc/x86int.h +++ /dev/null @@ -1,42 +0,0 @@ 
-/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: x86int.h 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -#if !defined(_x86_vc_x86int_H) -# define _x86_vc_x86int_H (1) -# include "../internal.h" - -void oc_state_vtable_init_x86(oc_theora_state *_state); - -void oc_frag_copy_mmx(unsigned char *_dst, - const unsigned char *_src,int _ystride); -void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride, - const ogg_int16_t *_residue); -void oc_frag_recon_inter_mmx(unsigned char *_dst, - const unsigned char *_src,int _ystride,const ogg_int16_t *_residue); -void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1, - const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue); -void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi); -void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi, - int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); -void oc_state_frag_copy_list_mmx(const oc_theora_state *_state, - const ptrdiff_t *_fragis,ptrdiff_t _nfragis, - int _dst_frame,int _src_frame,int _pli); -void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state, - int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); -void oc_restore_fpu_mmx(void); - -#endif diff --git a/drivers/theora/x86_vc/x86state.c b/drivers/theora/x86_vc/x86state.c deleted file mode 100644 index a786bec284..0000000000 --- 
a/drivers/theora/x86_vc/x86state.c +++ /dev/null @@ -1,62 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: x86state.c 16503 2009-08-22 18:14:02Z giles $ - - ********************************************************************/ - -#include "x86int.h" - -#if defined(OC_X86_ASM) - -#include "../cpu.c" - -/*This table has been modified from OC_FZIG_ZAG by baking a 4x4 transpose into - each quadrant of the destination.*/ -static const unsigned char OC_FZIG_ZAG_MMX[128]={ - 0, 8, 1, 2, 9,16,24,17, - 10, 3,32,11,18,25, 4,12, - 5,26,19,40,33,34,41,48, - 27, 6,13,20,28,21,14, 7, - 56,49,42,35,43,50,57,36, - 15,22,29,30,23,44,37,58, - 51,59,38,45,52,31,60,53, - 46,39,47,54,61,62,55,63, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, - 64,64,64,64,64,64,64,64, -}; - -void oc_state_vtable_init_x86(oc_theora_state *_state){ - _state->cpu_flags=oc_cpu_flags_get(); - if(_state->cpu_flags&OC_CPU_X86_MMX){ - _state->opt_vtable.frag_copy=oc_frag_copy_mmx; - _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx; - _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx; - _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_mmx; - _state->opt_vtable.idct8x8=oc_idct8x8_mmx; - _state->opt_vtable.state_frag_recon=oc_state_frag_recon_mmx; - 
_state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_mmx; - _state->opt_vtable.state_loop_filter_frag_rows= - oc_state_loop_filter_frag_rows_mmx; - _state->opt_vtable.restore_fpu=oc_restore_fpu_mmx; - _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_MMX; - } - else oc_state_vtable_init_c(_state); -} -#endif diff --git a/drivers/theora/yuv2rgb.h b/drivers/theora/yuv2rgb.h deleted file mode 100644 index 59101bd057..0000000000 --- a/drivers/theora/yuv2rgb.h +++ /dev/null @@ -1,1121 +0,0 @@ -#ifndef YUV2RGB_H -#define YUV2RGB_H - -#include "typedefs.h" - -static const uint32_t tables[256*3] = -{ - /* y_table */ - 0x7FFFFFEDU, - 0x7FFFFFEFU, - 0x7FFFFFF0U, - 0x7FFFFFF1U, - 0x7FFFFFF2U, - 0x7FFFFFF3U, - 0x7FFFFFF4U, - 0x7FFFFFF6U, - 0x7FFFFFF7U, - 0x7FFFFFF8U, - 0x7FFFFFF9U, - 0x7FFFFFFAU, - 0x7FFFFFFBU, - 0x7FFFFFFDU, - 0x7FFFFFFEU, - 0x7FFFFFFFU, - 0x80000000U, - 0x80400801U, - 0x80A01002U, - 0x80E01803U, - 0x81202805U, - 0x81803006U, - 0x81C03807U, - 0x82004008U, - 0x82604809U, - 0x82A0500AU, - 0x82E0600CU, - 0x8340680DU, - 0x8380700EU, - 0x83C0780FU, - 0x84208010U, - 0x84608811U, - 0x84A09813U, - 0x8500A014U, - 0x8540A815U, - 0x8580B016U, - 0x85E0B817U, - 0x8620C018U, - 0x8660D01AU, - 0x86C0D81BU, - 0x8700E01CU, - 0x8740E81DU, - 0x87A0F01EU, - 0x87E0F81FU, - 0x88210821U, - 0x88811022U, - 0x88C11823U, - 0x89012024U, - 0x89412825U, - 0x89A13026U, - 0x89E14028U, - 0x8A214829U, - 0x8A81502AU, - 0x8AC1582BU, - 0x8B01602CU, - 0x8B61682DU, - 0x8BA1782FU, - 0x8BE18030U, - 0x8C418831U, - 0x8C819032U, - 0x8CC19833U, - 0x8D21A034U, - 0x8D61B036U, - 0x8DA1B837U, - 0x8E01C038U, - 0x8E41C839U, - 0x8E81D03AU, - 0x8EE1D83BU, - 0x8F21E83DU, - 0x8F61F03EU, - 0x8FC1F83FU, - 0x90020040U, - 0x90420841U, - 0x90A21042U, - 0x90E22044U, - 0x91222845U, - 0x91823046U, - 0x91C23847U, - 0x92024048U, - 0x92624849U, - 0x92A2504AU, - 0x92E2604CU, - 0x9342684DU, - 0x9382704EU, - 0x93C2784FU, - 0x94228050U, - 0x94628851U, - 0x94A29853U, - 0x9502A054U, - 0x9542A855U, - 0x9582B056U, - 0x95E2B857U, 
- 0x9622C058U, - 0x9662D05AU, - 0x96C2D85BU, - 0x9702E05CU, - 0x9742E85DU, - 0x97A2F05EU, - 0x97E2F85FU, - 0x98230861U, - 0x98831062U, - 0x98C31863U, - 0x99032064U, - 0x99632865U, - 0x99A33066U, - 0x99E34068U, - 0x9A434869U, - 0x9A83506AU, - 0x9AC3586BU, - 0x9B23606CU, - 0x9B63686DU, - 0x9BA3786FU, - 0x9BE38070U, - 0x9C438871U, - 0x9C839072U, - 0x9CC39873U, - 0x9D23A074U, - 0x9D63B076U, - 0x9DA3B877U, - 0x9E03C078U, - 0x9E43C879U, - 0x9E83D07AU, - 0x9EE3D87BU, - 0x9F23E87DU, - 0x9F63F07EU, - 0x9FC3F87FU, - 0xA0040080U, - 0xA0440881U, - 0xA0A41082U, - 0xA0E42084U, - 0xA1242885U, - 0xA1843086U, - 0xA1C43887U, - 0xA2044088U, - 0xA2644889U, - 0xA2A4588BU, - 0xA2E4608CU, - 0xA344688DU, - 0xA384708EU, - 0xA3C4788FU, - 0xA4248090U, - 0xA4649092U, - 0xA4A49893U, - 0xA504A094U, - 0xA544A895U, - 0xA584B096U, - 0xA5E4B897U, - 0xA624C098U, - 0xA664D09AU, - 0xA6C4D89BU, - 0xA704E09CU, - 0xA744E89DU, - 0xA7A4F09EU, - 0xA7E4F89FU, - 0xA82508A1U, - 0xA88510A2U, - 0xA8C518A3U, - 0xA90520A4U, - 0xA96528A5U, - 0xA9A530A6U, - 0xA9E540A8U, - 0xAA4548A9U, - 0xAA8550AAU, - 0xAAC558ABU, - 0xAB2560ACU, - 0xAB6568ADU, - 0xABA578AFU, - 0xAC0580B0U, - 0xAC4588B1U, - 0xAC8590B2U, - 0xACE598B3U, - 0xAD25A0B4U, - 0xAD65B0B6U, - 0xADA5B8B7U, - 0xAE05C0B8U, - 0xAE45C8B9U, - 0xAE85D0BAU, - 0xAEE5D8BBU, - 0xAF25E8BDU, - 0xAF65F0BEU, - 0xAFC5F8BFU, - 0xB00600C0U, - 0xB04608C1U, - 0xB0A610C2U, - 0xB0E620C4U, - 0xB12628C5U, - 0xB18630C6U, - 0xB1C638C7U, - 0xB20640C8U, - 0xB26648C9U, - 0xB2A658CBU, - 0xB2E660CCU, - 0xB34668CDU, - 0xB38670CEU, - 0xB3C678CFU, - 0xB42680D0U, - 0xB46690D2U, - 0xB4A698D3U, - 0xB506A0D4U, - 0xB546A8D5U, - 0xB586B0D6U, - 0xB5E6B8D7U, - 0xB626C8D9U, - 0xB666D0DAU, - 0xB6C6D8DBU, - 0xB706E0DCU, - 0xB746E8DDU, - 0xB7A6F0DEU, - 0xB7E6F8DFU, - 0xB82708E1U, - 0xB88710E2U, - 0xB8C718E3U, - 0xB90720E4U, - 0xB96728E5U, - 0xB9A730E6U, - 0xB9E740E8U, - 0xBA4748E9U, - 0xBA8750EAU, - 0xBAC758EBU, - 0xBB2760ECU, - 0xBB6768EDU, - 0xBBA778EFU, - 0xBC0780F0U, - 0xBC4788F1U, - 0xBC8790F2U, - 
0xBCE798F3U, - 0xBD27A0F4U, - 0xBD67B0F6U, - 0xBDC7B8F7U, - 0xBE07C0F8U, - 0xBE47C8F9U, - 0xBEA7D0FAU, - 0xBEE7D8FBU, - 0xBF27E8FDU, - 0xBF87F0FEU, - 0xBFC7F8FFU, - 0xC0080100U, - 0xC0480901U, - 0xC0A81102U, - 0xC0E82104U, - 0xC0E82104U, - 0xC0E82104U, - 0xC0E82104U, - 0xC0E82104U, - 0xC0E82104U, - 0xC0E82104U, - 0xC0E82104U, - 0xC0E82104U, - 0xC0E82104U, - 0xC0E82104U, - 0xC0E82104U, - 0xC0E82104U, - 0xC0E82104U, - 0xC0E82104U, - 0xC0E82104U, - 0xC0E82104U, - /* u_table */ - 0x0C400103U, - 0x0C200105U, - 0x0C200107U, - 0x0C000109U, - 0x0BE0010BU, - 0x0BC0010DU, - 0x0BA0010FU, - 0x0BA00111U, - 0x0B800113U, - 0x0B600115U, - 0x0B400117U, - 0x0B400119U, - 0x0B20011BU, - 0x0B00011DU, - 0x0AE0011FU, - 0x0AE00121U, - 0x0AC00123U, - 0x0AA00125U, - 0x0A800127U, - 0x0A600129U, - 0x0A60012BU, - 0x0A40012DU, - 0x0A20012FU, - 0x0A000131U, - 0x0A000132U, - 0x09E00134U, - 0x09C00136U, - 0x09A00138U, - 0x09A0013AU, - 0x0980013CU, - 0x0960013EU, - 0x09400140U, - 0x09400142U, - 0x09200144U, - 0x09000146U, - 0x08E00148U, - 0x08C0014AU, - 0x08C0014CU, - 0x08A0014EU, - 0x08800150U, - 0x08600152U, - 0x08600154U, - 0x08400156U, - 0x08200158U, - 0x0800015AU, - 0x0800015CU, - 0x07E0015EU, - 0x07C00160U, - 0x07A00162U, - 0x07A00164U, - 0x07800166U, - 0x07600168U, - 0x0740016AU, - 0x0720016CU, - 0x0720016EU, - 0x07000170U, - 0x06E00172U, - 0x06C00174U, - 0x06C00176U, - 0x06A00178U, - 0x0680017AU, - 0x0660017CU, - 0x0660017EU, - 0x06400180U, - 0x06200182U, - 0x06000184U, - 0x05E00185U, - 0x05E00187U, - 0x05C00189U, - 0x05A0018BU, - 0x0580018DU, - 0x0580018FU, - 0x05600191U, - 0x05400193U, - 0x05200195U, - 0x05200197U, - 0x05000199U, - 0x04E0019BU, - 0x04C0019DU, - 0x04C0019FU, - 0x04A001A1U, - 0x048001A3U, - 0x046001A5U, - 0x044001A7U, - 0x044001A9U, - 0x042001ABU, - 0x040001ADU, - 0x03E001AFU, - 0x03E001B1U, - 0x03C001B3U, - 0x03A001B5U, - 0x038001B7U, - 0x038001B9U, - 0x036001BBU, - 0x034001BDU, - 0x032001BFU, - 0x032001C1U, - 0x030001C3U, - 0x02E001C5U, - 0x02C001C7U, - 0x02A001C9U, - 
0x02A001CBU, - 0x028001CDU, - 0x026001CFU, - 0x024001D1U, - 0x024001D3U, - 0x022001D5U, - 0x020001D7U, - 0x01E001D8U, - 0x01E001DAU, - 0x01C001DCU, - 0x01A001DEU, - 0x018001E0U, - 0x016001E2U, - 0x016001E4U, - 0x014001E6U, - 0x012001E8U, - 0x010001EAU, - 0x010001ECU, - 0x00E001EEU, - 0x00C001F0U, - 0x00A001F2U, - 0x00A001F4U, - 0x008001F6U, - 0x006001F8U, - 0x004001FAU, - 0x004001FCU, - 0x002001FEU, - 0x00000200U, - 0xFFE00202U, - 0xFFC00204U, - 0xFFC00206U, - 0xFFA00208U, - 0xFF80020AU, - 0xFF60020CU, - 0xFF60020EU, - 0xFF400210U, - 0xFF200212U, - 0xFF000214U, - 0xFF000216U, - 0xFEE00218U, - 0xFEC0021AU, - 0xFEA0021CU, - 0xFEA0021EU, - 0xFE800220U, - 0xFE600222U, - 0xFE400224U, - 0xFE200226U, - 0xFE200228U, - 0xFE000229U, - 0xFDE0022BU, - 0xFDC0022DU, - 0xFDC0022FU, - 0xFDA00231U, - 0xFD800233U, - 0xFD600235U, - 0xFD600237U, - 0xFD400239U, - 0xFD20023BU, - 0xFD00023DU, - 0xFCE0023FU, - 0xFCE00241U, - 0xFCC00243U, - 0xFCA00245U, - 0xFC800247U, - 0xFC800249U, - 0xFC60024BU, - 0xFC40024DU, - 0xFC20024FU, - 0xFC200251U, - 0xFC000253U, - 0xFBE00255U, - 0xFBC00257U, - 0xFBC00259U, - 0xFBA0025BU, - 0xFB80025DU, - 0xFB60025FU, - 0xFB400261U, - 0xFB400263U, - 0xFB200265U, - 0xFB000267U, - 0xFAE00269U, - 0xFAE0026BU, - 0xFAC0026DU, - 0xFAA0026FU, - 0xFA800271U, - 0xFA800273U, - 0xFA600275U, - 0xFA400277U, - 0xFA200279U, - 0xFA20027BU, - 0xFA00027CU, - 0xF9E0027EU, - 0xF9C00280U, - 0xF9A00282U, - 0xF9A00284U, - 0xF9800286U, - 0xF9600288U, - 0xF940028AU, - 0xF940028CU, - 0xF920028EU, - 0xF9000290U, - 0xF8E00292U, - 0xF8E00294U, - 0xF8C00296U, - 0xF8A00298U, - 0xF880029AU, - 0xF860029CU, - 0xF860029EU, - 0xF84002A0U, - 0xF82002A2U, - 0xF80002A4U, - 0xF80002A6U, - 0xF7E002A8U, - 0xF7C002AAU, - 0xF7A002ACU, - 0xF7A002AEU, - 0xF78002B0U, - 0xF76002B2U, - 0xF74002B4U, - 0xF74002B6U, - 0xF72002B8U, - 0xF70002BAU, - 0xF6E002BCU, - 0xF6C002BEU, - 0xF6C002C0U, - 0xF6A002C2U, - 0xF68002C4U, - 0xF66002C6U, - 0xF66002C8U, - 0xF64002CAU, - 0xF62002CCU, - 0xF60002CEU, - 0xF60002CFU, - 
0xF5E002D1U, - 0xF5C002D3U, - 0xF5A002D5U, - 0xF5A002D7U, - 0xF58002D9U, - 0xF56002DBU, - 0xF54002DDU, - 0xF52002DFU, - 0xF52002E1U, - 0xF50002E3U, - 0xF4E002E5U, - 0xF4C002E7U, - 0xF4C002E9U, - 0xF4A002EBU, - 0xF48002EDU, - 0xF46002EFU, - 0xF46002F1U, - 0xF44002F3U, - 0xF42002F5U, - 0xF40002F7U, - 0xF3E002F9U, - 0xF3E002FBU, - /* v_table */ - 0x1A09A000U, - 0x19E9A800U, - 0x19A9B800U, - 0x1969C800U, - 0x1949D000U, - 0x1909E000U, - 0x18C9E800U, - 0x18A9F800U, - 0x186A0000U, - 0x182A1000U, - 0x180A2000U, - 0x17CA2800U, - 0x17AA3800U, - 0x176A4000U, - 0x172A5000U, - 0x170A6000U, - 0x16CA6800U, - 0x168A7800U, - 0x166A8000U, - 0x162A9000U, - 0x160AA000U, - 0x15CAA800U, - 0x158AB800U, - 0x156AC000U, - 0x152AD000U, - 0x14EAE000U, - 0x14CAE800U, - 0x148AF800U, - 0x146B0000U, - 0x142B1000U, - 0x13EB2000U, - 0x13CB2800U, - 0x138B3800U, - 0x134B4000U, - 0x132B5000U, - 0x12EB6000U, - 0x12CB6800U, - 0x128B7800U, - 0x124B8000U, - 0x122B9000U, - 0x11EBA000U, - 0x11ABA800U, - 0x118BB800U, - 0x114BC000U, - 0x112BD000U, - 0x10EBE000U, - 0x10ABE800U, - 0x108BF800U, - 0x104C0000U, - 0x100C1000U, - 0x0FEC2000U, - 0x0FAC2800U, - 0x0F8C3800U, - 0x0F4C4000U, - 0x0F0C5000U, - 0x0EEC5800U, - 0x0EAC6800U, - 0x0E6C7800U, - 0x0E4C8000U, - 0x0E0C9000U, - 0x0DEC9800U, - 0x0DACA800U, - 0x0D6CB800U, - 0x0D4CC000U, - 0x0D0CD000U, - 0x0CCCD800U, - 0x0CACE800U, - 0x0C6CF800U, - 0x0C4D0000U, - 0x0C0D1000U, - 0x0BCD1800U, - 0x0BAD2800U, - 0x0B6D3800U, - 0x0B2D4000U, - 0x0B0D5000U, - 0x0ACD5800U, - 0x0AAD6800U, - 0x0A6D7800U, - 0x0A2D8000U, - 0x0A0D9000U, - 0x09CD9800U, - 0x098DA800U, - 0x096DB800U, - 0x092DC000U, - 0x090DD000U, - 0x08CDD800U, - 0x088DE800U, - 0x086DF800U, - 0x082E0000U, - 0x07EE1000U, - 0x07CE1800U, - 0x078E2800U, - 0x076E3800U, - 0x072E4000U, - 0x06EE5000U, - 0x06CE5800U, - 0x068E6800U, - 0x064E7800U, - 0x062E8000U, - 0x05EE9000U, - 0x05CE9800U, - 0x058EA800U, - 0x054EB800U, - 0x052EC000U, - 0x04EED000U, - 0x04AED800U, - 0x048EE800U, - 0x044EF000U, - 0x042F0000U, - 0x03EF1000U, - 
0x03AF1800U, - 0x038F2800U, - 0x034F3000U, - 0x030F4000U, - 0x02EF5000U, - 0x02AF5800U, - 0x028F6800U, - 0x024F7000U, - 0x020F8000U, - 0x01EF9000U, - 0x01AF9800U, - 0x016FA800U, - 0x014FB000U, - 0x010FC000U, - 0x00EFD000U, - 0x00AFD800U, - 0x006FE800U, - 0x004FF000U, - 0x00100000U, - 0xFFD01000U, - 0xFFB01800U, - 0xFF702800U, - 0xFF303000U, - 0xFF104000U, - 0xFED05000U, - 0xFEB05800U, - 0xFE706800U, - 0xFE307000U, - 0xFE108000U, - 0xFDD09000U, - 0xFD909800U, - 0xFD70A800U, - 0xFD30B000U, - 0xFD10C000U, - 0xFCD0D000U, - 0xFC90D800U, - 0xFC70E800U, - 0xFC30F000U, - 0xFBF10000U, - 0xFBD11000U, - 0xFB911800U, - 0xFB712800U, - 0xFB313000U, - 0xFAF14000U, - 0xFAD14800U, - 0xFA915800U, - 0xFA516800U, - 0xFA317000U, - 0xF9F18000U, - 0xF9D18800U, - 0xF9919800U, - 0xF951A800U, - 0xF931B000U, - 0xF8F1C000U, - 0xF8B1C800U, - 0xF891D800U, - 0xF851E800U, - 0xF831F000U, - 0xF7F20000U, - 0xF7B20800U, - 0xF7921800U, - 0xF7522800U, - 0xF7123000U, - 0xF6F24000U, - 0xF6B24800U, - 0xF6925800U, - 0xF6526800U, - 0xF6127000U, - 0xF5F28000U, - 0xF5B28800U, - 0xF5729800U, - 0xF552A800U, - 0xF512B000U, - 0xF4F2C000U, - 0xF4B2C800U, - 0xF472D800U, - 0xF452E800U, - 0xF412F000U, - 0xF3D30000U, - 0xF3B30800U, - 0xF3731800U, - 0xF3532800U, - 0xF3133000U, - 0xF2D34000U, - 0xF2B34800U, - 0xF2735800U, - 0xF2336800U, - 0xF2137000U, - 0xF1D38000U, - 0xF1B38800U, - 0xF1739800U, - 0xF133A800U, - 0xF113B000U, - 0xF0D3C000U, - 0xF093C800U, - 0xF073D800U, - 0xF033E000U, - 0xF013F000U, - 0xEFD40000U, - 0xEF940800U, - 0xEF741800U, - 0xEF342000U, - 0xEEF43000U, - 0xEED44000U, - 0xEE944800U, - 0xEE745800U, - 0xEE346000U, - 0xEDF47000U, - 0xEDD48000U, - 0xED948800U, - 0xED549800U, - 0xED34A000U, - 0xECF4B000U, - 0xECD4C000U, - 0xEC94C800U, - 0xEC54D800U, - 0xEC34E000U, - 0xEBF4F000U, - 0xEBB50000U, - 0xEB950800U, - 0xEB551800U, - 0xEB352000U, - 0xEAF53000U, - 0xEAB54000U, - 0xEA954800U, - 0xEA555800U, - 0xEA156000U, - 0xE9F57000U, - 0xE9B58000U, - 0xE9958800U, - 0xE9559800U, - 0xE915A000U, - 0xE8F5B000U, - 
0xE8B5C000U, - 0xE875C800U, - 0xE855D800U, - 0xE815E000U, - 0xE7F5F000U, - 0xE7B60000U, - 0xE7760800U, - 0xE7561800U, - 0xE7162000U, - 0xE6D63000U, - 0xE6B64000U, - 0xE6764800U, - 0xE6365800U -}; - -#define FLAGS 0x40080100 -#define READUV(U,V) (tables[256 + (U)] + tables[512 + (V)]) -#define READY(Y) tables[Y] -#define FIXUP(Y) \ -do { \ - int tmp = (Y) & FLAGS; \ - if (tmp != 0) \ - { \ - tmp -= tmp>>8; \ - (Y) |= tmp; \ - tmp = FLAGS & ~(Y>>1); \ - (Y) += tmp>>8; \ - } \ -} while (0 == 1) - -#define STORE(Y,DSTPTR) \ -do { \ - *(DSTPTR)++ = (Y); \ - *(DSTPTR)++ = (Y)>>22; \ - *(DSTPTR)++ = (Y)>>11; \ - *(DSTPTR)++ = 255; \ -} while (0 == 1) - -void yuv422_2_rgb8888(uint8_t *dst_ptr, - const uint8_t *y_ptr, - const uint8_t *u_ptr, - const uint8_t *v_ptr, - int32_t width, - int32_t height, - int32_t y_span, - int32_t uv_span, - int32_t dst_span, - int32_t dither) -{ - height -= 1; - while (height > 0) - { - height -= width<<16; - height += 1<<16; - while (height < 0) - { - /* Do top row pair */ - uint32_t uv, y0, y1; - - uv = READUV(*u_ptr++,*v_ptr++); - y0 = uv + READY(*y_ptr++); - y1 = uv + READY(*y_ptr++); - FIXUP(y0); - FIXUP(y1); - STORE(y0, dst_ptr); - STORE(y1, dst_ptr); - height += (2<<16); - } - if ((height>>16) == 0) - { - /* Trailing top row pix */ - uint32_t uv, y0; - - uv = READUV(*u_ptr,*v_ptr); - y0 = uv + READY(*y_ptr++); - FIXUP(y0); - STORE(y0, dst_ptr); - } - dst_ptr += dst_span-width*4; - y_ptr += y_span-width; - u_ptr += uv_span-(width>>1); - v_ptr += uv_span-(width>>1); - height = (height<<16)>>16; - height -= 1; - if (height == 0) - break; - height -= width<<16; - height += 1<<16; - while (height < 0) - { - /* Do second row pair */ - uint32_t uv, y0, y1; - - uv = READUV(*u_ptr++,*v_ptr++); - y0 = uv + READY(*y_ptr++); - y1 = uv + READY(*y_ptr++); - FIXUP(y0); - FIXUP(y1); - STORE(y0, dst_ptr); - STORE(y1, dst_ptr); - height += (2<<16); - } - if ((height>>16) == 0) - { - /* Trailing bottom row pix */ - uint32_t uv, y0; - - uv = 
READUV(*u_ptr,*v_ptr); - y0 = uv + READY(*y_ptr++); - FIXUP(y0); - STORE(y0, dst_ptr); - } - dst_ptr += dst_span-width*4; - y_ptr += y_span-width; - u_ptr += uv_span-(width>>1); - v_ptr += uv_span-(width>>1); - height = (height<<16)>>16; - height -= 1; - } -} - - -#undef FLAGS -#undef READUV -#undef READY -#undef FIXUP -#undef STORE - - -#define FLAGS 0x40080100 -#define READUV(U,V) (tables[256 + (U)] + tables[512 + (V)]) -#define READY(Y) tables[Y] -#define FIXUP(Y) \ -do { \ - int tmp = (Y) & FLAGS; \ - if (tmp != 0) \ - { \ - tmp -= tmp>>8; \ - (Y) |= tmp; \ - tmp = FLAGS & ~(Y>>1); \ - (Y) += tmp>>8; \ - } \ -} while (0 == 1) - -#define STORE(Y,DSTPTR) \ -do { \ - (DSTPTR) = 0xFF000000 | (Y & 0xFF) | (0xFF00 & (Y>>14)) | (0xFF0000 & (Y<<5));\ -} while (0 == 1) - -void yuv420_2_rgb8888(uint8_t *dst_ptr_, - const uint8_t *y_ptr, - const uint8_t *u_ptr, - const uint8_t *v_ptr, - int32_t width, - int32_t height, - int32_t y_span, - int32_t uv_span, - int32_t dst_span, - int32_t dither) -{ - uint32_t *dst_ptr = (uint32_t *)(void *)dst_ptr_; - dst_span >>= 2; - - height -= 1; - while (height > 0) - { - height -= width<<16; - height += 1<<16; - while (height < 0) - { - /* Do 2 column pairs */ - uint32_t uv, y0, y1; - - uv = READUV(*u_ptr++,*v_ptr++); - y1 = uv + READY(y_ptr[y_span]); - y0 = uv + READY(*y_ptr++); - FIXUP(y1); - FIXUP(y0); - STORE(y1, dst_ptr[dst_span]); - STORE(y0, *dst_ptr++); - y1 = uv + READY(y_ptr[y_span]); - y0 = uv + READY(*y_ptr++); - FIXUP(y1); - FIXUP(y0); - STORE(y1, dst_ptr[dst_span]); - STORE(y0, *dst_ptr++); - height += (2<<16); - } - if ((height>>16) == 0) - { - /* Trailing column pair */ - uint32_t uv, y0, y1; - - uv = READUV(*u_ptr,*v_ptr); - y1 = uv + READY(y_ptr[y_span]); - y0 = uv + READY(*y_ptr++); - FIXUP(y1); - FIXUP(y0); - STORE(y0, dst_ptr[dst_span]); - STORE(y1, *dst_ptr++); - } - dst_ptr += dst_span*2-width; - y_ptr += y_span*2-width; - u_ptr += uv_span-(width>>1); - v_ptr += uv_span-(width>>1); - height = (height<<16)>>16; - 
height -= 2; - } - if (height == 0) - { - /* Trail row */ - height -= width<<16; - height += 1<<16; - while (height < 0) - { - /* Do a row pair */ - uint32_t uv, y0, y1; - - uv = READUV(*u_ptr++,*v_ptr++); - y1 = uv + READY(*y_ptr++); - y0 = uv + READY(*y_ptr++); - FIXUP(y1); - FIXUP(y0); - STORE(y1, *dst_ptr++); - STORE(y0, *dst_ptr++); - height += (2<<16); - } - if ((height>>16) == 0) - { - /* Trailing pix */ - uint32_t uv, y0; - - uv = READUV(*u_ptr++,*v_ptr++); - y0 = uv + READY(*y_ptr++); - FIXUP(y0); - STORE(y0, *dst_ptr++); - } - } -} - - - -#undef FLAGS -#undef READUV -#undef READY -#undef FIXUP -#undef STORE - -#define FLAGS 0x40080100 -#define READUV(U,V) (tables[256 + (U)] + tables[512 + (V)]) -#define READY(Y) tables[Y] -#define FIXUP(Y) \ -do { \ - int tmp = (Y) & FLAGS; \ - if (tmp != 0) \ - { \ - tmp -= tmp>>8; \ - (Y) |= tmp; \ - tmp = FLAGS & ~(Y>>1); \ - (Y) += tmp>>8; \ - } \ -} while (0 == 1) - -#define STORE(Y,DSTPTR) \ -do { \ - *(DSTPTR)++ = (Y); \ - *(DSTPTR)++ = (Y)>>22; \ - *(DSTPTR)++ = (Y)>>11; \ - *(DSTPTR)++ = 255; \ -} while (0 == 1) - -void yuv444_2_rgb8888(uint8_t *dst_ptr, - const uint8_t *y_ptr, - const uint8_t *u_ptr, - const uint8_t *v_ptr, - int32_t width, - int32_t height, - int32_t y_span, - int32_t uv_span, - int32_t dst_span, - int32_t dither) -{ - height -= 1; - while (height > 0) - { - height -= width<<16; - height += 1<<16; - while (height < 0) - { - /* Do top row pair */ - uint32_t uv, y0, y1; - - uv = READUV(*u_ptr++,*v_ptr++); - y0 = uv + READY(*y_ptr++); - FIXUP(y0); - STORE(y0, dst_ptr); - uv = READUV(*u_ptr++,*v_ptr++); - y1 = uv + READY(*y_ptr++); - FIXUP(y1); - STORE(y1, dst_ptr); - height += (2<<16); - } - if ((height>>16) == 0) - { - /* Trailing top row pix */ - uint32_t uv, y0; - - uv = READUV(*u_ptr++,*v_ptr++); - y0 = uv + READY(*y_ptr++); - FIXUP(y0); - STORE(y0, dst_ptr); - } - dst_ptr += dst_span-width*4; - y_ptr += y_span-width; - u_ptr += uv_span-width; - v_ptr += uv_span-width; - height = 
(height<<16)>>16; - height -= 1; - if (height == 0) - break; - height -= width<<16; - height += 1<<16; - while (height < 0) - { - /* Do second row pair */ - uint32_t uv, y0, y1; - - uv = READUV(*u_ptr++,*v_ptr++); - y0 = uv + READY(*y_ptr++); - FIXUP(y0); - STORE(y0, dst_ptr); - uv = READUV(*u_ptr++,*v_ptr++); - y1 = uv + READY(*y_ptr++); - FIXUP(y1); - STORE(y1, dst_ptr); - height += (2<<16); - } - if ((height>>16) == 0) - { - /* Trailing bottom row pix */ - uint32_t uv, y0; - - uv = READUV(*u_ptr++,*v_ptr++); - y0 = uv + READY(*y_ptr++); - FIXUP(y0); - STORE(y0, dst_ptr); - } - dst_ptr += dst_span-width*4; - y_ptr += y_span-width; - u_ptr += uv_span-width; - v_ptr += uv_span-width; - height = (height<<16)>>16; - height -= 1; - } -} -#endif // YUV2RGB_H diff --git a/modules/theora/SCsub b/modules/theora/SCsub new file mode 100644 index 0000000000..cdb78e955e --- /dev/null +++ b/modules/theora/SCsub @@ -0,0 +1,81 @@ +Import('env') +Import('env_modules') + +env_theora = env_modules.Clone() + +# Thirdparty source files +if (env["libtheora"] != "system"): # builtin + thirdparty_dir = "#thirdparty/libtheora/" + thirdparty_sources = [ + "analyze.c", + "apiwrapper.c", + "bitpack.c", + "cpu.c", + "decapiwrapper.c", + "decinfo.c", + "decode.c", + "dequant.c", + "encapiwrapper.c", + "encfrag.c", + "encinfo.c", + "encode.c", + "encoder_disabled.c", + "enquant.c", + "fdct.c", + "fragment.c", + "huffdec.c", + "huffenc.c", + "idct.c", + "info.c", + "internal.c", + "mathops.c", + "mcenc.c", + "quant.c", + "rate.c", + "state.c", + "tokenize.c", + ] + + thirdparty_sources_x86 = [ + "x86/mmxencfrag.c", + "x86/mmxfdct.c", + "x86/mmxfrag.c", + "x86/mmxidct.c", + "x86/mmxstate.c", + "x86/sse2fdct.c", + "x86/x86enc.c", + "x86/x86state.c", + ] + + thirdparty_sources_x86_vc = [ + "x86_vc/mmxencfrag.c", + "x86_vc/mmxfdct.c", + "x86_vc/mmxfrag.c", + "x86_vc/mmxidct.c", + "x86_vc/mmxstate.c", + "x86_vc/x86enc.c", + "x86_vc/x86state.c", + ] + + if (env["x86_opt_gcc"]): + thirdparty_sources 
+= thirdparty_sources_x86 + + if (env["x86_opt_vc"]): + thirdparty_sources += thirdparty_sources_x86_vc + + if (env["x86_opt_gcc"] or env["x86_opt_vc"]): + env_theora.Append(CCFLAGS = ["-DOC_X86_ASM"]) + + thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] + + env_theora.add_source_files(env.modules_sources, thirdparty_sources) + env_theora.Append(CPPPATH = [thirdparty_dir]) + + # also requires libogg and libvorbis + if (env["libogg"] != "system"): # builtin + env_theora.Append(CPPPATH = ["#thirdparty/libogg"]) + if (env["libvorbis"] != "system"): # builtin + env_theora.Append(CPPPATH = ["#thirdparty/libvorbis"]) + +# Godot source files +env_theora.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/theora/config.py b/modules/theora/config.py new file mode 100644 index 0000000000..368e97e152 --- /dev/null +++ b/modules/theora/config.py @@ -0,0 +1,6 @@ + +def can_build(platform): + return True + +def configure(env): + pass diff --git a/modules/theora/register_types.cpp b/modules/theora/register_types.cpp new file mode 100644 index 0000000000..282b59b0ec --- /dev/null +++ b/modules/theora/register_types.cpp @@ -0,0 +1,45 @@ +/*************************************************************************/ +/* register_types.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* http://www.godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/*************************************************************************/ +#include "register_types.h" + +#include "video_stream_theora.h" + +static ResourceFormatLoaderVideoStreamTheora* theora_stream_loader = NULL; + +void register_theora_types() { + + theora_stream_loader = memnew( ResourceFormatLoaderVideoStreamTheora ); + ResourceLoader::add_resource_format_loader(theora_stream_loader); + ObjectTypeDB::register_type(); +} + +void unregister_theora_types() { + + memdelete( theora_stream_loader ); +} diff --git a/modules/theora/register_types.h b/modules/theora/register_types.h new file mode 100644 index 0000000000..18bdbf0c4c --- /dev/null +++ b/modules/theora/register_types.h @@ -0,0 +1,30 @@ +/*************************************************************************/ +/* register_types.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* http://www.godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. 
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ +void register_theora_types(); +void unregister_theora_types(); diff --git a/modules/theora/video_stream_theora.cpp b/modules/theora/video_stream_theora.cpp new file mode 100644 index 0000000000..3ddfee3a1d --- /dev/null +++ b/modules/theora/video_stream_theora.cpp @@ -0,0 +1,940 @@ +/*************************************************************************/ +/* video_stream_theora.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* http://www.godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. 
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ +#include "video_stream_theora.h" + +#include "globals.h" +#include "os/os.h" +#include "yuv2rgb.h" + +int VideoStreamPlaybackTheora:: buffer_data() { + + char *buffer=ogg_sync_buffer(&oy,4096); + +#ifdef THEORA_USE_THREAD_STREAMING + + int read; + + do { + thread_sem->post(); + read = MIN(ring_buffer.data_left(),4096); + if (read) { + ring_buffer.read((uint8_t*)buffer,read); + ogg_sync_wrote(&oy,read); + } else { + OS::get_singleton()->delay_usec(100); + } + + } while(read==0); + + return read; + +#else + + int bytes=file->get_buffer((uint8_t*)buffer, 4096); + ogg_sync_wrote(&oy,bytes); + return(bytes); + +#endif +} + +int VideoStreamPlaybackTheora::queue_page(ogg_page *page){ + if(theora_p) { + ogg_stream_pagein(&to,page); + if (to.e_o_s) + theora_eos=true; + } + if(vorbis_p) { + ogg_stream_pagein(&vo,page); + if (vo.e_o_s) + vorbis_eos=true; + } + return 0; +} + +void VideoStreamPlaybackTheora::video_write(void){ + th_ycbcr_buffer yuv; + th_decode_ycbcr_out(td,yuv); + + /* + int y_offset, uv_offset; + y_offset=(ti.pic_x&~1)+yuv[0].stride*(ti.pic_y&~1); + + { + int pixels = size.x * size.y; + frame_data.resize(pixels * 4); + DVector::Write w = frame_data.write(); + char* dst = (char*)w.ptr(); + int p = 0; + for (int i=0; i::Write w = frame_data.write(); + char* dst = (char*)w.ptr(); + + //uv_offset=(ti.pic_x/2)+(yuv[1].stride)*(ti.pic_y/2); + + if (px_fmt == TH_PF_444) { + + 
yuv444_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0); + + } else if (px_fmt == TH_PF_422) { + + yuv422_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0); + + } else if (px_fmt == TH_PF_420) { + + yuv420_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[2].data, (uint8_t*)yuv[1].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0); + }; + + format = Image::FORMAT_RGBA; + } + + Image img(size.x,size.y,0,Image::FORMAT_RGBA,frame_data); //zero copy image creation + + texture->set_data(img); //zero copy send to visual server + + /* + + if (px_fmt == TH_PF_444) { + + int pitch = 3; + frame_data.resize(size.x * size.y * pitch); + DVector::Write w = frame_data.write(); + char* dst = (char*)w.ptr(); + + for(int i=0;i::Write w = frame_data.write(); + char* dst = (char*)w.ptr(); + + uv_offset=(ti.pic_x/2)+(yuv[1].stride)*(ti.pic_y / div); + for(int i=0;i::Write w = frame_data.write(); + char* dst = (char*)w.ptr(); + + uv_offset=(ti.pic_x/2)+(yuv[1].stride)*(ti.pic_y / div); + for(int i=0;i>1;j++) { + out[j*4+1] = in_u[j]; + out[j*4+3] = in_v[j]; + } + } + + format = Image::FORMAT_YUV_422; + }; + }; + // */ + + frames_pending = 1; +} + +void VideoStreamPlaybackTheora::clear() { + + if (!file) + return; + + if(vorbis_p){ + ogg_stream_clear(&vo); + if (vorbis_p >= 3) { + vorbis_block_clear(&vb); + vorbis_dsp_clear(&vd); + }; + vorbis_comment_clear(&vc); + vorbis_info_clear(&vi); + vorbis_p = 0; + } + if(theora_p){ + ogg_stream_clear(&to); + th_decode_free(td); + th_comment_clear(&tc); + th_info_clear(&ti); + theora_p = 0; + } + ogg_sync_clear(&oy); + +#ifdef THEORA_USE_THREAD_STREAMING + thread_exit=true; + thread_sem->post(); //just in case + Thread::wait_to_finish(thread); + memdelete(thread); + thread=NULL; + ring_buffer.clear(); +#endif + 
//file_name = ""; + + theora_p = 0; + vorbis_p = 0; + videobuf_ready = 0; + frames_pending = 0; + videobuf_time = 0; + theora_eos=false; + vorbis_eos=false; + + if (file) { + memdelete(file); + } + file=NULL; + playing = false; +}; + +void VideoStreamPlaybackTheora::set_file(const String& p_file) { + + ERR_FAIL_COND(playing); + ogg_packet op; + th_setup_info *ts = NULL; + + file_name = p_file; + if (file) { + memdelete(file); + } + file = FileAccess::open(p_file, FileAccess::READ); + ERR_FAIL_COND(!file); + +#ifdef THEORA_USE_THREAD_STREAMING + thread_exit=false; + thread_eof=false; + //pre-fill buffer + int to_read = ring_buffer.space_left(); + int read = file->get_buffer(read_buffer.ptr(),to_read); + ring_buffer.write(read_buffer.ptr(),read); + + thread=Thread::create(_streaming_thread,this); + +#endif + + ogg_sync_init(&oy); + + /* init supporting Vorbis structures needed in header parsing */ + vorbis_info_init(&vi); + vorbis_comment_init(&vc); + + /* init supporting Theora structures needed in header parsing */ + th_comment_init(&tc); + th_info_init(&ti); + + theora_eos=false; + vorbis_eos=false; + + /* Ogg file open; parse the headers */ + /* Only interested in Vorbis/Theora streams */ + int stateflag = 0; + + int audio_track_skip=audio_track; + + + while(!stateflag){ + int ret=buffer_data(); + if(ret==0)break; + while(ogg_sync_pageout(&oy,&og)>0){ + ogg_stream_state test; + + /* is this a mandated initial header? 
If not, stop parsing */ + if(!ogg_page_bos(&og)){ + /* don't leak the page; get it into the appropriate stream */ + queue_page(&og); + stateflag=1; + break; + } + + ogg_stream_init(&test,ogg_page_serialno(&og)); + ogg_stream_pagein(&test,&og); + ogg_stream_packetout(&test,&op); + + + /* identify the codec: try theora */ + if(!theora_p && th_decode_headerin(&ti,&tc,&ts,&op)>=0){ + /* it is theora */ + copymem(&to,&test,sizeof(test)); + theora_p=1; + }else if(!vorbis_p && vorbis_synthesis_headerin(&vi,&vc,&op)>=0){ + + + /* it is vorbis */ + if (audio_track_skip) { + vorbis_info_clear(&vi); + vorbis_comment_clear(&vc); + ogg_stream_clear(&test); + vorbis_info_init(&vi); + vorbis_comment_init(&vc); + + audio_track_skip--; + } else { + copymem(&vo,&test,sizeof(test)); + vorbis_p=1; + } + }else{ + /* whatever it is, we don't care about it */ + ogg_stream_clear(&test); + } + } + /* fall through to non-bos page parsing */ + } + + /* we're expecting more header packets. */ + while((theora_p && theora_p<3) || (vorbis_p && vorbis_p<3)){ + int ret; + + /* look for further theora headers */ + while(theora_p && (theora_p<3) && (ret=ogg_stream_packetout(&to,&op))){ + if(ret<0){ + fprintf(stderr,"Error parsing Theora stream headers; " + "corrupt stream?\n"); + clear(); + return; + } + if(!th_decode_headerin(&ti,&tc,&ts,&op)){ + fprintf(stderr,"Error parsing Theora stream headers; " + "corrupt stream?\n"); + clear(); + return; + } + theora_p++; + } + + /* look for more vorbis header packets */ + while(vorbis_p && (vorbis_p<3) && (ret=ogg_stream_packetout(&vo,&op))){ + if(ret<0){ + fprintf(stderr,"Error parsing Vorbis stream headers; corrupt stream?\n"); + clear(); + return; + } + ret = vorbis_synthesis_headerin(&vi,&vc,&op); + if(ret){ + fprintf(stderr,"Error parsing Vorbis stream headers; corrupt stream?\n"); + clear(); + return; + } + vorbis_p++; + if(vorbis_p==3)break; + } + + /* The header pages/packets will arrive before anything else we + care about, or the stream is not 
obeying spec */ + + if(ogg_sync_pageout(&oy,&og)>0){ + queue_page(&og); /* demux into the appropriate stream */ + }else{ + int ret=buffer_data(); /* someone needs more data */ + if(ret==0){ + fprintf(stderr,"End of file while searching for codec headers.\n"); + clear(); + return; + } + } + } + + /* and now we have it all. initialize decoders */ + if(theora_p){ + td=th_decode_alloc(&ti,ts); + printf("Ogg logical stream %lx is Theora %dx%d %.02f fps", + to.serialno,ti.pic_width,ti.pic_height, + (double)ti.fps_numerator/ti.fps_denominator); + px_fmt=ti.pixel_fmt; + switch(ti.pixel_fmt){ + case TH_PF_420: printf(" 4:2:0 video\n"); break; + case TH_PF_422: printf(" 4:2:2 video\n"); break; + case TH_PF_444: printf(" 4:4:4 video\n"); break; + case TH_PF_RSVD: + default: + printf(" video\n (UNKNOWN Chroma sampling!)\n"); + break; + } + if(ti.pic_width!=ti.frame_width || ti.pic_height!=ti.frame_height) + printf(" Frame content is %dx%d with offset (%d,%d).\n", + ti.frame_width, ti.frame_height, ti.pic_x, ti.pic_y); + th_decode_ctl(td,TH_DECCTL_GET_PPLEVEL_MAX,&pp_level_max, + sizeof(pp_level_max)); + pp_level=pp_level_max; + pp_level=0; + th_decode_ctl(td,TH_DECCTL_SET_PPLEVEL,&pp_level,sizeof(pp_level)); + pp_inc=0; + + /*{ + int arg = 0xffff; + th_decode_ctl(td,TH_DECCTL_SET_TELEMETRY_MBMODE,&arg,sizeof(arg)); + th_decode_ctl(td,TH_DECCTL_SET_TELEMETRY_MV,&arg,sizeof(arg)); + th_decode_ctl(td,TH_DECCTL_SET_TELEMETRY_QI,&arg,sizeof(arg)); + arg=10; + th_decode_ctl(td,TH_DECCTL_SET_TELEMETRY_BITS,&arg,sizeof(arg)); + }*/ + + int w; + int h; + w=(ti.pic_x+ti.frame_width+1&~1)-(ti.pic_x&~1); + h=(ti.pic_y+ti.frame_height+1&~1)-(ti.pic_y&~1); + size.x = w; + size.y = h; + + texture->create(w,h,Image::FORMAT_RGBA,Texture::FLAG_FILTER|Texture::FLAG_VIDEO_SURFACE); + + }else{ + /* tear down the partial theora setup */ + th_info_clear(&ti); + th_comment_clear(&tc); + } + + th_setup_free(ts); + + if(vorbis_p){ + vorbis_synthesis_init(&vd,&vi); + vorbis_block_init(&vd,&vb); + 
fprintf(stderr,"Ogg logical stream %lx is Vorbis %d channel %ld Hz audio.\n", + vo.serialno,vi.channels,vi.rate); + //_setup(vi.channels, vi.rate); + + }else{ + /* tear down the partial vorbis setup */ + vorbis_info_clear(&vi); + vorbis_comment_clear(&vc); + } + + playing = false; + buffering=true; + time=0; + audio_frames_wrote=0; + + +}; + +float VideoStreamPlaybackTheora::get_time() const { + + //print_line("total: "+itos(get_total())+" todo: "+itos(get_todo())); + //return MAX(0,time-((get_total())/(float)vi.rate)); + return time-AudioServer::get_singleton()->get_output_delay()-delay_compensation;//-((get_total())/(float)vi.rate); +}; + +Ref VideoStreamPlaybackTheora::get_texture() { + + return texture; +} + +void VideoStreamPlaybackTheora::update(float p_delta) { + + if (!file) + return; + + if (!playing || paused) { + //printf("not playing\n"); + return; + }; + + + +#ifdef THEORA_USE_THREAD_STREAMING + thread_sem->post(); +#endif + + //double ctime =AudioServer::get_singleton()->get_mix_time(); + + //print_line("play "+rtos(p_delta)); + time+=p_delta; + + if (videobuf_time>get_time()) { + return; //no new frames need to be produced + } + + bool frame_done=false; + bool audio_done=!vorbis_p; + + while (!frame_done || (!audio_done && !vorbis_eos)) { + //a frame needs to be produced + + ogg_packet op; + bool no_theora=false; + + + while (vorbis_p) { + int ret; + float **pcm; + + bool buffer_full=false; + + /* if there's pending, decoded audio, grab it */ + if ((ret=vorbis_synthesis_pcmout(&vd,&pcm))>0) { + + + + const int AUXBUF_LEN=4096; + int to_read = ret; + int16_t aux_buffer[AUXBUF_LEN]; + + while(to_read) { + + int m = MIN(AUXBUF_LEN/vi.channels,to_read); + + int count = 0; + + for(int j=0;j32767)val=32767; + if(val<-32768)val=-32768; + aux_buffer[count++] = val; + } + } + + if (mix_callback) { + int mixed = mix_callback(mix_udata,aux_buffer,m); + to_read-=mixed; + if (mixed!=m) { //could mix no more + buffer_full=true; + break; + } + } else { + 
to_read-=m; //just pretend we sent the audio + } + + + } + + + int tr = vorbis_synthesis_read(&vd, ret-to_read); + + + if (vd.granulepos>=0) { + // print_line("wrote: "+itos(audio_frames_wrote)+" gpos: "+itos(vd.granulepos)); + } + + //print_line("mix audio!"); + + audio_frames_wrote+=ret-to_read; + + //print_line("AGP: "+itos(vd.granulepos)+" added "+itos(ret-to_read)); + + + } else { + + /* no pending audio; is there a pending packet to decode? */ + if (ogg_stream_packetout(&vo,&op)>0){ + if(vorbis_synthesis(&vb,&op)==0) { /* test for success! */ + vorbis_synthesis_blockin(&vd,&vb); + } + } else { /* we need more data; break out to suck in another page */ + //printf("need moar data\n"); + break; + }; + } + + + audio_done = videobuf_time < (audio_frames_wrote/float(vi.rate)); + + if (buffer_full) + break; + } + + while(theora_p && !frame_done){ + /* theora is one in, one out... */ + if(ogg_stream_packetout(&to,&op)>0){ + + + if(false && pp_inc){ + pp_level+=pp_inc; + th_decode_ctl(td,TH_DECCTL_SET_PPLEVEL,&pp_level, + sizeof(pp_level)); + pp_inc=0; + } + /*HACK: This should be set after a seek or a gap, but we might not have + a granulepos for the first packet (we only have them for the last + packet on a page), so we just set it as often as we get it. + To do this right, we should back-track from the last packet on the + page and compute the correct granulepos for the first packet after + a seek or a gap.*/ + if(op.granulepos>=0){ + th_decode_ctl(td,TH_DECCTL_SET_GRANPOS,&op.granulepos, + sizeof(op.granulepos)); + } + ogg_int64_t videobuf_granulepos; + if(th_decode_packetin(td,&op,&videobuf_granulepos)==0){ + videobuf_time=th_granule_time(td,videobuf_granulepos); + + //printf("frame time %f, play time %f, ready %i\n", (float)videobuf_time, get_time(), videobuf_ready); + + /* is it already too old to be useful? This is only actually + useful cosmetically after a SIGSTOP. 
Note that we have to + decode the frame even if we don't show it (for now) due to + keyframing. Soon enough libtheora will be able to deal + with non-keyframe seeks. */ + + if(videobuf_time>=get_time()) { + frame_done=true; + } else{ + /*If we are too slow, reduce the pp level.*/ + pp_inc=pp_level>0?-1:0; + } + } else { + + } + + } else { + no_theora=true; + break; + } + } + + + //print_line("no theora: "+itos(no_theora)+" theora eos: "+itos(theora_eos)+" frame done "+itos(frame_done)); + +#ifdef THEORA_USE_THREAD_STREAMING + if (file && thread_eof && no_theora && theora_eos && ring_buffer.data_left()==0) { +#else + if (file && /*!videobuf_ready && */ no_theora && theora_eos) { +#endif + printf("video done, stopping\n"); + stop(); + return; + }; + #if 0 + if (!videobuf_ready || audio_todo > 0){ + /* no data yet for somebody. Grab another page */ + + buffer_data(); + while(ogg_sync_pageout(&oy,&og)>0){ + queue_page(&og); + } + } + #else + + + if (!frame_done || !audio_done){ + //what's the point of waiting for audio to grab a page? + + buffer_data(); + while(ogg_sync_pageout(&oy,&og)>0){ + queue_page(&og); + } + } + #endif + /* If playback has begun, top audio buffer off immediately. */ + //if(stateflag) audio_write_nonblocking(); + + /* are we at or past time for this video frame? 
*/ + if(videobuf_ready && videobuf_time<=get_time()){ + + //video_write(); + //videobuf_ready=0; + } else { + //printf("frame at %f not ready (time %f), ready %i\n", (float)videobuf_time, get_time(), videobuf_ready); + } + + float tdiff=videobuf_time-get_time(); + /*If we have lots of extra time, increase the post-processing level.*/ + if(tdiff>ti.fps_denominator*0.25/ti.fps_numerator){ + pp_inc=pp_level0?-1:0; + } + + } + + video_write(); + +}; + + +void VideoStreamPlaybackTheora::play() { + + if (!playing) + time=0; + else { + stop(); + } + + playing = true; + delay_compensation=Globals::get_singleton()->get("audio/video_delay_compensation_ms"); + delay_compensation/=1000.0; + + +}; + +void VideoStreamPlaybackTheora::stop() { + + if (playing) { + + clear(); + set_file(file_name); //reset + } + playing = false; + time=0; +}; + +bool VideoStreamPlaybackTheora::is_playing() const { + + return playing; +}; + +void VideoStreamPlaybackTheora::set_paused(bool p_paused) { + + paused=p_paused; + //pau = !p_paused; +}; + +bool VideoStreamPlaybackTheora::is_paused(bool p_paused) const { + + return paused; +}; + +void VideoStreamPlaybackTheora::set_loop(bool p_enable) { + +}; + +bool VideoStreamPlaybackTheora::has_loop() const { + + return false; +}; + +float VideoStreamPlaybackTheora::get_length() const { + + return 0; +}; + +String VideoStreamPlaybackTheora::get_stream_name() const { + + return ""; +}; + +int VideoStreamPlaybackTheora::get_loop_count() const { + + return 0; +}; + +float VideoStreamPlaybackTheora::get_pos() const { + + return get_time(); +}; + +void VideoStreamPlaybackTheora::seek_pos(float p_time) { + + // no +}; + +void VideoStreamPlaybackTheora::set_mix_callback(AudioMixCallback p_callback,void *p_userdata) { + + mix_callback=p_callback; + mix_udata=p_userdata; +} + +int VideoStreamPlaybackTheora::get_channels() const{ + + return vi.channels; +} + +void VideoStreamPlaybackTheora::set_audio_track(int p_idx) { + + audio_track=p_idx; +} + +int 
VideoStreamPlaybackTheora::get_mix_rate() const{ + + return vi.rate; +} + +#ifdef THEORA_USE_THREAD_STREAMING + + +void VideoStreamPlaybackTheora::_streaming_thread(void *ud) { + + VideoStreamPlaybackTheora *vs=(VideoStreamPlaybackTheora*)ud; + + while(!vs->thread_exit) { + + //just fill back the buffer + if (!vs->thread_eof) { + + int to_read = vs->ring_buffer.space_left(); + if (to_read) { + int read = vs->file->get_buffer(vs->read_buffer.ptr(),to_read); + vs->ring_buffer.write(vs->read_buffer.ptr(),read); + vs->thread_eof=vs->file->eof_reached(); + } + + + } + + vs->thread_sem->wait(); + } +} + +#endif + +VideoStreamPlaybackTheora::VideoStreamPlaybackTheora() { + + file = NULL; + theora_p = 0; + vorbis_p = 0; + videobuf_ready = 0; + playing = false; + frames_pending = 0; + videobuf_time = 0; + paused=false; + + buffering=false; + texture = Ref( memnew(ImageTexture )); + mix_callback=NULL; + mix_udata=NULL; + audio_track=0; + delay_compensation=0; + audio_frames_wrote=0; + +#ifdef THEORA_USE_THREAD_STREAMING + int rb_power = nearest_shift(RB_SIZE_KB*1024); + ring_buffer.resize(rb_power); + read_buffer.resize(RB_SIZE_KB*1024); + thread_sem=Semaphore::create(); + thread=NULL; + thread_exit=false; + thread_eof=false; + +#endif +}; + +VideoStreamPlaybackTheora::~VideoStreamPlaybackTheora() { + +#ifdef THEORA_USE_THREAD_STREAMING + + memdelete(thread_sem); +#endif + clear(); + + if (file) + memdelete(file); + + +}; + + +RES ResourceFormatLoaderVideoStreamTheora::load(const String &p_path,const String& p_original_path, Error *r_error) { + if (r_error) + *r_error=ERR_FILE_CANT_OPEN; + + VideoStreamTheora *stream = memnew(VideoStreamTheora); + stream->set_file(p_path); + + if (r_error) + *r_error=OK; + + return Ref(stream); +} + +void ResourceFormatLoaderVideoStreamTheora::get_recognized_extensions(List *p_extensions) const { + + p_extensions->push_back("ogm"); + p_extensions->push_back("ogv"); +} +bool ResourceFormatLoaderVideoStreamTheora::handles_type(const String& 
p_type) const { + return (p_type=="VideoStream" || p_type=="VideoStreamTheora"); +} + +String ResourceFormatLoaderVideoStreamTheora::get_resource_type(const String &p_path) const { + + String exl=p_path.extension().to_lower(); + if (exl=="ogm" || exl=="ogv") + return "VideoStreamTheora"; + return ""; +} diff --git a/modules/theora/video_stream_theora.h b/modules/theora/video_stream_theora.h new file mode 100644 index 0000000000..04a5c56ee5 --- /dev/null +++ b/modules/theora/video_stream_theora.h @@ -0,0 +1,199 @@ +/*************************************************************************/ +/* video_stream_theora.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* http://www.godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. 
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ +#ifndef VIDEO_STREAM_THEORA_H +#define VIDEO_STREAM_THEORA_H + +#include "io/resource_loader.h" +#include "os/file_access.h" +#include "os/thread.h" +#include "os/semaphore.h" +#include "ring_buffer.h" +#include "scene/resources/video_stream.h" + +#include +#include + +//#define THEORA_USE_THREAD_STREAMING + +class VideoStreamPlaybackTheora : public VideoStreamPlayback { + + OBJ_TYPE(VideoStreamPlaybackTheora, VideoStreamPlayback); + + enum { + MAX_FRAMES = 4, + }; + + //Image frames[MAX_FRAMES]; + Image::Format format; + DVector frame_data; + int frames_pending; + FileAccess* file; + String file_name; + int audio_frames_wrote; + Point2i size; + + int buffer_data(); + int queue_page(ogg_page *page); + void video_write(void); + float get_time() const; + + bool theora_eos; + bool vorbis_eos; + + ogg_sync_state oy; + ogg_page og; + ogg_stream_state vo; + ogg_stream_state to; + th_info ti; + th_comment tc; + th_dec_ctx *td; + vorbis_info vi; + vorbis_dsp_state vd; + vorbis_block vb; + vorbis_comment vc; + th_pixel_fmt px_fmt; + double videobuf_time; + int pp_inc; + + int theora_p; + int vorbis_p; + int pp_level_max; + int pp_level; + int videobuf_ready; + + bool playing; + bool buffering; + + double last_update_time; + double time; + double delay_compensation; + + Ref texture; + + AudioMixCallback mix_callback; + void* mix_udata; + bool paused; + +#ifdef THEORA_USE_THREAD_STREAMING + 
+ enum { + RB_SIZE_KB=1024 + }; + + RingBuffer ring_buffer; + Vector read_buffer; + bool thread_eof; + Semaphore *thread_sem; + Thread *thread; + volatile bool thread_exit; + + static void _streaming_thread(void *ud); + +#endif + + + int audio_track; + +protected: + + void clear(); + +public: + + virtual void play(); + virtual void stop(); + virtual bool is_playing() const; + + virtual void set_paused(bool p_paused); + virtual bool is_paused(bool p_paused) const; + + virtual void set_loop(bool p_enable); + virtual bool has_loop() const; + + virtual float get_length() const; + + virtual String get_stream_name() const; + + virtual int get_loop_count() const; + + virtual float get_pos() const; + virtual void seek_pos(float p_time); + + + void set_file(const String& p_file); + + virtual Ref get_texture(); + virtual void update(float p_delta); + + virtual void set_mix_callback(AudioMixCallback p_callback,void *p_userdata); + virtual int get_channels() const; + virtual int get_mix_rate() const; + + virtual void set_audio_track(int p_idx); + + VideoStreamPlaybackTheora(); + ~VideoStreamPlaybackTheora(); +}; + + + +class VideoStreamTheora : public VideoStream { + + OBJ_TYPE(VideoStreamTheora,VideoStream); + + String file; + int audio_track; + + +public: + + Ref instance_playback() { + Ref pb = memnew( VideoStreamPlaybackTheora ); + pb->set_audio_track(audio_track); + pb->set_file(file); + return pb; + } + + void set_file(const String& p_file) { file=p_file; } + void set_audio_track(int p_track) { audio_track=p_track; } + + VideoStreamTheora() { audio_track=0; } + +}; + +class ResourceFormatLoaderVideoStreamTheora : public ResourceFormatLoader { +public: + virtual RES load(const String &p_path,const String& p_original_path="",Error *r_error=NULL); + virtual void get_recognized_extensions(List *p_extensions) const; + virtual bool handles_type(const String& p_type) const; + virtual String get_resource_type(const String &p_path) const; + +}; + +#endif diff --git 
a/modules/theora/yuv2rgb.h b/modules/theora/yuv2rgb.h new file mode 100644 index 0000000000..59101bd057 --- /dev/null +++ b/modules/theora/yuv2rgb.h @@ -0,0 +1,1121 @@ +#ifndef YUV2RGB_H +#define YUV2RGB_H + +#include "typedefs.h" + +static const uint32_t tables[256*3] = +{ + /* y_table */ + 0x7FFFFFEDU, + 0x7FFFFFEFU, + 0x7FFFFFF0U, + 0x7FFFFFF1U, + 0x7FFFFFF2U, + 0x7FFFFFF3U, + 0x7FFFFFF4U, + 0x7FFFFFF6U, + 0x7FFFFFF7U, + 0x7FFFFFF8U, + 0x7FFFFFF9U, + 0x7FFFFFFAU, + 0x7FFFFFFBU, + 0x7FFFFFFDU, + 0x7FFFFFFEU, + 0x7FFFFFFFU, + 0x80000000U, + 0x80400801U, + 0x80A01002U, + 0x80E01803U, + 0x81202805U, + 0x81803006U, + 0x81C03807U, + 0x82004008U, + 0x82604809U, + 0x82A0500AU, + 0x82E0600CU, + 0x8340680DU, + 0x8380700EU, + 0x83C0780FU, + 0x84208010U, + 0x84608811U, + 0x84A09813U, + 0x8500A014U, + 0x8540A815U, + 0x8580B016U, + 0x85E0B817U, + 0x8620C018U, + 0x8660D01AU, + 0x86C0D81BU, + 0x8700E01CU, + 0x8740E81DU, + 0x87A0F01EU, + 0x87E0F81FU, + 0x88210821U, + 0x88811022U, + 0x88C11823U, + 0x89012024U, + 0x89412825U, + 0x89A13026U, + 0x89E14028U, + 0x8A214829U, + 0x8A81502AU, + 0x8AC1582BU, + 0x8B01602CU, + 0x8B61682DU, + 0x8BA1782FU, + 0x8BE18030U, + 0x8C418831U, + 0x8C819032U, + 0x8CC19833U, + 0x8D21A034U, + 0x8D61B036U, + 0x8DA1B837U, + 0x8E01C038U, + 0x8E41C839U, + 0x8E81D03AU, + 0x8EE1D83BU, + 0x8F21E83DU, + 0x8F61F03EU, + 0x8FC1F83FU, + 0x90020040U, + 0x90420841U, + 0x90A21042U, + 0x90E22044U, + 0x91222845U, + 0x91823046U, + 0x91C23847U, + 0x92024048U, + 0x92624849U, + 0x92A2504AU, + 0x92E2604CU, + 0x9342684DU, + 0x9382704EU, + 0x93C2784FU, + 0x94228050U, + 0x94628851U, + 0x94A29853U, + 0x9502A054U, + 0x9542A855U, + 0x9582B056U, + 0x95E2B857U, + 0x9622C058U, + 0x9662D05AU, + 0x96C2D85BU, + 0x9702E05CU, + 0x9742E85DU, + 0x97A2F05EU, + 0x97E2F85FU, + 0x98230861U, + 0x98831062U, + 0x98C31863U, + 0x99032064U, + 0x99632865U, + 0x99A33066U, + 0x99E34068U, + 0x9A434869U, + 0x9A83506AU, + 0x9AC3586BU, + 0x9B23606CU, + 0x9B63686DU, + 0x9BA3786FU, + 0x9BE38070U, + 
0x9C438871U, + 0x9C839072U, + 0x9CC39873U, + 0x9D23A074U, + 0x9D63B076U, + 0x9DA3B877U, + 0x9E03C078U, + 0x9E43C879U, + 0x9E83D07AU, + 0x9EE3D87BU, + 0x9F23E87DU, + 0x9F63F07EU, + 0x9FC3F87FU, + 0xA0040080U, + 0xA0440881U, + 0xA0A41082U, + 0xA0E42084U, + 0xA1242885U, + 0xA1843086U, + 0xA1C43887U, + 0xA2044088U, + 0xA2644889U, + 0xA2A4588BU, + 0xA2E4608CU, + 0xA344688DU, + 0xA384708EU, + 0xA3C4788FU, + 0xA4248090U, + 0xA4649092U, + 0xA4A49893U, + 0xA504A094U, + 0xA544A895U, + 0xA584B096U, + 0xA5E4B897U, + 0xA624C098U, + 0xA664D09AU, + 0xA6C4D89BU, + 0xA704E09CU, + 0xA744E89DU, + 0xA7A4F09EU, + 0xA7E4F89FU, + 0xA82508A1U, + 0xA88510A2U, + 0xA8C518A3U, + 0xA90520A4U, + 0xA96528A5U, + 0xA9A530A6U, + 0xA9E540A8U, + 0xAA4548A9U, + 0xAA8550AAU, + 0xAAC558ABU, + 0xAB2560ACU, + 0xAB6568ADU, + 0xABA578AFU, + 0xAC0580B0U, + 0xAC4588B1U, + 0xAC8590B2U, + 0xACE598B3U, + 0xAD25A0B4U, + 0xAD65B0B6U, + 0xADA5B8B7U, + 0xAE05C0B8U, + 0xAE45C8B9U, + 0xAE85D0BAU, + 0xAEE5D8BBU, + 0xAF25E8BDU, + 0xAF65F0BEU, + 0xAFC5F8BFU, + 0xB00600C0U, + 0xB04608C1U, + 0xB0A610C2U, + 0xB0E620C4U, + 0xB12628C5U, + 0xB18630C6U, + 0xB1C638C7U, + 0xB20640C8U, + 0xB26648C9U, + 0xB2A658CBU, + 0xB2E660CCU, + 0xB34668CDU, + 0xB38670CEU, + 0xB3C678CFU, + 0xB42680D0U, + 0xB46690D2U, + 0xB4A698D3U, + 0xB506A0D4U, + 0xB546A8D5U, + 0xB586B0D6U, + 0xB5E6B8D7U, + 0xB626C8D9U, + 0xB666D0DAU, + 0xB6C6D8DBU, + 0xB706E0DCU, + 0xB746E8DDU, + 0xB7A6F0DEU, + 0xB7E6F8DFU, + 0xB82708E1U, + 0xB88710E2U, + 0xB8C718E3U, + 0xB90720E4U, + 0xB96728E5U, + 0xB9A730E6U, + 0xB9E740E8U, + 0xBA4748E9U, + 0xBA8750EAU, + 0xBAC758EBU, + 0xBB2760ECU, + 0xBB6768EDU, + 0xBBA778EFU, + 0xBC0780F0U, + 0xBC4788F1U, + 0xBC8790F2U, + 0xBCE798F3U, + 0xBD27A0F4U, + 0xBD67B0F6U, + 0xBDC7B8F7U, + 0xBE07C0F8U, + 0xBE47C8F9U, + 0xBEA7D0FAU, + 0xBEE7D8FBU, + 0xBF27E8FDU, + 0xBF87F0FEU, + 0xBFC7F8FFU, + 0xC0080100U, + 0xC0480901U, + 0xC0A81102U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 
0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + /* u_table */ + 0x0C400103U, + 0x0C200105U, + 0x0C200107U, + 0x0C000109U, + 0x0BE0010BU, + 0x0BC0010DU, + 0x0BA0010FU, + 0x0BA00111U, + 0x0B800113U, + 0x0B600115U, + 0x0B400117U, + 0x0B400119U, + 0x0B20011BU, + 0x0B00011DU, + 0x0AE0011FU, + 0x0AE00121U, + 0x0AC00123U, + 0x0AA00125U, + 0x0A800127U, + 0x0A600129U, + 0x0A60012BU, + 0x0A40012DU, + 0x0A20012FU, + 0x0A000131U, + 0x0A000132U, + 0x09E00134U, + 0x09C00136U, + 0x09A00138U, + 0x09A0013AU, + 0x0980013CU, + 0x0960013EU, + 0x09400140U, + 0x09400142U, + 0x09200144U, + 0x09000146U, + 0x08E00148U, + 0x08C0014AU, + 0x08C0014CU, + 0x08A0014EU, + 0x08800150U, + 0x08600152U, + 0x08600154U, + 0x08400156U, + 0x08200158U, + 0x0800015AU, + 0x0800015CU, + 0x07E0015EU, + 0x07C00160U, + 0x07A00162U, + 0x07A00164U, + 0x07800166U, + 0x07600168U, + 0x0740016AU, + 0x0720016CU, + 0x0720016EU, + 0x07000170U, + 0x06E00172U, + 0x06C00174U, + 0x06C00176U, + 0x06A00178U, + 0x0680017AU, + 0x0660017CU, + 0x0660017EU, + 0x06400180U, + 0x06200182U, + 0x06000184U, + 0x05E00185U, + 0x05E00187U, + 0x05C00189U, + 0x05A0018BU, + 0x0580018DU, + 0x0580018FU, + 0x05600191U, + 0x05400193U, + 0x05200195U, + 0x05200197U, + 0x05000199U, + 0x04E0019BU, + 0x04C0019DU, + 0x04C0019FU, + 0x04A001A1U, + 0x048001A3U, + 0x046001A5U, + 0x044001A7U, + 0x044001A9U, + 0x042001ABU, + 0x040001ADU, + 0x03E001AFU, + 0x03E001B1U, + 0x03C001B3U, + 0x03A001B5U, + 0x038001B7U, + 0x038001B9U, + 0x036001BBU, + 0x034001BDU, + 0x032001BFU, + 0x032001C1U, + 0x030001C3U, + 0x02E001C5U, + 0x02C001C7U, + 0x02A001C9U, + 0x02A001CBU, + 0x028001CDU, + 0x026001CFU, + 0x024001D1U, + 0x024001D3U, + 0x022001D5U, + 0x020001D7U, + 0x01E001D8U, + 0x01E001DAU, + 0x01C001DCU, + 0x01A001DEU, + 0x018001E0U, + 0x016001E2U, + 0x016001E4U, + 0x014001E6U, + 0x012001E8U, + 0x010001EAU, + 0x010001ECU, + 0x00E001EEU, + 0x00C001F0U, + 0x00A001F2U, + 
0x00A001F4U, + 0x008001F6U, + 0x006001F8U, + 0x004001FAU, + 0x004001FCU, + 0x002001FEU, + 0x00000200U, + 0xFFE00202U, + 0xFFC00204U, + 0xFFC00206U, + 0xFFA00208U, + 0xFF80020AU, + 0xFF60020CU, + 0xFF60020EU, + 0xFF400210U, + 0xFF200212U, + 0xFF000214U, + 0xFF000216U, + 0xFEE00218U, + 0xFEC0021AU, + 0xFEA0021CU, + 0xFEA0021EU, + 0xFE800220U, + 0xFE600222U, + 0xFE400224U, + 0xFE200226U, + 0xFE200228U, + 0xFE000229U, + 0xFDE0022BU, + 0xFDC0022DU, + 0xFDC0022FU, + 0xFDA00231U, + 0xFD800233U, + 0xFD600235U, + 0xFD600237U, + 0xFD400239U, + 0xFD20023BU, + 0xFD00023DU, + 0xFCE0023FU, + 0xFCE00241U, + 0xFCC00243U, + 0xFCA00245U, + 0xFC800247U, + 0xFC800249U, + 0xFC60024BU, + 0xFC40024DU, + 0xFC20024FU, + 0xFC200251U, + 0xFC000253U, + 0xFBE00255U, + 0xFBC00257U, + 0xFBC00259U, + 0xFBA0025BU, + 0xFB80025DU, + 0xFB60025FU, + 0xFB400261U, + 0xFB400263U, + 0xFB200265U, + 0xFB000267U, + 0xFAE00269U, + 0xFAE0026BU, + 0xFAC0026DU, + 0xFAA0026FU, + 0xFA800271U, + 0xFA800273U, + 0xFA600275U, + 0xFA400277U, + 0xFA200279U, + 0xFA20027BU, + 0xFA00027CU, + 0xF9E0027EU, + 0xF9C00280U, + 0xF9A00282U, + 0xF9A00284U, + 0xF9800286U, + 0xF9600288U, + 0xF940028AU, + 0xF940028CU, + 0xF920028EU, + 0xF9000290U, + 0xF8E00292U, + 0xF8E00294U, + 0xF8C00296U, + 0xF8A00298U, + 0xF880029AU, + 0xF860029CU, + 0xF860029EU, + 0xF84002A0U, + 0xF82002A2U, + 0xF80002A4U, + 0xF80002A6U, + 0xF7E002A8U, + 0xF7C002AAU, + 0xF7A002ACU, + 0xF7A002AEU, + 0xF78002B0U, + 0xF76002B2U, + 0xF74002B4U, + 0xF74002B6U, + 0xF72002B8U, + 0xF70002BAU, + 0xF6E002BCU, + 0xF6C002BEU, + 0xF6C002C0U, + 0xF6A002C2U, + 0xF68002C4U, + 0xF66002C6U, + 0xF66002C8U, + 0xF64002CAU, + 0xF62002CCU, + 0xF60002CEU, + 0xF60002CFU, + 0xF5E002D1U, + 0xF5C002D3U, + 0xF5A002D5U, + 0xF5A002D7U, + 0xF58002D9U, + 0xF56002DBU, + 0xF54002DDU, + 0xF52002DFU, + 0xF52002E1U, + 0xF50002E3U, + 0xF4E002E5U, + 0xF4C002E7U, + 0xF4C002E9U, + 0xF4A002EBU, + 0xF48002EDU, + 0xF46002EFU, + 0xF46002F1U, + 0xF44002F3U, + 0xF42002F5U, + 0xF40002F7U, + 0xF3E002F9U, + 
0xF3E002FBU, + /* v_table */ + 0x1A09A000U, + 0x19E9A800U, + 0x19A9B800U, + 0x1969C800U, + 0x1949D000U, + 0x1909E000U, + 0x18C9E800U, + 0x18A9F800U, + 0x186A0000U, + 0x182A1000U, + 0x180A2000U, + 0x17CA2800U, + 0x17AA3800U, + 0x176A4000U, + 0x172A5000U, + 0x170A6000U, + 0x16CA6800U, + 0x168A7800U, + 0x166A8000U, + 0x162A9000U, + 0x160AA000U, + 0x15CAA800U, + 0x158AB800U, + 0x156AC000U, + 0x152AD000U, + 0x14EAE000U, + 0x14CAE800U, + 0x148AF800U, + 0x146B0000U, + 0x142B1000U, + 0x13EB2000U, + 0x13CB2800U, + 0x138B3800U, + 0x134B4000U, + 0x132B5000U, + 0x12EB6000U, + 0x12CB6800U, + 0x128B7800U, + 0x124B8000U, + 0x122B9000U, + 0x11EBA000U, + 0x11ABA800U, + 0x118BB800U, + 0x114BC000U, + 0x112BD000U, + 0x10EBE000U, + 0x10ABE800U, + 0x108BF800U, + 0x104C0000U, + 0x100C1000U, + 0x0FEC2000U, + 0x0FAC2800U, + 0x0F8C3800U, + 0x0F4C4000U, + 0x0F0C5000U, + 0x0EEC5800U, + 0x0EAC6800U, + 0x0E6C7800U, + 0x0E4C8000U, + 0x0E0C9000U, + 0x0DEC9800U, + 0x0DACA800U, + 0x0D6CB800U, + 0x0D4CC000U, + 0x0D0CD000U, + 0x0CCCD800U, + 0x0CACE800U, + 0x0C6CF800U, + 0x0C4D0000U, + 0x0C0D1000U, + 0x0BCD1800U, + 0x0BAD2800U, + 0x0B6D3800U, + 0x0B2D4000U, + 0x0B0D5000U, + 0x0ACD5800U, + 0x0AAD6800U, + 0x0A6D7800U, + 0x0A2D8000U, + 0x0A0D9000U, + 0x09CD9800U, + 0x098DA800U, + 0x096DB800U, + 0x092DC000U, + 0x090DD000U, + 0x08CDD800U, + 0x088DE800U, + 0x086DF800U, + 0x082E0000U, + 0x07EE1000U, + 0x07CE1800U, + 0x078E2800U, + 0x076E3800U, + 0x072E4000U, + 0x06EE5000U, + 0x06CE5800U, + 0x068E6800U, + 0x064E7800U, + 0x062E8000U, + 0x05EE9000U, + 0x05CE9800U, + 0x058EA800U, + 0x054EB800U, + 0x052EC000U, + 0x04EED000U, + 0x04AED800U, + 0x048EE800U, + 0x044EF000U, + 0x042F0000U, + 0x03EF1000U, + 0x03AF1800U, + 0x038F2800U, + 0x034F3000U, + 0x030F4000U, + 0x02EF5000U, + 0x02AF5800U, + 0x028F6800U, + 0x024F7000U, + 0x020F8000U, + 0x01EF9000U, + 0x01AF9800U, + 0x016FA800U, + 0x014FB000U, + 0x010FC000U, + 0x00EFD000U, + 0x00AFD800U, + 0x006FE800U, + 0x004FF000U, + 0x00100000U, + 0xFFD01000U, + 0xFFB01800U, + 
0xFF702800U, + 0xFF303000U, + 0xFF104000U, + 0xFED05000U, + 0xFEB05800U, + 0xFE706800U, + 0xFE307000U, + 0xFE108000U, + 0xFDD09000U, + 0xFD909800U, + 0xFD70A800U, + 0xFD30B000U, + 0xFD10C000U, + 0xFCD0D000U, + 0xFC90D800U, + 0xFC70E800U, + 0xFC30F000U, + 0xFBF10000U, + 0xFBD11000U, + 0xFB911800U, + 0xFB712800U, + 0xFB313000U, + 0xFAF14000U, + 0xFAD14800U, + 0xFA915800U, + 0xFA516800U, + 0xFA317000U, + 0xF9F18000U, + 0xF9D18800U, + 0xF9919800U, + 0xF951A800U, + 0xF931B000U, + 0xF8F1C000U, + 0xF8B1C800U, + 0xF891D800U, + 0xF851E800U, + 0xF831F000U, + 0xF7F20000U, + 0xF7B20800U, + 0xF7921800U, + 0xF7522800U, + 0xF7123000U, + 0xF6F24000U, + 0xF6B24800U, + 0xF6925800U, + 0xF6526800U, + 0xF6127000U, + 0xF5F28000U, + 0xF5B28800U, + 0xF5729800U, + 0xF552A800U, + 0xF512B000U, + 0xF4F2C000U, + 0xF4B2C800U, + 0xF472D800U, + 0xF452E800U, + 0xF412F000U, + 0xF3D30000U, + 0xF3B30800U, + 0xF3731800U, + 0xF3532800U, + 0xF3133000U, + 0xF2D34000U, + 0xF2B34800U, + 0xF2735800U, + 0xF2336800U, + 0xF2137000U, + 0xF1D38000U, + 0xF1B38800U, + 0xF1739800U, + 0xF133A800U, + 0xF113B000U, + 0xF0D3C000U, + 0xF093C800U, + 0xF073D800U, + 0xF033E000U, + 0xF013F000U, + 0xEFD40000U, + 0xEF940800U, + 0xEF741800U, + 0xEF342000U, + 0xEEF43000U, + 0xEED44000U, + 0xEE944800U, + 0xEE745800U, + 0xEE346000U, + 0xEDF47000U, + 0xEDD48000U, + 0xED948800U, + 0xED549800U, + 0xED34A000U, + 0xECF4B000U, + 0xECD4C000U, + 0xEC94C800U, + 0xEC54D800U, + 0xEC34E000U, + 0xEBF4F000U, + 0xEBB50000U, + 0xEB950800U, + 0xEB551800U, + 0xEB352000U, + 0xEAF53000U, + 0xEAB54000U, + 0xEA954800U, + 0xEA555800U, + 0xEA156000U, + 0xE9F57000U, + 0xE9B58000U, + 0xE9958800U, + 0xE9559800U, + 0xE915A000U, + 0xE8F5B000U, + 0xE8B5C000U, + 0xE875C800U, + 0xE855D800U, + 0xE815E000U, + 0xE7F5F000U, + 0xE7B60000U, + 0xE7760800U, + 0xE7561800U, + 0xE7162000U, + 0xE6D63000U, + 0xE6B64000U, + 0xE6764800U, + 0xE6365800U +}; + +#define FLAGS 0x40080100 +#define READUV(U,V) (tables[256 + (U)] + tables[512 + (V)]) +#define READY(Y) tables[Y] 
+#define FIXUP(Y) \ +do { \ + int tmp = (Y) & FLAGS; \ + if (tmp != 0) \ + { \ + tmp -= tmp>>8; \ + (Y) |= tmp; \ + tmp = FLAGS & ~(Y>>1); \ + (Y) += tmp>>8; \ + } \ +} while (0 == 1) + +#define STORE(Y,DSTPTR) \ +do { \ + *(DSTPTR)++ = (Y); \ + *(DSTPTR)++ = (Y)>>22; \ + *(DSTPTR)++ = (Y)>>11; \ + *(DSTPTR)++ = 255; \ +} while (0 == 1) + +void yuv422_2_rgb8888(uint8_t *dst_ptr, + const uint8_t *y_ptr, + const uint8_t *u_ptr, + const uint8_t *v_ptr, + int32_t width, + int32_t height, + int32_t y_span, + int32_t uv_span, + int32_t dst_span, + int32_t dither) +{ + height -= 1; + while (height > 0) + { + height -= width<<16; + height += 1<<16; + while (height < 0) + { + /* Do top row pair */ + uint32_t uv, y0, y1; + + uv = READUV(*u_ptr++,*v_ptr++); + y0 = uv + READY(*y_ptr++); + y1 = uv + READY(*y_ptr++); + FIXUP(y0); + FIXUP(y1); + STORE(y0, dst_ptr); + STORE(y1, dst_ptr); + height += (2<<16); + } + if ((height>>16) == 0) + { + /* Trailing top row pix */ + uint32_t uv, y0; + + uv = READUV(*u_ptr,*v_ptr); + y0 = uv + READY(*y_ptr++); + FIXUP(y0); + STORE(y0, dst_ptr); + } + dst_ptr += dst_span-width*4; + y_ptr += y_span-width; + u_ptr += uv_span-(width>>1); + v_ptr += uv_span-(width>>1); + height = (height<<16)>>16; + height -= 1; + if (height == 0) + break; + height -= width<<16; + height += 1<<16; + while (height < 0) + { + /* Do second row pair */ + uint32_t uv, y0, y1; + + uv = READUV(*u_ptr++,*v_ptr++); + y0 = uv + READY(*y_ptr++); + y1 = uv + READY(*y_ptr++); + FIXUP(y0); + FIXUP(y1); + STORE(y0, dst_ptr); + STORE(y1, dst_ptr); + height += (2<<16); + } + if ((height>>16) == 0) + { + /* Trailing bottom row pix */ + uint32_t uv, y0; + + uv = READUV(*u_ptr,*v_ptr); + y0 = uv + READY(*y_ptr++); + FIXUP(y0); + STORE(y0, dst_ptr); + } + dst_ptr += dst_span-width*4; + y_ptr += y_span-width; + u_ptr += uv_span-(width>>1); + v_ptr += uv_span-(width>>1); + height = (height<<16)>>16; + height -= 1; + } +} + + +#undef FLAGS +#undef READUV +#undef READY +#undef FIXUP 
+#undef STORE + + +#define FLAGS 0x40080100 +#define READUV(U,V) (tables[256 + (U)] + tables[512 + (V)]) +#define READY(Y) tables[Y] +#define FIXUP(Y) \ +do { \ + int tmp = (Y) & FLAGS; \ + if (tmp != 0) \ + { \ + tmp -= tmp>>8; \ + (Y) |= tmp; \ + tmp = FLAGS & ~(Y>>1); \ + (Y) += tmp>>8; \ + } \ +} while (0 == 1) + +#define STORE(Y,DSTPTR) \ +do { \ + (DSTPTR) = 0xFF000000 | (Y & 0xFF) | (0xFF00 & (Y>>14)) | (0xFF0000 & (Y<<5));\ +} while (0 == 1) + +void yuv420_2_rgb8888(uint8_t *dst_ptr_, + const uint8_t *y_ptr, + const uint8_t *u_ptr, + const uint8_t *v_ptr, + int32_t width, + int32_t height, + int32_t y_span, + int32_t uv_span, + int32_t dst_span, + int32_t dither) +{ + uint32_t *dst_ptr = (uint32_t *)(void *)dst_ptr_; + dst_span >>= 2; + + height -= 1; + while (height > 0) + { + height -= width<<16; + height += 1<<16; + while (height < 0) + { + /* Do 2 column pairs */ + uint32_t uv, y0, y1; + + uv = READUV(*u_ptr++,*v_ptr++); + y1 = uv + READY(y_ptr[y_span]); + y0 = uv + READY(*y_ptr++); + FIXUP(y1); + FIXUP(y0); + STORE(y1, dst_ptr[dst_span]); + STORE(y0, *dst_ptr++); + y1 = uv + READY(y_ptr[y_span]); + y0 = uv + READY(*y_ptr++); + FIXUP(y1); + FIXUP(y0); + STORE(y1, dst_ptr[dst_span]); + STORE(y0, *dst_ptr++); + height += (2<<16); + } + if ((height>>16) == 0) + { + /* Trailing column pair */ + uint32_t uv, y0, y1; + + uv = READUV(*u_ptr,*v_ptr); + y1 = uv + READY(y_ptr[y_span]); + y0 = uv + READY(*y_ptr++); + FIXUP(y1); + FIXUP(y0); + STORE(y0, dst_ptr[dst_span]); + STORE(y1, *dst_ptr++); + } + dst_ptr += dst_span*2-width; + y_ptr += y_span*2-width; + u_ptr += uv_span-(width>>1); + v_ptr += uv_span-(width>>1); + height = (height<<16)>>16; + height -= 2; + } + if (height == 0) + { + /* Trail row */ + height -= width<<16; + height += 1<<16; + while (height < 0) + { + /* Do a row pair */ + uint32_t uv, y0, y1; + + uv = READUV(*u_ptr++,*v_ptr++); + y1 = uv + READY(*y_ptr++); + y0 = uv + READY(*y_ptr++); + FIXUP(y1); + FIXUP(y0); + STORE(y1, *dst_ptr++); + 
STORE(y0, *dst_ptr++); + height += (2<<16); + } + if ((height>>16) == 0) + { + /* Trailing pix */ + uint32_t uv, y0; + + uv = READUV(*u_ptr++,*v_ptr++); + y0 = uv + READY(*y_ptr++); + FIXUP(y0); + STORE(y0, *dst_ptr++); + } + } +} + + + +#undef FLAGS +#undef READUV +#undef READY +#undef FIXUP +#undef STORE + +#define FLAGS 0x40080100 +#define READUV(U,V) (tables[256 + (U)] + tables[512 + (V)]) +#define READY(Y) tables[Y] +#define FIXUP(Y) \ +do { \ + int tmp = (Y) & FLAGS; \ + if (tmp != 0) \ + { \ + tmp -= tmp>>8; \ + (Y) |= tmp; \ + tmp = FLAGS & ~(Y>>1); \ + (Y) += tmp>>8; \ + } \ +} while (0 == 1) + +#define STORE(Y,DSTPTR) \ +do { \ + *(DSTPTR)++ = (Y); \ + *(DSTPTR)++ = (Y)>>22; \ + *(DSTPTR)++ = (Y)>>11; \ + *(DSTPTR)++ = 255; \ +} while (0 == 1) + +void yuv444_2_rgb8888(uint8_t *dst_ptr, + const uint8_t *y_ptr, + const uint8_t *u_ptr, + const uint8_t *v_ptr, + int32_t width, + int32_t height, + int32_t y_span, + int32_t uv_span, + int32_t dst_span, + int32_t dither) +{ + height -= 1; + while (height > 0) + { + height -= width<<16; + height += 1<<16; + while (height < 0) + { + /* Do top row pair */ + uint32_t uv, y0, y1; + + uv = READUV(*u_ptr++,*v_ptr++); + y0 = uv + READY(*y_ptr++); + FIXUP(y0); + STORE(y0, dst_ptr); + uv = READUV(*u_ptr++,*v_ptr++); + y1 = uv + READY(*y_ptr++); + FIXUP(y1); + STORE(y1, dst_ptr); + height += (2<<16); + } + if ((height>>16) == 0) + { + /* Trailing top row pix */ + uint32_t uv, y0; + + uv = READUV(*u_ptr++,*v_ptr++); + y0 = uv + READY(*y_ptr++); + FIXUP(y0); + STORE(y0, dst_ptr); + } + dst_ptr += dst_span-width*4; + y_ptr += y_span-width; + u_ptr += uv_span-width; + v_ptr += uv_span-width; + height = (height<<16)>>16; + height -= 1; + if (height == 0) + break; + height -= width<<16; + height += 1<<16; + while (height < 0) + { + /* Do second row pair */ + uint32_t uv, y0, y1; + + uv = READUV(*u_ptr++,*v_ptr++); + y0 = uv + READY(*y_ptr++); + FIXUP(y0); + STORE(y0, dst_ptr); + uv = READUV(*u_ptr++,*v_ptr++); + y1 = uv + 
READY(*y_ptr++); + FIXUP(y1); + STORE(y1, dst_ptr); + height += (2<<16); + } + if ((height>>16) == 0) + { + /* Trailing bottom row pix */ + uint32_t uv, y0; + + uv = READUV(*u_ptr++,*v_ptr++); + y0 = uv + READY(*y_ptr++); + FIXUP(y0); + STORE(y0, dst_ptr); + } + dst_ptr += dst_span-width*4; + y_ptr += y_span-width; + u_ptr += uv_span-width; + v_ptr += uv_span-width; + height = (height<<16)>>16; + height -= 1; + } +} +#endif // YUV2RGB_H diff --git a/platform/bb10/detect.py b/platform/bb10/detect.py index 2860b7f090..5aa68b36dc 100644 --- a/platform/bb10/detect.py +++ b/platform/bb10/detect.py @@ -33,8 +33,7 @@ def get_flags(): return [ ('tools', 'no'), - ('theora', 'no'), - + ('module_theora_enabled', 'no'), ] def configure(env): diff --git a/platform/javascript/detect.py b/platform/javascript/detect.py index f80c3b4915..fce680b002 100644 --- a/platform/javascript/detect.py +++ b/platform/javascript/detect.py @@ -26,10 +26,10 @@ def get_flags(): return [ ('tools', 'no'), - ('theora', 'no'), ('musepack', 'no'), ('squish', 'no'), ('etc1', 'no'), + ('module_theora_enabled', 'no'), ] diff --git a/platform/x11/detect.py b/platform/x11/detect.py index 98c2d1e8fd..7c22bd97ff 100644 --- a/platform/x11/detect.py +++ b/platform/x11/detect.py @@ -73,7 +73,6 @@ def get_flags(): ("openssl", "system"), ('freetype','yes'), # use system freetype ('libpng', 'system'), - #("theora","no"), ] @@ -155,16 +154,24 @@ def configure(env): if (env["enet"] == "system"): env.ParseConfig('pkg-config libenet --cflags --libs') - if (env["libogg"] == "system"): - env.ParseConfig('pkg-config ogg --cflags --libs') + # Sound and video libraries + # Keep the order as it triggers chained dependencies (ogg needed by others, etc.) 
+ + if (env["libtheora"] == "system"): + env["libogg"] = "system" # Needed to link against system libtheora + env["libvorbis"] = "system" # Needed to link against system libtheora + env.ParseConfig('pkg-config theora theoradec --cflags --libs') if (env["libvorbis"] == "system"): env["libogg"] = "system" # Needed to link against system libvorbis - env.ParseConfig('pkg-config vorbis vorbisfile ogg --cflags --libs') + env.ParseConfig('pkg-config vorbis vorbisfile --cflags --libs') if (env["opus"] == "system"): env["libogg"] = "system" # Needed to link against system opus - env.ParseConfig('pkg-config opus opusfile ogg --cflags --libs') + env.ParseConfig('pkg-config opus opusfile --cflags --libs') + + if (env["libogg"] == "system"): + env.ParseConfig('pkg-config ogg --cflags --libs') env.Append(CPPFLAGS=['-DOPENGL_ENABLED']) diff --git a/thirdparty/README.md b/thirdparty/README.md index a8c2c7df95..a567eb177d 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -132,3 +132,16 @@ Files extracted from upstream source: Files extracted from upstream source: - all of them: rg_etc1.{cpp,h} + + +## theora + +- Upstream: https://www.theora.org +- Version: 1.1.1 +- License: BSD-3-Clause + +Files extracted from upstream source: + +- all .c, .h in lib/ +- all .h files in include/theora/ as theora/ +- COPYING and LICENSE diff --git a/thirdparty/libtheora/COPYING b/thirdparty/libtheora/COPYING new file mode 100644 index 0000000000..c8ccce4ffb --- /dev/null +++ b/thirdparty/libtheora/COPYING @@ -0,0 +1,28 @@ +Copyright (C) 2002-2009 Xiph.org Foundation + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +- Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. 
+ +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +- Neither the name of the Xiph.org Foundation nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/thirdparty/libtheora/LICENSE b/thirdparty/libtheora/LICENSE new file mode 100644 index 0000000000..5e5ec08469 --- /dev/null +++ b/thirdparty/libtheora/LICENSE @@ -0,0 +1,18 @@ +Please see the file COPYING for the copyright license for this software. + +In addition to and irrespective of the copyright license associated +with this software, On2 Technologies, Inc. 
makes the following statement +regarding technology used in this software: + + On2 represents and warrants that it shall not assert any rights + relating to infringement of On2's registered patents, nor initiate + any litigation asserting such rights, against any person who, or + entity which utilizes the On2 VP3 Codec Software, including any + use, distribution, and sale of said Software; which make changes, + modifications, and improvements in said Software; and to use, + distribute, and sell said changes as well as applications for other + fields of use. + +This reference implementation is originally derived from the On2 VP3 +Codec Software, and the Theora video format is essentially compatible +with the VP3 video format, consisting of a backward-compatible superset. diff --git a/thirdparty/libtheora/analyze.c b/thirdparty/libtheora/analyze.c new file mode 100644 index 0000000000..af01b60dff --- /dev/null +++ b/thirdparty/libtheora/analyze.c @@ -0,0 +1,2709 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: mode selection code + last mod: $Id$ + + ********************************************************************/ +#include +#include +#include "encint.h" +#include "modedec.h" + + + +typedef struct oc_fr_state oc_fr_state; +typedef struct oc_qii_state oc_qii_state; +typedef struct oc_enc_pipeline_state oc_enc_pipeline_state; +typedef struct oc_rd_metric oc_rd_metric; +typedef struct oc_mode_choice oc_mode_choice; + + + +/*There are 8 possible schemes used to encode macro block modes. + Schemes 0-6 use a maximally-skewed Huffman code to code each of the modes. + The same set of Huffman codes is used for each of these 7 schemes, but the + mode assigned to each codeword varies. + Scheme 0 writes a custom mapping from codeword to MB mode to the bitstream, + while schemes 1-6 have a fixed mapping. + Scheme 7 just encodes each mode directly in 3 bits.*/ + +/*The mode orderings for the various mode coding schemes. + Scheme 0 uses a custom alphabet, which is not stored in this table. + This is the inverse of the equivalent table OC_MODE_ALPHABETS in the + decoder.*/ +static const unsigned char OC_MODE_RANKS[7][OC_NMODES]={ + /*Last MV dominates.*/ + /*L P M N I G GM 4*/ + {3,4,2,0,1,5,6,7}, + /*L P N M I G GM 4*/ + {2,4,3,0,1,5,6,7}, + /*L M P N I G GM 4*/ + {3,4,1,0,2,5,6,7}, + /*L M N P I G GM 4*/ + {2,4,1,0,3,5,6,7}, + /*No MV dominates.*/ + /*N L P M I G GM 4*/ + {0,4,3,1,2,5,6,7}, + /*N G L P M I GM 4*/ + {0,5,4,2,3,1,6,7}, + /*Default ordering.*/ + /*N I M L P G GM 4*/ + {0,1,2,3,4,5,6,7} +}; + + + +/*Initialize the mode scheme chooser. 
+ This need only be called once per encoder.*/ +void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser){ + int si; + _chooser->mode_ranks[0]=_chooser->scheme0_ranks; + for(si=1;si<8;si++)_chooser->mode_ranks[si]=OC_MODE_RANKS[si-1]; +} + +/*Reset the mode scheme chooser. + This needs to be called once for each frame, including the first.*/ +static void oc_mode_scheme_chooser_reset(oc_mode_scheme_chooser *_chooser){ + int si; + memset(_chooser->mode_counts,0,OC_NMODES*sizeof(*_chooser->mode_counts)); + /*Scheme 0 starts with 24 bits to store the mode list in.*/ + _chooser->scheme_bits[0]=24; + memset(_chooser->scheme_bits+1,0,7*sizeof(*_chooser->scheme_bits)); + for(si=0;si<8;si++){ + /*Scheme 7 should always start first, and scheme 0 should always start + last.*/ + _chooser->scheme_list[si]=7-si; + _chooser->scheme0_list[si]=_chooser->scheme0_ranks[si]=si; + } +} + + +/*This is the real purpose of this data structure: not actually selecting a + mode scheme, but estimating the cost of coding a given mode given all the + modes selected so far. + This is done via opportunity cost: the cost is defined as the number of bits + required to encode all the modes selected so far including the current one + using the best possible scheme, minus the number of bits required to encode + all the modes selected so far not including the current one using the best + possible scheme. + The computational expense of doing this probably makes it overkill. + Just be happy we take a greedy approach instead of trying to solve the + global mode-selection problem (which is NP-hard). + _mb_mode: The mode to determine the cost of. 
+ Return: The number of bits required to code this mode.*/ +static int oc_mode_scheme_chooser_cost(oc_mode_scheme_chooser *_chooser, + int _mb_mode){ + int scheme0; + int scheme1; + int best_bits; + int mode_bits; + int si; + int scheme_bits; + scheme0=_chooser->scheme_list[0]; + scheme1=_chooser->scheme_list[1]; + best_bits=_chooser->scheme_bits[scheme0]; + mode_bits=OC_MODE_BITS[scheme0+1>>3][_chooser->mode_ranks[scheme0][_mb_mode]]; + /*Typical case: If the difference between the best scheme and the next best + is greater than 6 bits, then adding just one mode cannot change which + scheme we use.*/ + if(_chooser->scheme_bits[scheme1]-best_bits>6)return mode_bits; + /*Otherwise, check to see if adding this mode selects a different scheme as + the best.*/ + si=1; + best_bits+=mode_bits; + do{ + /*For any scheme except 0, we can just use the bit cost of the mode's rank + in that scheme.*/ + if(scheme1!=0){ + scheme_bits=_chooser->scheme_bits[scheme1]+ + OC_MODE_BITS[scheme1+1>>3][_chooser->mode_ranks[scheme1][_mb_mode]]; + } + else{ + int ri; + /*For scheme 0, incrementing the mode count could potentially change the + mode's rank. + Find the index where the mode would be moved to in the optimal list, + and use its bit cost instead of the one for the mode's current + position in the list.*/ + /*We don't recompute scheme bits; this is computing opportunity cost, not + an update.*/ + for(ri=_chooser->scheme0_ranks[_mb_mode];ri>0&& + _chooser->mode_counts[_mb_mode]>= + _chooser->mode_counts[_chooser->scheme0_list[ri-1]];ri--); + scheme_bits=_chooser->scheme_bits[0]+OC_MODE_BITS[0][ri]; + } + if(scheme_bits=8)break; + scheme1=_chooser->scheme_list[si]; + } + while(_chooser->scheme_bits[scheme1]-_chooser->scheme_bits[scheme0]<=6); + return best_bits-_chooser->scheme_bits[scheme0]; +} + +/*Incrementally update the mode counts and per-scheme bit counts and re-order + the scheme lists once a mode has been selected. 
+  _mb_mode: The mode that was chosen.*/
+static void oc_mode_scheme_chooser_update(oc_mode_scheme_chooser *_chooser,
+ int _mb_mode){
+  int ri;
+  int si;
+  _chooser->mode_counts[_mb_mode]++;
+  /*Re-order the scheme0 mode list if necessary.*/
+  for(ri=_chooser->scheme0_ranks[_mb_mode];ri>0;ri--){
+    int pmode;
+    pmode=_chooser->scheme0_list[ri-1];
+    if(_chooser->mode_counts[pmode]>=_chooser->mode_counts[_mb_mode])break;
+    /*Reorder the mode ranking.*/
+    _chooser->scheme0_ranks[pmode]++;
+    _chooser->scheme0_list[ri]=pmode;
+  }
+  _chooser->scheme0_ranks[_mb_mode]=ri;
+  _chooser->scheme0_list[ri]=_mb_mode;
+  /*Now add the bit cost for the mode to each scheme.*/
+  for(si=0;si<8;si++){
+    _chooser->scheme_bits[si]+=
+     OC_MODE_BITS[si+1>>3][_chooser->mode_ranks[si][_mb_mode]];
+  }
+  /*Finally, re-order the list of schemes.*/
+  for(si=1;si<8;si++){
+    int sj;
+    int scheme0;
+    int bits0;
+    sj=si;
+    scheme0=_chooser->scheme_list[si];
+    bits0=_chooser->scheme_bits[scheme0];
+    do{
+      int scheme1;
+      scheme1=_chooser->scheme_list[sj-1];
+      if(bits0>=_chooser->scheme_bits[scheme1])break;
+      _chooser->scheme_list[sj]=scheme1;
+    }
+    while(--sj>0);
+    _chooser->scheme_list[sj]=scheme0;
+  }
+}
+
+
+
+/*The number of bits required to encode a super block run.
+  _run_count: The desired run count; must be positive and less than 4130.*/
+static int oc_sb_run_bits(int _run_count){
+  int i;
+  for(i=0;_run_count>=OC_SB_RUN_VAL_MIN[i+1];i++);
+  return OC_SB_RUN_CODE_NBITS[i];
+}
+
+/*The number of bits required to encode a block run.
+  _run_count: The desired run count; must be positive and less than 30.*/
+static int oc_block_run_bits(int _run_count){
+  return OC_BLOCK_RUN_CODE_NBITS[_run_count-1];
+}
+
+
+
+/*State to track coded block flags and their bit cost.*/
+struct oc_fr_state{
+  ptrdiff_t  bits;
+  unsigned   sb_partial_count:16;
+  unsigned   sb_full_count:16;
+  unsigned   b_coded_count_prev:8;
+  unsigned   b_coded_count:8;
+  unsigned   b_count:8;
+  signed int sb_partial:2;
+  signed int sb_full:2;
+  signed int b_coded_prev:2;
+  signed int b_coded:2;
+};
+
+
+
+static void oc_fr_state_init(oc_fr_state *_fr){
+  _fr->bits=0;
+  _fr->sb_partial_count=0;
+  _fr->sb_full_count=0;
+  _fr->b_coded_count_prev=0;
+  _fr->b_coded_count=0;
+  _fr->b_count=0;
+  _fr->sb_partial=-1;
+  _fr->sb_full=-1;
+  _fr->b_coded_prev=-1;
+  _fr->b_coded=-1;
+}
+
+
+/*Account for one more super block in the partial/full flag runs.
+  _sb_partial: Whether the super block is partially coded.
+  _sb_full:    Whether it is fully coded; only read when _sb_partial is 0.*/
+static void oc_fr_state_advance_sb(oc_fr_state *_fr,
+ int _sb_partial,int _sb_full){
+  ptrdiff_t bits;
+  int       sb_partial_count;
+  int       sb_full_count;
+  bits=_fr->bits;
+  /*Extend the sb_partial run, or start a new one.*/
+  /*Resume from the stored run length (the count field, not the flag), the
+     same way the sb_full run is handled below.*/
+  sb_partial_count=_fr->sb_partial_count;
+  if(_fr->sb_partial==_sb_partial){
+    if(sb_partial_count>=4129){
+      bits++;
+      sb_partial_count=0;
+    }
+    else bits-=oc_sb_run_bits(sb_partial_count);
+  }
+  else sb_partial_count=0;
+  sb_partial_count++;
+  bits+=oc_sb_run_bits(sb_partial_count);
+  if(!_sb_partial){
+    /*Extend the sb_full run, or start a new one.*/
+    sb_full_count=_fr->sb_full_count;
+    if(_fr->sb_full==_sb_full){
+      if(sb_full_count>=4129){
+        bits++;
+        sb_full_count=0;
+      }
+      else bits-=oc_sb_run_bits(sb_full_count);
+    }
+    else sb_full_count=0;
+    sb_full_count++;
+    bits+=oc_sb_run_bits(sb_full_count);
+    _fr->sb_full=_sb_full;
+    _fr->sb_full_count=sb_full_count;
+  }
+  _fr->bits=bits;
+  _fr->sb_partial=_sb_partial;
+  _fr->sb_partial_count=sb_partial_count;
+}
+
+/*Flush any outstanding block flags for a SB (e.g., one with fewer than 16
+   blocks).*/
+static void oc_fr_state_flush_sb(oc_fr_state *_fr){
+  ptrdiff_t bits;
+  int       sb_partial;
+  /*Only read by oc_fr_state_advance_sb() when sb_partial is 0; given a value
+     here so the partially coded path never passes an indeterminate one.*/
+  int       sb_full=0;
+  int       b_coded_count;
+  int       b_coded;
+  int       b_count;
+  b_count=_fr->b_count;
+  if(b_count>0){
+    bits=_fr->bits;
+    b_coded=_fr->b_coded;
+    b_coded_count=_fr->b_coded_count;
+    if(b_coded_count>=b_count){
+      /*This SB was fully coded/uncoded; roll back the partial block flags.*/
+      bits-=oc_block_run_bits(b_coded_count);
+      if(b_coded_count>b_count)bits+=oc_block_run_bits(b_coded_count-b_count);
+      sb_partial=0;
+      sb_full=b_coded;
+      b_coded=_fr->b_coded_prev;
+      b_coded_count=_fr->b_coded_count_prev;
+    }
+    else{
+      /*It was partially coded.*/
+      sb_partial=1;
+      /*sb_full is unused.*/
+    }
+    _fr->bits=bits;
+    _fr->b_coded_count=b_coded_count;
+    _fr->b_coded_count_prev=b_coded_count;
+    _fr->b_count=0;
+    _fr->b_coded=b_coded;
+    _fr->b_coded_prev=b_coded;
+    oc_fr_state_advance_sb(_fr,sb_partial,sb_full);
+  }
+}
+
+/*Account for one more block flag, closing the super block after 16 blocks.
+  _b_coded: Whether the block is coded.*/
+static void oc_fr_state_advance_block(oc_fr_state *_fr,int _b_coded){
+  ptrdiff_t bits;
+  int       b_coded_count;
+  int       b_count;
+  int       sb_partial;
+  /*Only read by oc_fr_state_advance_sb() when sb_partial is 0; given a value
+     here so the partially coded path never passes an indeterminate one.*/
+  int       sb_full=0;
+  bits=_fr->bits;
+  /*Extend the b_coded run, or start a new one.*/
+  b_coded_count=_fr->b_coded_count;
+  if(_fr->b_coded==_b_coded)bits-=oc_block_run_bits(b_coded_count);
+  else b_coded_count=0;
+  b_coded_count++;
+  b_count=_fr->b_count+1;
+  if(b_count>=16){
+    /*We finished a superblock.*/
+    if(b_coded_count>=16){
+      /*It was fully coded/uncoded; roll back the partial block flags.*/
+      if(b_coded_count>16)bits+=oc_block_run_bits(b_coded_count-16);
+      sb_partial=0;
+      sb_full=_b_coded;
+      _b_coded=_fr->b_coded_prev;
+      b_coded_count=_fr->b_coded_count_prev;
+    }
+    else{
+      bits+=oc_block_run_bits(b_coded_count);
+      /*It was partially coded.*/
+      sb_partial=1;
+      /*sb_full is unused.*/
+    }
+    _fr->bits=bits;
+    _fr->b_coded_count=b_coded_count;
+    _fr->b_coded_count_prev=b_coded_count;
+    _fr->b_count=0;
+    _fr->b_coded=_b_coded;
+    _fr->b_coded_prev=_b_coded;
+    oc_fr_state_advance_sb(_fr,sb_partial,sb_full);
+  }
+  else{
+    bits+=oc_block_run_bits(b_coded_count);
+    _fr->bits=bits;
+    _fr->b_coded_count=b_coded_count;
+    _fr->b_count=b_count;
+    _fr->b_coded=_b_coded;
+  }
+}
+
+static void oc_fr_skip_block(oc_fr_state *_fr){
+  oc_fr_state_advance_block(_fr,0);
+}
+
+static void oc_fr_code_block(oc_fr_state *_fr){
+  oc_fr_state_advance_block(_fr,1);
+}
+
+/*Return: The bit count after coding one block minus the bit count after
+   skipping it, both starting from a copy of *_fr.*/
+static int oc_fr_cost1(const oc_fr_state *_fr){
+  oc_fr_state tmp;
+  ptrdiff_t   bits;
+  *&tmp=*_fr;
+  oc_fr_skip_block(&tmp);
+  bits=tmp.bits;
+  *&tmp=*_fr;
+  oc_fr_code_block(&tmp);
+  return (int)(tmp.bits-bits);
+}
+
+/*Return: The bit count of _post minus the bit count reached by skipping four
+   blocks starting from a copy of *_pre.*/
+static int oc_fr_cost4(const oc_fr_state *_pre,const oc_fr_state *_post){
+  oc_fr_state tmp;
+  *&tmp=*_pre;
+  oc_fr_skip_block(&tmp);
+  oc_fr_skip_block(&tmp);
+  oc_fr_skip_block(&tmp);
+  oc_fr_skip_block(&tmp);
+  return (int)(_post->bits-tmp.bits);
+}
+
+
+
+struct oc_qii_state{
+  ptrdiff_t  bits;
+  unsigned   qi01_count:14;
+  signed int qi01:2;
+  unsigned   qi12_count:14;
+  signed int qi12:2;
+};
+
+
+
+static void oc_qii_state_init(oc_qii_state *_qs){
+  _qs->bits=0;
+  _qs->qi01_count=0;
+  _qs->qi01=-1;
+  _qs->qi12_count=0;
+  _qs->qi12=-1;
+}
+
+
+/*Compute in *_qd the state that follows *_qs once the index _qii is added to
+   the qi01 run and, when _qii is non-zero, to the qi12 run.*/
+static void oc_qii_state_advance(oc_qii_state *_qd,
+ const oc_qii_state *_qs,int _qii){
+  ptrdiff_t bits;
+  int       qi01;
+  int       qi01_count;
+  int       qi12;
+  int       qi12_count;
+  bits=_qs->bits;
+  qi01=_qii+1>>1;
+  qi01_count=_qs->qi01_count;
+  if(qi01==_qs->qi01){
+    if(qi01_count>=4129){
+      bits++;
+      qi01_count=0;
+    }
+    else bits-=oc_sb_run_bits(qi01_count);
+  }
+  else qi01_count=0;
+  qi01_count++;
+  bits+=oc_sb_run_bits(qi01_count);
+  qi12_count=_qs->qi12_count;
+  if(_qii){
+    qi12=_qii>>1;
+    if(qi12==_qs->qi12){
+      if(qi12_count>=4129){
+        bits++;
+        qi12_count=0;
+      }
+      else bits-=oc_sb_run_bits(qi12_count);
+    }
+    else qi12_count=0;
+    qi12_count++;
+    bits+=oc_sb_run_bits(qi12_count);
+  }
+  else qi12=_qs->qi12;
+  _qd->bits=bits;
+  _qd->qi01=qi01;
+  _qd->qi01_count=qi01_count;
+  _qd->qi12=qi12;
+  _qd->qi12_count=qi12_count;
+}
+
+
+
+/*Temporary encoder state for the analysis pipeline.*/
+struct oc_enc_pipeline_state{
+  int                 bounding_values[256];
+  oc_fr_state         fr[3];
+  oc_qii_state        qs[3];
+
/*Condensed dequantization tables.*/ + const ogg_uint16_t *dequant[3][3][2]; + /*Condensed quantization tables.*/ + const oc_iquant *enquant[3][3][2]; + /*Skip SSD storage for the current MCU in each plane.*/ + unsigned *skip_ssd[3]; + /*Coded/uncoded fragment lists for each plane for the current MCU.*/ + ptrdiff_t *coded_fragis[3]; + ptrdiff_t *uncoded_fragis[3]; + ptrdiff_t ncoded_fragis[3]; + ptrdiff_t nuncoded_fragis[3]; + /*The starting fragment for the current MCU in each plane.*/ + ptrdiff_t froffset[3]; + /*The starting row for the current MCU in each plane.*/ + int fragy0[3]; + /*The ending row for the current MCU in each plane.*/ + int fragy_end[3]; + /*The starting superblock for the current MCU in each plane.*/ + unsigned sbi0[3]; + /*The ending superblock for the current MCU in each plane.*/ + unsigned sbi_end[3]; + /*The number of tokens for zzi=1 for each color plane.*/ + int ndct_tokens1[3]; + /*The outstanding eob_run count for zzi=1 for each color plane.*/ + int eob_run1[3]; + /*Whether or not the loop filter is enabled.*/ + int loop_filter; +}; + + +static void oc_enc_pipeline_init(oc_enc_ctx *_enc,oc_enc_pipeline_state *_pipe){ + ptrdiff_t *coded_fragis; + unsigned mcu_nvsbs; + ptrdiff_t mcu_nfrags; + int hdec; + int vdec; + int pli; + int qii; + int qti; + /*Initialize the per-plane coded block flag trackers. 
+ These are used for bit-estimation purposes only; the real flag bits span + all three planes, so we can't compute them in parallel.*/ + for(pli=0;pli<3;pli++)oc_fr_state_init(_pipe->fr+pli); + for(pli=0;pli<3;pli++)oc_qii_state_init(_pipe->qs+pli); + /*Set up the per-plane skip SSD storage pointers.*/ + mcu_nvsbs=_enc->mcu_nvsbs; + mcu_nfrags=mcu_nvsbs*_enc->state.fplanes[0].nhsbs*16; + hdec=!(_enc->state.info.pixel_fmt&1); + vdec=!(_enc->state.info.pixel_fmt&2); + _pipe->skip_ssd[0]=_enc->mcu_skip_ssd; + _pipe->skip_ssd[1]=_pipe->skip_ssd[0]+mcu_nfrags; + _pipe->skip_ssd[2]=_pipe->skip_ssd[1]+(mcu_nfrags>>hdec+vdec); + /*Set up per-plane pointers to the coded and uncoded fragments lists. + Unlike the decoder, each planes' coded and uncoded fragment list is kept + separate during the analysis stage; we only make the coded list for all + three planes contiguous right before the final packet is output + (destroying the uncoded lists, which are no longer needed).*/ + coded_fragis=_enc->state.coded_fragis; + for(pli=0;pli<3;pli++){ + _pipe->coded_fragis[pli]=coded_fragis; + coded_fragis+=_enc->state.fplanes[pli].nfrags; + _pipe->uncoded_fragis[pli]=coded_fragis; + } + memset(_pipe->ncoded_fragis,0,sizeof(_pipe->ncoded_fragis)); + memset(_pipe->nuncoded_fragis,0,sizeof(_pipe->nuncoded_fragis)); + /*Set up condensed quantizer tables.*/ + for(pli=0;pli<3;pli++){ + for(qii=0;qii<_enc->state.nqis;qii++){ + int qi; + qi=_enc->state.qis[qii]; + for(qti=0;qti<2;qti++){ + _pipe->dequant[pli][qii][qti]=_enc->state.dequant_tables[qi][pli][qti]; + _pipe->enquant[pli][qii][qti]=_enc->enquant_tables[qi][pli][qti]; + } + } + } + /*Initialize the tokenization state.*/ + for(pli=0;pli<3;pli++){ + _pipe->ndct_tokens1[pli]=0; + _pipe->eob_run1[pli]=0; + } + /*Initialize the bounding value array for the loop filter.*/ + _pipe->loop_filter=!oc_state_loop_filter_init(&_enc->state, + _pipe->bounding_values); +} + +/*Sets the current MCU stripe to super block row _sby. 
+ Return: A non-zero value if this was the last MCU.*/ +static int oc_enc_pipeline_set_stripe(oc_enc_ctx *_enc, + oc_enc_pipeline_state *_pipe,int _sby){ + const oc_fragment_plane *fplane; + unsigned mcu_nvsbs; + int sby_end; + int notdone; + int vdec; + int pli; + mcu_nvsbs=_enc->mcu_nvsbs; + sby_end=_enc->state.fplanes[0].nvsbs; + notdone=_sby+mcu_nvsbsstate.fplanes+pli; + _pipe->sbi0[pli]=fplane->sboffset+(_sby>>vdec)*fplane->nhsbs; + _pipe->fragy0[pli]=_sby<<2-vdec; + _pipe->froffset[pli]=fplane->froffset + +_pipe->fragy0[pli]*(ptrdiff_t)fplane->nhfrags; + if(notdone){ + _pipe->sbi_end[pli]=fplane->sboffset+(sby_end>>vdec)*fplane->nhsbs; + _pipe->fragy_end[pli]=sby_end<<2-vdec; + } + else{ + _pipe->sbi_end[pli]=fplane->sboffset+fplane->nsbs; + _pipe->fragy_end[pli]=fplane->nvfrags; + } + vdec=!(_enc->state.info.pixel_fmt&2); + } + return notdone; +} + +static void oc_enc_pipeline_finish_mcu_plane(oc_enc_ctx *_enc, + oc_enc_pipeline_state *_pipe,int _pli,int _sdelay,int _edelay){ + int refi; + /*Copy over all the uncoded fragments from this plane and advance the uncoded + fragment list.*/ + _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli]; + oc_state_frag_copy_list(&_enc->state,_pipe->uncoded_fragis[_pli], + _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli); + _pipe->nuncoded_fragis[_pli]=0; + /*Perform DC prediction.*/ + oc_enc_pred_dc_frag_rows(_enc,_pli, + _pipe->fragy0[_pli],_pipe->fragy_end[_pli]); + /*Finish DC tokenization.*/ + oc_enc_tokenize_dc_frag_list(_enc,_pli, + _pipe->coded_fragis[_pli],_pipe->ncoded_fragis[_pli], + _pipe->ndct_tokens1[_pli],_pipe->eob_run1[_pli]); + _pipe->ndct_tokens1[_pli]=_enc->ndct_tokens[_pli][1]; + _pipe->eob_run1[_pli]=_enc->eob_run[_pli][1]; + /*And advance the coded fragment list.*/ + _enc->state.ncoded_fragis[_pli]+=_pipe->ncoded_fragis[_pli]; + _pipe->coded_fragis[_pli]+=_pipe->ncoded_fragis[_pli]; + _pipe->ncoded_fragis[_pli]=0; + /*Apply the loop filter if necessary.*/ + 
refi=_enc->state.ref_frame_idx[OC_FRAME_SELF]; + if(_pipe->loop_filter){ + oc_state_loop_filter_frag_rows(&_enc->state,_pipe->bounding_values, + refi,_pli,_pipe->fragy0[_pli]-_sdelay,_pipe->fragy_end[_pli]-_edelay); + } + else _sdelay=_edelay=0; + /*To fill borders, we have an additional two pixel delay, since a fragment + in the next row could filter its top edge, using two pixels from a + fragment in this row. + But there's no reason to delay a full fragment between the two.*/ + oc_state_borders_fill_rows(&_enc->state,refi,_pli, + (_pipe->fragy0[_pli]-_sdelay<<3)-(_sdelay<<1), + (_pipe->fragy_end[_pli]-_edelay<<3)-(_edelay<<1)); +} + + + +/*Cost information about the coded blocks in a MB.*/ +struct oc_rd_metric{ + int uncoded_ac_ssd; + int coded_ac_ssd; + int ac_bits; + int dc_flag; +}; + + + +static int oc_enc_block_transform_quantize(oc_enc_ctx *_enc, + oc_enc_pipeline_state *_pipe,int _pli,ptrdiff_t _fragi,int _overhead_bits, + oc_rd_metric *_mo,oc_token_checkpoint **_stack){ + OC_ALIGN16(ogg_int16_t dct[64]); + OC_ALIGN16(ogg_int16_t data[64]); + ogg_uint16_t dc_dequant; + const ogg_uint16_t *dequant; + const oc_iquant *enquant; + ptrdiff_t frag_offs; + int ystride; + const unsigned char *src; + const unsigned char *ref; + unsigned char *dst; + int frame_type; + int nonzero; + unsigned uncoded_ssd; + unsigned coded_ssd; + int coded_dc; + oc_token_checkpoint *checkpoint; + oc_fragment *frags; + int mb_mode; + int mv_offs[2]; + int nmv_offs; + int ac_bits; + int borderi; + int qti; + int qii; + int pi; + int zzi; + int v; + int val; + int d; + int s; + int dc; + frags=_enc->state.frags; + frag_offs=_enc->state.frag_buf_offs[_fragi]; + ystride=_enc->state.ref_ystride[_pli]; + src=_enc->state.ref_frame_data[OC_FRAME_IO]+frag_offs; + borderi=frags[_fragi].borderi; + qii=frags[_fragi].qii; + if(qii&~3){ +#if !defined(OC_COLLECT_METRICS) + if(_enc->sp_level>=OC_SP_LEVEL_EARLY_SKIP){ + /*Enable early skip detection.*/ + frags[_fragi].coded=0; + return 0; + } +#endif 
+ /*Try and code this block anyway.*/ + qii&=3; + frags[_fragi].qii=qii; + } + mb_mode=frags[_fragi].mb_mode; + ref=_enc->state.ref_frame_data[ + _enc->state.ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]+frag_offs; + dst=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_SELF]] + +frag_offs; + /*Motion compensation:*/ + switch(mb_mode){ + case OC_MODE_INTRA:{ + nmv_offs=0; + oc_enc_frag_sub_128(_enc,data,src,ystride); + }break; + case OC_MODE_GOLDEN_NOMV: + case OC_MODE_INTER_NOMV:{ + nmv_offs=1; + mv_offs[0]=0; + oc_enc_frag_sub(_enc,data,src,ref,ystride); + }break; + default:{ + const oc_mv *frag_mvs; + frag_mvs=(const oc_mv *)_enc->state.frag_mvs; + nmv_offs=oc_state_get_mv_offsets(&_enc->state,mv_offs,_pli, + frag_mvs[_fragi][0],frag_mvs[_fragi][1]); + if(nmv_offs>1){ + oc_enc_frag_copy2(_enc,dst, + ref+mv_offs[0],ref+mv_offs[1],ystride); + oc_enc_frag_sub(_enc,data,src,dst,ystride); + } + else oc_enc_frag_sub(_enc,data,src,ref+mv_offs[0],ystride); + }break; + } +#if defined(OC_COLLECT_METRICS) + { + unsigned satd; + switch(nmv_offs){ + case 0:satd=oc_enc_frag_intra_satd(_enc,src,ystride);break; + case 1:{ + satd=oc_enc_frag_satd_thresh(_enc,src,ref+mv_offs[0],ystride,UINT_MAX); + }break; + default:{ + satd=oc_enc_frag_satd_thresh(_enc,src,dst,ystride,UINT_MAX); + } + } + _enc->frag_satd[_fragi]=satd; + } +#endif + /*Transform:*/ + oc_enc_fdct8x8(_enc,dct,data); + /*Quantize the DC coefficient:*/ + qti=mb_mode!=OC_MODE_INTRA; + enquant=_pipe->enquant[_pli][0][qti]; + dc_dequant=_pipe->dequant[_pli][0][qti][0]; + v=dct[0]; + val=v<<1; + s=OC_SIGNMASK(val); + val+=dc_dequant+s^s; + val=((enquant[0].m*(ogg_int32_t)val>>16)+val>>enquant[0].l)-s; + dc=OC_CLAMPI(-580,val,580); + nonzero=0; + /*Quantize the AC coefficients:*/ + dequant=_pipe->dequant[_pli][qii][qti]; + enquant=_pipe->enquant[_pli][qii][qti]; + for(zzi=1;zzi<64;zzi++){ + v=dct[OC_FZIG_ZAG[zzi]]; + d=dequant[zzi]; + val=v<<1; + v=abs(val); + if(v>=d){ + s=OC_SIGNMASK(val); + /*The bias added 
here rounds ties away from zero, since token + optimization can only decrease the magnitude of the quantized + value.*/ + val+=d+s^s; + /*Note the arithmetic right shift is not guaranteed by ANSI C. + Hopefully no one still uses ones-complement architectures.*/ + val=((enquant[zzi].m*(ogg_int32_t)val>>16)+val>>enquant[zzi].l)-s; + data[zzi]=OC_CLAMPI(-580,val,580); + nonzero=zzi; + } + else data[zzi]=0; + } + /*Tokenize.*/ + checkpoint=*_stack; + ac_bits=oc_enc_tokenize_ac(_enc,_pli,_fragi,data,dequant,dct,nonzero+1, + _stack,qti?0:3); + /*Reconstruct. + TODO: nonzero may need to be adjusted after tokenization.*/ + if(nonzero==0){ + ogg_int16_t p; + int ci; + /*We round this dequant product (and not any of the others) because there's + no iDCT rounding.*/ + p=(ogg_int16_t)(dc*(ogg_int32_t)dc_dequant+15>>5); + /*LOOP VECTORIZES.*/ + for(ci=0;ci<64;ci++)data[ci]=p; + } + else{ + data[0]=dc*dc_dequant; + oc_idct8x8(&_enc->state,data,nonzero+1); + } + if(!qti)oc_enc_frag_recon_intra(_enc,dst,ystride,data); + else{ + oc_enc_frag_recon_inter(_enc,dst, + nmv_offs==1?ref+mv_offs[0]:dst,ystride,data); + } + frame_type=_enc->state.frame_type; +#if !defined(OC_COLLECT_METRICS) + if(frame_type!=OC_INTRA_FRAME) +#endif + { + /*In retrospect, should we have skipped this block?*/ + oc_enc_frag_sub(_enc,data,src,dst,ystride); + coded_ssd=coded_dc=0; + if(borderi<0){ + for(pi=0;pi<64;pi++){ + coded_ssd+=data[pi]*data[pi]; + coded_dc+=data[pi]; + } + } + else{ + ogg_int64_t mask; + mask=_enc->state.borders[borderi].mask; + for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){ + coded_ssd+=data[pi]*data[pi]; + coded_dc+=data[pi]; + } + } + /*Scale to match DCT domain.*/ + coded_ssd<<=4; + /*We actually only want the AC contribution to the SSD.*/ + coded_ssd-=coded_dc*coded_dc>>2; +#if defined(OC_COLLECT_METRICS) + _enc->frag_ssd[_fragi]=coded_ssd; + } + if(frame_type!=OC_INTRA_FRAME){ +#endif + uncoded_ssd=_pipe->skip_ssd[_pli][_fragi-_pipe->froffset[_pli]]; + if(uncoded_ssdlambda&& + /*Don't 
allow luma blocks to be skipped in 4MV mode when VP3 + compatibility is enabled.*/ + (!_enc->vp3_compatible||mb_mode!=OC_MODE_INTER_MV_FOUR||_pli)){ + /*Hm, not worth it; roll back.*/ + oc_enc_tokenlog_rollback(_enc,checkpoint,(*_stack)-checkpoint); + *_stack=checkpoint; + frags[_fragi].coded=0; + return 0; + } + } + else _mo->dc_flag=1; + _mo->uncoded_ac_ssd+=uncoded_ssd; + _mo->coded_ac_ssd+=coded_ssd; + _mo->ac_bits+=ac_bits; + } + oc_qii_state_advance(_pipe->qs+_pli,_pipe->qs+_pli,qii); + frags[_fragi].dc=dc; + frags[_fragi].coded=1; + return 1; +} + +static int oc_enc_mb_transform_quantize_luma(oc_enc_ctx *_enc, + oc_enc_pipeline_state *_pipe,unsigned _mbi,int _mode_overhead){ + /*Worst case token stack usage for 4 fragments.*/ + oc_token_checkpoint stack[64*4]; + oc_token_checkpoint *stackptr; + const oc_sb_map *sb_maps; + signed char *mb_modes; + oc_fragment *frags; + ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + ptrdiff_t *uncoded_fragis; + ptrdiff_t nuncoded_fragis; + oc_rd_metric mo; + oc_fr_state fr_checkpoint; + oc_qii_state qs_checkpoint; + int mb_mode; + int ncoded; + ptrdiff_t fragi; + int bi; + *&fr_checkpoint=*(_pipe->fr+0); + *&qs_checkpoint=*(_pipe->qs+0); + sb_maps=(const oc_sb_map *)_enc->state.sb_maps; + mb_modes=_enc->state.mb_modes; + frags=_enc->state.frags; + coded_fragis=_pipe->coded_fragis[0]; + ncoded_fragis=_pipe->ncoded_fragis[0]; + uncoded_fragis=_pipe->uncoded_fragis[0]; + nuncoded_fragis=_pipe->nuncoded_fragis[0]; + mb_mode=mb_modes[_mbi]; + ncoded=0; + stackptr=stack; + memset(&mo,0,sizeof(mo)); + for(bi=0;bi<4;bi++){ + fragi=sb_maps[_mbi>>2][_mbi&3][bi]; + frags[fragi].mb_mode=mb_mode; + if(oc_enc_block_transform_quantize(_enc, + _pipe,0,fragi,oc_fr_cost1(_pipe->fr+0),&mo,&stackptr)){ + oc_fr_code_block(_pipe->fr+0); + coded_fragis[ncoded_fragis++]=fragi; + ncoded++; + } + else{ + *(uncoded_fragis-++nuncoded_fragis)=fragi; + oc_fr_skip_block(_pipe->fr+0); + } + } + if(_enc->state.frame_type!=OC_INTRA_FRAME){ + 
if(ncoded>0&&!mo.dc_flag){ + int cost; + /*Some individual blocks were worth coding. + See if that's still true when accounting for mode and MV overhead.*/ + cost=mo.coded_ac_ssd+_enc->lambda*(mo.ac_bits + +oc_fr_cost4(&fr_checkpoint,_pipe->fr+0)+_mode_overhead); + if(mo.uncoded_ac_ssd<=cost){ + /*Taking macroblock overhead into account, it is not worth coding this + MB.*/ + oc_enc_tokenlog_rollback(_enc,stack,stackptr-stack); + *(_pipe->fr+0)=*&fr_checkpoint; + *(_pipe->qs+0)=*&qs_checkpoint; + for(bi=0;bi<4;bi++){ + fragi=sb_maps[_mbi>>2][_mbi&3][bi]; + if(frags[fragi].coded){ + *(uncoded_fragis-++nuncoded_fragis)=fragi; + frags[fragi].coded=0; + } + oc_fr_skip_block(_pipe->fr+0); + } + ncoded_fragis-=ncoded; + ncoded=0; + } + } + /*If no luma blocks coded, the mode is forced.*/ + if(ncoded==0)mb_modes[_mbi]=OC_MODE_INTER_NOMV; + /*Assume that a 1MV with a single coded block is always cheaper than a 4MV + with a single coded block. + This may not be strictly true: a 4MV computes chroma MVs using (0,0) for + skipped blocks, while a 1MV does not.*/ + else if(ncoded==1&&mb_mode==OC_MODE_INTER_MV_FOUR){ + mb_modes[_mbi]=OC_MODE_INTER_MV; + } + } + _pipe->ncoded_fragis[0]=ncoded_fragis; + _pipe->nuncoded_fragis[0]=nuncoded_fragis; + return ncoded; +} + +static void oc_enc_sb_transform_quantize_chroma(oc_enc_ctx *_enc, + oc_enc_pipeline_state *_pipe,int _pli,int _sbi_start,int _sbi_end){ + const oc_sb_map *sb_maps; + oc_sb_flags *sb_flags; + ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + ptrdiff_t *uncoded_fragis; + ptrdiff_t nuncoded_fragis; + int sbi; + sb_maps=(const oc_sb_map *)_enc->state.sb_maps; + sb_flags=_enc->state.sb_flags; + coded_fragis=_pipe->coded_fragis[_pli]; + ncoded_fragis=_pipe->ncoded_fragis[_pli]; + uncoded_fragis=_pipe->uncoded_fragis[_pli]; + nuncoded_fragis=_pipe->nuncoded_fragis[_pli]; + for(sbi=_sbi_start;sbi<_sbi_end;sbi++){ + /*Worst case token stack usage for 1 fragment.*/ + oc_token_checkpoint stack[64]; + oc_rd_metric mo; + int 
quadi; + int bi; + memset(&mo,0,sizeof(mo)); + for(quadi=0;quadi<4;quadi++)for(bi=0;bi<4;bi++){ + ptrdiff_t fragi; + fragi=sb_maps[sbi][quadi][bi]; + if(fragi>=0){ + oc_token_checkpoint *stackptr; + stackptr=stack; + if(oc_enc_block_transform_quantize(_enc, + _pipe,_pli,fragi,oc_fr_cost1(_pipe->fr+_pli),&mo,&stackptr)){ + coded_fragis[ncoded_fragis++]=fragi; + oc_fr_code_block(_pipe->fr+_pli); + } + else{ + *(uncoded_fragis-++nuncoded_fragis)=fragi; + oc_fr_skip_block(_pipe->fr+_pli); + } + } + } + oc_fr_state_flush_sb(_pipe->fr+_pli); + sb_flags[sbi].coded_fully=_pipe->fr[_pli].sb_full; + sb_flags[sbi].coded_partially=_pipe->fr[_pli].sb_partial; + } + _pipe->ncoded_fragis[_pli]=ncoded_fragis; + _pipe->nuncoded_fragis[_pli]=nuncoded_fragis; +} + +/*Mode decision is done by exhaustively examining all potential choices. + Obviously, doing the motion compensation, fDCT, tokenization, and then + counting the bits each token uses is computationally expensive. + Theora's EOB runs can also split the cost of these tokens across multiple + fragments, and naturally we don't know what the optimal choice of Huffman + codes will be until we know all the tokens we're going to encode in all the + fragments. + So we use a simple approach to estimating the bit cost and distortion of each + mode based upon the SATD value of the residual before coding. + The mathematics behind the technique are outlined by Kim \cite{Kim03}, but + the process (modified somewhat from that of the paper) is very simple. + We build a non-linear regression of the mappings from + (pre-transform+quantization) SATD to (post-transform+quantization) bits and + SSD for each qi. + A separate set of mappings is kept for each quantization type and color + plane. + The mappings are constructed by partitioning the SATD values into a small + number of bins (currently 24) and using a linear regression in each bin + (as opposed to the 0th-order regression used by Kim). 
+ The bit counts and SSD measurements are obtained by examining actual encoded + frames, with appropriate lambda values and optimal Huffman codes selected. + EOB bits are assigned to the fragment that started the EOB run (as opposed to + dividing them among all the blocks in the run; though the latter approach + seems more theoretically correct, Monty's testing showed a small improvement + with the former, though that may have been merely statistical noise). + + @ARTICLE{Kim03, + author="Hyun Mun Kim", + title="Adaptive Rate Control Using Nonlinear Regression", + journal="IEEE Transactions on Circuits and Systems for Video Technology", + volume=13, + number=5, + pages="432--439", + month=May, + year=2003 + }*/ + +/*Computes (_ssd+_lambda*_rate)/(1<>OC_BIT_SCALE)+((_rate)>>OC_BIT_SCALE)*(_lambda) \ + +(((_ssd)&(1<>1)>>OC_BIT_SCALE) + +/*Estimate the R-D cost of the DCT coefficients given the SATD of a block after + prediction.*/ +static unsigned oc_dct_cost2(unsigned *_ssd, + int _qi,int _pli,int _qti,int _satd){ + unsigned rmse; + int bin; + int dx; + int y0; + int z0; + int dy; + int dz; + /*SATD metrics for chroma planes vary much less than luma, so we scale them + by 4 to distribute them into the mode decision bins more evenly.*/ + _satd<<=_pli+1&2; + bin=OC_MINI(_satd>>OC_SAD_SHIFT,OC_SAD_BINS-2); + dx=_satd-(bin<>OC_SAD_SHIFT),0); + *_ssd=rmse*rmse>>2*OC_RMSE_SCALE-OC_BIT_SCALE; + return OC_MAXI(y0+(dy*dx>>OC_SAD_SHIFT),0); +} + +/*Select luma block-level quantizers for a MB in an INTRA frame.*/ +static unsigned oc_analyze_intra_mb_luma(oc_enc_ctx *_enc, + const oc_qii_state *_qs,unsigned _mbi){ + const unsigned char *src; + const ptrdiff_t *frag_buf_offs; + const oc_sb_map *sb_maps; + oc_fragment *frags; + ptrdiff_t frag_offs; + ptrdiff_t fragi; + oc_qii_state qs[4][3]; + unsigned cost[4][3]; + unsigned ssd[4][3]; + unsigned rate[4][3]; + int prev[3][3]; + unsigned satd; + unsigned best_cost; + unsigned best_ssd; + unsigned best_rate; + int best_qii; + int 
qii; + int lambda; + int ystride; + int nqis; + int bi; + frag_buf_offs=_enc->state.frag_buf_offs; + sb_maps=(const oc_sb_map *)_enc->state.sb_maps; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ystride=_enc->state.ref_ystride[0]; + fragi=sb_maps[_mbi>>2][_mbi&3][0]; + frag_offs=frag_buf_offs[fragi]; + satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride); + nqis=_enc->state.nqis; + lambda=_enc->lambda; + for(qii=0;qiistate.qis[qii],0,0,satd) + +(qs[0][qii].bits-_qs->bits<>2][_mbi&3][bi]; + frag_offs=frag_buf_offs[fragi]; + satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride); + for(qii=0;qiistate.qis[qii],0,0,satd); + best_ssd=ssd[bi-1][0]+cur_ssd; + best_rate=rate[bi-1][0]+cur_rate + +(qt[0].bits-qs[bi-1][0].bits<state.frags; + for(bi=3;;){ + fragi=sb_maps[_mbi>>2][_mbi&3][bi]; + frags[fragi].qii=best_qii; + if(bi--<=0)break; + best_qii=prev[bi][best_qii]; + } + return best_cost; +} + +/*Select a block-level quantizer for a single chroma block in an INTRA frame.*/ +static unsigned oc_analyze_intra_chroma_block(oc_enc_ctx *_enc, + const oc_qii_state *_qs,int _pli,ptrdiff_t _fragi){ + const unsigned char *src; + oc_fragment *frags; + ptrdiff_t frag_offs; + oc_qii_state qt[3]; + unsigned cost[3]; + unsigned satd; + unsigned best_cost; + int best_qii; + int qii; + int lambda; + int ystride; + int nqis; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ystride=_enc->state.ref_ystride[_pli]; + frag_offs=_enc->state.frag_buf_offs[_fragi]; + satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride); + nqis=_enc->state.nqis; + lambda=_enc->lambda; + best_qii=0; + for(qii=0;qiistate.qis[qii],_pli,0,satd) + +(qt[qii].bits-_qs->bits<state.frags; + frags[_fragi].qii=best_qii; + return best_cost; +} + +static void oc_enc_sb_transform_quantize_intra_chroma(oc_enc_ctx *_enc, + oc_enc_pipeline_state *_pipe,int _pli,int _sbi_start,int _sbi_end){ + const oc_sb_map *sb_maps; + oc_sb_flags *sb_flags; + ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + int sbi; + 
sb_maps=(const oc_sb_map *)_enc->state.sb_maps; + sb_flags=_enc->state.sb_flags; + coded_fragis=_pipe->coded_fragis[_pli]; + ncoded_fragis=_pipe->ncoded_fragis[_pli]; + for(sbi=_sbi_start;sbi<_sbi_end;sbi++){ + /*Worst case token stack usage for 1 fragment.*/ + oc_token_checkpoint stack[64]; + int quadi; + int bi; + for(quadi=0;quadi<4;quadi++)for(bi=0;bi<4;bi++){ + ptrdiff_t fragi; + fragi=sb_maps[sbi][quadi][bi]; + if(fragi>=0){ + oc_token_checkpoint *stackptr; + oc_analyze_intra_chroma_block(_enc,_pipe->qs+_pli,_pli,fragi); + stackptr=stack; + oc_enc_block_transform_quantize(_enc, + _pipe,_pli,fragi,0,NULL,&stackptr); + coded_fragis[ncoded_fragis++]=fragi; + } + } + } + _pipe->ncoded_fragis[_pli]=ncoded_fragis; +} + +/*Analysis stage for an INTRA frame.*/ +void oc_enc_analyze_intra(oc_enc_ctx *_enc,int _recode){ + oc_enc_pipeline_state pipe; + const unsigned char *map_idxs; + int nmap_idxs; + oc_sb_flags *sb_flags; + signed char *mb_modes; + const oc_mb_map *mb_maps; + oc_mb_enc_info *embs; + oc_fragment *frags; + unsigned stripe_sby; + unsigned mcu_nvsbs; + int notstart; + int notdone; + int refi; + int pli; + _enc->state.frame_type=OC_INTRA_FRAME; + oc_enc_tokenize_start(_enc); + oc_enc_pipeline_init(_enc,&pipe); + /*Choose MVs and MB modes and quantize and code luma. 
+ Must be done in Hilbert order.*/ + map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt]; + nmap_idxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; + _enc->state.ncoded_fragis[0]=0; + _enc->state.ncoded_fragis[1]=0; + _enc->state.ncoded_fragis[2]=0; + sb_flags=_enc->state.sb_flags; + mb_modes=_enc->state.mb_modes; + mb_maps=(const oc_mb_map *)_enc->state.mb_maps; + embs=_enc->mb_info; + frags=_enc->state.frags; + notstart=0; + notdone=1; + mcu_nvsbs=_enc->mcu_nvsbs; + for(stripe_sby=0;notdone;stripe_sby+=mcu_nvsbs){ + unsigned sbi; + unsigned sbi_end; + notdone=oc_enc_pipeline_set_stripe(_enc,&pipe,stripe_sby); + sbi_end=pipe.sbi_end[0]; + for(sbi=pipe.sbi0[0];sbistate.curframe_num>0)oc_mcenc_search(_enc,mbi); + oc_analyze_intra_mb_luma(_enc,pipe.qs+0,mbi); + mb_modes[mbi]=OC_MODE_INTRA; + oc_enc_mb_transform_quantize_luma(_enc,&pipe,mbi,0); + /*Propagate final MB mode and MVs to the chroma blocks.*/ + for(mapii=4;mapii>2; + bi=mapi&3; + fragi=mb_maps[mbi][pli][bi]; + frags[fragi].mb_mode=OC_MODE_INTRA; + } + } + } + oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,0,notstart,notdone); + /*Code chroma planes.*/ + for(pli=1;pli<3;pli++){ + oc_enc_sb_transform_quantize_intra_chroma(_enc,&pipe, + pli,pipe.sbi0[pli],pipe.sbi_end[pli]); + oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,pli,notstart,notdone); + } + notstart=1; + } + /*Finish filling in the reference frame borders.*/ + refi=_enc->state.ref_frame_idx[OC_FRAME_SELF]; + for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_enc->state,refi,pli); + _enc->state.ntotal_coded_fragis=_enc->state.nfrags; +} + + + +/*Cost information about a MB mode.*/ +struct oc_mode_choice{ + unsigned cost; + unsigned ssd; + unsigned rate; + unsigned overhead; + unsigned char qii[12]; +}; + + + +static void oc_mode_set_cost(oc_mode_choice *_modec,int _lambda){ + _modec->cost=OC_MODE_RD_COST(_modec->ssd, + _modec->rate+_modec->overhead,_lambda); +} + +/*A set of skip SSD's to use to disable early skipping.*/ +static const unsigned 
OC_NOSKIP[12]={ + UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX, + UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX, + UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX +}; + +/*The estimated number of bits used by a coded chroma block to specify the AC + quantizer. + TODO: Currently this is just 0.5*log2(3) (estimating about 50% compression); + measurements suggest this is in the right ballpark, but it varies somewhat + with lambda.*/ +#define OC_CHROMA_QII_RATE ((0xCAE00D1DU>>31-OC_BIT_SCALE)+1>>1) + +static void oc_analyze_mb_mode_luma(oc_enc_ctx *_enc, + oc_mode_choice *_modec,const oc_fr_state *_fr,const oc_qii_state *_qs, + const unsigned _frag_satd[12],const unsigned _skip_ssd[12],int _qti){ + oc_fr_state fr; + oc_qii_state qs; + unsigned ssd; + unsigned rate; + int overhead; + unsigned satd; + unsigned best_ssd; + unsigned best_rate; + int best_overhead; + int best_fri; + int best_qii; + unsigned cur_cost; + unsigned cur_ssd; + unsigned cur_rate; + int cur_overhead; + int lambda; + int nqis; + int nskipped; + int bi; + int qii; + lambda=_enc->lambda; + nqis=_enc->state.nqis; + /*We could do a trellis optimization here, but we don't make final skip + decisions until after transform+quantization, so the result wouldn't be + optimal anyway. 
+ Instead we just use a greedy approach; for most SATD values, the + differences between the qiis are large enough to drown out the cost to + code the flags, anyway.*/ + *&fr=*_fr; + *&qs=*_qs; + ssd=rate=overhead=nskipped=0; + for(bi=0;bi<4;bi++){ + oc_fr_state ft[2]; + oc_qii_state qt[3]; + unsigned best_cost; + satd=_frag_satd[bi]; + *(ft+0)=*&fr; + oc_fr_code_block(ft+0); + oc_qii_state_advance(qt+0,&qs,0); + best_overhead=(ft[0].bits-fr.bits<state.qis[0],0,_qti,satd) + +(qt[0].bits-qs.bits<state.qis[qii],0,_qti,satd) + +(qt[qii].bits-qs.bits<qii[bi]=best_qii; + } + _modec->ssd=ssd; + _modec->rate=rate; + _modec->overhead=OC_MAXI(overhead,0); +} + +static void oc_analyze_mb_mode_chroma(oc_enc_ctx *_enc, + oc_mode_choice *_modec,const oc_fr_state *_fr,const oc_qii_state *_qs, + const unsigned _frag_satd[12],const unsigned _skip_ssd[12],int _qti){ + unsigned ssd; + unsigned rate; + unsigned satd; + unsigned best_ssd; + unsigned best_rate; + int best_qii; + unsigned cur_cost; + unsigned cur_ssd; + unsigned cur_rate; + int lambda; + int nblocks; + int nqis; + int pli; + int bi; + int qii; + lambda=_enc->lambda; + nqis=_enc->state.nqis; + ssd=_modec->ssd; + rate=_modec->rate; + /*Because (except in 4:4:4 mode) we aren't considering chroma blocks in coded + order, we assume a constant overhead for coded block and qii flags.*/ + nblocks=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; + nblocks=(nblocks-4>>1)+4; + bi=4; + for(pli=1;pli<3;pli++){ + for(;bistate.qis[0],pli,_qti,satd) + +OC_CHROMA_QII_RATE; + best_cost=OC_MODE_RD_COST(ssd+best_ssd,rate+best_rate,lambda); + best_qii=0; + for(qii=1;qiistate.qis[qii],0,_qti,satd) + +OC_CHROMA_QII_RATE; + cur_cost=OC_MODE_RD_COST(ssd+cur_ssd,rate+cur_rate,lambda); + if(cur_costqii[bi]=best_qii; + } + nblocks=(nblocks-4<<1)+4; + } + _modec->ssd=ssd; + _modec->rate=rate; +} + +static void oc_skip_cost(oc_enc_ctx *_enc,oc_enc_pipeline_state *_pipe, + unsigned _mbi,unsigned _ssd[12]){ + OC_ALIGN16(ogg_int16_t buffer[64]); + const 
unsigned char *src; + const unsigned char *ref; + int ystride; + const oc_fragment *frags; + const ptrdiff_t *frag_buf_offs; + const ptrdiff_t *sb_map; + const oc_mb_map_plane *mb_map; + const unsigned char *map_idxs; + int map_nidxs; + ogg_int64_t mask; + unsigned uncoded_ssd; + int uncoded_dc; + unsigned dc_dequant; + int dc_flag; + int mapii; + int mapi; + int pli; + int bi; + ptrdiff_t fragi; + ptrdiff_t frag_offs; + int borderi; + int pi; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]]; + ystride=_enc->state.ref_ystride[0]; + frags=_enc->state.frags; + frag_buf_offs=_enc->state.frag_buf_offs; + sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3]; + dc_dequant=_enc->state.dequant_tables[_enc->state.qis[0]][0][1][0]; + for(bi=0;bi<4;bi++){ + fragi=sb_map[bi]; + frag_offs=frag_buf_offs[fragi]; + oc_enc_frag_sub(_enc,buffer,src+frag_offs,ref+frag_offs,ystride); + borderi=frags[fragi].borderi; + uncoded_ssd=uncoded_dc=0; + if(borderi<0){ + for(pi=0;pi<64;pi++){ + uncoded_ssd+=buffer[pi]*buffer[pi]; + uncoded_dc+=buffer[pi]; + } + } + else{ + ogg_int64_t mask; + mask=_enc->state.borders[borderi].mask; + for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){ + uncoded_ssd+=buffer[pi]*buffer[pi]; + uncoded_dc+=buffer[pi]; + } + } + /*Scale to match DCT domain.*/ + uncoded_ssd<<=4; + /*We actually only want the AC contribution to the SSD.*/ + uncoded_ssd-=uncoded_dc*uncoded_dc>>2; + /*DC is a special case; if there's more than a full-quantizer improvement + in the effective DC component, always force-code the block.*/ + dc_flag=abs(uncoded_dc)>dc_dequant<<1; + uncoded_ssd|=-dc_flag; + _pipe->skip_ssd[0][fragi-_pipe->froffset[0]]=_ssd[bi]=uncoded_ssd; + } + mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi]; + map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; + map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt]; + map_nidxs=(map_nidxs-4>>1)+4; + mapii=4; + for(pli=1;pli<3;pli++){ + 
ystride=_enc->state.ref_ystride[pli]; + dc_dequant=_enc->state.dequant_tables[_enc->state.qis[0]][pli][1][0]; + for(;mapiistate.borders[borderi].mask; + for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){ + uncoded_ssd+=buffer[pi]*buffer[pi]; + uncoded_dc+=buffer[pi]; + } + } + /*Scale to match DCT domain.*/ + uncoded_ssd<<=4; + /*We actually only want the AC contribution to the SSD.*/ + uncoded_ssd-=uncoded_dc*uncoded_dc>>2; + /*DC is a special case; if there's more than a full-quantizer improvement + in the effective DC component, always force-code the block.*/ + dc_flag=abs(uncoded_dc)>dc_dequant<<1; + uncoded_ssd|=-dc_flag; + _pipe->skip_ssd[pli][fragi-_pipe->froffset[pli]]=_ssd[mapii]=uncoded_ssd; + } + map_nidxs=(map_nidxs-4<<1)+4; + } +} + +static void oc_mb_intra_satd(oc_enc_ctx *_enc,unsigned _mbi, + unsigned _frag_satd[12]){ + const unsigned char *src; + const ptrdiff_t *frag_buf_offs; + const ptrdiff_t *sb_map; + const oc_mb_map_plane *mb_map; + const unsigned char *map_idxs; + int map_nidxs; + int mapii; + int mapi; + int ystride; + int pli; + int bi; + ptrdiff_t fragi; + ptrdiff_t frag_offs; + frag_buf_offs=_enc->state.frag_buf_offs; + sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3]; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ystride=_enc->state.ref_ystride[0]; + for(bi=0;bi<4;bi++){ + fragi=sb_map[bi]; + frag_offs=frag_buf_offs[fragi]; + _frag_satd[bi]=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride); + } + mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi]; + map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt]; + map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; + /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/ + ystride=_enc->state.ref_ystride[1]; + for(mapii=4;mapii>2; + bi=mapi&3; + fragi=mb_map[pli][bi]; + frag_offs=frag_buf_offs[fragi]; + _frag_satd[mapii]=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride); + } +} + +static void oc_cost_intra(oc_enc_ctx *_enc,oc_mode_choice *_modec, + unsigned _mbi,const oc_fr_state *_fr,const 
oc_qii_state *_qs, + const unsigned _frag_satd[12],const unsigned _skip_ssd[12]){ + oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,_frag_satd,_skip_ssd,0); + oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,_frag_satd,_skip_ssd,0); + _modec->overhead+= + oc_mode_scheme_chooser_cost(&_enc->chooser,OC_MODE_INTRA)<lambda); +} + +static void oc_cost_inter(oc_enc_ctx *_enc,oc_mode_choice *_modec, + unsigned _mbi,int _mb_mode,const signed char *_mv, + const oc_fr_state *_fr,const oc_qii_state *_qs,const unsigned _skip_ssd[12]){ + unsigned frag_satd[12]; + const unsigned char *src; + const unsigned char *ref; + int ystride; + const ptrdiff_t *frag_buf_offs; + const ptrdiff_t *sb_map; + const oc_mb_map_plane *mb_map; + const unsigned char *map_idxs; + int map_nidxs; + int mapii; + int mapi; + int mv_offs[2]; + int dx; + int dy; + int pli; + int bi; + ptrdiff_t fragi; + ptrdiff_t frag_offs; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ref=_enc->state.ref_frame_data[ + _enc->state.ref_frame_idx[OC_FRAME_FOR_MODE(_mb_mode)]]; + ystride=_enc->state.ref_ystride[0]; + frag_buf_offs=_enc->state.frag_buf_offs; + sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3]; + dx=_mv[0]; + dy=_mv[1]; + _modec->rate=_modec->ssd=0; + if(oc_state_get_mv_offsets(&_enc->state,mv_offs,0,dx,dy)>1){ + for(bi=0;bi<4;bi++){ + fragi=sb_map[bi]; + frag_offs=frag_buf_offs[fragi]; + frag_satd[bi]=oc_enc_frag_satd2_thresh(_enc,src+frag_offs, + ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX); + } + } + else{ + for(bi=0;bi<4;bi++){ + fragi=sb_map[bi]; + frag_offs=frag_buf_offs[fragi]; + frag_satd[bi]=oc_enc_frag_satd_thresh(_enc,src+frag_offs, + ref+frag_offs+mv_offs[0],ystride,UINT_MAX); + } + } + mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi]; + map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt]; + map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; + /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/ + ystride=_enc->state.ref_ystride[1]; + 
if(oc_state_get_mv_offsets(&_enc->state,mv_offs,1,dx,dy)>1){ + for(mapii=4;mapii>2; + bi=mapi&3; + fragi=mb_map[pli][bi]; + frag_offs=frag_buf_offs[fragi]; + frag_satd[mapii]=oc_enc_frag_satd2_thresh(_enc,src+frag_offs, + ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX); + } + } + else{ + for(mapii=4;mapii>2; + bi=mapi&3; + fragi=mb_map[pli][bi]; + frag_offs=frag_buf_offs[fragi]; + frag_satd[mapii]=oc_enc_frag_satd_thresh(_enc,src+frag_offs, + ref+frag_offs+mv_offs[0],ystride,UINT_MAX); + } + } + oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,1); + oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,1); + _modec->overhead+= + oc_mode_scheme_chooser_cost(&_enc->chooser,_mb_mode)<lambda); +} + +static void oc_cost_inter_nomv(oc_enc_ctx *_enc,oc_mode_choice *_modec, + unsigned _mbi,int _mb_mode,const oc_fr_state *_fr,const oc_qii_state *_qs, + const unsigned _skip_ssd[12]){ + static const oc_mv OC_MV_ZERO; + oc_cost_inter(_enc,_modec,_mbi,_mb_mode,OC_MV_ZERO,_fr,_qs,_skip_ssd); +} + +static int oc_cost_inter1mv(oc_enc_ctx *_enc,oc_mode_choice *_modec, + unsigned _mbi,int _mb_mode,const signed char *_mv, + const oc_fr_state *_fr,const oc_qii_state *_qs,const unsigned _skip_ssd[12]){ + int bits0; + oc_cost_inter(_enc,_modec,_mbi,_mb_mode,_mv,_fr,_qs,_skip_ssd); + bits0=OC_MV_BITS[0][_mv[0]+31]+OC_MV_BITS[0][_mv[1]+31]; + _modec->overhead+=OC_MINI(_enc->mv_bits[0]+bits0,_enc->mv_bits[1]+12) + -OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1])<lambda); + return bits0; +} + +/*A mapping from oc_mb_map (raster) ordering to oc_sb_map (Hilbert) ordering.*/ +static const unsigned char OC_MB_PHASE[4][4]={ + {0,1,3,2},{0,3,1,2},{0,3,1,2},{2,3,1,0} +}; + +static void oc_cost_inter4mv(oc_enc_ctx *_enc,oc_mode_choice *_modec, + unsigned _mbi,oc_mv _mv[4],const oc_fr_state *_fr,const oc_qii_state *_qs, + const unsigned _skip_ssd[12]){ + unsigned frag_satd[12]; + oc_mv lbmvs[4]; + oc_mv cbmvs[4]; + const unsigned char *src; + const unsigned 
char *ref; + int ystride; + const ptrdiff_t *frag_buf_offs; + oc_mv *frag_mvs; + const oc_mb_map_plane *mb_map; + const unsigned char *map_idxs; + int map_nidxs; + int nqis; + int mapii; + int mapi; + int mv_offs[2]; + int dx; + int dy; + int pli; + int bi; + ptrdiff_t fragi; + ptrdiff_t frag_offs; + int bits0; + int bits1; + unsigned satd; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]]; + ystride=_enc->state.ref_ystride[0]; + frag_buf_offs=_enc->state.frag_buf_offs; + frag_mvs=_enc->state.frag_mvs; + mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi]; + _modec->rate=_modec->ssd=0; + for(bi=0;bi<4;bi++){ + fragi=mb_map[0][bi]; + dx=_mv[bi][0]; + dy=_mv[bi][1]; + /*Save the block MVs as the current ones while we're here; we'll replace + them if we don't ultimately choose 4MV mode.*/ + frag_mvs[fragi][0]=(signed char)dx; + frag_mvs[fragi][1]=(signed char)dy; + frag_offs=frag_buf_offs[fragi]; + if(oc_state_get_mv_offsets(&_enc->state,mv_offs,0,dx,dy)>1){ + satd=oc_enc_frag_satd2_thresh(_enc,src+frag_offs, + ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX); + } + else{ + satd=oc_enc_frag_satd_thresh(_enc,src+frag_offs, + ref+frag_offs+mv_offs[0],ystride,UINT_MAX); + } + frag_satd[OC_MB_PHASE[_mbi&3][bi]]=satd; + } + oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,frag_satd, + _enc->vp3_compatible?OC_NOSKIP:_skip_ssd,1); + /*Figure out which blocks are being skipped and give them (0,0) MVs.*/ + bits0=0; + bits1=0; + nqis=_enc->state.nqis; + for(bi=0;bi<4;bi++){ + if(_modec->qii[OC_MB_PHASE[_mbi&3][bi]]>=nqis){ + memset(lbmvs+bi,0,sizeof(*lbmvs)); + } + else{ + memcpy(lbmvs+bi,_mv+bi,sizeof(*lbmvs)); + bits0+=OC_MV_BITS[0][_mv[bi][0]+31]+OC_MV_BITS[0][_mv[bi][1]+31]; + bits1+=12; + } + } + (*OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt])(cbmvs, + (const oc_mv *)lbmvs); + map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt]; + 
map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; + /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/ + ystride=_enc->state.ref_ystride[1]; + for(mapii=4;mapii>2; + bi=mapi&3; + fragi=mb_map[pli][bi]; + dx=cbmvs[bi][0]; + dy=cbmvs[bi][1]; + frag_offs=frag_buf_offs[fragi]; + /*TODO: We could save half these calls by re-using the results for the Cb + and Cr planes; is it worth it?*/ + if(oc_state_get_mv_offsets(&_enc->state,mv_offs,pli,dx,dy)>1){ + satd=oc_enc_frag_satd2_thresh(_enc,src+frag_offs, + ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX); + } + else{ + satd=oc_enc_frag_satd_thresh(_enc,src+frag_offs, + ref+frag_offs+mv_offs[0],ystride,UINT_MAX); + } + frag_satd[mapii]=satd; + } + oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,1); + _modec->overhead+= + oc_mode_scheme_chooser_cost(&_enc->chooser,OC_MODE_INTER_MV_FOUR) + +OC_MINI(_enc->mv_bits[0]+bits0,_enc->mv_bits[1]+bits1) + -OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1])<lambda); +} + +int oc_enc_analyze_inter(oc_enc_ctx *_enc,int _allow_keyframe,int _recode){ + oc_set_chroma_mvs_func set_chroma_mvs; + oc_enc_pipeline_state pipe; + oc_qii_state intra_luma_qs; + oc_mv last_mv; + oc_mv prior_mv; + ogg_int64_t interbits; + ogg_int64_t intrabits; + const unsigned char *map_idxs; + int nmap_idxs; + unsigned *coded_mbis; + unsigned *uncoded_mbis; + size_t ncoded_mbis; + size_t nuncoded_mbis; + oc_sb_flags *sb_flags; + signed char *mb_modes; + const oc_sb_map *sb_maps; + const oc_mb_map *mb_maps; + oc_mb_enc_info *embs; + oc_fragment *frags; + oc_mv *frag_mvs; + int qi; + unsigned stripe_sby; + unsigned mcu_nvsbs; + int notstart; + int notdone; + int vdec; + unsigned sbi; + unsigned sbi_end; + int refi; + int pli; + set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt]; + _enc->state.frame_type=OC_INTER_FRAME; + oc_mode_scheme_chooser_reset(&_enc->chooser); + oc_enc_tokenize_start(_enc); + oc_enc_pipeline_init(_enc,&pipe); + 
if(_allow_keyframe)oc_qii_state_init(&intra_luma_qs); + _enc->mv_bits[0]=_enc->mv_bits[1]=0; + interbits=intrabits=0; + last_mv[0]=last_mv[1]=prior_mv[0]=prior_mv[1]=0; + /*Choose MVs and MB modes and quantize and code luma. + Must be done in Hilbert order.*/ + map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt]; + nmap_idxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt]; + qi=_enc->state.qis[0]; + coded_mbis=_enc->coded_mbis; + uncoded_mbis=coded_mbis+_enc->state.nmbs; + ncoded_mbis=0; + nuncoded_mbis=0; + _enc->state.ncoded_fragis[0]=0; + _enc->state.ncoded_fragis[1]=0; + _enc->state.ncoded_fragis[2]=0; + sb_flags=_enc->state.sb_flags; + mb_modes=_enc->state.mb_modes; + sb_maps=(const oc_sb_map *)_enc->state.sb_maps; + mb_maps=(const oc_mb_map *)_enc->state.mb_maps; + embs=_enc->mb_info; + frags=_enc->state.frags; + frag_mvs=_enc->state.frag_mvs; + vdec=!(_enc->state.info.pixel_fmt&2); + notstart=0; + notdone=1; + mcu_nvsbs=_enc->mcu_nvsbs; + for(stripe_sby=0;notdone;stripe_sby+=mcu_nvsbs){ + notdone=oc_enc_pipeline_set_stripe(_enc,&pipe,stripe_sby); + sbi_end=pipe.sbi_end[0]; + for(sbi=pipe.sbi0[0];sbisp_levelsp_levellambda*3; + if(modes[OC_MODE_INTER_MV_FOUR].cost>2][mbi&3][bi]; + frags[fragi].qii=modes[mb_mode].qii[bi]; + } + if(oc_enc_mb_transform_quantize_luma(_enc,&pipe,mbi, + modes[mb_mode].overhead>>OC_BIT_SCALE)>0){ + int orig_mb_mode; + orig_mb_mode=mb_mode; + mb_mode=mb_modes[mbi]; + switch(mb_mode){ + case OC_MODE_INTER_MV:{ + memcpy(prior_mv,last_mv,sizeof(prior_mv)); + /*If we're backing out from 4MV, find the MV we're actually + using.*/ + if(orig_mb_mode==OC_MODE_INTER_MV_FOUR){ + for(bi=0;;bi++){ + fragi=mb_maps[mbi][0][bi]; + if(frags[fragi].coded){ + memcpy(last_mv,frag_mvs[fragi],sizeof(last_mv)); + dx=frag_mvs[fragi][0]; + dy=frag_mvs[fragi][1]; + break; + } + } + mb_mv_bits_0=OC_MV_BITS[0][dx+31]+OC_MV_BITS[0][dy+31]; + } + /*Otherwise we used the original analysis MV.*/ + else{ + memcpy(last_mv, + 
embs[mbi].analysis_mv[0][OC_FRAME_PREV],sizeof(last_mv)); + } + _enc->mv_bits[0]+=mb_mv_bits_0; + _enc->mv_bits[1]+=12; + }break; + case OC_MODE_INTER_MV_LAST2:{ + oc_mv tmp_mv; + memcpy(tmp_mv,prior_mv,sizeof(tmp_mv)); + memcpy(prior_mv,last_mv,sizeof(prior_mv)); + memcpy(last_mv,tmp_mv,sizeof(last_mv)); + }break; + case OC_MODE_GOLDEN_MV:{ + _enc->mv_bits[0]+=mb_gmv_bits_0; + _enc->mv_bits[1]+=12; + }break; + case OC_MODE_INTER_MV_FOUR:{ + oc_mv lbmvs[4]; + oc_mv cbmvs[4]; + memcpy(prior_mv,last_mv,sizeof(prior_mv)); + for(bi=0;bi<4;bi++){ + fragi=mb_maps[mbi][0][bi]; + if(frags[fragi].coded){ + memcpy(last_mv,frag_mvs[fragi],sizeof(last_mv)); + memcpy(lbmvs[bi],frag_mvs[fragi],sizeof(lbmvs[bi])); + _enc->mv_bits[0]+=OC_MV_BITS[0][frag_mvs[fragi][0]+31] + +OC_MV_BITS[0][frag_mvs[fragi][1]+31]; + _enc->mv_bits[1]+=12; + } + /*Replace the block MVs for not-coded blocks with (0,0).*/ + else memset(lbmvs[bi],0,sizeof(lbmvs[bi])); + } + (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs); + for(mapii=4;mapii>2; + bi=mapi&3; + fragi=mb_maps[mbi][pli][bi]; + frags[fragi].mb_mode=mb_mode; + frags[fragi].qii=modes[OC_MODE_INTER_MV_FOUR].qii[mapii]; + memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(frag_mvs[fragi])); + } + }break; + } + coded_mbis[ncoded_mbis++]=mbi; + oc_mode_scheme_chooser_update(&_enc->chooser,mb_mode); + interbits+=modes[mb_mode].rate+modes[mb_mode].overhead; + } + else{ + *(uncoded_mbis-++nuncoded_mbis)=mbi; + mb_mode=OC_MODE_INTER_NOMV; + dx=dy=0; + } + /*Propagate final MB mode and MVs to the chroma blocks. 
+ This has already been done for 4MV mode, since it requires individual + block motion vectors.*/ + if(mb_mode!=OC_MODE_INTER_MV_FOUR){ + for(mapii=4;mapii>2; + bi=mapi&3; + fragi=mb_maps[mbi][pli][bi]; + frags[fragi].mb_mode=mb_mode; + /*If we switched from 4MV mode to INTER_MV mode, then the qii + values won't have been chosen with the right MV, but it's + probaby not worth re-estimating them.*/ + frags[fragi].qii=modes[mb_mode].qii[mapii]; + frag_mvs[fragi][0]=(signed char)dx; + frag_mvs[fragi][1]=(signed char)dy; + } + } + } + oc_fr_state_flush_sb(pipe.fr+0); + sb_flags[sbi].coded_fully=pipe.fr[0].sb_full; + sb_flags[sbi].coded_partially=pipe.fr[0].sb_partial; + } + oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,0,notstart,notdone); + /*Code chroma planes.*/ + for(pli=1;pli<3;pli++){ + oc_enc_sb_transform_quantize_chroma(_enc,&pipe, + pli,pipe.sbi0[pli],pipe.sbi_end[pli]); + oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,pli,notstart,notdone); + } + notstart=1; + } + /*Finish filling in the reference frame borders.*/ + refi=_enc->state.ref_frame_idx[OC_FRAME_SELF]; + for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_enc->state,refi,pli); + /*Finish adding flagging overhead costs to inter bit counts to determine if + we should have coded a key frame instead.*/ + if(_allow_keyframe){ + if(interbits>intrabits)return 1; + /*Technically the chroma plane counts are over-estimations, because they + don't account for continuing runs from the luma planes, but the + inaccuracy is small.*/ + for(pli=0;pli<3;pli++)interbits+=pipe.fr[pli].bits<mv_bits[0],_enc->mv_bits[1])<chooser.scheme_bits[_enc->chooser.scheme_list[0]]<intrabits)return 1; + } + _enc->ncoded_mbis=ncoded_mbis; + /*Compact the coded fragment list.*/ + { + ptrdiff_t ncoded_fragis; + ncoded_fragis=_enc->state.ncoded_fragis[0]; + for(pli=1;pli<3;pli++){ + memmove(_enc->state.coded_fragis+ncoded_fragis, + _enc->state.coded_fragis+_enc->state.fplanes[pli].froffset, + 
_enc->state.ncoded_fragis[pli]*sizeof(*_enc->state.coded_fragis)); + ncoded_fragis+=_enc->state.ncoded_fragis[pli]; + } + _enc->state.ntotal_coded_fragis=ncoded_fragis; + } + return 0; +} + +#if defined(OC_COLLECT_METRICS) +# include +# include + +/*TODO: It may be helpful (for block-level quantizers especially) to separate + out the contributions from AC and DC into separate tables.*/ + +# define OC_ZWEIGHT (0.25) + +static void oc_mode_metrics_add(oc_mode_metrics *_metrics, + double _w,int _satd,int _rate,double _rmse){ + double rate; + /*Accumulate statistics without the scaling; this lets us change the scale + factor yet still use old data.*/ + rate=ldexp(_rate,-OC_BIT_SCALE); + if(_metrics->fragw>0){ + double dsatd; + double drate; + double drmse; + double w; + dsatd=_satd-_metrics->satd/_metrics->fragw; + drate=rate-_metrics->rate/_metrics->fragw; + drmse=_rmse-_metrics->rmse/_metrics->fragw; + w=_metrics->fragw*_w/(_metrics->fragw+_w); + _metrics->satd2+=dsatd*dsatd*w; + _metrics->satdrate+=dsatd*drate*w; + _metrics->rate2+=drate*drate*w; + _metrics->satdrmse+=dsatd*drmse*w; + _metrics->rmse2+=drmse*drmse*w; + } + _metrics->fragw+=_w; + _metrics->satd+=_satd*_w; + _metrics->rate+=rate*_w; + _metrics->rmse+=_rmse*_w; +} + +static void oc_mode_metrics_merge(oc_mode_metrics *_dst, + const oc_mode_metrics *_src,int _n){ + int i; + /*Find a non-empty set of metrics.*/ + for(i=0;i<_n&&_src[i].fragw<=0;i++); + if(i>=_n){ + memset(_dst,0,sizeof(*_dst)); + return; + } + memcpy(_dst,_src+i,sizeof(*_dst)); + /*And iterate over the remaining non-empty sets of metrics.*/ + for(i++;i<_n;i++)if(_src[i].fragw>0){ + double wa; + double wb; + double dsatd; + double drate; + double drmse; + double w; + wa=_dst->fragw; + wb=_src[i].fragw; + dsatd=_src[i].satd/wb-_dst->satd/wa; + drate=_src[i].rate/wb-_dst->rate/wa; + drmse=_src[i].rmse/wb-_dst->rmse/wa; + w=wa*wb/(wa+wb); + _dst->fragw+=_src[i].fragw; + _dst->satd+=_src[i].satd; + _dst->rate+=_src[i].rate; + 
_dst->rmse+=_src[i].rmse; + _dst->satd2+=_src[i].satd2+dsatd*dsatd*w; + _dst->satdrate+=_src[i].satdrate+dsatd*drate*w; + _dst->rate2+=_src[i].rate2+drate*drate*w; + _dst->satdrmse+=_src[i].satdrmse+dsatd*drmse*w; + _dst->rmse2+=_src[i].rmse2+drmse*drmse*w; + } +} + +/*Compile collected SATD/rate/RMSE metrics into a form that's immediately + useful for mode decision.*/ +static void oc_enc_mode_metrics_update(oc_enc_ctx *_enc,int _qi){ + int pli; + int qti; + oc_restore_fpu(&_enc->state); + /*Convert raw collected data into cleaned up sample points.*/ + for(pli=0;pli<3;pli++){ + for(qti=0;qti<2;qti++){ + double fragw; + int bin0; + int bin1; + int bin; + fragw=0; + bin0=bin1=0; + for(bin=0;bin=OC_ZWEIGHT){ + fragw-=OC_MODE_METRICS[_qi][pli][qti][bin0++].fragw; + } + /*Merge statistics and fit lines.*/ + oc_mode_metrics_merge(&metrics, + OC_MODE_METRICS[_qi][pli][qti]+bin0,bin1-bin0); + if(metrics.fragw>0&&metrics.satd2>0){ + double a; + double b; + double msatd; + double mrate; + double mrmse; + double rate; + double rmse; + msatd=metrics.satd/metrics.fragw; + mrate=metrics.rate/metrics.fragw; + mrmse=metrics.rmse/metrics.fragw; + /*Compute the points on these lines corresponding to the actual bin + value.*/ + b=metrics.satdrate/metrics.satd2; + a=mrate-b*msatd; + rate=ldexp(a+b*(bin<>1); + return -_extra_bits; +} + +/*Handles the pure zero run tokens.*/ +static ptrdiff_t oc_token_skip_zrl(int _token,int _extra_bits){ + return _extra_bits+1; +} + +/*Handles a normal coefficient value token.*/ +static ptrdiff_t oc_token_skip_val(void){ + return 1; +} + +/*Handles a category 1A zero run/coefficient value combo token.*/ +static ptrdiff_t oc_token_skip_run_cat1a(int _token){ + return _token-OC_DCT_RUN_CAT1A+2; +} + +/*Handles category 1b, 1c, 2a, and 2b zero run/coefficient value combo tokens.*/ +static ptrdiff_t oc_token_skip_run(int _token,int _extra_bits){ + int run_cati; + int ncoeffs_mask; + int ncoeffs_adjust; + run_cati=_token-OC_DCT_RUN_CAT1B; + 
ncoeffs_mask=OC_BYTE_TABLE32(3,7,0,1,run_cati); + ncoeffs_adjust=OC_BYTE_TABLE32(7,11,2,3,run_cati); + return (_extra_bits&ncoeffs_mask)+ncoeffs_adjust; +} + +/*A jump table for computing the number of coefficients or blocks to skip for + a given token value. + This reduces all the conditional branches, etc., needed to parse these token + values down to one indirect jump.*/ +static const oc_token_skip_func OC_TOKEN_SKIP_TABLE[TH_NDCT_TOKENS]={ + oc_token_skip_eob, + oc_token_skip_eob, + oc_token_skip_eob, + oc_token_skip_eob, + oc_token_skip_eob, + oc_token_skip_eob, + oc_token_skip_eob6, + oc_token_skip_zrl, + oc_token_skip_zrl, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_val, + (oc_token_skip_func)oc_token_skip_run_cat1a, + (oc_token_skip_func)oc_token_skip_run_cat1a, + (oc_token_skip_func)oc_token_skip_run_cat1a, + (oc_token_skip_func)oc_token_skip_run_cat1a, + (oc_token_skip_func)oc_token_skip_run_cat1a, + oc_token_skip_run, + oc_token_skip_run, + oc_token_skip_run, + oc_token_skip_run +}; + +/*Determines the number of blocks or coefficients to be skipped for a given + token value. + _token: The token value to skip. + _extra_bits: The extra bits attached to this token. + Return: A positive value indicates that number of coefficients are to be + skipped in the current block. + Otherwise, the negative of the return value indicates that number of + blocks are to be ended. 
+ 0 will never be returned, so that at least one coefficient in one + block will always be decoded for every token.*/ +static ptrdiff_t oc_dct_token_skip(int _token,int _extra_bits){ + return (*OC_TOKEN_SKIP_TABLE[_token])(_token,_extra_bits); +} + + + +void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc){ + static const unsigned char OC_ZZI_HUFF_OFFSET[64]={ + 0,16,16,16,16,16,32,32, + 32,32,32,32,32,32,32,48, + 48,48,48,48,48,48,48,48, + 48,48,48,48,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64 + }; + const oc_fragment *frags; + const unsigned *frag_satd; + const unsigned *frag_ssd; + const ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + ptrdiff_t fragii; + double fragw; + int qti; + int qii; + int qi; + int pli; + int zzi; + int token; + int eb; + oc_restore_fpu(&_enc->state); + /*Load any existing mode metrics if we haven't already.*/ + if(!oc_has_mode_metrics){ + FILE *fmetrics; + memset(OC_MODE_METRICS,0,sizeof(OC_MODE_METRICS)); + fmetrics=fopen("modedec.stats","rb"); + if(fmetrics!=NULL){ + fread(OC_MODE_METRICS,sizeof(OC_MODE_METRICS),1,fmetrics); + fclose(fmetrics); + } + for(qi=0;qi<64;qi++)oc_enc_mode_metrics_update(_enc,qi); + oc_has_mode_metrics=1; + } + qti=_enc->state.frame_type; + frags=_enc->state.frags; + frag_satd=_enc->frag_satd; + frag_ssd=_enc->frag_ssd; + coded_fragis=_enc->state.coded_fragis; + ncoded_fragis=fragii=0; + /*Weight the fragments by the inverse frame size; this prevents HD content + from dominating the statistics.*/ + fragw=1.0/_enc->state.nfrags; + for(pli=0;pli<3;pli++){ + ptrdiff_t ti[64]; + int eob_token[64]; + int eob_run[64]; + /*Set up token indices and eob run counts. 
+ We don't bother trying to figure out the real cost of the runs that span + coefficients; instead we use the costs that were available when R-D + token optimization was done.*/ + for(zzi=0;zzi<64;zzi++){ + ti[zzi]=_enc->dct_token_offs[pli][zzi]; + if(ti[zzi]>0){ + token=_enc->dct_tokens[pli][zzi][0]; + eb=_enc->extra_bits[pli][zzi][0]; + eob_token[zzi]=token; + eob_run[zzi]=-oc_dct_token_skip(token,eb); + } + else{ + eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX; + eob_run[zzi]=0; + } + } + /*Scan the list of coded fragments for this plane.*/ + ncoded_fragis+=_enc->state.ncoded_fragis[pli]; + for(;fragii0){ + /*We've reached the end of the block.*/ + eob_run[zzi]--; + break; + } + huffi=_enc->huff_idxs[qti][zzi>0][pli+1>>1] + +OC_ZZI_HUFF_OFFSET[zzi]; + if(eob_token[zzi]huff_codes[huffi][eob_token[zzi]].nbits + +OC_DCT_TOKEN_EXTRA_BITS[eob_token[zzi]]; + eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX; + } + token=_enc->dct_tokens[pli][zzi][ti[zzi]]; + eb=_enc->extra_bits[pli][zzi][ti[zzi]]; + ti[zzi]++; + skip=oc_dct_token_skip(token,eb); + if(skip<0){ + eob_token[zzi]=token; + eob_run[zzi]=-skip; + } + else{ + /*A regular DCT value token; accumulate the bits for it.*/ + frag_bits+=_enc->huff_codes[huffi][token].nbits + +OC_DCT_TOKEN_EXTRA_BITS[token]; + zzi+=skip; + } + } + mb_mode=frags[fragi].mb_mode; + qi=_enc->state.qis[frags[fragi].qii]; + satd=frag_satd[fragi]<<(pli+1&2); + bin=OC_MINI(satd>>OC_SAD_SHIFT,OC_SAD_BINS-1); + oc_mode_metrics_add(OC_MODE_METRICS[qi][pli][mb_mode!=OC_MODE_INTRA]+bin, + fragw,satd,frag_bits<state.nqis;qii++){ + oc_enc_mode_metrics_update(_enc,_enc->state.qis[qii]); + } +} + +void oc_enc_mode_metrics_dump(oc_enc_ctx *_enc){ + FILE *fmetrics; + int qi; + /*Generate sample points for complete list of QI values.*/ + for(qi=0;qi<64;qi++)oc_enc_mode_metrics_update(_enc,qi); + fmetrics=fopen("modedec.stats","wb"); + if(fmetrics!=NULL){ + fwrite(OC_MODE_METRICS,sizeof(OC_MODE_METRICS),1,fmetrics); + fclose(fmetrics); + } + fprintf(stdout, + "/*File 
generated by libtheora with OC_COLLECT_METRICS" + " defined at compile time.*/\n" + "#if !defined(_modedec_H)\n" + "# define _modedec_H (1)\n" + "\n" + "\n" + "\n" + "# if defined(OC_COLLECT_METRICS)\n" + "typedef struct oc_mode_metrics oc_mode_metrics;\n" + "# endif\n" + "typedef struct oc_mode_rd oc_mode_rd;\n" + "\n" + "\n" + "\n" + "/*The number of extra bits of precision at which to store rate" + " metrics.*/\n" + "# define OC_BIT_SCALE (%i)\n" + "/*The number of extra bits of precision at which to store RMSE metrics.\n" + " This must be at least half OC_BIT_SCALE (rounded up).*/\n" + "# define OC_RMSE_SCALE (%i)\n" + "/*The number of bins to partition statistics into.*/\n" + "# define OC_SAD_BINS (%i)\n" + "/*The number of bits of precision to drop" + " from SAD scores to assign them to a\n" + " bin.*/\n" + "# define OC_SAD_SHIFT (%i)\n" + "\n" + "\n" + "\n" + "# if defined(OC_COLLECT_METRICS)\n" + "struct oc_mode_metrics{\n" + " double fragw;\n" + " double satd;\n" + " double rate;\n" + " double rmse;\n" + " double satd2;\n" + " double satdrate;\n" + " double rate2;\n" + " double satdrmse;\n" + " double rmse2;\n" + "};\n" + "\n" + "\n" + "int oc_has_mode_metrics;\n" + "oc_mode_metrics OC_MODE_METRICS[64][3][2][OC_SAD_BINS];\n" + "# endif\n" + "\n" + "\n" + "\n" + "struct oc_mode_rd{\n" + " ogg_int16_t rate;\n" + " ogg_int16_t rmse;\n" + "};\n" + "\n" + "\n" + "# if !defined(OC_COLLECT_METRICS)\n" + "static const\n" + "# endif\n" + "oc_mode_rd OC_MODE_RD[64][3][2][OC_SAD_BINS]={\n", + OC_BIT_SCALE,OC_RMSE_SCALE,OC_SAD_BINS,OC_SAD_SHIFT); + for(qi=0;qi<64;qi++){ + int pli; + fprintf(stdout," {\n"); + for(pli=0;pli<3;pli++){ + int qti; + fprintf(stdout," {\n"); + for(qti=0;qti<2;qti++){ + int bin; + static const char *pl_names[3]={"Y'","Cb","Cr"}; + static const char *qti_names[2]={"INTRA","INTER"}; + fprintf(stdout," /*%s qi=%i %s*/\n", + pl_names[pli],qi,qti_names[qti]); + fprintf(stdout," {\n"); + fprintf(stdout," "); + for(bin=0;bin +#include +#include 
+#include "apiwrapper.h" + + + +const char *theora_version_string(void){ + return th_version_string(); +} + +ogg_uint32_t theora_version_number(void){ + return th_version_number(); +} + +void theora_info_init(theora_info *_ci){ + memset(_ci,0,sizeof(*_ci)); +} + +void theora_info_clear(theora_info *_ci){ + th_api_wrapper *api; + api=(th_api_wrapper *)_ci->codec_setup; + memset(_ci,0,sizeof(*_ci)); + if(api!=NULL){ + if(api->clear!=NULL)(*api->clear)(api); + _ogg_free(api); + } +} + +void theora_clear(theora_state *_th){ + /*Provide compatibility with mixed encoder and decoder shared lib versions.*/ + if(_th->internal_decode!=NULL){ + (*((oc_state_dispatch_vtable *)_th->internal_decode)->clear)(_th); + } + if(_th->internal_encode!=NULL){ + (*((oc_state_dispatch_vtable *)_th->internal_encode)->clear)(_th); + } + if(_th->i!=NULL)theora_info_clear(_th->i); + memset(_th,0,sizeof(*_th)); +} + +int theora_control(theora_state *_th,int _req,void *_buf,size_t _buf_sz){ + /*Provide compatibility with mixed encoder and decoder shared lib versions.*/ + if(_th->internal_decode!=NULL){ + return (*((oc_state_dispatch_vtable *)_th->internal_decode)->control)(_th, + _req,_buf,_buf_sz); + } + else if(_th->internal_encode!=NULL){ + return (*((oc_state_dispatch_vtable *)_th->internal_encode)->control)(_th, + _req,_buf,_buf_sz); + } + else return TH_EINVAL; +} + +ogg_int64_t theora_granule_frame(theora_state *_th,ogg_int64_t _gp){ + /*Provide compatibility with mixed encoder and decoder shared lib versions.*/ + if(_th->internal_decode!=NULL){ + return (*((oc_state_dispatch_vtable *)_th->internal_decode)->granule_frame)( + _th,_gp); + } + else if(_th->internal_encode!=NULL){ + return (*((oc_state_dispatch_vtable *)_th->internal_encode)->granule_frame)( + _th,_gp); + } + else return -1; +} + +double theora_granule_time(theora_state *_th, ogg_int64_t _gp){ + /*Provide compatibility with mixed encoder and decoder shared lib versions.*/ + if(_th->internal_decode!=NULL){ + return 
(*((oc_state_dispatch_vtable *)_th->internal_decode)->granule_time)( + _th,_gp); + } + else if(_th->internal_encode!=NULL){ + return (*((oc_state_dispatch_vtable *)_th->internal_encode)->granule_time)( + _th,_gp); + } + else return -1; +} + +void oc_theora_info2th_info(th_info *_info,const theora_info *_ci){ + _info->version_major=_ci->version_major; + _info->version_minor=_ci->version_minor; + _info->version_subminor=_ci->version_subminor; + _info->frame_width=_ci->width; + _info->frame_height=_ci->height; + _info->pic_width=_ci->frame_width; + _info->pic_height=_ci->frame_height; + _info->pic_x=_ci->offset_x; + _info->pic_y=_ci->offset_y; + _info->fps_numerator=_ci->fps_numerator; + _info->fps_denominator=_ci->fps_denominator; + _info->aspect_numerator=_ci->aspect_numerator; + _info->aspect_denominator=_ci->aspect_denominator; + switch(_ci->colorspace){ + case OC_CS_ITU_REC_470M:_info->colorspace=TH_CS_ITU_REC_470M;break; + case OC_CS_ITU_REC_470BG:_info->colorspace=TH_CS_ITU_REC_470BG;break; + default:_info->colorspace=TH_CS_UNSPECIFIED;break; + } + switch(_ci->pixelformat){ + case OC_PF_420:_info->pixel_fmt=TH_PF_420;break; + case OC_PF_422:_info->pixel_fmt=TH_PF_422;break; + case OC_PF_444:_info->pixel_fmt=TH_PF_444;break; + default:_info->pixel_fmt=TH_PF_RSVD; + } + _info->target_bitrate=_ci->target_bitrate; + _info->quality=_ci->quality; + _info->keyframe_granule_shift=_ci->keyframe_frequency_force>0? 
+ OC_MINI(31,oc_ilog(_ci->keyframe_frequency_force-1)):0; +} + +int theora_packet_isheader(ogg_packet *_op){ + return th_packet_isheader(_op); +} + +int theora_packet_iskeyframe(ogg_packet *_op){ + return th_packet_iskeyframe(_op); +} + +int theora_granule_shift(theora_info *_ci){ + /*This breaks when keyframe_frequency_force is not positive or is larger than + 2**31 (if your int is more than 32 bits), but that's what the original + function does.*/ + return oc_ilog(_ci->keyframe_frequency_force-1); +} + +void theora_comment_init(theora_comment *_tc){ + th_comment_init((th_comment *)_tc); +} + +char *theora_comment_query(theora_comment *_tc,char *_tag,int _count){ + return th_comment_query((th_comment *)_tc,_tag,_count); +} + +int theora_comment_query_count(theora_comment *_tc,char *_tag){ + return th_comment_query_count((th_comment *)_tc,_tag); +} + +void theora_comment_clear(theora_comment *_tc){ + th_comment_clear((th_comment *)_tc); +} + +void theora_comment_add(theora_comment *_tc,char *_comment){ + th_comment_add((th_comment *)_tc,_comment); +} + +void theora_comment_add_tag(theora_comment *_tc, char *_tag, char *_value){ + th_comment_add_tag((th_comment *)_tc,_tag,_value); +} diff --git a/thirdparty/libtheora/apiwrapper.h b/thirdparty/libtheora/apiwrapper.h new file mode 100644 index 0000000000..93454d7bda --- /dev/null +++ b/thirdparty/libtheora/apiwrapper.h @@ -0,0 +1,54 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: apiwrapper.h 13596 2007-08-23 20:05:38Z tterribe $ + + ********************************************************************/ + +#if !defined(_apiwrapper_H) +# define _apiwrapper_H (1) +# include +# include +# include "theora/theoradec.h" +# include "theora/theoraenc.h" +# include "internal.h" + +typedef struct th_api_wrapper th_api_wrapper; +typedef struct th_api_info th_api_info; + +/*Provide an entry point for the codec setup to clear itself in case we ever + want to break pieces off into a common base library shared by encoder and + decoder. + In addition, this makes several other pieces of the API wrapper cleaner.*/ +typedef void (*oc_setup_clear_func)(void *_ts); + +/*Generally only one of these pointers will be non-NULL in any given instance. + Technically we do not even really need this struct, since we should be able + to figure out which one from "context", but doing it this way makes sure we + don't flub it up.*/ +struct th_api_wrapper{ + oc_setup_clear_func clear; + th_setup_info *setup; + th_dec_ctx *decode; + th_enc_ctx *encode; +}; + +struct th_api_info{ + th_api_wrapper api; + theora_info info; +}; + + +void oc_theora_info2th_info(th_info *_info,const theora_info *_ci); + +#endif diff --git a/thirdparty/libtheora/bitpack.c b/thirdparty/libtheora/bitpack.c new file mode 100644 index 0000000000..8195003bad --- /dev/null +++ b/thirdparty/libtheora/bitpack.c @@ -0,0 +1,111 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: packing variable sized words into an octet stream + last mod: $Id: bitpack.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include +#include +#include "bitpack.h" + +/*We're 'MSb' endian; if we write a word but read individual bits, + then we'll read the MSb first.*/ + +void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes){ + memset(_b,0,sizeof(*_b)); + _b->ptr=_buf; + _b->stop=_buf+_bytes; +} + +static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){ + const unsigned char *ptr; + const unsigned char *stop; + oc_pb_window window; + int available; + window=_b->window; + available=_b->bits; + ptr=_b->ptr; + stop=_b->stop; + while(available<=OC_PB_WINDOW_SIZE-8&&ptrptr=ptr; + if(_bits>available){ + if(ptr>=stop){ + _b->eof=1; + available=OC_LOTS_OF_BITS; + } + else window|=*ptr>>(available&7); + } + _b->bits=available; + return window; +} + +int oc_pack_look1(oc_pack_buf *_b){ + oc_pb_window window; + int available; + window=_b->window; + available=_b->bits; + if(available<1)_b->window=window=oc_pack_refill(_b,1); + return window>>OC_PB_WINDOW_SIZE-1; +} + +void oc_pack_adv1(oc_pack_buf *_b){ + _b->window<<=1; + _b->bits--; +} + +/*Here we assume that 0<=_bits&&_bits<=32.*/ +long oc_pack_read(oc_pack_buf *_b,int _bits){ + oc_pb_window window; + int available; + long result; + window=_b->window; + available=_b->bits; + if(_bits==0)return 0; + if(available<_bits){ + window=oc_pack_refill(_b,_bits); + available=_b->bits; + } + result=window>>OC_PB_WINDOW_SIZE-_bits; + available-=_bits; + window<<=1; + window<<=_bits-1; + _b->bits=available; + _b->window=window; + return result; +} + +int oc_pack_read1(oc_pack_buf *_b){ + oc_pb_window window; + int available; 
+ int result; + window=_b->window; + available=_b->bits; + if(available<1){ + window=oc_pack_refill(_b,1); + available=_b->bits; + } + result=window>>OC_PB_WINDOW_SIZE-1; + available--; + window<<=1; + _b->bits=available; + _b->window=window; + return result; +} + +long oc_pack_bytes_left(oc_pack_buf *_b){ + if(_b->eof)return -1; + return _b->stop-_b->ptr+(_b->bits>>3); +} diff --git a/thirdparty/libtheora/bitpack.h b/thirdparty/libtheora/bitpack.h new file mode 100644 index 0000000000..a020a292f5 --- /dev/null +++ b/thirdparty/libtheora/bitpack.h @@ -0,0 +1,59 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: packing variable sized words into an octet stream + last mod: $Id: bitwise.c 7675 2004-09-01 00:34:39Z xiphmont $ + + ********************************************************************/ +#if !defined(_bitpack_H) +# define _bitpack_H (1) +# include + + + +typedef unsigned long oc_pb_window; +typedef struct oc_pack_buf oc_pack_buf; + + + +# define OC_PB_WINDOW_SIZE ((int)sizeof(oc_pb_window)*CHAR_BIT) +/*This is meant to be a large, positive constant that can still be efficiently + loaded as an immediate (on platforms like ARM, for example). 
+ Even relatively modest values like 100 would work fine.*/ +# define OC_LOTS_OF_BITS (0x40000000) + + + +struct oc_pack_buf{ + oc_pb_window window; + const unsigned char *ptr; + const unsigned char *stop; + int bits; + int eof; +}; + +void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes); +int oc_pack_look1(oc_pack_buf *_b); +void oc_pack_adv1(oc_pack_buf *_b); +/*Here we assume 0<=_bits&&_bits<=32.*/ +long oc_pack_read(oc_pack_buf *_b,int _bits); +int oc_pack_read1(oc_pack_buf *_b); +/* returns -1 for read beyond EOF, or the number of whole bytes available */ +long oc_pack_bytes_left(oc_pack_buf *_b); + +/*These two functions are implemented locally in huffdec.c*/ +/*Read in bits without advancing the bitptr. + Here we assume 0<=_bits&&_bits<=32.*/ +/*static int oc_pack_look(oc_pack_buf *_b,int _bits);*/ +/*static void oc_pack_adv(oc_pack_buf *_b,int _bits);*/ + +#endif diff --git a/thirdparty/libtheora/cpu.c b/thirdparty/libtheora/cpu.c new file mode 100644 index 0000000000..a863aad7f3 --- /dev/null +++ b/thirdparty/libtheora/cpu.c @@ -0,0 +1,226 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + CPU capability detection for x86 processors. + Originally written by Rudolf Marek. 
+ + function: + last mod: $Id: cpu.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include "cpu.h" + +#if !defined(OC_X86_ASM) +static ogg_uint32_t oc_cpu_flags_get(void){ + return 0; +} +#else +# if !defined(_MSC_VER) +# if defined(__amd64__)||defined(__x86_64__) +/*On x86-64, gcc seems to be able to figure out how to save %rbx for us when + compiling with -fPIC.*/ +# define cpuid(_op,_eax,_ebx,_ecx,_edx) \ + __asm__ __volatile__( \ + "cpuid\n\t" \ + :[eax]"=a"(_eax),[ebx]"=b"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \ + :"a"(_op) \ + :"cc" \ + ) +# else +/*On x86-32, not so much.*/ +# define cpuid(_op,_eax,_ebx,_ecx,_edx) \ + __asm__ __volatile__( \ + "xchgl %%ebx,%[ebx]\n\t" \ + "cpuid\n\t" \ + "xchgl %%ebx,%[ebx]\n\t" \ + :[eax]"=a"(_eax),[ebx]"=r"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \ + :"a"(_op) \ + :"cc" \ + ) +# endif +# else +/*Why does MSVC need this complicated rigamarole? + At this point I honestly do not care.*/ + +/*Visual C cpuid helper function. 
+ For VS2005 we could as well use the _cpuid builtin, but that wouldn't work + for VS2003 users, so we do it in inline assembler.*/ +static void oc_cpuid_helper(ogg_uint32_t _cpu_info[4],ogg_uint32_t _op){ + _asm{ + mov eax,[_op] + mov esi,_cpu_info + cpuid + mov [esi+0],eax + mov [esi+4],ebx + mov [esi+8],ecx + mov [esi+12],edx + } +} + +# define cpuid(_op,_eax,_ebx,_ecx,_edx) \ + do{ \ + ogg_uint32_t cpu_info[4]; \ + oc_cpuid_helper(cpu_info,_op); \ + (_eax)=cpu_info[0]; \ + (_ebx)=cpu_info[1]; \ + (_ecx)=cpu_info[2]; \ + (_edx)=cpu_info[3]; \ + }while(0) + +static void oc_detect_cpuid_helper(ogg_uint32_t *_eax,ogg_uint32_t *_ebx){ + _asm{ + pushfd + pushfd + pop eax + mov ebx,eax + xor eax,200000h + push eax + popfd + pushfd + pop eax + popfd + mov ecx,_eax + mov [ecx],eax + mov ecx,_ebx + mov [ecx],ebx + } +} +# endif + +static ogg_uint32_t oc_parse_intel_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){ + ogg_uint32_t flags; + /*If there isn't even MMX, give up.*/ + if(!(_edx&0x00800000))return 0; + flags=OC_CPU_X86_MMX; + if(_edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE; + if(_edx&0x04000000)flags|=OC_CPU_X86_SSE2; + if(_ecx&0x00000001)flags|=OC_CPU_X86_PNI; + if(_ecx&0x00000100)flags|=OC_CPU_X86_SSSE3; + if(_ecx&0x00080000)flags|=OC_CPU_X86_SSE4_1; + if(_ecx&0x00100000)flags|=OC_CPU_X86_SSE4_2; + return flags; +} + +static ogg_uint32_t oc_parse_amd_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){ + ogg_uint32_t flags; + /*If there isn't even MMX, give up.*/ + if(!(_edx&0x00800000))return 0; + flags=OC_CPU_X86_MMX; + if(_edx&0x00400000)flags|=OC_CPU_X86_MMXEXT; + if(_edx&0x80000000)flags|=OC_CPU_X86_3DNOW; + if(_edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT; + if(_ecx&0x00000040)flags|=OC_CPU_X86_SSE4A; + if(_ecx&0x00000800)flags|=OC_CPU_X86_SSE5; + return flags; +} + +static ogg_uint32_t oc_cpu_flags_get(void){ + ogg_uint32_t flags; + ogg_uint32_t eax; + ogg_uint32_t ebx; + ogg_uint32_t ecx; + ogg_uint32_t edx; +# if !defined(__amd64__)&&!defined(__x86_64__) + 
/*Not all x86-32 chips support cpuid, so we have to check.*/ +# if !defined(_MSC_VER) + __asm__ __volatile__( + "pushfl\n\t" + "pushfl\n\t" + "popl %[a]\n\t" + "movl %[a],%[b]\n\t" + "xorl $0x200000,%[a]\n\t" + "pushl %[a]\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %[a]\n\t" + "popfl\n\t" + :[a]"=r"(eax),[b]"=r"(ebx) + : + :"cc" + ); +# else + oc_detect_cpuid_helper(&eax,&ebx); +# endif + /*No cpuid.*/ + if(eax==ebx)return 0; +# endif + cpuid(0,eax,ebx,ecx,edx); + /* l e t n I e n i u n e G*/ + if(ecx==0x6C65746E&&edx==0x49656E69&&ebx==0x756E6547|| + /* 6 8 x M T e n i u n e G*/ + ecx==0x3638784D&&edx==0x54656E69&&ebx==0x756E6547){ + /*Intel, Transmeta (tested with Crusoe TM5800):*/ + cpuid(1,eax,ebx,ecx,edx); + flags=oc_parse_intel_flags(edx,ecx); + } + /* D M A c i t n e h t u A*/ + else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541|| + /* C S N y b e d o e G*/ + ecx==0x43534e20&&edx==0x79622065&&ebx==0x646f6547){ + /*AMD, Geode:*/ + cpuid(0x80000000,eax,ebx,ecx,edx); + if(eax<0x80000001)flags=0; + else{ + cpuid(0x80000001,eax,ebx,ecx,edx); + flags=oc_parse_amd_flags(edx,ecx); + } + /*Also check for SSE.*/ + cpuid(1,eax,ebx,ecx,edx); + flags|=oc_parse_intel_flags(edx,ecx); + } + /*Technically some VIA chips can be configured in the BIOS to return any + string here the user wants. + There is a special detection method that can be used to identify such + processors, but in my opinion, if the user really wants to change it, they + deserve what they get.*/ + /* s l u a H r u a t n e C*/ + else if(ecx==0x736C7561&&edx==0x48727561&&ebx==0x746E6543){ + /*VIA:*/ + /*I only have documentation for the C7 (Esther) and Isaiah (forthcoming) + chips (thanks to the engineers from Centaur Technology who provided it). + These chips support Intel-like cpuid info. + The C3-2 (Nehemiah) cores appear to, as well.*/ + cpuid(1,eax,ebx,ecx,edx); + flags=oc_parse_intel_flags(edx,ecx); + if(eax>=0x80000001){ + /*The (non-Nehemiah) C3 processors support AMD-like cpuid info. 
+ We need to check this even if the Intel test succeeds to pick up 3DNow! + support on these processors. + Unlike actual AMD processors, we cannot _rely_ on this info, since + some cores (e.g., the 693 stepping of the Nehemiah) claim to support + this function, yet return edx=0, despite the Intel test indicating + MMX support. + Therefore the features detected here are strictly added to those + detected by the Intel test.*/ + /*TODO: How about earlier chips?*/ + cpuid(0x80000001,eax,ebx,ecx,edx); + /*Note: As of the C7, this function returns Intel-style extended feature + flags, not AMD-style. + Currently, this only defines bits 11, 20, and 29 (0x20100800), which + do not conflict with any of the AMD flags we inspect. + For the remaining bits, Intel tells us, "Do not count on their value", + but VIA assures us that they will all be zero (at least on the C7 and + Isaiah chips). + In the (unlikely) event a future processor uses bits 18, 19, 30, or 31 + (0xC0C00000) for something else, we will have to add code to detect + the model to decide when it is appropriate to inspect them.*/ + flags|=oc_parse_amd_flags(edx,ecx); + } + } + else{ + /*Implement me.*/ + flags=0; + } + return flags; +} +#endif diff --git a/thirdparty/libtheora/cpu.h b/thirdparty/libtheora/cpu.h new file mode 100644 index 0000000000..a43c957a39 --- /dev/null +++ b/thirdparty/libtheora/cpu.h @@ -0,0 +1,34 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + function: + last mod: $Id: cpu.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#if !defined(_x86_cpu_H) +# define _x86_cpu_H (1) +#include "internal.h" + +#define OC_CPU_X86_MMX (1<<0) +#define OC_CPU_X86_3DNOW (1<<1) +#define OC_CPU_X86_3DNOWEXT (1<<2) +#define OC_CPU_X86_MMXEXT (1<<3) +#define OC_CPU_X86_SSE (1<<4) +#define OC_CPU_X86_SSE2 (1<<5) +#define OC_CPU_X86_PNI (1<<6) +#define OC_CPU_X86_SSSE3 (1<<7) +#define OC_CPU_X86_SSE4_1 (1<<8) +#define OC_CPU_X86_SSE4_2 (1<<9) +#define OC_CPU_X86_SSE4A (1<<10) +#define OC_CPU_X86_SSE5 (1<<11) + +#endif diff --git a/thirdparty/libtheora/dct.h b/thirdparty/libtheora/dct.h new file mode 100644 index 0000000000..24ba6f111a --- /dev/null +++ b/thirdparty/libtheora/dct.h @@ -0,0 +1,31 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: dct.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +/*Definitions shared by the forward and inverse DCT transforms.*/ +#if !defined(_dct_H) +# define _dct_H (1) + +/*cos(n*pi/16) (resp. 
sin(m*pi/16)) scaled by 65536.*/ +#define OC_C1S7 ((ogg_int32_t)64277) +#define OC_C2S6 ((ogg_int32_t)60547) +#define OC_C3S5 ((ogg_int32_t)54491) +#define OC_C4S4 ((ogg_int32_t)46341) +#define OC_C5S3 ((ogg_int32_t)36410) +#define OC_C6S2 ((ogg_int32_t)25080) +#define OC_C7S1 ((ogg_int32_t)12785) + +#endif diff --git a/thirdparty/libtheora/decapiwrapper.c b/thirdparty/libtheora/decapiwrapper.c new file mode 100644 index 0000000000..12ea475d17 --- /dev/null +++ b/thirdparty/libtheora/decapiwrapper.c @@ -0,0 +1,193 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: decapiwrapper.c 13596 2007-08-23 20:05:38Z tterribe $ + + ********************************************************************/ + +#include +#include +#include +#include "apiwrapper.h" +#include "decint.h" +#include "theora/theoradec.h" + +static void th_dec_api_clear(th_api_wrapper *_api){ + if(_api->setup)th_setup_free(_api->setup); + if(_api->decode)th_decode_free(_api->decode); + memset(_api,0,sizeof(*_api)); +} + +static void theora_decode_clear(theora_state *_td){ + if(_td->i!=NULL)theora_info_clear(_td->i); + memset(_td,0,sizeof(*_td)); +} + +static int theora_decode_control(theora_state *_td,int _req, + void *_buf,size_t _buf_sz){ + return th_decode_ctl(((th_api_wrapper *)_td->i->codec_setup)->decode, + _req,_buf,_buf_sz); +} + +static ogg_int64_t theora_decode_granule_frame(theora_state *_td, + ogg_int64_t _gp){ + return th_granule_frame(((th_api_wrapper 
*)_td->i->codec_setup)->decode,_gp); +} + +static double theora_decode_granule_time(theora_state *_td,ogg_int64_t _gp){ + return th_granule_time(((th_api_wrapper *)_td->i->codec_setup)->decode,_gp); +} + +static const oc_state_dispatch_vtable OC_DEC_DISPATCH_VTBL={ + (oc_state_clear_func)theora_decode_clear, + (oc_state_control_func)theora_decode_control, + (oc_state_granule_frame_func)theora_decode_granule_frame, + (oc_state_granule_time_func)theora_decode_granule_time, +}; + +static void th_info2theora_info(theora_info *_ci,const th_info *_info){ + _ci->version_major=_info->version_major; + _ci->version_minor=_info->version_minor; + _ci->version_subminor=_info->version_subminor; + _ci->width=_info->frame_width; + _ci->height=_info->frame_height; + _ci->frame_width=_info->pic_width; + _ci->frame_height=_info->pic_height; + _ci->offset_x=_info->pic_x; + _ci->offset_y=_info->pic_y; + _ci->fps_numerator=_info->fps_numerator; + _ci->fps_denominator=_info->fps_denominator; + _ci->aspect_numerator=_info->aspect_numerator; + _ci->aspect_denominator=_info->aspect_denominator; + switch(_info->colorspace){ + case TH_CS_ITU_REC_470M:_ci->colorspace=OC_CS_ITU_REC_470M;break; + case TH_CS_ITU_REC_470BG:_ci->colorspace=OC_CS_ITU_REC_470BG;break; + default:_ci->colorspace=OC_CS_UNSPECIFIED;break; + } + switch(_info->pixel_fmt){ + case TH_PF_420:_ci->pixelformat=OC_PF_420;break; + case TH_PF_422:_ci->pixelformat=OC_PF_422;break; + case TH_PF_444:_ci->pixelformat=OC_PF_444;break; + default:_ci->pixelformat=OC_PF_RSVD; + } + _ci->target_bitrate=_info->target_bitrate; + _ci->quality=_info->quality; + _ci->keyframe_frequency_force=1<<_info->keyframe_granule_shift; +} + +int theora_decode_init(theora_state *_td,theora_info *_ci){ + th_api_info *apiinfo; + th_api_wrapper *api; + th_info info; + api=(th_api_wrapper *)_ci->codec_setup; + /*Allocate our own combined API wrapper/theora_info struct. 
+ We put them both in one malloc'd block so that when the API wrapper is + freed, the info struct goes with it. + This avoids having to figure out whether or not we need to free the info + struct in either theora_info_clear() or theora_clear().*/ + apiinfo=(th_api_info *)_ogg_calloc(1,sizeof(*apiinfo)); + if(apiinfo==NULL)return OC_FAULT; + /*Make our own copy of the info struct, since its lifetime should be + independent of the one we were passed in.*/ + *&apiinfo->info=*_ci; + /*Convert the info struct now instead of saving the the one we decoded with + theora_decode_header(), since the user might have modified values (i.e., + color space, aspect ratio, etc. can be specified from a higher level). + The user also might be doing something "clever" with the header packets if + they are not using an Ogg encapsulation.*/ + oc_theora_info2th_info(&info,_ci); + /*Don't bother to copy the setup info; th_decode_alloc() makes its own copy + of the stuff it needs.*/ + apiinfo->api.decode=th_decode_alloc(&info,api->setup); + if(apiinfo->api.decode==NULL){ + _ogg_free(apiinfo); + return OC_EINVAL; + } + apiinfo->api.clear=(oc_setup_clear_func)th_dec_api_clear; + _td->internal_encode=NULL; + /*Provide entry points for ABI compatibility with old decoder shared libs.*/ + _td->internal_decode=(void *)&OC_DEC_DISPATCH_VTBL; + _td->granulepos=0; + _td->i=&apiinfo->info; + _td->i->codec_setup=&apiinfo->api; + return 0; +} + +int theora_decode_header(theora_info *_ci,theora_comment *_cc,ogg_packet *_op){ + th_api_wrapper *api; + th_info info; + int ret; + api=(th_api_wrapper *)_ci->codec_setup; + /*Allocate an API wrapper struct on demand, since it will not also include a + theora_info struct like the ones that are used in a theora_state struct.*/ + if(api==NULL){ + _ci->codec_setup=_ogg_calloc(1,sizeof(*api)); + if(_ci->codec_setup==NULL)return OC_FAULT; + api=(th_api_wrapper *)_ci->codec_setup; + api->clear=(oc_setup_clear_func)th_dec_api_clear; + } + /*Convert from the theora_info 
struct instead of saving our own th_info + struct between calls. + The user might be doing something "clever" with the header packets if they + are not using an Ogg encapsulation, and we don't want to break this.*/ + oc_theora_info2th_info(&info,_ci); + /*We rely on the fact that theora_comment and th_comment structures are + actually identical. + Take care not to change this fact unless you change the code here as + well!*/ + ret=th_decode_headerin(&info,(th_comment *)_cc,&api->setup,_op); + /*We also rely on the fact that the error return code values are the same, + and that the implementations of these two functions return the same set of + them. + Note that theora_decode_header() really can return OC_NOTFORMAT, even + though it is not currently documented to do so.*/ + if(ret<0)return ret; + th_info2theora_info(_ci,&info); + return 0; +} + +int theora_decode_packetin(theora_state *_td,ogg_packet *_op){ + th_api_wrapper *api; + ogg_int64_t gp; + int ret; + if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT; + api=(th_api_wrapper *)_td->i->codec_setup; + ret=th_decode_packetin(api->decode,_op,&gp); + if(ret<0)return OC_BADPACKET; + _td->granulepos=gp; + return 0; +} + +int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){ + th_api_wrapper *api; + th_dec_ctx *decode; + th_ycbcr_buffer buf; + int ret; + if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT; + api=(th_api_wrapper *)_td->i->codec_setup; + decode=(th_dec_ctx *)api->decode; + if(!decode)return OC_FAULT; + ret=th_decode_ycbcr_out(decode,buf); + if(ret>=0){ + _yuv->y_width=buf[0].width; + _yuv->y_height=buf[0].height; + _yuv->y_stride=buf[0].stride; + _yuv->uv_width=buf[1].width; + _yuv->uv_height=buf[1].height; + _yuv->uv_stride=buf[1].stride; + _yuv->y=buf[0].data; + _yuv->u=buf[1].data; + _yuv->v=buf[2].data; + } + return ret; +} diff --git a/thirdparty/libtheora/decinfo.c b/thirdparty/libtheora/decinfo.c new file mode 100644 index 0000000000..845eb1361c --- /dev/null +++ 
b/thirdparty/libtheora/decinfo.c @@ -0,0 +1,246 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: decinfo.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include +#include +#include +#include "decint.h" + + + +/*Unpacks a series of octets from a given byte array into the pack buffer. + No checking is done to ensure the buffer contains enough data. + _opb: The pack buffer to read the octets from. + _buf: The byte array to store the unpacked bytes in. + _len: The number of octets to unpack.*/ +static void oc_unpack_octets(oc_pack_buf *_opb,char *_buf,size_t _len){ + while(_len-->0){ + long val; + val=oc_pack_read(_opb,8); + *_buf++=(char)val; + } +} + +/*Unpacks a 32-bit integer encoded by octets in little-endian form.*/ +static long oc_unpack_length(oc_pack_buf *_opb){ + long ret[4]; + int i; + for(i=0;i<4;i++)ret[i]=oc_pack_read(_opb,8); + return ret[0]|ret[1]<<8|ret[2]<<16|ret[3]<<24; +} + +static int oc_info_unpack(oc_pack_buf *_opb,th_info *_info){ + long val; + /*Check the codec bitstream version.*/ + val=oc_pack_read(_opb,8); + _info->version_major=(unsigned char)val; + val=oc_pack_read(_opb,8); + _info->version_minor=(unsigned char)val; + val=oc_pack_read(_opb,8); + _info->version_subminor=(unsigned char)val; + /*verify we can parse this bitstream version. 
+ We accept earlier minors and all subminors, by spec*/ + if(_info->version_major>TH_VERSION_MAJOR|| + _info->version_major==TH_VERSION_MAJOR&& + _info->version_minor>TH_VERSION_MINOR){ + return TH_EVERSION; + } + /*Read the encoded frame description.*/ + val=oc_pack_read(_opb,16); + _info->frame_width=(ogg_uint32_t)val<<4; + val=oc_pack_read(_opb,16); + _info->frame_height=(ogg_uint32_t)val<<4; + val=oc_pack_read(_opb,24); + _info->pic_width=(ogg_uint32_t)val; + val=oc_pack_read(_opb,24); + _info->pic_height=(ogg_uint32_t)val; + val=oc_pack_read(_opb,8); + _info->pic_x=(ogg_uint32_t)val; + val=oc_pack_read(_opb,8); + _info->pic_y=(ogg_uint32_t)val; + val=oc_pack_read(_opb,32); + _info->fps_numerator=(ogg_uint32_t)val; + val=oc_pack_read(_opb,32); + _info->fps_denominator=(ogg_uint32_t)val; + if(_info->frame_width==0||_info->frame_height==0|| + _info->pic_width+_info->pic_x>_info->frame_width|| + _info->pic_height+_info->pic_y>_info->frame_height|| + _info->fps_numerator==0||_info->fps_denominator==0){ + return TH_EBADHEADER; + } + /*Note: The sense of pic_y is inverted in what we pass back to the + application compared to how it is stored in the bitstream. 
+ This is because the bitstream uses a right-handed coordinate system, while + applications expect a left-handed one.*/ + _info->pic_y=_info->frame_height-_info->pic_height-_info->pic_y; + val=oc_pack_read(_opb,24); + _info->aspect_numerator=(ogg_uint32_t)val; + val=oc_pack_read(_opb,24); + _info->aspect_denominator=(ogg_uint32_t)val; + val=oc_pack_read(_opb,8); + _info->colorspace=(th_colorspace)val; + val=oc_pack_read(_opb,24); + _info->target_bitrate=(int)val; + val=oc_pack_read(_opb,6); + _info->quality=(int)val; + val=oc_pack_read(_opb,5); + _info->keyframe_granule_shift=(int)val; + val=oc_pack_read(_opb,2); + _info->pixel_fmt=(th_pixel_fmt)val; + if(_info->pixel_fmt==TH_PF_RSVD)return TH_EBADHEADER; + val=oc_pack_read(_opb,3); + if(val!=0||oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER; + return 0; +} + +static int oc_comment_unpack(oc_pack_buf *_opb,th_comment *_tc){ + long len; + int i; + /*Read the vendor string.*/ + len=oc_unpack_length(_opb); + if(len<0||len>oc_pack_bytes_left(_opb))return TH_EBADHEADER; + _tc->vendor=_ogg_malloc((size_t)len+1); + if(_tc->vendor==NULL)return TH_EFAULT; + oc_unpack_octets(_opb,_tc->vendor,len); + _tc->vendor[len]='\0'; + /*Read the user comments.*/ + _tc->comments=(int)oc_unpack_length(_opb); + len=_tc->comments; + if(len<0||len>(LONG_MAX>>2)||len<<2>oc_pack_bytes_left(_opb)){ + _tc->comments=0; + return TH_EBADHEADER; + } + _tc->comment_lengths=(int *)_ogg_malloc( + _tc->comments*sizeof(_tc->comment_lengths[0])); + _tc->user_comments=(char **)_ogg_malloc( + _tc->comments*sizeof(_tc->user_comments[0])); + for(i=0;i<_tc->comments;i++){ + len=oc_unpack_length(_opb); + if(len<0||len>oc_pack_bytes_left(_opb)){ + _tc->comments=i; + return TH_EBADHEADER; + } + _tc->comment_lengths[i]=len; + _tc->user_comments[i]=_ogg_malloc((size_t)len+1); + if(_tc->user_comments[i]==NULL){ + _tc->comments=i; + return TH_EFAULT; + } + oc_unpack_octets(_opb,_tc->user_comments[i],len); + _tc->user_comments[i][len]='\0'; + } + return 
oc_pack_bytes_left(_opb)<0?TH_EBADHEADER:0; +} + +static int oc_setup_unpack(oc_pack_buf *_opb,th_setup_info *_setup){ + int ret; + /*Read the quantizer tables.*/ + ret=oc_quant_params_unpack(_opb,&_setup->qinfo); + if(ret<0)return ret; + /*Read the Huffman trees.*/ + return oc_huff_trees_unpack(_opb,_setup->huff_tables); +} + +static void oc_setup_clear(th_setup_info *_setup){ + oc_quant_params_clear(&_setup->qinfo); + oc_huff_trees_clear(_setup->huff_tables); +} + +static int oc_dec_headerin(oc_pack_buf *_opb,th_info *_info, + th_comment *_tc,th_setup_info **_setup,ogg_packet *_op){ + char buffer[6]; + long val; + int packtype; + int ret; + val=oc_pack_read(_opb,8); + packtype=(int)val; + /*If we're at a data packet and we have received all three headers, we're + done.*/ + if(!(packtype&0x80)&&_info->frame_width>0&&_tc->vendor!=NULL&&*_setup!=NULL){ + return 0; + } + /*Check the codec string.*/ + oc_unpack_octets(_opb,buffer,6); + if(memcmp(buffer,"theora",6)!=0)return TH_ENOTFORMAT; + switch(packtype){ + /*Codec info header.*/ + case 0x80:{ + /*This should be the first packet, and we should not already be + initialized.*/ + if(!_op->b_o_s||_info->frame_width>0)return TH_EBADHEADER; + ret=oc_info_unpack(_opb,_info); + if(ret<0)th_info_clear(_info); + else ret=3; + }break; + /*Comment header.*/ + case 0x81:{ + if(_tc==NULL)return TH_EFAULT; + /*We shoud have already decoded the info header, and should not yet have + decoded the comment header.*/ + if(_info->frame_width==0||_tc->vendor!=NULL)return TH_EBADHEADER; + ret=oc_comment_unpack(_opb,_tc); + if(ret<0)th_comment_clear(_tc); + else ret=2; + }break; + /*Codec setup header.*/ + case 0x82:{ + oc_setup_info *setup; + if(_tc==NULL||_setup==NULL)return TH_EFAULT; + /*We should have already decoded the info header and the comment header, + and should not yet have decoded the setup header.*/ + if(_info->frame_width==0||_tc->vendor==NULL||*_setup!=NULL){ + return TH_EBADHEADER; + } + setup=(oc_setup_info 
*)_ogg_calloc(1,sizeof(*setup)); + if(setup==NULL)return TH_EFAULT; + ret=oc_setup_unpack(_opb,setup); + if(ret<0){ + oc_setup_clear(setup); + _ogg_free(setup); + } + else{ + *_setup=setup; + ret=1; + } + }break; + default:{ + /*We don't know what this header is.*/ + return TH_EBADHEADER; + }break; + } + return ret; +} + + +/*Decodes one header packet. + This should be called repeatedly with the packets at the beginning of the + stream until it returns 0.*/ +int th_decode_headerin(th_info *_info,th_comment *_tc, + th_setup_info **_setup,ogg_packet *_op){ + oc_pack_buf opb; + if(_op==NULL)return TH_EBADHEADER; + if(_info==NULL)return TH_EFAULT; + oc_pack_readinit(&opb,_op->packet,_op->bytes); + return oc_dec_headerin(&opb,_info,_tc,_setup,_op); +} + +void th_setup_free(th_setup_info *_setup){ + if(_setup!=NULL){ + oc_setup_clear(_setup); + _ogg_free(_setup); + } +} diff --git a/thirdparty/libtheora/decint.h b/thirdparty/libtheora/decint.h new file mode 100644 index 0000000000..261b67631a --- /dev/null +++ b/thirdparty/libtheora/decint.h @@ -0,0 +1,107 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: decint.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include +#if !defined(_decint_H) +# define _decint_H (1) +# include "theora/theoradec.h" +# include "internal.h" +# include "bitpack.h" + +typedef struct th_setup_info oc_setup_info; +typedef struct th_dec_ctx oc_dec_ctx; + +# include "huffdec.h" +# include "dequant.h" + +/*Constants for the packet-in state machine specific to the decoder.*/ + +/*Next packet to read: Data packet.*/ +#define OC_PACKET_DATA (0) + + + +struct th_setup_info{ + /*The Huffman codes.*/ + oc_huff_node *huff_tables[TH_NHUFFMAN_TABLES]; + /*The quantization parameters.*/ + th_quant_info qinfo; +}; + + + +struct th_dec_ctx{ + /*Shared encoder/decoder state.*/ + oc_theora_state state; + /*Whether or not packets are ready to be emitted. 
+ This takes on negative values while there are remaining header packets to + be emitted, reaches 0 when the codec is ready for input, and goes to 1 + when a frame has been processed and a data packet is ready.*/ + int packet_state; + /*Buffer in which to assemble packets.*/ + oc_pack_buf opb; + /*Huffman decode trees.*/ + oc_huff_node *huff_tables[TH_NHUFFMAN_TABLES]; + /*The index of the first token in each plane for each coefficient.*/ + ptrdiff_t ti0[3][64]; + /*The number of outstanding EOB runs at the start of each coefficient in each + plane.*/ + ptrdiff_t eob_runs[3][64]; + /*The DCT token lists.*/ + unsigned char *dct_tokens; + /*The extra bits associated with DCT tokens.*/ + unsigned char *extra_bits; + /*The number of dct tokens unpacked so far.*/ + int dct_tokens_count; + /*The out-of-loop post-processing level.*/ + int pp_level; + /*The DC scale used for out-of-loop deblocking.*/ + int pp_dc_scale[64]; + /*The sharpen modifier used for out-of-loop deringing.*/ + int pp_sharp_mod[64]; + /*The DC quantization index of each block.*/ + unsigned char *dc_qis; + /*The variance of each block.*/ + int *variances; + /*The storage for the post-processed frame buffer.*/ + unsigned char *pp_frame_data; + /*Whether or not the post-processsed frame buffer has space for chroma.*/ + int pp_frame_state; + /*The buffer used for the post-processed frame. 
+ Note that this is _not_ guaranteed to have the same strides and offsets as + the reference frame buffers.*/ + th_ycbcr_buffer pp_frame_buf; + /*The striped decode callback function.*/ + th_stripe_callback stripe_cb; +# if defined(HAVE_CAIRO) + /*Output metrics for debugging.*/ + int telemetry; + int telemetry_mbmode; + int telemetry_mv; + int telemetry_qi; + int telemetry_bits; + int telemetry_frame_bytes; + int telemetry_coding_bytes; + int telemetry_mode_bytes; + int telemetry_mv_bytes; + int telemetry_qi_bytes; + int telemetry_dc_bytes; + unsigned char *telemetry_frame_data; +# endif +}; + +#endif diff --git a/thirdparty/libtheora/decode.c b/thirdparty/libtheora/decode.c new file mode 100644 index 0000000000..7be66463d8 --- /dev/null +++ b/thirdparty/libtheora/decode.c @@ -0,0 +1,2943 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: decode.c 16581 2009-09-25 22:56:16Z gmaxwell $ + + ********************************************************************/ + +#include +#include +#include +#include "decint.h" +#if defined(OC_DUMP_IMAGES) +# include +# include "png.h" +#endif +#if defined(HAVE_CAIRO) +# include +#endif + + +/*No post-processing.*/ +#define OC_PP_LEVEL_DISABLED (0) +/*Keep track of DC qi for each block only.*/ +#define OC_PP_LEVEL_TRACKDCQI (1) +/*Deblock the luma plane.*/ +#define OC_PP_LEVEL_DEBLOCKY (2) +/*Dering the luma plane.*/ +#define OC_PP_LEVEL_DERINGY (3) +/*Stronger luma plane deringing.*/ +#define OC_PP_LEVEL_SDERINGY (4) +/*Deblock the chroma planes.*/ +#define OC_PP_LEVEL_DEBLOCKC (5) +/*Dering the chroma planes.*/ +#define OC_PP_LEVEL_DERINGC (6) +/*Stronger chroma plane deringing.*/ +#define OC_PP_LEVEL_SDERINGC (7) +/*Maximum valid post-processing level.*/ +#define OC_PP_LEVEL_MAX (7) + + + +/*The mode alphabets for the various mode coding schemes. 
+ Scheme 0 uses a custom alphabet, which is not stored in this table.*/ +static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={ + /*Last MV dominates */ + { + OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV, + OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, + OC_MODE_INTER_MV_FOUR + }, + { + OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV, + OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, + OC_MODE_INTER_MV_FOUR + }, + { + OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2, + OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, + OC_MODE_INTER_MV_FOUR + }, + { + OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV, + OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV, + OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR + }, + /*No MV dominates.*/ + { + OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2, + OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, + OC_MODE_INTER_MV_FOUR + }, + { + OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST, + OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV, + OC_MODE_INTER_MV_FOUR + }, + /*Default ordering.*/ + { + OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST, + OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV, + OC_MODE_INTER_MV_FOUR + } +}; + + +/*The original DCT tokens are extended and reordered during the construction of + the Huffman tables. + The extension means more bits can be read with fewer calls to the bitpacker + during the Huffman decoding process (at the cost of larger Huffman tables), + and fewer tokens require additional extra bits (reducing the average storage + per decoded token). + The revised ordering reveals essential information in the token value + itself; specifically, whether or not there are additional extra bits to read + and the parameter to which those extra bits are applied. 
+ The token is used to fetch a code word from the OC_DCT_CODE_WORD table below. + The extra bits are added into code word at the bit position inferred from the + token value, giving the final code word from which all required parameters + are derived. + The number of EOBs and the leading zero run length can be extracted directly. + The coefficient magnitude is optionally negated before extraction, according + to a 'flip' bit.*/ + +/*The number of additional extra bits that are decoded with each of the + internal DCT tokens.*/ +static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={ + 12,4,3,3,4,4,5,5,8,8,8,8,3,3,6 +}; + +/*Whether or not an internal token needs any additional extra bits.*/ +#define OC_DCT_TOKEN_NEEDS_MORE(token) \ + (token<(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \ + sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS))) + +/*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.*/ +#define OC_DCT_TOKEN_FAT_EOB (0) + +/*The number of EOBs to use for an end-of-frame token. + Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which + is not yet available everywhere; this should be equivalent.*/ +#define OC_DCT_EOB_FINISH (~(size_t)0>>1) + +/*The location of the (6) run legth bits in the code word. + These are placed at index 0 and given 8 bits (even though 6 would suffice) + because it may be faster to extract the lower byte on some platforms.*/ +#define OC_DCT_CW_RLEN_SHIFT (0) +/*The location of the (12) EOB bits in the code word.*/ +#define OC_DCT_CW_EOB_SHIFT (8) +/*The location of the (1) flip bit in the code word. + This must be right under the magnitude bits.*/ +#define OC_DCT_CW_FLIP_BIT (20) +/*The location of the (11) token magnitude bits in the code word. 
+ These must be last, and rely on a sign-extending right shift.*/ +#define OC_DCT_CW_MAG_SHIFT (21) + +/*Pack the given fields into a code word.*/ +#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \ + ((_eobs)<state,_info,3); + if(ret<0)return ret; + ret=oc_huff_trees_copy(_dec->huff_tables, + (const oc_huff_node *const *)_setup->huff_tables); + if(ret<0){ + oc_state_clear(&_dec->state); + return ret; + } + /*For each fragment, allocate one byte for every DCT coefficient token, plus + one byte for extra-bits for each token, plus one more byte for the long + EOB run, just in case it's the very last token and has a run length of + one.*/ + _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)* + _dec->state.nfrags*sizeof(_dec->dct_tokens[0])); + if(_dec->dct_tokens==NULL){ + oc_huff_trees_clear(_dec->huff_tables); + oc_state_clear(&_dec->state); + return TH_EFAULT; + } + for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){ + _dec->state.dequant_tables[qi][pli][qti]= + _dec->state.dequant_table_data[qi][pli][qti]; + } + oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale, + &_setup->qinfo); + for(qi=0;qi<64;qi++){ + int qsum; + qsum=0; + for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ + qsum+=_dec->state.dequant_tables[qti][pli][qi][12]+ + _dec->state.dequant_tables[qti][pli][qi][17]+ + _dec->state.dequant_tables[qti][pli][qi][18]+ + _dec->state.dequant_tables[qti][pli][qi][24]<<(pli==0); + } + _dec->pp_sharp_mod[qi]=-(qsum>>11); + } + memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits, + sizeof(_dec->state.loop_filter_limits)); + _dec->pp_level=OC_PP_LEVEL_DISABLED; + _dec->dc_qis=NULL; + _dec->variances=NULL; + _dec->pp_frame_data=NULL; + _dec->stripe_cb.ctx=NULL; + _dec->stripe_cb.stripe_decoded=NULL; +#if defined(HAVE_CAIRO) + _dec->telemetry=0; + _dec->telemetry_bits=0; + _dec->telemetry_qi=0; + _dec->telemetry_mbmode=0; + _dec->telemetry_mv=0; + _dec->telemetry_frame_data=NULL; +#endif + return 0; +} + +static void 
oc_dec_clear(oc_dec_ctx *_dec){ +#if defined(HAVE_CAIRO) + _ogg_free(_dec->telemetry_frame_data); +#endif + _ogg_free(_dec->pp_frame_data); + _ogg_free(_dec->variances); + _ogg_free(_dec->dc_qis); + _ogg_free(_dec->dct_tokens); + oc_huff_trees_clear(_dec->huff_tables); + oc_state_clear(&_dec->state); +} + + +static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){ + long val; + /*Check to make sure this is a data packet.*/ + val=oc_pack_read1(&_dec->opb); + if(val!=0)return TH_EBADPACKET; + /*Read in the frame type (I or P).*/ + val=oc_pack_read1(&_dec->opb); + _dec->state.frame_type=(int)val; + /*Read in the qi list.*/ + val=oc_pack_read(&_dec->opb,6); + _dec->state.qis[0]=(unsigned char)val; + val=oc_pack_read1(&_dec->opb); + if(!val)_dec->state.nqis=1; + else{ + val=oc_pack_read(&_dec->opb,6); + _dec->state.qis[1]=(unsigned char)val; + val=oc_pack_read1(&_dec->opb); + if(!val)_dec->state.nqis=2; + else{ + val=oc_pack_read(&_dec->opb,6); + _dec->state.qis[2]=(unsigned char)val; + _dec->state.nqis=3; + } + } + if(_dec->state.frame_type==OC_INTRA_FRAME){ + /*Keyframes have 3 unused configuration bits, holdovers from VP3 days. + Most of the other unused bits in the VP3 headers were eliminated. + I don't know why these remain.*/ + /*I wanted to eliminate wasted bits, but not all config wiggle room + --Monty.*/ + val=oc_pack_read(&_dec->opb,3); + if(val!=0)return TH_EIMPL; + } + return 0; +} + +/*Mark all fragments as coded and in OC_MODE_INTRA. + This also builds up the coded fragment list (in coded order), and clears the + uncoded fragment list. 
+ It does not update the coded macro block list nor the super block flags, as + those are not used when decoding INTRA frames.*/ +static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){ + const oc_sb_map *sb_maps; + const oc_sb_flags *sb_flags; + oc_fragment *frags; + ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + ptrdiff_t prev_ncoded_fragis; + unsigned nsbs; + unsigned sbi; + int pli; + coded_fragis=_dec->state.coded_fragis; + prev_ncoded_fragis=ncoded_fragis=0; + sb_maps=(const oc_sb_map *)_dec->state.sb_maps; + sb_flags=_dec->state.sb_flags; + frags=_dec->state.frags; + sbi=nsbs=0; + for(pli=0;pli<3;pli++){ + nsbs+=_dec->state.fplanes[pli].nsbs; + for(;sbi=0){ + frags[fragi].coded=1; + frags[fragi].mb_mode=OC_MODE_INTRA; + coded_fragis[ncoded_fragis++]=fragi; + } + } + } + } + _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis; + prev_ncoded_fragis=ncoded_fragis; + } + _dec->state.ntotal_coded_fragis=ncoded_fragis; +} + +/*Decodes the bit flags indicating whether each super block is partially coded + or not. + Return: The number of partially coded super blocks.*/ +static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){ + oc_sb_flags *sb_flags; + unsigned nsbs; + unsigned sbi; + unsigned npartial; + unsigned run_count; + long val; + int flag; + val=oc_pack_read1(&_dec->opb); + flag=(int)val; + sb_flags=_dec->state.sb_flags; + nsbs=_dec->state.nsbs; + sbi=npartial=0; + while(sbiopb); + full_run=run_count>=4129; + do{ + sb_flags[sbi].coded_partially=flag; + sb_flags[sbi].coded_fully=0; + npartial+=flag; + sbi++; + } + while(--run_count>0&&sbiopb); + flag=(int)val; + } + else flag=!flag; + } + /*TODO: run_count should be 0 here. + If it's not, we should issue a warning of some kind.*/ + return npartial; +} + +/*Decodes the bit flags for whether or not each non-partially-coded super + block is fully coded or not. + This function should only be called if there is at least one + non-partially-coded super block. 
+ Return: The number of partially coded super blocks.*/ +static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){ + oc_sb_flags *sb_flags; + unsigned nsbs; + unsigned sbi; + unsigned run_count; + long val; + int flag; + sb_flags=_dec->state.sb_flags; + nsbs=_dec->state.nsbs; + /*Skip partially coded super blocks.*/ + for(sbi=0;sb_flags[sbi].coded_partially;sbi++); + val=oc_pack_read1(&_dec->opb); + flag=(int)val; + do{ + int full_run; + run_count=oc_sb_run_unpack(&_dec->opb); + full_run=run_count>=4129; + for(;sbiopb); + flag=(int)val; + } + else flag=!flag; + } + while(sbistate.nsbs)oc_dec_coded_sb_flags_unpack(_dec); + if(npartial>0){ + val=oc_pack_read1(&_dec->opb); + flag=!(int)val; + } + else flag=0; + sb_maps=(const oc_sb_map *)_dec->state.sb_maps; + sb_flags=_dec->state.sb_flags; + frags=_dec->state.frags; + sbi=nsbs=run_count=0; + coded_fragis=_dec->state.coded_fragis; + uncoded_fragis=coded_fragis+_dec->state.nfrags; + prev_ncoded_fragis=ncoded_fragis=nuncoded_fragis=0; + for(pli=0;pli<3;pli++){ + nsbs+=_dec->state.fplanes[pli].nsbs; + for(;sbi=0){ + int coded; + if(sb_flags[sbi].coded_fully)coded=1; + else if(!sb_flags[sbi].coded_partially)coded=0; + else{ + if(run_count<=0){ + run_count=oc_block_run_unpack(&_dec->opb); + flag=!flag; + } + run_count--; + coded=flag; + } + if(coded)coded_fragis[ncoded_fragis++]=fragi; + else *(uncoded_fragis-++nuncoded_fragis)=fragi; + frags[fragi].coded=coded; + } + } + } + } + _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis; + prev_ncoded_fragis=ncoded_fragis; + } + _dec->state.ntotal_coded_fragis=ncoded_fragis; + /*TODO: run_count should be 0 here. 
+ If it's not, we should issue a warning of some kind.*/ +} + + + +typedef int (*oc_mode_unpack_func)(oc_pack_buf *_opb); + +static int oc_vlc_mode_unpack(oc_pack_buf *_opb){ + long val; + int i; + for(i=0;i<7;i++){ + val=oc_pack_read1(_opb); + if(!val)break; + } + return i; +} + +static int oc_clc_mode_unpack(oc_pack_buf *_opb){ + long val; + val=oc_pack_read(_opb,3); + return (int)val; +} + +/*Unpacks the list of macro block modes for INTER frames.*/ +static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){ + const oc_mb_map *mb_maps; + signed char *mb_modes; + const oc_fragment *frags; + const unsigned char *alphabet; + unsigned char scheme0_alphabet[8]; + oc_mode_unpack_func mode_unpack; + size_t nmbs; + size_t mbi; + long val; + int mode_scheme; + val=oc_pack_read(&_dec->opb,3); + mode_scheme=(int)val; + if(mode_scheme==0){ + int mi; + /*Just in case, initialize the modes to something. + If the bitstream doesn't contain each index exactly once, it's likely + corrupt and the rest of the packet is garbage anyway, but this way we + won't crash, and we'll decode SOMETHING.*/ + /*LOOP VECTORIZES*/ + for(mi=0;miopb,3); + scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi]; + } + alphabet=scheme0_alphabet; + } + else alphabet=OC_MODE_ALPHABETS[mode_scheme-1]; + if(mode_scheme==7)mode_unpack=oc_clc_mode_unpack; + else mode_unpack=oc_vlc_mode_unpack; + mb_modes=_dec->state.mb_modes; + mb_maps=(const oc_mb_map *)_dec->state.mb_maps; + nmbs=_dec->state.nmbs; + frags=_dec->state.frags; + for(mbi=0;mbiopb)]; + /*There were none: INTER_NOMV is forced.*/ + else mb_modes[mbi]=OC_MODE_INTER_NOMV; + } + } +} + + + +typedef int (*oc_mv_comp_unpack_func)(oc_pack_buf *_opb); + +static int oc_vlc_mv_comp_unpack(oc_pack_buf *_opb){ + long bits; + int mask; + int mv; + bits=oc_pack_read(_opb,3); + switch(bits){ + case 0:return 0; + case 1:return 1; + case 2:return -1; + case 3: + case 4:{ + mv=(int)(bits-1); + bits=oc_pack_read1(_opb); + }break; + /*case 5: + case 6: + case 7:*/ + default:{ 
+ mv=1<>1); + bits&=1; + }break; + } + mask=-(int)bits; + return mv+mask^mask; +} + +static int oc_clc_mv_comp_unpack(oc_pack_buf *_opb){ + long bits; + int mask; + int mv; + bits=oc_pack_read(_opb,6); + mv=(int)bits>>1; + mask=-((int)bits&1); + return mv+mask^mask; +} + +/*Unpacks the list of motion vectors for INTER frames, and propagtes the macro + block modes and motion vectors to the individual fragments.*/ +static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){ + const oc_mb_map *mb_maps; + const signed char *mb_modes; + oc_set_chroma_mvs_func set_chroma_mvs; + oc_mv_comp_unpack_func mv_comp_unpack; + oc_fragment *frags; + oc_mv *frag_mvs; + const unsigned char *map_idxs; + int map_nidxs; + oc_mv last_mv[2]; + oc_mv cbmvs[4]; + size_t nmbs; + size_t mbi; + long val; + set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt]; + val=oc_pack_read1(&_dec->opb); + mv_comp_unpack=val?oc_clc_mv_comp_unpack:oc_vlc_mv_comp_unpack; + map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt]; + map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt]; + memset(last_mv,0,sizeof(last_mv)); + frags=_dec->state.frags; + frag_mvs=_dec->state.frag_mvs; + mb_maps=(const oc_mb_map *)_dec->state.mb_maps; + mb_modes=_dec->state.mb_modes; + nmbs=_dec->state.nmbs; + for(mbi=0;mbi>2][mapi&3]; + if(frags[fragi].coded)coded[ncoded++]=mapi; + } + while(++mapiiopb); + lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb); + memcpy(frag_mvs[fragi],lbmvs[bi],sizeof(lbmvs[bi])); + } + else lbmvs[bi][0]=lbmvs[bi][1]=0; + } + if(codedi>0){ + memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1])); + memcpy(last_mv[0],lbmvs[coded[codedi-1]],sizeof(last_mv[0])); + } + if(codedi>2][bi]; + frags[fragi].mb_mode=mb_mode; + memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(cbmvs[bi])); + } + } + }break; + case OC_MODE_INTER_MV:{ + memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1])); + mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb); + mbmv[1]=last_mv[0][1]=(signed 
char)(*mv_comp_unpack)(&_dec->opb); + }break; + case OC_MODE_INTER_MV_LAST:memcpy(mbmv,last_mv[0],sizeof(mbmv));break; + case OC_MODE_INTER_MV_LAST2:{ + memcpy(mbmv,last_mv[1],sizeof(mbmv)); + memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1])); + memcpy(last_mv[0],mbmv,sizeof(last_mv[0])); + }break; + case OC_MODE_GOLDEN_MV:{ + mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb); + mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb); + }break; + default:memset(mbmv,0,sizeof(mbmv));break; + } + /*4MV mode fills in the fragments itself. + For all other modes we can use this common code.*/ + if(mb_mode!=OC_MODE_INTER_MV_FOUR){ + for(codedi=0;codedi>2][mapi&3]; + frags[fragi].mb_mode=mb_mode; + memcpy(frag_mvs[fragi],mbmv,sizeof(mbmv)); + } + } + } + } +} + +static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){ + oc_fragment *frags; + const ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + ptrdiff_t fragii; + ptrdiff_t fragi; + ncoded_fragis=_dec->state.ntotal_coded_fragis; + if(ncoded_fragis<=0)return; + frags=_dec->state.frags; + coded_fragis=_dec->state.coded_fragis; + if(_dec->state.nqis==1){ + /*If this frame has only a single qi value, then just use it for all coded + fragments.*/ + for(fragii=0;fragiiopb); + flag=(int)val; + nqi1=0; + fragii=0; + while(fragiiopb); + full_run=run_count>=4129; + do{ + frags[coded_fragis[fragii++]].qii=flag; + nqi1+=flag; + } + while(--run_count>0&&fragiiopb); + flag=(int)val; + } + else flag=!flag; + } + /*TODO: run_count should be 0 here. 
+ If it's not, we should issue a warning of some kind.*/ + /*If we have 3 different qi's for this frame, and there was at least one + fragment with a non-zero qi, make the second pass.*/ + if(_dec->state.nqis==3&&nqi1>0){ + /*Skip qii==0 fragments.*/ + for(fragii=0;frags[coded_fragis[fragii]].qii==0;fragii++); + val=oc_pack_read1(&_dec->opb); + flag=(int)val; + do{ + int full_run; + run_count=oc_sb_run_unpack(&_dec->opb); + full_run=run_count>=4129; + for(;fragiiopb); + flag=(int)val; + } + else flag=!flag; + } + while(fragiidct_tokens; + frags=_dec->state.frags; + coded_fragis=_dec->state.coded_fragis; + ncoded_fragis=fragii=eobs=ti=0; + for(pli=0;pli<3;pli++){ + ptrdiff_t run_counts[64]; + ptrdiff_t eob_count; + ptrdiff_t eobi; + int rli; + ncoded_fragis+=_dec->state.ncoded_fragis[pli]; + memset(run_counts,0,sizeof(run_counts)); + _dec->eob_runs[pli][0]=eobs; + _dec->ti0[pli][0]=ti; + /*Continue any previous EOB run, if there was one.*/ + eobi=eobs; + if(ncoded_fragis-fragii0)frags[coded_fragis[fragii++]].dc=0; + while(fragiiopb, + _dec->huff_tables[_huff_idxs[pli+1>>1]]); + dct_tokens[ti++]=(unsigned char)token; + if(OC_DCT_TOKEN_NEEDS_MORE(token)){ + eb=(int)oc_pack_read(&_dec->opb, + OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]); + dct_tokens[ti++]=(unsigned char)eb; + if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8); + eb<<=OC_DCT_TOKEN_EB_POS(token); + } + else eb=0; + cw=OC_DCT_CODE_WORD[token]+eb; + eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF; + if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH; + if(eobs){ + eobi=OC_MINI(eobs,ncoded_fragis-fragii); + eob_count+=eobi; + eobs-=eobi; + while(eobi-->0)frags[coded_fragis[fragii++]].dc=0; + } + else{ + int coeff; + skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT); + cw^=-(cw&1<>OC_DCT_CW_MAG_SHIFT; + if(skip)coeff=0; + run_counts[skip]++; + frags[coded_fragis[fragii++]].dc=coeff; + } + } + /*Add the total EOB count to the longest run length.*/ + run_counts[63]+=eob_count; + /*And convert the run_counts array 
to a moment table.*/ + for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1]; + /*Finally, subtract off the number of coefficients that have been + accounted for by runs started in this coefficient.*/ + for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli]; + } + _dec->dct_tokens_count=ti; + return eobs; +} + +/*Unpacks the AC coefficient tokens. + This can completely discard coefficient values while unpacking, and so is + somewhat simpler than unpacking the DC coefficient tokens. + _huff_idx: The index of the Huffman table to use for each color plane. + _ntoks_left: The number of tokens left to be decoded in each color plane for + each coefficient. + This is updated as EOB tokens and zero run tokens are decoded. + _eobs: The length of any outstanding EOB run from previous + coefficients. + Return: The length of any outstanding EOB run.*/ +static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2], + ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){ + unsigned char *dct_tokens; + ptrdiff_t ti; + int pli; + dct_tokens=_dec->dct_tokens; + ti=_dec->dct_tokens_count; + for(pli=0;pli<3;pli++){ + ptrdiff_t run_counts[64]; + ptrdiff_t eob_count; + size_t ntoks_left; + size_t ntoks; + int rli; + _dec->eob_runs[pli][_zzi]=_eobs; + _dec->ti0[pli][_zzi]=ti; + ntoks_left=_ntoks_left[pli][_zzi]; + memset(run_counts,0,sizeof(run_counts)); + eob_count=0; + ntoks=0; + while(ntoks+_eobsopb, + _dec->huff_tables[_huff_idxs[pli+1>>1]]); + dct_tokens[ti++]=(unsigned char)token; + if(OC_DCT_TOKEN_NEEDS_MORE(token)){ + eb=(int)oc_pack_read(&_dec->opb, + OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]); + dct_tokens[ti++]=(unsigned char)eb; + if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8); + eb<<=OC_DCT_TOKEN_EB_POS(token); + } + else eb=0; + cw=OC_DCT_CODE_WORD[token]+eb; + skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT); + _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF; + if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH; + if(_eobs==0){ + run_counts[skip]++; + 
ntoks++; + } + } + /*Add the portion of the last EOB run actually used by this coefficient.*/ + eob_count+=ntoks_left-ntoks; + /*And remove it from the remaining EOB count.*/ + _eobs-=ntoks_left-ntoks; + /*Add the total EOB count to the longest run length.*/ + run_counts[63]+=eob_count; + /*And convert the run_counts array to a moment table.*/ + for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1]; + /*Finally, subtract off the number of coefficients that have been + accounted for by runs started in this coefficient.*/ + for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli]; + } + _dec->dct_tokens_count=ti; + return _eobs; +} + +/*Tokens describing the DCT coefficients that belong to each fragment are + stored in the bitstream grouped by coefficient, not by fragment. + + This means that we either decode all the tokens in order, building up a + separate coefficient list for each fragment as we go, and then go back and + do the iDCT on each fragment, or we have to create separate lists of tokens + for each coefficient, so that we can pull the next token required off the + head of the appropriate list when decoding a specific fragment. + + The former was VP3's choice, and it meant 2*w*h extra storage for all the + decoded coefficient values. + + We take the second option, which lets us store just one to three bytes per + token (generally far fewer than the number of coefficients, due to EOB + tokens and zero runs), and which requires us to only maintain a counter for + each of the 64 coefficients, instead of a counter for every fragment to + determine where the next token goes. + + We actually use 3 counters per coefficient, one for each color plane, so we + can decode all color planes simultaneously. 
+ This lets color conversion, etc., be done as soon as a full MCU (one or + two super block rows) is decoded, while the image data is still in cache.*/ + +static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){ + static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64}; + ptrdiff_t ntoks_left[3][64]; + int huff_idxs[2]; + ptrdiff_t eobs; + long val; + int pli; + int zzi; + int hgi; + for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){ + ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli]; + } + val=oc_pack_read(&_dec->opb,4); + huff_idxs[0]=(int)val; + val=oc_pack_read(&_dec->opb,4); + huff_idxs[1]=(int)val; + _dec->eob_runs[0][0]=0; + eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left); +#if defined(HAVE_CAIRO) + _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb); +#endif + val=oc_pack_read(&_dec->opb,4); + huff_idxs[0]=(int)val; + val=oc_pack_read(&_dec->opb,4); + huff_idxs[1]=(int)val; + zzi=1; + for(hgi=1;hgi<5;hgi++){ + huff_idxs[0]+=16; + huff_idxs[1]+=16; + for(;zzipp_level<=OC_PP_LEVEL_DISABLED){ + if(_dec->dc_qis!=NULL){ + _ogg_free(_dec->dc_qis); + _dec->dc_qis=NULL; + _ogg_free(_dec->variances); + _dec->variances=NULL; + _ogg_free(_dec->pp_frame_data); + _dec->pp_frame_data=NULL; + } + return 1; + } + if(_dec->dc_qis==NULL){ + /*If we haven't been tracking DC quantization indices, there's no point in + starting now.*/ + if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1; + _dec->dc_qis=(unsigned char *)_ogg_malloc( + _dec->state.nfrags*sizeof(_dec->dc_qis[0])); + if(_dec->dc_qis==NULL)return 1; + memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags); + } + else{ + unsigned char *dc_qis; + const ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + ptrdiff_t fragii; + unsigned char qi0; + /*Update the DC quantization index of each coded block.*/ + dc_qis=_dec->dc_qis; + coded_fragis=_dec->state.coded_fragis; + ncoded_fragis=_dec->state.ncoded_fragis[0]+ + _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2]; + qi0=(unsigned 
char)_dec->state.qis[0]; + for(fragii=0;fragiipp_level<=OC_PP_LEVEL_TRACKDCQI){ + if(_dec->variances!=NULL){ + _ogg_free(_dec->variances); + _dec->variances=NULL; + _ogg_free(_dec->pp_frame_data); + _dec->pp_frame_data=NULL; + } + return 1; + } + if(_dec->variances==NULL){ + size_t frame_sz; + size_t c_sz; + int c_w; + int c_h; + frame_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height; + c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1); + c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2); + c_sz=c_w*(size_t)c_h; + /*Allocate space for the chroma planes, even if we're not going to use + them; this simplifies allocation state management, though it may waste + memory on the few systems that don't overcommit pages.*/ + frame_sz+=c_sz<<1; + _dec->pp_frame_data=(unsigned char *)_ogg_malloc( + frame_sz*sizeof(_dec->pp_frame_data[0])); + _dec->variances=(int *)_ogg_malloc( + _dec->state.nfrags*sizeof(_dec->variances[0])); + if(_dec->variances==NULL||_dec->pp_frame_data==NULL){ + _ogg_free(_dec->pp_frame_data); + _dec->pp_frame_data=NULL; + _ogg_free(_dec->variances); + _dec->variances=NULL; + return 1; + } + /*Force an update of the PP buffer pointers.*/ + _dec->pp_frame_state=0; + } + /*Update the PP buffer pointers if necessary.*/ + if(_dec->pp_frame_state!=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){ + if(_dec->pp_levelpp_frame_buf[0].width=_dec->state.info.frame_width; + _dec->pp_frame_buf[0].height=_dec->state.info.frame_height; + _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width; + _dec->pp_frame_buf[0].data=_dec->pp_frame_data+ + (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride; + } + else{ + size_t y_sz; + size_t c_sz; + int c_w; + int c_h; + /*Otherwise, set up pointers to all three PP planes.*/ + y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height; + c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1); + 
c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2); + c_sz=c_w*(size_t)c_h; + _dec->pp_frame_buf[0].width=_dec->state.info.frame_width; + _dec->pp_frame_buf[0].height=_dec->state.info.frame_height; + _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width; + _dec->pp_frame_buf[0].data=_dec->pp_frame_data; + _dec->pp_frame_buf[1].width=c_w; + _dec->pp_frame_buf[1].height=c_h; + _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width; + _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz; + _dec->pp_frame_buf[2].width=c_w; + _dec->pp_frame_buf[2].height=c_h; + _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width; + _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz; + oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf); + } + _dec->pp_frame_state=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC); + } + /*If we're not processing chroma, copy the reference frame's chroma planes.*/ + if(_dec->pp_levelpp_frame_buf+1, + _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1, + sizeof(_dec->pp_frame_buf[1])*2); + } + return 0; +} + + + +typedef struct{ + int bounding_values[256]; + ptrdiff_t ti[3][64]; + ptrdiff_t eob_runs[3][64]; + const ptrdiff_t *coded_fragis[3]; + const ptrdiff_t *uncoded_fragis[3]; + ptrdiff_t ncoded_fragis[3]; + ptrdiff_t nuncoded_fragis[3]; + const ogg_uint16_t *dequant[3][3][2]; + int fragy0[3]; + int fragy_end[3]; + int pred_last[3][3]; + int mcu_nvfrags; + int loop_filter; + int pp_level; +}oc_dec_pipeline_state; + + + +/*Initialize the main decoding pipeline.*/ +static void oc_dec_pipeline_init(oc_dec_ctx *_dec, + oc_dec_pipeline_state *_pipe){ + const ptrdiff_t *coded_fragis; + const ptrdiff_t *uncoded_fragis; + int pli; + int qii; + int qti; + /*If chroma is sub-sampled in the vertical direction, we have to decode two + super block rows of Y' for each super block row of Cb and Cr.*/ + _pipe->mcu_nvfrags=4<state.info.pixel_fmt&2); + /*Initialize the token and extra bits indices for each plane 
and + coefficient.*/ + memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti)); + /*Also copy over the initial the EOB run counts.*/ + memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs)); + /*Set up per-plane pointers to the coded and uncoded fragments lists.*/ + coded_fragis=_dec->state.coded_fragis; + uncoded_fragis=coded_fragis+_dec->state.nfrags; + for(pli=0;pli<3;pli++){ + ptrdiff_t ncoded_fragis; + _pipe->coded_fragis[pli]=coded_fragis; + _pipe->uncoded_fragis[pli]=uncoded_fragis; + ncoded_fragis=_dec->state.ncoded_fragis[pli]; + coded_fragis+=ncoded_fragis; + uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags; + } + /*Set up condensed quantizer tables.*/ + for(pli=0;pli<3;pli++){ + for(qii=0;qii<_dec->state.nqis;qii++){ + for(qti=0;qti<2;qti++){ + _pipe->dequant[pli][qii][qti]= + _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti]; + } + } + } + /*Set the previous DC predictor to 0 for all color planes and frame types.*/ + memset(_pipe->pred_last,0,sizeof(_pipe->pred_last)); + /*Initialize the bounding value array for the loop filter.*/ + _pipe->loop_filter=!oc_state_loop_filter_init(&_dec->state, + _pipe->bounding_values); + /*Initialize any buffers needed for post-processing. + We also save the current post-processing level, to guard against the user + changing it from a callback.*/ + if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level; + /*If we don't have enough information to post-process, disable it, regardless + of the user-requested level.*/ + else{ + _pipe->pp_level=OC_PP_LEVEL_DISABLED; + memcpy(_dec->pp_frame_buf, + _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]], + sizeof(_dec->pp_frame_buf[0])*3); + } +} + +/*Undo the DC prediction in a single plane of an MCU (one or two super block + rows). 
+ As a side effect, the number of coded and uncoded fragments in this plane of + the MCU is also computed.*/ +static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec, + oc_dec_pipeline_state *_pipe,int _pli){ + const oc_fragment_plane *fplane; + oc_fragment *frags; + int *pred_last; + ptrdiff_t ncoded_fragis; + ptrdiff_t fragi; + int fragx; + int fragy; + int fragy0; + int fragy_end; + int nhfrags; + /*Compute the first and last fragment row of the current MCU for this + plane.*/ + fplane=_dec->state.fplanes+_pli; + fragy0=_pipe->fragy0[_pli]; + fragy_end=_pipe->fragy_end[_pli]; + nhfrags=fplane->nhfrags; + pred_last=_pipe->pred_last[_pli]; + frags=_dec->state.frags; + ncoded_fragis=0; + fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags; + for(fragy=fragy0;fragy=nhfrags)ur_ref=-1; + else{ + ur_ref=u_frags[fragi+1].coded? + OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1; + } + if(frags[fragi].coded){ + int pred; + int ref; + ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode); + /*We break out a separate case based on which of our neighbors use + the same reference frames. 
+ This is somewhat faster than trying to make a generic case which + handles all of them, since it reduces lots of poorly predicted + jumps to one switch statement, and also lets a number of the + multiplications be optimized out by strength reduction.*/ + switch((l_ref==ref)|(ul_ref==ref)<<1| + (u_ref==ref)<<2|(ur_ref==ref)<<3){ + default:pred=pred_last[ref];break; + case 1: + case 3:pred=frags[fragi-1].dc;break; + case 2:pred=u_frags[fragi-1].dc;break; + case 4: + case 6: + case 12:pred=u_frags[fragi].dc;break; + case 5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break; + case 8:pred=u_frags[fragi+1].dc;break; + case 9: + case 11: + case 13:{ + pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128; + }break; + case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break; + case 14:{ + pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc) + +10*u_frags[fragi].dc)/16; + }break; + case 7: + case 15:{ + int p0; + int p1; + int p2; + p0=frags[fragi-1].dc; + p1=u_frags[fragi-1].dc; + p2=u_frags[fragi].dc; + pred=(29*(p0+p2)-26*p1)/32; + if(abs(pred-p2)>128)pred=p2; + else if(abs(pred-p0)>128)pred=p0; + else if(abs(pred-p1)>128)pred=p1; + }break; + } + pred_last[ref]=frags[fragi].dc+=pred; + ncoded_fragis++; + l_ref=ref; + } + else l_ref=-1; + ul_ref=u_ref; + u_ref=ur_ref; + } + } + } + _pipe->ncoded_fragis[_pli]=ncoded_fragis; + /*Also save the number of uncoded fragments so we know how many to copy.*/ + _pipe->nuncoded_fragis[_pli]= + (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis; +} + +/*Reconstructs all coded fragments in a single MCU (one or two super block + rows). + This requires that each coded fragment have a proper macro block mode and + motion vector (if not in INTRA mode), and have it's DC value decoded, with + the DC prediction process reversed, and the number of coded and uncoded + fragments in this plane of the MCU be counted. 
+ The token lists for each color plane and coefficient should also be filled + in, along with initial token offsets, extra bits offsets, and EOB run + counts.*/ +static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec, + oc_dec_pipeline_state *_pipe,int _pli){ + unsigned char *dct_tokens; + const unsigned char *dct_fzig_zag; + ogg_uint16_t dc_quant[2]; + const oc_fragment *frags; + const ptrdiff_t *coded_fragis; + ptrdiff_t ncoded_fragis; + ptrdiff_t fragii; + ptrdiff_t *ti; + ptrdiff_t *eob_runs; + int qti; + dct_tokens=_dec->dct_tokens; + dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag; + frags=_dec->state.frags; + coded_fragis=_pipe->coded_fragis[_pli]; + ncoded_fragis=_pipe->ncoded_fragis[_pli]; + ti=_pipe->ti[_pli]; + eob_runs=_pipe->eob_runs[_pli]; + for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0]; + for(fragii=0;fragiidequant[_pli][frags[fragi].qii][qti]; + /*Decode the AC coefficients.*/ + for(zzi=0;zzi<64;){ + int token; + last_zzi=zzi; + if(eob_runs[zzi]){ + eob_runs[zzi]--; + break; + } + else{ + ptrdiff_t eob; + int cw; + int rlen; + int coeff; + int lti; + lti=ti[zzi]; + token=dct_tokens[lti++]; + cw=OC_DCT_CODE_WORD[token]; + /*These parts could be done branchless, but the branches are fairly + predictable and the C code translates into more than a few + instructions, so it's worth it to avoid them.*/ + if(OC_DCT_TOKEN_NEEDS_MORE(token)){ + cw+=dct_tokens[lti++]<>OC_DCT_CW_EOB_SHIFT&0xFFF; + if(token==OC_DCT_TOKEN_FAT_EOB){ + eob+=dct_tokens[lti++]<<8; + if(eob==0)eob=OC_DCT_EOB_FINISH; + } + rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT); + cw^=-(cw&1<>OC_DCT_CW_MAG_SHIFT; + eob_runs[zzi]=eob; + ti[zzi]=lti; + zzi+=rlen; + dct_coeffs[dct_fzig_zag[zzi]]=(ogg_int16_t)(coeff*(int)ac_quant[zzi]); + zzi+=!eob; + } + } + /*TODO: zzi should be exactly 64 here. + If it's not, we should report some kind of warning.*/ + zzi=OC_MINI(zzi,64); + dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc; + /*last_zzi is always initialized. 
+ If your compiler thinks otherwise, it is dumb.*/ + oc_state_frag_recon(&_dec->state,fragi,_pli, + dct_coeffs,last_zzi,dc_quant[qti]); + } + _pipe->coded_fragis[_pli]+=ncoded_fragis; + /*Right now the reconstructed MCU has only the coded blocks in it.*/ + /*TODO: We make the decision here to always copy the uncoded blocks into it + from the reference frame. + We could also copy the coded blocks back over the reference frame, if we + wait for an additional MCU to be decoded, which might be faster if only a + small number of blocks are coded. + However, this introduces more latency, creating a larger cache footprint. + It's unknown which decision is better, but this one results in simpler + code, and the hard case (high bitrate, high resolution) is handled + correctly.*/ + /*Copy the uncoded blocks from the previous reference frame.*/ + _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli]; + oc_state_frag_copy_list(&_dec->state,_pipe->uncoded_fragis[_pli], + _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli); +} + +/*Filter a horizontal block edge.*/ +static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride, + const unsigned char *_src,int _src_ystride,int _qstep,int _flimit, + int *_variance0,int *_variance1){ + unsigned char *rdst; + const unsigned char *rsrc; + unsigned char *cdst; + const unsigned char *csrc; + int r[10]; + int sum0; + int sum1; + int bx; + int by; + rdst=_dst; + rsrc=_src; + for(bx=0;bx<8;bx++){ + cdst=rdst; + csrc=rsrc; + for(by=0;by<10;by++){ + r[by]=*csrc; + csrc+=_src_ystride; + } + sum0=sum1=0; + for(by=0;by<4;by++){ + sum0+=abs(r[by+1]-r[by]); + sum1+=abs(r[by+5]-r[by+6]); + } + *_variance0+=OC_MINI(255,sum0); + *_variance1+=OC_MINI(255,sum1); + if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){ + *cdst=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3); + cdst+=_dst_ystride; + *cdst=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3); + cdst+=_dst_ystride; + for(by=0;by<4;by++){ + 
*cdst=(unsigned char)(r[by]+r[by+1]+r[by+2]+r[by+3]*2+ + r[by+4]+r[by+5]+r[by+6]+4>>3); + cdst+=_dst_ystride; + } + *cdst=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3); + cdst+=_dst_ystride; + *cdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3); + } + else{ + for(by=1;by<=8;by++){ + *cdst=(unsigned char)r[by]; + cdst+=_dst_ystride; + } + } + rdst++; + rsrc++; + } +} + +/*Filter a vertical block edge.*/ +static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride, + int _qstep,int _flimit,int *_variances){ + unsigned char *rdst; + const unsigned char *rsrc; + unsigned char *cdst; + int r[10]; + int sum0; + int sum1; + int bx; + int by; + cdst=_dst; + for(by=0;by<8;by++){ + rsrc=cdst-1; + rdst=cdst; + for(bx=0;bx<10;bx++)r[bx]=*rsrc++; + sum0=sum1=0; + for(bx=0;bx<4;bx++){ + sum0+=abs(r[bx+1]-r[bx]); + sum1+=abs(r[bx+5]-r[bx+6]); + } + _variances[0]+=OC_MINI(255,sum0); + _variances[1]+=OC_MINI(255,sum1); + if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){ + *rdst++=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3); + *rdst++=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3); + for(bx=0;bx<4;bx++){ + *rdst++=(unsigned char)(r[bx]+r[bx+1]+r[bx+2]+r[bx+3]*2+ + r[bx+4]+r[bx+5]+r[bx+6]+4>>3); + } + *rdst++=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3); + *rdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3); + } + cdst+=_dst_ystride; + } +} + +static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec, + th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0, + int _fragy_end){ + oc_fragment_plane *fplane; + int *variance; + unsigned char *dc_qi; + unsigned char *dst; + const unsigned char *src; + ptrdiff_t froffset; + int dst_ystride; + int src_ystride; + int nhfrags; + int width; + int notstart; + int notdone; + int flimit; + int qstep; + int y_end; + int y; + int x; + _dst+=_pli; + _src+=_pli; + fplane=_dec->state.fplanes+_pli; + nhfrags=fplane->nhfrags; + froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags; + 
variance=_dec->variances+froffset; + dc_qi=_dec->dc_qis+froffset; + notstart=_fragy0>0; + notdone=_fragy_endnvfrags; + /*We want to clear an extra row of variances, except at the end.*/ + memset(variance+(nhfrags&-notstart),0, + (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0]))); + /*Except for the first time, we want to point to the middle of the row.*/ + y=(_fragy0<<3)+(notstart<<2); + dst_ystride=_dst->stride; + src_ystride=_src->stride; + dst=_dst->data+y*(ptrdiff_t)dst_ystride; + src=_src->data+y*(ptrdiff_t)src_ystride; + width=_dst->width; + for(;y<4;y++){ + memcpy(dst,src,width*sizeof(dst[0])); + dst+=dst_ystride; + src+=src_ystride; + } + /*We also want to skip the last row in the frame for this loop.*/ + y_end=_fragy_end-!notdone<<3; + for(;ypp_dc_scale[*dc_qi]; + flimit=(qstep*3)>>2; + oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride, + qstep,flimit,variance,variance+nhfrags); + variance++; + dc_qi++; + for(x=8;xpp_dc_scale[*dc_qi]; + flimit=(qstep*3)>>2; + oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride, + qstep,flimit,variance,variance+nhfrags); + oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride, + qstep,flimit,variance-1); + variance++; + dc_qi++; + } + dst+=dst_ystride<<3; + src+=src_ystride<<3; + } + /*And finally, handle the last row in the frame, if it's in the range.*/ + if(!notdone){ + int height; + height=_dst->height; + for(;ypp_dc_scale[*dc_qi++]; + flimit=(qstep*3)>>2; + oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride, + qstep,flimit,variance++); + } + } +} + +static void oc_dering_block(unsigned char *_idata,int _ystride,int _b, + int _dc_scale,int _sharp_mod,int _strong){ + static const unsigned char OC_MOD_MAX[2]={24,32}; + static const unsigned char OC_MOD_SHIFT[2]={1,0}; + const unsigned char *psrc; + const unsigned char *src; + const unsigned char *nsrc; + unsigned char *dst; + int vmod[72]; + int hmod[72]; + int mod_hi; + int by; + int bx; + 
mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]); + dst=_idata; + src=dst; + psrc=src-(_ystride&-!(_b&4)); + for(by=0;by<9;by++){ + for(bx=0;bx<8;bx++){ + int mod; + mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<>7); + for(bx=1;bx<7;bx++){ + a=128; + b=64; + w=hmod[(bx<<3)+by]; + a-=w; + b+=w*src[bx-1]; + w=vmod[(by<<3)+bx]; + a-=w; + b+=w*psrc[bx]; + w=vmod[(by+1<<3)+bx]; + a-=w; + b+=w*nsrc[bx]; + w=hmod[(bx+1<<3)+by]; + a-=w; + b+=w*src[bx+1]; + dst[bx]=OC_CLAMP255(a*src[bx]+b>>7); + } + a=128; + b=64; + w=hmod[(7<<3)+by]; + a-=w; + b+=w*src[6]; + w=vmod[(by<<3)+7]; + a-=w; + b+=w*psrc[7]; + w=vmod[(by+1<<3)+7]; + a-=w; + b+=w*nsrc[7]; + w=hmod[(8<<3)+by]; + a-=w; + b+=w*src[7+!(_b&2)]; + dst[7]=OC_CLAMP255(a*src[7]+b>>7); + dst+=_ystride; + psrc=src; + src=nsrc; + nsrc+=_ystride&-(!(_b&8)|by<6); + } +} + +#define OC_DERING_THRESH1 (384) +#define OC_DERING_THRESH2 (4*OC_DERING_THRESH1) +#define OC_DERING_THRESH3 (5*OC_DERING_THRESH1) +#define OC_DERING_THRESH4 (10*OC_DERING_THRESH1) + +static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img, + int _pli,int _fragy0,int _fragy_end){ + th_img_plane *iplane; + oc_fragment_plane *fplane; + oc_fragment *frag; + int *variance; + unsigned char *idata; + ptrdiff_t froffset; + int ystride; + int nhfrags; + int sthresh; + int strong; + int y_end; + int width; + int height; + int y; + int x; + iplane=_img+_pli; + fplane=_dec->state.fplanes+_pli; + nhfrags=fplane->nhfrags; + froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags; + variance=_dec->variances+froffset; + frag=_dec->state.frags+froffset; + strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY); + sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3; + y=_fragy0<<3; + ystride=iplane->stride; + idata=iplane->data+y*(ptrdiff_t)ystride; + y_end=_fragy_end<<3; + width=iplane->width; + height=iplane->height; + for(;ystate.qis[frag->qii]; + var=*variance; + b=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3; + if(strong&&var>sthresh){ + 
oc_dering_block(idata+x,ystride,b, + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); + if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4|| + !(b&2)&&variance[1]>OC_DERING_THRESH4|| + !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4|| + !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){ + oc_dering_block(idata+x,ystride,b, + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); + oc_dering_block(idata+x,ystride,b, + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); + } + } + else if(var>OC_DERING_THRESH2){ + oc_dering_block(idata+x,ystride,b, + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); + } + else if(var>OC_DERING_THRESH1){ + oc_dering_block(idata+x,ystride,b, + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0); + } + frag++; + variance++; + } + idata+=ystride<<3; + } +} + + + +th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){ + oc_dec_ctx *dec; + if(_info==NULL||_setup==NULL)return NULL; + dec=_ogg_malloc(sizeof(*dec)); + if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){ + _ogg_free(dec); + return NULL; + } + dec->state.curframe_num=0; + return dec; +} + +void th_decode_free(th_dec_ctx *_dec){ + if(_dec!=NULL){ + oc_dec_clear(_dec); + _ogg_free(_dec); + } +} + +int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf, + size_t _buf_sz){ + switch(_req){ + case TH_DECCTL_GET_PPLEVEL_MAX:{ + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + (*(int *)_buf)=OC_PP_LEVEL_MAX; + return 0; + }break; + case TH_DECCTL_SET_PPLEVEL:{ + int pp_level; + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + pp_level=*(int *)_buf; + if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL; + _dec->pp_level=pp_level; + return 0; + }break; + case TH_DECCTL_SET_GRANPOS:{ + ogg_int64_t granpos; + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL; + granpos=*(ogg_int64_t *)_buf; + if(granpos<0)return TH_EINVAL; + _dec->state.granpos=granpos; + 
_dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift) + -_dec->state.granpos_bias; + _dec->state.curframe_num=_dec->state.keyframe_num + +(granpos&(1<<_dec->state.info.keyframe_granule_shift)-1); + return 0; + }break; + case TH_DECCTL_SET_STRIPE_CB:{ + th_stripe_callback *cb; + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL; + cb=(th_stripe_callback *)_buf; + _dec->stripe_cb.ctx=cb->ctx; + _dec->stripe_cb.stripe_decoded=cb->stripe_decoded; + return 0; + }break; +#ifdef HAVE_CAIRO + case TH_DECCTL_SET_TELEMETRY_MBMODE:{ + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + _dec->telemetry=1; + _dec->telemetry_mbmode=*(int *)_buf; + return 0; + }break; + case TH_DECCTL_SET_TELEMETRY_MV:{ + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + _dec->telemetry=1; + _dec->telemetry_mv=*(int *)_buf; + return 0; + }break; + case TH_DECCTL_SET_TELEMETRY_QI:{ + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + _dec->telemetry=1; + _dec->telemetry_qi=*(int *)_buf; + return 0; + }break; + case TH_DECCTL_SET_TELEMETRY_BITS:{ + if(_dec==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + _dec->telemetry=1; + _dec->telemetry_bits=*(int *)_buf; + return 0; + }break; +#endif + default:return TH_EIMPL; + } +} + +/*We're decoding an INTER frame, but have no initialized reference + buffers (i.e., decoding did not start on a key frame). 
+ We initialize them to a solid gray here.*/ +static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){ + th_info *info; + size_t yplane_sz; + size_t cplane_sz; + int yhstride; + int yheight; + int chstride; + int cheight; + _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0; + _dec->state.ref_frame_idx[OC_FRAME_PREV]=0; + _dec->state.ref_frame_idx[OC_FRAME_SELF]=1; + info=&_dec->state.info; + yhstride=info->frame_width+2*OC_UMV_PADDING; + yheight=info->frame_height+2*OC_UMV_PADDING; + chstride=yhstride>>!(info->pixel_fmt&1); + cheight=yheight>>!(info->pixel_fmt&2); + yplane_sz=yhstride*(size_t)yheight; + cplane_sz=chstride*(size_t)cheight; + memset(_dec->state.ref_frame_data[0],0x80,yplane_sz+2*cplane_sz); +} + +int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op, + ogg_int64_t *_granpos){ + int ret; + if(_dec==NULL||_op==NULL)return TH_EFAULT; + /*A completely empty packet indicates a dropped frame and is treated exactly + like an inter frame with no coded blocks. + Only proceed if we have a non-empty packet.*/ + if(_op->bytes!=0){ + oc_dec_pipeline_state pipe; + th_ycbcr_buffer stripe_buf; + int stripe_fragy; + int refi; + int pli; + int notstart; + int notdone; + oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes); +#if defined(HAVE_CAIRO) + _dec->telemetry_frame_bytes=_op->bytes; +#endif + ret=oc_dec_frame_header_unpack(_dec); + if(ret<0)return ret; + /*Select a free buffer to use for the reconstructed version of this + frame.*/ + if(_dec->state.frame_type!=OC_INTRA_FRAME&& + (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0|| + _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){ + /*No reference frames yet!*/ + oc_dec_init_dummy_frame(_dec); + refi=_dec->state.ref_frame_idx[OC_FRAME_SELF]; + } + else{ + for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]|| + refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++); + _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi; + } + if(_dec->state.frame_type==OC_INTRA_FRAME){ + oc_dec_mark_all_intra(_dec); + 
_dec->state.keyframe_num=_dec->state.curframe_num; +#if defined(HAVE_CAIRO) + _dec->telemetry_coding_bytes= + _dec->telemetry_mode_bytes= + _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb); +#endif + } + else{ + oc_dec_coded_flags_unpack(_dec); +#if defined(HAVE_CAIRO) + _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb); +#endif + oc_dec_mb_modes_unpack(_dec); +#if defined(HAVE_CAIRO) + _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb); +#endif + oc_dec_mv_unpack_and_frag_modes_fill(_dec); +#if defined(HAVE_CAIRO) + _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb); +#endif + } + oc_dec_block_qis_unpack(_dec); +#if defined(HAVE_CAIRO) + _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb); +#endif + oc_dec_residual_tokens_unpack(_dec); + /*Update granule position. + This must be done before the striped decode callbacks so that the + application knows what to do with the frame data.*/ + _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<< + _dec->state.info.keyframe_granule_shift) + +(_dec->state.curframe_num-_dec->state.keyframe_num); + _dec->state.curframe_num++; + if(_granpos!=NULL)*_granpos=_dec->state.granpos; + /*All of the rest of the operations -- DC prediction reversal, + reconstructing coded fragments, copying uncoded fragments, loop + filtering, extending borders, and out-of-loop post-processing -- should + be pipelined. + I.e., DC prediction reversal, reconstruction, and uncoded fragment + copying are done for one or two super block rows, then loop filtering is + run as far as it can, then bordering copying, then post-processing. + For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super + block rows, and one chroma. + Otherwise, an MCU consists of one super block row from each plane. + Inside each MCU, we perform all of the steps on one color plane before + moving on to the next. 
+ After reconstruction, the additional filtering stages introduce a delay + since they need some pixels from the next fragment row. + Thus the actual number of decoded rows available is slightly smaller for + the first MCU, and slightly larger for the last. + + This entire process allows us to operate on the data while it is still in + cache, resulting in big performance improvements. + An application callback allows further application processing (blitting + to video memory, color conversion, etc.) to also use the data while it's + in cache.*/ + oc_dec_pipeline_init(_dec,&pipe); + oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf); + notstart=0; + notdone=1; + for(stripe_fragy=0;notdone;stripe_fragy+=pipe.mcu_nvfrags){ + int avail_fragy0; + int avail_fragy_end; + avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags; + notdone=stripe_fragy+pipe.mcu_nvfragsstate.fplanes+pli; + /*Compute the first and last fragment row of the current MCU for this + plane.*/ + frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2); + pipe.fragy0[pli]=stripe_fragy>>frag_shift; + pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags, + pipe.fragy0[pli]+(pipe.mcu_nvfrags>>frag_shift)); + oc_dec_dc_unpredict_mcu_plane(_dec,&pipe,pli); + oc_dec_frags_recon_mcu_plane(_dec,&pipe,pli); + sdelay=edelay=0; + if(pipe.loop_filter){ + sdelay+=notstart; + edelay+=notdone; + oc_state_loop_filter_frag_rows(&_dec->state,pipe.bounding_values, + refi,pli,pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay); + } + /*To fill the borders, we have an additional two pixel delay, since a + fragment in the next row could filter its top edge, using two pixels + from a fragment in this row. 
+ But there's no reason to delay a full fragment between the two.*/ + oc_state_borders_fill_rows(&_dec->state,refi,pli, + (pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1), + (pipe.fragy_end[pli]-edelay<<3)-(edelay<<1)); + /*Out-of-loop post-processing.*/ + pp_offset=3*(pli!=0); + if(pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){ + /*Perform de-blocking in one plane.*/ + sdelay+=notstart; + edelay+=notdone; + oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf, + _dec->state.ref_frame_bufs[refi],pli, + pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay); + if(pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){ + /*Perform de-ringing in one plane.*/ + sdelay+=notstart; + edelay+=notdone; + oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli, + pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay); + } + } + /*If no post-processing is done, we still need to delay a row for the + loop filter, thanks to the strange filtering order VP3 chose.*/ + else if(pipe.loop_filter){ + sdelay+=notstart; + edelay+=notdone; + } + /*Compute the intersection of the available rows in all planes. + If chroma is sub-sampled, the effect of each of its delays is + doubled, but luma might have more post-processing filters enabled + than chroma, so we don't know up front which one is the limiting + factor.*/ + avail_fragy0=OC_MINI(avail_fragy0,pipe.fragy0[pli]-sdelay<stripe_cb.stripe_decoded!=NULL){ + /*The callback might want to use the FPU, so let's make sure they can. 
+ We violate all kinds of ABI restrictions by not doing this until + now, but none of them actually matter since we don't use floating + point ourselves.*/ + oc_restore_fpu(&_dec->state); + /*Make the callback, ensuring we flip the sense of the "start" and + "end" of the available region upside down.*/ + (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf, + _dec->state.fplanes[0].nvfrags-avail_fragy_end, + _dec->state.fplanes[0].nvfrags-avail_fragy0); + } + notstart=1; + } + /*Finish filling in the reference frame borders.*/ + for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli); + /*Update the reference frame indices.*/ + if(_dec->state.frame_type==OC_INTRA_FRAME){ + /*The new frame becomes both the previous and gold reference frames.*/ + _dec->state.ref_frame_idx[OC_FRAME_GOLD]= + _dec->state.ref_frame_idx[OC_FRAME_PREV]= + _dec->state.ref_frame_idx[OC_FRAME_SELF]; + } + else{ + /*Otherwise, just replace the previous reference frame.*/ + _dec->state.ref_frame_idx[OC_FRAME_PREV]= + _dec->state.ref_frame_idx[OC_FRAME_SELF]; + } + /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG + gamma values, if nothing else).*/ + oc_restore_fpu(&_dec->state); +#if defined(OC_DUMP_IMAGES) + /*Don't dump images for dropped frames.*/ + oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec"); +#endif + return 0; + } + else{ + if(_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0|| + _dec->state.ref_frame_idx[OC_FRAME_PREV]<0){ + int refi; + /*No reference frames yet!*/ + oc_dec_init_dummy_frame(_dec); + refi=_dec->state.ref_frame_idx[OC_FRAME_PREV]; + _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi; + memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[refi], + sizeof(_dec->pp_frame_buf[0])*3); + } + /*Just update the granule position and return.*/ + _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<< + _dec->state.info.keyframe_granule_shift) + +(_dec->state.curframe_num-_dec->state.keyframe_num); + 
_dec->state.curframe_num++; + if(_granpos!=NULL)*_granpos=_dec->state.granpos; + return TH_DUPFRAME; + } +} + +int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){ + if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT; + oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf); +#if defined(HAVE_CAIRO) + /*If telemetry ioctls are active, we need to draw to the output buffer. + Stuff the plane into cairo.*/ + if(_dec->telemetry){ + cairo_surface_t *cs; + unsigned char *data; + unsigned char *y_row; + unsigned char *u_row; + unsigned char *v_row; + unsigned char *rgb_row; + int cstride; + int w; + int h; + int x; + int y; + int hdec; + int vdec; + w=_ycbcr[0].width; + h=_ycbcr[0].height; + hdec=!(_dec->state.info.pixel_fmt&1); + vdec=!(_dec->state.info.pixel_fmt&2); + /*Lazy data buffer init. + We could try to re-use the post-processing buffer, which would save + memory, but complicate the allocation logic there. + I don't think anyone cares about memory usage when using telemetry; it is + not meant for embedded devices.*/ + if(_dec->telemetry_frame_data==NULL){ + _dec->telemetry_frame_data=_ogg_malloc( + (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data)); + if(_dec->telemetry_frame_data==NULL)return 0; + } + cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h); + /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/ + data=cairo_image_surface_get_data(cs); + if(data==NULL){ + cairo_surface_destroy(cs); + return 0; + } + cstride=cairo_image_surface_get_stride(cs); + y_row=_ycbcr[0].data; + u_row=_ycbcr[1].data; + v_row=_ycbcr[2].data; + rgb_row=data; + for(y=0;y>hdec]-363703744)/1635200; + g=(3827562*y_row[x]-1287801*u_row[x>>hdec] + -2672387*v_row[x>>hdec]+447306710)/3287200; + b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600; + rgb_row[4*x+0]=OC_CLAMP255(b); + rgb_row[4*x+1]=OC_CLAMP255(g); + rgb_row[4*x+2]=OC_CLAMP255(r); + } + y_row+=_ycbcr[0].stride; + u_row+=_ycbcr[1].stride&-((y&1)|!vdec); + 
v_row+=_ycbcr[2].stride&-((y&1)|!vdec); + rgb_row+=cstride; + } + /*Draw coded identifier for each macroblock (stored in Hilbert order).*/ + { + cairo_t *c; + const oc_fragment *frags; + oc_mv *frag_mvs; + const signed char *mb_modes; + oc_mb_map *mb_maps; + size_t nmbs; + size_t mbi; + int row2; + int col2; + int qim[3]={0,0,0}; + if(_dec->state.nqis==2){ + int bqi; + bqi=_dec->state.qis[0]; + if(_dec->state.qis[1]>bqi)qim[1]=1; + if(_dec->state.qis[1]state.nqis==3){ + int bqi; + int cqi; + int dqi; + bqi=_dec->state.qis[0]; + cqi=_dec->state.qis[1]; + dqi=_dec->state.qis[2]; + if(cqi>bqi&&dqi>bqi){ + if(dqi>cqi){ + qim[1]=1; + qim[2]=2; + } + else{ + qim[1]=2; + qim[2]=1; + } + } + else if(cqistate.frags; + frag_mvs=_dec->state.frag_mvs; + mb_modes=_dec->state.mb_modes; + mb_maps=_dec->state.mb_maps; + nmbs=_dec->state.nmbs; + row2=0; + col2=0; + for(mbi=0;mbi>1)&1))*16-16; + x=(col2>>1)*16; + cairo_set_line_width(c,1.); + /*Keyframe (all intra) red box.*/ + if(_dec->state.frame_type==OC_INTRA_FRAME){ + if(_dec->telemetry_mbmode&0x02){ + cairo_set_source_rgba(c,1.,0,0,.5); + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,1.,0,0,.25); + cairo_fill(c); + } + } + else{ + const signed char *frag_mv; + ptrdiff_t fragi; + for(bi=0;bi<4;bi++){ + fragi=mb_maps[mbi][0][bi]; + if(fragi>=0&&frags[fragi].coded){ + frag_mv=frag_mvs[fragi]; + break; + } + } + if(bi<4){ + switch(mb_modes[mbi]){ + case OC_MODE_INTRA:{ + if(_dec->telemetry_mbmode&0x02){ + cairo_set_source_rgba(c,1.,0,0,.5); + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,1.,0,0,.25); + cairo_fill(c); + } + }break; + case OC_MODE_INTER_NOMV:{ + if(_dec->telemetry_mbmode&0x01){ + cairo_set_source_rgba(c,0,0,1.,.5); + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,0,0,1.,.25); + cairo_fill(c); + } + }break; + case OC_MODE_INTER_MV:{ + if(_dec->telemetry_mbmode&0x04){ + 
cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_set_source_rgba(c,0,1.,0,.5); + cairo_stroke(c); + } + if(_dec->telemetry_mv&0x04){ + cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+8,y+8); + cairo_stroke(c); + } + }break; + case OC_MODE_INTER_MV_LAST:{ + if(_dec->telemetry_mbmode&0x08){ + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_set_source_rgba(c,0,1.,0,.5); + cairo_move_to(c,x+13.5,y+2.5); + cairo_line_to(c,x+2.5,y+8); + cairo_line_to(c,x+13.5,y+13.5); + cairo_stroke(c); + } + if(_dec->telemetry_mv&0x08){ + cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+8,y+8); + cairo_stroke(c); + } + }break; + case OC_MODE_INTER_MV_LAST2:{ + if(_dec->telemetry_mbmode&0x10){ + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_set_source_rgba(c,0,1.,0,.5); + cairo_move_to(c,x+8,y+2.5); + cairo_line_to(c,x+2.5,y+8); + cairo_line_to(c,x+8,y+13.5); + cairo_move_to(c,x+13.5,y+2.5); + cairo_line_to(c,x+8,y+8); + cairo_line_to(c,x+13.5,y+13.5); + cairo_stroke(c); + } + if(_dec->telemetry_mv&0x10){ + cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + 
cairo_line_to(c,x+8,y+8); + cairo_stroke(c); + } + }break; + case OC_MODE_GOLDEN_NOMV:{ + if(_dec->telemetry_mbmode&0x20){ + cairo_set_source_rgba(c,1.,1.,0,.5); + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,1.,1.,0,.25); + cairo_fill(c); + } + }break; + case OC_MODE_GOLDEN_MV:{ + if(_dec->telemetry_mbmode&0x40){ + cairo_rectangle(c,x+2.5,y+2.5,11,11); + cairo_set_source_rgba(c,1.,1.,0,.5); + cairo_stroke(c); + } + if(_dec->telemetry_mv&0x40){ + cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+8,y+8); + cairo_stroke(c); + } + }break; + case OC_MODE_INTER_MV_FOUR:{ + if(_dec->telemetry_mbmode&0x80){ + cairo_rectangle(c,x+2.5,y+2.5,4,4); + cairo_rectangle(c,x+9.5,y+2.5,4,4); + cairo_rectangle(c,x+2.5,y+9.5,4,4); + cairo_rectangle(c,x+9.5,y+9.5,4,4); + cairo_set_source_rgba(c,0,1.,0,.5); + cairo_stroke(c); + } + /*4mv is odd, coded in raster order.*/ + fragi=mb_maps[mbi][0][0]; + if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ + frag_mv=frag_mvs[fragi]; + cairo_move_to(c,x+4+frag_mv[0],y+12-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+4+frag_mv[0]*.66,y+12-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+4+frag_mv[0]*.33,y+12-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+4,y+12); + cairo_stroke(c); + } + fragi=mb_maps[mbi][0][1]; + if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ + frag_mv=frag_mvs[fragi]; + cairo_move_to(c,x+12+frag_mv[0],y+12-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + 
cairo_line_to(c,x+12+frag_mv[0]*.66,y+12-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+12+frag_mv[0]*.33,y+12-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+12,y+12); + cairo_stroke(c); + } + fragi=mb_maps[mbi][0][2]; + if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ + frag_mv=frag_mvs[fragi]; + cairo_move_to(c,x+4+frag_mv[0],y+4-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+4+frag_mv[0]*.66,y+4-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+4+frag_mv[0]*.33,y+4-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+4,y+4); + cairo_stroke(c); + } + fragi=mb_maps[mbi][0][3]; + if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ + frag_mv=frag_mvs[fragi]; + cairo_move_to(c,x+12+frag_mv[0],y+4-frag_mv[1]); + cairo_set_source_rgba(c,1.,1.,1.,.9); + cairo_set_line_width(c,3.); + cairo_line_to(c,x+12+frag_mv[0]*.66,y+4-frag_mv[1]*.66); + cairo_stroke_preserve(c); + cairo_set_line_width(c,2.); + cairo_line_to(c,x+12+frag_mv[0]*.33,y+4-frag_mv[1]*.33); + cairo_stroke_preserve(c); + cairo_set_line_width(c,1.); + cairo_line_to(c,x+12,y+4); + cairo_stroke(c); + } + }break; + } + } + } + /*qii illustration.*/ + if(_dec->telemetry_qi&0x2){ + cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE); + for(bi=0;bi<4;bi++){ + ptrdiff_t fragi; + int qiv; + int xp; + int yp; + xp=x+(bi&1)*8; + yp=y+8-(bi&2)*4; + fragi=mb_maps[mbi][0][bi]; + if(fragi>=0&&frags[fragi].coded){ + qiv=qim[frags[fragi].qii]; + cairo_set_line_width(c,3.); + cairo_set_source_rgba(c,0.,0.,0.,.5); + switch(qiv){ + /*Double plus:*/ + case 2:{ + if((bi&1)^((bi&2)>>1)){ + cairo_move_to(c,xp+2.5,yp+1.5); + cairo_line_to(c,xp+2.5,yp+3.5); + cairo_move_to(c,xp+1.5,yp+2.5); + cairo_line_to(c,xp+3.5,yp+2.5); + cairo_move_to(c,xp+5.5,yp+4.5); + cairo_line_to(c,xp+5.5,yp+6.5); + 
cairo_move_to(c,xp+4.5,yp+5.5); + cairo_line_to(c,xp+6.5,yp+5.5); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,0.,1.,1.,1.); + } + else{ + cairo_move_to(c,xp+5.5,yp+1.5); + cairo_line_to(c,xp+5.5,yp+3.5); + cairo_move_to(c,xp+4.5,yp+2.5); + cairo_line_to(c,xp+6.5,yp+2.5); + cairo_move_to(c,xp+2.5,yp+4.5); + cairo_line_to(c,xp+2.5,yp+6.5); + cairo_move_to(c,xp+1.5,yp+5.5); + cairo_line_to(c,xp+3.5,yp+5.5); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,0.,1.,1.,1.); + } + }break; + /*Double minus:*/ + case -2:{ + cairo_move_to(c,xp+2.5,yp+2.5); + cairo_line_to(c,xp+5.5,yp+2.5); + cairo_move_to(c,xp+2.5,yp+5.5); + cairo_line_to(c,xp+5.5,yp+5.5); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,1.,1.,1.,1.); + }break; + /*Plus (moved by 2 in y when bit 1 of bi is clear and by 2 in x when bit 0 is clear; the tests are parenthesized because == binds tighter than &):*/ + case 1:{ + if((bi&2)==0)yp-=2; + if((bi&1)==0)xp-=2; + cairo_move_to(c,xp+4.5,yp+2.5); + cairo_line_to(c,xp+4.5,yp+6.5); + cairo_move_to(c,xp+2.5,yp+4.5); + cairo_line_to(c,xp+6.5,yp+4.5); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,.1,1.,.3,1.); + break; + } + /*No fall through: the plus case ends in a break.*/ + /*Minus:*/ + case -1:{ + cairo_move_to(c,xp+2.5,yp+4.5); + cairo_line_to(c,xp+6.5,yp+4.5); + cairo_stroke_preserve(c); + cairo_set_source_rgba(c,1.,.3,.1,1.); + }break; + default:continue; + } + cairo_set_line_width(c,1.); + cairo_stroke(c); + } + } + } + col2++; + if((col2>>1)>=_dec->state.nhmbs){ + col2=0; + row2+=2; + } + } + /*Bit usage indicator[s]:*/ + if(_dec->telemetry_bits){ + int widths[6]; + int fpsn; + int fpsd; + int mult; + int fullw; + int padw; + int i; + fpsn=_dec->state.info.fps_numerator; + fpsd=_dec->state.info.fps_denominator; + mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits); + fullw=250.f*h*fpsd*mult/fpsn; + padw=w-24; + /*Header and coded block bits.*/ + if(_dec->telemetry_frame_bytes<0|| + _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){ + _dec->telemetry_frame_bytes=0; + } + if(_dec->telemetry_coding_bytes<0|| + _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){ + _dec->telemetry_coding_bytes=0; + 
} + if(_dec->telemetry_mode_bytes<0|| + _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){ + _dec->telemetry_mode_bytes=0; + } + if(_dec->telemetry_mv_bytes<0|| + _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){ + _dec->telemetry_mv_bytes=0; + } + if(_dec->telemetry_qi_bytes<0|| + _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){ + _dec->telemetry_qi_bytes=0; + } + if(_dec->telemetry_dc_bytes<0|| + _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){ + _dec->telemetry_dc_bytes=0; + } + widths[0]=padw*(_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw; + widths[1]=padw*(_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw; + widths[2]=padw*(_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw; + widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw; + widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw; + widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw; + for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w; + cairo_set_source_rgba(c,.0,.0,.0,.6); + cairo_rectangle(c,10,h-33,widths[0]+1,5); + cairo_rectangle(c,10,h-29,widths[1]+1,5); + cairo_rectangle(c,10,h-25,widths[2]+1,5); + cairo_rectangle(c,10,h-21,widths[3]+1,5); + cairo_rectangle(c,10,h-17,widths[4]+1,5); + cairo_rectangle(c,10,h-13,widths[5]+1,5); + cairo_fill(c); + cairo_set_source_rgb(c,1,0,0); + cairo_rectangle(c,10.5,h-32.5,widths[0],4); + cairo_fill(c); + cairo_set_source_rgb(c,0,1,0); + cairo_rectangle(c,10.5,h-28.5,widths[1],4); + cairo_fill(c); + cairo_set_source_rgb(c,0,0,1); + cairo_rectangle(c,10.5,h-24.5,widths[2],4); + cairo_fill(c); + cairo_set_source_rgb(c,.6,.4,.0); + cairo_rectangle(c,10.5,h-20.5,widths[3],4); + cairo_fill(c); + cairo_set_source_rgb(c,.3,.3,.3); + cairo_rectangle(c,10.5,h-16.5,widths[4],4); + cairo_fill(c); + cairo_set_source_rgb(c,.5,.5,.8); + cairo_rectangle(c,10.5,h-12.5,widths[5],4); + cairo_fill(c); + } + /*Master qi indicator[s]:*/ + if(_dec->telemetry_qi&0x1){ + cairo_text_extents_t 
extents; + char buffer[10]; + int p; + int y; + p=0; + y=h-7.5; + if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10; + buffer[p++]=48+_dec->state.qis[0]%10; + if(_dec->state.nqis>=2){ + buffer[p++]=' '; + if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10; + buffer[p++]=48+_dec->state.qis[1]%10; + } + if(_dec->state.nqis==3){ + buffer[p++]=' '; + if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10; + buffer[p++]=48+_dec->state.qis[2]%10; + } + buffer[p++]='\0'; + cairo_select_font_face(c,"sans", + CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD); + cairo_set_font_size(c,18); + cairo_text_extents(c,buffer,&extents); + cairo_set_source_rgb(c,1,1,1); + cairo_move_to(c,w-extents.x_advance-10,y); + cairo_show_text(c,buffer); + cairo_set_source_rgb(c,0,0,0); + cairo_move_to(c,w-extents.x_advance-10,y); + cairo_text_path(c,buffer); + cairo_set_line_width(c,.8); + cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND); + cairo_stroke(c); + } + cairo_destroy(c); + } + /*Out of the Cairo plane into the telemetry YUV buffer.*/ + _ycbcr[0].data=_dec->telemetry_frame_data; + _ycbcr[0].stride=_ycbcr[0].width; + _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride; + _ycbcr[1].stride=_ycbcr[1].width; + _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride; + _ycbcr[2].stride=_ycbcr[2].width; + y_row=_ycbcr[0].data; + u_row=_ycbcr[1].data; + v_row=_ycbcr[2].data; + rgb_row=data; + /*This is one of the few places it's worth handling chroma on a + case-by-case basis.*/ + switch(_dec->state.info.pixel_fmt){ + case TH_PF_420:{ + for(y=0;y>1]=OC_CLAMP255(u); + v_row[x>>1]=OC_CLAMP255(v); + } + y_row+=_ycbcr[0].stride<<1; + u_row+=_ycbcr[1].stride; + v_row+=_ycbcr[2].stride; + rgb_row+=cstride<<1; + } + }break; + case TH_PF_422:{ + for(y=0;y>1]=OC_CLAMP255(u); + v_row[x>>1]=OC_CLAMP255(v); + } + y_row+=_ycbcr[0].stride; + u_row+=_ycbcr[1].stride; + v_row+=_ycbcr[2].stride; + rgb_row+=cstride; + } + }break; + /*case TH_PF_444:*/ + default:{ + for(y=0;y +#include 
+#include +#include "dequant.h" +#include "decint.h" + +int oc_quant_params_unpack(oc_pack_buf *_opb,th_quant_info *_qinfo){ + th_quant_base *base_mats; + long val; + int nbase_mats; + int sizes[64]; + int indices[64]; + int nbits; + int bmi; + int ci; + int qti; + int pli; + int qri; + int qi; + int i; + val=oc_pack_read(_opb,3); + nbits=(int)val; + for(qi=0;qi<64;qi++){ + val=oc_pack_read(_opb,nbits); + _qinfo->loop_filter_limits[qi]=(unsigned char)val; + } + val=oc_pack_read(_opb,4); + nbits=(int)val+1; + for(qi=0;qi<64;qi++){ + val=oc_pack_read(_opb,nbits); + _qinfo->ac_scale[qi]=(ogg_uint16_t)val; + } + val=oc_pack_read(_opb,4); + nbits=(int)val+1; + for(qi=0;qi<64;qi++){ + val=oc_pack_read(_opb,nbits); + _qinfo->dc_scale[qi]=(ogg_uint16_t)val; + } + val=oc_pack_read(_opb,9); + nbase_mats=(int)val+1; + base_mats=_ogg_malloc(nbase_mats*sizeof(base_mats[0])); + if(base_mats==NULL)return TH_EFAULT; + for(bmi=0;bmiqi_ranges[qti]+pli; + if(i>0){ + val=oc_pack_read1(_opb); + if(!val){ + int qtj; + int plj; + if(qti>0){ + val=oc_pack_read1(_opb); + if(val){ + qtj=qti-1; + plj=pli; + } + else{ + qtj=(i-1)/3; + plj=(i-1)%3; + } + } + else{ + qtj=(i-1)/3; + plj=(i-1)%3; + } + *qranges=*(_qinfo->qi_ranges[qtj]+plj); + continue; + } + } + val=oc_pack_read(_opb,nbits); + indices[0]=(int)val; + for(qi=qri=0;qi<63;){ + val=oc_pack_read(_opb,oc_ilog(62-qi)); + sizes[qri]=(int)val+1; + qi+=(int)val+1; + val=oc_pack_read(_opb,nbits); + indices[++qri]=(int)val; + } + /*Note: The caller is responsible for cleaning up any partially + constructed qinfo.*/ + if(qi>63){ + _ogg_free(base_mats); + return TH_EBADHEADER; + } + qranges->nranges=qri; + qranges->sizes=qrsizes=(int *)_ogg_malloc(qri*sizeof(qrsizes[0])); + if(qranges->sizes==NULL){ + /*Note: The caller is responsible for cleaning up any partially + constructed qinfo.*/ + _ogg_free(base_mats); + return TH_EFAULT; + } + memcpy(qrsizes,sizes,qri*sizeof(qrsizes[0])); + qrbms=(th_quant_base *)_ogg_malloc((qri+1)*sizeof(qrbms[0])); 
+ if(qrbms==NULL){ + /*Note: The caller is responsible for cleaning up any partially + constructed qinfo.*/ + _ogg_free(base_mats); + return TH_EFAULT; + } + qranges->base_matrices=(const th_quant_base *)qrbms; + do{ + bmi=indices[qri]; + /*Note: The caller is responsible for cleaning up any partially + constructed qinfo.*/ + if(bmi>=nbase_mats){ + _ogg_free(base_mats); + return TH_EBADHEADER; + } + memcpy(qrbms[qri],base_mats[bmi],sizeof(qrbms[qri])); + } + while(qri-->0); + } + _ogg_free(base_mats); /*The matrices in use were copied into qrbms above, so the scratch table is released.*/ + return 0; +} + +void oc_quant_params_clear(th_quant_info *_qinfo){ /*Releases the sizes and base_matrices arrays held by the six qi_ranges entries. oc_quant_params_unpack() can copy one entry wholesale from another, leaving both with the same pointers, so shared pointers are set to NULL before the frees below.*/ + int i; + for(i=6;i-->0;){ /*Walks i from 5 down to 0; qti=i/3 and pli=i%3 select the entry.*/ + int qti; + int pli; + qti=i/3; + pli=i%3; + /*Clear any duplicate pointer references.*/ + if(i>0){ /*Compare against the entry that immediately precedes this one.*/ + int qtj; + int plj; + qtj=(i-1)/3; + plj=(i-1)%3; + if(_qinfo->qi_ranges[qti][pli].sizes== + _qinfo->qi_ranges[qtj][plj].sizes){ + _qinfo->qi_ranges[qti][pli].sizes=NULL; + } + if(_qinfo->qi_ranges[qti][pli].base_matrices== + _qinfo->qi_ranges[qtj][plj].base_matrices){ + _qinfo->qi_ranges[qti][pli].base_matrices=NULL; + } + } + if(qti>0){ /*Row 1 is also compared against the same column of row 0.*/ + if(_qinfo->qi_ranges[1][pli].sizes== + _qinfo->qi_ranges[0][pli].sizes){ + _qinfo->qi_ranges[1][pli].sizes=NULL; + } + if(_qinfo->qi_ranges[1][pli].base_matrices== + _qinfo->qi_ranges[0][pli].base_matrices){ + _qinfo->qi_ranges[1][pli].base_matrices=NULL; + } + } + /*Now free all the non-duplicate storage.*/ + _ogg_free((void *)_qinfo->qi_ranges[qti][pli].sizes); + _ogg_free((void *)_qinfo->qi_ranges[qti][pli].base_matrices); + } +} diff --git a/thirdparty/libtheora/dequant.h b/thirdparty/libtheora/dequant.h new file mode 100644 index 0000000000..ef25838e35 --- /dev/null +++ b/thirdparty/libtheora/dequant.h @@ -0,0 +1,27 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. 
PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: dequant.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#if !defined(_dequant_H) +# define _dequant_H (1) +# include "quant.h" +# include "bitpack.h" + +int oc_quant_params_unpack(oc_pack_buf *_opb, + th_quant_info *_qinfo); +void oc_quant_params_clear(th_quant_info *_qinfo); + +#endif diff --git a/thirdparty/libtheora/encapiwrapper.c b/thirdparty/libtheora/encapiwrapper.c new file mode 100644 index 0000000000..874f12442d --- /dev/null +++ b/thirdparty/libtheora/encapiwrapper.c @@ -0,0 +1,168 @@ +#include +#include +#include +#include "apiwrapper.h" +#include "encint.h" +#include "theora/theoraenc.h" + + + +static void th_enc_api_clear(th_api_wrapper *_api){ + if(_api->encode)th_encode_free(_api->encode); + memset(_api,0,sizeof(*_api)); +} + +static void theora_encode_clear(theora_state *_te){ + if(_te->i!=NULL)theora_info_clear(_te->i); + memset(_te,0,sizeof(*_te)); +} + +static int theora_encode_control(theora_state *_te,int _req, + void *_buf,size_t _buf_sz){ + return th_encode_ctl(((th_api_wrapper *)_te->i->codec_setup)->encode, + _req,_buf,_buf_sz); +} + +static ogg_int64_t theora_encode_granule_frame(theora_state *_te, + ogg_int64_t _gp){ + return th_granule_frame(((th_api_wrapper *)_te->i->codec_setup)->encode,_gp); +} + +static double theora_encode_granule_time(theora_state *_te,ogg_int64_t _gp){ + return th_granule_time(((th_api_wrapper *)_te->i->codec_setup)->encode,_gp); +} + +static const oc_state_dispatch_vtable OC_ENC_DISPATCH_VTBL={ + (oc_state_clear_func)theora_encode_clear, + (oc_state_control_func)theora_encode_control, + (oc_state_granule_frame_func)theora_encode_granule_frame, + 
(oc_state_granule_time_func)theora_encode_granule_time, +}; + +int theora_encode_init(theora_state *_te,theora_info *_ci){ + th_api_info *apiinfo; + th_info info; + ogg_uint32_t keyframe_frequency_force; + /*Allocate our own combined API wrapper/theora_info struct. + We put them both in one malloc'd block so that when the API wrapper is + freed, the info struct goes with it. + This avoids having to figure out whether or not we need to free the info + struct in either theora_info_clear() or theora_clear().*/ + apiinfo=(th_api_info *)_ogg_malloc(sizeof(*apiinfo)); + if(apiinfo==NULL)return TH_EFAULT; + /*Make our own copy of the info struct, since its lifetime should be + independent of the one we were passed in.*/ + *&apiinfo->info=*_ci; + oc_theora_info2th_info(&info,_ci); + apiinfo->api.encode=th_encode_alloc(&info); + if(apiinfo->api.encode==NULL){ + _ogg_free(apiinfo); + return OC_EINVAL; + } + apiinfo->api.clear=(oc_setup_clear_func)th_enc_api_clear; + /*Provide entry points for ABI compatibility with old decoder shared libs.*/ + _te->internal_encode=(void *)&OC_ENC_DISPATCH_VTBL; + _te->internal_decode=NULL; + _te->granulepos=0; + _te->i=&apiinfo->info; + _te->i->codec_setup=&apiinfo->api; + /*Set the precise requested keyframe frequency.*/ + keyframe_frequency_force=_ci->keyframe_auto_p? 
+ _ci->keyframe_frequency_force:_ci->keyframe_frequency; + th_encode_ctl(apiinfo->api.encode, + TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE, + &keyframe_frequency_force,sizeof(keyframe_frequency_force)); + /*TODO: Additional codec setup using the extra fields in theora_info.*/ + return 0; +} + +int theora_encode_YUVin(theora_state *_te,yuv_buffer *_yuv){ + th_api_wrapper *api; + th_ycbcr_buffer buf; + int ret; + api=(th_api_wrapper *)_te->i->codec_setup; + buf[0].width=_yuv->y_width; + buf[0].height=_yuv->y_height; + buf[0].stride=_yuv->y_stride; + buf[0].data=_yuv->y; + buf[1].width=_yuv->uv_width; + buf[1].height=_yuv->uv_height; + buf[1].stride=_yuv->uv_stride; + buf[1].data=_yuv->u; + buf[2].width=_yuv->uv_width; + buf[2].height=_yuv->uv_height; + buf[2].stride=_yuv->uv_stride; + buf[2].data=_yuv->v; + ret=th_encode_ycbcr_in(api->encode,buf); + if(ret<0)return ret; + _te->granulepos=api->encode->state.granpos; + return ret; +} + +int theora_encode_packetout(theora_state *_te,int _last_p,ogg_packet *_op){ + th_api_wrapper *api; + api=(th_api_wrapper *)_te->i->codec_setup; + return th_encode_packetout(api->encode,_last_p,_op); +} + +int theora_encode_header(theora_state *_te,ogg_packet *_op){ + oc_enc_ctx *enc; + th_api_wrapper *api; + int ret; + api=(th_api_wrapper *)_te->i->codec_setup; + enc=api->encode; + /*If we've already started encoding, fail.*/ + if(enc->packet_state>OC_PACKET_EMPTY||enc->state.granpos!=0){ + return TH_EINVAL; + } + /*Reset the state to make sure we output an info packet.*/ + enc->packet_state=OC_PACKET_INFO_HDR; + ret=th_encode_flushheader(api->encode,NULL,_op); + return ret>=0?0:ret; +} + +int theora_encode_comment(theora_comment *_tc,ogg_packet *_op){ + oggpack_buffer opb; + void *buf; + int packet_state; + int ret; + packet_state=OC_PACKET_COMMENT_HDR; + oggpackB_writeinit(&opb); + ret=oc_state_flushheader(NULL,&packet_state,&opb,NULL,NULL, + th_version_string(),(th_comment *)_tc,_op); + if(ret>=0){ + /*The oggpack_buffer's lifetime ends 
with this function, so we have to + copy out the packet contents. + Presumably the application knows it is supposed to free this. + This part works nothing like the Vorbis API, and the documentation on it + has been wrong for some time, claiming libtheora owned the memory.*/ + buf=_ogg_malloc(_op->bytes); + if(buf==NULL){ + _op->packet=NULL; + ret=TH_EFAULT; + } + else{ + memcpy(buf,_op->packet,_op->bytes); + _op->packet=buf; /*_op now refers to the heap copy instead of opb's storage.*/ + ret=0; + } + } + oggpack_writeclear(&opb); + return ret; +} + +int theora_encode_tables(theora_state *_te,ogg_packet *_op){ /*Outputs a setup packet into _op through th_encode_flushheader(). Returns TH_EINVAL when packet_state is past OC_PACKET_EMPTY or granpos is nonzero; otherwise returns 0 for any non-negative flush result and the negative result unchanged.*/ + oc_enc_ctx *enc; + th_api_wrapper *api; + int ret; + api=(th_api_wrapper *)_te->i->codec_setup; + enc=api->encode; + /*If we've already started encoding, fail.*/ + if(enc->packet_state>OC_PACKET_EMPTY||enc->state.granpos!=0){ + return TH_EINVAL; + } + /*Reset the state to make sure we output a setup packet.*/ + enc->packet_state=OC_PACKET_SETUP_HDR; + ret=th_encode_flushheader(api->encode,NULL,_op); + return ret>=0?0:ret; +} diff --git a/thirdparty/libtheora/encfrag.c b/thirdparty/libtheora/encfrag.c new file mode 100644 index 0000000000..bb814c8e4a --- /dev/null +++ b/thirdparty/libtheora/encfrag.c @@ -0,0 +1,388 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: encfrag.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include +#include +#include "encint.h" + + +void oc_enc_frag_sub(const oc_enc_ctx *_enc,ogg_int16_t _diff[64], + const unsigned char *_src,const unsigned char *_ref,int _ystride){ + (*_enc->opt_vtable.frag_sub)(_diff,_src,_ref,_ystride); +} + +void oc_enc_frag_sub_c(ogg_int16_t _diff[64],const unsigned char *_src, + const unsigned char *_ref,int _ystride){ + int i; + for(i=0;i<8;i++){ + int j; + for(j=0;j<8;j++)_diff[i*8+j]=(ogg_int16_t)(_src[j]-_ref[j]); + _src+=_ystride; + _ref+=_ystride; + } +} + +void oc_enc_frag_sub_128(const oc_enc_ctx *_enc,ogg_int16_t _diff[64], + const unsigned char *_src,int _ystride){ + (*_enc->opt_vtable.frag_sub_128)(_diff,_src,_ystride); +} + +void oc_enc_frag_sub_128_c(ogg_int16_t *_diff, + const unsigned char *_src,int _ystride){ + int i; + for(i=0;i<8;i++){ + int j; + for(j=0;j<8;j++)_diff[i*8+j]=(ogg_int16_t)(_src[j]-128); + _src+=_ystride; + } +} + +unsigned oc_enc_frag_sad(const oc_enc_ctx *_enc,const unsigned char *_x, + const unsigned char *_y,int _ystride){ + return (*_enc->opt_vtable.frag_sad)(_x,_y,_ystride); +} + +unsigned oc_enc_frag_sad_c(const unsigned char *_src, + const unsigned char *_ref,int _ystride){ + unsigned sad; + int i; + sad=0; + for(i=8;i-->0;){ + int j; + for(j=0;j<8;j++)sad+=abs(_src[j]-_ref[j]); + _src+=_ystride; + _ref+=_ystride; + } + return sad; +} + +unsigned oc_enc_frag_sad_thresh(const oc_enc_ctx *_enc, + const unsigned char *_src,const unsigned char *_ref,int _ystride, + unsigned _thresh){ + return (*_enc->opt_vtable.frag_sad_thresh)(_src,_ref,_ystride,_thresh); +} + +unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src, + const unsigned char 
*_ref,int _ystride,unsigned _thresh){ + unsigned sad; + int i; + sad=0; + for(i=8;i-->0;){ + int j; + for(j=0;j<8;j++)sad+=abs(_src[j]-_ref[j]); + if(sad>_thresh)break; + _src+=_ystride; + _ref+=_ystride; + } + return sad; +} + +unsigned oc_enc_frag_sad2_thresh(const oc_enc_ctx *_enc, + const unsigned char *_src,const unsigned char *_ref1, + const unsigned char *_ref2,int _ystride,unsigned _thresh){ + return (*_enc->opt_vtable.frag_sad2_thresh)(_src,_ref1,_ref2,_ystride, + _thresh); +} + +unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh){ + unsigned sad; + int i; + sad=0; + for(i=8;i-->0;){ + int j; + for(j=0;j<8;j++)sad+=abs(_src[j]-(_ref1[j]+_ref2[j]>>1)); + if(sad>_thresh)break; + _src+=_ystride; + _ref1+=_ystride; + _ref2+=_ystride; + } + return sad; +} + +static void oc_diff_hadamard(ogg_int16_t _buf[64],const unsigned char *_src, + const unsigned char *_ref,int _ystride){ + int i; + for(i=0;i<8;i++){ + int t0; + int t1; + int t2; + int t3; + int t4; + int t5; + int t6; + int t7; + int r; + /*Hadamard stage 1:*/ + t0=_src[0]-_ref[0]+_src[4]-_ref[4]; + t4=_src[0]-_ref[0]-_src[4]+_ref[4]; + t1=_src[1]-_ref[1]+_src[5]-_ref[5]; + t5=_src[1]-_ref[1]-_src[5]+_ref[5]; + t2=_src[2]-_ref[2]+_src[6]-_ref[6]; + t6=_src[2]-_ref[2]-_src[6]+_ref[6]; + t3=_src[3]-_ref[3]+_src[7]-_ref[7]; + t7=_src[3]-_ref[3]-_src[7]+_ref[7]; + /*Hadamard stage 2:*/ + r=t0; + t0+=t2; + t2=r-t2; + r=t1; + t1+=t3; + t3=r-t3; + r=t4; + t4+=t6; + t6=r-t6; + r=t5; + t5+=t7; + t7=r-t7; + /*Hadamard stage 3:*/ + _buf[0*8+i]=(ogg_int16_t)(t0+t1); + _buf[1*8+i]=(ogg_int16_t)(t0-t1); + _buf[2*8+i]=(ogg_int16_t)(t2+t3); + _buf[3*8+i]=(ogg_int16_t)(t2-t3); + _buf[4*8+i]=(ogg_int16_t)(t4+t5); + _buf[5*8+i]=(ogg_int16_t)(t4-t5); + _buf[6*8+i]=(ogg_int16_t)(t6+t7); + _buf[7*8+i]=(ogg_int16_t)(t6-t7); + _src+=_ystride; + _ref+=_ystride; + } +} + +static void oc_diff_hadamard2(ogg_int16_t _buf[64],const 
unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride){ + int i; + for(i=0;i<8;i++){ + int t0; + int t1; + int t2; + int t3; + int t4; + int t5; + int t6; + int t7; + int r; + /*Hadamard stage 1:*/ + r=_ref1[0]+_ref2[0]>>1; + t4=_ref1[4]+_ref2[4]>>1; + t0=_src[0]-r+_src[4]-t4; + t4=_src[0]-r-_src[4]+t4; + r=_ref1[1]+_ref2[1]>>1; + t5=_ref1[5]+_ref2[5]>>1; + t1=_src[1]-r+_src[5]-t5; + t5=_src[1]-r-_src[5]+t5; + r=_ref1[2]+_ref2[2]>>1; + t6=_ref1[6]+_ref2[6]>>1; + t2=_src[2]-r+_src[6]-t6; + t6=_src[2]-r-_src[6]+t6; + r=_ref1[3]+_ref2[3]>>1; + t7=_ref1[7]+_ref2[7]>>1; + t3=_src[3]-r+_src[7]-t7; + t7=_src[3]-r-_src[7]+t7; + /*Hadamard stage 2:*/ + r=t0; + t0+=t2; + t2=r-t2; + r=t1; + t1+=t3; + t3=r-t3; + r=t4; + t4+=t6; + t6=r-t6; + r=t5; + t5+=t7; + t7=r-t7; + /*Hadamard stage 3:*/ + _buf[0*8+i]=(ogg_int16_t)(t0+t1); + _buf[1*8+i]=(ogg_int16_t)(t0-t1); + _buf[2*8+i]=(ogg_int16_t)(t2+t3); + _buf[3*8+i]=(ogg_int16_t)(t2-t3); + _buf[4*8+i]=(ogg_int16_t)(t4+t5); + _buf[5*8+i]=(ogg_int16_t)(t4-t5); + _buf[6*8+i]=(ogg_int16_t)(t6+t7); + _buf[7*8+i]=(ogg_int16_t)(t6-t7); + _src+=_ystride; + _ref1+=_ystride; + _ref2+=_ystride; + } +} + +static void oc_intra_hadamard(ogg_int16_t _buf[64],const unsigned char *_src, + int _ystride){ + int i; + for(i=0;i<8;i++){ + int t0; + int t1; + int t2; + int t3; + int t4; + int t5; + int t6; + int t7; + int r; + /*Hadamard stage 1:*/ + t0=_src[0]+_src[4]; + t4=_src[0]-_src[4]; + t1=_src[1]+_src[5]; + t5=_src[1]-_src[5]; + t2=_src[2]+_src[6]; + t6=_src[2]-_src[6]; + t3=_src[3]+_src[7]; + t7=_src[3]-_src[7]; + /*Hadamard stage 2:*/ + r=t0; + t0+=t2; + t2=r-t2; + r=t1; + t1+=t3; + t3=r-t3; + r=t4; + t4+=t6; + t6=r-t6; + r=t5; + t5+=t7; + t7=r-t7; + /*Hadamard stage 3:*/ + _buf[0*8+i]=(ogg_int16_t)(t0+t1); + _buf[1*8+i]=(ogg_int16_t)(t0-t1); + _buf[2*8+i]=(ogg_int16_t)(t2+t3); + _buf[3*8+i]=(ogg_int16_t)(t2-t3); + _buf[4*8+i]=(ogg_int16_t)(t4+t5); + _buf[5*8+i]=(ogg_int16_t)(t4-t5); + 
_buf[6*8+i]=(ogg_int16_t)(t6+t7); + _buf[7*8+i]=(ogg_int16_t)(t6-t7); + _src+=_ystride; + } +} + +unsigned oc_hadamard_sad_thresh(const ogg_int16_t _buf[64],unsigned _thresh){ + unsigned sad; + int t0; + int t1; + int t2; + int t3; + int t4; + int t5; + int t6; + int t7; + int r; + int i; + sad=0; + for(i=0;i<8;i++){ + /*Hadamard stage 1:*/ + t0=_buf[i*8+0]+_buf[i*8+4]; + t4=_buf[i*8+0]-_buf[i*8+4]; + t1=_buf[i*8+1]+_buf[i*8+5]; + t5=_buf[i*8+1]-_buf[i*8+5]; + t2=_buf[i*8+2]+_buf[i*8+6]; + t6=_buf[i*8+2]-_buf[i*8+6]; + t3=_buf[i*8+3]+_buf[i*8+7]; + t7=_buf[i*8+3]-_buf[i*8+7]; + /*Hadamard stage 2:*/ + r=t0; + t0+=t2; + t2=r-t2; + r=t1; + t1+=t3; + t3=r-t3; + r=t4; + t4+=t6; + t6=r-t6; + r=t5; + t5+=t7; + t7=r-t7; + /*Hadamard stage 3:*/ + r=abs(t0+t1); + r+=abs(t0-t1); + r+=abs(t2+t3); + r+=abs(t2-t3); + r+=abs(t4+t5); + r+=abs(t4-t5); + r+=abs(t6+t7); + r+=abs(t6-t7); + sad+=r; + if(sad>_thresh)break; + } + return sad; +} + +unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc, + const unsigned char *_src,const unsigned char *_ref,int _ystride, + unsigned _thresh){ + return (*_enc->opt_vtable.frag_satd_thresh)(_src,_ref,_ystride,_thresh); +} + +unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh){ + ogg_int16_t buf[64]; + oc_diff_hadamard(buf,_src,_ref,_ystride); + return oc_hadamard_sad_thresh(buf,_thresh); +} + +unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc, + const unsigned char *_src,const unsigned char *_ref1, + const unsigned char *_ref2,int _ystride,unsigned _thresh){ + return (*_enc->opt_vtable.frag_satd2_thresh)(_src,_ref1,_ref2,_ystride, + _thresh); +} + +unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh){ + ogg_int16_t buf[64]; + oc_diff_hadamard2(buf,_src,_ref1,_ref2,_ystride); + return oc_hadamard_sad_thresh(buf,_thresh); +} + +unsigned oc_enc_frag_intra_satd(const 
oc_enc_ctx *_enc, + const unsigned char *_src,int _ystride){ + return (*_enc->opt_vtable.frag_intra_satd)(_src,_ystride); +} + +unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride){ + ogg_int16_t buf[64]; + oc_intra_hadamard(buf,_src,_ystride); + return oc_hadamard_sad_thresh(buf,UINT_MAX) + -abs(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]); +} + +void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride){ + (*_enc->opt_vtable.frag_copy2)(_dst,_src1,_src2,_ystride); +} + +void oc_enc_frag_copy2_c(unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride){ + int i; + int j; + for(i=8;i-->0;){ + for(j=0;j<8;j++)_dst[j]=_src1[j]+_src2[j]>>1; + _dst+=_ystride; + _src1+=_ystride; + _src2+=_ystride; + } +} + +void oc_enc_frag_recon_intra(const oc_enc_ctx *_enc, + unsigned char *_dst,int _ystride,const ogg_int16_t _residue[64]){ + (*_enc->opt_vtable.frag_recon_intra)(_dst,_ystride,_residue); +} + +void oc_enc_frag_recon_inter(const oc_enc_ctx *_enc,unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){ + (*_enc->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue); +} diff --git a/thirdparty/libtheora/encinfo.c b/thirdparty/libtheora/encinfo.c new file mode 100644 index 0000000000..83be1dae72 --- /dev/null +++ b/thirdparty/libtheora/encinfo.c @@ -0,0 +1,121 @@ +#include +#include +#include "internal.h" +#include "enquant.h" +#include "huffenc.h" + + + +/*Packs a series of octets from a given byte array into the pack buffer. + _opb: The pack buffer to store the octets in. + _buf: The byte array containing the bytes to pack. 
+ _len: The number of octets to pack.*/ +static void oc_pack_octets(oggpack_buffer *_opb,const char *_buf,int _len){ + int i; + for(i=0;i<_len;i++)oggpackB_write(_opb,_buf[i],8); +} + + + +int oc_state_flushheader(oc_theora_state *_state,int *_packet_state, + oggpack_buffer *_opb,const th_quant_info *_qinfo, + const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS], + const char *_vendor,th_comment *_tc,ogg_packet *_op){ + unsigned char *packet; + int b_o_s; + if(_op==NULL)return TH_EFAULT; + switch(*_packet_state){ + /*Codec info header.*/ + case OC_PACKET_INFO_HDR:{ + if(_state==NULL)return TH_EFAULT; + oggpackB_reset(_opb); + /*Mark this packet as the info header.*/ + oggpackB_write(_opb,0x80,8); + /*Write the codec string.*/ + oc_pack_octets(_opb,"theora",6); + /*Write the codec bitstream version.*/ + oggpackB_write(_opb,TH_VERSION_MAJOR,8); + oggpackB_write(_opb,TH_VERSION_MINOR,8); + oggpackB_write(_opb,TH_VERSION_SUB,8); + /*Describe the encoded frame.*/ + oggpackB_write(_opb,_state->info.frame_width>>4,16); + oggpackB_write(_opb,_state->info.frame_height>>4,16); + oggpackB_write(_opb,_state->info.pic_width,24); + oggpackB_write(_opb,_state->info.pic_height,24); + oggpackB_write(_opb,_state->info.pic_x,8); + oggpackB_write(_opb,_state->info.pic_y,8); + oggpackB_write(_opb,_state->info.fps_numerator,32); + oggpackB_write(_opb,_state->info.fps_denominator,32); + oggpackB_write(_opb,_state->info.aspect_numerator,24); + oggpackB_write(_opb,_state->info.aspect_denominator,24); + oggpackB_write(_opb,_state->info.colorspace,8); + oggpackB_write(_opb,_state->info.target_bitrate,24); + oggpackB_write(_opb,_state->info.quality,6); + oggpackB_write(_opb,_state->info.keyframe_granule_shift,5); + oggpackB_write(_opb,_state->info.pixel_fmt,2); + /*Spare configuration bits.*/ + oggpackB_write(_opb,0,3); + b_o_s=1; + }break; + /*Comment header.*/ + case OC_PACKET_COMMENT_HDR:{ + int vendor_len; + int i; + if(_tc==NULL)return TH_EFAULT; + vendor_len=strlen(_vendor); + 
oggpackB_reset(_opb); + /*Mark this packet as the comment header.*/ + oggpackB_write(_opb,0x81,8); + /*Write the codec string.*/ + oc_pack_octets(_opb,"theora",6); + /*Write the vendor string.*/ + oggpack_write(_opb,vendor_len,32); + oc_pack_octets(_opb,_vendor,vendor_len); + oggpack_write(_opb,_tc->comments,32); + for(i=0;i<_tc->comments;i++){ + if(_tc->user_comments[i]!=NULL){ + oggpack_write(_opb,_tc->comment_lengths[i],32); + oc_pack_octets(_opb,_tc->user_comments[i],_tc->comment_lengths[i]); + } + else oggpack_write(_opb,0,32); + } + b_o_s=0; + }break; + /*Codec setup header.*/ + case OC_PACKET_SETUP_HDR:{ + int ret; + oggpackB_reset(_opb); + /*Mark this packet as the setup header.*/ + oggpackB_write(_opb,0x82,8); + /*Write the codec string.*/ + oc_pack_octets(_opb,"theora",6); + /*Write the quantizer tables.*/ + oc_quant_params_pack(_opb,_qinfo); + /*Write the huffman codes.*/ + ret=oc_huff_codes_pack(_opb,_codes); + /*This should never happen, because we validate the tables when they + are set. + If you see, it's a good chance memory is being corrupted.*/ + if(ret<0)return ret; + b_o_s=0; + }break; + /*No more headers to emit.*/ + default:return 0; + } + /*This is kind of fugly: we hand the user a buffer which they do not own. + We will overwrite it when the next packet is output, so the user better be + done with it by then. + Vorbis is little better: it hands back buffers that it will free the next + time the headers are requested, or when the encoder is cleared. 
+ Hopefully libogg2 will make this much cleaner.*/ + packet=oggpackB_get_buffer(_opb); + /*If there's no packet, malloc failed while writing.*/ + if(packet==NULL)return TH_EFAULT; + _op->packet=packet; + _op->bytes=oggpackB_bytes(_opb); + _op->b_o_s=b_o_s; + _op->e_o_s=0; + _op->granulepos=0; + _op->packetno=*_packet_state+3; + return ++(*_packet_state)+3; +} diff --git a/thirdparty/libtheora/encint.h b/thirdparty/libtheora/encint.h new file mode 100644 index 0000000000..97897d5a04 --- /dev/null +++ b/thirdparty/libtheora/encint.h @@ -0,0 +1,493 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: encint.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#if !defined(_encint_H) +# define _encint_H (1) +# if defined(HAVE_CONFIG_H) +# include "config.h" +# endif +# include "theora/theoraenc.h" +# include "internal.h" +# include "ocintrin.h" +# include "mathops.h" +# include "enquant.h" +# include "huffenc.h" +/*# define OC_COLLECT_METRICS*/ + + + +typedef oc_mv oc_mv2[2]; + +typedef struct oc_enc_opt_vtable oc_enc_opt_vtable; +typedef struct oc_mb_enc_info oc_mb_enc_info; +typedef struct oc_mode_scheme_chooser oc_mode_scheme_chooser; +typedef struct oc_iir_filter oc_iir_filter; +typedef struct oc_frame_metrics oc_frame_metrics; +typedef struct oc_rc_state oc_rc_state; +typedef struct th_enc_ctx oc_enc_ctx; +typedef struct oc_token_checkpoint oc_token_checkpoint; + + + +/*Constants for the 
packet-out state machine specific to the encoder.*/ + +/*Next packet to emit: Data packet, but none are ready yet.*/ +#define OC_PACKET_EMPTY (0) +/*Next packet to emit: Data packet, and one is ready.*/ +#define OC_PACKET_READY (1) + +/*All features enabled.*/ +#define OC_SP_LEVEL_SLOW (0) +/*Enable early skip.*/ +#define OC_SP_LEVEL_EARLY_SKIP (1) +/*Disable motion compensation.*/ +#define OC_SP_LEVEL_NOMC (2) +/*Maximum valid speed level.*/ +#define OC_SP_LEVEL_MAX (2) + + +/*The bits used for each of the MB mode codebooks.*/ +extern const unsigned char OC_MODE_BITS[2][OC_NMODES]; + +/*The bits used for each of the MV codebooks.*/ +extern const unsigned char OC_MV_BITS[2][64]; + +/*The minimum value that can be stored in a SB run for each codeword. + The last entry is the upper bound on the length of a single SB run.*/ +extern const ogg_uint16_t OC_SB_RUN_VAL_MIN[8]; +/*The bits used for each SB run codeword.*/ +extern const unsigned char OC_SB_RUN_CODE_NBITS[7]; + +/*The bits used for each block run length (starting with 1).*/ +extern const unsigned char OC_BLOCK_RUN_CODE_NBITS[30]; + + + +/*Encoder specific functions with accelerated variants.*/ +struct oc_enc_opt_vtable{ + unsigned (*frag_sad)(const unsigned char *_src, + const unsigned char *_ref,int _ystride); + unsigned (*frag_sad_thresh)(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh); + unsigned (*frag_sad2_thresh)(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh); + unsigned (*frag_satd_thresh)(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh); + unsigned (*frag_satd2_thresh)(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh); + unsigned (*frag_intra_satd)(const unsigned char *_src,int _ystride); + void (*frag_sub)(ogg_int16_t _diff[64],const unsigned char *_src, + const unsigned char *_ref,int 
_ystride); + void (*frag_sub_128)(ogg_int16_t _diff[64], + const unsigned char *_src,int _ystride); + void (*frag_copy2)(unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride); + void (*frag_recon_intra)(unsigned char *_dst,int _ystride, + const ogg_int16_t _residue[64]); + void (*frag_recon_inter)(unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); + void (*fdct8x8)(ogg_int16_t _y[64],const ogg_int16_t _x[64]); +}; + + +void oc_enc_vtable_init(oc_enc_ctx *_enc); + + + +/*Encoder-specific macroblock information.*/ +struct oc_mb_enc_info{ + /*Neighboring macro blocks that have MVs available from the current frame.*/ + unsigned cneighbors[4]; + /*Neighboring macro blocks to use for MVs from the previous frame.*/ + unsigned pneighbors[4]; + /*The number of current-frame neighbors.*/ + unsigned char ncneighbors; + /*The number of previous-frame neighbors.*/ + unsigned char npneighbors; + /*Flags indicating which MB modes have been refined.*/ + unsigned char refined; + /*Motion vectors for a macro block for the current frame and the + previous two frames. + Each is a set of 2 vectors against OC_FRAME_GOLD and OC_FRAME_PREV, which + can be used to estimate constant velocity and constant acceleration + predictors. + Uninitialized MVs are (0,0).*/ + oc_mv2 analysis_mv[3]; + /*Current unrefined analysis MVs.*/ + oc_mv unref_mv[2]; + /*Unrefined block MVs.*/ + oc_mv block_mv[4]; + /*Refined block MVs.*/ + oc_mv ref_mv[4]; + /*Minimum motion estimation error from the analysis stage.*/ + ogg_uint16_t error[2]; + /*MB error for half-pel refinement for each frame type.*/ + unsigned satd[2]; + /*Block error for half-pel refinement.*/ + unsigned block_satd[4]; +}; + + + +/*State machine to estimate the opportunity cost of coding a MB mode.*/ +struct oc_mode_scheme_chooser{ + /*Pointers to the a list containing the index of each mode in the mode + alphabet used by each scheme. 
+ The first entry points to the dynamic scheme0_ranks, while the remaining 7 + point to the constant entries stored in OC_MODE_SCHEMES.*/ + const unsigned char *mode_ranks[8]; + /*The ranks for each mode when coded with scheme 0. + These are optimized so that the more frequent modes have lower ranks.*/ + unsigned char scheme0_ranks[OC_NMODES]; + /*The list of modes, sorted in descending order of frequency, that + corresponds to the ranks above.*/ + unsigned char scheme0_list[OC_NMODES]; + /*The number of times each mode has been chosen so far.*/ + int mode_counts[OC_NMODES]; + /*The list of mode coding schemes, sorted in ascending order of bit cost.*/ + unsigned char scheme_list[8]; + /*The number of bits used by each mode coding scheme.*/ + ptrdiff_t scheme_bits[8]; +}; + + +void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser); + + + +/*A 2nd order low-pass Bessel follower. + We use this for rate control because it has fast reaction time, but is + critically damped.*/ +struct oc_iir_filter{ + ogg_int32_t c[2]; + ogg_int64_t g; + ogg_int32_t x[2]; + ogg_int32_t y[2]; +}; + + + +/*The 2-pass metrics associated with a single frame.*/ +struct oc_frame_metrics{ + /*The log base 2 of the scale factor for this frame in Q24 format.*/ + ogg_int32_t log_scale; + /*The number of application-requested duplicates of this frame.*/ + unsigned dup_count:31; + /*The frame type from pass 1.*/ + unsigned frame_type:1; +}; + + + +/*Rate control state information.*/ +struct oc_rc_state{ + /*The target average bits per frame.*/ + ogg_int64_t bits_per_frame; + /*The current buffer fullness (bits available to be used).*/ + ogg_int64_t fullness; + /*The target buffer fullness. 
+ This is where we'd like to be by the last keyframe the appears in the next + buf_delay frames.*/ + ogg_int64_t target; + /*The maximum buffer fullness (total size of the buffer).*/ + ogg_int64_t max; + /*The log of the number of pixels in a frame in Q57 format.*/ + ogg_int64_t log_npixels; + /*The exponent used in the rate model in Q8 format.*/ + unsigned exp[2]; + /*The number of frames to distribute the buffer usage over.*/ + int buf_delay; + /*The total drop count from the previous frame. + This includes duplicates explicitly requested via the + TH_ENCCTL_SET_DUP_COUNT API as well as frames we chose to drop ourselves.*/ + ogg_uint32_t prev_drop_count; + /*The log of an estimated scale factor used to obtain the real framerate, for + VFR sources or, e.g., 12 fps content doubled to 24 fps, etc.*/ + ogg_int64_t log_drop_scale; + /*The log of estimated scale factor for the rate model in Q57 format.*/ + ogg_int64_t log_scale[2]; + /*The log of the target quantizer level in Q57 format.*/ + ogg_int64_t log_qtarget; + /*Will we drop frames to meet bitrate target?*/ + unsigned char drop_frames; + /*Do we respect the maximum buffer fullness?*/ + unsigned char cap_overflow; + /*Can the reservoir go negative?*/ + unsigned char cap_underflow; + /*Second-order lowpass filters to track scale and VFR.*/ + oc_iir_filter scalefilter[2]; + int inter_count; + int inter_delay; + int inter_delay_target; + oc_iir_filter vfrfilter; + /*Two-pass mode state. + 0 => 1-pass encoding. + 1 => 1st pass of 2-pass encoding. + 2 => 2nd pass of 2-pass encoding.*/ + int twopass; + /*Buffer for current frame metrics.*/ + unsigned char twopass_buffer[48]; + /*The number of bytes in the frame metrics buffer. 
+ When 2-pass encoding is enabled, this is set to 0 after each frame is + submitted, and must be non-zero before the next frame will be accepted.*/ + int twopass_buffer_bytes; + int twopass_buffer_fill; + /*Whether or not to force the next frame to be a keyframe.*/ + unsigned char twopass_force_kf; + /*The metrics for the previous frame.*/ + oc_frame_metrics prev_metrics; + /*The metrics for the current frame.*/ + oc_frame_metrics cur_metrics; + /*The buffered metrics for future frames.*/ + oc_frame_metrics *frame_metrics; + int nframe_metrics; + int cframe_metrics; + /*The index of the current frame in the circular metric buffer.*/ + int frame_metrics_head; + /*The frame count of each type (keyframes, delta frames, and dup frames); + 32 bits limits us to 2.268 years at 60 fps.*/ + ogg_uint32_t frames_total[3]; + /*The number of frames of each type yet to be processed.*/ + ogg_uint32_t frames_left[3]; + /*The sum of the scale values for each frame type.*/ + ogg_int64_t scale_sum[2]; + /*The start of the window over which the current scale sums are taken.*/ + int scale_window0; + /*The end of the window over which the current scale sums are taken.*/ + int scale_window_end; + /*The frame count of each type in the current 2-pass window; this does not + include dup frames.*/ + int nframes[3]; + /*The total accumulated estimation bias.*/ + ogg_int64_t rate_bias; +}; + + +void oc_rc_state_init(oc_rc_state *_rc,oc_enc_ctx *_enc); +void oc_rc_state_clear(oc_rc_state *_rc); + +void oc_enc_rc_resize(oc_enc_ctx *_enc); +int oc_enc_select_qi(oc_enc_ctx *_enc,int _qti,int _clamp); +void oc_enc_calc_lambda(oc_enc_ctx *_enc,int _frame_type); +int oc_enc_update_rc_state(oc_enc_ctx *_enc, + long _bits,int _qti,int _qi,int _trial,int _droppable); +int oc_enc_rc_2pass_out(oc_enc_ctx *_enc,unsigned char **_buf); +int oc_enc_rc_2pass_in(oc_enc_ctx *_enc,unsigned char *_buf,size_t _bytes); + + + +/*The internal encoder state.*/ +struct th_enc_ctx{ + /*Shared encoder/decoder state.*/ + 
oc_theora_state state; + /*Buffer in which to assemble packets.*/ + oggpack_buffer opb; + /*Encoder-specific macroblock information.*/ + oc_mb_enc_info *mb_info; + /*DC coefficients after prediction.*/ + ogg_int16_t *frag_dc; + /*The list of coded macro blocks, in coded order.*/ + unsigned *coded_mbis; + /*The number of coded macro blocks.*/ + size_t ncoded_mbis; + /*Whether or not packets are ready to be emitted. + This takes on negative values while there are remaining header packets to + be emitted, reaches 0 when the codec is ready for input, and becomes + positive when a frame has been processed and data packets are ready.*/ + int packet_state; + /*The maximum distance between keyframes.*/ + ogg_uint32_t keyframe_frequency_force; + /*The number of duplicates to produce for the next frame.*/ + ogg_uint32_t dup_count; + /*The number of duplicates remaining to be emitted for the current frame.*/ + ogg_uint32_t nqueued_dups; + /*The number of duplicates emitted for the last frame.*/ + ogg_uint32_t prev_dup_count; + /*The current speed level.*/ + int sp_level; + /*Whether or not VP3 compatibility mode has been enabled.*/ + unsigned char vp3_compatible; + /*Whether or not any INTER frames have been coded.*/ + unsigned char coded_inter_frame; + /*Whether or not previous frame was dropped.*/ + unsigned char prevframe_dropped; + /*Stores most recently chosen Huffman tables for each frame type, DC and AC + coefficients, and luma and chroma tokens. 
+ The actual Huffman table used for a given coefficient depends not only on + the choice made here, but also its index in the zig-zag ordering.*/ + unsigned char huff_idxs[2][2][2]; + /*Current count of bits used by each MV coding mode.*/ + size_t mv_bits[2]; + /*The mode scheme chooser for estimating mode coding costs.*/ + oc_mode_scheme_chooser chooser; + /*The number of vertical super blocks in an MCU.*/ + int mcu_nvsbs; + /*The SSD error for skipping each fragment in the current MCU.*/ + unsigned *mcu_skip_ssd; + /*The DCT token lists for each coefficient and each plane.*/ + unsigned char **dct_tokens[3]; + /*The extra bits associated with each DCT token.*/ + ogg_uint16_t **extra_bits[3]; + /*The number of DCT tokens for each coefficient for each plane.*/ + ptrdiff_t ndct_tokens[3][64]; + /*Pending EOB runs for each coefficient for each plane.*/ + ogg_uint16_t eob_run[3][64]; + /*The offset of the first DCT token for each coefficient for each plane.*/ + unsigned char dct_token_offs[3][64]; + /*The last DC coefficient for each plane and reference frame.*/ + int dc_pred_last[3][3]; +#if defined(OC_COLLECT_METRICS) + /*Fragment SATD statistics for MB mode estimation metrics.*/ + unsigned *frag_satd; + /*Fragment SSD statistics for MB mode estimation metrics.*/ + unsigned *frag_ssd; +#endif + /*The R-D optimization parameter.*/ + int lambda; + /*The huffman tables in use.*/ + th_huff_code huff_codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]; + /*The quantization parameters in use.*/ + th_quant_info qinfo; + oc_iquant *enquant_tables[64][3][2]; + oc_iquant_table enquant_table_data[64][3][2]; + /*An "average" quantizer for each quantizer type (INTRA or INTER) and qi + value. + This is used to paramterize the rate control decisions. + They are kept in the log domain to simplify later processing. 
+ Keep in mind these are DCT domain quantizers, and so are scaled by an + additional factor of 4 from the pixel domain.*/ + ogg_int64_t log_qavg[2][64]; + /*The buffer state used to drive rate control.*/ + oc_rc_state rc; + /*Table for encoder acceleration functions.*/ + oc_enc_opt_vtable opt_vtable; +}; + + +void oc_enc_analyze_intra(oc_enc_ctx *_enc,int _recode); +int oc_enc_analyze_inter(oc_enc_ctx *_enc,int _allow_keyframe,int _recode); +#if defined(OC_COLLECT_METRICS) +void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc); +void oc_enc_mode_metrics_dump(oc_enc_ctx *_enc); +#endif + + + +/*Perform fullpel motion search for a single MB against both reference frames.*/ +void oc_mcenc_search(oc_enc_ctx *_enc,int _mbi); +/*Refine a MB MV for one frame.*/ +void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame); +/*Refine the block MVs.*/ +void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi); + + + +/*Used to rollback a tokenlog transaction when we retroactively decide to skip + a fragment. 
+ A checkpoint is taken right before each token is added.*/ +struct oc_token_checkpoint{ + /*The color plane the token was added to.*/ + unsigned char pli; + /*The zig-zag index the token was added to.*/ + unsigned char zzi; + /*The outstanding EOB run count before the token was added.*/ + ogg_uint16_t eob_run; + /*The token count before the token was added.*/ + ptrdiff_t ndct_tokens; +}; + + + +void oc_enc_tokenize_start(oc_enc_ctx *_enc); +int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi, + ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct, + int _zzi,oc_token_checkpoint **_stack,int _acmin); +void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc, + const oc_token_checkpoint *_stack,int _n); +void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc, + int _pli,int _fragy0,int _frag_yend); +void oc_enc_tokenize_dc_frag_list(oc_enc_ctx *_enc,int _pli, + const ptrdiff_t *_coded_fragis,ptrdiff_t _ncoded_fragis, + int _prev_ndct_tokens1,int _prev_eob_run1); +void oc_enc_tokenize_finish(oc_enc_ctx *_enc); + + + +/*Utility routine to encode one of the header packets.*/ +int oc_state_flushheader(oc_theora_state *_state,int *_packet_state, + oggpack_buffer *_opb,const th_quant_info *_qinfo, + const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS], + const char *_vendor,th_comment *_tc,ogg_packet *_op); + + + +/*Encoder-specific accelerated functions.*/ +void oc_enc_frag_sub(const oc_enc_ctx *_enc,ogg_int16_t _diff[64], + const unsigned char *_src,const unsigned char *_ref,int _ystride); +void oc_enc_frag_sub_128(const oc_enc_ctx *_enc,ogg_int16_t _diff[64], + const unsigned char *_src,int _ystride); +unsigned oc_enc_frag_sad(const oc_enc_ctx *_enc,const unsigned char *_src, + const unsigned char *_ref,int _ystride); +unsigned oc_enc_frag_sad_thresh(const oc_enc_ctx *_enc, + const unsigned char *_src,const unsigned char *_ref,int _ystride, + unsigned _thresh); +unsigned oc_enc_frag_sad2_thresh(const oc_enc_ctx *_enc, + const unsigned char 
*_src,const unsigned char *_ref1, + const unsigned char *_ref2,int _ystride,unsigned _thresh); +unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc, + const unsigned char *_src,const unsigned char *_ref,int _ystride, + unsigned _thresh); +unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc, + const unsigned char *_src,const unsigned char *_ref1, + const unsigned char *_ref2,int _ystride,unsigned _thresh); +unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc, + const unsigned char *_src,int _ystride); +void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride); +void oc_enc_frag_recon_intra(const oc_enc_ctx *_enc, + unsigned char *_dst,int _ystride,const ogg_int16_t _residue[64]); +void oc_enc_frag_recon_inter(const oc_enc_ctx *_enc,unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); +void oc_enc_fdct8x8(const oc_enc_ctx *_enc,ogg_int16_t _y[64], + const ogg_int16_t _x[64]); + +/*Default pure-C implementations.*/ +void oc_enc_vtable_init_c(oc_enc_ctx *_enc); + +void oc_enc_frag_sub_c(ogg_int16_t _diff[64], + const unsigned char *_src,const unsigned char *_ref,int _ystride); +void oc_enc_frag_sub_128_c(ogg_int16_t _diff[64], + const unsigned char *_src,int _ystride); +void oc_enc_frag_copy2_c(unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride); +unsigned oc_enc_frag_sad_c(const unsigned char *_src, + const unsigned char *_ref,int _ystride); +unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh); +unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh); +unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh); +unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src, + 
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh); +unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride); +void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]); + +#endif diff --git a/thirdparty/libtheora/encode.c b/thirdparty/libtheora/encode.c new file mode 100644 index 0000000000..0c5ea6a172 --- /dev/null +++ b/thirdparty/libtheora/encode.c @@ -0,0 +1,1615 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: encode.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include +#include +#include "encint.h" +#if defined(OC_X86_ASM) +# include "x86/x86enc.h" +#endif + + + +/*The default quantization parameters used by VP3.1.*/ +static const int OC_VP31_RANGE_SIZES[1]={63}; +static const th_quant_base OC_VP31_BASES_INTRA_Y[2]={ + { + 16, 11, 10, 16, 24, 40, 51, 61, + 12, 12, 14, 19, 26, 58, 60, 55, + 14, 13, 16, 24, 40, 57, 69, 56, + 14, 17, 22, 29, 51, 87, 80, 62, + 18, 22, 37, 58, 68,109,103, 77, + 24, 35, 55, 64, 81,104,113, 92, + 49, 64, 78, 87,103,121,120,101, + 72, 92, 95, 98,112,100,103, 99 + }, + { + 16, 11, 10, 16, 24, 40, 51, 61, + 12, 12, 14, 19, 26, 58, 60, 55, + 14, 13, 16, 24, 40, 57, 69, 56, + 14, 17, 22, 29, 51, 87, 80, 62, + 18, 22, 37, 58, 68,109,103, 77, + 24, 35, 55, 64, 81,104,113, 92, + 49, 64, 78, 87,103,121,120,101, + 72, 92, 95, 98,112,100,103, 99 + } +}; +static const th_quant_base 
OC_VP31_BASES_INTRA_C[2]={ + { + 17, 18, 24, 47, 99, 99, 99, 99, + 18, 21, 26, 66, 99, 99, 99, 99, + 24, 26, 56, 99, 99, 99, 99, 99, + 47, 66, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99 + }, + { + 17, 18, 24, 47, 99, 99, 99, 99, + 18, 21, 26, 66, 99, 99, 99, 99, + 24, 26, 56, 99, 99, 99, 99, 99, + 47, 66, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99 + } +}; +static const th_quant_base OC_VP31_BASES_INTER[2]={ + { + 16, 16, 16, 20, 24, 28, 32, 40, + 16, 16, 20, 24, 28, 32, 40, 48, + 16, 20, 24, 28, 32, 40, 48, 64, + 20, 24, 28, 32, 40, 48, 64, 64, + 24, 28, 32, 40, 48, 64, 64, 64, + 28, 32, 40, 48, 64, 64, 64, 96, + 32, 40, 48, 64, 64, 64, 96,128, + 40, 48, 64, 64, 64, 96,128,128 + }, + { + 16, 16, 16, 20, 24, 28, 32, 40, + 16, 16, 20, 24, 28, 32, 40, 48, + 16, 20, 24, 28, 32, 40, 48, 64, + 20, 24, 28, 32, 40, 48, 64, 64, + 24, 28, 32, 40, 48, 64, 64, 64, + 28, 32, 40, 48, 64, 64, 64, 96, + 32, 40, 48, 64, 64, 64, 96,128, + 40, 48, 64, 64, 64, 96,128,128 + } +}; + +const th_quant_info TH_VP31_QUANT_INFO={ + { + 220,200,190,180,170,170,160,160, + 150,150,140,140,130,130,120,120, + 110,110,100,100, 90, 90, 90, 80, + 80, 80, 70, 70, 70, 60, 60, 60, + 60, 50, 50, 50, 50, 40, 40, 40, + 40, 40, 30, 30, 30, 30, 30, 30, + 30, 20, 20, 20, 20, 20, 20, 20, + 20, 10, 10, 10, 10, 10, 10, 10 + }, + { + 500,450,400,370,340,310,285,265, + 245,225,210,195,185,180,170,160, + 150,145,135,130,125,115,110,107, + 100, 96, 93, 89, 85, 82, 75, 74, + 70, 68, 64, 60, 57, 56, 52, 50, + 49, 45, 44, 43, 40, 38, 37, 35, + 33, 32, 30, 29, 28, 25, 24, 22, + 21, 19, 18, 17, 15, 13, 12, 10 + }, + { + 30,25,20,20,15,15,14,14, + 13,13,12,12,11,11,10,10, + 9, 9, 8, 8, 7, 7, 7, 7, + 6, 6, 6, 6, 5, 5, 5, 5, + 4, 4, 4, 4, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, + 0, 0, 0, 0, 0, 0, 
0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + }, + { + { + {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_Y}, + {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_C}, + {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_C} + }, + { + {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER}, + {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER}, + {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER} + } + } +}; + +/*The current default quantization parameters.*/ +static const int OC_DEF_QRANGE_SIZES[3]={32,16,15}; +static const th_quant_base OC_DEF_BASES_INTRA_Y[4]={ + { + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + }, + { + 15, 12, 12, 15, 18, 20, 20, 21, + 13, 13, 14, 17, 18, 21, 21, 20, + 14, 14, 15, 18, 20, 21, 21, 21, + 14, 16, 17, 19, 20, 21, 21, 21, + 16, 17, 20, 21, 21, 21, 21, 21, + 18, 19, 20, 21, 21, 21, 21, 21, + 20, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21 + }, + { + 16, 12, 11, 16, 20, 25, 27, 28, + 13, 13, 14, 18, 21, 28, 28, 27, + 14, 13, 16, 20, 25, 28, 28, 28, + 14, 16, 19, 22, 27, 29, 29, 28, + 17, 19, 25, 28, 28, 30, 30, 29, + 20, 24, 27, 28, 29, 30, 30, 29, + 27, 28, 29, 29, 30, 30, 30, 30, + 29, 29, 29, 29, 30, 30, 30, 29 + }, + { + 16, 11, 10, 16, 24, 40, 51, 61, + 12, 12, 14, 19, 26, 58, 60, 55, + 14, 13, 16, 24, 40, 57, 69, 56, + 14, 17, 22, 29, 51, 87, 80, 62, + 18, 22, 37, 58, 68,109,103, 77, + 24, 35, 55, 64, 81,104,113, 92, + 49, 64, 78, 87,103,121,120,101, + 72, 92, 95, 98,112,100,103, 99 + } +}; +static const th_quant_base OC_DEF_BASES_INTRA_C[4]={ + { + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19 + }, + { + 18, 18, 21, 25, 26, 26, 
26, 26, + 18, 20, 22, 26, 26, 26, 26, 26, + 21, 22, 25, 26, 26, 26, 26, 26, + 25, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26 + }, + { + 17, 18, 22, 31, 36, 36, 36, 36, + 18, 20, 24, 34, 36, 36, 36, 36, + 22, 24, 33, 36, 36, 36, 36, 36, + 31, 34, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36 + }, + { + 17, 18, 24, 47, 99, 99, 99, 99, + 18, 21, 26, 66, 99, 99, 99, 99, + 24, 26, 56, 99, 99, 99, 99, 99, + 47, 66, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99 + } +}; +static const th_quant_base OC_DEF_BASES_INTER[4]={ + { + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21 + }, + { + 18, 18, 18, 21, 23, 24, 25, 27, + 18, 18, 21, 23, 24, 25, 27, 28, + 18, 21, 23, 24, 25, 27, 28, 29, + 21, 23, 24, 25, 27, 28, 29, 29, + 23, 24, 25, 27, 28, 29, 29, 29, + 24, 25, 27, 28, 29, 29, 29, 30, + 25, 27, 28, 29, 29, 29, 30, 30, + 27, 28, 29, 29, 29, 30, 30, 30 + }, + { + 17, 17, 17, 20, 23, 26, 28, 32, + 17, 17, 20, 23, 26, 28, 32, 34, + 17, 20, 23, 26, 28, 32, 34, 37, + 20, 23, 26, 28, 32, 34, 37, 37, + 23, 26, 28, 32, 34, 37, 37, 37, + 26, 28, 32, 34, 37, 37, 37, 41, + 28, 32, 34, 37, 37, 37, 41, 42, + 32, 34, 37, 37, 37, 41, 42, 42 + }, + { + 16, 16, 16, 20, 24, 28, 32, 40, + 16, 16, 20, 24, 28, 32, 40, 48, + 16, 20, 24, 28, 32, 40, 48, 64, + 20, 24, 28, 32, 40, 48, 64, 64, + 24, 28, 32, 40, 48, 64, 64, 64, + 28, 32, 40, 48, 64, 64, 64, 96, + 32, 40, 48, 64, 64, 64, 96,128, + 40, 48, 64, 64, 64, 96,128,128 + } +}; + +const 
th_quant_info TH_DEF_QUANT_INFO={ + { + 365,348,333,316,300,287,277,265, + 252,240,229,219,206,197,189,180, + 171,168,160,153,146,139,132,127, + 121,115,110,107,101, 97, 94, 89, + 85, 83, 78, 73, 72, 67, 66, 62, + 60, 59, 56, 53, 52, 48, 47, 43, + 42, 40, 36, 35, 34, 33, 31, 30, + 28, 25, 24, 22, 20, 17, 14, 10 + }, + { + 365,348,333,316,300,287,277,265, + 252,240,229,219,206,197,189,180, + 171,168,160,153,146,139,132,127, + 121,115,110,107,101, 97, 94, 89, + 85, 83, 78, 73, 72, 67, 66, 62, + 60, 59, 56, 53, 52, 48, 47, 43, + 42, 40, 36, 35, 34, 33, 31, 30, + 28, 25, 24, 22, 20, 17, 14, 10 + }, + { + 30,25,20,20,15,15,14,14, + 13,13,12,12,11,11,10,10, + 9, 9, 8, 8, 7, 7, 7, 7, + 6, 6, 6, 6, 5, 5, 5, 5, + 4, 4, 4, 4, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + }, + { + { + {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTRA_Y}, + {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTRA_C}, + {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTRA_C} + }, + { + {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTER}, + {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTER}, + {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTER} + } + } +}; + + + +/*The Huffman codes used for macro block modes.*/ + +const unsigned char OC_MODE_BITS[2][OC_NMODES]={ + /*Codebook 0: a maximally skewed prefix code.*/ + {1,2,3,4,5,6,7,7}, + /*Codebook 1: a fixed-length code.*/ + {3,3,3,3,3,3,3,3} +}; + +static const unsigned char OC_MODE_CODES[2][OC_NMODES]={ + /*Codebook 0: a maximally skewed prefix code.*/ + {0x00,0x02,0x06,0x0E,0x1E,0x3E,0x7E,0x7F}, + /*Codebook 1: a fixed-length code.*/ + {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07} +}; + + +/*The Huffman codes used for motion vectors.*/ + +const unsigned char OC_MV_BITS[2][64]={ + /*Codebook 0: VLC code.*/ + { + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,7,7,7,7,7,7,7,7,6,6,6,6,4,4,3, + 3, + 3,4,4,6,6,6,6,7,7,7,7,7,7,7,7,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 + }, + /*Codebook 1: (5 bit magnitude, 1 bit sign). 
+ This wastes a code word (0x01, negative zero), or a bit (0x00, positive + zero, requires only 5 bits to uniquely decode), but is hopefully not used + very often.*/ + { + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 + } +}; + +static const unsigned char OC_MV_CODES[2][64]={ + /*Codebook 0: VLC code.*/ + { + 0xFF,0xFD,0xFB,0xF9,0xF7,0xF5,0xF3, + 0xF1,0xEF,0xED,0xEB,0xE9,0xE7,0xE5,0xE3, + 0xE1,0x6F,0x6D,0x6B,0x69,0x67,0x65,0x63, + 0x61,0x2F,0x2D,0x2B,0x29,0x09,0x07,0x02, + 0x00, + 0x01,0x06,0x08,0x28,0x2A,0x2C,0x2E,0x60, + 0x62,0x64,0x66,0x68,0x6A,0x6C,0x6E,0xE0, + 0xE2,0xE4,0xE6,0xE8,0xEA,0xEC,0xEE,0xF0, + 0xF2,0xF4,0xF6,0xF8,0xFA,0xFC,0xFE + }, + /*Codebook 1: (5 bit magnitude, 1 bit sign).*/ + { + 0x3F,0x3D,0x3B,0x39,0x37,0x35,0x33, + 0x31,0x2F,0x2D,0x2B,0x29,0x27,0x25,0x23, + 0x21,0x1F,0x1D,0x1B,0x19,0x17,0x15,0x13, + 0x11,0x0F,0x0D,0x0B,0x09,0x07,0x05,0x03, + 0x00, + 0x02,0x04,0x06,0x08,0x0A,0x0C,0x0E,0x10, + 0x12,0x14,0x16,0x18,0x1A,0x1C,0x1E,0x20, + 0x22,0x24,0x26,0x28,0x2A,0x2C,0x2E,0x30, + 0x32,0x34,0x36,0x38,0x3A,0x3C,0x3E + } +}; + + + +/*Super block run coding scheme: + Codeword Run Length + 0 1 + 10x 2-3 + 110x 4-5 + 1110xx 6-9 + 11110xxx 10-17 + 111110xxxx 18-33 + 111111xxxxxxxxxxxx 34-4129*/ +const ogg_uint16_t OC_SB_RUN_VAL_MIN[8]={1,2,4,6,10,18,34,4130}; +static const unsigned OC_SB_RUN_CODE_PREFIX[7]={ + 0,4,0xC,0x38,0xF0,0x3E0,0x3F000 +}; +const unsigned char OC_SB_RUN_CODE_NBITS[7]={1,3,4,6,8,10,18}; + + +/*Writes the bit pattern for the run length of a super block run to the given + oggpack_buffer. + _opb: The buffer to write to. + _run_count: The length of the run, which must be positive. + _flag: The current flag. 
+ _done: Whether or not more flags are to be encoded.*/ +static void oc_sb_run_pack(oggpack_buffer *_opb,ptrdiff_t _run_count, + int _flag,int _done){ + int i; + if(_run_count>=4129){ + do{ + oggpackB_write(_opb,0x3FFFF,18); /*The longest codeword: a run of 4129.*/ + _run_count-=4129; /*A maximal run is followed by an explicit 1-bit flag for whatever comes next.*/ + if(_run_count>0)oggpackB_write(_opb,_flag,1); /*Same flag: this run continues.*/ + else if(!_done)oggpackB_write(_opb,!_flag,1); /*Toggled flag: a new run follows; nothing is written when _done is set.*/ + } + while(_run_count>=4129); + if(_run_count<=0)return; /*The run was an exact multiple of 4129.*/ + } + for(i=0;_run_count>=OC_SB_RUN_VAL_MIN[i+1];i++); /*Find the code class whose range holds the remaining run length.*/ + oggpackB_write(_opb,OC_SB_RUN_CODE_PREFIX[i]+_run_count-OC_SB_RUN_VAL_MIN[i], + OC_SB_RUN_CODE_NBITS[i]); +} + + + +/*Block run coding scheme: + Codeword Run Length + 0x 1-2 + 10x 3-4 + 110x 5-6 + 1110xx 7-10 + 11110xx 11-14 + 11111xxxx 15-30 (both tables below are indexed by the run length minus one)*/ +const unsigned char OC_BLOCK_RUN_CODE_NBITS[30]={ + 2,2,3,3,4,4,6,6,6,6,7,7,7,7,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9 +}; +static const ogg_uint16_t OC_BLOCK_RUN_CODE_PATTERN[30]={ + 0x000,0x001,0x004,0x005,0x00C,0x00D,0x038, + 0x039,0x03A,0x03B,0x078,0x079,0x07A,0x07B,0x1F0, + 0x1F1,0x1F2,0x1F3,0x1F4,0x1F5,0x1F6,0x1F7,0x1F8, + 0x1F9,0x1FA,0x1FB,0x1FC,0x1FD,0x1FE,0x1FF +}; + + +/*Writes the bit pattern for the run length of a block run to the given + oggpack_buffer. + _opb: The buffer to write to. + _run_count: The length of the run. 
+ This must be positive, and no more than 30.*/ +static void oc_block_run_pack(oggpack_buffer *_opb,int _run_count){ + oggpackB_write(_opb,OC_BLOCK_RUN_CODE_PATTERN[_run_count-1], + OC_BLOCK_RUN_CODE_NBITS[_run_count-1]); +} + + +/*Writes the frame header to the encoder's bit buffer: a 0 bit marking a data packet, the frame type bit, the list of up to three 6-bit qi values (each of the first two followed by a 1-bit "another qi follows" flag), and, for key frames only, 3 reserved zero bits.*/ +static void oc_enc_frame_header_pack(oc_enc_ctx *_enc){ + /*Mark this as a data packet.*/ + oggpackB_write(&_enc->opb,0,1); + /*Output the frame type (key frame or delta frame).*/ + oggpackB_write(&_enc->opb,_enc->state.frame_type,1); + /*Write out the current qi list: 6 bits per entry, with a continuation bit + after the first and second entries (1 if another qi follows, 0 if not).*/ + oggpackB_write(&_enc->opb,_enc->state.qis[0],6); + if(_enc->state.nqis>1){ + oggpackB_write(&_enc->opb,1,1); + oggpackB_write(&_enc->opb,_enc->state.qis[1],6); + if(_enc->state.nqis>2){ + oggpackB_write(&_enc->opb,1,1); + oggpackB_write(&_enc->opb,_enc->state.qis[2],6); /*No continuation bit follows the third qi.*/ + } + else oggpackB_write(&_enc->opb,0,1); + } + else oggpackB_write(&_enc->opb,0,1); + if(_enc->state.frame_type==OC_INTRA_FRAME){ + /*Key frames have 3 unused configuration bits, holdovers from the VP3 days. + Most of the other unused bits in the VP3 headers were eliminated. + Monty kept these to leave us some wiggle room for future expansion, + though a single bit in all frames would have been far more useful.*/ + oggpackB_write(&_enc->opb,0,3); + } +} + +/*Writes the bit flags for whether or not each super block is partially coded + or not. + These flags are run-length encoded, with the flag value alternating between + each run. 
+ Return: The number partially coded SBs.*/ +static unsigned oc_enc_partial_sb_flags_pack(oc_enc_ctx *_enc){ + const oc_sb_flags *sb_flags; + unsigned nsbs; + unsigned sbi; + unsigned npartial; + int flag; + sb_flags=_enc->state.sb_flags; + nsbs=_enc->state.nsbs; + flag=sb_flags[0].coded_partially; + oggpackB_write(&_enc->opb,flag,1); + sbi=npartial=0; + do{ + unsigned run_count; + for(run_count=0;sbiopb,run_count,flag,sbi>=nsbs); + flag=!flag; + } + while(sbistate.sb_flags; + nsbs=_enc->state.nsbs; + /*Skip partially coded super blocks; their flags have already been coded.*/ + for(sbi=0;sb_flags[sbi].coded_partially;sbi++); + flag=sb_flags[sbi].coded_fully; + oggpackB_write(&_enc->opb,flag,1); + do{ + unsigned run_count; + for(run_count=0;sbiopb,run_count,flag,sbi>=nsbs); + flag=!flag; + } + while(sbistate.nsbs)oc_enc_coded_sb_flags_pack(_enc); + sb_maps=(const oc_sb_map *)_enc->state.sb_maps; + sb_flags=_enc->state.sb_flags; + nsbs=_enc->state.nsbs; + frags=_enc->state.frags; + for(sbi=0;sbiopb,flag,1); + run_count=0; + nsbs=sbi=0; + for(pli=0;pli<3;pli++){ + nsbs+=_enc->state.fplanes[pli].nsbs; + for(;sbi=0){ + if(frags[fragi].coded!=flag){ + oc_block_run_pack(&_enc->opb,run_count); + flag=!flag; + run_count=1; + } + else run_count++; + } + } + } + } + } + } + /*Flush any trailing block coded run.*/ + if(run_count>0)oc_block_run_pack(&_enc->opb,run_count); + } +} + +static void oc_enc_mb_modes_pack(oc_enc_ctx *_enc){ + const unsigned char *mode_codes; + const unsigned char *mode_bits; + const unsigned char *mode_ranks; + unsigned *coded_mbis; + size_t ncoded_mbis; + const signed char *mb_modes; + unsigned mbii; + int scheme; + int mb_mode; + scheme=_enc->chooser.scheme_list[0]; + /*Encode the best scheme.*/ + oggpackB_write(&_enc->opb,scheme,3); + /*If the chosen scheme is scheme 0, send the mode frequency ordering.*/ + if(scheme==0){ + for(mb_mode=0;mb_modeopb,_enc->chooser.scheme0_ranks[mb_mode],3); + } + } + mode_ranks=_enc->chooser.mode_ranks[scheme]; + 
mode_bits=OC_MODE_BITS[scheme+1>>3]; + mode_codes=OC_MODE_CODES[scheme+1>>3]; + coded_mbis=_enc->coded_mbis; + ncoded_mbis=_enc->ncoded_mbis; + mb_modes=_enc->state.mb_modes; + for(mbii=0;mbiiopb,mode_codes[rank],mode_bits[rank]); + } +} + +static void oc_enc_mv_pack(oc_enc_ctx *_enc,int _mv_scheme,int _dx,int _dy){ + oggpackB_write(&_enc->opb, + OC_MV_CODES[_mv_scheme][_dx+31],OC_MV_BITS[_mv_scheme][_dx+31]); + oggpackB_write(&_enc->opb, + OC_MV_CODES[_mv_scheme][_dy+31],OC_MV_BITS[_mv_scheme][_dy+31]); +} + +static void oc_enc_mvs_pack(oc_enc_ctx *_enc){ + const unsigned *coded_mbis; + size_t ncoded_mbis; + const oc_mb_map *mb_maps; + const signed char *mb_modes; + const oc_fragment *frags; + const oc_mv *frag_mvs; + unsigned mbii; + int mv_scheme; + /*Choose the coding scheme.*/ + mv_scheme=_enc->mv_bits[1]<_enc->mv_bits[0]; + oggpackB_write(&_enc->opb,mv_scheme,1); + /*Encode the motion vectors. + Macro blocks are iterated in Hilbert scan order, but the MVs within the + macro block are coded in raster order.*/ + coded_mbis=_enc->coded_mbis; + ncoded_mbis=_enc->ncoded_mbis; + mb_modes=_enc->state.mb_modes; + mb_maps=(const oc_mb_map *)_enc->state.mb_maps; + frags=_enc->state.frags; + frag_mvs=(const oc_mv *)_enc->state.frag_mvs; + for(mbii=0;mbiistate.nqis<=1)return; + ncoded_fragis=_enc->state.ntotal_coded_fragis; + if(ncoded_fragis<=0)return; + coded_fragis=_enc->state.coded_fragis; + frags=_enc->state.frags; + flag=!!frags[coded_fragis[0]].qii; + oggpackB_write(&_enc->opb,flag,1); + nqi0=0; + for(fragii=0;fragiiopb,run_count,flag,fragii>=ncoded_fragis); + flag=!flag; + } + if(_enc->state.nqis<3||nqi0>=ncoded_fragis)return; + for(fragii=0;!frags[coded_fragis[fragii]].qii;fragii++); + flag=frags[coded_fragis[fragii]].qii-1; + oggpackB_write(&_enc->opb,flag,1); + while(fragiiopb,run_count,flag,fragii>=ncoded_fragis); + flag=!flag; + } +} + +/*Counts the tokens of each type used for the given range of coefficient + indices in zig-zag order. 
+ _zzi_start: The first zig-zag index to include. + _zzi_end: The first zig-zag index to not include. + _token_counts_y: Returns the token counts for the Y' plane. + _token_counts_c: Returns the token counts for the Cb and Cr planes.*/ +static void oc_enc_count_tokens(oc_enc_ctx *_enc,int _zzi_start,int _zzi_end, + ptrdiff_t _token_counts_y[32],ptrdiff_t _token_counts_c[32]){ + const unsigned char *dct_tokens; + ptrdiff_t ndct_tokens; + int pli; + int zzi; + ptrdiff_t ti; + memset(_token_counts_y,0,32*sizeof(*_token_counts_y)); + memset(_token_counts_c,0,32*sizeof(*_token_counts_c)); + for(zzi=_zzi_start;zzi<_zzi_end;zzi++){ + dct_tokens=_enc->dct_tokens[0][zzi]; + ndct_tokens=_enc->ndct_tokens[0][zzi]; + for(ti=_enc->dct_token_offs[0][zzi];tidct_tokens[pli][zzi]; + ndct_tokens=_enc->ndct_tokens[pli][zzi]; + for(ti=_enc->dct_token_offs[pli][zzi];tihuff_codes[huffi+huff_offs][token].nbits; + } + } +} + +/*Returns the Huffman index using the fewest number of bits.*/ +static int oc_select_huff_idx(size_t _bit_counts[16]){ + int best_huffi; + int huffi; + best_huffi=0; + for(huffi=1;huffi<16;huffi++)if(_bit_counts[huffi]<_bit_counts[best_huffi]){ + best_huffi=huffi; + } + return best_huffi; +} + +static void oc_enc_huff_group_pack(oc_enc_ctx *_enc, + int _zzi_start,int _zzi_end,const int _huff_idxs[2]){ + int zzi; + for(zzi=_zzi_start;zzi<_zzi_end;zzi++){ + int pli; + for(pli=0;pli<3;pli++){ + const unsigned char *dct_tokens; + const ogg_uint16_t *extra_bits; + ptrdiff_t ndct_tokens; + const th_huff_code *huff_codes; + ptrdiff_t ti; + dct_tokens=_enc->dct_tokens[pli][zzi]; + extra_bits=_enc->extra_bits[pli][zzi]; + ndct_tokens=_enc->ndct_tokens[pli][zzi]; + huff_codes=_enc->huff_codes[_huff_idxs[pli+1>>1]]; + for(ti=_enc->dct_token_offs[pli][zzi];tiopb,huff_codes[token].pattern, + huff_codes[token].nbits); + neb=OC_DCT_TOKEN_EXTRA_BITS[token]; + if(neb)oggpackB_write(&_enc->opb,extra_bits[ti],neb); + } + } + } +} + +static void oc_enc_residual_tokens_pack(oc_enc_ctx 
*_enc){ + static const unsigned char OC_HUFF_GROUP_MIN[6]={0,1,6,15,28,64}; /*First zig-zag index of each of the 5 Huffman groups, plus the end of the last one.*/ + static const unsigned char *OC_HUFF_GROUP_MAX=OC_HUFF_GROUP_MIN+1; /*OC_HUFF_GROUP_MAX[i] is the first zig-zag index past group i.*/ + ptrdiff_t token_counts_y[32]; + ptrdiff_t token_counts_c[32]; + size_t bits_y[16]; + size_t bits_c[16]; + int huff_idxs[2]; + int frame_type; + int hgi; + frame_type=_enc->state.frame_type; + /*Choose which Huffman tables to use for the DC token list (zig-zag index 0): + one index for the Y' counts and one for the Cb/Cr counts.*/ + oc_enc_count_tokens(_enc,0,1,token_counts_y,token_counts_c); + memset(bits_y,0,sizeof(bits_y)); + memset(bits_c,0,sizeof(bits_c)); + oc_enc_count_bits(_enc,0,token_counts_y,bits_y); + oc_enc_count_bits(_enc,0,token_counts_c,bits_c); + huff_idxs[0]=oc_select_huff_idx(bits_y); + huff_idxs[1]=oc_select_huff_idx(bits_c); + /*Write the DC token list with the chosen tables. + The two 4-bit table indices go first and are remembered per frame type.*/ + oggpackB_write(&_enc->opb,huff_idxs[0],4); + oggpackB_write(&_enc->opb,huff_idxs[1],4); + _enc->huff_idxs[frame_type][0][0]=(unsigned char)huff_idxs[0]; + _enc->huff_idxs[frame_type][0][1]=(unsigned char)huff_idxs[1]; + oc_enc_huff_group_pack(_enc,0,1,huff_idxs); + /*Choose which Huffman tables to use for the AC token lists. + A single pair of indices is chosen from the bit counts accumulated over + all four AC groups.*/ + memset(bits_y,0,sizeof(bits_y)); + memset(bits_c,0,sizeof(bits_c)); + for(hgi=1;hgi<5;hgi++){ + oc_enc_count_tokens(_enc,OC_HUFF_GROUP_MIN[hgi],OC_HUFF_GROUP_MAX[hgi], + token_counts_y,token_counts_c); + oc_enc_count_bits(_enc,hgi,token_counts_y,bits_y); + oc_enc_count_bits(_enc,hgi,token_counts_c,bits_c); + } + huff_idxs[0]=oc_select_huff_idx(bits_y); + huff_idxs[1]=oc_select_huff_idx(bits_c); + /*Write the AC token lists using the chosen tables.*/ + oggpackB_write(&_enc->opb,huff_idxs[0],4); + oggpackB_write(&_enc->opb,huff_idxs[1],4); + _enc->huff_idxs[frame_type][1][0]=(unsigned char)huff_idxs[0]; + _enc->huff_idxs[frame_type][1][1]=(unsigned char)huff_idxs[1]; + for(hgi=1;hgi<5;hgi++){ + huff_idxs[0]+=16; /*Step to the next bank of 16 tables: AC group hgi uses table 16*hgi+index.*/ + huff_idxs[1]+=16; + oc_enc_huff_group_pack(_enc, + OC_HUFF_GROUP_MIN[hgi],OC_HUFF_GROUP_MAX[hgi],huff_idxs); + } +} + +static void oc_enc_frame_pack(oc_enc_ctx *_enc){ 
+ oggpackB_reset(&_enc->opb); + /*Only proceed if we have some coded blocks. + If there are no coded blocks, we can drop this frame simply by emitting a + 0 byte packet.*/ + if(_enc->state.ntotal_coded_fragis>0){ + oc_enc_frame_header_pack(_enc); + if(_enc->state.frame_type==OC_INTER_FRAME){ + /*Coded block flags, MB modes, and MVs are only needed for delta frames.*/ + oc_enc_coded_flags_pack(_enc); + oc_enc_mb_modes_pack(_enc); + oc_enc_mvs_pack(_enc); + } + oc_enc_block_qis_pack(_enc); + oc_enc_tokenize_finish(_enc); + oc_enc_residual_tokens_pack(_enc); + } + /*Success: Mark the packet as ready to be flushed.*/ + _enc->packet_state=OC_PACKET_READY; +#if defined(OC_COLLECT_METRICS) + oc_enc_mode_metrics_collect(_enc); +#endif +} + +/*Fills every entry of the encoder's function table (opt_vtable) with the default pure-C implementation.*/ +void oc_enc_vtable_init_c(oc_enc_ctx *_enc){ + /*The implementations prefixed with oc_enc_ are encoder-specific. + The rest (frag_recon_intra and frag_recon_inter) we re-use from the + decoder.*/ + _enc->opt_vtable.frag_sad=oc_enc_frag_sad_c; + _enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_c; + _enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_c; + _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_c; + _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_c; + _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_c; + _enc->opt_vtable.frag_sub=oc_enc_frag_sub_c; + _enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_c; + _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_c; + _enc->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c; + _enc->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c; + _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_c; +} + +/*Initialize the macro block neighbor lists for MC analysis. 
+ This assumes that the entire mb_info memory region has been initialized with + zeros.*/ +static void oc_enc_mb_info_init(oc_enc_ctx *_enc){ + oc_mb_enc_info *embs; + const signed char *mb_modes; + unsigned nhsbs; + unsigned nvsbs; + unsigned nhmbs; + unsigned nvmbs; + unsigned sby; + mb_modes=_enc->state.mb_modes; + embs=_enc->mb_info; + nhsbs=_enc->state.fplanes[0].nhsbs; + nvsbs=_enc->state.fplanes[0].nvsbs; + nhmbs=_enc->state.nhmbs; + nvmbs=_enc->state.nvmbs; + for(sby=0;sby>1); + mby=2*sby+(quadi+1>>1&1); + /*Fill in the neighbors with current motion vectors available.*/ + for(ni=0;ni=nhmbs||nmby<0||nmby>=nvmbs)continue; + nmbi=(nmby&~1)*nhmbs+((nmbx&~1)<<1)+OC_MB_MAP[nmby&1][nmbx&1]; + if(mb_modes[nmbi]==OC_MODE_INVALID)continue; + embs[mbi].cneighbors[embs[mbi].ncneighbors++]=nmbi; + } + /*Fill in the neighbors with previous motion vectors available.*/ + for(ni=0;ni<4;ni++){ + nmbx=mbx+PDX[ni]; + nmby=mby+PDY[ni]; + if(nmbx<0||nmbx>=nhmbs||nmby<0||nmby>=nvmbs)continue; + nmbi=(nmby&~1)*nhmbs+((nmbx&~1)<<1)+OC_MB_MAP[nmby&1][nmbx&1]; + if(mb_modes[nmbi]==OC_MODE_INVALID)continue; + embs[mbi].pneighbors[embs[mbi].npneighbors++]=nmbi; + } + } + } + } +} + +static int oc_enc_set_huffman_codes(oc_enc_ctx *_enc, + const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]){ + int ret; + if(_enc==NULL)return TH_EFAULT; + if(_enc->packet_state>OC_PACKET_SETUP_HDR)return TH_EINVAL; + if(_codes==NULL)_codes=TH_VP31_HUFF_CODES; + /*Validate the codes.*/ + oggpackB_reset(&_enc->opb); + ret=oc_huff_codes_pack(&_enc->opb,_codes); + if(ret<0)return ret; + memcpy(_enc->huff_codes,_codes,sizeof(_enc->huff_codes)); + return 0; +} + +/*Sets the quantization parameters to use. + This may only be called before the setup header is written. + If it is called multiple times, only the last call has any effect. + _qinfo: The quantization parameters. + These are described in more detail in theoraenc.h. 
+ This can be NULL, in which case the default quantization parameters + will be used.*/ +static int oc_enc_set_quant_params(oc_enc_ctx *_enc, + const th_quant_info *_qinfo){ + int qi; + int pli; + int qti; + if(_enc==NULL)return TH_EFAULT; + if(_enc->packet_state>OC_PACKET_SETUP_HDR)return TH_EINVAL; /*Too late: the packet state is already past the setup header.*/ + if(_qinfo==NULL)_qinfo=&TH_DEF_QUANT_INFO; + /*TODO: Analyze for packing purposes instead of just doing a shallow copy.*/ + memcpy(&_enc->qinfo,_qinfo,sizeof(_enc->qinfo)); + for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){ /*NOTE(review): presumably this points each table entry at its row of the *_table_data backing storage; confirm against the declarations in encint.h.*/ + _enc->state.dequant_tables[qi][pli][qti]= + _enc->state.dequant_table_data[qi][pli][qti]; + _enc->enquant_tables[qi][pli][qti]=_enc->enquant_table_data[qi][pli][qti]; + } + oc_enquant_tables_init(_enc->state.dequant_tables, + _enc->enquant_tables,_qinfo); + memcpy(_enc->state.loop_filter_limits,_qinfo->loop_filter_limits, + sizeof(_enc->state.loop_filter_limits)); + oc_enquant_qavg_init(_enc->log_qavg,_enc->state.dequant_tables, + _enc->state.info.pixel_fmt); + return 0; /*Success; the failure returns above are TH_EFAULT (NULL encoder) and TH_EINVAL (called too late).*/ +} +/*Forward declaration: oc_enc_init() calls this to clean up when an allocation fails.*/ +static void oc_enc_clear(oc_enc_ctx *_enc); +/*Initializes an encoder context from the requested settings. Return: 0 on success, the error code from oc_state_init() if that fails, or TH_EFAULT if an allocation fails.*/ +static int oc_enc_init(oc_enc_ctx *_enc,const th_info *_info){ + th_info info; + size_t mcu_nmbs; + ptrdiff_t mcu_nfrags; + int hdec; + int vdec; + int ret; + int pli; + /*Clean up the requested settings on a private copy: force our own version + numbers, cap quality at 63 (negative values become 32), and raise a + negative target bitrate to 0.*/ + memcpy(&info,_info,sizeof(info)); + info.version_major=TH_VERSION_MAJOR; + info.version_minor=TH_VERSION_MINOR; + info.version_subminor=TH_VERSION_SUB; + if(info.quality>63)info.quality=63; + if(info.quality<0)info.quality=32; + if(info.target_bitrate<0)info.target_bitrate=0; + /*Initialize the shared encoder/decoder state.*/ + ret=oc_state_init(&_enc->state,&info,4); + if(ret<0)return ret; + _enc->mb_info=_ogg_calloc(_enc->state.nmbs,sizeof(*_enc->mb_info)); + _enc->frag_dc=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_dc)); + _enc->coded_mbis= + (unsigned *)_ogg_malloc(_enc->state.nmbs*sizeof(*_enc->coded_mbis)); + hdec=!(_enc->state.info.pixel_fmt&1); + vdec=!(_enc->state.info.pixel_fmt&2); + /*If 
chroma is sub-sampled in the vertical direction, we have to encode two + super block rows of Y' for each super block row of Cb and Cr.*/ + _enc->mcu_nvsbs=1<mcu_nvsbs*_enc->state.fplanes[0].nhsbs*(size_t)4; + mcu_nfrags=4*mcu_nmbs+(8*mcu_nmbs>>hdec+vdec); + _enc->mcu_skip_ssd=(unsigned *)_ogg_malloc( + mcu_nfrags*sizeof(*_enc->mcu_skip_ssd)); + for(pli=0;pli<3;pli++){ + _enc->dct_tokens[pli]=(unsigned char **)oc_malloc_2d(64, + _enc->state.fplanes[pli].nfrags,sizeof(**_enc->dct_tokens)); + _enc->extra_bits[pli]=(ogg_uint16_t **)oc_malloc_2d(64, + _enc->state.fplanes[pli].nfrags,sizeof(**_enc->extra_bits)); + } +#if defined(OC_COLLECT_METRICS) + _enc->frag_satd=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_satd)); + _enc->frag_ssd=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_ssd)); +#endif +#if defined(OC_X86_ASM) + oc_enc_vtable_init_x86(_enc); +#else + oc_enc_vtable_init_c(_enc); +#endif + _enc->keyframe_frequency_force=1<<_enc->state.info.keyframe_granule_shift; + _enc->state.qis[0]=_enc->state.info.quality; + _enc->state.nqis=1; + oc_rc_state_init(&_enc->rc,_enc); + oggpackB_writeinit(&_enc->opb); + if(_enc->mb_info==NULL||_enc->frag_dc==NULL||_enc->coded_mbis==NULL|| + _enc->mcu_skip_ssd==NULL||_enc->dct_tokens[0]==NULL|| + _enc->dct_tokens[1]==NULL||_enc->dct_tokens[2]==NULL|| + _enc->extra_bits[0]==NULL||_enc->extra_bits[1]==NULL|| + _enc->extra_bits[2]==NULL +#if defined(OC_COLLECT_METRICS) + ||_enc->frag_satd==NULL||_enc->frag_ssd==NULL +#endif + ){ + oc_enc_clear(_enc); + return TH_EFAULT; + } + oc_mode_scheme_chooser_init(&_enc->chooser); + oc_enc_mb_info_init(_enc); + memset(_enc->huff_idxs,0,sizeof(_enc->huff_idxs)); + /*Reset the packet-out state machine.*/ + _enc->packet_state=OC_PACKET_INFO_HDR; + _enc->dup_count=0; + _enc->nqueued_dups=0; + _enc->prev_dup_count=0; + /*Enable speed optimizations up through early skip by default.*/ + _enc->sp_level=OC_SP_LEVEL_EARLY_SKIP; + /*Disable VP3 compatibility by default.*/ + _enc->vp3_compatible=0; + 
/*No INTER frames coded yet.*/ + _enc->coded_inter_frame=0; + memcpy(_enc->huff_codes,TH_VP31_HUFF_CODES,sizeof(_enc->huff_codes)); + oc_enc_set_quant_params(_enc,NULL); + return 0; +} +/*Releases everything owned by the encoder context: the rate control state, the bit buffer, the encoder's own buffers (freed in the reverse of the order oc_enc_init() allocates them), and finally the shared codec state. The context structure itself is not freed.*/ +static void oc_enc_clear(oc_enc_ctx *_enc){ + int pli; + oc_rc_state_clear(&_enc->rc); +#if defined(OC_COLLECT_METRICS) + oc_enc_mode_metrics_dump(_enc); +#endif + oggpackB_writeclear(&_enc->opb); +#if defined(OC_COLLECT_METRICS) + _ogg_free(_enc->frag_ssd); + _ogg_free(_enc->frag_satd); +#endif + for(pli=3;pli-->0;){ /*Counts pli down from 2 to 0.*/ + oc_free_2d(_enc->extra_bits[pli]); + oc_free_2d(_enc->dct_tokens[pli]); + } + _ogg_free(_enc->mcu_skip_ssd); + _ogg_free(_enc->coded_mbis); + _ogg_free(_enc->frag_dc); + _ogg_free(_enc->mb_info); + oc_state_clear(&_enc->state); +} +/*Drops the current frame: the previous reconstruction stands in for it, the drop is recorded for later motion vector analysis, and the output packet is emptied.*/ +static void oc_enc_drop_frame(th_enc_ctx *_enc){ + /*Use the previous frame's reconstruction.*/ + _enc->state.ref_frame_idx[OC_FRAME_SELF]= + _enc->state.ref_frame_idx[OC_FRAME_PREV]; + /*Flag motion vector analysis about the frame drop.*/ + _enc->prevframe_dropped=1; + /*Zero the packet.*/ + oggpackB_reset(&_enc->opb); +} +/*Encodes the current frame as a key frame. _recode: Passed on to oc_enc_analyze_intra(); when it is zero on frame 0, the first encode is treated as a dry run and the frame is encoded a second time with _recode set.*/ +static void oc_enc_compress_keyframe(oc_enc_ctx *_enc,int _recode){ + if(_enc->state.info.target_bitrate>0){ + _enc->state.qis[0]=oc_enc_select_qi(_enc,OC_INTRA_FRAME, + _enc->state.curframe_num>0); + _enc->state.nqis=1; + } + oc_enc_calc_lambda(_enc,OC_INTRA_FRAME); + oc_enc_analyze_intra(_enc,_recode); + oc_enc_frame_pack(_enc); + /*On the first frame, the previous call was an initial dry-run to prime + feed-forward statistics.*/ + if(!_recode&&_enc->state.curframe_num==0){ + if(_enc->state.info.target_bitrate>0){ + oc_enc_update_rc_state(_enc,oggpackB_bytes(&_enc->opb)<<3, + OC_INTRA_FRAME,_enc->state.qis[0],1,0); + } + oc_enc_compress_keyframe(_enc,1); + } +} +/*Encodes the current frame as a delta (INTER) frame; if mode analysis reports that it should have been a key frame, it is re-encoded as one instead.*/ +static void oc_enc_compress_frame(oc_enc_ctx *_enc,int _recode){ + if(_enc->state.info.target_bitrate>0){ + _enc->state.qis[0]=oc_enc_select_qi(_enc,OC_INTER_FRAME,1); + _enc->state.nqis=1; + } + oc_enc_calc_lambda(_enc,OC_INTER_FRAME); + 
if(oc_enc_analyze_inter(_enc,_enc->rc.twopass!=2,_recode)){ + /*Mode analysis thinks this should have been a keyframe; start over.*/ + oc_enc_compress_keyframe(_enc,1); + } + else{ + oc_enc_frame_pack(_enc); + if(!_enc->coded_inter_frame){ + /*On the first INTER frame, the previous call was an initial dry-run to + prime feed-forward statistics.*/ + _enc->coded_inter_frame=1; + if(_enc->state.info.target_bitrate>0){ + /*Rate control also needs to prime.*/ + oc_enc_update_rc_state(_enc,oggpackB_bytes(&_enc->opb)<<3, + OC_INTER_FRAME,_enc->state.qis[0],1,0); + } + oc_enc_compress_frame(_enc,1); + } + } +} + +/*Set the granule position for the next packet to output based on the current + internal state.*/ +static void oc_enc_set_granpos(oc_enc_ctx *_enc){ + unsigned dup_offs; + /*Add an offset for the number of duplicate frames we've emitted so far.*/ + dup_offs=_enc->prev_dup_count-_enc->nqueued_dups; + /*If the current frame was a keyframe, use it for the high part. + Note that + binds tighter than <<, so granpos_bias is added to the frame + number before the shift.*/ + if(_enc->state.frame_type==OC_INTRA_FRAME){ + _enc->state.granpos=(_enc->state.curframe_num+_enc->state.granpos_bias<< + _enc->state.info.keyframe_granule_shift)+dup_offs; + } + /*Otherwise use the last keyframe in the high part and put the current frame + in the low part (as the distance from that keyframe, plus the duplicate + offset).*/ + else{ + _enc->state.granpos= + (_enc->state.keyframe_num+_enc->state.granpos_bias<< + _enc->state.info.keyframe_granule_shift) + +_enc->state.curframe_num-_enc->state.keyframe_num+dup_offs; + } +} + +/*Allocates and initializes an encoder for the given settings. Return: The new encoder, or NULL if _info is NULL, the allocation fails, or oc_enc_init() reports an error.*/ +th_enc_ctx *th_encode_alloc(const th_info *_info){ + oc_enc_ctx *enc; + if(_info==NULL)return NULL; + enc=_ogg_malloc(sizeof(*enc)); + if(enc==NULL||oc_enc_init(enc,_info)<0){ + _ogg_free(enc); + return NULL; + } + return enc; +} +/*Clears and frees an encoder; passing NULL is a no-op.*/ +void th_encode_free(th_enc_ctx *_enc){ + if(_enc!=NULL){ + oc_enc_clear(_enc); + _ogg_free(_enc); + } +} + +int th_encode_ctl(th_enc_ctx *_enc,int _req,void *_buf,size_t _buf_sz){ + switch(_req){ + case TH_ENCCTL_SET_HUFFMAN_CODES:{ + if(_buf==NULL&&_buf_sz!=0|| + 
_buf!=NULL&&_buf_sz!=sizeof(th_huff_table)*TH_NHUFFMAN_TABLES){ + return TH_EINVAL; + } + return oc_enc_set_huffman_codes(_enc,(const th_huff_table *)_buf); + }break; + case TH_ENCCTL_SET_QUANT_PARAMS:{ + if(_buf==NULL&&_buf_sz!=0|| + _buf!=NULL&&_buf_sz!=sizeof(th_quant_info)){ + return TH_EINVAL; + } + return oc_enc_set_quant_params(_enc,(th_quant_info *)_buf); + }break; + case TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE:{ + ogg_uint32_t keyframe_frequency_force; + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(keyframe_frequency_force))return TH_EINVAL; + keyframe_frequency_force=*(ogg_uint32_t *)_buf; + if(keyframe_frequency_force<=0)keyframe_frequency_force=1; + if(_enc->packet_state==OC_PACKET_INFO_HDR){ + /*It's still early enough to enlarge keyframe_granule_shift.*/ + _enc->state.info.keyframe_granule_shift=OC_CLAMPI( + _enc->state.info.keyframe_granule_shift, + OC_ILOG_32(keyframe_frequency_force-1),31); + } + _enc->keyframe_frequency_force=OC_MINI(keyframe_frequency_force, + (ogg_uint32_t)1U<<_enc->state.info.keyframe_granule_shift); + *(ogg_uint32_t *)_buf=_enc->keyframe_frequency_force; + return 0; + }break; + case TH_ENCCTL_SET_VP3_COMPATIBLE:{ + int vp3_compatible; + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(vp3_compatible))return TH_EINVAL; + vp3_compatible=*(int *)_buf; + _enc->vp3_compatible=vp3_compatible; + if(oc_enc_set_huffman_codes(_enc,TH_VP31_HUFF_CODES)<0)vp3_compatible=0; + if(oc_enc_set_quant_params(_enc,&TH_VP31_QUANT_INFO)<0)vp3_compatible=0; + if(_enc->state.info.pixel_fmt!=TH_PF_420|| + _enc->state.info.pic_width<_enc->state.info.frame_width|| + _enc->state.info.pic_height<_enc->state.info.frame_height|| + /*If we have more than 4095 super blocks, VP3's RLE coding might + overflow. + We could overcome this by ensuring we flip the coded/not-coded flags on + at least one super block in the frame, but we pick the simple solution + of just telling the user the stream will be incompatible instead. 
+ It's unlikely the old VP3 codec would be able to decode streams at this + resolution in real time in the first place.*/ + _enc->state.nsbs>4095){ + vp3_compatible=0; + } + *(int *)_buf=vp3_compatible; + return 0; + }break; + case TH_ENCCTL_GET_SPLEVEL_MAX:{ + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + *(int *)_buf=OC_SP_LEVEL_MAX; + return 0; + }break; + case TH_ENCCTL_SET_SPLEVEL:{ + int speed; + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(speed))return TH_EINVAL; + speed=*(int *)_buf; + if(speed<0||speed>OC_SP_LEVEL_MAX)return TH_EINVAL; + _enc->sp_level=speed; + return 0; + }break; + case TH_ENCCTL_GET_SPLEVEL:{ + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(int))return TH_EINVAL; + *(int *)_buf=_enc->sp_level; + return 0; + } + case TH_ENCCTL_SET_DUP_COUNT:{ + int dup_count; + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(dup_count))return TH_EINVAL; + dup_count=*(int *)_buf; + if(dup_count>=_enc->keyframe_frequency_force)return TH_EINVAL; + _enc->dup_count=OC_MAXI(dup_count,0); + return 0; + }break; + case TH_ENCCTL_SET_QUALITY:{ + int qi; + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_enc->state.info.target_bitrate>0)return TH_EINVAL; + qi=*(int *)_buf; + if(qi<0||qi>63)return TH_EINVAL; + _enc->state.info.quality=qi; + _enc->state.qis[0]=(unsigned char)qi; + _enc->state.nqis=1; + return 0; + }break; + case TH_ENCCTL_SET_BITRATE:{ + long bitrate; + int reset; + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + bitrate=*(long *)_buf; + if(bitrate<=0)return TH_EINVAL; + reset=_enc->state.info.target_bitrate<=0; + _enc->state.info.target_bitrate=bitrate>INT_MAX?INT_MAX:bitrate; + if(reset)oc_rc_state_init(&_enc->rc,_enc); + else oc_enc_rc_resize(_enc); + return 0; + }break; + case TH_ENCCTL_SET_RATE_FLAGS:{ + int set; + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(set))return TH_EINVAL; + if(_enc->state.info.target_bitrate<=0)return 
TH_EINVAL; + set=*(int *)_buf; + _enc->rc.drop_frames=set&TH_RATECTL_DROP_FRAMES; + _enc->rc.cap_overflow=set&TH_RATECTL_CAP_OVERFLOW; + _enc->rc.cap_underflow=set&TH_RATECTL_CAP_UNDERFLOW; + return 0; + }break; + case TH_ENCCTL_SET_RATE_BUFFER:{ + int set; + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_buf_sz!=sizeof(set))return TH_EINVAL; + if(_enc->state.info.target_bitrate<=0)return TH_EINVAL; + set=*(int *)_buf; + _enc->rc.buf_delay=set; + oc_enc_rc_resize(_enc); + *(int *)_buf=_enc->rc.buf_delay; + return 0; + }break; + case TH_ENCCTL_2PASS_OUT:{ + if(_enc==NULL||_buf==NULL)return TH_EFAULT; + if(_enc->state.info.target_bitrate<=0|| + _enc->state.curframe_num>=0&&_enc->rc.twopass!=1|| + _buf_sz!=sizeof(unsigned char *)){ + return TH_EINVAL; + } + return oc_enc_rc_2pass_out(_enc,(unsigned char **)_buf); + }break; + case TH_ENCCTL_2PASS_IN:{ + if(_enc==NULL)return TH_EFAULT; + if(_enc->state.info.target_bitrate<=0|| + _enc->state.curframe_num>=0&&_enc->rc.twopass!=2){ + return TH_EINVAL; + } + return oc_enc_rc_2pass_in(_enc,_buf,_buf_sz); + }break; + default:return TH_EIMPL; + } +} + +int th_encode_flushheader(th_enc_ctx *_enc,th_comment *_tc,ogg_packet *_op){ + if(_enc==NULL)return TH_EFAULT; + return oc_state_flushheader(&_enc->state,&_enc->packet_state,&_enc->opb, + &_enc->qinfo,(const th_huff_table *)_enc->huff_codes,th_version_string(), + _tc,_op); +} + +static void oc_img_plane_copy_pad(th_img_plane *_dst,th_img_plane *_src, + ogg_int32_t _pic_x,ogg_int32_t _pic_y, + ogg_int32_t _pic_width,ogg_int32_t _pic_height){ + unsigned char *dst; + int dstride; + ogg_uint32_t frame_width; + ogg_uint32_t frame_height; + ogg_uint32_t y; + frame_width=_dst->width; + frame_height=_dst->height; + /*If we have _no_ data, just encode a dull green.*/ + if(_pic_width==0||_pic_height==0){ + dst=_dst->data; + dstride=_dst->stride; + for(y=0;ystride; + sstride=_src->stride; + dst_data=_dst->data; + src_data=_src->data; + dst=dst_data+_pic_y*(ptrdiff_t)dstride+_pic_x; + 
src=src_data+_pic_y*(ptrdiff_t)sstride+_pic_x; + for(y=0;y<_pic_height;y++){ + memcpy(dst,src,_pic_width); + dst+=dstride; + src+=sstride; + } + /*Step 2: Perform a low-pass extension into the padding region.*/ + /*Left side.*/ + for(x=_pic_x;x-->0;){ + dst=dst_data+_pic_y*(ptrdiff_t)dstride+x; + for(y=0;y<_pic_height;y++){ + dst[0]=(dst[1]<<1)+(dst-(dstride&-(y>0)))[1] + +(dst+(dstride&-(y+1<_pic_height)))[1]+2>>2; + dst+=dstride; + } + } + /*Right side.*/ + for(x=_pic_x+_pic_width;x0)))[0] + +(dst+(dstride&-(y+1<_pic_height)))[0]+2>>2; + dst+=dstride; + } + } + /*Top.*/ + dst=dst_data+_pic_y*(ptrdiff_t)dstride; + for(y=_pic_y;y-->0;){ + for(x=0;x0)] + +dst[x+(x+1>2; + } + dst-=dstride; + } + /*Bottom.*/ + dst=dst_data+(_pic_y+_pic_height)*(ptrdiff_t)dstride; + for(y=_pic_y+_pic_height;y0)] + +(dst-dstride)[x+(x+1>2; + } + dst+=dstride; + } + } +} + +int th_encode_ycbcr_in(th_enc_ctx *_enc,th_ycbcr_buffer _img){ + th_ycbcr_buffer img; + int cframe_width; + int cframe_height; + int cpic_width; + int cpic_height; + int cpic_x; + int cpic_y; + int hdec; + int vdec; + int pli; + int refi; + int drop; + /*Step 1: validate parameters.*/ + if(_enc==NULL||_img==NULL)return TH_EFAULT; + if(_enc->packet_state==OC_PACKET_DONE)return TH_EINVAL; + if(_enc->rc.twopass&&_enc->rc.twopass_buffer_bytes==0)return TH_EINVAL; + if((ogg_uint32_t)_img[0].width!=_enc->state.info.frame_width|| + (ogg_uint32_t)_img[0].height!=_enc->state.info.frame_height){ + return TH_EINVAL; + } + hdec=!(_enc->state.info.pixel_fmt&1); + vdec=!(_enc->state.info.pixel_fmt&2); + cframe_width=_enc->state.info.frame_width>>hdec; + cframe_height=_enc->state.info.frame_height>>vdec; + if(_img[1].width!=cframe_width||_img[2].width!=cframe_width|| + _img[1].height!=cframe_height||_img[2].height!=cframe_height){ + return TH_EINVAL; + } + /*Step 2: Copy the input to our internal buffer. 
+ This lets us add padding, if necessary, so we don't have to worry about + dereferencing possibly invalid addresses, and allows us to use the same + strides and fragment offsets for both the input frame and the reference + frames.*/ + /*Flip the input buffer upside down.*/ + oc_ycbcr_buffer_flip(img,_img); + oc_img_plane_copy_pad(_enc->state.ref_frame_bufs[OC_FRAME_IO]+0,img+0, + _enc->state.info.pic_x,_enc->state.info.pic_y, + _enc->state.info.pic_width,_enc->state.info.pic_height); + cpic_x=_enc->state.info.pic_x>>hdec; + cpic_y=_enc->state.info.pic_y>>vdec; + cpic_width=(_enc->state.info.pic_x+_enc->state.info.pic_width+hdec>>hdec) + -cpic_x; + cpic_height=(_enc->state.info.pic_y+_enc->state.info.pic_height+vdec>>vdec) + -cpic_y; + for(pli=1;pli<3;pli++){ + oc_img_plane_copy_pad(_enc->state.ref_frame_bufs[OC_FRAME_IO]+pli,img+pli, + cpic_x,cpic_y,cpic_width,cpic_height); + } + /*Step 3: Update the buffer state.*/ + if(_enc->state.ref_frame_idx[OC_FRAME_SELF]>=0){ + _enc->state.ref_frame_idx[OC_FRAME_PREV]= + _enc->state.ref_frame_idx[OC_FRAME_SELF]; + if(_enc->state.frame_type==OC_INTRA_FRAME){ + /*The new frame becomes both the previous and gold reference frames.*/ + _enc->state.keyframe_num=_enc->state.curframe_num; + _enc->state.ref_frame_idx[OC_FRAME_GOLD]= + _enc->state.ref_frame_idx[OC_FRAME_SELF]; + } + } + /*Select a free buffer to use for the reconstructed version of this frame.*/ + for(refi=0;refi==_enc->state.ref_frame_idx[OC_FRAME_GOLD]|| + refi==_enc->state.ref_frame_idx[OC_FRAME_PREV];refi++); + _enc->state.ref_frame_idx[OC_FRAME_SELF]=refi; + _enc->state.curframe_num+=_enc->prev_dup_count+1; + /*Step 4: Compress the frame.*/ + /*Start with a keyframe, and don't allow the generation of invalid files that + overflow the keyframe_granule_shift.*/ + if(_enc->rc.twopass_force_kf||_enc->state.curframe_num==0|| + _enc->state.curframe_num-_enc->state.keyframe_num+_enc->dup_count>= + _enc->keyframe_frequency_force){ + oc_enc_compress_keyframe(_enc,0); + 
drop=0; + } + else{ + oc_enc_compress_frame(_enc,0); + drop=1; + } + oc_restore_fpu(&_enc->state); + /*drop currently indicates if the frame is droppable.*/ + if(_enc->state.info.target_bitrate>0){ + drop=oc_enc_update_rc_state(_enc,oggpackB_bytes(&_enc->opb)<<3, + _enc->state.frame_type,_enc->state.qis[0],0,drop); + } + else drop=0; + /*drop now indicates if the frame was dropped.*/ + if(drop)oc_enc_drop_frame(_enc); + else _enc->prevframe_dropped=0; + _enc->packet_state=OC_PACKET_READY; + _enc->prev_dup_count=_enc->nqueued_dups=_enc->dup_count; + _enc->dup_count=0; +#if defined(OC_DUMP_IMAGES) + oc_enc_set_granpos(_enc); + oc_state_dump_frame(&_enc->state,OC_FRAME_IO,"src"); + oc_state_dump_frame(&_enc->state,OC_FRAME_SELF,"rec"); +#endif + return 0; +} + +int th_encode_packetout(th_enc_ctx *_enc,int _last_p,ogg_packet *_op){ + if(_enc==NULL||_op==NULL)return TH_EFAULT; + if(_enc->packet_state==OC_PACKET_READY){ + _enc->packet_state=OC_PACKET_EMPTY; + if(_enc->rc.twopass!=1){ + unsigned char *packet; + packet=oggpackB_get_buffer(&_enc->opb); + /*If there's no packet, malloc failed while writing; it's lost forever.*/ + if(packet==NULL)return TH_EFAULT; + _op->packet=packet; + _op->bytes=oggpackB_bytes(&_enc->opb); + } + /*For the first pass in 2-pass mode, don't emit any packet data.*/ + else{ + _op->packet=NULL; + _op->bytes=0; + } + } + else if(_enc->packet_state==OC_PACKET_EMPTY){ + if(_enc->nqueued_dups>0){ + _enc->nqueued_dups--; + _op->packet=NULL; + _op->bytes=0; + } + else{ + if(_last_p)_enc->packet_state=OC_PACKET_DONE; + return 0; + } + } + else return 0; + _last_p=_last_p&&_enc->nqueued_dups<=0; + _op->b_o_s=0; + _op->e_o_s=_last_p; + oc_enc_set_granpos(_enc); + _op->packetno=th_granule_frame(_enc,_enc->state.granpos)+3; + _op->granulepos=_enc->state.granpos; + if(_last_p)_enc->packet_state=OC_PACKET_DONE; + return 1+_enc->nqueued_dups; +} diff --git a/thirdparty/libtheora/encoder_disabled.c b/thirdparty/libtheora/encoder_disabled.c new file mode 100644 
index 0000000000..0cbf6645ac --- /dev/null +++ b/thirdparty/libtheora/encoder_disabled.c @@ -0,0 +1,67 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: encoder_disabled.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include "apiwrapper.h" +#include "encint.h" + +th_enc_ctx *th_encode_alloc(const th_info *_info){ + return NULL; +} + +void th_encode_free(th_enc_ctx *_enc){} + + +int th_encode_ctl(th_enc_ctx *_enc,int _req,void *_buf,size_t _buf_sz){ + return OC_DISABLED; +} + +int th_encode_flushheader(th_enc_ctx *_enc,th_comment *_tc,ogg_packet *_op){ + return OC_DISABLED; +} + +int th_encode_ycbcr_in(th_enc_ctx *_enc,th_ycbcr_buffer _img){ + return OC_DISABLED; +} + +int th_encode_packetout(th_enc_ctx *_enc,int _last_p,ogg_packet *_op){ + return OC_DISABLED; +} + + + +int theora_encode_init(theora_state *_te,theora_info *_ci){ + return OC_DISABLED; +} + +int theora_encode_YUVin(theora_state *_te,yuv_buffer *_yuv){ + return OC_DISABLED; +} + +int theora_encode_packetout(theora_state *_te,int _last_p,ogg_packet *_op){ + return OC_DISABLED; +} + +int theora_encode_header(theora_state *_te,ogg_packet *_op){ + return OC_DISABLED; +} + +int theora_encode_comment(theora_comment *_tc,ogg_packet *_op){ + return OC_DISABLED; +} + +int theora_encode_tables(theora_state *_te,ogg_packet *_op){ + return OC_DISABLED; +} diff --git a/thirdparty/libtheora/enquant.c b/thirdparty/libtheora/enquant.c 
new file mode 100644 index 0000000000..3372fed221 --- /dev/null +++ b/thirdparty/libtheora/enquant.c @@ -0,0 +1,274 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: enquant.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include +#include +#include "encint.h" + + + +void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo){ + const th_quant_ranges *qranges; + const th_quant_base *base_mats[2*3*64]; + int indices[2][3][64]; + int nbase_mats; + int nbits; + int ci; + int qi; + int qri; + int qti; + int pli; + int qtj; + int plj; + int bmi; + int i; + i=_qinfo->loop_filter_limits[0]; + for(qi=1;qi<64;qi++)i=OC_MAXI(i,_qinfo->loop_filter_limits[qi]); + nbits=OC_ILOG_32(i); + oggpackB_write(_opb,nbits,3); + for(qi=0;qi<64;qi++){ + oggpackB_write(_opb,_qinfo->loop_filter_limits[qi],nbits); + } + /*580 bits for VP3.*/ + i=1; + for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->ac_scale[qi],i); + nbits=OC_ILOGNZ_32(i); + oggpackB_write(_opb,nbits-1,4); + for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->ac_scale[qi],nbits); + /*516 bits for VP3.*/ + i=1; + for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->dc_scale[qi],i); + nbits=OC_ILOGNZ_32(i); + oggpackB_write(_opb,nbits-1,4); + for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->dc_scale[qi],nbits); + /*Consolidate any duplicate base matrices.*/ + nbase_mats=0; + for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ + qranges=_qinfo->qi_ranges[qti]+pli; + 
for(qri=0;qri<=qranges->nranges;qri++){ + for(bmi=0;;bmi++){ + if(bmi>=nbase_mats){ + base_mats[bmi]=qranges->base_matrices+qri; + indices[qti][pli][qri]=nbase_mats++; + break; + } + else if(memcmp(base_mats[bmi][0],qranges->base_matrices[qri], + sizeof(base_mats[bmi][0]))==0){ + indices[qti][pli][qri]=bmi; + break; + } + } + } + } + /*Write out the list of unique base matrices. + 1545 bits for VP3 matrices.*/ + oggpackB_write(_opb,nbase_mats-1,9); + for(bmi=0;bmiqi_ranges[qti]+pli; + if(i>0){ + if(qti>0){ + if(qranges->nranges==_qinfo->qi_ranges[qti-1][pli].nranges&& + memcmp(qranges->sizes,_qinfo->qi_ranges[qti-1][pli].sizes, + qranges->nranges*sizeof(qranges->sizes[0]))==0&& + memcmp(indices[qti][pli],indices[qti-1][pli], + (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){ + oggpackB_write(_opb,1,2); + continue; + } + } + qtj=(i-1)/3; + plj=(i-1)%3; + if(qranges->nranges==_qinfo->qi_ranges[qtj][plj].nranges&& + memcmp(qranges->sizes,_qinfo->qi_ranges[qtj][plj].sizes, + qranges->nranges*sizeof(qranges->sizes[0]))==0&& + memcmp(indices[qti][pli],indices[qtj][plj], + (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){ + oggpackB_write(_opb,0,1+(qti>0)); + continue; + } + oggpackB_write(_opb,1,1); + } + oggpackB_write(_opb,indices[qti][pli][0],nbits); + for(qi=qri=0;qi<63;qri++){ + oggpackB_write(_opb,qranges->sizes[qri]-1,OC_ILOG_32(62-qi)); + qi+=qranges->sizes[qri]; + oggpackB_write(_opb,indices[qti][pli][qri+1],nbits); + } + } +} + +static void oc_iquant_init(oc_iquant *_this,ogg_uint16_t _d){ + ogg_uint32_t t; + int l; + _d<<=1; + l=OC_ILOGNZ_32(_d)-1; + t=1+((ogg_uint32_t)1<<16+l)/_d; + _this->m=(ogg_int16_t)(t-0x10000); + _this->l=l; +} + +/*See comments at oc_dequant_tables_init() for how the quantization tables' + storage should be initialized.*/ +void oc_enquant_tables_init(ogg_uint16_t *_dequant[64][3][2], + oc_iquant *_enquant[64][3][2],const th_quant_info *_qinfo){ + int qi; + int pli; + int qti; + /*Initialize the dequantization tables 
first.*/ + oc_dequant_tables_init(_dequant,NULL,_qinfo); + /*Derive the quantization tables directly from the dequantization tables.*/ + for(qi=0;qi<64;qi++)for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ + int zzi; + int plj; + int qtj; + int dupe; + dupe=0; + for(qtj=0;qtj<=qti;qtj++){ + for(plj=0;plj<(qtj>1))/qd; + qp+=rq*(ogg_uint32_t)rq; + } + q2+=OC_PCD[_pixel_fmt][pli]*(ogg_int64_t)qp; + } + /*qavg=1.0/sqrt(q2).*/ + _log_qavg[qti][qi]=OC_Q57(48)-oc_blog64(q2)>>1; + } +} diff --git a/thirdparty/libtheora/enquant.h b/thirdparty/libtheora/enquant.h new file mode 100644 index 0000000000..d62df10d1a --- /dev/null +++ b/thirdparty/libtheora/enquant.h @@ -0,0 +1,27 @@ +#if !defined(_enquant_H) +# define _enquant_H (1) +# include "quant.h" + +typedef struct oc_iquant oc_iquant; + +#define OC_QUANT_MAX_LOG (OC_Q57(OC_STATIC_ILOG_32(OC_QUANT_MAX)-1)) + +/*Used to compute x/d via ((x*m>>16)+x>>l)+(x<0)) + (i.e., one 16x16->16 mul, 2 shifts, and 2 adds). + This is not an approximation; for 16-bit x and d, it is exact.*/ +struct oc_iquant{ + ogg_int16_t m; + ogg_int16_t l; +}; + +typedef oc_iquant oc_iquant_table[64]; + + + +void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo); +void oc_enquant_tables_init(ogg_uint16_t *_dequant[64][3][2], + oc_iquant *_enquant[64][3][2],const th_quant_info *_qinfo); +void oc_enquant_qavg_init(ogg_int64_t _log_qavg[2][64], + ogg_uint16_t *_dequant[64][3][2],int _pixel_fmt); + +#endif diff --git a/thirdparty/libtheora/fdct.c b/thirdparty/libtheora/fdct.c new file mode 100644 index 0000000000..dc3a66f245 --- /dev/null +++ b/thirdparty/libtheora/fdct.c @@ -0,0 +1,422 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: fdct.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include "encint.h" +#include "dct.h" + + + +/*Performs a forward 8 point Type-II DCT transform. + The output is scaled by a factor of 2 from the orthonormal version of the + transform. + _y: The buffer to store the result in. + Data will be placed the first 8 entries (e.g., in a row of an 8x8 block). + _x: The input coefficients. + Every 8th entry is used (e.g., from a column of an 8x8 block).*/ +static void oc_fdct8(ogg_int16_t _y[8],const ogg_int16_t *_x){ + int t0; + int t1; + int t2; + int t3; + int t4; + int t5; + int t6; + int t7; + int r; + int s; + int u; + int v; + /*Stage 1:*/ + /*0-7 butterfly.*/ + t0=_x[0<<3]+(int)_x[7<<3]; + t7=_x[0<<3]-(int)_x[7<<3]; + /*1-6 butterfly.*/ + t1=_x[1<<3]+(int)_x[6<<3]; + t6=_x[1<<3]-(int)_x[6<<3]; + /*2-5 butterfly.*/ + t2=_x[2<<3]+(int)_x[5<<3]; + t5=_x[2<<3]-(int)_x[5<<3]; + /*3-4 butterfly.*/ + t3=_x[3<<3]+(int)_x[4<<3]; + t4=_x[3<<3]-(int)_x[4<<3]; + /*Stage 2:*/ + /*0-3 butterfly.*/ + r=t0+t3; + t3=t0-t3; + t0=r; + /*1-2 butterfly.*/ + r=t1+t2; + t2=t1-t2; + t1=r; + /*6-5 butterfly.*/ + r=t6+t5; + t5=t6-t5; + t6=r; + /*Stages 3 and 4 are where all the approximation occurs. + These are chosen to be as close to an exact inverse of the approximations + made in the iDCT as possible, while still using mostly 16-bit arithmetic. + We use some 16x16->32 signed MACs, but those still commonly execute in 1 + cycle on a 16-bit DSP. + For example, s=(27146*t5+0x4000>>16)+t5+(t5!=0) is an exact inverse of + t5=(OC_C4S4*s>>16). + That is, applying the latter to the output of the former will recover t5 + exactly (over the valid input range of t5, -23171...23169). 
+ We increase the rounding bias to 0xB500 in this particular case so that + errors inverting the subsequent butterfly are not one-sided (e.g., the + mean error is very close to zero). + The (t5!=0) term could be replaced simply by 1, but we want to send 0 to 0. + The fDCT of an all-zeros block will still not be zero, because of the + biases we added at the very beginning of the process, but it will be close + enough that it is guaranteed to round to zero.*/ + /*Stage 3:*/ + /*4-5 butterfly.*/ + s=(27146*t5+0xB500>>16)+t5+(t5!=0)>>1; + r=t4+s; + t5=t4-s; + t4=r; + /*7-6 butterfly.*/ + s=(27146*t6+0xB500>>16)+t6+(t6!=0)>>1; + r=t7+s; + t6=t7-s; + t7=r; + /*Stage 4:*/ + /*0-1 butterfly.*/ + r=(27146*t0+0x4000>>16)+t0+(t0!=0); + s=(27146*t1+0xB500>>16)+t1+(t1!=0); + u=r+s>>1; + v=r-u; + _y[0]=u; + _y[4]=v; + /*3-2 rotation by 6pi/16*/ + u=(OC_C6S2*t2+OC_C2S6*t3+0x6CB7>>16)+(t3!=0); + s=(OC_C6S2*u>>16)-t2; + v=(s*21600+0x2800>>18)+s+(s!=0); + _y[2]=u; + _y[6]=v; + /*6-5 rotation by 3pi/16*/ + u=(OC_C5S3*t6+OC_C3S5*t5+0x0E3D>>16)+(t5!=0); + s=t6-(OC_C5S3*u>>16); + v=(s*26568+0x3400>>17)+s+(s!=0); + _y[5]=u; + _y[3]=v; + /*7-4 rotation by 7pi/16*/ + u=(OC_C7S1*t4+OC_C1S7*t7+0x7B1B>>16)+(t7!=0); + s=(OC_C7S1*u>>16)-t4; + v=(s*20539+0x3000>>20)+s+(s!=0); + _y[1]=u; + _y[7]=v; +} + +void oc_enc_fdct8x8(const oc_enc_ctx *_enc,ogg_int16_t _y[64], + const ogg_int16_t _x[64]){ + (*_enc->opt_vtable.fdct8x8)(_y,_x); +} + +/*Performs a forward 8x8 Type-II DCT transform. + The output is scaled by a factor of 4 relative to the orthonormal version + of the transform. + _y: The buffer to store the result in. + This may be the same as _x. + _x: The input coefficients. 
*/ +void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + const ogg_int16_t *in; + ogg_int16_t *end; + ogg_int16_t *out; + ogg_int16_t w[64]; + int i; + /*Add two extra bits of working precision to improve accuracy; any more and + we could overflow.*/ + for(i=0;i<64;i++)w[i]=_x[i]<<2; + /*These biases correct for some systematic error that remains in the full + fDCT->iDCT round trip.*/ + w[0]+=(w[0]!=0)+1; + w[1]++; + w[8]--; + /*Transform columns of w into rows of _y.*/ + for(in=w,out=_y,end=out+64;out>2; +} + + + +/*This does not seem to outperform simple LFE border padding before MC. + It yields higher PSNR, but much higher bitrate usage.*/ +#if 0 +typedef struct oc_extension_info oc_extension_info; + + + +/*Information needed to pad boundary blocks. + We multiply each row/column by an extension matrix that fills in the padding + values as a linear combination of the active values, so that an equivalent + number of coefficients are forced to zero. + This costs at most 16 multiplies, the same as a 1-D fDCT itself, and as + little as 7 multiplies. + We compute the extension matrices for every possible shape in advance, as + there are only 35. + The coefficients for all matrices are stored in a single array to take + advantage of the overlap and repetitiveness of many of the shapes. + A similar technique is applied to the offsets into this array. + This reduces the required table storage by about 48%. + See tools/extgen.c for details. + We could conceivably do the same for all 256 possible shapes.*/ +struct oc_extension_info{ + /*The mask of the active pixels in the shape.*/ + short mask; + /*The number of active pixels in the shape.*/ + short na; + /*The extension matrix. 
+ This is (8-na)xna*/ + const ogg_int16_t *const *ext; + /*The pixel indices: na active pixels followed by 8-na padding pixels.*/ + unsigned char pi[8]; + /*The coefficient indices: na unconstrained coefficients followed by 8-na + coefficients to be forced to zero.*/ + unsigned char ci[8]; +}; + + +/*The number of shapes we need.*/ +#define OC_NSHAPES (35) + +static const ogg_int16_t OC_EXT_COEFFS[229]={ + 0x7FFF,0xE1F8,0x6903,0xAA79,0x5587,0x7FFF,0x1E08,0x7FFF, + 0x5587,0xAA79,0x6903,0xE1F8,0x7FFF,0x0000,0x0000,0x0000, + 0x7FFF,0x0000,0x0000,0x7FFF,0x8000,0x7FFF,0x0000,0x0000, + 0x7FFF,0xE1F8,0x1E08,0xB0A7,0xAA1D,0x337C,0x7FFF,0x4345, + 0x2267,0x4345,0x7FFF,0x337C,0xAA1D,0xB0A7,0x8A8C,0x4F59, + 0x03B4,0xE2D6,0x7FFF,0x2CF3,0x7FFF,0xE2D6,0x03B4,0x4F59, + 0x8A8C,0x1103,0x7AEF,0x5225,0xDF60,0xC288,0xDF60,0x5225, + 0x7AEF,0x1103,0x668A,0xD6EE,0x3A16,0x0E6C,0xFA07,0x0E6C, + 0x3A16,0xD6EE,0x668A,0x2A79,0x2402,0x980F,0x50F5,0x4882, + 0x50F5,0x980F,0x2402,0x2A79,0xF976,0x2768,0x5F22,0x2768, + 0xF976,0x1F91,0x76C1,0xE9AE,0x76C1,0x1F91,0x7FFF,0xD185, + 0x0FC8,0xD185,0x7FFF,0x4F59,0x4345,0xED62,0x4345,0x4F59, + 0xF574,0x5D99,0x2CF3,0x5D99,0xF574,0x5587,0x3505,0x30FC, + 0xF482,0x953C,0xEAC4,0x7FFF,0x4F04,0x7FFF,0xEAC4,0x953C, + 0xF482,0x30FC,0x4F04,0x273D,0xD8C3,0x273D,0x1E09,0x61F7, + 0x1E09,0x273D,0xD8C3,0x273D,0x4F04,0x30FC,0xA57E,0x153C, + 0x6AC4,0x3C7A,0x1E08,0x3C7A,0x6AC4,0x153C,0xA57E,0x7FFF, + 0xA57E,0x5A82,0x6AC4,0x153C,0xC386,0xE1F8,0xC386,0x153C, + 0x6AC4,0x5A82,0xD8C3,0x273D,0x7FFF,0xE1F7,0x7FFF,0x273D, + 0xD8C3,0x4F04,0x30FC,0xD8C3,0x273D,0xD8C3,0x30FC,0x4F04, + 0x1FC8,0x67AD,0x1853,0xE038,0x1853,0x67AD,0x1FC8,0x4546, + 0xE038,0x1FC8,0x3ABA,0x1FC8,0xE038,0x4546,0x3505,0x5587, + 0xF574,0xBC11,0x78F4,0x4AFB,0xE6F3,0x4E12,0x3C11,0xF8F4, + 0x4AFB,0x3C7A,0xF88B,0x3C11,0x78F4,0xCAFB,0x7FFF,0x08CC, + 0x070C,0x236D,0x5587,0x236D,0x070C,0xF88B,0x3C7A,0x4AFB, + 0xF8F4,0x3C11,0x7FFF,0x153C,0xCAFB,0x153C,0x7FFF,0x1E08, + 
0xE1F8,0x7FFF,0x08CC,0x7FFF,0xCAFB,0x78F4,0x3C11,0x4E12, + 0xE6F3,0x4AFB,0x78F4,0xBC11,0xFE3D,0x7FFF,0xFE3D,0x2F3A, + 0x7FFF,0x2F3A,0x89BC,0x7FFF,0x89BC +}; + +static const ogg_int16_t *const OC_EXT_ROWS[96]={ + OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0, + OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 6, + OC_EXT_COEFFS+ 27,OC_EXT_COEFFS+ 38,OC_EXT_COEFFS+ 43,OC_EXT_COEFFS+ 32, + OC_EXT_COEFFS+ 49,OC_EXT_COEFFS+ 58,OC_EXT_COEFFS+ 67,OC_EXT_COEFFS+ 71, + OC_EXT_COEFFS+ 62,OC_EXT_COEFFS+ 53,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15, + OC_EXT_COEFFS+ 14,OC_EXT_COEFFS+ 13,OC_EXT_COEFFS+ 76,OC_EXT_COEFFS+ 81, + OC_EXT_COEFFS+ 86,OC_EXT_COEFFS+ 91,OC_EXT_COEFFS+ 96,OC_EXT_COEFFS+ 98, + OC_EXT_COEFFS+ 93,OC_EXT_COEFFS+ 88,OC_EXT_COEFFS+ 83,OC_EXT_COEFFS+ 78, + OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 12, + OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15, + OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 103,OC_EXT_COEFFS+ 108, + OC_EXT_COEFFS+ 126,OC_EXT_COEFFS+ 16,OC_EXT_COEFFS+ 137,OC_EXT_COEFFS+ 141, + OC_EXT_COEFFS+ 20,OC_EXT_COEFFS+ 130,OC_EXT_COEFFS+ 113,OC_EXT_COEFFS+ 116, + OC_EXT_COEFFS+ 146,OC_EXT_COEFFS+ 153,OC_EXT_COEFFS+ 160,OC_EXT_COEFFS+ 167, + OC_EXT_COEFFS+ 170,OC_EXT_COEFFS+ 163,OC_EXT_COEFFS+ 156,OC_EXT_COEFFS+ 149, + OC_EXT_COEFFS+ 119,OC_EXT_COEFFS+ 122,OC_EXT_COEFFS+ 174,OC_EXT_COEFFS+ 177, + OC_EXT_COEFFS+ 182,OC_EXT_COEFFS+ 187,OC_EXT_COEFFS+ 192,OC_EXT_COEFFS+ 197, + OC_EXT_COEFFS+ 202,OC_EXT_COEFFS+ 207,OC_EXT_COEFFS+ 210,OC_EXT_COEFFS+ 215, + OC_EXT_COEFFS+ 179,OC_EXT_COEFFS+ 189,OC_EXT_COEFFS+ 24,OC_EXT_COEFFS+ 204, + OC_EXT_COEFFS+ 184,OC_EXT_COEFFS+ 194,OC_EXT_COEFFS+ 212,OC_EXT_COEFFS+ 199, + OC_EXT_COEFFS+ 217,OC_EXT_COEFFS+ 100,OC_EXT_COEFFS+ 134,OC_EXT_COEFFS+ 135, + OC_EXT_COEFFS+ 135,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 134, + OC_EXT_COEFFS+ 134,OC_EXT_COEFFS+ 135,OC_EXT_COEFFS+ 220,OC_EXT_COEFFS+ 223, + OC_EXT_COEFFS+ 
226,OC_EXT_COEFFS+ 227,OC_EXT_COEFFS+ 224,OC_EXT_COEFFS+ 221 +}; + +static const oc_extension_info OC_EXTENSION_INFO[OC_NSHAPES]={ + {0x7F,7,OC_EXT_ROWS+ 0,{0,1,2,3,4,5,6,7},{0,1,2,4,5,6,7,3}}, + {0xFE,7,OC_EXT_ROWS+ 7,{1,2,3,4,5,6,7,0},{0,1,2,4,5,6,7,3}}, + {0x3F,6,OC_EXT_ROWS+ 8,{0,1,2,3,4,5,7,6},{0,1,3,4,6,7,5,2}}, + {0xFC,6,OC_EXT_ROWS+ 10,{2,3,4,5,6,7,1,0},{0,1,3,4,6,7,5,2}}, + {0x1F,5,OC_EXT_ROWS+ 12,{0,1,2,3,4,7,6,5},{0,2,3,5,7,6,4,1}}, + {0xF8,5,OC_EXT_ROWS+ 15,{3,4,5,6,7,2,1,0},{0,2,3,5,7,6,4,1}}, + {0x0F,4,OC_EXT_ROWS+ 18,{0,1,2,3,7,6,5,4},{0,2,4,6,7,5,3,1}}, + {0xF0,4,OC_EXT_ROWS+ 18,{4,5,6,7,3,2,1,0},{0,2,4,6,7,5,3,1}}, + {0x07,3,OC_EXT_ROWS+ 22,{0,1,2,7,6,5,4,3},{0,3,6,7,5,4,2,1}}, + {0xE0,3,OC_EXT_ROWS+ 27,{5,6,7,4,3,2,1,0},{0,3,6,7,5,4,2,1}}, + {0x03,2,OC_EXT_ROWS+ 32,{0,1,7,6,5,4,3,2},{0,4,7,6,5,3,2,1}}, + {0xC0,2,OC_EXT_ROWS+ 32,{6,7,5,4,3,2,1,0},{0,4,7,6,5,3,2,1}}, + {0x01,1,OC_EXT_ROWS+ 0,{0,7,6,5,4,3,2,1},{0,7,6,5,4,3,2,1}}, + {0x80,1,OC_EXT_ROWS+ 0,{7,6,5,4,3,2,1,0},{0,7,6,5,4,3,2,1}}, + {0x7E,6,OC_EXT_ROWS+ 42,{1,2,3,4,5,6,7,0},{0,1,2,5,6,7,4,3}}, + {0x7C,5,OC_EXT_ROWS+ 44,{2,3,4,5,6,7,1,0},{0,1,4,5,7,6,3,2}}, + {0x3E,5,OC_EXT_ROWS+ 47,{1,2,3,4,5,7,6,0},{0,1,4,5,7,6,3,2}}, + {0x78,4,OC_EXT_ROWS+ 50,{3,4,5,6,7,2,1,0},{0,4,5,7,6,3,2,1}}, + {0x3C,4,OC_EXT_ROWS+ 54,{2,3,4,5,7,6,1,0},{0,3,4,7,6,5,2,1}}, + {0x1E,4,OC_EXT_ROWS+ 58,{1,2,3,4,7,6,5,0},{0,4,5,7,6,3,2,1}}, + {0x70,3,OC_EXT_ROWS+ 62,{4,5,6,7,3,2,1,0},{0,5,7,6,4,3,2,1}}, + {0x38,3,OC_EXT_ROWS+ 67,{3,4,5,7,6,2,1,0},{0,5,6,7,4,3,2,1}}, + {0x1C,3,OC_EXT_ROWS+ 72,{2,3,4,7,6,5,1,0},{0,5,6,7,4,3,2,1}}, + {0x0E,3,OC_EXT_ROWS+ 77,{1,2,3,7,6,5,4,0},{0,5,7,6,4,3,2,1}}, + {0x60,2,OC_EXT_ROWS+ 82,{5,6,7,4,3,2,1,0},{0,2,7,6,5,4,3,1}}, + {0x30,2,OC_EXT_ROWS+ 36,{4,5,7,6,3,2,1,0},{0,4,7,6,5,3,2,1}}, + {0x18,2,OC_EXT_ROWS+ 90,{3,4,7,6,5,2,1,0},{0,1,7,6,5,4,3,2}}, + {0x0C,2,OC_EXT_ROWS+ 34,{2,3,7,6,5,4,1,0},{0,4,7,6,5,3,2,1}}, + {0x06,2,OC_EXT_ROWS+ 84,{1,2,7,6,5,4,3,0},{0,2,7,6,5,4,3,1}}, + 
{0x40,1,OC_EXT_ROWS+ 0,{6,7,5,4,3,2,1,0},{0,7,6,5,4,3,2,1}}, + {0x20,1,OC_EXT_ROWS+ 0,{5,7,6,4,3,2,1,0},{0,7,6,5,4,3,2,1}}, + {0x10,1,OC_EXT_ROWS+ 0,{4,7,6,5,3,2,1,0},{0,7,6,5,4,3,2,1}}, + {0x08,1,OC_EXT_ROWS+ 0,{3,7,6,5,4,2,1,0},{0,7,6,5,4,3,2,1}}, + {0x04,1,OC_EXT_ROWS+ 0,{2,7,6,5,4,3,1,0},{0,7,6,5,4,3,2,1}}, + {0x02,1,OC_EXT_ROWS+ 0,{1,7,6,5,4,3,2,0},{0,7,6,5,4,3,2,1}} +}; + + + +/*Pads a single column of a partial block and then performs a forward Type-II + DCT on the result. + The input is scaled by a factor of 4 and biased appropriately for the current + fDCT implementation. + The output is scaled by an additional factor of 2 from the orthonormal + version of the transform. + _y: The buffer to store the result in. + Data will be placed the first 8 entries (e.g., in a row of an 8x8 block). + _x: The input coefficients. + Every 8th entry is used (e.g., from a column of an 8x8 block). + _e: The extension information for the shape.*/ +static void oc_fdct8_ext(ogg_int16_t _y[8],ogg_int16_t *_x, + const oc_extension_info *_e){ + const unsigned char *pi; + int na; + na=_e->na; + pi=_e->pi; + if(na==1){ + int ci; + /*While the branch below is still correct for shapes with na==1, we can + perform the entire transform with just 1 multiply in this case instead + of 23.*/ + _y[0]=(ogg_int16_t)(OC_DIV2_16(OC_C4S4*(_x[pi[0]]))); + for(ci=1;ci<8;ci++)_y[ci]=0; + } + else{ + const ogg_int16_t *const *ext; + int zpi; + int api; + int nz; + /*First multiply by the extension matrix to compute the padding values.*/ + nz=8-na; + ext=_e->ext; + for(zpi=0;zpi>16)+1>>1; + } + oc_fdct8(_y,_x); + } +} + +/*Performs a forward 8x8 Type-II DCT transform on blocks which overlap the + border of the picture region. + This method ONLY works with rectangular regions. + _border: A description of which pixels are inside the border. + _y: The buffer to store the result in. + This may be the same as _x. + _x: The input pixel values. 
+ Pixel values outside the border will be ignored.*/ +void oc_fdct8x8_border(const oc_border_info *_border, + ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + ogg_int16_t *in; + ogg_int16_t *out; + ogg_int16_t w[64]; + ogg_int64_t mask; + const oc_extension_info *cext; + const oc_extension_info *rext; + int cmask; + int rmask; + int ri; + int ci; + /*Identify the shapes of the non-zero rows and columns.*/ + rmask=cmask=0; + mask=_border->mask; + for(ri=0;ri<8;ri++){ + /*This aggregation is _only_ correct for rectangular masks.*/ + cmask|=((mask&0xFF)!=0)<>=8; + } + /*Find the associated extension info for these shapes.*/ + if(cmask==0xFF)cext=NULL; + else for(cext=OC_EXTENSION_INFO;cext->mask!=cmask;){ + /*If we somehow can't find the shape, then just do an unpadded fDCT. + It won't be efficient, but it should still be correct.*/ + if(++cext>=OC_EXTENSION_INFO+OC_NSHAPES){ + oc_enc_fdct8x8_c(_y,_x); + return; + } + } + if(rmask==0xFF)rext=NULL; + else for(rext=OC_EXTENSION_INFO;rext->mask!=rmask;){ + /*If we somehow can't find the shape, then just do an unpadded fDCT. + It won't be efficient, but it should still be correct.*/ + if(++rext>=OC_EXTENSION_INFO+OC_NSHAPES){ + oc_enc_fdct8x8_c(_y,_x); + return; + } + } + /*Add two extra bits of working precision to improve accuracy; any more and + we could overflow.*/ + for(ci=0;ci<64;ci++)w[ci]=_x[ci]<<2; + /*These biases correct for some systematic error that remains in the full + fDCT->iDCT round trip. + We can safely add them before padding, since if these pixel values are + overwritten, we didn't care what they were anyway (and the unbiased values + will usually yield smaller DCT coefficient magnitudes).*/ + w[0]+=(w[0]!=0)+1; + w[1]++; + w[8]--; + /*Transform the columns. 
+ We can ignore zero columns without a problem.*/ + in=w; + out=_y; + if(cext==NULL)for(ci=0;ci<8;ci++)oc_fdct8(out+(ci<<3),in+ci); + else for(ci=0;ci<8;ci++)if(rmask&(1<>2; +} +#endif diff --git a/thirdparty/libtheora/fragment.c b/thirdparty/libtheora/fragment.c new file mode 100644 index 0000000000..15372e9d9f --- /dev/null +++ b/thirdparty/libtheora/fragment.c @@ -0,0 +1,87 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: fragment.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include +#include "internal.h" + +void oc_frag_copy(const oc_theora_state *_state,unsigned char *_dst, + const unsigned char *_src,int _ystride){ + (*_state->opt_vtable.frag_copy)(_dst,_src,_ystride); +} + +void oc_frag_copy_c(unsigned char *_dst,const unsigned char *_src,int _ystride){ + int i; + for(i=8;i-->0;){ + memcpy(_dst,_src,8*sizeof(*_dst)); + _dst+=_ystride; + _src+=_ystride; + } +} + +void oc_frag_recon_intra(const oc_theora_state *_state,unsigned char *_dst, + int _ystride,const ogg_int16_t _residue[64]){ + _state->opt_vtable.frag_recon_intra(_dst,_ystride,_residue); +} + +void oc_frag_recon_intra_c(unsigned char *_dst,int _ystride, + const ogg_int16_t _residue[64]){ + int i; + for(i=0;i<8;i++){ + int j; + for(j=0;j<8;j++)_dst[j]=OC_CLAMP255(_residue[i*8+j]+128); + _dst+=_ystride; + } +} + +void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst, + const 
unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){ + _state->opt_vtable.frag_recon_inter(_dst,_src,_ystride,_residue); +} + +void oc_frag_recon_inter_c(unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){ + int i; + for(i=0;i<8;i++){ + int j; + for(j=0;j<8;j++)_dst[j]=OC_CLAMP255(_residue[i*8+j]+_src[j]); + _dst+=_ystride; + _src+=_ystride; + } +} + +void oc_frag_recon_inter2(const oc_theora_state *_state,unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride, + const ogg_int16_t _residue[64]){ + _state->opt_vtable.frag_recon_inter2(_dst,_src1,_src2,_ystride,_residue); +} + +void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1, + const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]){ + int i; + for(i=0;i<8;i++){ + int j; + for(j=0;j<8;j++)_dst[j]=OC_CLAMP255(_residue[i*8+j]+(_src1[j]+_src2[j]>>1)); + _dst+=_ystride; + _src1+=_ystride; + _src2+=_ystride; + } +} + +void oc_restore_fpu(const oc_theora_state *_state){ + _state->opt_vtable.restore_fpu(); +} + +void oc_restore_fpu_c(void){} diff --git a/thirdparty/libtheora/huffdec.c b/thirdparty/libtheora/huffdec.c new file mode 100644 index 0000000000..8cf27f0341 --- /dev/null +++ b/thirdparty/libtheora/huffdec.c @@ -0,0 +1,489 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: huffdec.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include +#include +#include +#include "huffdec.h" +#include "decint.h" + + +/*The ANSI offsetof macro is broken on some platforms (e.g., older DECs).*/ +#define _ogg_offsetof(_type,_field)\ + ((size_t)((char *)&((_type *)0)->_field-(char *)0)) + +/*The number of internal tokens associated with each of the spec tokens.*/ +static const unsigned char OC_DCT_TOKEN_MAP_ENTRIES[TH_NDCT_TOKENS]={ + 1,1,1,4,8,1,1,8,1,1,1,1,1,2,2,2,2,4,8,2,2,2,4,2,2,2,2,2,8,2,4,8 +}; + +/*The map from external spec-defined tokens to internal tokens. + This is constructed so that any extra bits read with the original token value + can be masked off the least significant bits of its internal token index. + In addition, all of the tokens which require additional extra bits are placed + at the start of the list, and grouped by type. + OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so + giving it index 0 may simplify comparisons on some architectures. 
+ These requirements require some substantial reordering.*/ +static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={ + /*OC_DCT_EOB1_TOKEN (0 extra bits)*/ + 15, + /*OC_DCT_EOB2_TOKEN (0 extra bits)*/ + 16, + /*OC_DCT_EOB3_TOKEN (0 extra bits)*/ + 17, + /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/ + 88, + /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/ + 80, + /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/ + 1, + /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/ + 0, + /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/ + 48, + /*OC_DCT_ZRL_TOKEN (6 extra bits)*/ + 14, + /*OC_ONE_TOKEN (0 extra bits)*/ + 56, + /*OC_MINUS_ONE_TOKEN (0 extra bits)*/ + 57, + /*OC_TWO_TOKEN (0 extra bits)*/ + 58, + /*OC_MINUS_TWO_TOKEN (0 extra bits)*/ + 59, + /*OC_DCT_VAL_CAT2 (1 extra bit)*/ + 60, + 62, + 64, + 66, + /*OC_DCT_VAL_CAT3 (2 extra bits)*/ + 68, + /*OC_DCT_VAL_CAT4 (3 extra bits)*/ + 72, + /*OC_DCT_VAL_CAT5 (4 extra bits)*/ + 2, + /*OC_DCT_VAL_CAT6 (5 extra bits)*/ + 4, + /*OC_DCT_VAL_CAT7 (6 extra bits)*/ + 6, + /*OC_DCT_VAL_CAT8 (10 extra bits)*/ + 8, + /*OC_DCT_RUN_CAT1A (1 extra bit)*/ + 18, + 20, + 22, + 24, + 26, + /*OC_DCT_RUN_CAT1B (3 extra bits)*/ + 32, + /*OC_DCT_RUN_CAT1C (4 extra bits)*/ + 12, + /*OC_DCT_RUN_CAT2A (2 extra bits)*/ + 28, + /*OC_DCT_RUN_CAT2B (3 extra bits)*/ + 40 +}; + +/*These three functions are really part of the bitpack.c module, but + they are only used here. 
+ Declaring local static versions so they can be inlined saves considerable + function call overhead.*/ + +static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){ + const unsigned char *ptr; + const unsigned char *stop; + oc_pb_window window; + int available; + window=_b->window; + available=_b->bits; + ptr=_b->ptr; + stop=_b->stop; + /*This version of _refill() doesn't bother setting eof because we won't + check for it after we've started decoding DCT tokens.*/ + if(ptr>=stop)available=OC_LOTS_OF_BITS; + while(available<=OC_PB_WINDOW_SIZE-8){ + available+=8; + window|=(oc_pb_window)*ptr++<=stop)available=OC_LOTS_OF_BITS; + } + _b->ptr=ptr; + if(_bits>available)window|=*ptr>>(available&7); + _b->bits=available; + return window; +} + + +/*Read in bits without advancing the bit pointer. + Here we assume 0<=_bits&&_bits<=32.*/ +static long oc_pack_look(oc_pack_buf *_b,int _bits){ + oc_pb_window window; + int available; + long result; + window=_b->window; + available=_b->bits; + if(_bits==0)return 0; + if(_bits>available)_b->window=window=oc_pack_refill(_b,_bits); + result=window>>OC_PB_WINDOW_SIZE-_bits; + return result; +} + +/*Advance the bit pointer.*/ +static void oc_pack_adv(oc_pack_buf *_b,int _bits){ + /*We ignore the special cases for _bits==0 and _bits==32 here, since they are + never used actually used. + OC_HUFF_SLUSH (defined below) would have to be at least 27 to actually read + 32 bits in a single go, and would require a 32 GB lookup table (assuming + 8 byte pointers, since 4 byte pointers couldn't fit such a table).*/ + _b->window<<=_bits; + _b->bits-=_bits; +} + + +/*The log_2 of the size of a lookup table is allowed to grow to relative to + the number of unique nodes it contains. + E.g., if OC_HUFF_SLUSH is 2, then at most 75% of the space in the tree is + wasted (each node will have an amortized cost of at most 20 bytes when using + 4-byte pointers). 
+ Larger numbers can decode tokens with fewer read operations, while smaller + numbers may save more space (requiring as little as 8 bytes amortized per + node, though there will be more nodes). + With a sample file: + 32233473 read calls are required when no tree collapsing is done (100.0%). + 19269269 read calls are required when OC_HUFF_SLUSH is 0 (59.8%). + 11144969 read calls are required when OC_HUFF_SLUSH is 1 (34.6%). + 10538563 read calls are required when OC_HUFF_SLUSH is 2 (32.7%). + 10192578 read calls are required when OC_HUFF_SLUSH is 3 (31.6%). + Since a value of 1 gets us the vast majority of the speed-up with only a + small amount of wasted memory, this is what we use.*/ +#define OC_HUFF_SLUSH (1) + + +/*Determines the size in bytes of a Huffman tree node that represents a + subtree of depth _nbits. + _nbits: The depth of the subtree. + If this is 0, the node is a leaf node. + Otherwise 1<<_nbits pointers are allocated for children. + Return: The number of bytes required to store the node.*/ +static size_t oc_huff_node_size(int _nbits){ + size_t size; + size=_ogg_offsetof(oc_huff_node,nodes); + if(_nbits>0)size+=sizeof(oc_huff_node *)*(1<<_nbits); + return size; +} + +static oc_huff_node *oc_huff_node_init(char **_storage,size_t _size,int _nbits){ + oc_huff_node *ret; + ret=(oc_huff_node *)*_storage; + ret->nbits=(unsigned char)_nbits; + (*_storage)+=_size; + return ret; +} + + +/*Determines the size in bytes of a Huffman tree. + _nbits: The depth of the subtree. + If this is 0, the node is a leaf node. + Otherwise storage for 1<<_nbits pointers are added for children. 
+ Return: The number of bytes required to store the tree.*/ +static size_t oc_huff_tree_size(const oc_huff_node *_node){ + size_t size; + size=oc_huff_node_size(_node->nbits); + if(_node->nbits){ + int nchildren; + int i; + nchildren=1<<_node->nbits; + for(i=0;inbits-_node->nodes[i]->depth){ + size+=oc_huff_tree_size(_node->nodes[i]); + } + } + return size; +} + + +/*Unpacks a sub-tree from the given buffer. + _opb: The buffer to unpack from. + _binodes: The nodes to store the sub-tree in. + _nbinodes: The number of nodes available for the sub-tree. + Return: 0 on success, or a negative value on error.*/ +static int oc_huff_tree_unpack(oc_pack_buf *_opb, + oc_huff_node *_binodes,int _nbinodes){ + oc_huff_node *binode; + long bits; + int nused; + if(_nbinodes<1)return TH_EBADHEADER; + binode=_binodes; + nused=0; + bits=oc_pack_read1(_opb); + if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER; + /*Read an internal node:*/ + if(!bits){ + int ret; + nused++; + binode->nbits=1; + binode->depth=1; + binode->nodes[0]=_binodes+nused; + ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused); + if(ret>=0){ + nused+=ret; + binode->nodes[1]=_binodes+nused; + ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused); + } + if(ret<0)return ret; + nused+=ret; + } + /*Read a leaf node:*/ + else{ + int ntokens; + int token; + int i; + bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS); + if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER; + /*Find out how many internal tokens we translate this external token into.*/ + ntokens=OC_DCT_TOKEN_MAP_ENTRIES[bits]; + if(_nbinodes<2*ntokens-1)return TH_EBADHEADER; + /*Fill in a complete binary tree pointing to the internal tokens.*/ + for(i=1;inbits=0; + binode->depth=1; + binode->token=token+i; + } + } + return nused; +} + +/*Finds the depth of shortest branch of the given sub-tree. + The tree must be binary. + _binode: The root of the given sub-tree. + _binode->nbits must be 0 or 1. 
+ Return: The smallest depth of a leaf node in this sub-tree. + 0 indicates this sub-tree is a leaf node.*/ +static int oc_huff_tree_mindepth(oc_huff_node *_binode){ + int depth0; + int depth1; + if(_binode->nbits==0)return 0; + depth0=oc_huff_tree_mindepth(_binode->nodes[0]); + depth1=oc_huff_tree_mindepth(_binode->nodes[1]); + return OC_MINI(depth0,depth1)+1; +} + +/*Finds the number of internal nodes at a given depth, plus the number of + leaves at that depth or shallower. + The tree must be binary. + _binode: The root of the given sub-tree. + _binode->nbits must be 0 or 1. + Return: The number of entries that would be contained in a jump table of the + given depth.*/ +static int oc_huff_tree_occupancy(oc_huff_node *_binode,int _depth){ + if(_binode->nbits==0||_depth<=0)return 1; + else{ + return oc_huff_tree_occupancy(_binode->nodes[0],_depth-1)+ + oc_huff_tree_occupancy(_binode->nodes[1],_depth-1); + } +} + +/*Makes a copy of the given Huffman tree. + _node: The Huffman tree to copy. 
+ Return: The copy of the Huffman tree.*/ +static oc_huff_node *oc_huff_tree_copy(const oc_huff_node *_node, + char **_storage){ + oc_huff_node *ret; + ret=oc_huff_node_init(_storage,oc_huff_node_size(_node->nbits),_node->nbits); + ret->depth=_node->depth; + if(_node->nbits){ + int nchildren; + int i; + int inext; + nchildren=1<<_node->nbits; + for(i=0;inodes[i]=oc_huff_tree_copy(_node->nodes[i],_storage); + inext=i+(1<<_node->nbits-ret->nodes[i]->depth); + while(++inodes[i]=ret->nodes[i-1]; + } + } + else ret->token=_node->token; + return ret; +} + +static size_t oc_huff_tree_collapse_size(oc_huff_node *_binode,int _depth){ + size_t size; + int mindepth; + int depth; + int loccupancy; + int occupancy; + if(_binode->nbits!=0&&_depth>0){ + return oc_huff_tree_collapse_size(_binode->nodes[0],_depth-1)+ + oc_huff_tree_collapse_size(_binode->nodes[1],_depth-1); + } + depth=mindepth=oc_huff_tree_mindepth(_binode); + occupancy=1<loccupancy&&occupancy>=1<0){ + size+=oc_huff_tree_collapse_size(_binode->nodes[0],depth-1); + size+=oc_huff_tree_collapse_size(_binode->nodes[1],depth-1); + } + return size; +} + +static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode, + char **_storage); + +/*Fills the given nodes table with all the children in the sub-tree at the + given depth. + The nodes in the sub-tree with a depth less than that stored in the table + are freed. + The sub-tree must be binary and complete up until the given depth. + _nodes: The nodes table to fill. + _binode: The root of the sub-tree to fill it with. + _binode->nbits must be 0 or 1. + _level: The current level in the table. + 0 indicates that the current node should be stored, regardless of + whether it is a leaf node or an internal node. 
+ _depth: The depth of the nodes to fill the table with, relative to their + parent.*/ +static void oc_huff_node_fill(oc_huff_node **_nodes, + oc_huff_node *_binode,int _level,int _depth,char **_storage){ + if(_level<=0||_binode->nbits==0){ + int i; + _binode->depth=(unsigned char)(_depth-_level); + _nodes[0]=oc_huff_tree_collapse(_binode,_storage); + for(i=1;i<1<<_level;i++)_nodes[i]=_nodes[0]; + } + else{ + _level--; + oc_huff_node_fill(_nodes,_binode->nodes[0],_level,_depth,_storage); + _nodes+=1<<_level; + oc_huff_node_fill(_nodes,_binode->nodes[1],_level,_depth,_storage); + } +} + +/*Finds the largest complete sub-tree rooted at the current node and collapses + it into a single node. + This procedure is then applied recursively to all the children of that node. + _binode: The root of the sub-tree to collapse. + _binode->nbits must be 0 or 1. + Return: The new root of the collapsed sub-tree.*/ +static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode, + char **_storage){ + oc_huff_node *root; + size_t size; + int mindepth; + int depth; + int loccupancy; + int occupancy; + depth=mindepth=oc_huff_tree_mindepth(_binode); + occupancy=1<loccupancy&&occupancy>=1<depth=_binode->depth; + oc_huff_node_fill(root->nodes,_binode,depth,depth,_storage); + return root; +} + +/*Unpacks a set of Huffman trees, and reduces them to a collapsed + representation. + _opb: The buffer to unpack the trees from. + _nodes: The table to fill with the Huffman trees. + Return: 0 on success, or a negative value on error.*/ +int oc_huff_trees_unpack(oc_pack_buf *_opb, + oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){ + int i; + for(i=0;i0)_ogg_free(_dst[i]); + return TH_EFAULT; + } + _dst[i]=oc_huff_tree_copy(_src[i],&storage); + } + return 0; +} + +/*Frees the memory used by a set of Huffman trees. 
+ _nodes: The array of trees to free.*/ +void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){ + int i; + for(i=0;inbits!=0){ + bits=oc_pack_look(_opb,_node->nbits); + _node=_node->nodes[bits]; + oc_pack_adv(_opb,_node->depth); + } + return _node->token; +} diff --git a/thirdparty/libtheora/huffdec.h b/thirdparty/libtheora/huffdec.h new file mode 100644 index 0000000000..d7ffa0e99b --- /dev/null +++ b/thirdparty/libtheora/huffdec.h @@ -0,0 +1,92 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: huffdec.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#if !defined(_huffdec_H) +# define _huffdec_H (1) +# include "huffman.h" +# include "bitpack.h" + + + +typedef struct oc_huff_node oc_huff_node; + +/*A node in the Huffman tree. 
+ Instead of storing every branching in the tree, subtrees can be collapsed + into one node, with a table of size 1< +#include +#include +#include "huffenc.h" + + + +/*The default Huffman codes used for VP3.1.*/ +const th_huff_code TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]={ + { + {0x002D, 6},{0x0026, 7},{0x0166, 9},{0x004E, 8}, + {0x02CE,10},{0x059E,11},{0x027D,11},{0x0008, 5}, + {0x04F9,12},{0x000F, 4},{0x000E, 4},{0x001B, 5}, + {0x0006, 4},{0x0008, 4},{0x0005, 4},{0x001A, 5}, + {0x0015, 5},{0x0007, 4},{0x000C, 4},{0x0001, 3}, + {0x0000, 3},{0x0009, 4},{0x0017, 5},{0x0029, 6}, + {0x0028, 6},{0x00B2, 8},{0x04F8,12},{0x059F,11}, + {0x009E, 9},{0x013F,10},{0x0012, 6},{0x0058, 7} + }, + { + {0x0010, 5},{0x0047, 7},{0x01FF, 9},{0x008C, 8}, + {0x03FC,10},{0x046A,11},{0x0469,11},{0x0022, 6}, + {0x11A1,13},{0x000E, 4},{0x000D, 4},{0x0004, 4}, + {0x0005, 4},{0x0009, 4},{0x0006, 4},{0x001E, 5}, + {0x0016, 5},{0x0007, 4},{0x000C, 4},{0x0001, 3}, + {0x0000, 3},{0x000A, 4},{0x0017, 5},{0x007D, 7}, + {0x007E, 7},{0x011B, 9},{0x08D1,12},{0x03FD,10}, + {0x046B,11},{0x11A0,13},{0x007C, 7},{0x00FE, 8} + }, + { + {0x0016, 5},{0x0020, 6},{0x0086, 8},{0x0087, 8}, + {0x0367,10},{0x06CC,11},{0x06CB,11},{0x006E, 7}, + {0x366D,14},{0x000F, 4},{0x000E, 4},{0x0004, 4}, + {0x0005, 4},{0x000A, 4},{0x0006, 4},{0x001A, 5}, + {0x0011, 5},{0x0007, 4},{0x000C, 4},{0x0001, 3}, + {0x0000, 3},{0x0009, 4},{0x0017, 5},{0x006F, 7}, + {0x006D, 7},{0x0364,10},{0x0D9A,12},{0x06CA,11}, + {0x1B37,13},{0x366C,14},{0x0042, 7},{0x00D8, 8} + }, + { + {0x0000, 4},{0x002D, 6},{0x00F7, 8},{0x0058, 7}, + {0x0167, 9},{0x02CB,10},{0x02CA,10},{0x000E, 6}, + {0x1661,13},{0x0003, 3},{0x0002, 3},{0x0008, 4}, + {0x0009, 4},{0x000D, 4},{0x0002, 4},{0x001F, 5}, + {0x0017, 5},{0x0001, 4},{0x000C, 4},{0x000E, 4}, + {0x000A, 4},{0x0006, 5},{0x0078, 7},{0x000F, 6}, + {0x007A, 7},{0x0164, 9},{0x0599,11},{0x02CD,10}, + {0x0B31,12},{0x1660,13},{0x0079, 7},{0x00F6, 8} + }, + { + {0x0003, 4},{0x003C, 6},{0x000F, 
7},{0x007A, 7}, + {0x001D, 8},{0x0020, 9},{0x0072,10},{0x0006, 6}, + {0x0399,13},{0x0004, 3},{0x0005, 3},{0x0005, 4}, + {0x0006, 4},{0x000E, 4},{0x0004, 4},{0x0000, 4}, + {0x0019, 5},{0x0002, 4},{0x000D, 4},{0x0007, 4}, + {0x001F, 5},{0x0030, 6},{0x0011, 8},{0x0031, 6}, + {0x0005, 6},{0x0021, 9},{0x00E7,11},{0x0038, 9}, + {0x01CD,12},{0x0398,13},{0x007B, 7},{0x0009, 7} + }, + { + {0x0009, 4},{0x0002, 5},{0x0074, 7},{0x0007, 6}, + {0x00EC, 8},{0x00D1, 9},{0x01A6,10},{0x0006, 6}, + {0x0D21,13},{0x0005, 3},{0x0006, 3},{0x0008, 4}, + {0x0007, 4},{0x000F, 4},{0x0004, 4},{0x0000, 4}, + {0x001C, 5},{0x0002, 4},{0x0005, 4},{0x0003, 4}, + {0x000C, 5},{0x0035, 7},{0x01A7,10},{0x001B, 6}, + {0x0077, 7},{0x01A5,10},{0x0349,11},{0x00D0, 9}, + {0x0691,12},{0x0D20,13},{0x0075, 7},{0x00ED, 8} + }, + { + {0x000A, 4},{0x000C, 5},{0x0012, 6},{0x001B, 6}, + {0x00B7, 8},{0x016C, 9},{0x0099, 9},{0x005A, 7}, + {0x16D8,13},{0x0007, 3},{0x0006, 3},{0x0009, 4}, + {0x0008, 4},{0x0000, 3},{0x0005, 4},{0x0017, 5}, + {0x000E, 5},{0x0002, 4},{0x0003, 4},{0x000F, 5}, + {0x001A, 6},{0x004D, 8},{0x2DB3,14},{0x002C, 6}, + {0x0011, 6},{0x02DA,10},{0x05B7,11},{0x0098, 9}, + {0x0B6D,12},{0x2DB2,14},{0x0010, 6},{0x0027, 7} + }, + { + {0x000D, 4},{0x000F, 5},{0x001D, 6},{0x0008, 5}, + {0x0051, 7},{0x0056, 8},{0x00AF, 9},{0x002A, 7}, + {0x148A,13},{0x0007, 3},{0x0000, 2},{0x0008, 4}, + {0x0009, 4},{0x000C, 4},{0x0006, 4},{0x0017, 5}, + {0x000B, 5},{0x0016, 5},{0x0015, 5},{0x0009, 5}, + {0x0050, 7},{0x00AE, 9},{0x2917,14},{0x001C, 6}, + {0x0014, 6},{0x0290,10},{0x0523,11},{0x0149, 9}, + {0x0A44,12},{0x2916,14},{0x0053, 7},{0x00A5, 8} + }, + { + {0x0001, 4},{0x001D, 6},{0x00F5, 8},{0x00F4, 8}, + {0x024D,10},{0x0499,11},{0x0498,11},{0x0001, 5}, + {0x0021, 6},{0x0006, 3},{0x0005, 3},{0x0006, 4}, + {0x0005, 4},{0x0002, 4},{0x0007, 5},{0x0025, 6}, + {0x007B, 7},{0x001C, 6},{0x0020, 6},{0x000D, 6}, + {0x0048, 7},{0x0092, 8},{0x0127, 9},{0x000E, 4}, + {0x0004, 4},{0x0011, 5},{0x000C, 6},{0x003C, 6}, + {0x000F, 
5},{0x0000, 5},{0x001F, 5},{0x0013, 5} + }, + { + {0x0005, 4},{0x003C, 6},{0x0040, 7},{0x000D, 7}, + {0x0031, 9},{0x0061,10},{0x0060,10},{0x0002, 5}, + {0x00F5, 8},{0x0006, 3},{0x0005, 3},{0x0007, 4}, + {0x0006, 4},{0x0002, 4},{0x0009, 5},{0x0025, 6}, + {0x0007, 6},{0x0021, 6},{0x0024, 6},{0x0010, 6}, + {0x0041, 7},{0x00F4, 8},{0x0019, 8},{0x000E, 4}, + {0x0003, 4},{0x0011, 5},{0x0011, 6},{0x003F, 6}, + {0x003E, 6},{0x007B, 7},{0x0000, 4},{0x0013, 5} + }, + { + {0x000A, 4},{0x0007, 5},{0x0001, 6},{0x0009, 6}, + {0x0131, 9},{0x0261,10},{0x0260,10},{0x0015, 6}, + {0x0001, 7},{0x0007, 3},{0x0006, 3},{0x0008, 4}, + {0x0007, 4},{0x0006, 4},{0x0012, 5},{0x002F, 6}, + {0x0014, 6},{0x0027, 6},{0x002D, 6},{0x0016, 6}, + {0x004D, 7},{0x0099, 8},{0x0000, 7},{0x0004, 4}, + {0x0001, 4},{0x0005, 5},{0x0017, 6},{0x002E, 6}, + {0x002C, 6},{0x0008, 6},{0x0006, 5},{0x0001, 5} + }, + { + {0x0000, 3},{0x000E, 5},{0x0017, 6},{0x002A, 6}, + {0x0010, 7},{0x00F9,10},{0x00F8,10},{0x001E, 7}, + {0x003F, 8},{0x0007, 3},{0x0006, 3},{0x0009, 4}, + {0x0008, 4},{0x0006, 4},{0x000F, 5},{0x0005, 5}, + {0x0016, 6},{0x0029, 6},{0x002B, 6},{0x0015, 6}, + {0x0050, 7},{0x0011, 7},{0x007D, 9},{0x0004, 4}, + {0x0017, 5},{0x0006, 5},{0x0014, 6},{0x002C, 6}, + {0x002D, 6},{0x000E, 6},{0x0009, 6},{0x0051, 7} + }, + { + {0x0002, 3},{0x0018, 5},{0x002F, 6},{0x000D, 5}, + {0x0053, 7},{0x0295,10},{0x0294,10},{0x00A4, 8}, + {0x007C, 8},{0x0000, 2},{0x0007, 3},{0x0009, 4}, + {0x0008, 4},{0x001B, 5},{0x000C, 5},{0x0028, 6}, + {0x006A, 7},{0x001E, 6},{0x001D, 6},{0x0069, 7}, + {0x00D7, 8},{0x007D, 8},{0x014B, 9},{0x0019, 5}, + {0x0016, 5},{0x002E, 6},{0x001C, 6},{0x002B, 6}, + {0x002A, 6},{0x0068, 7},{0x003F, 7},{0x00D6, 8} + }, + { + {0x0002, 3},{0x001B, 5},{0x000C, 5},{0x0018, 5}, + {0x0029, 6},{0x007F, 8},{0x02F0,10},{0x0198, 9}, + {0x0179, 9},{0x0000, 2},{0x0007, 3},{0x0009, 4}, + {0x0008, 4},{0x001A, 5},{0x000D, 5},{0x002A, 6}, + {0x0064, 7},{0x001E, 6},{0x0067, 7},{0x005F, 7}, + {0x00CD, 8},{0x007E, 
8},{0x02F1,10},{0x0016, 5}, + {0x000E, 5},{0x002E, 6},{0x0065, 7},{0x002B, 6}, + {0x0028, 6},{0x003E, 7},{0x00BD, 8},{0x0199, 9} + }, + { + {0x0002, 3},{0x0007, 4},{0x0016, 5},{0x0006, 4}, + {0x0036, 6},{0x005C, 7},{0x015D, 9},{0x015C, 9}, + {0x02BF,10},{0x0000, 2},{0x0007, 3},{0x0009, 4}, + {0x0008, 4},{0x0018, 5},{0x0034, 6},{0x002A, 6}, + {0x005E, 7},{0x006A, 7},{0x0064, 7},{0x005D, 7}, + {0x00CB, 8},{0x00AD, 8},{0x02BE,10},{0x0014, 5}, + {0x0033, 6},{0x006E, 7},{0x005F, 7},{0x006F, 7}, + {0x006B, 7},{0x00CA, 8},{0x00AC, 8},{0x015E, 9} + }, + { + {0x000F, 4},{0x001D, 5},{0x0018, 5},{0x000B, 4}, + {0x0019, 5},{0x0029, 6},{0x00D6, 8},{0x0551,11}, + {0x0AA1,12},{0x0001, 2},{0x0000, 2},{0x0009, 4}, + {0x0008, 4},{0x001B, 5},{0x0038, 6},{0x0028, 6}, + {0x0057, 7},{0x006A, 7},{0x0068, 7},{0x0056, 7}, + {0x00E5, 8},{0x0155, 9},{0x0AA0,12},{0x0073, 7}, + {0x0069, 7},{0x00D7, 8},{0x00AB, 8},{0x00E4, 8}, + {0x00A9, 8},{0x0151, 9},{0x0150, 9},{0x02A9,10} + }, + { + {0x0008, 5},{0x0025, 7},{0x017A, 9},{0x02F7,10}, + {0x0BDB,12},{0x17B4,13},{0x2F6B,14},{0x001D, 5}, + {0x2F6A,14},{0x0008, 4},{0x0007, 4},{0x0001, 4}, + {0x0002, 4},{0x000A, 4},{0x0006, 4},{0x0000, 4}, + {0x001C, 5},{0x0009, 4},{0x000D, 4},{0x000F, 4}, + {0x000C, 4},{0x0003, 4},{0x000A, 5},{0x0016, 5}, + {0x0013, 6},{0x005D, 7},{0x0024, 7},{0x00BC, 8}, + {0x005C, 7},{0x05EC,11},{0x000B, 5},{0x005F, 7} + }, + { + {0x000F, 5},{0x0010, 6},{0x004B, 8},{0x00C6, 8}, + {0x031D,10},{0x0C71,12},{0x0C70,12},{0x0001, 4}, + {0x0C73,12},{0x0008, 4},{0x0009, 4},{0x0002, 4}, + {0x0003, 4},{0x000B, 4},{0x0006, 4},{0x0000, 4}, + {0x001C, 5},{0x0005, 4},{0x000D, 4},{0x000F, 4}, + {0x000A, 4},{0x0019, 5},{0x0013, 6},{0x001D, 5}, + {0x0030, 6},{0x0062, 7},{0x0024, 7},{0x004A, 8}, + {0x018F, 9},{0x0C72,12},{0x000E, 5},{0x0011, 6} + }, + { + {0x001B, 5},{0x0003, 6},{0x008D, 8},{0x0040, 7}, + {0x0239,10},{0x0471,11},{0x08E0,12},{0x0003, 4}, + {0x11C3,13},{0x000A, 4},{0x0009, 4},{0x0004, 4}, + {0x0005, 4},{0x000E, 4},{0x0007, 
4},{0x0001, 4}, + {0x001E, 5},{0x0006, 4},{0x000C, 4},{0x000B, 4}, + {0x0002, 4},{0x0000, 5},{0x0041, 7},{0x001F, 5}, + {0x0022, 6},{0x0002, 6},{0x008F, 8},{0x008C, 8}, + {0x011D, 9},{0x11C2,13},{0x001A, 5},{0x0021, 6} + }, + { + {0x001F, 5},{0x0003, 6},{0x0003, 7},{0x0043, 7}, + {0x000B, 9},{0x0015,10},{0x0051,12},{0x0003, 4}, + {0x0050,12},{0x000D, 4},{0x000C, 4},{0x0004, 4}, + {0x0006, 4},{0x000E, 4},{0x000A, 4},{0x0001, 4}, + {0x001E, 5},{0x0005, 4},{0x0009, 4},{0x0007, 4}, + {0x0011, 5},{0x0002, 6},{0x0004, 8},{0x0002, 4}, + {0x002D, 6},{0x0020, 6},{0x0042, 7},{0x0001, 7}, + {0x0000, 7},{0x0029,11},{0x0017, 5},{0x002C, 6} + }, + { + {0x0003, 4},{0x001F, 6},{0x003A, 7},{0x005D, 7}, + {0x0173, 9},{0x02E4,10},{0x172D,13},{0x0004, 4}, + {0x172C,13},{0x000F, 4},{0x000E, 4},{0x0009, 4}, + {0x0008, 4},{0x000C, 4},{0x000A, 4},{0x0001, 4}, + {0x0016, 5},{0x0002, 4},{0x0005, 4},{0x001A, 5}, + {0x002F, 6},{0x0038, 7},{0x05CA,11},{0x0006, 4}, + {0x0037, 6},{0x001E, 6},{0x003B, 7},{0x0039, 7}, + {0x00B8, 8},{0x0B97,12},{0x0000, 4},{0x0036, 6} + }, + { + {0x0006, 4},{0x0037, 6},{0x005D, 7},{0x000C, 6}, + {0x00B9, 8},{0x02E3,10},{0x05C4,11},{0x0004, 4}, + {0x1715,13},{0x0000, 3},{0x000F, 4},{0x0008, 4}, + {0x0007, 4},{0x000C, 4},{0x0009, 4},{0x001D, 5}, + {0x0016, 5},{0x001C, 5},{0x001A, 5},{0x000B, 5}, + {0x005E, 7},{0x0170, 9},{0x1714,13},{0x000A, 4}, + {0x000A, 5},{0x0036, 6},{0x005F, 7},{0x001B, 7}, + {0x001A, 7},{0x0B8B,12},{0x0002, 4},{0x0007, 5} + }, + { + {0x000C, 4},{0x000B, 5},{0x0079, 7},{0x0022, 6}, + {0x00F0, 8},{0x0119, 9},{0x0230,10},{0x001D, 5}, + {0x08C4,12},{0x0001, 3},{0x0000, 3},{0x000A, 4}, + {0x0009, 4},{0x000B, 4},{0x0007, 4},{0x001C, 5}, + {0x003D, 6},{0x000D, 5},{0x0008, 5},{0x0015, 6}, + {0x008D, 8},{0x118B,13},{0x118A,13},{0x000D, 4}, + {0x0010, 5},{0x0009, 5},{0x0014, 6},{0x0047, 7}, + {0x00F1, 8},{0x0463,11},{0x001F, 5},{0x000C, 5} + }, + { + {0x0000, 3},{0x001A, 5},{0x0033, 6},{0x000C, 5}, + {0x0046, 7},{0x01E3, 9},{0x03C5,10},{0x0017, 5}, + 
{0x1E21,13},{0x0002, 3},{0x0001, 3},{0x0009, 4}, + {0x000A, 4},{0x0007, 4},{0x001B, 5},{0x003D, 6}, + {0x001B, 6},{0x0022, 6},{0x0079, 7},{0x00F0, 8}, + {0x1E20,13},{0x1E23,13},{0x1E22,13},{0x000E, 4}, + {0x0016, 5},{0x0018, 5},{0x0032, 6},{0x001A, 6}, + {0x0047, 7},{0x0789,11},{0x001F, 5},{0x0010, 5} + }, + { + {0x001D, 5},{0x0061, 7},{0x004E, 8},{0x009E, 9}, + {0x027C,11},{0x09F5,13},{0x09F4,13},{0x0003, 4}, + {0x0060, 7},{0x0000, 3},{0x000F, 4},{0x000B, 4}, + {0x000A, 4},{0x0009, 4},{0x0005, 4},{0x000D, 5}, + {0x0031, 6},{0x0008, 5},{0x0038, 6},{0x0012, 6}, + {0x0026, 7},{0x013F,10},{0x04FB,12},{0x000D, 4}, + {0x0002, 4},{0x000C, 5},{0x0039, 6},{0x001C, 6}, + {0x000F, 5},{0x001D, 6},{0x0008, 4},{0x0019, 5} + }, + { + {0x0007, 4},{0x0019, 6},{0x00AB, 8},{0x00AA, 8}, + {0x0119,10},{0x0461,12},{0x0460,12},{0x001B, 5}, + {0x0047, 8},{0x0001, 3},{0x0000, 3},{0x000C, 4}, + {0x000B, 4},{0x0009, 4},{0x0005, 4},{0x000D, 5}, + {0x0035, 6},{0x003D, 6},{0x003C, 6},{0x0018, 6}, + {0x0022, 7},{0x008D, 9},{0x0231,11},{0x000E, 4}, + {0x001F, 5},{0x0009, 5},{0x002B, 6},{0x0010, 6}, + {0x0034, 6},{0x0054, 7},{0x0008, 4},{0x0014, 5} + }, + { + {0x000C, 4},{0x0005, 5},{0x0008, 6},{0x005B, 7}, + {0x004D, 9},{0x0131,11},{0x0261,12},{0x001A, 5}, + {0x0012, 7},{0x0000, 3},{0x000F, 4},{0x000A, 4}, + {0x0009, 4},{0x0006, 4},{0x001B, 5},{0x0006, 5}, + {0x001C, 6},{0x002C, 6},{0x0015, 6},{0x005A, 7}, + {0x0027, 8},{0x0099,10},{0x0260,12},{0x000E, 4}, + {0x0004, 4},{0x000F, 5},{0x0007, 5},{0x001D, 6}, + {0x000B, 5},{0x0014, 6},{0x0008, 4},{0x0017, 5} + }, + { + {0x000F, 4},{0x0013, 5},{0x0075, 7},{0x0024, 6}, + {0x0095, 8},{0x0251,10},{0x04A0,11},{0x0010, 5}, + {0x00C8, 8},{0x0002, 3},{0x0001, 3},{0x0001, 4}, + {0x0000, 4},{0x001A, 5},{0x0011, 5},{0x002C, 6}, + {0x0065, 7},{0x0074, 7},{0x004B, 7},{0x00C9, 8}, + {0x0129, 9},{0x0943,12},{0x0942,12},{0x0003, 3}, + {0x000A, 4},{0x001C, 5},{0x0018, 5},{0x0033, 6}, + {0x0017, 5},{0x002D, 6},{0x001B, 5},{0x003B, 6} + }, + { + {0x0003, 3},{0x001A, 
5},{0x002D, 6},{0x0038, 6}, + {0x0028, 7},{0x0395,10},{0x0E51,12},{0x0037, 6}, + {0x00E4, 8},{0x0001, 3},{0x0000, 3},{0x001F, 5}, + {0x001E, 5},{0x0017, 5},{0x003A, 6},{0x0073, 7}, + {0x002A, 7},{0x002B, 7},{0x0029, 7},{0x01CB, 9}, + {0x0729,11},{0x1CA1,13},{0x1CA0,13},{0x0004, 3}, + {0x000A, 4},{0x0004, 4},{0x0018, 5},{0x0036, 6}, + {0x000B, 5},{0x002C, 6},{0x0019, 5},{0x003B, 6} + }, + { + {0x0004, 3},{0x0004, 4},{0x003F, 6},{0x0017, 5}, + {0x0075, 7},{0x01F5, 9},{0x07D1,11},{0x0017, 6}, + {0x01F6, 9},{0x0001, 3},{0x0000, 3},{0x001B, 5}, + {0x001A, 5},{0x000A, 5},{0x0032, 6},{0x0074, 7}, + {0x00F8, 8},{0x00F9, 8},{0x01F7, 9},{0x03E9,10}, + {0x0FA0,12},{0x1F43,13},{0x1F42,13},{0x0003, 3}, + {0x000A, 4},{0x001E, 5},{0x001C, 5},{0x003B, 6}, + {0x0018, 5},{0x0016, 6},{0x0016, 5},{0x0033, 6} + }, + { + {0x0004, 3},{0x0007, 4},{0x0018, 5},{0x001E, 5}, + {0x0036, 6},{0x0031, 7},{0x0177, 9},{0x0077, 7}, + {0x0176, 9},{0x0001, 3},{0x0000, 3},{0x001A, 5}, + {0x0019, 5},{0x003A, 6},{0x0019, 6},{0x005C, 7}, + {0x00BA, 8},{0x0061, 8},{0x00C1, 9},{0x0180,10}, + {0x0302,11},{0x0607,12},{0x0606,12},{0x0002, 3}, + {0x000A, 4},{0x001F, 5},{0x001C, 5},{0x0037, 6}, + {0x0016, 5},{0x0076, 7},{0x000D, 5},{0x002F, 6} + }, + { + {0x0000, 3},{0x000A, 4},{0x001A, 5},{0x000C, 4}, + {0x001D, 5},{0x0039, 6},{0x0078, 7},{0x005E, 7}, + {0x0393,11},{0x0002, 3},{0x0001, 3},{0x0016, 5}, + {0x000F, 5},{0x002E, 6},{0x005F, 7},{0x0073, 8}, + {0x00E5, 9},{0x01C8,10},{0x0E4A,13},{0x1C97,14}, + {0x1C96,14},{0x0E49,13},{0x0E48,13},{0x0004, 3}, + {0x0006, 4},{0x001F, 5},{0x001B, 5},{0x001D, 6}, + {0x0038, 6},{0x0038, 7},{0x003D, 6},{0x0079, 7} + }, + { + {0x000B, 5},{0x002B, 7},{0x0054, 8},{0x01B7, 9}, + {0x06D9,11},{0x0DB1,12},{0x0DB0,12},{0x0002, 4}, + {0x00AB, 9},{0x0009, 4},{0x000A, 4},{0x0007, 4}, + {0x0008, 4},{0x000F, 4},{0x000C, 4},{0x0003, 4}, + {0x001D, 5},{0x0004, 4},{0x000B, 4},{0x0006, 4}, + {0x001A, 5},{0x0003, 6},{0x00AA, 9},{0x0001, 4}, + {0x0000, 5},{0x0014, 6},{0x006C, 7},{0x00DA, 8}, + 
{0x0002, 6},{0x036D,10},{0x001C, 5},{0x0037, 6} + }, + { + {0x001D, 5},{0x0004, 6},{0x00B6, 8},{0x006A, 8}, + {0x05B9,11},{0x16E1,13},{0x16E0,13},{0x0007, 4}, + {0x016F, 9},{0x000C, 4},{0x000D, 4},{0x0009, 4}, + {0x0008, 4},{0x000F, 4},{0x000A, 4},{0x0003, 4}, + {0x0017, 5},{0x0002, 4},{0x0004, 4},{0x001C, 5}, + {0x002C, 6},{0x006B, 8},{0x0B71,12},{0x0005, 4}, + {0x0003, 5},{0x001B, 6},{0x005A, 7},{0x0034, 7}, + {0x0005, 6},{0x02DD,10},{0x0000, 4},{0x000C, 5} + }, + { + {0x0003, 4},{0x007F, 7},{0x00A1, 8},{0x00A0, 8}, + {0x020C,10},{0x0834,12},{0x106B,13},{0x0007, 4}, + {0x0082, 8},{0x000E, 4},{0x000D, 4},{0x000B, 4}, + {0x000C, 4},{0x0000, 3},{0x0009, 4},{0x0002, 4}, + {0x0011, 5},{0x001E, 5},{0x0015, 5},{0x003E, 6}, + {0x0040, 7},{0x041B,11},{0x106A,13},{0x0006, 4}, + {0x000A, 5},{0x0029, 6},{0x007E, 7},{0x0051, 7}, + {0x0021, 6},{0x0107, 9},{0x0004, 4},{0x000B, 5} + }, + { + {0x0007, 4},{0x001B, 6},{0x00F6, 8},{0x00E9, 8}, + {0x03A1,10},{0x0740,11},{0x0E82,12},{0x001F, 5}, + {0x01EF, 9},{0x0001, 3},{0x0002, 3},{0x000B, 4}, + {0x000C, 4},{0x000D, 4},{0x0008, 4},{0x001C, 5}, + {0x0003, 5},{0x0012, 5},{0x0002, 5},{0x0075, 7}, + {0x01D1, 9},{0x1D07,13},{0x1D06,13},{0x000A, 4}, + {0x0013, 5},{0x003B, 6},{0x001A, 6},{0x007A, 7}, + {0x003C, 6},{0x01EE, 9},{0x0000, 4},{0x000C, 5} + }, + { + {0x000D, 4},{0x003D, 6},{0x0042, 7},{0x0037, 7}, + {0x00D9, 9},{0x0362,11},{0x06C6,12},{0x001F, 5}, + {0x0086, 8},{0x0001, 3},{0x0002, 3},{0x000C, 4}, + {0x000B, 4},{0x000A, 4},{0x0001, 4},{0x000F, 5}, + {0x0025, 6},{0x003C, 6},{0x001A, 6},{0x0087, 8}, + {0x01B0,10},{0x0D8F,13},{0x0D8E,13},{0x000E, 4}, + {0x0013, 5},{0x000C, 5},{0x0024, 6},{0x0020, 6}, + {0x0011, 5},{0x006D, 8},{0x0000, 4},{0x000E, 5} + }, + { + {0x0000, 3},{0x0012, 5},{0x0076, 7},{0x0077, 7}, + {0x014D, 9},{0x0533,11},{0x14C9,13},{0x0013, 5}, + {0x00A5, 8},{0x0002, 3},{0x0003, 3},{0x000B, 4}, + {0x000C, 4},{0x0008, 4},{0x001A, 5},{0x002B, 6}, + {0x0075, 7},{0x0074, 7},{0x00A7, 8},{0x0298,10}, + 
{0x14C8,13},{0x14CB,13},{0x14CA,13},{0x000F, 4}, + {0x001C, 5},{0x0007, 5},{0x002A, 6},{0x0028, 6}, + {0x001B, 5},{0x00A4, 8},{0x0002, 4},{0x0006, 5} + }, + { + {0x0002, 3},{0x001A, 5},{0x002B, 6},{0x003A, 6}, + {0x00ED, 8},{0x0283,10},{0x0A0A,12},{0x0004, 5}, + {0x00A1, 8},{0x0004, 3},{0x0003, 3},{0x000B, 4}, + {0x000C, 4},{0x001F, 5},{0x0006, 5},{0x0077, 7}, + {0x00A3, 8},{0x00A2, 8},{0x0140, 9},{0x1417,13}, + {0x1416,13},{0x0A09,12},{0x0A08,12},{0x0000, 3}, + {0x001E, 5},{0x0007, 5},{0x002A, 6},{0x0029, 6}, + {0x001C, 5},{0x00EC, 8},{0x001B, 5},{0x0005, 5} + }, + { + {0x0002, 3},{0x0002, 4},{0x0018, 5},{0x001D, 5}, + {0x0035, 6},{0x00E4, 8},{0x01CF,11},{0x001D, 7}, + {0x0072, 9},{0x0004, 3},{0x0005, 3},{0x0006, 4}, + {0x0007, 4},{0x0006, 5},{0x0073, 7},{0x0038, 8}, + {0x01CE,11},{0x039B,12},{0x0398,12},{0x0733,13}, + {0x0732,13},{0x0735,13},{0x0734,13},{0x0000, 3}, + {0x001F, 5},{0x001B, 5},{0x0034, 6},{0x000F, 6}, + {0x001E, 5},{0x00E5, 8},{0x0019, 5},{0x0038, 6} + }, + { + {0x0016, 5},{0x0050, 7},{0x0172, 9},{0x02E7,10}, + {0x1732,13},{0x2E67,14},{0x2E66,14},{0x0006, 4}, + {0x0051, 7},{0x0001, 3},{0x0000, 3},{0x000D, 4}, + {0x000C, 4},{0x0009, 4},{0x001C, 5},{0x0009, 5}, + {0x001C, 6},{0x001D, 6},{0x005D, 7},{0x00B8, 8}, + {0x05CD,11},{0x1731,13},{0x1730,13},{0x000F, 4}, + {0x0005, 4},{0x000F, 5},{0x0008, 5},{0x0029, 6}, + {0x001D, 5},{0x002F, 6},{0x0008, 4},{0x0015, 5} + }, + { + {0x0009, 4},{0x0021, 6},{0x0040, 7},{0x00AD, 8}, + {0x02B0,10},{0x1589,13},{0x1588,13},{0x001C, 5}, + {0x005F, 7},{0x0000, 3},{0x000F, 4},{0x000D, 4}, + {0x000C, 4},{0x0006, 4},{0x0011, 5},{0x002A, 6}, + {0x0057, 7},{0x005E, 7},{0x0041, 7},{0x0159, 9}, + {0x0563,11},{0x158B,13},{0x158A,13},{0x0001, 3}, + {0x0005, 4},{0x0014, 5},{0x003B, 6},{0x002E, 6}, + {0x0004, 4},{0x003A, 6},{0x0007, 4},{0x0016, 5} + }, + { + {0x000E, 4},{0x0007, 5},{0x0046, 7},{0x0045, 7}, + {0x0064, 9},{0x032A,12},{0x0657,13},{0x0018, 5}, + {0x000D, 6},{0x0000, 3},{0x000F, 4},{0x000A, 4}, + {0x000B, 4},{0x001A, 
5},{0x0036, 6},{0x0047, 7}, + {0x0044, 7},{0x0018, 7},{0x0033, 8},{0x00CB,10}, + {0x0656,13},{0x0329,12},{0x0328,12},{0x0002, 3}, + {0x0006, 4},{0x0019, 5},{0x000E, 5},{0x0037, 6}, + {0x0009, 4},{0x000F, 5},{0x0002, 4},{0x0010, 5} + }, + { + {0x0003, 3},{0x0018, 5},{0x0023, 6},{0x0077, 7}, + {0x0194, 9},{0x1956,13},{0x32AF,14},{0x003A, 6}, + {0x0076, 7},{0x0002, 3},{0x0001, 3},{0x001F, 5}, + {0x001E, 5},{0x0014, 5},{0x0022, 6},{0x0064, 7}, + {0x0197, 9},{0x0196, 9},{0x032B,10},{0x0654,11}, + {0x32AE,14},{0x1955,13},{0x1954,13},{0x0000, 3}, + {0x0009, 4},{0x001C, 5},{0x0015, 5},{0x0010, 5}, + {0x000D, 4},{0x0017, 5},{0x0016, 5},{0x0033, 6} + }, + { + {0x0005, 3},{0x0006, 4},{0x003E, 6},{0x0010, 5}, + {0x0048, 7},{0x093F,12},{0x24FA,14},{0x0032, 6}, + {0x0067, 7},{0x0002, 3},{0x0001, 3},{0x001B, 5}, + {0x001E, 5},{0x0034, 6},{0x0066, 7},{0x0092, 8}, + {0x0126, 9},{0x024E,10},{0x049E,11},{0x49F7,15}, + {0x49F6,15},{0x24F9,14},{0x24F8,14},{0x0000, 3}, + {0x0007, 4},{0x0018, 5},{0x0011, 5},{0x003F, 6}, + {0x000E, 4},{0x0013, 5},{0x0035, 6},{0x0025, 6} + }, + { + {0x0005, 3},{0x0008, 4},{0x0012, 5},{0x001C, 5}, + {0x001C, 6},{0x00EA, 9},{0x1D75,14},{0x001E, 6}, + {0x0066, 7},{0x0001, 3},{0x0002, 3},{0x001B, 5}, + {0x001A, 5},{0x001F, 6},{0x003B, 7},{0x0074, 8}, + {0x01D6,10},{0x03AF,11},{0x1D74,14},{0x1D77,14}, + {0x1D76,14},{0x0EB9,13},{0x0EB8,13},{0x000F, 4}, + {0x0006, 4},{0x0013, 5},{0x003B, 6},{0x003A, 6}, + {0x0000, 3},{0x0018, 5},{0x0032, 6},{0x0067, 7} + }, + { + {0x0004, 3},{0x000A, 4},{0x001B, 5},{0x000C, 4}, + {0x000D, 5},{0x00E6, 8},{0x0684,11},{0x0072, 7}, + {0x00E7, 8},{0x0002, 3},{0x0001, 3},{0x0017, 5}, + {0x0016, 5},{0x0018, 6},{0x00D1, 8},{0x01A0, 9}, + {0x0686,11},{0x0D0F,12},{0x0D0A,12},{0x1A17,13}, + {0x1A16,13},{0x1A1D,13},{0x1A1C,13},{0x000F, 4}, + {0x001D, 5},{0x000E, 5},{0x0035, 6},{0x0038, 6}, + {0x0000, 3},{0x000F, 5},{0x0019, 6},{0x0069, 7} + }, + { + {0x0003, 3},{0x000C, 4},{0x001B, 5},{0x0000, 3}, + {0x0003, 4},{0x002E, 6},{0x0051, 
9},{0x00BC, 8}, + {0x0053, 9},{0x0004, 3},{0x0002, 3},{0x0016, 5}, + {0x0015, 5},{0x0015, 7},{0x0050, 9},{0x00A4,10}, + {0x0294,12},{0x052B,13},{0x052A,13},{0x052D,13}, + {0x052C,13},{0x052F,13},{0x052E,13},{0x000E, 4}, + {0x001A, 5},{0x0004, 5},{0x0028, 6},{0x0029, 6}, + {0x000F, 4},{0x000B, 6},{0x005F, 7},{0x00BD, 8} + }, + { + {0x0003, 4},{0x0009, 6},{0x00D0, 8},{0x01A3, 9}, + {0x0344,10},{0x0D14,12},{0x1A2B,13},{0x0004, 4}, + {0x0015, 7},{0x0000, 3},{0x000F, 4},{0x000B, 4}, + {0x000C, 4},{0x000E, 4},{0x0009, 4},{0x001B, 5}, + {0x000A, 5},{0x0014, 5},{0x000D, 5},{0x002A, 6}, + {0x0014, 7},{0x068B,11},{0x1A2A,13},{0x0008, 4}, + {0x000B, 5},{0x002B, 6},{0x000B, 6},{0x0069, 7}, + {0x0035, 6},{0x0008, 6},{0x0007, 4},{0x000C, 5} + }, + { + {0x000A, 4},{0x003C, 6},{0x0032, 7},{0x0030, 7}, + {0x00C5, 9},{0x0621,12},{0x0620,12},{0x001F, 5}, + {0x0033, 7},{0x0001, 3},{0x0000, 3},{0x000E, 4}, + {0x000D, 4},{0x000C, 4},{0x0004, 4},{0x000D, 5}, + {0x0026, 6},{0x0027, 6},{0x0014, 6},{0x0063, 8}, + {0x0189,10},{0x0623,12},{0x0622,12},{0x000B, 4}, + {0x0012, 5},{0x003D, 6},{0x0022, 6},{0x0015, 6}, + {0x000B, 5},{0x0023, 6},{0x0007, 4},{0x0010, 5} + }, + { + {0x000F, 4},{0x000C, 5},{0x0043, 7},{0x0010, 6}, + {0x0044, 8},{0x0114,10},{0x0455,12},{0x0018, 5}, + {0x0023, 7},{0x0001, 3},{0x0000, 3},{0x000E, 4}, + {0x000D, 4},{0x0009, 4},{0x0019, 5},{0x0009, 5}, + {0x0017, 6},{0x0016, 6},{0x0042, 7},{0x008B, 9}, + {0x0454,12},{0x0457,12},{0x0456,12},{0x000B, 4}, + {0x0015, 5},{0x000A, 5},{0x0029, 6},{0x0020, 6}, + {0x000D, 5},{0x0028, 6},{0x0007, 4},{0x0011, 5} + }, + { + {0x0001, 3},{0x001A, 5},{0x0029, 6},{0x002A, 6}, + {0x00A0, 8},{0x0285,10},{0x1425,13},{0x0002, 5}, + {0x0000, 7},{0x0002, 3},{0x0003, 3},{0x000C, 4}, + {0x000B, 4},{0x0008, 4},{0x0012, 5},{0x0001, 6}, + {0x0051, 7},{0x0001, 7},{0x0143, 9},{0x0508,11}, + {0x1424,13},{0x1427,13},{0x1426,13},{0x000F, 4}, + {0x001C, 5},{0x0003, 5},{0x0037, 6},{0x002B, 6}, + {0x0013, 5},{0x0036, 6},{0x001D, 5},{0x0001, 5} + }, + { + 
{0x0004, 3},{0x001F, 5},{0x003D, 6},{0x0006, 5}, + {0x0016, 7},{0x0053, 9},{0x014A,11},{0x0034, 6}, + {0x002A, 8},{0x0002, 3},{0x0003, 3},{0x000B, 4}, + {0x000C, 4},{0x001C, 5},{0x0037, 6},{0x0017, 7}, + {0x002B, 8},{0x0028, 8},{0x00A4,10},{0x052D,13}, + {0x052C,13},{0x052F,13},{0x052E,13},{0x0000, 3}, + {0x001D, 5},{0x0007, 5},{0x0004, 5},{0x0035, 6}, + {0x0014, 5},{0x0036, 6},{0x0015, 5},{0x003C, 6} + }, + { + {0x0004, 3},{0x000A, 4},{0x0007, 5},{0x001D, 5}, + {0x0009, 6},{0x01F3, 9},{0x07C7,11},{0x0008, 6}, + {0x01F0, 9},{0x0003, 3},{0x0002, 3},{0x000D, 4}, + {0x000C, 4},{0x0017, 5},{0x007D, 7},{0x01F2, 9}, + {0x07C6,11},{0x07C5,11},{0x1F12,13},{0x3E27,14}, + {0x3E26,14},{0x1F11,13},{0x1F10,13},{0x0000, 3}, + {0x001E, 5},{0x0006, 5},{0x0039, 6},{0x0038, 6}, + {0x003F, 6},{0x002C, 6},{0x0005, 5},{0x002D, 6} + }, + { + {0x0002, 3},{0x0007, 4},{0x0018, 5},{0x0003, 4}, + {0x0005, 5},{0x0035, 7},{0x004F, 9},{0x0012, 7}, + {0x04E5,13},{0x0005, 3},{0x0004, 3},{0x000D, 4}, + {0x000E, 4},{0x0033, 6},{0x0026, 8},{0x009D,10}, + {0x04E4,13},{0x04E7,13},{0x04E6,13},{0x04E1,13}, + {0x04E0,13},{0x04E3,13},{0x04E2,13},{0x0000, 3}, + {0x001F, 5},{0x000C, 5},{0x003D, 6},{0x003C, 6}, + {0x0032, 6},{0x0034, 7},{0x001B, 6},{0x0008, 6} + }, + { + {0x0000, 3},{0x0004, 4},{0x001C, 5},{0x000F, 4}, + {0x0002, 4},{0x0007, 5},{0x0075, 7},{0x00E8, 8}, + {0x1D2A,13},{0x0005, 3},{0x0004, 3},{0x000D, 4}, + {0x000C, 4},{0x0077, 7},{0x0E96,12},{0x3A57,14}, + {0x3A56,14},{0x3A5D,14},{0x3A5C,14},{0x3A5F,14}, + {0x3A5E,14},{0x1D29,13},{0x1D28,13},{0x0003, 3}, + {0x0006, 5},{0x000A, 5},{0x002C, 7},{0x0017, 6}, + {0x0076, 7},{0x01D3, 9},{0x03A4,10},{0x002D, 7} + }, + { + {0x000A, 4},{0x0024, 6},{0x00BF, 8},{0x0085, 8}, + {0x0211,10},{0x0842,12},{0x1087,13},{0x0018, 5}, + {0x0020, 6},{0x0001, 3},{0x0002, 3},{0x000E, 4}, + {0x000D, 4},{0x0007, 4},{0x0013, 5},{0x0025, 6}, + {0x005E, 7},{0x0043, 7},{0x00BE, 8},{0x0109, 9}, + {0x1086,13},{0x0841,12},{0x0840,12},{0x000F, 4}, + {0x0001, 4},{0x0011, 
5},{0x0000, 5},{0x002E, 6}, + {0x0019, 5},{0x0001, 5},{0x0006, 4},{0x0016, 5} + }, + { + {0x0002, 3},{0x000F, 5},{0x006F, 7},{0x0061, 7}, + {0x0374,10},{0x1BA8,13},{0x3753,14},{0x0012, 5}, + {0x0036, 6},{0x0000, 3},{0x0001, 3},{0x000A, 4}, + {0x000B, 4},{0x001A, 5},{0x0031, 6},{0x0060, 7}, + {0x00DC, 8},{0x01BB, 9},{0x06EB,11},{0x1BAB,13}, + {0x3752,14},{0x3755,14},{0x3754,14},{0x000E, 4}, + {0x0006, 4},{0x0013, 5},{0x000E, 5},{0x003E, 6}, + {0x0008, 4},{0x001E, 5},{0x0019, 5},{0x003F, 6} + }, + { + {0x0003, 3},{0x001C, 5},{0x0025, 6},{0x0024, 6}, + {0x01DA, 9},{0x1DBD,13},{0x3B7C,14},{0x003C, 6}, + {0x003D, 6},{0x0000, 3},{0x0001, 3},{0x000B, 4}, + {0x000A, 4},{0x000B, 5},{0x0077, 7},{0x00EC, 8}, + {0x03B6,10},{0x076E,11},{0x1DBF,13},{0x76FB,15}, + {0x76FA,15},{0x3B79,14},{0x3B78,14},{0x000D, 4}, + {0x001F, 5},{0x0013, 5},{0x000A, 5},{0x0008, 5}, + {0x000C, 4},{0x0008, 4},{0x0009, 5},{0x003A, 6} + }, + { + {0x0005, 3},{0x0003, 4},{0x0004, 5},{0x0010, 5}, + {0x008F, 8},{0x0475,11},{0x11D1,13},{0x0079, 7}, + {0x0027, 6},{0x0002, 3},{0x0003, 3},{0x0001, 4}, + {0x0000, 4},{0x0026, 6},{0x0046, 7},{0x011C, 9}, + {0x0477,11},{0x08ED,12},{0x11D0,13},{0x11D3,13}, + {0x11D2,13},{0x11D9,13},{0x11D8,13},{0x000D, 4}, + {0x001F, 5},{0x0012, 5},{0x0005, 5},{0x003D, 6}, + {0x000C, 4},{0x000E, 4},{0x0022, 6},{0x0078, 7} + }, + { + {0x0005, 3},{0x000C, 4},{0x001B, 5},{0x0000, 4}, + {0x0006, 6},{0x03E2,10},{0x3E3D,14},{0x000F, 7}, + {0x0034, 6},{0x0003, 3},{0x0002, 3},{0x001E, 5}, + {0x001D, 5},{0x007D, 7},{0x01F0, 9},{0x07C6,11}, + {0x3E3C,14},{0x3E3F,14},{0x3E3E,14},{0x3E39,14}, + {0x3E38,14},{0x3E3B,14},{0x3E3A,14},{0x0008, 4}, + {0x001C, 5},{0x0002, 5},{0x003F, 6},{0x0035, 6}, + {0x0009, 4},{0x0001, 3},{0x000E, 7},{0x00F9, 8} + }, + { + {0x0004, 3},{0x000B, 4},{0x0001, 4},{0x000A, 4}, + {0x001E, 6},{0x00E0, 9},{0x0E1E,13},{0x0071, 8}, + {0x0039, 7},{0x0007, 3},{0x0006, 3},{0x000D, 5}, + {0x000C, 5},{0x0020, 7},{0x01C2,10},{0x1C3F,14}, + 
{0x1C3E,14},{0x0E19,13},{0x0E18,13},{0x0E1B,13}, + {0x0E1A,13},{0x0E1D,13},{0x0E1C,13},{0x0000, 4}, + {0x0009, 5},{0x001D, 6},{0x001F, 6},{0x0011, 6}, + {0x0005, 4},{0x0001, 3},{0x0043, 8},{0x0042, 8} + }, + { + {0x0004, 3},{0x000D, 4},{0x0007, 4},{0x0002, 3}, + {0x0014, 5},{0x016C, 9},{0x16D1,13},{0x02DF,10}, + {0x016E, 9},{0x0000, 2},{0x0007, 3},{0x002C, 6}, + {0x002B, 6},{0x02DE,10},{0x16D0,13},{0x16D3,13}, + {0x16D2,13},{0x2DB5,14},{0x2DB4,14},{0x2DB7,14}, + {0x2DB6,14},{0x16D9,13},{0x16D8,13},{0x000C, 5}, + {0x002A, 6},{0x005A, 7},{0x001B, 6},{0x001A, 6}, + {0x0017, 5},{0x000C, 4},{0x05B7,11},{0x05B5,11} + }, + { + {0x0002, 2},{0x000F, 4},{0x001C, 5},{0x000C, 4}, + {0x003B, 6},{0x01AC, 9},{0x1AD8,13},{0x35B3,14}, + {0x35B2,14},{0x0001, 2},{0x0000, 2},{0x0069, 7}, + {0x0068, 7},{0x35BD,14},{0x35BC,14},{0x35BF,14}, + {0x35BE,14},{0x35B9,14},{0x35B8,14},{0x35BB,14}, + {0x35BA,14},{0x35B5,14},{0x35B4,14},{0x01A9, 9}, + {0x01A8, 9},{0x035A,10},{0x00D7, 8},{0x00D5, 8}, + {0x003A, 6},{0x001B, 5},{0x35B7,14},{0x35B6,14} + }, + { + {0x0000, 3},{0x0010, 5},{0x0072, 7},{0x0071, 7}, + {0x0154, 9},{0x0AAB,12},{0x0AA8,12},{0x0014, 5}, + {0x0070, 7},{0x0002, 3},{0x0003, 3},{0x000C, 4}, + {0x000B, 4},{0x0003, 4},{0x0011, 5},{0x0073, 7}, + {0x0054, 7},{0x00AB, 8},{0x02AB,10},{0x1553,13}, + {0x1552,13},{0x1555,13},{0x1554,13},{0x000D, 4}, + {0x001E, 5},{0x0012, 5},{0x003E, 6},{0x002B, 6}, + {0x0002, 4},{0x003F, 6},{0x001D, 5},{0x0013, 5} + }, + { + {0x0003, 3},{0x001F, 5},{0x0029, 6},{0x003D, 6}, + {0x000C, 7},{0x0069,10},{0x0345,13},{0x0002, 5}, + {0x0028, 6},{0x0002, 3},{0x0001, 3},{0x000E, 4}, + {0x000C, 4},{0x0015, 5},{0x0007, 6},{0x001B, 8}, + {0x006B,10},{0x006A,10},{0x0344,13},{0x0347,13}, + {0x0346,13},{0x01A1,12},{0x01A0,12},{0x000B, 4}, + {0x001A, 5},{0x0012, 5},{0x0000, 5},{0x003C, 6}, + {0x0008, 4},{0x001B, 5},{0x0013, 5},{0x0001, 5} + }, + { + {0x0004, 3},{0x0004, 4},{0x003F, 6},{0x0014, 5}, + {0x0056, 7},{0x015C, 9},{0x15D5,13},{0x003C, 6}, + {0x002A, 6},{0x0000, 
3},{0x0001, 3},{0x000E, 4}, + {0x000D, 4},{0x000C, 5},{0x00AF, 8},{0x02BB,10}, + {0x15D4,13},{0x15D7,13},{0x15D6,13},{0x15D1,13}, + {0x15D0,13},{0x15D3,13},{0x15D2,13},{0x000B, 4}, + {0x0019, 5},{0x000D, 5},{0x003E, 6},{0x0031, 6}, + {0x0007, 4},{0x0005, 4},{0x003D, 6},{0x0030, 6} + }, + { + {0x0005, 3},{0x0008, 4},{0x001A, 5},{0x0000, 4}, + {0x0036, 6},{0x0011, 8},{0x0106,12},{0x000A, 7}, + {0x006E, 7},{0x0002, 3},{0x0003, 3},{0x0003, 4}, + {0x0002, 4},{0x006F, 7},{0x0021, 9},{0x020F,13}, + {0x020E,13},{0x0101,12},{0x0100,12},{0x0103,12}, + {0x0102,12},{0x0105,12},{0x0104,12},{0x000C, 4}, + {0x001E, 5},{0x0003, 5},{0x003E, 6},{0x003F, 6}, + {0x0009, 4},{0x000E, 4},{0x000B, 7},{0x0009, 7} + }, + { + {0x0002, 3},{0x000E, 4},{0x001E, 5},{0x000C, 4}, + {0x001F, 5},{0x006E, 7},{0x00AD,10},{0x00AF,10}, + {0x0014, 7},{0x0004, 3},{0x0003, 3},{0x001A, 5}, + {0x0017, 5},{0x002A, 8},{0x0576,13},{0x0AEF,14}, + {0x0AEE,14},{0x0571,13},{0x0570,13},{0x0573,13}, + {0x0572,13},{0x0575,13},{0x0574,13},{0x0003, 4}, + {0x0016, 5},{0x0004, 5},{0x0036, 6},{0x000B, 6}, + {0x000A, 4},{0x0000, 3},{0x006F, 7},{0x00AC,10} + }, + { + {0x0004, 3},{0x0005, 4},{0x0003, 3},{0x0001, 3}, + {0x0004, 4},{0x002F, 6},{0x0526,11},{0x1495,13}, + {0x00A6, 8},{0x0007, 3},{0x0006, 3},{0x002D, 6}, + {0x002C, 6},{0x1494,13},{0x1497,13},{0x1496,13}, + {0x1491,13},{0x1490,13},{0x1493,13},{0x1492,13}, + {0x293D,14},{0x293C,14},{0x293F,14},{0x0000, 3}, + {0x0028, 6},{0x00A5, 8},{0x0148, 9},{0x00A7, 8}, + {0x002E, 6},{0x0015, 5},{0x0A4E,12},{0x293E,14} + }, + { + {0x0004, 3},{0x0005, 4},{0x0003, 3},{0x0001, 3}, + {0x0004, 4},{0x002F, 6},{0x0526,11},{0x1495,13}, + {0x00A6, 8},{0x0007, 3},{0x0006, 3},{0x002D, 6}, + {0x002C, 6},{0x1494,13},{0x1497,13},{0x1496,13}, + {0x1491,13},{0x1490,13},{0x1493,13},{0x1492,13}, + {0x293D,14},{0x293C,14},{0x293F,14},{0x0000, 3}, + {0x0028, 6},{0x00A5, 8},{0x0148, 9},{0x00A7, 8}, + {0x002E, 6},{0x0015, 5},{0x0A4E,12},{0x293E,14} + }, + { + {0x0004, 3},{0x0005, 4},{0x0003, 
3},{0x0001, 3}, + {0x0004, 4},{0x002F, 6},{0x0526,11},{0x1495,13}, + {0x00A6, 8},{0x0007, 3},{0x0006, 3},{0x002D, 6}, + {0x002C, 6},{0x1494,13},{0x1497,13},{0x1496,13}, + {0x1491,13},{0x1490,13},{0x1493,13},{0x1492,13}, + {0x293D,14},{0x293C,14},{0x293F,14},{0x0000, 3}, + {0x0028, 6},{0x00A5, 8},{0x0148, 9},{0x00A7, 8}, + {0x002E, 6},{0x0015, 5},{0x0A4E,12},{0x293E,14} + }, + { + {0x0003, 3},{0x0011, 5},{0x0020, 6},{0x0074, 7}, + {0x010D, 9},{0x0863,12},{0x0860,12},{0x000A, 5}, + {0x0075, 7},{0x0001, 3},{0x0000, 3},{0x000B, 4}, + {0x000A, 4},{0x0018, 5},{0x0038, 6},{0x0042, 7}, + {0x010F, 9},{0x010E, 9},{0x0219,10},{0x10C3,13}, + {0x10C2,13},{0x10C5,13},{0x10C4,13},{0x000F, 4}, + {0x0004, 4},{0x0019, 5},{0x000B, 5},{0x0039, 6}, + {0x0009, 4},{0x001B, 5},{0x001A, 5},{0x003B, 6} + }, + { + {0x0005, 3},{0x0001, 4},{0x003E, 6},{0x0001, 5}, + {0x00E2, 8},{0x1C6F,13},{0x38D9,14},{0x0039, 6}, + {0x001F, 6},{0x0002, 3},{0x0001, 3},{0x0009, 4}, + {0x0008, 4},{0x0000, 5},{0x0070, 7},{0x01C7, 9}, + {0x038C,10},{0x071A,11},{0x38D8,14},{0x38DB,14}, + {0x38DA,14},{0x38DD,14},{0x38DC,14},{0x000D, 4}, + {0x001D, 5},{0x000E, 5},{0x003F, 6},{0x003C, 6}, + {0x000C, 4},{0x0006, 4},{0x003D, 6},{0x001E, 6} + }, + { + {0x0006, 3},{0x000B, 4},{0x0011, 5},{0x001E, 5}, + {0x0074, 7},{0x03AA,10},{0x1D5C,13},{0x0001, 6}, + {0x0021, 6},{0x0001, 3},{0x0002, 3},{0x0007, 4}, + {0x0006, 4},{0x003E, 6},{0x00EB, 8},{0x01D4, 9}, + {0x0EAF,12},{0x3ABB,14},{0x3ABA,14},{0x1D59,13}, + {0x1D58,13},{0x1D5B,13},{0x1D5A,13},{0x000A, 4}, + {0x001C, 5},{0x0001, 5},{0x003F, 6},{0x003B, 6}, + {0x0001, 4},{0x0009, 4},{0x0020, 6},{0x0000, 6} + }, + { + {0x0004, 3},{0x000A, 4},{0x0017, 5},{0x0004, 4}, + {0x0016, 6},{0x016A, 9},{0x16B1,13},{0x0017, 7}, + {0x005B, 7},{0x0006, 3},{0x0007, 3},{0x0001, 4}, + {0x0000, 4},{0x000A, 6},{0x02D7,10},{0x0B5A,12}, + {0x16B0,13},{0x16B3,13},{0x16B2,13},{0x2D6D,14}, + {0x2D6C,14},{0x2D6F,14},{0x2D6E,14},{0x0006, 4}, + {0x000A, 5},{0x0004, 5},{0x002C, 6},{0x0017, 6}, + {0x0003, 
4},{0x0007, 4},{0x0016, 7},{0x00B4, 8} + }, + { + {0x0005, 3},{0x000D, 4},{0x0005, 4},{0x0009, 4}, + {0x0033, 6},{0x0193, 9},{0x192C,13},{0x0061, 8}, + {0x0031, 7},{0x0000, 2},{0x0007, 3},{0x0010, 5}, + {0x0011, 5},{0x00C8, 8},{0x192F,13},{0x325B,14}, + {0x325A,14},{0x1929,13},{0x1928,13},{0x192B,13}, + {0x192A,13},{0x325D,14},{0x325C,14},{0x0018, 5}, + {0x001A, 6},{0x001B, 6},{0x0065, 7},{0x0019, 6}, + {0x0004, 4},{0x0007, 4},{0x0060, 8},{0x0324,10} + }, + { + {0x0006, 3},{0x0000, 3},{0x0002, 4},{0x000F, 4}, + {0x0039, 6},{0x01D9, 9},{0x1D82,13},{0x0761,11}, + {0x03BE,10},{0x0001, 2},{0x0002, 2},{0x000F, 6}, + {0x000E, 6},{0x0762,11},{0x3B07,14},{0x3B06,14}, + {0x3B1D,14},{0x3B1C,14},{0x3B1F,14},{0x3B1E,14}, + {0x3B19,14},{0x3B18,14},{0x3B1B,14},{0x0038, 6}, + {0x01DE, 9},{0x00ED, 8},{0x03BF,10},{0x00EE, 8}, + {0x003A, 6},{0x0006, 5},{0x0EC0,12},{0x3B1A,14} + }, + { + {0x0000, 2},{0x0002, 3},{0x000F, 5},{0x0006, 4}, + {0x001C, 6},{0x01D0,10},{0x0E8C,13},{0x1D1B,14}, + {0x1D1A,14},{0x0003, 2},{0x0002, 2},{0x00EA, 9}, + {0x00E9, 9},{0x0E89,13},{0x0E88,13},{0x0E8B,13}, + {0x0E8A,13},{0x1D65,14},{0x1D64,14},{0x1D67,14}, + {0x1D66,14},{0x1D61,14},{0x1D60,14},{0x03AD,11}, + {0x1D63,14},{0x1D62,14},{0x1D1D,14},{0x1D1C,14}, + {0x003B, 7},{0x01D7,10},{0x1D1F,14},{0x1D1E,14} + }, + { + {0x0002, 2},{0x000F, 4},{0x001C, 5},{0x000C, 4}, + {0x003B, 6},{0x01AC, 9},{0x1AD8,13},{0x35B3,14}, + {0x35B2,14},{0x0001, 2},{0x0000, 2},{0x0069, 7}, + {0x0068, 7},{0x35BD,14},{0x35BC,14},{0x35BF,14}, + {0x35BE,14},{0x35B9,14},{0x35B8,14},{0x35BB,14}, + {0x35BA,14},{0x35B5,14},{0x35B4,14},{0x01A9, 9}, + {0x01A8, 9},{0x035A,10},{0x00D7, 8},{0x00D5, 8}, + {0x003A, 6},{0x001B, 5},{0x35B7,14},{0x35B6,14} + } +}; + + + +/*A description of a Huffman code value used when encoding the tree.*/ +typedef struct{ + /*The bit pattern, left-shifted so that the MSB of all patterns is + aligned.*/ + ogg_uint32_t pattern; + /*The amount the bit pattern was shifted.*/ + int shift; + /*The token this bit 
pattern represents.*/ + int token; +}oc_huff_entry; + + + +/*Compares two oc_huff_entry structures by their bit patterns. + _c1: The first entry to compare. + _c2: The second entry to compare. + Return: <0 if _c1<_c2, >0 if _c1>_c2.*/ +static int huff_entry_cmp(const void *_c1,const void *_c2){ + ogg_uint32_t b1; + ogg_uint32_t b2; + b1=((const oc_huff_entry *)_c1)->pattern; + b2=((const oc_huff_entry *)_c2)->pattern; + return b1b2?1:0; +} + +/*Encodes a description of the given Huffman tables. + Although the codes are stored in the encoder as flat arrays, in the bit + stream and in the decoder they are structured as a tree. + This function recovers the tree structure from the flat array and then + writes it out. + Note that the codes MUST form a Huffman code, and not merely a prefix-free + code, since the binary tree is assumed to be full. + _opb: The buffer to store the tree in. + _codes: The Huffman tables to pack. + Return: 0 on success, or a negative value if one of the given Huffman tables + does not form a full, prefix-free code.*/ +int oc_huff_codes_pack(oggpack_buffer *_opb, + const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]){ + int i; + for(i=0;i>1)<<(maxlen+1>>1))-1; + /*Copy over the codes into our temporary workspace. + The bit patterns are aligned, and the original entry each code is from + is stored as well.*/ + for(j=0;jentries[j].shift;bpos--)oggpackB_write(_opb,0,1); + /*Mark this as a leaf node, and write its value.*/ + oggpackB_write(_opb,1,1); + oggpackB_write(_opb,entries[j].token,5); + /*For each 1 branch we've descended, back up the tree until we reach a + 0 branch.*/ + bit=1< +#include "internal.h" +#include "dct.h" + +/*Performs an inverse 8 point Type-II DCT transform. + The output is scaled by a factor of 2 relative to the orthonormal version of + the transform. + _y: The buffer to store the result in. + Data will be placed in every 8th entry (e.g., in a column of an 8x8 + block). + _x: The input coefficients. 
+ The first 8 entries are used (e.g., from a row of an 8x8 block).*/ +static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){ + ogg_int32_t t[8]; + ogg_int32_t r; + /*Stage 1:*/ + /*0-1 butterfly.*/ + t[0]=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16; + t[1]=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16; + /*2-3 rotation by 6pi/16.*/ + t[2]=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16); + t[3]=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16); + /*4-7 rotation by 7pi/16.*/ + t[4]=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16); + /*5-6 rotation by 3pi/16.*/ + t[5]=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16); + t[6]=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16); + t[7]=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16); + /*Stage 2:*/ + /*4-5 butterfly.*/ + r=t[4]+t[5]; + t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16; + t[4]=r; + /*7-6 butterfly.*/ + r=t[7]+t[6]; + t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16; + t[7]=r; + /*Stage 3:*/ + /*0-3 butterfly.*/ + r=t[0]+t[3]; + t[3]=t[0]-t[3]; + t[0]=r; + /*1-2 butterfly.*/ + r=t[1]+t[2]; + t[2]=t[1]-t[2]; + t[1]=r; + /*6-5 butterfly.*/ + r=t[6]+t[5]; + t[5]=t[6]-t[5]; + t[6]=r; + /*Stage 4:*/ + /*0-7 butterfly.*/ + _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); + /*1-6 butterfly.*/ + _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); + /*2-5 butterfly.*/ + _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); + /*3-4 butterfly.*/ + _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); + _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); + _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); + _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); + _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); +} + +/*Performs an inverse 8 point Type-II DCT transform. + The output is scaled by a factor of 2 relative to the orthonormal version of + the transform. + _y: The buffer to store the result in. + Data will be placed in every 8th entry (e.g., in a column of an 8x8 + block). + _x: The input coefficients. + Only the first 4 entries are used. 
+ The other 4 are assumed to be 0.*/ +static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){ + ogg_int32_t t[8]; + ogg_int32_t r; + /*Stage 1:*/ + t[0]=OC_C4S4*_x[0]>>16; + t[2]=OC_C6S2*_x[2]>>16; + t[3]=OC_C2S6*_x[2]>>16; + t[4]=OC_C7S1*_x[1]>>16; + t[5]=-(OC_C5S3*_x[3]>>16); + t[6]=OC_C3S5*_x[3]>>16; + t[7]=OC_C1S7*_x[1]>>16; + /*Stage 2:*/ + r=t[4]+t[5]; + t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16; + t[4]=r; + r=t[7]+t[6]; + t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16; + t[7]=r; + /*Stage 3:*/ + t[1]=t[0]+t[2]; + t[2]=t[0]-t[2]; + r=t[0]+t[3]; + t[3]=t[0]-t[3]; + t[0]=r; + r=t[6]+t[5]; + t[5]=t[6]-t[5]; + t[6]=r; + /*Stage 4:*/ + _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); + _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); + _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); + _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); + _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); + _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); + _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); + _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); +} + +/*Performs an inverse 8 point Type-II DCT transform. + The output is scaled by a factor of 2 relative to the orthonormal version of + the transform. + _y: The buffer to store the result in. + Data will be placed in every 8th entry (e.g., in a column of an 8x8 + block). + _x: The input coefficients. + Only the first 3 entries are used. 
+ The other 5 are assumed to be 0.*/ +static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){ + ogg_int32_t t[8]; + ogg_int32_t r; + /*Stage 1:*/ + t[0]=OC_C4S4*_x[0]>>16; + t[2]=OC_C6S2*_x[2]>>16; + t[3]=OC_C2S6*_x[2]>>16; + t[4]=OC_C7S1*_x[1]>>16; + t[7]=OC_C1S7*_x[1]>>16; + /*Stage 2:*/ + t[5]=OC_C4S4*t[4]>>16; + t[6]=OC_C4S4*t[7]>>16; + /*Stage 3:*/ + t[1]=t[0]+t[2]; + t[2]=t[0]-t[2]; + r=t[0]+t[3]; + t[3]=t[0]-t[3]; + t[0]=r; + r=t[6]+t[5]; + t[5]=t[6]-t[5]; + t[6]=r; + /*Stage 4:*/ + _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); + _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); + _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); + _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); + _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); + _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); + _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); + _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); +} + +/*Performs an inverse 8 point Type-II DCT transform. + The output is scaled by a factor of 2 relative to the orthonormal version of + the transform. + _y: The buffer to store the result in. + Data will be placed in every 8th entry (e.g., in a column of an 8x8 + block). + _x: The input coefficients. + Only the first 2 entries are used. + The other 6 are assumed to be 0.*/ +static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){ + ogg_int32_t t[8]; + ogg_int32_t r; + /*Stage 1:*/ + t[0]=OC_C4S4*_x[0]>>16; + t[4]=OC_C7S1*_x[1]>>16; + t[7]=OC_C1S7*_x[1]>>16; + /*Stage 2:*/ + t[5]=OC_C4S4*t[4]>>16; + t[6]=OC_C4S4*t[7]>>16; + /*Stage 3:*/ + r=t[6]+t[5]; + t[5]=t[6]-t[5]; + t[6]=r; + /*Stage 4:*/ + _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); + _y[1<<3]=(ogg_int16_t)(t[0]+t[6]); + _y[2<<3]=(ogg_int16_t)(t[0]+t[5]); + _y[3<<3]=(ogg_int16_t)(t[0]+t[4]); + _y[4<<3]=(ogg_int16_t)(t[0]-t[4]); + _y[5<<3]=(ogg_int16_t)(t[0]-t[5]); + _y[6<<3]=(ogg_int16_t)(t[0]-t[6]); + _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); +} + +/*Performs an inverse 8 point Type-II DCT transform. + The output is scaled by a factor of 2 relative to the orthonormal version of + the transform. + _y: The buffer to store the result in. 
+ Data will be placed in every 8th entry (e.g., in a column of an 8x8 + block). + _x: The input coefficients. + Only the first entry is used. + The other 7 are assumed to be 0.*/ +static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){ + _y[0<<3]=_y[1<<3]=_y[2<<3]=_y[3<<3]= + _y[4<<3]=_y[5<<3]=_y[6<<3]=_y[7<<3]=(ogg_int16_t)(OC_C4S4*_x[0]>>16); +} + +/*Performs an inverse 8x8 Type-II DCT transform. + The input is assumed to be scaled by a factor of 4 relative to orthonormal + version of the transform. + All coefficients but the first 3 in zig-zag scan order are assumed to be 0: + x x 0 0 0 0 0 0 + x 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + _y: The buffer to store the result in. + This may be the same as _x. + _x: The input coefficients.*/ +static void oc_idct8x8_3(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + const ogg_int16_t *in; + ogg_int16_t *end; + ogg_int16_t *out; + ogg_int16_t w[64]; + /*Transform rows of x into columns of w.*/ + idct8_2(w,_x); + idct8_1(w+1,_x+8); + /*Transform rows of w into columns of y.*/ + for(in=w,out=_y,end=out+8;out>4); +} + +/*Performs an inverse 8x8 Type-II DCT transform. + The input is assumed to be scaled by a factor of 4 relative to orthonormal + version of the transform. + All coefficients but the first 10 in zig-zag scan order are assumed to be 0: + x x x x 0 0 0 0 + x x x 0 0 0 0 0 + x x 0 0 0 0 0 0 + x 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + _y: The buffer to store the result in. + This may be the same as _x. 
+ _x: The input coefficients.*/ +static void oc_idct8x8_10(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + const ogg_int16_t *in; + ogg_int16_t *end; + ogg_int16_t *out; + ogg_int16_t w[64]; + /*Transform rows of x into columns of w.*/ + idct8_4(w,_x); + idct8_3(w+1,_x+8); + idct8_2(w+2,_x+16); + idct8_1(w+3,_x+24); + /*Transform rows of w into columns of y.*/ + for(in=w,out=_y,end=out+8;out>4); +} + +/*Performs an inverse 8x8 Type-II DCT transform. + The input is assumed to be scaled by a factor of 4 relative to orthonormal + version of the transform. + _y: The buffer to store the result in. + This may be the same as _x. + _x: The input coefficients.*/ +static void oc_idct8x8_slow(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + const ogg_int16_t *in; + ogg_int16_t *end; + ogg_int16_t *out; + ogg_int16_t w[64]; + /*Transform rows of x into columns of w.*/ + for(in=_x,out=w,end=out+8;out>4); +} + +void oc_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64], + int _last_zzi){ + (*_state->opt_vtable.idct8x8)(_y,_last_zzi); +} + +/*Performs an inverse 8x8 Type-II DCT transform. + The input is assumed to be scaled by a factor of 4 relative to orthonormal + version of the transform.*/ +void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi){ + /*_last_zzi is subtly different from an actual count of the number of + coefficients we decoded for this block. + It contains the value of zzi BEFORE the final token in the block was + decoded. + In most cases this is an EOB token (the continuation of an EOB run from a + previous block counts), and so this is the same as the coefficient count. + However, in the case that the last token was NOT an EOB token, but filled + the block up with exactly 64 coefficients, _last_zzi will be less than 64. + Provided the last token was not a pure zero run, the minimum value it can + be is 46, and so that doesn't affect any of the cases in this routine. 
+ However, if the last token WAS a pure zero run of length 63, then _last_zzi + will be 1 while the number of coefficients decoded is 64. + Thus, we will trigger the following special case, where the real + coefficient count would not. + Note also that a zero run of length 64 will give _last_zzi a value of 0, + but we still process the DC coefficient, which might have a non-zero value + due to DC prediction. + Although convoluted, this is arguably the correct behavior: it allows us to + use a smaller transform when the block ends with a long zero run instead + of a normal EOB token. + It could be smarter... multiple separate zero runs at the end of a block + will fool it, but an encoder that generates these really deserves what it + gets. + Needless to say we inherited this approach from VP3.*/ + /*Then perform the iDCT.*/ + if(_last_zzi<3)oc_idct8x8_3(_y,_y); + else if(_last_zzi<10)oc_idct8x8_10(_y,_y); + else oc_idct8x8_slow(_y,_y); +} diff --git a/thirdparty/libtheora/info.c b/thirdparty/libtheora/info.c new file mode 100644 index 0000000000..6b9762978b --- /dev/null +++ b/thirdparty/libtheora/info.c @@ -0,0 +1,131 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: info.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include +#include +#include +#include "internal.h" + + + +/*This is more or less the same as strncasecmp, but that doesn't exist + everywhere, and this is a fairly trivial function, so we include it. + Note: We take advantage of the fact that we know _n is less than or equal to + the length of at least one of the strings.*/ +static int oc_tagcompare(const char *_s1,const char *_s2,int _n){ + int c; + for(c=0;c<_n;c++){ + if(toupper(_s1[c])!=toupper(_s2[c]))return !0; + } + return _s1[c]!='='; +} + + + +void th_info_init(th_info *_info){ + memset(_info,0,sizeof(*_info)); + _info->version_major=TH_VERSION_MAJOR; + _info->version_minor=TH_VERSION_MINOR; + _info->version_subminor=TH_VERSION_SUB; + _info->keyframe_granule_shift=6; +} + +void th_info_clear(th_info *_info){ + memset(_info,0,sizeof(*_info)); +} + + + +void th_comment_init(th_comment *_tc){ + memset(_tc,0,sizeof(*_tc)); +} + +void th_comment_add(th_comment *_tc,char *_comment){ + char **user_comments; + int *comment_lengths; + int comment_len; + user_comments=_ogg_realloc(_tc->user_comments, + (_tc->comments+2)*sizeof(*_tc->user_comments)); + if(user_comments==NULL)return; + _tc->user_comments=user_comments; + comment_lengths=_ogg_realloc(_tc->comment_lengths, + (_tc->comments+2)*sizeof(*_tc->comment_lengths)); + if(comment_lengths==NULL)return; + _tc->comment_lengths=comment_lengths; + comment_len=strlen(_comment); + comment_lengths[_tc->comments]=comment_len; + user_comments[_tc->comments]=_ogg_malloc(comment_len+1); + if(user_comments[_tc->comments]==NULL)return; + memcpy(_tc->user_comments[_tc->comments],_comment,comment_len+1); + _tc->comments++; 
+ _tc->user_comments[_tc->comments]=NULL; +} +/*Adds the comment "_tag=_val" to _tc. + The string is assembled in a temporary buffer, passed to th_comment_add() + and then freed again. + If the temporary buffer cannot be allocated, nothing is added.*/ +void th_comment_add_tag(th_comment *_tc,char *_tag,char *_val){ + char *comment; + int tag_len; + int val_len; + tag_len=strlen(_tag); + val_len=strlen(_val); + /*+2 for the '=' separator and the terminating '\0'.*/ + comment=_ogg_malloc(tag_len+val_len+2); + if(comment==NULL)return; + memcpy(comment,_tag,tag_len); + comment[tag_len]='='; + memcpy(comment+tag_len+1,_val,val_len+1); /*The +1 copies _val's terminating '\0' too.*/ + th_comment_add(_tc,comment); + _ogg_free(comment); +} +/*Looks up the _count'th (counting from 0) comment whose tag matches _tag, + ignoring case. + Return: A pointer to the value, just past "TAG=", inside the storage owned by + _tc, or NULL if there are not that many matching comments.*/ +char *th_comment_query(th_comment *_tc,char *_tag,int _count){ + long i; + int found; + int tag_len; + tag_len=strlen(_tag); + found=0; + for(i=0;i<_tc->comments;i++){ + if(!oc_tagcompare(_tc->user_comments[i],_tag,tag_len)){ + /*We return a pointer to the data, not a copy.*/ + if(_count==found++)return _tc->user_comments[i]+tag_len+1; + } + } + /*Didn't find anything.*/ + return NULL; +} +/*Return: The number of comments in _tc whose tag matches _tag, ignoring + case.*/ +int th_comment_query_count(th_comment *_tc,char *_tag){ + long i; + int tag_len; + int count; + tag_len=strlen(_tag); + count=0; + for(i=0;i<_tc->comments;i++){ + if(!oc_tagcompare(_tc->user_comments[i],_tag,tag_len))count++; + } + return count; +} +/*Frees every comment string, both arrays and the vendor string, then zeroes + _tc. + A NULL _tc is ignored.*/ +void th_comment_clear(th_comment *_tc){ + if(_tc!=NULL){ + long i; + for(i=0;i<_tc->comments;i++)_ogg_free(_tc->user_comments[i]); + _ogg_free(_tc->user_comments); + _ogg_free(_tc->comment_lengths); + _ogg_free(_tc->vendor); + memset(_tc,0,sizeof(*_tc)); + } +} diff --git a/thirdparty/libtheora/internal.c b/thirdparty/libtheora/internal.c new file mode 100644 index 0000000000..0fe4f63e72 --- /dev/null +++ b/thirdparty/libtheora/internal.c @@ -0,0 +1,262 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: internal.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include +#include +#include +#include "internal.h" + + + +/*A map from the index in the zig zag scan to the coefficient number in a + block. + All zig zag indices beyond 63 are sent to coefficient 64, so that zero runs + past the end of a block in bogus streams get mapped to a known location.*/ +const unsigned char OC_FZIG_ZAG[128]={ + 0, 1, 8,16, 9, 2, 3,10, + 17,24,32,25,18,11, 4, 5, + 12,19,26,33,40,48,41,34, + 27,20,13, 6, 7,14,21,28, + 35,42,49,56,57,50,43,36, + 29,22,15,23,30,37,44,51, + 58,59,52,45,38,31,39,46, + 53,60,61,54,47,55,62,63, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64 +}; + +/*A map from the coefficient number in a block to its index in the zig zag + scan.*/ +const unsigned char OC_IZIG_ZAG[64]={ + 0, 1, 5, 6,14,15,27,28, + 2, 4, 7,13,16,26,29,42, + 3, 8,12,17,25,30,41,43, + 9,11,18,24,31,40,44,53, + 10,19,23,32,39,45,52,54, + 20,22,33,38,46,51,55,60, + 21,34,37,47,50,56,59,61, + 35,36,48,49,57,58,62,63 +}; + +/*A map from physical macro block ordering to bitstream macro block + ordering within a super block.*/ +const unsigned char OC_MB_MAP[2][2]={{0,3},{1,2}}; + +/*A list of the indices in the oc_mb_map array that can be valid for each of + the various chroma decimation types. + Rows with fewer than 12 entries are zero-filled by the initializer; + OC_MB_MAP_NIDXS gives the number of meaningful entries in each row.*/ +const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]={ + {0,1,2,3,4,8}, + {0,1,2,3,4,5,8,9}, + {0,1,2,3,4,6,8,10}, + {0,1,2,3,4,5,6,7,8,9,10,11} +}; + +/*The number of indices in the oc_mb_map array that can be valid for each of + the 
various chroma decimation types.*/ +const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS]={6,8,8,12}; + +/*The number of extra bits that are coded with each of the DCT tokens. + Each DCT token has some fixed number of additional bits (possibly 0) stored + after the token itself, containing, for example, coefficient magnitude, + sign bits, etc.*/ +const unsigned char OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS]={ + 0,0,0,2,3,4,12,3,6, + 0,0,0,0, + 1,1,1,1,2,3,4,5,6,10, + 1,1,1,1,1,3,4, + 2,3 +}; + + +/*Return: The number of bits needed to represent _v, i.e., one more than the + index of its highest set bit, or 0 if _v is 0.*/ +int oc_ilog(unsigned _v){ + int ret; + for(ret=0;_v;ret++)_v>>=1; + return ret; +} + + + +/*The function used to fill in the chroma plane motion vectors for a macro + block when 4 different motion vectors are specified in the luma plane. + This version is for use with chroma decimated in the X and Y directions + (4:2:0). + _cbmvs: The chroma block-level motion vectors to fill in. + Only _cbmvs[0] is written, from the sum of all four luma vectors. + _lbmvs: The luma block-level motion vectors.*/ +static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ + int dx; + int dy; + dx=_lbmvs[0][0]+_lbmvs[1][0]+_lbmvs[2][0]+_lbmvs[3][0]; + dy=_lbmvs[0][1]+_lbmvs[1][1]+_lbmvs[2][1]+_lbmvs[3][1]; + _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,2,2); + _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,2,2); +} + +/*The function used to fill in the chroma plane motion vectors for a macro + block when 4 different motion vectors are specified in the luma plane. + This version is for use with chroma decimated in the Y direction. + _cbmvs: The chroma block-level motion vectors to fill in. 
+ _lbmvs: The luma block-level motion vectors.*/ +static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ + int dx; + int dy; + dx=_lbmvs[0][0]+_lbmvs[2][0]; /*Luma blocks 0 and 2 feed chroma block 0.*/ + dy=_lbmvs[0][1]+_lbmvs[2][1]; + _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1); + _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1); + dx=_lbmvs[1][0]+_lbmvs[3][0]; /*Luma blocks 1 and 3 feed chroma block 1.*/ + dy=_lbmvs[1][1]+_lbmvs[3][1]; + _cbmvs[1][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1); + _cbmvs[1][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1); +} + +/*The function used to fill in the chroma plane motion vectors for a macro + block when 4 different motion vectors are specified in the luma plane. + This version is for use with chroma decimated in the X direction (4:2:2). + _cbmvs: The chroma block-level motion vectors to fill in. + Only _cbmvs[0] and _cbmvs[2] are written. + _lbmvs: The luma block-level motion vectors.*/ +static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ + int dx; + int dy; + dx=_lbmvs[0][0]+_lbmvs[1][0]; /*Luma blocks 0 and 1 feed chroma block 0.*/ + dy=_lbmvs[0][1]+_lbmvs[1][1]; + _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1); + _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1); + dx=_lbmvs[2][0]+_lbmvs[3][0]; /*Luma blocks 2 and 3 feed chroma block 2.*/ + dy=_lbmvs[2][1]+_lbmvs[3][1]; + _cbmvs[2][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1); + _cbmvs[2][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1); +} + +/*The function used to fill in the chroma plane motion vectors for a macro + block when 4 different motion vectors are specified in the luma plane. + This version is for use with no chroma decimation (4:4:4). + _cbmvs: The chroma block-level motion vectors to fill in. + All four entries are written, as direct copies of the luma + vectors. 
+ _lbmvs: The luma block-level motion vectors.*/ +static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ + memcpy(_cbmvs,_lbmvs,4*sizeof(_lbmvs[0])); +} + +/*A table of functions used to fill in the chroma plane motion vectors for a + macro block when 4 different motion vectors are specified in the luma + plane.*/ +const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={ + (oc_set_chroma_mvs_func)oc_set_chroma_mvs00, + (oc_set_chroma_mvs_func)oc_set_chroma_mvs01, + (oc_set_chroma_mvs_func)oc_set_chroma_mvs10, + (oc_set_chroma_mvs_func)oc_set_chroma_mvs11 +}; + + +/*Allocates a _height by _width array of _sz-byte elements as a single block: + a table of _height row pointers followed immediately by the row data, so + the result can be indexed by row and released with one oc_free_2d() call. + The row data starts right after the pointer table, so it is only guaranteed + to be aligned as well as a void pointer. + Return: The array, or NULL if the allocation fails or the total size does not + fit in a size_t.*/ +void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz){ + size_t rowsz; + size_t colsz; + size_t datsz; + char *ret; + /*Refuse sizes that would wrap around: a wrapped size would yield a buffer + smaller than the row pointers set up below assume.*/ + if(_height>~(size_t)0/sizeof(void *))return NULL; + colsz=_height*sizeof(void *); + if(_width>0&&_sz>~(size_t)0/_width)return NULL; + rowsz=_sz*_width; + if(_height>0&&rowsz>(~(size_t)0-colsz)/_height)return NULL; + datsz=rowsz*_height; + /*Alloc array and row pointers.*/ + ret=(char *)_ogg_malloc(datsz+colsz); + /*Initialize the array.*/ + if(ret!=NULL){ + size_t i; + void **p; + char *datptr; + p=(void **)ret; + i=_height; + for(datptr=ret+colsz;i-->0;p++,datptr+=rowsz)*p=(void *)datptr; + } + return (void **)ret; +} +/*As oc_malloc_2d(), but the whole block is obtained from _ogg_calloc(), so + the element data starts out zeroed. + Return: The array, or NULL if the allocation fails or the total size does not + fit in a size_t.*/ +void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz){ + size_t colsz; + size_t rowsz; + size_t datsz; + char *ret; + /*Refuse sizes that would wrap around (see oc_malloc_2d()).*/ + if(_height>~(size_t)0/sizeof(void *))return NULL; + colsz=_height*sizeof(void *); + if(_width>0&&_sz>~(size_t)0/_width)return NULL; + rowsz=_sz*_width; + if(_height>0&&rowsz>(~(size_t)0-colsz)/_height)return NULL; + datsz=rowsz*_height; + /*Alloc array and row pointers.*/ + ret=(char *)_ogg_calloc(datsz+colsz,1); + /*Initialize the array.*/ + if(ret!=NULL){ + size_t i; + void **p; + char *datptr; + p=(void **)ret; + i=_height; + for(datptr=ret+colsz;i-->0;p++,datptr+=rowsz)*p=(void *)datptr; + } + return (void **)ret; +} +/*Frees an array returned by oc_malloc_2d() or oc_calloc_2d().*/ +void oc_free_2d(void *_ptr){ + _ogg_free(_ptr); +} + +/*Fills in a Y'CbCr buffer with a pointer to the image data in the first + buffer, but with the opposite vertical orientation. + _dst: The destination buffer. + This can be the same as _src. 
+ _src: The source buffer.*/ +void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst, + const th_ycbcr_buffer _src){ + int pli; + for(pli=0;pli<3;pli++){ + _dst[pli].width=_src[pli].width; + _dst[pli].height=_src[pli].height; + _dst[pli].stride=-_src[pli].stride; /*Negated before it is used below, so an in-place flip (_dst==_src) still works.*/ + _dst[pli].data=_src[pli].data + +(1-_dst[pli].height)*(ptrdiff_t)_dst[pli].stride; /*(1-height)*(-stride) steps forward to the last row of the source.*/ + } +} +/*Return: The library vendor string, OC_VENDOR_STRING.*/ +const char *th_version_string(void){ + return OC_VENDOR_STRING; +} +/*Return: The bitstream version packed as (major<<16)+(minor<<8)+sub.*/ +ogg_uint32_t th_version_number(void){ + return (TH_VERSION_MAJOR<<16)+(TH_VERSION_MINOR<<8)+TH_VERSION_SUB; +} + +/*Determines the packet type. + The type is read from the top bit of the first packet byte. + Note that this correctly interprets a 0-byte packet as a video data packet. + Return: 1 for a header packet, 0 for a data packet.*/ +int th_packet_isheader(ogg_packet *_op){ + return _op->bytes>0?_op->packet[0]>>7:0; +} + +/*Determines the frame type of a video data packet. + Bit 0x80 of the first byte marks a header packet; otherwise a clear 0x40 bit + marks a key frame. + Note that this correctly interprets a 0-byte packet as a delta frame. + Return: 1 for a key frame, 0 for a delta frame, and -1 for a header + packet.*/ +int th_packet_iskeyframe(ogg_packet *_op){ + return _op->bytes<=0?0:_op->packet[0]&0x80?-1:!(_op->packet[0]&0x40); +} diff --git a/thirdparty/libtheora/internal.h b/thirdparty/libtheora/internal.h new file mode 100644 index 0000000000..d81263e13e --- /dev/null +++ b/thirdparty/libtheora/internal.h @@ -0,0 +1,509 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: internal.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#if !defined(_internal_H) +# define _internal_H (1) +# include +# include +# if defined(HAVE_CONFIG_H) +# include +# endif +# include "theora/codec.h" +# include "theora/theora.h" + +# if defined(_MSC_VER) +/*Disable missing EMMS warnings.*/ +# pragma warning(disable:4799) +/*Thank you Microsoft, I know the order of operations.*/ +# pragma warning(disable:4554) +# endif +/*You, too, gcc.*/ +# if defined(__GNUC_PREREQ) +# if __GNUC_PREREQ(4,2) +# pragma GCC diagnostic ignored "-Wparentheses" +# endif +# endif + +# include "ocintrin.h" +# include "huffman.h" +# include "quant.h" + +/*Some assembly constructs require aligned operands.*/ +# if defined(OC_X86_ASM) +# if defined(__GNUC__) +# define OC_ALIGN8(expr) expr __attribute__((aligned(8))) +# define OC_ALIGN16(expr) expr __attribute__((aligned(16))) +# elif defined(_MSC_VER) +# define OC_ALIGN8(expr) __declspec (align(8)) expr +# define OC_ALIGN16(expr) __declspec (align(16)) expr +# endif +# endif +# if !defined(OC_ALIGN8) +# define OC_ALIGN8(expr) expr +# endif +# if !defined(OC_ALIGN16) +# define OC_ALIGN16(expr) expr +# endif + + + +typedef struct oc_sb_flags oc_sb_flags; +typedef struct oc_border_info oc_border_info; +typedef struct oc_fragment oc_fragment; +typedef struct oc_fragment_plane oc_fragment_plane; +typedef struct oc_base_opt_vtable oc_base_opt_vtable; +typedef struct oc_base_opt_data oc_base_opt_data; +typedef struct oc_state_dispatch_vtable oc_state_dispatch_vtable; +typedef struct oc_theora_state oc_theora_state; + + + +/*This library's version.*/ +# define OC_VENDOR_STRING "Xiph.Org libtheora 1.1 20090822 (Thusnelda)" + +/*Theora 
bitstream version.*/ +# define TH_VERSION_MAJOR (3) +# define TH_VERSION_MINOR (2) +# define TH_VERSION_SUB (1) +# define TH_VERSION_CHECK(_info,_maj,_min,_sub) \ + ((_info)->version_major>(_maj)||(_info)->version_major==(_maj)&& \ + ((_info)->version_minor>(_min)||(_info)->version_minor==(_min)&& \ + (_info)->version_subminor>=(_sub))) + +/*A keyframe.*/ +#define OC_INTRA_FRAME (0) +/*A predicted frame.*/ +#define OC_INTER_FRAME (1) +/*A frame of unknown type (frame type decision has not yet been made).*/ +#define OC_UNKWN_FRAME (-1) + +/*The amount of padding to add to the reconstructed frame buffers on all + sides. + This is used to allow unrestricted motion vectors without special casing. + This must be a multiple of 2.*/ +#define OC_UMV_PADDING (16) + +/*Frame classification indices.*/ +/*The previous golden frame.*/ +#define OC_FRAME_GOLD (0) +/*The previous frame.*/ +#define OC_FRAME_PREV (1) +/*The current frame.*/ +#define OC_FRAME_SELF (2) + +/*The input or output buffer.*/ +#define OC_FRAME_IO (3) + +/*Macroblock modes.*/ +/*Macro block is invalid: It is never coded.*/ +#define OC_MODE_INVALID (-1) +/*Encoded difference from the same macro block in the previous frame.*/ +#define OC_MODE_INTER_NOMV (0) +/*Encoded with no motion compensated prediction.*/ +#define OC_MODE_INTRA (1) +/*Encoded difference from the previous frame offset by the given motion + vector.*/ +#define OC_MODE_INTER_MV (2) +/*Encoded difference from the previous frame offset by the last coded motion + vector.*/ +#define OC_MODE_INTER_MV_LAST (3) +/*Encoded difference from the previous frame offset by the second to last + coded motion vector.*/ +#define OC_MODE_INTER_MV_LAST2 (4) +/*Encoded difference from the same macro block in the previous golden + frame.*/ +#define OC_MODE_GOLDEN_NOMV (5) +/*Encoded difference from the previous golden frame offset by the given motion + vector.*/ +#define OC_MODE_GOLDEN_MV (6) +/*Encoded difference from the previous frame offset by the individual 
motion + vectors given for each block.*/ +#define OC_MODE_INTER_MV_FOUR (7) +/*The number of (coded) modes.*/ +#define OC_NMODES (8) + +/*Determines the reference frame used for a given MB mode.*/ +#define OC_FRAME_FOR_MODE(_x) \ + OC_UNIBBLE_TABLE32(OC_FRAME_PREV,OC_FRAME_SELF,OC_FRAME_PREV,OC_FRAME_PREV, \ + OC_FRAME_PREV,OC_FRAME_GOLD,OC_FRAME_GOLD,OC_FRAME_PREV,(_x)) + +/*Constants for the packet state machine common between encoder and decoder.*/ + +/*Next packet to emit/read: Codec info header.*/ +#define OC_PACKET_INFO_HDR (-3) +/*Next packet to emit/read: Comment header.*/ +#define OC_PACKET_COMMENT_HDR (-2) +/*Next packet to emit/read: Codec setup header.*/ +#define OC_PACKET_SETUP_HDR (-1) +/*No more packets to emit/read.*/ +#define OC_PACKET_DONE (INT_MAX) + + + +/*Super blocks are 32x32 segments of pixels in a single color plane indexed + in image order. + Internally, super blocks are broken up into four quadrants, each of which + contains a 2x2 pattern of blocks, each of which is an 8x8 block of pixels. + Quadrants, and the blocks within them, are indexed in a special order called + a "Hilbert curve" within the super block. + + In order to differentiate between the Hilbert-curve indexing strategy and + the regular image order indexing strategy, blocks indexed in image order + are called "fragments". + Fragments are indexed in image order, left to right, then bottom to top, + from Y' plane to Cb plane to Cr plane. + + The co-located fragments in all image planes corresponding to the location + of a single quadrant of a luma plane super block form a macro block. + Thus there is only a single set of macro blocks for all planes, each of which + contains between 6 and 12 fragments, depending on the pixel format. + Therefore macro block information is kept in a separate set of arrays from + super blocks to avoid unused space in the other planes. + The lists are indexed in super block order. 
+ That is, the macro block corresponding to the macro block mbi in (luma plane) + super block sbi is at index (sbi<<2|mbi). + Thus the number of macro blocks in each dimension is always twice the number + of super blocks, even when only an odd number fall inside the coded frame. + These "extra" macro blocks are just an artifact of our internal data layout, + and not part of the coded stream; they are flagged with a negative MB mode.*/ + + + +/*A single quadrant of the map from a super block to fragment numbers.*/ +typedef ptrdiff_t oc_sb_map_quad[4]; +/*A map from a super block to fragment numbers.*/ +typedef oc_sb_map_quad oc_sb_map[4]; +/*A single plane of the map from a macro block to fragment numbers.*/ +typedef ptrdiff_t oc_mb_map_plane[4]; +/*A map from a macro block to fragment numbers.*/ +typedef oc_mb_map_plane oc_mb_map[3]; +/*A motion vector.*/ +typedef signed char oc_mv[2]; + + + +/*Super block information.*/ +struct oc_sb_flags{ + unsigned char coded_fully:1; + unsigned char coded_partially:1; + unsigned char quad_valid:4; +}; + + + +/*Information about a fragment which intersects the border of the displayable + region. + This marks which pixels belong to the displayable region.*/ +struct oc_border_info{ + /*A bit mask marking which pixels are in the displayable region. + Pixel (x,y) corresponds to bit (y<<3|x).*/ + ogg_int64_t mask; + /*The number of pixels in the displayable region. + This is always positive, and always less than 64.*/ + int npixels; +}; + + + +/*Fragment information.*/ +struct oc_fragment{ + /*A flag indicating whether or not this fragment is coded.*/ + unsigned coded:1; + /*A flag indicating that this entire fragment lies outside the displayable + region of the frame. + Note the contrast with an invalid macro block, which is outside the coded + frame, not just the displayable one. 
+ There are no fragments outside the coded frame by construction.*/ + unsigned invalid:1; + /*The index of the quality index used for this fragment's AC coefficients.*/ + unsigned qii:6; + /*The mode of the macroblock this fragment belongs to.*/ + unsigned mb_mode:3; + /*The index of the associated border information for fragments which lie + partially outside the displayable region. + For fragments completely inside or outside this region, this is -1. + Note that the C standard requires an explicit signed keyword for bitfield + types, since some compilers may treat them as unsigned without it.*/ + signed int borderi:5; + /*The prediction-corrected DC component. + Note that the C standard requires an explicit signed keyword for bitfield + types, since some compilers may treat them as unsigned without it.*/ + signed int dc:16; +}; + + + +/*A description of each fragment plane.*/ +struct oc_fragment_plane{ + /*The number of fragments in the horizontal direction.*/ + int nhfrags; + /*The number of fragments in the vertical direction.*/ + int nvfrags; + /*The offset of the first fragment in the plane.*/ + ptrdiff_t froffset; + /*The total number of fragments in the plane.*/ + ptrdiff_t nfrags; + /*The number of super blocks in the horizontal direction.*/ + unsigned nhsbs; + /*The number of super blocks in the vertical direction.*/ + unsigned nvsbs; + /*The offset of the first super block in the plane.*/ + unsigned sboffset; + /*The total number of super blocks in the plane.*/ + unsigned nsbs; +}; + + + +/*The shared (encoder and decoder) functions that have accelerated variants.*/ +struct oc_base_opt_vtable{ + void (*frag_copy)(unsigned char *_dst, + const unsigned char *_src,int _ystride); + void (*frag_recon_intra)(unsigned char *_dst,int _ystride, + const ogg_int16_t _residue[64]); + void (*frag_recon_inter)(unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); + void (*frag_recon_inter2)(unsigned char *_dst,const unsigned 
char *_src1, + const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]); + void (*idct8x8)(ogg_int16_t _y[64],int _last_zzi); + void (*state_frag_recon)(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); + void (*state_frag_copy_list)(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli); + void (*state_loop_filter_frag_rows)(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); + void (*restore_fpu)(void); +}; + +/*The shared (encoder and decoder) tables that vary according to which variants + of the above functions are used.*/ +struct oc_base_opt_data{ + const unsigned char *dct_fzig_zag; +}; + + +/*State information common to both the encoder and decoder.*/ +struct oc_theora_state{ + /*The stream information.*/ + th_info info; + /*Table for shared accelerated functions.*/ + oc_base_opt_vtable opt_vtable; + /*Table for shared data used by accelerated functions.*/ + oc_base_opt_data opt_data; + /*CPU flags to detect the presence of extended instruction sets.*/ + ogg_uint32_t cpu_flags; + /*The fragment plane descriptions.*/ + oc_fragment_plane fplanes[3]; + /*The list of fragments, indexed in image order.*/ + oc_fragment *frags; + /*The offset into the reference frame buffer to the upper-left pixel of + each fragment.*/ + ptrdiff_t *frag_buf_offs; + /*The motion vector for each fragment.*/ + oc_mv *frag_mvs; + /*The total number of fragments in a single frame.*/ + ptrdiff_t nfrags; + /*The list of super block maps, indexed in image order.*/ + oc_sb_map *sb_maps; + /*The list of super block flags, indexed in image order.*/ + oc_sb_flags *sb_flags; + /*The total number of super blocks in a single frame.*/ + unsigned nsbs; + /*The fragments from each color plane that belong to each macro block. + Fragments are stored in image order (left to right then top to bottom). 
+ When chroma components are decimated, the extra fragments have an index of + -1.*/ + oc_mb_map *mb_maps; + /*The list of macro block modes. + A negative number indicates the macro block lies entirely outside the + coded frame.*/ + signed char *mb_modes; + /*The number of macro blocks in the X direction.*/ + unsigned nhmbs; + /*The number of macro blocks in the Y direction.*/ + unsigned nvmbs; + /*The total number of macro blocks.*/ + size_t nmbs; + /*The list of coded fragments, in coded order. + Uncoded fragments are stored in reverse order from the end of the list.*/ + ptrdiff_t *coded_fragis; + /*The number of coded fragments in each plane.*/ + ptrdiff_t ncoded_fragis[3]; + /*The total number of coded fragments.*/ + ptrdiff_t ntotal_coded_fragis; + /*The index of the buffers being used for each OC_FRAME_* reference frame.*/ + int ref_frame_idx[4]; + /*The actual buffers used for the previously decoded frames.*/ + th_ycbcr_buffer ref_frame_bufs[4]; + /*The storage for the reference frame buffers.*/ + unsigned char *ref_frame_data[4]; + /*The strides for each plane in the reference frames.*/ + int ref_ystride[3]; + /*The number of unique border patterns.*/ + int nborders; + /*The unique border patterns for all border fragments. 
+ The borderi field of fragments which straddle the border indexes this + list.*/ + oc_border_info borders[16]; + /*The frame number of the last keyframe.*/ + ogg_int64_t keyframe_num; + /*The frame number of the current frame.*/ + ogg_int64_t curframe_num; + /*The granpos of the current frame.*/ + ogg_int64_t granpos; + /*The type of the current frame.*/ + unsigned char frame_type; + /*The bias to add to the frame count when computing granule positions.*/ + unsigned char granpos_bias; + /*The number of quality indices used in the current frame.*/ + unsigned char nqis; + /*The quality indices of the current frame.*/ + unsigned char qis[3]; + /*The dequantization tables, stored in zig-zag order, and indexed by + qi, pli, qti, and zzi.*/ + ogg_uint16_t *dequant_tables[64][3][2]; + OC_ALIGN16(oc_quant_table dequant_table_data[64][3][2]); + /*Loop filter strength parameters.*/ + unsigned char loop_filter_limits[64]; +}; + + + +/*The function type used to fill in the chroma plane motion vectors for a + macro block when 4 different motion vectors are specified in the luma + plane. + _cbmvs: The chroma block-level motion vectors to fill in. + Which of the four entries get written depends on the + implementation selected for the pixel format. 
+ _lbmvs: The luma block-level motion vectors.*/ +typedef void (*oc_set_chroma_mvs_func)(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]); + + + +/*A map from the index in the zig zag scan to the coefficient number in a + block.*/ +extern const unsigned char OC_FZIG_ZAG[128]; +/*A map from the coefficient number in a block to its index in the zig zag + scan.*/ +extern const unsigned char OC_IZIG_ZAG[64]; +/*A map from physical macro block ordering to bitstream macro block + ordering within a super block.*/ +extern const unsigned char OC_MB_MAP[2][2]; +/*A list of the indices in the oc_mb_map array that can be valid for each of + the various chroma decimation types.*/ +extern const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]; +/*The number of indices in the oc_mb_map array that can be valid for each of + the various chroma decimation types.*/ +extern const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS]; +/*A table of functions used to fill in the Cb,Cr plane motion vectors for a + macro block when 4 different motion vectors are specified in the luma + plane.*/ +extern const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]; + + + +int oc_ilog(unsigned _v); +void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz); +void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz); +void oc_free_2d(void *_ptr); + +void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst, + const th_ycbcr_buffer _src); + +int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs); +void oc_state_clear(oc_theora_state *_state); +void oc_state_vtable_init_c(oc_theora_state *_state); +void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli, + int _y0,int _yend); +void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli); +void oc_state_borders_fill(oc_theora_state *_state,int _refi); +void oc_state_fill_buffer_ptrs(oc_theora_state *_state,int _buf_idx, + th_ycbcr_buffer _img); +int oc_state_mbi_for_pos(oc_theora_state *_state,int _mbx,int 
_mby); +int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2], + int _pli,int _dx,int _dy); + +int oc_state_loop_filter_init(oc_theora_state *_state,int *_bv); +void oc_state_loop_filter(oc_theora_state *_state,int _frame); +#if defined(OC_DUMP_IMAGES) +int oc_state_dump_frame(const oc_theora_state *_state,int _frame, + const char *_suf); +#endif + +/*Shared accelerated functions.*/ +void oc_frag_copy(const oc_theora_state *_state,unsigned char *_dst, + const unsigned char *_src,int _ystride); +void oc_frag_recon_intra(const oc_theora_state *_state, + unsigned char *_dst,int _dst_ystride,const ogg_int16_t _residue[64]); +void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); +void oc_frag_recon_inter2(const oc_theora_state *_state, + unsigned char *_dst,const unsigned char *_src1,const unsigned char *_src2, + int _ystride,const ogg_int16_t _residue[64]); +void oc_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64],int _last_zzi); +void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); +void oc_state_frag_copy_list(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli); +void oc_state_loop_filter_frag_rows(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); +void oc_restore_fpu(const oc_theora_state *_state); + +/*Default pure-C implementations.*/ +void oc_frag_copy_c(unsigned char *_dst, + const unsigned char *_src,int _src_ystride); +void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride, + const ogg_int16_t _residue[64]); +void oc_frag_recon_inter_c(unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]); +void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1, + const unsigned 
char *_src2,int _ystride,const ogg_int16_t _residue[64]); +void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi); +void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); +void oc_state_frag_copy_list_c(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli); +void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); +void oc_restore_fpu_c(void); + +/*We need a way to call a few encoder functions without introducing a link-time + dependency into the decoder, while still allowing the old alpha API which + does not distinguish between encoder and decoder objects to be used. + We do this by placing a function table at the start of the encoder object + which can dispatch into the encoder library. + We do a similar thing for the decoder in case we ever decide to split off a + common base library.*/ +typedef void (*oc_state_clear_func)(theora_state *_th); +typedef int (*oc_state_control_func)(theora_state *th,int _req, + void *_buf,size_t _buf_sz); +typedef ogg_int64_t (*oc_state_granule_frame_func)(theora_state *_th, + ogg_int64_t _granulepos); +typedef double (*oc_state_granule_time_func)(theora_state *_th, + ogg_int64_t _granulepos); + + +struct oc_state_dispatch_vtable{ + oc_state_clear_func clear; + oc_state_control_func control; + oc_state_granule_frame_func granule_frame; + oc_state_granule_time_func granule_time; +}; + +#endif diff --git a/thirdparty/libtheora/mathops.c b/thirdparty/libtheora/mathops.c new file mode 100644 index 0000000000..d3fb909194 --- /dev/null +++ b/thirdparty/libtheora/mathops.c @@ -0,0 +1,296 @@ +#include "mathops.h" +#include + +/*The fastest fallback strategy for platforms with fast multiplication appears + to be based on de Bruijn sequences~\cite{LP98}. 
+ Tests confirmed this to be true even on an ARM11, where it is actually faster + than using the native clz instruction. + Define OC_ILOG_NODEBRUIJN to use a simpler fallback on platforms where + multiplication or table lookups are too expensive. + + @UNPUBLISHED{LP98, + author="Charles E. Leiserson and Harald Prokop", + title="Using de {Bruijn} Sequences to Index a 1 in a Computer Word", + month=Jun, + year=1998, + note="\url{http://supertech.csail.mit.edu/papers/debruijn.pdf}" + }*/ +#if !defined(OC_ILOG_NODEBRUIJN)&& \ + !defined(OC_CLZ32)||!defined(OC_CLZ64)&&LONG_MAX<9223372036854775807LL +static const unsigned char OC_DEBRUIJN_IDX32[32]={ + 0, 1,28, 2,29,14,24, 3,30,22,20,15,25,17, 4, 8, + 31,27,13,23,21,19,16, 7,26,12,18, 6,11, 5,10, 9 +}; +#endif + +int oc_ilog32(ogg_uint32_t _v){ +#if defined(OC_CLZ32) + return (OC_CLZ32_OFFS-OC_CLZ32(_v))&-!!_v; +#else +/*On a Pentium M, this branchless version tested as the fastest version without + multiplications on 1,000,000,000 random 32-bit integers, edging out a + similar version with branches, and a 256-entry LUT version.*/ +# if defined(OC_ILOG_NODEBRUIJN) + int ret; + int m; + ret=_v>0; + m=(_v>0xFFFFU)<<4; + _v>>=m; + ret|=m; + m=(_v>0xFFU)<<3; + _v>>=m; + ret|=m; + m=(_v>0xFU)<<2; + _v>>=m; + ret|=m; + m=(_v>3)<<1; + _v>>=m; + ret|=m; + ret+=_v>1; + return ret; +/*This de Bruijn sequence version is faster if you have a fast multiplier.*/ +# else + int ret; + ret=_v>0; + _v|=_v>>1; + _v|=_v>>2; + _v|=_v>>4; + _v|=_v>>8; + _v|=_v>>16; + _v=(_v>>1)+1; + ret+=OC_DEBRUIJN_IDX32[_v*0x77CB531U>>27&0x1F]; + return ret; +# endif +#endif +} + +int oc_ilog64(ogg_int64_t _v){ +#if defined(OC_CLZ64) + return (OC_CLZ64_OFFS-OC_CLZ64(_v))&-!!_v; +#else +# if defined(OC_ILOG_NODEBRUIJN) + ogg_uint32_t v; + int ret; + int m; + ret=_v>0; + m=(_v>0xFFFFFFFFU)<<5; + v=(ogg_uint32_t)(_v>>m); + ret|=m; + m=(v>0xFFFFU)<<4; + v>>=m; + ret|=m; + m=(v>0xFFU)<<3; + v>>=m; + ret|=m; + m=(v>0xFU)<<2; + v>>=m; + ret|=m; + m=(v>3)<<1; + 
v>>=m; + ret|=m; + ret+=v>1; + return ret; +# else +/*If we don't have a 64-bit word, split it into two 32-bit halves.*/ +# if LONG_MAX<9223372036854775807LL + ogg_uint32_t v; + int ret; + int m; + ret=_v>0; + m=(_v>0xFFFFFFFFU)<<5; + v=(ogg_uint32_t)(_v>>m); + ret|=m; + v|=v>>1; + v|=v>>2; + v|=v>>4; + v|=v>>8; + v|=v>>16; + v=(v>>1)+1; + ret+=OC_DEBRUIJN_IDX32[v*0x77CB531U>>27&0x1F]; + return ret; +/*Otherwise do it in one 64-bit operation.*/ +# else + static const unsigned char OC_DEBRUIJN_IDX64[64]={ + 0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40, + 5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57, + 63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56, + 62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58 + }; + int ret; + ret=_v>0; + _v|=_v>>1; + _v|=_v>>2; + _v|=_v>>4; + _v|=_v>>8; + _v|=_v>>16; + _v|=_v>>32; + _v=(_v>>1)+1; + ret+=OC_DEBRUIJN_IDX64[_v*0x218A392CD3D5DBF>>58&0x3F]; + return ret; +# endif +# endif +#endif +} + +/*round(2**(62+i)*atanh(2**(-(i+1)))/log(2))*/ +static const ogg_int64_t OC_ATANH_LOG2[32]={ + 0x32B803473F7AD0F4LL,0x2F2A71BD4E25E916LL,0x2E68B244BB93BA06LL, + 0x2E39FB9198CE62E4LL,0x2E2E683F68565C8FLL,0x2E2B850BE2077FC1LL, + 0x2E2ACC58FE7B78DBLL,0x2E2A9E2DE52FD5F2LL,0x2E2A92A338D53EECLL, + 0x2E2A8FC08F5E19B6LL,0x2E2A8F07E51A485ELL,0x2E2A8ED9BA8AF388LL, + 0x2E2A8ECE2FE7384ALL,0x2E2A8ECB4D3E4B1ALL,0x2E2A8ECA94940FE8LL, + 0x2E2A8ECA6669811DLL,0x2E2A8ECA5ADEDD6ALL,0x2E2A8ECA57FC347ELL, + 0x2E2A8ECA57438A43LL,0x2E2A8ECA57155FB4LL,0x2E2A8ECA5709D510LL, + 0x2E2A8ECA5706F267LL,0x2E2A8ECA570639BDLL,0x2E2A8ECA57060B92LL, + 0x2E2A8ECA57060008LL,0x2E2A8ECA5705FD25LL,0x2E2A8ECA5705FC6CLL, + 0x2E2A8ECA5705FC3ELL,0x2E2A8ECA5705FC33LL,0x2E2A8ECA5705FC30LL, + 0x2E2A8ECA5705FC2FLL,0x2E2A8ECA5705FC2FLL +}; + +/*Computes the binary exponential of _z, a log base 2 in Q57 format.*/ +ogg_int64_t oc_bexp64(ogg_int64_t _z){ + ogg_int64_t w; + ogg_int64_t z; + int ipart; + ipart=(int)(_z>>57); + if(ipart<0)return 0; + if(ipart>=63)return 0x7FFFFFFFFFFFFFFFLL; + 
z=_z-OC_Q57(ipart); + if(z){ + ogg_int64_t mask; + long wlo; + int i; + /*C doesn't give us 64x64->128 muls, so we use CORDIC. + This is not particularly fast, but it's not being used in time-critical + code; it is very accurate.*/ + /*z is the fractional part of the log in Q62 format. + We need 1 bit of headroom since the magnitude can get larger than 1 + during the iteration, and a sign bit.*/ + z<<=5; + /*w is the exponential in Q61 format (since it also needs headroom and can + get as large as 2.0); we could get another bit if we dropped the sign, + but we'll recover that bit later anyway. + Ideally this should start out as + \lim_{n->\infty} 2^{61}/\product_{i=1}^n \sqrt{1-2^{-2i}} + but in order to guarantee convergence we have to repeat iterations 4, + 13 (=3*4+1), and 40 (=3*13+1, etc.), so it winds up somewhat larger.*/ + w=0x26A3D0E401DD846DLL; + for(i=0;;i++){ + mask=-(z<0); + w+=(w>>i+1)+mask^mask; + z-=OC_ATANH_LOG2[i]+mask^mask; + /*Repeat iteration 4.*/ + if(i>=3)break; + z<<=1; + } + for(;;i++){ + mask=-(z<0); + w+=(w>>i+1)+mask^mask; + z-=OC_ATANH_LOG2[i]+mask^mask; + /*Repeat iteration 13.*/ + if(i>=12)break; + z<<=1; + } + for(;i<32;i++){ + mask=-(z<0); + w+=(w>>i+1)+mask^mask; + z=z-(OC_ATANH_LOG2[i]+mask^mask)<<1; + } + wlo=0; + /*Skip the remaining iterations unless we really require that much + precision. + We could have bailed out earlier for smaller iparts, but that would + require initializing w from a table, as the limit doesn't converge to + 61-bit precision until n=30.*/ + if(ipart>30){ + /*For these iterations, we just update the low bits, as the high bits + can't possibly be affected. 
+ OC_ATANH_LOG2 has also converged (it actually did so one iteration + earlier, but that's no reason for an extra special case).*/ + for(;;i++){ + mask=-(z<0); + wlo+=(w>>i)+mask^mask; + z-=OC_ATANH_LOG2[31]+mask^mask; + /*Repeat iteration 40.*/ + if(i>=39)break; + z<<=1; + } + for(;i<61;i++){ + mask=-(z<0); + wlo+=(w>>i)+mask^mask; + z=z-(OC_ATANH_LOG2[31]+mask^mask)<<1; + } + } + w=(w<<1)+wlo; + } + else w=(ogg_int64_t)1<<62; + if(ipart<62)w=(w>>61-ipart)+1>>1; + return w; +} + +/*Computes the binary logarithm of _w, returned in Q57 format.*/ +ogg_int64_t oc_blog64(ogg_int64_t _w){ + ogg_int64_t z; + int ipart; + if(_w<=0)return -1; + ipart=OC_ILOGNZ_64(_w)-1; + if(ipart>61)_w>>=ipart-61; + else _w<<=61-ipart; + z=0; + if(_w&_w-1){ + ogg_int64_t x; + ogg_int64_t y; + ogg_int64_t u; + ogg_int64_t mask; + int i; + /*C doesn't give us 64x64->128 muls, so we use CORDIC. + This is not particularly fast, but it's not being used in time-critical + code; it is very accurate.*/ + /*z is the fractional part of the log in Q61 format.*/ + /*x and y are the cosh() and sinh(), respectively, in Q61 format. 
+ We are computing z=2*atanh(y/x)=2*atanh((_w-1)/(_w+1)).*/ + x=_w+((ogg_int64_t)1<<61); + y=_w-((ogg_int64_t)1<<61); + for(i=0;i<4;i++){ + mask=-(y<0); + z+=(OC_ATANH_LOG2[i]>>i)+mask^mask; + u=x>>i+1; + x-=(y>>i+1)+mask^mask; + y-=u+mask^mask; + } + /*Repeat iteration 4.*/ + for(i--;i<13;i++){ + mask=-(y<0); + z+=(OC_ATANH_LOG2[i]>>i)+mask^mask; + u=x>>i+1; + x-=(y>>i+1)+mask^mask; + y-=u+mask^mask; + } + /*Repeat iteration 13.*/ + for(i--;i<32;i++){ + mask=-(y<0); + z+=(OC_ATANH_LOG2[i]>>i)+mask^mask; + u=x>>i+1; + x-=(y>>i+1)+mask^mask; + y-=u+mask^mask; + } + /*OC_ATANH_LOG2 has converged.*/ + for(;i<40;i++){ + mask=-(y<0); + z+=(OC_ATANH_LOG2[31]>>i)+mask^mask; + u=x>>i+1; + x-=(y>>i+1)+mask^mask; + y-=u+mask^mask; + } + /*Repeat iteration 40.*/ + for(i--;i<62;i++){ + mask=-(y<0); + z+=(OC_ATANH_LOG2[31]>>i)+mask^mask; + u=x>>i+1; + x-=(y>>i+1)+mask^mask; + y-=u+mask^mask; + } + z=z+8>>4; + } + return OC_Q57(ipart)+z; +} diff --git a/thirdparty/libtheora/mathops.h b/thirdparty/libtheora/mathops.h new file mode 100644 index 0000000000..efbc5377b0 --- /dev/null +++ b/thirdparty/libtheora/mathops.h @@ -0,0 +1,141 @@ +#if !defined(_mathops_H) +# define _mathops_H (1) +# include + +# ifdef __GNUC_PREREQ +# if __GNUC_PREREQ(3,4) +# include +/*Note the casts to (int) below: this prevents OC_CLZ{32|64}_OFFS from + "upgrading" the type of an entire expression to an (unsigned) size_t.*/ +# if INT_MAX>=2147483647 +# define OC_CLZ32_OFFS ((int)sizeof(unsigned)*CHAR_BIT) +# define OC_CLZ32(_x) (__builtin_clz(_x)) +# elif LONG_MAX>=2147483647L +# define OC_CLZ32_OFFS ((int)sizeof(unsigned long)*CHAR_BIT) +# define OC_CLZ32(_x) (__builtin_clzl(_x)) +# endif +# if INT_MAX>=9223372036854775807LL +# define OC_CLZ64_OFFS ((int)sizeof(unsigned)*CHAR_BIT) +# define OC_CLZ64(_x) (__builtin_clz(_x)) +# elif LONG_MAX>=9223372036854775807LL +# define OC_CLZ64_OFFS ((int)sizeof(unsigned long)*CHAR_BIT) +# define OC_CLZ64(_x) (__builtin_clzl(_x)) +# elif 
LLONG_MAX>=9223372036854775807LL|| \ + __LONG_LONG_MAX__>=9223372036854775807LL +# define OC_CLZ64_OFFS ((int)sizeof(unsigned long long)*CHAR_BIT) +# define OC_CLZ64(_x) (__builtin_clzll(_x)) +# endif +# endif +# endif + + + +/** + * oc_ilog32 - Integer binary logarithm of a 32-bit value. + * @_v: A 32-bit value. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * The OC_ILOG_32() or OC_ILOGNZ_32() macros may be able to use a builtin + * function instead, which should be faster. + */ +int oc_ilog32(ogg_uint32_t _v); +/** + * oc_ilog64 - Integer binary logarithm of a 64-bit value. + * @_v: A 64-bit value. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * The OC_ILOG_64() or OC_ILOGNZ_64() macros may be able to use a builtin + * function instead, which should be faster. + */ +int oc_ilog64(ogg_int64_t _v); + + +# if defined(OC_CLZ32) +/** + * OC_ILOGNZ_32 - Integer binary logarithm of a non-zero 32-bit value. + * @_v: A non-zero 32-bit value. + * Returns floor(log2(_v))+1. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * If _v is zero, the return value is undefined; use OC_ILOG_32() instead. + */ +# define OC_ILOGNZ_32(_v) (OC_CLZ32_OFFS-OC_CLZ32(_v)) +/** + * OC_ILOG_32 - Integer binary logarithm of a 32-bit value. + * @_v: A 32-bit value. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. 
+ */ +# define OC_ILOG_32(_v) (OC_ILOGNZ_32(_v)&-!!(_v)) +# else +# define OC_ILOGNZ_32(_v) (oc_ilog32(_v)) +# define OC_ILOG_32(_v) (oc_ilog32(_v)) +# endif + +# if defined(CLZ64) +/** + * OC_ILOGNZ_64 - Integer binary logarithm of a non-zero 64-bit value. + * @_v: A non-zero 64-bit value. + * Returns floor(log2(_v))+1. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * If _v is zero, the return value is undefined; use OC_ILOG_64() instead. + */ +# define OC_ILOGNZ_64(_v) (CLZ64_OFFS-CLZ64(_v)) +/** + * OC_ILOG_64 - Integer binary logarithm of a 64-bit value. + * @_v: A 64-bit value. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + */ +# define OC_ILOG_64(_v) (OC_ILOGNZ_64(_v)&-!!(_v)) +# else +# define OC_ILOGNZ_64(_v) (oc_ilog64(_v)) +# define OC_ILOG_64(_v) (oc_ilog64(_v)) +# endif + +# define OC_STATIC_ILOG0(_v) (!!(_v)) +# define OC_STATIC_ILOG1(_v) (((_v)&0x2)?2:OC_STATIC_ILOG0(_v)) +# define OC_STATIC_ILOG2(_v) \ + (((_v)&0xC)?2+OC_STATIC_ILOG1((_v)>>2):OC_STATIC_ILOG1(_v)) +# define OC_STATIC_ILOG3(_v) \ + (((_v)&0xF0)?4+OC_STATIC_ILOG2((_v)>>4):OC_STATIC_ILOG2(_v)) +# define OC_STATIC_ILOG4(_v) \ + (((_v)&0xFF00)?8+OC_STATIC_ILOG3((_v)>>8):OC_STATIC_ILOG3(_v)) +# define OC_STATIC_ILOG5(_v) \ + (((_v)&0xFFFF0000)?16+OC_STATIC_ILOG4((_v)>>16):OC_STATIC_ILOG4(_v)) +# define OC_STATIC_ILOG6(_v) \ + (((_v)&0xFFFFFFFF00000000ULL)?32+OC_STATIC_ILOG5((_v)>>32):OC_STATIC_ILOG5(_v)) +/** + * OC_STATIC_ILOG_32 - The integer logarithm of an (unsigned, 32-bit) constant. + * @_v: A non-negative 32-bit constant. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. 
+ * This macro is suitable for evaluation at compile time, but it should not be + * used on values that can change at runtime, as it operates via exhaustive + * search. + */ +# define OC_STATIC_ILOG_32(_v) (OC_STATIC_ILOG5((ogg_uint32_t)(_v))) +/** + * OC_STATIC_ILOG_64 - The integer logarithm of an (unsigned, 64-bit) constant. + * @_v: A non-negative 64-bit constant. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * This macro is suitable for evaluation at compile time, but it should not be + * used on values that can change at runtime, as it operates via exhaustive + * search. + */ +# define OC_STATIC_ILOG_64(_v) (OC_STATIC_ILOG6((ogg_int64_t)(_v))) + +#define OC_Q57(_v) ((ogg_int64_t)(_v)<<57) + +ogg_int64_t oc_bexp64(ogg_int64_t _z); +ogg_int64_t oc_blog64(ogg_int64_t _w); + +#endif diff --git a/thirdparty/libtheora/mcenc.c b/thirdparty/libtheora/mcenc.c new file mode 100644 index 0000000000..797e81f4f9 --- /dev/null +++ b/thirdparty/libtheora/mcenc.c @@ -0,0 +1,767 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id$ + + ********************************************************************/ +#include +#include +#include +#include "encint.h" + + + +typedef struct oc_mcenc_ctx oc_mcenc_ctx; + + + +/*Temporary state used for motion estimation.*/ +struct oc_mcenc_ctx{ + /*The candidate motion vectors.*/ + int candidates[13][2]; + /*The start of the Set B candidates.*/ + int setb0; + /*The total number of candidates.*/ + int ncandidates; +}; + + + +/*The maximum Y plane SAD value for accepting the median predictor.*/ +#define OC_YSAD_THRESH1 (256) +/*The amount to right shift the minimum error by when inflating it for + computing the second maximum Y plane SAD threshold.*/ +#define OC_YSAD_THRESH2_SCALE_BITS (4) +/*The amount to add to the second maximum Y plane threshold when inflating + it.*/ +#define OC_YSAD_THRESH2_OFFSET (64) + +/*The vector offsets in the X direction for each search site in the square + pattern.*/ +static const int OC_SQUARE_DX[9]={-1,0,1,-1,0,1,-1,0,1}; +/*The vector offsets in the Y direction for each search site in the square + pattern.*/ +static const int OC_SQUARE_DY[9]={-1,-1,-1,0,0,0,1,1,1}; +/*The number of sites to search for each boundary condition in the square + pattern. 
+ Bit flags for the boundary conditions are as follows: + 1: -16==dx + 2: dx==15(.5) + 4: -16==dy + 8: dy==15(.5)*/ +static const int OC_SQUARE_NSITES[11]={8,5,5,0,5,3,3,0,5,3,3}; +/*The list of sites to search for each boundary condition in the square + pattern.*/ +static const int OC_SQUARE_SITES[11][8]={ + /* -15.5mb_info; + /*Skip a position to store the median predictor in.*/ + ncandidates=1; + if(embs[_mbi].ncneighbors>0){ + /*Fill in the first part of set A: the vectors from adjacent blocks.*/ + for(i=0;icandidates[ncandidates][0]=embs[nmbi].analysis_mv[0][_frame][0]; + _mcenc->candidates[ncandidates][1]=embs[nmbi].analysis_mv[0][_frame][1]; + ncandidates++; + } + } + /*Add a few additional vectors to set A: the vectors used in the previous + frames and the (0,0) vector.*/ + _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,_accum[0],31); + _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,_accum[1],31); + ncandidates++; + _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31, + embs[_mbi].analysis_mv[1][_frame][0]+_accum[0],31); + _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31, + embs[_mbi].analysis_mv[1][_frame][1]+_accum[1],31); + ncandidates++; + _mcenc->candidates[ncandidates][0]=0; + _mcenc->candidates[ncandidates][1]=0; + ncandidates++; + /*Use the first three vectors of set A to find our best predictor: their + median.*/ + memcpy(a,_mcenc->candidates+1,sizeof(a)); + OC_SORT2I(a[0][0],a[1][0]); + OC_SORT2I(a[0][1],a[1][1]); + OC_SORT2I(a[1][0],a[2][0]); + OC_SORT2I(a[1][1],a[2][1]); + OC_SORT2I(a[0][0],a[1][0]); + OC_SORT2I(a[0][1],a[1][1]); + _mcenc->candidates[0][0]=a[1][0]; + _mcenc->candidates[0][1]=a[1][1]; + /*Fill in set B: accelerated predictors for this and adjacent macro blocks.*/ + _mcenc->setb0=ncandidates; + /*The first time through the loop use the current macro block.*/ + nmbi=_mbi; + for(i=0;;i++){ + _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31, + 2*embs[_mbi].analysis_mv[1][_frame][0] + 
-embs[_mbi].analysis_mv[2][_frame][0]+_accum[0],31); + _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31, + 2*embs[_mbi].analysis_mv[1][_frame][1] + -embs[_mbi].analysis_mv[2][_frame][1]+_accum[1],31); + ncandidates++; + if(i>=embs[_mbi].npneighbors)break; + nmbi=embs[_mbi].pneighbors[i]; + } + /*Truncate to full-pel positions.*/ + for(i=0;icandidates[i][0]=OC_DIV2(_mcenc->candidates[i][0]); + _mcenc->candidates[i][1]=OC_DIV2(_mcenc->candidates[i][1]); + } + _mcenc->ncandidates=ncandidates; +} + +#if 0 +static unsigned oc_sad16_halfpel(const oc_enc_ctx *_enc, + const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4], + int _mvoffset0,int _mvoffset1,const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _best_err){ + unsigned err; + int bi; + err=0; + for(bi=0;bi<4;bi++){ + ptrdiff_t frag_offs; + frag_offs=_frag_buf_offs[_fragis[bi]]; + err+=oc_enc_frag_sad2_thresh(_enc,_src+frag_offs,_ref+frag_offs+_mvoffset0, + _ref+frag_offs+_mvoffset1,_ystride,_best_err-err); + } + return err; +} +#endif + +static unsigned oc_satd16_halfpel(const oc_enc_ctx *_enc, + const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4], + int _mvoffset0,int _mvoffset1,const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _best_err){ + unsigned err; + int bi; + err=0; + for(bi=0;bi<4;bi++){ + ptrdiff_t frag_offs; + frag_offs=_frag_buf_offs[_fragis[bi]]; + err+=oc_enc_frag_satd2_thresh(_enc,_src+frag_offs,_ref+frag_offs+_mvoffset0, + _ref+frag_offs+_mvoffset1,_ystride,_best_err-err); + } + return err; +} + +static unsigned oc_mcenc_ysad_check_mbcandidate_fullpel(const oc_enc_ctx *_enc, + const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy, + const unsigned char *_src,const unsigned char *_ref,int _ystride, + unsigned _block_err[4]){ + unsigned err; + int mvoffset; + int bi; + mvoffset=_dx+_dy*_ystride; + err=0; + for(bi=0;bi<4;bi++){ + ptrdiff_t frag_offs; + unsigned block_err; + frag_offs=_frag_buf_offs[_fragis[bi]]; + 
block_err=oc_enc_frag_sad(_enc, + _src+frag_offs,_ref+frag_offs+mvoffset,_ystride); + _block_err[bi]=block_err; + err+=block_err; + } + return err; +} + +static int oc_mcenc_ysatd_check_mbcandidate_fullpel(const oc_enc_ctx *_enc, + const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy, + const unsigned char *_src,const unsigned char *_ref,int _ystride){ + int mvoffset; + int err; + int bi; + mvoffset=_dx+_dy*_ystride; + err=0; + for(bi=0;bi<4;bi++){ + ptrdiff_t frag_offs; + frag_offs=_frag_buf_offs[_fragis[bi]]; + err+=oc_enc_frag_satd_thresh(_enc, + _src+frag_offs,_ref+frag_offs+mvoffset,_ystride,UINT_MAX); + } + return err; +} + +static unsigned oc_mcenc_ysatd_check_bcandidate_fullpel(const oc_enc_ctx *_enc, + ptrdiff_t _frag_offs,int _dx,int _dy, + const unsigned char *_src,const unsigned char *_ref,int _ystride){ + return oc_enc_frag_satd_thresh(_enc, + _src+_frag_offs,_ref+_frag_offs+_dx+_dy*_ystride,_ystride,UINT_MAX); +} + +/*Perform a motion vector search for this macro block against a single + reference frame. + As a bonus, individual block motion vectors are computed as well, as much of + the work can be shared. + The actual motion vector is stored in the appropriate place in the + oc_mb_enc_info structure. + _enc: The encoder context. + _accum: Drop frame/golden MV accumulators. + _mbi: The macro block index. + _frame: The frame to search, either OC_FRAME_PREV or OC_FRAME_GOLD.*/ +void oc_mcenc_search_frame(oc_enc_ctx *_enc,int _accum[2],int _mbi,int _frame){ + /*Note: Traditionally this search is done using a rate-distortion objective + function of the form D+lambda*R. + However, xiphmont tested this and found it produced a small degradation, + while requiring extra computation. + This is most likely due to Theora's peculiar MV encoding scheme: MVs are + not coded relative to a predictor, and the only truly cheap way to use a + MV is in the LAST or LAST2 MB modes, which are not being considered here.
+ Therefore if we use the MV found here, it's only because both LAST and + LAST2 performed poorly, and therefore the MB is not likely to be uniform + or suffer from the aperture problem. + Furthermore we would like to re-use the MV found here for as many MBs as + possible, so picking a slightly sub-optimal vector to save a bit or two + may cause increased degradation in many blocks to come. + We could artificially reduce lambda to compensate, but it's faster to just + disable it entirely, and use D (the distortion) as the sole criterion.*/ + oc_mcenc_ctx mcenc; + const ptrdiff_t *frag_buf_offs; + const ptrdiff_t *fragis; + const unsigned char *src; + const unsigned char *ref; + int ystride; + oc_mb_enc_info *embs; + ogg_int32_t hit_cache[31]; + ogg_int32_t hitbit; + unsigned best_block_err[4]; + unsigned block_err[4]; + unsigned best_err; + int best_vec[2]; + int best_block_vec[4][2]; + int candx; + int candy; + int bi; + embs=_enc->mb_info; + /*Find some candidate motion vectors.*/ + oc_mcenc_find_candidates(_enc,&mcenc,_accum,_mbi,_frame); + /*Clear the cache of locations we've examined.*/ + memset(hit_cache,0,sizeof(hit_cache)); + /*Start with the median predictor.*/ + candx=mcenc.candidates[0][0]; + candy=mcenc.candidates[0][1]; + hit_cache[candy+15]|=(ogg_int32_t)1<state.frag_buf_offs; + fragis=_enc->state.mb_maps[_mbi][0]; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]]; + ystride=_enc->state.ref_ystride[0]; + /*TODO: customize error function for speed/(quality+size) tradeoff.*/ + best_err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc, + frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err); + best_vec[0]=candx; + best_vec[1]=candy; + if(_frame==OC_FRAME_PREV){ + for(bi=0;bi<4;bi++){ + best_block_err[bi]=block_err[bi]; + best_block_vec[bi][0]=candx; + best_block_vec[bi][1]=candy; + } + } + /*If this predictor fails, move on to set A.*/ + if(best_err>OC_YSAD_THRESH1){ + unsigned err; +
unsigned t2; + int ncs; + int ci; + /*Compute the early termination threshold for set A.*/ + t2=embs[_mbi].error[_frame]; + ncs=OC_MINI(3,embs[_mbi].ncneighbors); + for(ci=0;ci>OC_YSAD_THRESH2_SCALE_BITS)+OC_YSAD_THRESH2_OFFSET; + /*Examine the candidates in set A.*/ + for(ci=1;cit2){ + /*Examine the candidates in set B.*/ + for(;cit2){ + int best_site; + int nsites; + int sitei; + int site; + int b; + /*Square pattern search.*/ + for(;;){ + best_site=4; + /*Compose the bit flags for boundary conditions.*/ + b=OC_DIV16(-best_vec[0]+1)|OC_DIV16(best_vec[0]+1)<<1| + OC_DIV16(-best_vec[1]+1)<<2|OC_DIV16(best_vec[1]+1)<<3; + nsites=OC_SQUARE_NSITES[b]; + for(sitei=0;sitei>=2; + for(bi=0;bi<4;bi++){ + if(best_block_err[bi]>t2){ + /*Square pattern search. + We do this in a slightly interesting manner. + We continue to check the SAD of all four blocks in the + macro block. + This gives us two things: + 1) We can continue to use the hit_cache to avoid duplicate + checks. + Otherwise we could continue to read it, but not write to it + without saving and restoring it for each block. + Note that we could still eliminate a large number of + duplicate checks by taking into account the site we came + from when choosing the site list. + We can still do that to avoid extra hit_cache queries, and + it might even be a speed win. + 2) It gives us a slightly better chance of escaping local + minima. + We would not be here if we weren't doing a fairly bad job + in finding a good vector, and checking these vectors can + save us from 100 to several thousand points off our SAD 1 + in 15 times. + TODO: Is this a good idea? + Who knows. 
+ It needs more testing.*/ + for(;;){ + int bestx; + int besty; + int bj; + bestx=best_block_vec[bi][0]; + besty=best_block_vec[bi][1]; + /*Compose the bit flags for boundary conditions.*/ + b=OC_DIV16(-bestx+1)|OC_DIV16(bestx+1)<<1| + OC_DIV16(-besty+1)<<2|OC_DIV16(besty+1)<<3; + nsites=OC_SQUARE_NSITES[b]; + for(sitei=0;siteimb_info[_mbi].analysis_mv; + if(_enc->prevframe_dropped){ + accum_p[0]=mvs[0][OC_FRAME_PREV][0]; + accum_p[1]=mvs[0][OC_FRAME_PREV][1]; + } + else accum_p[1]=accum_p[0]=0; + accum_g[0]=mvs[2][OC_FRAME_GOLD][0]; + accum_g[1]=mvs[2][OC_FRAME_GOLD][1]; + mvs[0][OC_FRAME_PREV][0]-=mvs[2][OC_FRAME_PREV][0]; + mvs[0][OC_FRAME_PREV][1]-=mvs[2][OC_FRAME_PREV][1]; + /*Move the motion vector predictors back a frame.*/ + memmove(mvs+1,mvs,2*sizeof(*mvs)); + /*Search the last frame.*/ + oc_mcenc_search_frame(_enc,accum_p,_mbi,OC_FRAME_PREV); + mvs[2][OC_FRAME_PREV][0]=accum_p[0]; + mvs[2][OC_FRAME_PREV][1]=accum_p[1]; + /*GOLDEN MVs are different from PREV MVs in that they're each absolute + offsets from some frame in the past rather than relative offsets from the + frame before. + For predictor calculation to make sense, we need them to be in the same + form as PREV MVs.*/ + mvs[1][OC_FRAME_GOLD][0]-=mvs[2][OC_FRAME_GOLD][0]; + mvs[1][OC_FRAME_GOLD][1]-=mvs[2][OC_FRAME_GOLD][1]; + mvs[2][OC_FRAME_GOLD][0]-=accum_g[0]; + mvs[2][OC_FRAME_GOLD][1]-=accum_g[1]; + /*Search the golden frame.*/ + oc_mcenc_search_frame(_enc,accum_g,_mbi,OC_FRAME_GOLD); + /*Put GOLDEN MVs back into absolute offset form. 
+ The newest MV is already an absolute offset.*/ + mvs[2][OC_FRAME_GOLD][0]+=accum_g[0]; + mvs[2][OC_FRAME_GOLD][1]+=accum_g[1]; + mvs[1][OC_FRAME_GOLD][0]+=mvs[2][OC_FRAME_GOLD][0]; + mvs[1][OC_FRAME_GOLD][1]+=mvs[2][OC_FRAME_GOLD][1]; +} + +#if 0 +static int oc_mcenc_ysad_halfpel_mbrefine(const oc_enc_ctx *_enc,int _mbi, + int _vec[2],int _best_err,int _frame){ + const unsigned char *src; + const unsigned char *ref; + const ptrdiff_t *frag_buf_offs; + const ptrdiff_t *fragis; + int offset_y[9]; + int ystride; + int mvoffset_base; + int best_site; + int sitei; + int err; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_framei]]; + frag_buf_offs=_enc->state.frag_buf_offs; + fragis=_enc->state.mb_maps[_mbi][0]; + ystride=_enc->state.ref_ystride[0]; + mvoffset_base=_vec[0]+_vec[1]*ystride; + offset_y[0]=offset_y[1]=offset_y[2]=-ystride; + offset_y[3]=offset_y[5]=0; + offset_y[6]=offset_y[7]=offset_y[8]=ystride; + best_site=4; + for(sitei=0;sitei<8;sitei++){ + int site; + int xmask; + int ymask; + int dx; + int dy; + int mvoffset0; + int mvoffset1; + site=OC_SQUARE_SITES[0][sitei]; + dx=OC_SQUARE_DX[site]; + dy=OC_SQUARE_DY[site]; + /*The following code SHOULD be equivalent to + oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1, + (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0); + However, it should also be much faster, as it involves no multiplies and + doesn't have to handle chroma vectors.*/ + xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx); + ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy); + mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask); + mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask); + err=oc_sad16_halfpel(_enc,frag_buf_offs,fragis, + mvoffset0,mvoffset1,src,ref,ystride,_best_err); + if(err<_best_err){ + _best_err=err; + best_site=site; + } + } + _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site]; + _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site]; + return _best_err; +} +#endif + +static 
unsigned oc_mcenc_ysatd_halfpel_mbrefine(const oc_enc_ctx *_enc, + int _mbi,int _vec[2],unsigned _best_err,int _frame){ + const unsigned char *src; + const unsigned char *ref; + const ptrdiff_t *frag_buf_offs; + const ptrdiff_t *fragis; + int offset_y[9]; + int ystride; + int mvoffset_base; + int best_site; + int sitei; + int err; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]]; + frag_buf_offs=_enc->state.frag_buf_offs; + fragis=_enc->state.mb_maps[_mbi][0]; + ystride=_enc->state.ref_ystride[0]; + mvoffset_base=_vec[0]+_vec[1]*ystride; + offset_y[0]=offset_y[1]=offset_y[2]=-ystride; + offset_y[3]=offset_y[5]=0; + offset_y[6]=offset_y[7]=offset_y[8]=ystride; + best_site=4; + for(sitei=0;sitei<8;sitei++){ + int site; + int xmask; + int ymask; + int dx; + int dy; + int mvoffset0; + int mvoffset1; + site=OC_SQUARE_SITES[0][sitei]; + dx=OC_SQUARE_DX[site]; + dy=OC_SQUARE_DY[site]; + /*The following code SHOULD be equivalent to + oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1, + (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0); + However, it should also be much faster, as it involves no multiplies and + doesn't have to handle chroma vectors.*/ + xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx); + ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy); + mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask); + mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask); + err=oc_satd16_halfpel(_enc,frag_buf_offs,fragis, + mvoffset0,mvoffset1,src,ref,ystride,_best_err); + if(err<_best_err){ + _best_err=err; + best_site=site; + } + } + _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site]; + _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site]; + return _best_err; +} + +void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame){ + oc_mb_enc_info *embs; + int vec[2]; + embs=_enc->mb_info; + vec[0]=OC_DIV2(embs[_mbi].analysis_mv[0][_frame][0]); + vec[1]=OC_DIV2(embs[_mbi].analysis_mv[0][_frame][1]); + 
embs[_mbi].satd[_frame]=oc_mcenc_ysatd_halfpel_mbrefine(_enc, + _mbi,vec,embs[_mbi].satd[_frame],_frame); + embs[_mbi].analysis_mv[0][_frame][0]=(signed char)vec[0]; + embs[_mbi].analysis_mv[0][_frame][1]=(signed char)vec[1]; +} + +#if 0 +static int oc_mcenc_ysad_halfpel_brefine(const oc_enc_ctx *_enc, + int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride, + int _offset_y[9],unsigned _best_err){ + int mvoffset_base; + int best_site; + int sitei; + mvoffset_base=_vec[0]+_vec[1]*_ystride; + best_site=4; + for(sitei=0;sitei<8;sitei++){ + unsigned err; + int site; + int xmask; + int ymask; + int dx; + int dy; + int mvoffset0; + int mvoffset1; + site=OC_SQUARE_SITES[0][sitei]; + dx=OC_SQUARE_DX[site]; + dy=OC_SQUARE_DY[site]; + /*The following code SHOULD be equivalent to + oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1, + (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0); + However, it should also be much faster, as it involves no multiplies and + doesn't have to handle chroma vectors.*/ + xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx); + ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy); + mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask); + mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask); + err=oc_enc_frag_sad2_thresh(_enc,_src, + _ref+mvoffset0,_ref+mvoffset1,ystride,_best_err); + if(err<_best_err){ + _best_err=err; + best_site=site; + } + } + _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site]; + _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site]; + return _best_err; +} +#endif + +static unsigned oc_mcenc_ysatd_halfpel_brefine(const oc_enc_ctx *_enc, + int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride, + int _offset_y[9],unsigned _best_err){ + int mvoffset_base; + int best_site; + int sitei; + mvoffset_base=_vec[0]+_vec[1]*_ystride; + best_site=4; + for(sitei=0;sitei<8;sitei++){ + unsigned err; + int site; + int xmask; + int ymask; + int dx; + int dy; + int mvoffset0; + int mvoffset1; + 
site=OC_SQUARE_SITES[0][sitei]; + dx=OC_SQUARE_DX[site]; + dy=OC_SQUARE_DY[site]; + /*The following code SHOULD be equivalent to + oc_state_get_mv_offsets(&_enc->state,&mvoffsets,0, + (_vec[0]<<1)+dx,(_vec[1]<<1)+dy); + However, it should also be much faster, as it involves no multiplies and + doesn't have to handle chroma vectors.*/ + xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx); + ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy); + mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask); + mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask); + err=oc_enc_frag_satd2_thresh(_enc,_src, + _ref+mvoffset0,_ref+mvoffset1,_ystride,_best_err); + if(err<_best_err){ + _best_err=err; + best_site=site; + } + } + _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site]; + _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site]; + return _best_err; +} + +void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi){ + oc_mb_enc_info *embs; + const ptrdiff_t *frag_buf_offs; + const ptrdiff_t *fragis; + const unsigned char *src; + const unsigned char *ref; + int offset_y[9]; + int ystride; + int bi; + ystride=_enc->state.ref_ystride[0]; + frag_buf_offs=_enc->state.frag_buf_offs; + fragis=_enc->state.mb_maps[_mbi][0]; + src=_enc->state.ref_frame_data[OC_FRAME_IO]; + ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]]; + offset_y[0]=offset_y[1]=offset_y[2]=-ystride; + offset_y[3]=offset_y[5]=0; + offset_y[6]=offset_y[7]=offset_y[8]=ystride; + embs=_enc->mb_info; + for(bi=0;bi<4;bi++){ + ptrdiff_t frag_offs; + int vec[2]; + frag_offs=frag_buf_offs[fragis[bi]]; + vec[0]=OC_DIV2(embs[_mbi].block_mv[bi][0]); + vec[1]=OC_DIV2(embs[_mbi].block_mv[bi][1]); + embs[_mbi].block_satd[bi]=oc_mcenc_ysatd_halfpel_brefine(_enc,vec, + src+frag_offs,ref+frag_offs,ystride,offset_y,embs[_mbi].block_satd[bi]); + embs[_mbi].ref_mv[bi][0]=(signed char)vec[0]; + embs[_mbi].ref_mv[bi][1]=(signed char)vec[1]; + } +} diff --git a/thirdparty/libtheora/modedec.h b/thirdparty/libtheora/modedec.h new file mode 100644 index 
0000000000..ea12c64afd --- /dev/null +++ b/thirdparty/libtheora/modedec.h @@ -0,0 +1,4027 @@ +/*File generated by libtheora with OC_COLLECT_METRICS defined at compile time.*/ +#if !defined(_modedec_H) +# define _modedec_H (1) + + + +# if defined(OC_COLLECT_METRICS) +typedef struct oc_mode_metrics oc_mode_metrics; +# endif +typedef struct oc_mode_rd oc_mode_rd; + + + +/*The number of extra bits of precision at which to store rate metrics.*/ +# define OC_BIT_SCALE (6) +/*The number of extra bits of precision at which to store RMSE metrics. + This must be at least half OC_BIT_SCALE (rounded up).*/ +# define OC_RMSE_SCALE (5) +/*The number of bins to partition statistics into.*/ +# define OC_SAD_BINS (24) +/*The number of bits of precision to drop from SAD scores to assign them to a + bin.*/ +# define OC_SAD_SHIFT (9) + + + +# if defined(OC_COLLECT_METRICS) +struct oc_mode_metrics{ + double fragw; + double satd; + double rate; + double rmse; + double satd2; + double satdrate; + double rate2; + double satdrmse; + double rmse2; +}; + + +int oc_has_mode_metrics; +oc_mode_metrics OC_MODE_METRICS[64][3][2][OC_SAD_BINS]; +# endif + + + +struct oc_mode_rd{ + ogg_int16_t rate; + ogg_int16_t rmse; +}; + + +# if !defined(OC_COLLECT_METRICS) +static const +# endif +oc_mode_rd OC_MODE_RD[64][3][2][OC_SAD_BINS]={ + { + { + /*Y' qi=0 INTRA*/ + { + { 87, -66},{ 132, 1611},{ 197, 3474},{ 285, 5130}, + { 376, 6419},{ 450, 7545},{ 521, 8587},{ 600, 9587}, + { 689,10498},{ 790,11348},{ 899,12158},{ 1030,12855}, + { 1166,13459},{ 1276,14052},{ 1353,14732},{ 1444,15425}, + { 1535,16101},{ 1609,16856},{ 1697,17532},{ 1823,17995}, + { 1962,18426},{ 2085,18919},{ 2201,19503},{ 2304,20307} + }, + /*Y' qi=0 INTER*/ + { + { 32, -105},{ 40, 1268},{ 54, 2919},{ 91, 4559}, + { 118, 6244},{ 132, 7932},{ 142, 9514},{ 149,10989}, + { 155,12375},{ 161,13679},{ 168,14958},{ 176,16215}, + { 187,17431},{ 196,18623},{ 207,19790},{ 218,20941}, + { 230,22083},{ 246,23213},{ 265,24333},{ 292,25439}, + { 
328,26512},{ 372,27538},{ 427,28522},{ 494,29479} + } + }, + { + /*Cb qi=0 INTRA*/ + { + { 1, 6},{ 27, 368},{ 52, 738},{ 67, 1171}, + { 80, 1642},{ 99, 2134},{ 110, 2642},{ 112, 3144}, + { 126, 3578},{ 154, 3967},{ 167, 4387},{ 172, 4839}, + { 191, 5278},{ 208, 5666},{ 220, 6036},{ 223, 6398}, + { 227, 6814},{ 253, 7157},{ 284, 7403},{ 292, 7699}, + { 314, 7983},{ 339, 8203},{ 363, 8460},{ 399, 8919} + }, + /*Cb qi=0 INTER*/ + { + { 68, -55},{ 63, 275},{ 58, 602},{ 53, 936}, + { 50, 1290},{ 54, 1691},{ 58, 2116},{ 62, 2553}, + { 67, 2992},{ 72, 3422},{ 78, 3843},{ 84, 4253}, + { 89, 4658},{ 94, 5062},{ 98, 5455},{ 100, 5848}, + { 102, 6231},{ 104, 6604},{ 104, 6982},{ 105, 7359}, + { 105, 7733},{ 104, 8104},{ 105, 8465},{ 111, 8828} + } + }, + { + /*Cr qi=0 INTRA*/ + { + { 1, 8},{ 23, 375},{ 47, 759},{ 63, 1220}, + { 71, 1693},{ 82, 2171},{ 94, 2652},{ 109, 3103}, + { 125, 3567},{ 133, 3995},{ 151, 4375},{ 168, 4819}, + { 174, 5244},{ 190, 5635},{ 215, 6005},{ 242, 6347}, + { 257, 6758},{ 280, 7068},{ 311, 7336},{ 326, 7652}, + { 346, 7968},{ 372, 8213},{ 388, 8515},{ 408, 9060} + }, + /*Cr qi=0 INTER*/ + { + { 69, 0},{ 60, 314},{ 49, 624},{ 45, 943}, + { 45, 1285},{ 49, 1691},{ 55, 2130},{ 62, 2560}, + { 71, 2973},{ 79, 3385},{ 85, 3800},{ 89, 4207}, + { 92, 4620},{ 95, 5037},{ 96, 5436},{ 97, 5839}, + { 98, 6252},{ 99, 6653},{ 99, 7038},{ 103, 7426}, + { 107, 7810},{ 108, 8178},{ 107, 8539},{ 106, 8937} + } + } + }, + { + { + /*Y' qi=1 INTRA*/ + { + { 81, -71},{ 133, 1610},{ 203, 3460},{ 296, 5083}, + { 392, 6342},{ 467, 7454},{ 541, 8486},{ 625, 9466}, + { 716,10352},{ 823,11181},{ 940,11961},{ 1074,12643}, + { 1211,13233},{ 1324,13807},{ 1408,14489},{ 1504,15167}, + { 1598,15824},{ 1679,16544},{ 1788,17161},{ 1928,17579}, + { 2070,17991},{ 2202,18456},{ 2324,19021},{ 2425,19894} + }, + /*Y' qi=1 INTER*/ + { + { 34, 4},{ 40, 1307},{ 55, 2914},{ 93, 4555}, + { 120, 6243},{ 134, 7912},{ 144, 9468},{ 152,10918}, + { 158,12275},{ 164,13569},{ 171,14846},{ 
180,16098}, + { 191,17310},{ 204,18484},{ 216,19636},{ 228,20779}, + { 242,21912},{ 261,23036},{ 286,24146},{ 320,25221}, + { 363,26265},{ 418,27261},{ 485,28203},{ 551,29148} + } + }, + { + /*Cb qi=1 INTRA*/ + { + { 1, 6},{ 28, 367},{ 52, 738},{ 68, 1172}, + { 86, 1644},{ 106, 2135},{ 115, 2642},{ 119, 3141}, + { 132, 3569},{ 157, 3951},{ 172, 4366},{ 177, 4819}, + { 194, 5258},{ 211, 5638},{ 224, 6006},{ 233, 6367}, + { 236, 6784},{ 258, 7121},{ 299, 7357},{ 319, 7637}, + { 337, 7921},{ 358, 8141},{ 381, 8367},{ 401, 8768} + }, + /*Cb qi=1 INTER*/ + { + { 95, -31},{ 81, 295},{ 67, 614},{ 53, 953}, + { 48, 1305},{ 51, 1700},{ 56, 2125},{ 61, 2563}, + { 67, 3008},{ 73, 3435},{ 79, 3844},{ 85, 4251}, + { 90, 4663},{ 95, 5073},{ 98, 5458},{ 100, 5844}, + { 101, 6231},{ 102, 6606},{ 102, 6980},{ 103, 7347}, + { 104, 7726},{ 105, 8096},{ 105, 8453},{ 105, 8789} + } + }, + { + /*Cr qi=1 INTRA*/ + { + { 1, 8},{ 25, 375},{ 50, 759},{ 65, 1221}, + { 74, 1695},{ 86, 2172},{ 101, 2651},{ 117, 3101}, + { 129, 3561},{ 135, 3985},{ 153, 4368},{ 171, 4807}, + { 182, 5223},{ 202, 5608},{ 225, 5964},{ 251, 6300}, + { 271, 6697},{ 295, 6978},{ 324, 7235},{ 348, 7558}, + { 367, 7877},{ 394, 8101},{ 413, 8386},{ 409, 8945} + }, + /*Cr qi=1 INTER*/ + { + { 66, 11},{ 59, 323},{ 51, 631},{ 44, 949}, + { 44, 1292},{ 49, 1703},{ 56, 2140},{ 62, 2566}, + { 69, 2991},{ 77, 3397},{ 84, 3799},{ 89, 4211}, + { 93, 4634},{ 94, 5049},{ 95, 5444},{ 96, 5854}, + { 94, 6260},{ 95, 6640},{ 96, 7032},{ 101, 7423}, + { 104, 7790},{ 105, 8158},{ 109, 8527},{ 108, 8872} + } + } + }, + { + { + /*Y' qi=2 INTRA*/ + { + { 87, -72},{ 139, 1607},{ 213, 3426},{ 315, 4992}, + { 416, 6217},{ 495, 7315},{ 574, 8317},{ 666, 9265}, + { 763,10124},{ 875,10906},{ 1001,11654},{ 1147,12305}, + { 1289,12865},{ 1407,13424},{ 1503,14076},{ 1610,14724}, + { 1720,15342},{ 1815,16020},{ 1937,16579},{ 2084,16981}, + { 2236,17371},{ 2385,17779},{ 2536,18250},{ 2689,18931} + }, + /*Y' qi=2 INTER*/ + { + { 30, -2},{ 40, 1308},{ 
57, 2921},{ 96, 4567}, + { 122, 6260},{ 136, 7902},{ 148, 9418},{ 156,10826}, + { 162,12157},{ 169,13448},{ 177,14709},{ 188,15938}, + { 200,17133},{ 213,18295},{ 228,19433},{ 245,20564}, + { 264,21685},{ 289,22790},{ 323,23876},{ 368,24916}, + { 427,25906},{ 499,26837},{ 585,27700},{ 680,28514} + } + }, + { + /*Cb qi=2 INTRA*/ + { + { 1, 6},{ 30, 367},{ 58, 738},{ 77, 1172}, + { 93, 1645},{ 111, 2137},{ 123, 2642},{ 126, 3133}, + { 136, 3553},{ 162, 3934},{ 178, 4352},{ 183, 4803}, + { 199, 5231},{ 220, 5596},{ 235, 5957},{ 245, 6314}, + { 256, 6718},{ 286, 7048},{ 320, 7285},{ 336, 7568}, + { 366, 7829},{ 387, 8045},{ 405, 8261},{ 445, 8550} + }, + /*Cb qi=2 INTER*/ + { + { 115, -61},{ 93, 277},{ 71, 609},{ 54, 963}, + { 49, 1329},{ 53, 1715},{ 58, 2138},{ 63, 2583}, + { 69, 3017},{ 75, 3442},{ 81, 3857},{ 88, 4263}, + { 93, 4667},{ 96, 5065},{ 101, 5451},{ 101, 5832}, + { 102, 6213},{ 103, 6593},{ 103, 6968},{ 104, 7336}, + { 104, 7710},{ 105, 8076},{ 106, 8440},{ 106, 8822} + } + }, + { + /*Cr qi=2 INTRA*/ + { + { 1, 8},{ 27, 375},{ 54, 759},{ 70, 1222}, + { 79, 1696},{ 89, 2173},{ 106, 2652},{ 123, 3098}, + { 135, 3553},{ 143, 3972},{ 161, 4348},{ 181, 4782}, + { 194, 5189},{ 213, 5565},{ 235, 5907},{ 266, 6229}, + { 286, 6618},{ 311, 6897},{ 339, 7152},{ 362, 7454}, + { 392, 7721},{ 416, 7946},{ 429, 8227},{ 458, 8540} + }, + /*Cr qi=2 INTER*/ + { + { 74, 20},{ 63, 330},{ 51, 635},{ 44, 942}, + { 47, 1287},{ 54, 1710},{ 59, 2147},{ 65, 2571}, + { 72, 2996},{ 79, 3413},{ 86, 3820},{ 91, 4230}, + { 93, 4642},{ 95, 5046},{ 95, 5442},{ 95, 5839}, + { 96, 6243},{ 97, 6641},{ 99, 7021},{ 101, 7396}, + { 103, 7764},{ 106, 8138},{ 109, 8507},{ 114, 8851} + } + } + }, + { + { + /*Y' qi=3 INTRA*/ + { + { 91, -67},{ 141, 1606},{ 219, 3405},{ 328, 4929}, + { 433, 6122},{ 515, 7209},{ 598, 8204},{ 693, 9145}, + { 796, 9986},{ 912,10756},{ 1045,11471},{ 1200,12079}, + { 1345,12640},{ 1471,13179},{ 1571,13809},{ 1678,14450}, + { 1798,15047},{ 1905,15701},{ 2043,16205},{ 
2202,16569}, + { 2351,16971},{ 2501,17393},{ 2660,17851},{ 2825,18455} + }, + /*Y' qi=3 INTER*/ + { + { 53, -164},{ 38, 1314},{ 59, 2917},{ 99, 4563}, + { 124, 6253},{ 139, 7882},{ 150, 9375},{ 159,10749}, + { 166,12059},{ 173,13349},{ 183,14608},{ 194,15826}, + { 208,17003},{ 223,18150},{ 240,19287},{ 259,20411}, + { 284,21508},{ 317,22593},{ 359,23656},{ 414,24671}, + { 483,25634},{ 569,26519},{ 670,27332},{ 786,28072} + } + }, + { + /*Cb qi=3 INTRA*/ + { + { 1, 5},{ 31, 367},{ 58, 739},{ 78, 1173}, + { 96, 1645},{ 113, 2134},{ 125, 2638},{ 133, 3127}, + { 148, 3542},{ 171, 3915},{ 184, 4328},{ 192, 4776}, + { 209, 5197},{ 230, 5556},{ 245, 5909},{ 252, 6261}, + { 272, 6641},{ 304, 6942},{ 330, 7184},{ 342, 7477}, + { 380, 7736},{ 404, 7962},{ 428, 8151},{ 469, 8430} + }, + /*Cb qi=3 INTER*/ + { + { 86, -29},{ 72, 296},{ 58, 618},{ 46, 964}, + { 47, 1338},{ 51, 1743},{ 56, 2158},{ 63, 2594}, + { 69, 3035},{ 77, 3455},{ 84, 3859},{ 89, 4266}, + { 94, 4673},{ 98, 5074},{ 101, 5460},{ 101, 5842}, + { 101, 6217},{ 101, 6593},{ 102, 6964},{ 104, 7325}, + { 103, 7696},{ 103, 8056},{ 104, 8430},{ 103, 8792} + } + }, + { + /*Cr qi=3 INTRA*/ + { + { 1, 8},{ 27, 374},{ 56, 759},{ 74, 1221}, + { 83, 1696},{ 96, 2173},{ 113, 2650},{ 127, 3091}, + { 140, 3542},{ 151, 3960},{ 164, 4334},{ 188, 4764}, + { 208, 5144},{ 224, 5493},{ 250, 5841},{ 278, 6162}, + { 298, 6548},{ 334, 6816},{ 365, 7045},{ 388, 7343}, + { 419, 7613},{ 443, 7836},{ 455, 8105},{ 484, 8445} + }, + /*Cr qi=3 INTER*/ + { + { 76, 26},{ 65, 332},{ 53, 638},{ 45, 945}, + { 45, 1304},{ 53, 1725},{ 60, 2153},{ 68, 2584}, + { 74, 3007},{ 81, 3425},{ 87, 3844},{ 91, 4253}, + { 94, 4657},{ 95, 5061},{ 94, 5462},{ 94, 5856}, + { 95, 6250},{ 96, 6635},{ 97, 7014},{ 101, 7393}, + { 104, 7761},{ 106, 8137},{ 109, 8506},{ 111, 8823} + } + } + }, + { + { + /*Y' qi=4 INTRA*/ + { + { 80, -67},{ 143, 1603},{ 227, 3378},{ 344, 4861}, + { 454, 6026},{ 537, 7104},{ 626, 8089},{ 725, 9006}, + { 830, 9827},{ 950,10581},{ 
1089,11270},{ 1257,11826}, + { 1409,12366},{ 1535,12912},{ 1640,13528},{ 1753,14173}, + { 1884,14756},{ 2007,15368},{ 2148,15852},{ 2307,16212}, + { 2464,16591},{ 2614,17019},{ 2785,17455},{ 2970,17963} + }, + /*Y' qi=4 INTER*/ + { + { 50, -145},{ 38, 1324},{ 61, 2921},{ 102, 4566}, + { 127, 6248},{ 142, 7845},{ 154, 9300},{ 163,10656}, + { 169,11965},{ 177,13246},{ 188,14495},{ 202,15702}, + { 218,16864},{ 236,18003},{ 256,19124},{ 278,20233}, + { 307,21330},{ 347,22398},{ 398,23437},{ 463,24429}, + { 546,25343},{ 649,26170},{ 767,26935},{ 888,27674} + } + }, + { + /*Cb qi=4 INTRA*/ + { + { 1, 5},{ 33, 367},{ 61, 739},{ 80, 1173}, + { 98, 1646},{ 114, 2136},{ 126, 2639},{ 137, 3124}, + { 152, 3535},{ 176, 3903},{ 194, 4307},{ 206, 4753}, + { 222, 5165},{ 242, 5508},{ 260, 5857},{ 272, 6205}, + { 294, 6559},{ 332, 6848},{ 356, 7104},{ 364, 7389}, + { 396, 7637},{ 415, 7878},{ 446, 8064},{ 506, 8294} + }, + /*Cb qi=4 INTER*/ + { + { 86, -15},{ 73, 308},{ 60, 627},{ 46, 967}, + { 47, 1343},{ 51, 1754},{ 56, 2183},{ 63, 2615}, + { 70, 3044},{ 79, 3459},{ 85, 3866},{ 90, 4276}, + { 94, 4686},{ 97, 5088},{ 100, 5467},{ 102, 5837}, + { 102, 6205},{ 101, 6569},{ 103, 6939},{ 104, 7317}, + { 105, 7690},{ 107, 8043},{ 107, 8394},{ 111, 8736} + } + }, + { + /*Cr qi=4 INTRA*/ + { + { 1, 7},{ 28, 375},{ 57, 759},{ 79, 1221}, + { 92, 1697},{ 105, 2174},{ 122, 2648},{ 135, 3085}, + { 146, 3530},{ 157, 3947},{ 171, 4316},{ 195, 4737}, + { 218, 5117},{ 239, 5445},{ 268, 5767},{ 295, 6074}, + { 315, 6460},{ 355, 6735},{ 392, 6933},{ 418, 7218}, + { 448, 7495},{ 471, 7688},{ 481, 7954},{ 504, 8313} + }, + /*Cr qi=4 INTER*/ + { + { 68, 28},{ 57, 334},{ 47, 639},{ 43, 953}, + { 48, 1314},{ 54, 1736},{ 59, 2169},{ 69, 2592}, + { 78, 3017},{ 84, 3434},{ 88, 3850},{ 92, 4260}, + { 95, 4663},{ 96, 5068},{ 95, 5455},{ 95, 5839}, + { 96, 6243},{ 97, 6626},{ 98, 7006},{ 101, 7390}, + { 104, 7755},{ 108, 8115},{ 111, 8471},{ 110, 8825} + } + } + }, + { + { + /*Y' qi=5 INTRA*/ + { + { 84, 
-69},{ 147, 1599},{ 237, 3350},{ 360, 4796}, + { 475, 5934},{ 562, 6992},{ 657, 7953},{ 765, 8837}, + { 874, 9641},{ 998,10384},{ 1146,11047},{ 1322,11572}, + { 1484,12076},{ 1617,12609},{ 1731,13203},{ 1856,13806}, + { 1995,14367},{ 2132,14936},{ 2289,15386},{ 2460,15721}, + { 2635,16066},{ 2802,16442},{ 2980,16805},{ 3177,17272} + }, + /*Y' qi=5 INTER*/ + { + { 38, -86},{ 37, 1349},{ 64, 2920},{ 105, 4563}, + { 129, 6236},{ 145, 7809},{ 158, 9236},{ 167,10572}, + { 174,11871},{ 182,13141},{ 195,14368},{ 212,15558}, + { 230,16706},{ 250,17828},{ 274,18944},{ 303,20041}, + { 342,21116},{ 394,22152},{ 460,23144},{ 543,24073}, + { 648,24919},{ 773,25673},{ 922,26323},{ 1084,26924} + } + }, + { + /*Cb qi=5 INTRA*/ + { + { 1, 5},{ 34, 367},{ 63, 739},{ 82, 1174}, + { 102, 1647},{ 119, 2137},{ 134, 2639},{ 145, 3121}, + { 161, 3529},{ 189, 3891},{ 207, 4290},{ 216, 4721}, + { 232, 5113},{ 258, 5455},{ 277, 5798},{ 294, 6124}, + { 322, 6427},{ 352, 6697},{ 370, 6982},{ 384, 7283}, + { 423, 7529},{ 448, 7766},{ 478, 7943},{ 527, 8151} + }, + /*Cb qi=5 INTER*/ + { + { 83, -49},{ 69, 284},{ 55, 611},{ 48, 961}, + { 49, 1355},{ 52, 1769},{ 58, 2191},{ 65, 2616}, + { 73, 3041},{ 80, 3460},{ 87, 3868},{ 92, 4276}, + { 95, 4682},{ 98, 5077},{ 100, 5459},{ 102, 5827}, + { 102, 6200},{ 102, 6568},{ 103, 6930},{ 103, 7303}, + { 104, 7672},{ 106, 8032},{ 106, 8391},{ 106, 8727} + } + }, + { + /*Cr qi=5 INTRA*/ + { + { 1, 8},{ 28, 375},{ 57, 760},{ 81, 1222}, + { 99, 1696},{ 111, 2175},{ 125, 2648},{ 140, 3079}, + { 152, 3520},{ 162, 3927},{ 179, 4294},{ 203, 4714}, + { 225, 5080},{ 254, 5389},{ 286, 5703},{ 318, 5997}, + { 342, 6364},{ 380, 6640},{ 416, 6837},{ 445, 7103}, + { 473, 7370},{ 497, 7562},{ 514, 7811},{ 549, 8148} + }, + /*Cr qi=5 INTER*/ + { + { 60, 6},{ 54, 323},{ 46, 638},{ 43, 958}, + { 45, 1329},{ 54, 1749},{ 61, 2175},{ 70, 2600}, + { 79, 3021},{ 85, 3437},{ 89, 3847},{ 93, 4254}, + { 95, 4660},{ 96, 5065},{ 95, 5456},{ 95, 5849}, + { 96, 6243},{ 96, 6621},{ 97, 
6996},{ 101, 7366}, + { 104, 7722},{ 107, 8088},{ 111, 8448},{ 119, 8816} + } + } + }, + { + { + /*Y' qi=6 INTRA*/ + { + { 88, -69},{ 151, 1593},{ 251, 3294},{ 387, 4681}, + { 507, 5790},{ 601, 6837},{ 702, 7787},{ 813, 8648}, + { 927, 9427},{ 1059,10152},{ 1213,10787},{ 1399,11284}, + { 1568,11781},{ 1705,12312},{ 1823,12890},{ 1957,13482}, + { 2106,14036},{ 2249,14600},{ 2411,15042},{ 2588,15359}, + { 2772,15699},{ 2947,16062},{ 3127,16429},{ 3320,16849} + }, + /*Y' qi=6 INTER*/ + { + { 44, -80},{ 36, 1346},{ 69, 2919},{ 111, 4563}, + { 136, 6216},{ 154, 7746},{ 168, 9139},{ 178,10461}, + { 185,11747},{ 195,13007},{ 211,14229},{ 230,15408}, + { 250,16547},{ 274,17663},{ 302,18769},{ 339,19851}, + { 386,20907},{ 446,21933},{ 527,22884},{ 631,23746}, + { 760,24512},{ 914,25178},{ 1087,25758},{ 1278,26262} + } + }, + { + /*Cb qi=6 INTRA*/ + { + { 1, 4},{ 36, 367},{ 66, 739},{ 84, 1174}, + { 105, 1648},{ 126, 2139},{ 140, 2639},{ 149, 3116}, + { 164, 3523},{ 194, 3880},{ 217, 4271},{ 226, 4694}, + { 243, 5077},{ 270, 5407},{ 291, 5742},{ 310, 6061}, + { 340, 6340},{ 373, 6609},{ 394, 6890},{ 409, 7189}, + { 444, 7434},{ 469, 7652},{ 499, 7853},{ 559, 8135} + }, + /*Cb qi=6 INTER*/ + { + { 68, -46},{ 60, 291},{ 50, 623},{ 49, 971}, + { 50, 1357},{ 55, 1781},{ 61, 2211},{ 69, 2634}, + { 78, 3052},{ 86, 3466},{ 91, 3882},{ 95, 4292}, + { 98, 4691},{ 101, 5080},{ 102, 5458},{ 103, 5830}, + { 103, 6192},{ 104, 6554},{ 104, 6916},{ 106, 7278}, + { 108, 7641},{ 110, 8004},{ 112, 8371},{ 112, 8758} + } + }, + { + /*Cr qi=6 INTRA*/ + { + { 1, 8},{ 29, 375},{ 59, 760},{ 84, 1223}, + { 99, 1698},{ 112, 2176},{ 129, 2647},{ 143, 3076}, + { 156, 3510},{ 168, 3906},{ 189, 4269},{ 220, 4682}, + { 241, 5047},{ 266, 5342},{ 299, 5649},{ 331, 5954}, + { 357, 6309},{ 393, 6579},{ 431, 6765},{ 467, 6997}, + { 501, 7276},{ 520, 7488},{ 525, 7749},{ 548, 8146} + }, + /*Cr qi=6 INTER*/ + { + { 94, 31},{ 69, 335},{ 47, 641},{ 43, 967}, + { 50, 1350},{ 57, 1772},{ 65, 2197},{ 74, 2625}, + { 
83, 3043},{ 90, 3454},{ 94, 3867},{ 97, 4273}, + { 98, 4671},{ 99, 5068},{ 99, 5461},{ 98, 5857}, + { 98, 6245},{ 99, 6610},{ 103, 6975},{ 105, 7345}, + { 108, 7712},{ 111, 8073},{ 113, 8415},{ 119, 8768} + } + } + }, + { + { + /*Y' qi=7 INTRA*/ + { + { 92, -70},{ 156, 1590},{ 261, 3267},{ 403, 4618}, + { 529, 5704},{ 628, 6730},{ 736, 7657},{ 856, 8491}, + { 978, 9246},{ 1118, 9943},{ 1281,10550},{ 1472,11028}, + { 1645,11507},{ 1793,12008},{ 1924,12565},{ 2067,13130}, + { 2229,13638},{ 2388,14160},{ 2558,14584},{ 2744,14886}, + { 2932,15194},{ 3116,15531},{ 3311,15858},{ 3538,16197} + }, + /*Y' qi=7 INTER*/ + { + { 43, -8},{ 36, 1351},{ 71, 2923},{ 112, 4568}, + { 138, 6201},{ 157, 7705},{ 171, 9083},{ 181,10390}, + { 189,11664},{ 202,12910},{ 220,14121},{ 241,15281}, + { 266,16401},{ 295,17507},{ 328,18608},{ 371,19677}, + { 430,20701},{ 508,21676},{ 604,22588},{ 727,23397}, + { 878,24093},{ 1055,24690},{ 1263,25151},{ 1496,25504} + } + }, + { + /*Cb qi=7 INTRA*/ + { + { 1, 5},{ 40, 367},{ 72, 740},{ 89, 1175}, + { 108, 1649},{ 129, 2140},{ 143, 2637},{ 154, 3110}, + { 169, 3507},{ 198, 3860},{ 224, 4237},{ 235, 4652}, + { 253, 5037},{ 282, 5358},{ 307, 5674},{ 329, 5986}, + { 361, 6273},{ 393, 6527},{ 419, 6777},{ 435, 7078}, + { 467, 7342},{ 495, 7554},{ 529, 7757},{ 591, 8053} + }, + /*Cb qi=7 INTER*/ + { + { 79, -33},{ 68, 299},{ 56, 627},{ 50, 978}, + { 51, 1366},{ 55, 1786},{ 61, 2213},{ 70, 2642}, + { 80, 3062},{ 87, 3474},{ 92, 3886},{ 96, 4292}, + { 99, 4684},{ 102, 5072},{ 103, 5450},{ 104, 5814}, + { 104, 6176},{ 104, 6538},{ 107, 6905},{ 110, 7270}, + { 110, 7625},{ 110, 7978},{ 111, 8340},{ 117, 8674} + } + }, + { + /*Cr qi=7 INTRA*/ + { + { 2, 7},{ 31, 375},{ 62, 760},{ 87, 1223}, + { 103, 1698},{ 115, 2175},{ 131, 2644},{ 147, 3066}, + { 161, 3494},{ 175, 3889},{ 199, 4250},{ 229, 4653}, + { 250, 5001},{ 279, 5275},{ 311, 5577},{ 343, 5889}, + { 376, 6227},{ 417, 6486},{ 457, 6689},{ 484, 6925}, + { 518, 7174},{ 544, 7393},{ 549, 7662},{ 577, 
8050} + }, + /*Cr qi=7 INTER*/ + { + { 89, 22},{ 62, 332},{ 45, 641},{ 47, 976}, + { 52, 1363},{ 59, 1779},{ 67, 2203},{ 76, 2628}, + { 84, 3046},{ 90, 3460},{ 94, 3875},{ 98, 4272}, + { 99, 4666},{ 98, 5063},{ 98, 5459},{ 98, 5849}, + { 99, 6226},{ 101, 6594},{ 104, 6957},{ 109, 7324}, + { 109, 7686},{ 111, 8042},{ 115, 8379},{ 119, 8699} + } + } + }, + { + { + /*Y' qi=8 INTRA*/ + { + { 91, -69},{ 160, 1585},{ 274, 3226},{ 423, 4538}, + { 557, 5596},{ 664, 6595},{ 778, 7506},{ 905, 8319}, + { 1038, 9035},{ 1186, 9701},{ 1355,10292},{ 1554,10754}, + { 1739,11196},{ 1904,11639},{ 2047,12184},{ 2194,12763}, + { 2361,13256},{ 2529,13753},{ 2709,14155},{ 2902,14433}, + { 3100,14723},{ 3292,15026},{ 3489,15327},{ 3714,15705} + }, + /*Y' qi=8 INTER*/ + { + { 32, -157},{ 33, 1346},{ 74, 2914},{ 116, 4554}, + { 142, 6172},{ 162, 7648},{ 177, 9004},{ 186,10300}, + { 196,11570},{ 210,12808},{ 231,14001},{ 256,15150}, + { 285,16259},{ 319,17352},{ 359,18435},{ 415,19475}, + { 489,20470},{ 584,21400},{ 703,22246},{ 852,22968}, + { 1038,23556},{ 1253,24032},{ 1503,24367},{ 1778,24628} + } + }, + { + /*Cb qi=8 INTRA*/ + { + { 1, 4},{ 42, 367},{ 75, 740},{ 93, 1176}, + { 111, 1649},{ 128, 2139},{ 144, 2635},{ 157, 3103}, + { 174, 3494},{ 206, 3844},{ 233, 4207},{ 251, 4605}, + { 277, 4980},{ 304, 5284},{ 335, 5584},{ 359, 5888}, + { 393, 6152},{ 432, 6398},{ 455, 6656},{ 471, 6956}, + { 502, 7193},{ 528, 7405},{ 562, 7630},{ 603, 7922} + }, + /*Cb qi=8 INTER*/ + { + { 77, -37},{ 68, 299},{ 58, 632},{ 50, 991}, + { 50, 1382},{ 55, 1799},{ 62, 2226},{ 73, 2647}, + { 82, 3066},{ 90, 3480},{ 94, 3891},{ 96, 4296}, + { 98, 4687},{ 101, 5073},{ 103, 5456},{ 104, 5817}, + { 105, 6170},{ 106, 6523},{ 107, 6886},{ 108, 7250}, + { 109, 7600},{ 110, 7955},{ 111, 8305},{ 112, 8641} + } + }, + { + /*Cr qi=8 INTRA*/ + { + { 2, 7},{ 33, 375},{ 64, 760},{ 92, 1224}, + { 111, 1700},{ 122, 2173},{ 137, 2637},{ 156, 3055}, + { 172, 3476},{ 186, 3856},{ 211, 4211},{ 242, 4597}, + { 263, 4939},{ 292, 
5214},{ 335, 5489},{ 376, 5772}, + { 406, 6099},{ 440, 6378},{ 483, 6578},{ 517, 6797}, + { 550, 7049},{ 571, 7283},{ 583, 7560},{ 618, 7967} + }, + /*Cr qi=8 INTER*/ + { + { 74, 25},{ 58, 328},{ 43, 637},{ 45, 980}, + { 51, 1371},{ 59, 1788},{ 69, 2207},{ 79, 2630}, + { 86, 3051},{ 91, 3470},{ 95, 3880},{ 97, 4280}, + { 98, 4680},{ 97, 5074},{ 96, 5456},{ 97, 5839}, + { 99, 6219},{ 101, 6583},{ 103, 6945},{ 106, 7312}, + { 110, 7671},{ 114, 8009},{ 115, 8345},{ 117, 8686} + } + } + }, + { + { + /*Y' qi=9 INTRA*/ + { + { 104, -68},{ 164, 1580},{ 288, 3173},{ 448, 4439}, + { 587, 5485},{ 702, 6465},{ 824, 7351},{ 958, 8148}, + { 1096, 8845},{ 1253, 9480},{ 1432,10047},{ 1640,10494}, + { 1835,10926},{ 2015,11350},{ 2166,11871},{ 2321,12428}, + { 2508,12876},{ 2684,13345},{ 2866,13741},{ 3069,13991}, + { 3281,14243},{ 3487,14518},{ 3689,14813},{ 3911,15175} + }, + /*Y' qi=9 INTER*/ + { + { 47, -140},{ 34, 1348},{ 77, 2915},{ 119, 4552}, + { 145, 6150},{ 166, 7600},{ 182, 8936},{ 192,10221}, + { 203,11482},{ 220,12711},{ 244,13886},{ 274,15012}, + { 308,16111},{ 349,17190},{ 401,18244},{ 470,19257}, + { 561,20209},{ 680,21069},{ 830,21822},{ 1010,22463}, + { 1227,22971},{ 1482,23328},{ 1769,23544},{ 2077,23655} + } + }, + { + /*Cb qi=9 INTRA*/ + { + { 1, 5},{ 43, 367},{ 76, 740},{ 95, 1176}, + { 114, 1649},{ 135, 2138},{ 153, 2629},{ 165, 3091}, + { 184, 3481},{ 217, 3831},{ 244, 4187},{ 260, 4572}, + { 290, 4930},{ 320, 5231},{ 351, 5521},{ 379, 5812}, + { 414, 6055},{ 452, 6307},{ 483, 6564},{ 502, 6848}, + { 525, 7115},{ 554, 7321},{ 589, 7533},{ 626, 7833} + }, + /*Cb qi=9 INTER*/ + { + { 101, -43},{ 81, 298},{ 62, 637},{ 49, 989}, + { 51, 1381},{ 56, 1806},{ 65, 2231},{ 74, 2653}, + { 84, 3071},{ 91, 3482},{ 95, 3892},{ 97, 4293}, + { 99, 4684},{ 101, 5066},{ 103, 5437},{ 103, 5793}, + { 103, 6148},{ 104, 6511},{ 105, 6867},{ 107, 7221}, + { 110, 7572},{ 111, 7926},{ 112, 8283},{ 116, 8625} + } + }, + { + /*Cr qi=9 INTRA*/ + { + { 2, 7},{ 35, 375},{ 66, 761},{ 93, 
1224}, + { 112, 1700},{ 126, 2173},{ 144, 2633},{ 165, 3047}, + { 183, 3458},{ 199, 3835},{ 224, 4191},{ 257, 4558}, + { 283, 4887},{ 309, 5176},{ 351, 5446},{ 397, 5713}, + { 433, 6017},{ 469, 6283},{ 508, 6480},{ 546, 6687}, + { 579, 6945},{ 600, 7182},{ 610, 7434},{ 623, 7793} + }, + /*Cr qi=9 INTER*/ + { + { 77, 15},{ 57, 330},{ 45, 640},{ 48, 980}, + { 54, 1380},{ 61, 1802},{ 70, 2220},{ 80, 2639}, + { 87, 3057},{ 92, 3474},{ 94, 3882},{ 98, 4282}, + { 98, 4675},{ 97, 5062},{ 97, 5450},{ 98, 5829}, + { 100, 6197},{ 101, 6561},{ 104, 6927},{ 107, 7289}, + { 113, 7638},{ 117, 7978},{ 119, 8311},{ 117, 8629} + } + } + }, + { + { + /*Y' qi=10 INTRA*/ + { + { 101, -69},{ 168, 1574},{ 299, 3143},{ 465, 4386}, + { 610, 5410},{ 736, 6353},{ 866, 7207},{ 1006, 7982}, + { 1153, 8655},{ 1319, 9261},{ 1504, 9812},{ 1719,10248}, + { 1928,10653},{ 2116,11056},{ 2282,11550},{ 2458,12070}, + { 2654,12492},{ 2846,12923},{ 3043,13291},{ 3249,13537}, + { 3466,13764},{ 3682,13999},{ 3896,14268},{ 4145,14548} + }, + /*Y' qi=10 INTER*/ + { + { 48, -94},{ 34, 1355},{ 81, 2920},{ 124, 4545}, + { 151, 6113},{ 174, 7532},{ 190, 8850},{ 201,10125}, + { 214,11379},{ 235,12591},{ 264,13745},{ 299,14859}, + { 338,15948},{ 388,17008},{ 456,18029},{ 546,18988}, + { 661,19877},{ 808,20666},{ 993,21321},{ 1218,21835}, + { 1481,22203},{ 1783,22420},{ 2117,22504},{ 2469,22481} + } + }, + { + /*Cb qi=10 INTRA*/ + { + { 2, 4},{ 44, 367},{ 79, 740},{ 99, 1178}, + { 117, 1652},{ 137, 2141},{ 156, 2630},{ 170, 3089}, + { 192, 3474},{ 227, 3813},{ 259, 4157},{ 282, 4526}, + { 310, 4860},{ 342, 5140},{ 377, 5425},{ 400, 5714}, + { 436, 5952},{ 475, 6194},{ 496, 6468},{ 522, 6748}, + { 559, 6996},{ 587, 7216},{ 617, 7433},{ 673, 7678} + }, + /*Cb qi=10 INTER*/ + { + { 87, -37},{ 72, 301},{ 58, 636},{ 49, 995}, + { 51, 1394},{ 57, 1819},{ 66, 2241},{ 78, 2660}, + { 87, 3074},{ 93, 3482},{ 97, 3891},{ 99, 4294}, + { 101, 4678},{ 103, 5050},{ 105, 5414},{ 106, 5773}, + { 107, 6134},{ 108, 6485},{ 110, 
6832},{ 113, 7187}, + { 113, 7547},{ 114, 7887},{ 117, 8230},{ 112, 8590} + } + }, + { + /*Cr qi=10 INTRA*/ + { + { 2, 7},{ 38, 375},{ 69, 761},{ 96, 1224}, + { 116, 1701},{ 131, 2175},{ 148, 2634},{ 168, 3041}, + { 190, 3439},{ 211, 3802},{ 238, 4151},{ 271, 4506}, + { 297, 4824},{ 331, 5103},{ 373, 5360},{ 415, 5632}, + { 459, 5928},{ 500, 6176},{ 535, 6386},{ 573, 6586}, + { 608, 6834},{ 629, 7079},{ 642, 7337},{ 686, 7680} + }, + /*Cr qi=10 INTER*/ + { + { 81, 34},{ 63, 333},{ 50, 633},{ 48, 987}, + { 53, 1397},{ 61, 1820},{ 71, 2237},{ 83, 2651}, + { 91, 3065},{ 95, 3479},{ 98, 3882},{ 100, 4279}, + { 101, 4673},{ 101, 5054},{ 100, 5429},{ 101, 5801}, + { 102, 6173},{ 104, 6541},{ 108, 6904},{ 110, 7264}, + { 114, 7609},{ 119, 7945},{ 123, 8275},{ 128, 8615} + } + } + }, + { + { + /*Y' qi=11 INTRA*/ + { + { 110, -66},{ 176, 1564},{ 316, 3087},{ 492, 4296}, + { 645, 5299},{ 781, 6217},{ 924, 7039},{ 1075, 7776}, + { 1232, 8421},{ 1410, 9005},{ 1607, 9532},{ 1834, 9929}, + { 2053,10300},{ 2249,10697},{ 2427,11184},{ 2619,11682}, + { 2826,12083},{ 3019,12508},{ 3225,12869},{ 3452,13064}, + { 3670,13280},{ 3890,13519},{ 4123,13750},{ 4367,14059} + }, + /*Y' qi=11 INTER*/ + { + { 72, -115},{ 32, 1354},{ 83, 2911},{ 126, 4534}, + { 154, 6080},{ 178, 7475},{ 194, 8779},{ 205,10047}, + { 222,11290},{ 246,12488},{ 281,13621},{ 322,14714}, + { 372,15786},{ 436,16821},{ 519,17813},{ 628,18728}, + { 770,19549},{ 950,20254},{ 1175,20800},{ 1443,21197}, + { 1752,21446},{ 2095,21555},{ 2457,21553},{ 2808,21544} + } + }, + { + /*Cb qi=11 INTRA*/ + { + { 2, 4},{ 45, 367},{ 81, 740},{ 101, 1177}, + { 121, 1650},{ 142, 2136},{ 159, 2621},{ 174, 3075}, + { 199, 3451},{ 234, 3778},{ 265, 4117},{ 297, 4473}, + { 333, 4789},{ 367, 5054},{ 402, 5319},{ 427, 5613}, + { 462, 5871},{ 503, 6107},{ 532, 6336},{ 560, 6584}, + { 601, 6842},{ 631, 7092},{ 662, 7292},{ 721, 7497} + }, + /*Cb qi=11 INTER*/ + { + { 117, -24},{ 93, 308},{ 69, 638},{ 52, 993}, + { 52, 1395},{ 58, 1822},{ 68, 
2246},{ 80, 2665}, + { 89, 3082},{ 94, 3492},{ 96, 3900},{ 98, 4299}, + { 101, 4679},{ 103, 5047},{ 104, 5405},{ 106, 5763}, + { 106, 6120},{ 107, 6474},{ 109, 6823},{ 112, 7163}, + { 115, 7516},{ 117, 7868},{ 118, 8213},{ 119, 8561} + } + }, + { + /*Cr qi=11 INTRA*/ + { + { 2, 7},{ 40, 375},{ 75, 761},{ 100, 1224}, + { 119, 1700},{ 137, 2169},{ 154, 2622},{ 178, 3025}, + { 198, 3416},{ 220, 3770},{ 255, 4114},{ 294, 4459}, + { 323, 4756},{ 359, 5028},{ 399, 5292},{ 438, 5556}, + { 483, 5827},{ 518, 6073},{ 551, 6298},{ 598, 6501}, + { 634, 6754},{ 652, 6997},{ 670, 7211},{ 689, 7560} + }, + /*Cr qi=11 INTER*/ + { + { 75, 30},{ 61, 334},{ 51, 639},{ 49, 995}, + { 53, 1403},{ 62, 1821},{ 73, 2237},{ 84, 2654}, + { 91, 3070},{ 95, 3485},{ 96, 3890},{ 98, 4287}, + { 98, 4672},{ 99, 5050},{ 99, 5427},{ 100, 5798}, + { 103, 6169},{ 105, 6528},{ 107, 6881},{ 113, 7233}, + { 118, 7580},{ 121, 7916},{ 125, 8240},{ 130, 8551} + } + } + }, + { + { + /*Y' qi=12 INTRA*/ + { + { 104, -69},{ 182, 1557},{ 335, 3040},{ 521, 4205}, + { 684, 5178},{ 831, 6068},{ 986, 6854},{ 1151, 7559}, + { 1323, 8169},{ 1523, 8704},{ 1736, 9192},{ 1978, 9558}, + { 2213, 9908},{ 2421,10298},{ 2613,10757},{ 2822,11208}, + { 3042,11585},{ 3250,11991},{ 3474,12308},{ 3710,12480}, + { 3939,12687},{ 4174,12902},{ 4416,13102},{ 4672,13369} + }, + /*Y' qi=12 INTER*/ + { + { 52, -91},{ 34, 1355},{ 86, 2911},{ 129, 4518}, + { 159, 6037},{ 184, 7405},{ 200, 8694},{ 213, 9955}, + { 232,11185},{ 263,12360},{ 304,13479},{ 354,14555}, + { 415,15601},{ 495,16608},{ 601,17549},{ 738,18400}, + { 915,19136},{ 1139,19724},{ 1414,20150},{ 1731,20412}, + { 2090,20520},{ 2473,20509},{ 2851,20442},{ 3227,20328} + } + }, + { + /*Cb qi=12 INTRA*/ + { + { 1, 4},{ 46, 367},{ 85, 740},{ 109, 1178}, + { 126, 1650},{ 145, 2134},{ 165, 2617},{ 182, 3061}, + { 209, 3428},{ 245, 3749},{ 281, 4077},{ 316, 4417}, + { 354, 4718},{ 392, 4970},{ 430, 5217},{ 456, 5501}, + { 490, 5771},{ 534, 5996},{ 571, 6207},{ 600, 6458}, + { 644, 
6697},{ 675, 6942},{ 707, 7151},{ 766, 7342} + }, + /*Cb qi=12 INTER*/ + { + { 84, -24},{ 73, 311},{ 60, 644},{ 52, 998}, + { 53, 1398},{ 60, 1825},{ 71, 2249},{ 83, 2665}, + { 90, 3081},{ 94, 3490},{ 97, 3893},{ 99, 4286}, + { 102, 4663},{ 104, 5032},{ 105, 5393},{ 106, 5751}, + { 107, 6102},{ 108, 6445},{ 111, 6788},{ 113, 7136}, + { 114, 7483},{ 117, 7828},{ 121, 8163},{ 122, 8496} + } + }, + { + /*Cr qi=12 INTRA*/ + { + { 3, 7},{ 41, 375},{ 78, 761},{ 106, 1225}, + { 124, 1700},{ 140, 2167},{ 163, 2616},{ 188, 3010}, + { 213, 3385},{ 240, 3718},{ 271, 4062},{ 309, 4406}, + { 345, 4691},{ 387, 4956},{ 430, 5212},{ 469, 5467}, + { 513, 5729},{ 554, 5970},{ 587, 6176},{ 633, 6395}, + { 673, 6659},{ 692, 6868},{ 712, 7061},{ 758, 7259} + }, + /*Cr qi=12 INTER*/ + { + { 73, 31},{ 59, 335},{ 48, 638},{ 50, 998}, + { 56, 1410},{ 65, 1827},{ 75, 2240},{ 85, 2657}, + { 92, 3073},{ 95, 3485},{ 97, 3888},{ 99, 4279}, + { 98, 4663},{ 99, 5042},{ 101, 5412},{ 102, 5779}, + { 105, 6142},{ 107, 6498},{ 108, 6848},{ 113, 7198}, + { 118, 7540},{ 121, 7867},{ 127, 8188},{ 132, 8508} + } + } + }, + { + { + /*Y' qi=13 INTRA*/ + { + { 109, -68},{ 187, 1551},{ 347, 3010},{ 541, 4153}, + { 709, 5107},{ 864, 5975},{ 1026, 6745},{ 1194, 7433}, + { 1375, 8021},{ 1581, 8550},{ 1803, 9026},{ 2054, 9371}, + { 2301, 9713},{ 2522,10082},{ 2728,10515},{ 2949,10956}, + { 3184,11297},{ 3408,11653},{ 3643,11946},{ 3886,12100}, + { 4124,12277},{ 4377,12459},{ 4632,12635},{ 4898,12861} + }, + /*Y' qi=13 INTER*/ + { + { 48, -78},{ 35, 1357},{ 89, 2914},{ 133, 4512}, + { 164, 6004},{ 190, 7348},{ 207, 8627},{ 222, 9881}, + { 247,11096},{ 284,12251},{ 333,13350},{ 392,14407}, + { 466,15426},{ 565,16391},{ 696,17279},{ 865,18058}, + { 1085,18689},{ 1358,19156},{ 1684,19456},{ 2050,19605}, + { 2447,19614},{ 2855,19524},{ 3243,19398},{ 3611,19201} + } + }, + { + /*Cb qi=13 INTRA*/ + { + { 2, 4},{ 47, 367},{ 86, 741},{ 108, 1179}, + { 127, 1651},{ 150, 2133},{ 173, 2611},{ 194, 3050}, + { 222, 3417},{ 
262, 3733},{ 303, 4048},{ 337, 4375}, + { 378, 4657},{ 420, 4897},{ 456, 5148},{ 486, 5422}, + { 518, 5682},{ 558, 5903},{ 592, 6113},{ 623, 6372}, + { 662, 6628},{ 700, 6833},{ 751, 6989},{ 805, 7147} + }, + /*Cb qi=13 INTER*/ + { + { 94, -34},{ 78, 303},{ 60, 638},{ 51, 994}, + { 54, 1406},{ 61, 1836},{ 73, 2253},{ 84, 2668}, + { 92, 3082},{ 96, 3492},{ 99, 3894},{ 101, 4284}, + { 103, 4659},{ 105, 5023},{ 106, 5376},{ 108, 5726}, + { 109, 6070},{ 110, 6418},{ 113, 6765},{ 117, 7105}, + { 119, 7448},{ 122, 7784},{ 126, 8119},{ 131, 8463} + } + }, + { + /*Cr qi=13 INTRA*/ + { + { 3, 7},{ 43, 375},{ 80, 762},{ 110, 1226}, + { 131, 1701},{ 149, 2166},{ 172, 2610},{ 196, 2999}, + { 221, 3359},{ 254, 3679},{ 292, 4005},{ 332, 4329}, + { 369, 4612},{ 408, 4880},{ 456, 5139},{ 500, 5388}, + { 544, 5631},{ 581, 5877},{ 615, 6101},{ 660, 6316}, + { 692, 6594},{ 714, 6795},{ 736, 6997},{ 789, 7290} + }, + /*Cr qi=13 INTER*/ + { + { 73, 28},{ 61, 336},{ 46, 642},{ 50, 1003}, + { 58, 1414},{ 67, 1832},{ 79, 2245},{ 87, 2660}, + { 93, 3075},{ 97, 3484},{ 99, 3888},{ 100, 4277}, + { 100, 4651},{ 100, 5027},{ 101, 5403},{ 102, 5765}, + { 105, 6116},{ 109, 6470},{ 113, 6825},{ 119, 7163}, + { 124, 7497},{ 127, 7827},{ 131, 8137},{ 135, 8437} + } + } + }, + { + { + /*Y' qi=14 INTRA*/ + { + { 113, -68},{ 191, 1545},{ 358, 2981},{ 559, 4104}, + { 733, 5044},{ 896, 5890},{ 1066, 6636},{ 1241, 7304}, + { 1428, 7886},{ 1642, 8402},{ 1872, 8871},{ 2128, 9219}, + { 2380, 9547},{ 2609, 9908},{ 2825,10321},{ 3055,10728}, + { 3294,11076},{ 3523,11425},{ 3766,11689},{ 4013,11845}, + { 4254,12022},{ 4506,12209},{ 4759,12383},{ 5013,12637} + }, + /*Y' qi=14 INTER*/ + { + { 58, -82},{ 38, 1362},{ 93, 2914},{ 138, 4492}, + { 171, 5962},{ 198, 7289},{ 216, 8559},{ 234, 9804}, + { 263,11005},{ 306,12143},{ 363,13222},{ 434,14259}, + { 523,15255},{ 639,16188},{ 794,17021},{ 1000,17717}, + { 1262,18260},{ 1575,18645},{ 1943,18841},{ 2356,18872}, + { 2782,18802},{ 3194,18682},{ 3576,18559},{ 
3923,18447} + } + }, + { + /*Cb qi=14 INTRA*/ + { + { 2, 3},{ 50, 367},{ 91, 741},{ 114, 1180}, + { 134, 1651},{ 157, 2131},{ 181, 2601},{ 208, 3028}, + { 239, 3391},{ 279, 3706},{ 322, 4000},{ 361, 4309}, + { 406, 4587},{ 445, 4822},{ 482, 5067},{ 515, 5344}, + { 546, 5612},{ 589, 5821},{ 626, 6020},{ 655, 6276}, + { 701, 6523},{ 748, 6717},{ 796, 6876},{ 815, 7151} + }, + /*Cb qi=14 INTER*/ + { + { 80, -43},{ 68, 301},{ 56, 644},{ 50, 1004}, + { 54, 1412},{ 63, 1836},{ 75, 2253},{ 87, 2670}, + { 94, 3083},{ 98, 3487},{ 101, 3885},{ 103, 4271}, + { 106, 4645},{ 107, 5004},{ 108, 5358},{ 109, 5705}, + { 112, 6047},{ 115, 6388},{ 118, 6731},{ 121, 7081}, + { 126, 7421},{ 129, 7747},{ 132, 8076},{ 137, 8419} + } + }, + { + /*Cr qi=14 INTRA*/ + { + { 3, 6},{ 45, 375},{ 85, 762},{ 116, 1226}, + { 138, 1700},{ 158, 2163},{ 180, 2602},{ 206, 2985}, + { 236, 3333},{ 270, 3639},{ 310, 3956},{ 359, 4258}, + { 397, 4524},{ 430, 4802},{ 478, 5068},{ 527, 5316}, + { 572, 5560},{ 613, 5802},{ 654, 6012},{ 699, 6216}, + { 734, 6489},{ 755, 6707},{ 775, 6898},{ 841, 7111} + }, + /*Cr qi=14 INTER*/ + { + { 78, 0},{ 59, 322},{ 46, 649},{ 51, 1016}, + { 58, 1422},{ 68, 1839},{ 81, 2253},{ 90, 2666}, + { 95, 3080},{ 98, 3486},{ 101, 3881},{ 102, 4268}, + { 102, 4644},{ 103, 5017},{ 105, 5382},{ 106, 5743}, + { 108, 6093},{ 112, 6442},{ 118, 6791},{ 124, 7130}, + { 127, 7463},{ 133, 7784},{ 138, 8085},{ 142, 8395} + } + } + }, + { + { + /*Y' qi=15 INTRA*/ + { + { 111, -66},{ 197, 1538},{ 370, 2949},{ 579, 4050}, + { 762, 4968},{ 933, 5798},{ 1112, 6520},{ 1299, 7161}, + { 1497, 7725},{ 1723, 8219},{ 1967, 8654},{ 2234, 8990}, + { 2499, 9302},{ 2740, 9637},{ 2968,10039},{ 3215,10414}, + { 3473,10709},{ 3721,11015},{ 3971,11270},{ 4228,11402}, + { 4487,11543},{ 4752,11707},{ 5011,11871},{ 5290,12099} + }, + /*Y' qi=15 INTER*/ + { + { 59, -113},{ 37, 1349},{ 95, 2904},{ 139, 4478}, + { 174, 5929},{ 201, 7244},{ 220, 8505},{ 241, 9736}, + { 275,10922},{ 327,12040},{ 395,13097},{ 
477,14114}, + { 585,15071},{ 730,15947},{ 917,16714},{ 1162,17326}, + { 1468,17770},{ 1833,18029},{ 2251,18111},{ 2694,18068}, + { 3125,17968},{ 3529,17845},{ 3908,17713},{ 4260,17587} + } + }, + { + /*Cb qi=15 INTRA*/ + { + { 2, 3},{ 51, 367},{ 94, 741},{ 120, 1180}, + { 140, 1651},{ 160, 2129},{ 184, 2591},{ 213, 3010}, + { 246, 3371},{ 289, 3680},{ 335, 3969},{ 374, 4274}, + { 418, 4546},{ 460, 4783},{ 498, 5019},{ 532, 5280}, + { 565, 5553},{ 608, 5765},{ 647, 5958},{ 683, 6193}, + { 732, 6433},{ 782, 6620},{ 832, 6769},{ 848, 7027} + }, + /*Cb qi=15 INTER*/ + { + { 71, -52},{ 63, 296},{ 54, 644},{ 50, 1010}, + { 53, 1417},{ 64, 1837},{ 77, 2253},{ 88, 2666}, + { 95, 3079},{ 98, 3487},{ 100, 3882},{ 103, 4264}, + { 106, 4633},{ 108, 4991},{ 109, 5343},{ 109, 5693}, + { 112, 6038},{ 114, 6371},{ 119, 6709},{ 123, 7051}, + { 125, 7385},{ 130, 7716},{ 135, 8050},{ 140, 8374} + } + }, + { + /*Cr qi=15 INTRA*/ + { + { 2, 6},{ 47, 375},{ 87, 763},{ 119, 1225}, + { 143, 1699},{ 162, 2158},{ 185, 2595},{ 213, 2971}, + { 246, 3315},{ 279, 3618},{ 320, 3920},{ 372, 4210}, + { 409, 4480},{ 446, 4756},{ 496, 5017},{ 542, 5263}, + { 590, 5487},{ 639, 5721},{ 687, 5923},{ 724, 6132}, + { 753, 6417},{ 781, 6622},{ 805, 6806},{ 856, 6977} + }, + /*Cr qi=15 INTER*/ + { + { 71, 3},{ 61, 326},{ 52, 651},{ 50, 1017}, + { 58, 1422},{ 69, 1837},{ 82, 2251},{ 90, 2668}, + { 95, 3080},{ 98, 3484},{ 101, 3877},{ 102, 4257}, + { 102, 4632},{ 101, 5005},{ 103, 5370},{ 106, 5733}, + { 110, 6082},{ 116, 6424},{ 120, 6774},{ 124, 7106}, + { 130, 7427},{ 135, 7748},{ 141, 8052},{ 147, 8333} + } + } + }, + { + { + /*Y' qi=16 INTRA*/ + { + { 114, -63},{ 206, 1525},{ 396, 2887},{ 618, 3945}, + { 816, 4832},{ 1002, 5626},{ 1196, 6319},{ 1401, 6923}, + { 1616, 7458},{ 1857, 7928},{ 2121, 8334},{ 2405, 8645}, + { 2685, 8934},{ 2938, 9255},{ 3175, 9638},{ 3433, 9990}, + { 3707,10263},{ 3958,10577},{ 4218,10807},{ 4488,10906}, + { 4760,11028},{ 5037,11148},{ 5306,11286},{ 5625,11463} + }, + /*Y' 
qi=16 INTER*/ + { + { 69, -153},{ 39, 1348},{ 98, 2894},{ 144, 4448}, + { 181, 5872},{ 209, 7167},{ 228, 8422},{ 254, 9644}, + { 297,10810},{ 359,11908},{ 438,12944},{ 539,13930}, + { 672,14842},{ 850,15650},{ 1085,16318},{ 1391,16793}, + { 1769,17082},{ 2200,17198},{ 2659,17174},{ 3116,17072}, + { 3547,16948},{ 3943,16819},{ 4299,16701},{ 4611,16644} + } + }, + { + /*Cb qi=16 INTRA*/ + { + { 3, 4},{ 54, 367},{ 97, 742},{ 122, 1181}, + { 143, 1651},{ 168, 2123},{ 197, 2575},{ 226, 2985}, + { 263, 3338},{ 314, 3631},{ 367, 3903},{ 409, 4200}, + { 453, 4468},{ 491, 4703},{ 528, 4932},{ 566, 5188}, + { 601, 5459},{ 647, 5672},{ 693, 5844},{ 734, 6058}, + { 784, 6305},{ 836, 6460},{ 882, 6602},{ 905, 6891} + }, + /*Cb qi=16 INTER*/ + { + { 75, -64},{ 67, 292},{ 56, 645},{ 51, 1016}, + { 54, 1421},{ 66, 1842},{ 79, 2257},{ 89, 2670}, + { 95, 3082},{ 98, 3488},{ 101, 3879},{ 104, 4258}, + { 106, 4623},{ 108, 4974},{ 109, 5321},{ 113, 5664}, + { 116, 6001},{ 117, 6341},{ 123, 6677},{ 128, 7004}, + { 130, 7336},{ 136, 7671},{ 143, 7996},{ 148, 8310} + } + }, + { + /*Cr qi=16 INTRA*/ + { + { 4, 7},{ 50, 375},{ 90, 763},{ 124, 1225}, + { 148, 1698},{ 168, 2154},{ 195, 2582},{ 227, 2948}, + { 263, 3279},{ 302, 3575},{ 343, 3865},{ 394, 4137}, + { 439, 4402},{ 482, 4672},{ 533, 4925},{ 579, 5165}, + { 626, 5382},{ 675, 5616},{ 725, 5812},{ 769, 5991}, + { 810, 6242},{ 848, 6430},{ 868, 6615},{ 944, 6732} + }, + /*Cr qi=16 INTER*/ + { + { 78, 11},{ 62, 327},{ 49, 650},{ 50, 1025}, + { 59, 1431},{ 72, 1841},{ 83, 2253},{ 90, 2671}, + { 95, 3084},{ 98, 3487},{ 100, 3879},{ 101, 4254}, + { 102, 4625},{ 103, 4994},{ 106, 5355},{ 108, 5708}, + { 111, 6058},{ 115, 6400},{ 121, 6733},{ 128, 7058}, + { 134, 7374},{ 140, 7691},{ 146, 7993},{ 146, 8317} + } + } + }, + { + { + /*Y' qi=17 INTRA*/ + { + { 112, -59},{ 210, 1515},{ 409, 2850},{ 640, 3882}, + { 844, 4748},{ 1038, 5529},{ 1240, 6206},{ 1452, 6803}, + { 1676, 7330},{ 1925, 7792},{ 2194, 8201},{ 2483, 8512}, + { 2766, 8801},{ 
3027, 9121},{ 3279, 9482},{ 3548, 9810}, + { 3825,10069},{ 4088,10345},{ 4362,10544},{ 4638,10644}, + { 4915,10744},{ 5196,10850},{ 5471,10981},{ 5802,11136} + }, + /*Y' qi=17 INTER*/ + { + { 70, -147},{ 45, 1349},{ 106, 2894},{ 155, 4425}, + { 195, 5818},{ 225, 7099},{ 247, 8348},{ 278, 9565}, + { 328,10717},{ 399,11794},{ 491,12807},{ 609,13760}, + { 766,14623},{ 984,15349},{ 1274,15902},{ 1642,16256}, + { 2082,16411},{ 2563,16409},{ 3048,16315},{ 3508,16194}, + { 3924,16064},{ 4306,15938},{ 4656,15828},{ 4966,15733} + } + }, + { + /*Cb qi=17 INTRA*/ + { + { 3, 4},{ 57, 367},{ 101, 742},{ 126, 1182}, + { 148, 1650},{ 175, 2118},{ 207, 2565},{ 241, 2966}, + { 279, 3307},{ 331, 3588},{ 389, 3845},{ 435, 4132}, + { 474, 4408},{ 517, 4641},{ 560, 4869},{ 602, 5122}, + { 638, 5389},{ 672, 5610},{ 716, 5787},{ 758, 6002}, + { 817, 6226},{ 869, 6393},{ 916, 6530},{ 950, 6799} + }, + /*Cb qi=17 INTER*/ + { + { 105, -65},{ 86, 288},{ 66, 638},{ 54, 1014}, + { 59, 1427},{ 71, 1844},{ 86, 2257},{ 95, 2668}, + { 100, 3075},{ 103, 3476},{ 106, 3867},{ 110, 4241}, + { 112, 4598},{ 114, 4948},{ 117, 5294},{ 121, 5633}, + { 123, 5968},{ 126, 6301},{ 131, 6637},{ 136, 6968}, + { 144, 7287},{ 152, 7606},{ 158, 7931},{ 162, 8262} + } + }, + { + /*Cr qi=17 INTRA*/ + { + { 4, 6},{ 55, 376},{ 97, 765},{ 128, 1226}, + { 152, 1696},{ 175, 2144},{ 204, 2568},{ 241, 2928}, + { 282, 3250},{ 323, 3530},{ 368, 3811},{ 420, 4089}, + { 463, 4347},{ 505, 4609},{ 562, 4860},{ 609, 5094}, + { 655, 5303},{ 709, 5535},{ 759, 5740},{ 803, 5913}, + { 844, 6153},{ 879, 6350},{ 905, 6527},{ 972, 6637} + }, + /*Cr qi=17 INTER*/ + { + { 88, 8},{ 68, 330},{ 51, 653},{ 54, 1028}, + { 65, 1433},{ 77, 1845},{ 89, 2257},{ 96, 2669}, + { 100, 3081},{ 102, 3481},{ 105, 3867},{ 106, 4245}, + { 108, 4613},{ 110, 4971},{ 112, 5328},{ 115, 5679}, + { 120, 6019},{ 127, 6355},{ 133, 6686},{ 140, 7007}, + { 149, 7316},{ 158, 7618},{ 166, 7924},{ 170, 8232} + } + } + }, + { + { + /*Y' qi=18 INTRA*/ + { + { 122, -58},{ 
216, 1506},{ 425, 2815},{ 665, 3822}, + { 882, 4666},{ 1088, 5425},{ 1301, 6084},{ 1529, 6653}, + { 1766, 7162},{ 2026, 7611},{ 2312, 7987},{ 2612, 8278}, + { 2913, 8551},{ 3196, 8840},{ 3454, 9184},{ 3734, 9490}, + { 4030, 9725},{ 4305, 9973},{ 4585,10162},{ 4864,10251}, + { 5150,10324},{ 5443,10420},{ 5727,10536},{ 6053,10682} + }, + /*Y' qi=18 INTER*/ + { + { 66, -143},{ 47, 1351},{ 108, 2886},{ 158, 4401}, + { 200, 5775},{ 232, 7044},{ 256, 8288},{ 292, 9493}, + { 351,10625},{ 434,11679},{ 541,12665},{ 681,13578}, + { 875,14379},{ 1136,15025},{ 1483,15475},{ 1914,15709}, + { 2399,15767},{ 2907,15699},{ 3400,15579},{ 3852,15453}, + { 4259,15332},{ 4630,15221},{ 4976,15121},{ 5294,15061} + } + }, + { + /*Cb qi=18 INTRA*/ + { + { 2, 3},{ 61, 367},{ 107, 743},{ 131, 1182}, + { 155, 1648},{ 183, 2110},{ 220, 2542},{ 260, 2927}, + { 303, 3265},{ 359, 3540},{ 416, 3785},{ 462, 4063}, + { 506, 4334},{ 553, 4567},{ 595, 4797},{ 636, 5049}, + { 676, 5304},{ 717, 5516},{ 759, 5698},{ 801, 5904}, + { 861, 6133},{ 911, 6311},{ 962, 6443},{ 1021, 6645} + }, + /*Cb qi=18 INTER*/ + { + { 126, 5},{ 95, 326},{ 66, 643},{ 55, 1015}, + { 60, 1427},{ 73, 1843},{ 87, 2256},{ 96, 2667}, + { 101, 3073},{ 104, 3470},{ 108, 3853},{ 111, 4226}, + { 114, 4584},{ 117, 4928},{ 119, 5274},{ 122, 5612}, + { 126, 5942},{ 130, 6271},{ 136, 6606},{ 141, 6931}, + { 148, 7247},{ 156, 7568},{ 164, 7891},{ 173, 8211} + } + }, + { + /*Cr qi=18 INTRA*/ + { + { 4, 6},{ 59, 376},{ 104, 765},{ 133, 1226}, + { 156, 1692},{ 184, 2136},{ 218, 2548},{ 260, 2893}, + { 308, 3204},{ 348, 3481},{ 397, 3751},{ 448, 4024}, + { 490, 4281},{ 541, 4523},{ 593, 4776},{ 634, 5022}, + { 685, 5236},{ 748, 5455},{ 812, 5638},{ 856, 5818}, + { 891, 6048},{ 928, 6230},{ 961, 6405},{ 1055, 6449} + }, + /*Cr qi=18 INTER*/ + { + { 81, 34},{ 68, 342},{ 57, 652},{ 59, 1027}, + { 67, 1439},{ 80, 1848},{ 91, 2257},{ 97, 2670}, + { 100, 3076},{ 103, 3473},{ 106, 3857},{ 108, 4231}, + { 109, 4599},{ 110, 4958},{ 113, 5307},{ 119, 
5650}, + { 125, 5991},{ 130, 6325},{ 138, 6651},{ 147, 6971}, + { 153, 7278},{ 162, 7578},{ 172, 7874},{ 177, 8156} + } + } + }, + { + { + /*Y' qi=19 INTRA*/ + { + { 128, -55},{ 228, 1495},{ 448, 2775},{ 699, 3758}, + { 931, 4571},{ 1154, 5296},{ 1386, 5914},{ 1636, 6450}, + { 1894, 6930},{ 2177, 7342},{ 2479, 7698},{ 2792, 7976}, + { 3099, 8235},{ 3392, 8517},{ 3658, 8853},{ 3938, 9155}, + { 4242, 9371},{ 4527, 9605},{ 4810, 9781},{ 5089, 9853}, + { 5378, 9920},{ 5674,10009},{ 5972,10110},{ 6336,10196} + }, + /*Y' qi=19 INTER*/ + { + { 69, -147},{ 49, 1353},{ 111, 2883},{ 162, 4381}, + { 205, 5737},{ 237, 6996},{ 264, 8232},{ 307, 9421}, + { 376,10534},{ 472,11567},{ 596,12525},{ 761,13395}, + { 990,14130},{ 1298,14694},{ 1695,15053},{ 2172,15195}, + { 2696,15173},{ 3213,15075},{ 3696,14948},{ 4141,14829}, + { 4541,14721},{ 4910,14609},{ 5245,14506},{ 5536,14399} + } + }, + { + /*Cb qi=19 INTRA*/ + { + { 3, 3},{ 61, 367},{ 109, 743},{ 135, 1182}, + { 161, 1646},{ 191, 2101},{ 229, 2524},{ 273, 2898}, + { 318, 3221},{ 376, 3490},{ 436, 3731},{ 487, 3994}, + { 539, 4251},{ 584, 4485},{ 621, 4721},{ 664, 4967}, + { 709, 5225},{ 752, 5431},{ 801, 5595},{ 846, 5796}, + { 912, 6011},{ 959, 6193},{ 1015, 6321},{ 1121, 6504} + }, + /*Cb qi=19 INTER*/ + { + { 126, 4},{ 97, 329},{ 69, 649},{ 56, 1017}, + { 61, 1432},{ 74, 1846},{ 88, 2255},{ 98, 2663}, + { 103, 3065},{ 106, 3460},{ 110, 3844},{ 114, 4211}, + { 117, 4564},{ 120, 4911},{ 122, 5253},{ 125, 5588}, + { 129, 5916},{ 135, 6241},{ 142, 6567},{ 149, 6885}, + { 155, 7206},{ 163, 7527},{ 174, 7843},{ 188, 8145} + } + }, + { + /*Cr qi=19 INTRA*/ + { + { 5, 6},{ 61, 376},{ 106, 765},{ 135, 1225}, + { 160, 1689},{ 192, 2126},{ 229, 2531},{ 271, 2869}, + { 321, 3168},{ 370, 3433},{ 421, 3704},{ 476, 3965}, + { 520, 4212},{ 572, 4452},{ 629, 4691},{ 671, 4939}, + { 724, 5152},{ 792, 5347},{ 858, 5510},{ 895, 5696}, + { 939, 5905},{ 991, 6056},{ 1027, 6244},{ 1127, 6333} + }, + /*Cr qi=19 INTER*/ + { + { 80, 45},{ 66, 
344},{ 55, 654},{ 56, 1030}, + { 66, 1440},{ 80, 1850},{ 91, 2259},{ 98, 2668}, + { 102, 3072},{ 104, 3466},{ 107, 3845},{ 109, 4215}, + { 110, 4578},{ 112, 4933},{ 116, 5283},{ 122, 5625}, + { 129, 5963},{ 136, 6287},{ 143, 6611},{ 151, 6927}, + { 160, 7229},{ 170, 7528},{ 181, 7818},{ 191, 8092} + } + } + }, + { + { + /*Y' qi=20 INTRA*/ + { + { 129, -50},{ 238, 1481},{ 469, 2728},{ 730, 3684}, + { 974, 4473},{ 1213, 5171},{ 1463, 5763},{ 1729, 6281}, + { 2002, 6744},{ 2299, 7146},{ 2613, 7492},{ 2940, 7746}, + { 3265, 7978},{ 3571, 8228},{ 3853, 8543},{ 4156, 8815}, + { 4476, 9001},{ 4775, 9218},{ 5070, 9373},{ 5352, 9446}, + { 5649, 9510},{ 5956, 9580},{ 6268, 9660},{ 6647, 9705} + }, + /*Y' qi=20 INTER*/ + { + { 64, -93},{ 52, 1340},{ 116, 2862},{ 170, 4344}, + { 216, 5678},{ 249, 6928},{ 281, 8155},{ 333, 9326}, + { 418,10410},{ 533,11411},{ 683,12329},{ 890,13127}, + { 1183,13750},{ 1579,14162},{ 2066,14357},{ 2611,14370}, + { 3159,14284},{ 3675,14167},{ 4142,14053},{ 4568,13953}, + { 4961,13852},{ 5320,13755},{ 5649,13675},{ 5933,13610} + } + }, + { + /*Cb qi=20 INTRA*/ + { + { 3, 3},{ 62, 367},{ 112, 743},{ 140, 1183}, + { 165, 1646},{ 196, 2099},{ 235, 2517},{ 284, 2883}, + { 334, 3198},{ 393, 3460},{ 457, 3690},{ 509, 3945}, + { 560, 4198},{ 605, 4435},{ 647, 4658},{ 699, 4888}, + { 742, 5155},{ 788, 5350},{ 835, 5517},{ 880, 5730}, + { 956, 5914},{ 1007, 6060},{ 1053, 6199},{ 1158, 6358} + }, + /*Cb qi=20 INTER*/ + { + { 128, -6},{ 96, 322},{ 66, 653},{ 54, 1025}, + { 63, 1431},{ 79, 1844},{ 91, 2256},{ 99, 2665}, + { 104, 3065},{ 107, 3455},{ 111, 3831},{ 115, 4189}, + { 120, 4539},{ 123, 4885},{ 126, 5219},{ 130, 5548}, + { 135, 5876},{ 141, 6199},{ 149, 6519},{ 156, 6837}, + { 166, 7153},{ 179, 7468},{ 189, 7784},{ 194, 8102} + } + }, + { + /*Cr qi=20 INTRA*/ + { + { 4, 6},{ 63, 376},{ 109, 765},{ 139, 1225}, + { 165, 1689},{ 199, 2124},{ 239, 2523},{ 285, 2852}, + { 340, 3140},{ 388, 3398},{ 438, 3662},{ 499, 3914}, + { 547, 4155},{ 596, 4392},{ 652, 
4634},{ 699, 4877}, + { 759, 5074},{ 824, 5257},{ 883, 5428},{ 936, 5589}, + { 986, 5790},{ 1030, 5960},{ 1074, 6119},{ 1172, 6191} + }, + /*Cr qi=20 INTER*/ + { + { 92, 40},{ 70, 345},{ 55, 658},{ 57, 1034}, + { 69, 1441},{ 84, 1852},{ 94, 2261},{ 98, 2669}, + { 102, 3074},{ 105, 3465},{ 107, 3841},{ 110, 4206}, + { 112, 4562},{ 116, 4915},{ 121, 5260},{ 127, 5591}, + { 134, 5920},{ 142, 6246},{ 153, 6562},{ 163, 6870}, + { 173, 7170},{ 186, 7463},{ 198, 7746},{ 199, 8030} + } + } + }, + { + { + /*Y' qi=21 INTRA*/ + { + { 130, -51},{ 244, 1476},{ 483, 2705},{ 756, 3635}, + { 1013, 4396},{ 1266, 5070},{ 1530, 5647},{ 1806, 6153}, + { 2093, 6600},{ 2411, 6976},{ 2739, 7299},{ 3079, 7534}, + { 3422, 7744},{ 3738, 7987},{ 4032, 8274},{ 4348, 8533}, + { 4675, 8721},{ 4989, 8909},{ 5291, 9051},{ 5577, 9111}, + { 5879, 9163},{ 6190, 9228},{ 6506, 9286},{ 6899, 9295} + }, + /*Y' qi=21 INTER*/ + { + { 64, -56},{ 55, 1341},{ 119, 2859},{ 174, 4324}, + { 223, 5640},{ 258, 6880},{ 295, 8096},{ 359, 9246}, + { 460,10302},{ 595,11268},{ 778,12131},{ 1032,12857}, + { 1387,13385},{ 1850,13683},{ 2399,13774},{ 2976,13729}, + { 3527,13619},{ 4034,13504},{ 4492,13401},{ 4912,13291}, + { 5298,13209},{ 5648,13137},{ 5974,13046},{ 6308,12977} + } + }, + { + /*Cb qi=21 INTRA*/ + { + { 4, 3},{ 64, 367},{ 114, 743},{ 141, 1183}, + { 166, 1645},{ 201, 2092},{ 247, 2502},{ 299, 2856}, + { 352, 3158},{ 413, 3412},{ 480, 3642},{ 536, 3893}, + { 588, 4137},{ 637, 4367},{ 678, 4598},{ 725, 4834}, + { 774, 5083},{ 827, 5269},{ 883, 5420},{ 930, 5633}, + { 999, 5829},{ 1057, 5959},{ 1113, 6082},{ 1200, 6265} + }, + /*Cb qi=21 INTER*/ + { + { 109, -8},{ 84, 321},{ 62, 654},{ 54, 1028}, + { 64, 1434},{ 80, 1847},{ 92, 2259},{ 100, 2664}, + { 105, 3060},{ 109, 3445},{ 114, 3815},{ 118, 4172}, + { 122, 4519},{ 126, 4861},{ 128, 5194},{ 133, 5520}, + { 139, 5847},{ 146, 6169},{ 155, 6487},{ 166, 6801}, + { 177, 7114},{ 189, 7423},{ 201, 7729},{ 208, 8035} + } + }, + { + /*Cr qi=21 INTRA*/ + { + { 4, 
6},{ 64, 377},{ 111, 766},{ 144, 1225}, + { 174, 1683},{ 206, 2114},{ 248, 2506},{ 302, 2824}, + { 357, 3099},{ 404, 3357},{ 455, 3622},{ 519, 3867}, + { 573, 4098},{ 625, 4331},{ 683, 4571},{ 733, 4802}, + { 793, 4994},{ 863, 5173},{ 926, 5337},{ 978, 5492}, + { 1030, 5685},{ 1079, 5856},{ 1126, 6027},{ 1217, 6159} + }, + /*Cr qi=21 INTER*/ + { + { 82, 29},{ 67, 341},{ 55, 660},{ 58, 1038}, + { 71, 1443},{ 85, 1851},{ 95, 2258},{ 99, 2666}, + { 103, 3069},{ 107, 3456},{ 110, 3826},{ 112, 4188}, + { 114, 4544},{ 118, 4891},{ 124, 5231},{ 132, 5567}, + { 139, 5894},{ 148, 6210},{ 159, 6520},{ 171, 6822}, + { 185, 7111},{ 196, 7403},{ 209, 7691},{ 225, 7945} + } + } + }, + { + { + /*Y' qi=22 INTRA*/ + { + { 128, -45},{ 254, 1463},{ 507, 2662},{ 794, 3562}, + { 1070, 4292},{ 1340, 4941},{ 1622, 5492},{ 1920, 5968}, + { 2229, 6387},{ 2565, 6742},{ 2911, 7047},{ 3263, 7264}, + { 3615, 7464},{ 3944, 7689},{ 4258, 7950},{ 4591, 8183}, + { 4934, 8347},{ 5259, 8517},{ 5573, 8634},{ 5870, 8683}, + { 6186, 8723},{ 6508, 8762},{ 6831, 8801},{ 7232, 8830} + }, + /*Y' qi=22 INTER*/ + { + { 77, -48},{ 57, 1343},{ 122, 2853},{ 180, 4299}, + { 231, 5597},{ 269, 6826},{ 314, 8025},{ 393, 9150}, + { 512,10179},{ 673,11103},{ 894,11908},{ 1207,12542}, + { 1635,12956},{ 2166,13148},{ 2755,13167},{ 3345,13088}, + { 3895,12966},{ 4386,12848},{ 4832,12746},{ 5252,12647}, + { 5634,12563},{ 5978,12497},{ 6299,12412},{ 6633,12338} + } + }, + { + /*Cb qi=22 INTRA*/ + { + { 4, 3},{ 66, 367},{ 122, 744},{ 153, 1182}, + { 177, 1640},{ 213, 2080},{ 263, 2475},{ 323, 2811}, + { 382, 3103},{ 451, 3346},{ 522, 3568},{ 581, 3814}, + { 633, 4054},{ 674, 4288},{ 719, 4523},{ 768, 4756}, + { 823, 4979},{ 883, 5162},{ 937, 5325},{ 996, 5510}, + { 1070, 5687},{ 1129, 5807},{ 1193, 5929},{ 1311, 6099} + }, + /*Cb qi=22 INTER*/ + { + { 107, -5},{ 83, 322},{ 61, 653},{ 55, 1030}, + { 66, 1436},{ 81, 1845},{ 94, 2253},{ 102, 2656}, + { 107, 3050},{ 111, 3435},{ 115, 3804},{ 119, 4158}, + { 124, 4501},{ 128, 
4835},{ 132, 5164},{ 138, 5490}, + { 146, 5812},{ 154, 6128},{ 163, 6442},{ 174, 6754}, + { 188, 7060},{ 205, 7361},{ 219, 7662},{ 233, 7953} + } + }, + { + /*Cr qi=22 INTRA*/ + { + { 4, 6},{ 67, 378},{ 118, 767},{ 151, 1222}, + { 182, 1675},{ 221, 2097},{ 269, 2476},{ 329, 2774}, + { 389, 3039},{ 444, 3292},{ 500, 3545},{ 560, 3788}, + { 615, 4020},{ 671, 4251},{ 734, 4484},{ 781, 4712}, + { 850, 4887},{ 925, 5060},{ 981, 5229},{ 1031, 5369}, + { 1092, 5549},{ 1148, 5715},{ 1200, 5861},{ 1291, 5943} + }, + /*Cr qi=22 INTER*/ + { + { 88, 34},{ 69, 340},{ 57, 657},{ 60, 1039}, + { 73, 1445},{ 87, 1851},{ 96, 2257},{ 100, 2662}, + { 103, 3058},{ 107, 3442},{ 111, 3812},{ 115, 4172}, + { 118, 4524},{ 123, 4864},{ 129, 5199},{ 136, 5531}, + { 145, 5855},{ 156, 6168},{ 170, 6468},{ 184, 6765}, + { 193, 7066},{ 207, 7353},{ 222, 7628},{ 230, 7900} + } + } + }, + { + { + /*Y' qi=23 INTRA*/ + { + { 126, -40},{ 257, 1458},{ 521, 2636},{ 825, 3501}, + { 1111, 4207},{ 1391, 4842},{ 1684, 5385},{ 1992, 5858}, + { 2311, 6277},{ 2653, 6626},{ 3005, 6929},{ 3366, 7134}, + { 3729, 7311},{ 4071, 7526},{ 4396, 7770},{ 4734, 7986}, + { 5086, 8131},{ 5421, 8286},{ 5735, 8404},{ 6033, 8456}, + { 6357, 8486},{ 6682, 8525},{ 7003, 8573},{ 7387, 8604} + }, + /*Y' qi=23 INTER*/ + { + { 64, -57},{ 60, 1345},{ 124, 2853},{ 185, 4284}, + { 239, 5565},{ 282, 6783},{ 336, 7967},{ 429, 9069}, + { 568,10063},{ 758,10943},{ 1028,11679},{ 1407,12216}, + { 1909,12520},{ 2502,12616},{ 3126,12573},{ 3722,12461}, + { 4258,12344},{ 4742,12236},{ 5185,12136},{ 5590,12052}, + { 5970,11980},{ 6315,11901},{ 6631,11826},{ 6954,11769} + } + }, + { + /*Cb qi=23 INTRA*/ + { + { 3, 3},{ 70, 367},{ 124, 744},{ 151, 1182}, + { 181, 1637},{ 222, 2071},{ 276, 2460},{ 343, 2785}, + { 403, 3072},{ 468, 3317},{ 542, 3534},{ 605, 3773}, + { 659, 4009},{ 703, 4243},{ 747, 4479},{ 795, 4707}, + { 852, 4923},{ 908, 5105},{ 972, 5254},{ 1043, 5423}, + { 1118, 5594},{ 1172, 5731},{ 1240, 5853},{ 1365, 6005} + }, + /*Cb qi=23 
INTER*/ + { + { 109, -10},{ 87, 325},{ 63, 650},{ 57, 1031}, + { 67, 1439},{ 83, 1847},{ 96, 2253},{ 103, 2652}, + { 109, 3041},{ 114, 3421},{ 117, 3789},{ 122, 4141}, + { 128, 4480},{ 134, 4811},{ 139, 5138},{ 144, 5463}, + { 152, 5781},{ 161, 6096},{ 174, 6404},{ 185, 6714}, + { 198, 7023},{ 216, 7320},{ 233, 7621},{ 245, 7935} + } + }, + { + /*Cr qi=23 INTRA*/ + { + { 5, 6},{ 70, 379},{ 122, 768},{ 155, 1222}, + { 187, 1671},{ 231, 2088},{ 283, 2459},{ 346, 2750}, + { 411, 3009},{ 465, 3261},{ 523, 3509},{ 585, 3746}, + { 639, 3980},{ 695, 4219},{ 754, 4449},{ 803, 4671}, + { 873, 4840},{ 953, 5001},{ 1015, 5156},{ 1071, 5286}, + { 1137, 5464},{ 1191, 5629},{ 1249, 5782},{ 1359, 5885} + }, + /*Cr qi=23 INTER*/ + { + { 84, 29},{ 69, 343},{ 58, 660},{ 62, 1041}, + { 75, 1448},{ 88, 1853},{ 97, 2258},{ 102, 2659}, + { 105, 3050},{ 108, 3430},{ 113, 3799},{ 116, 4155}, + { 121, 4505},{ 126, 4845},{ 132, 5176},{ 142, 5504}, + { 153, 5826},{ 165, 6133},{ 180, 6432},{ 197, 6722}, + { 212, 7005},{ 226, 7287},{ 244, 7555},{ 258, 7828} + } + } + }, + { + { + /*Y' qi=24 INTRA*/ + { + { 125, -34},{ 268, 1444},{ 547, 2590},{ 866, 3422}, + { 1172, 4098},{ 1476, 4702},{ 1790, 5222},{ 2117, 5678}, + { 2453, 6080},{ 2811, 6418},{ 3178, 6700},{ 3552, 6895}, + { 3928, 7055},{ 4286, 7243},{ 4627, 7477},{ 4981, 7674}, + { 5344, 7802},{ 5683, 7944},{ 6009, 8043},{ 6313, 8082}, + { 6633, 8111},{ 6959, 8151},{ 7280, 8197},{ 7660, 8221} + }, + /*Y' qi=24 INTER*/ + { + { 62, -63},{ 68, 1345},{ 134, 2840},{ 199, 4245}, + { 256, 5508},{ 304, 6715},{ 371, 7880},{ 484, 8950}, + { 652, 9899},{ 892,10709},{ 1238,11334},{ 1722,11722}, + { 2326,11875},{ 2983,11864},{ 3616,11783},{ 4189,11678}, + { 4707,11570},{ 5178,11476},{ 5617,11395},{ 6017,11319}, + { 6380,11252},{ 6720,11185},{ 7044,11126},{ 7377,11118} + } + }, + { + /*Cb qi=24 INTRA*/ + { + { 4, 3},{ 75, 367},{ 132, 745},{ 159, 1182}, + { 187, 1634},{ 230, 2061},{ 289, 2439},{ 361, 2753}, + { 425, 3034},{ 492, 3278},{ 566, 3490},{ 630, 
3720}, + { 686, 3956},{ 732, 4190},{ 777, 4420},{ 829, 4637}, + { 894, 4840},{ 958, 5012},{ 1023, 5155},{ 1090, 5326}, + { 1165, 5502},{ 1226, 5622},{ 1299, 5717},{ 1408, 5887} + }, + /*Cb qi=24 INTER*/ + { + { 110, 35},{ 92, 337},{ 70, 651},{ 63, 1033}, + { 74, 1440},{ 91, 1846},{ 102, 2248},{ 109, 2644}, + { 114, 3031},{ 120, 3404},{ 127, 3762},{ 133, 4109}, + { 138, 4445},{ 144, 4772},{ 151, 5094},{ 159, 5411}, + { 168, 5728},{ 180, 6037},{ 195, 6338},{ 210, 6640}, + { 227, 6944},{ 249, 7236},{ 272, 7528},{ 299, 7809} + } + }, + { + /*Cr qi=24 INTRA*/ + { + { 5, 6},{ 72, 380},{ 124, 770},{ 158, 1222}, + { 195, 1668},{ 240, 2079},{ 297, 2438},{ 367, 2715}, + { 433, 2966},{ 488, 3218},{ 549, 3467},{ 609, 3701}, + { 664, 3935},{ 728, 4165},{ 792, 4379},{ 845, 4586}, + { 917, 4744},{ 995, 4898},{ 1063, 5049},{ 1120, 5187}, + { 1190, 5359},{ 1249, 5522},{ 1304, 5672},{ 1397, 5806} + }, + /*Cr qi=24 INTER*/ + { + { 91, 56},{ 73, 353},{ 61, 664},{ 66, 1045}, + { 80, 1449},{ 95, 1851},{ 103, 2250},{ 107, 2648}, + { 111, 3038},{ 116, 3413},{ 120, 3774},{ 124, 4128}, + { 130, 4471},{ 138, 4802},{ 145, 5130},{ 156, 5453}, + { 171, 5764},{ 187, 6061},{ 204, 6355},{ 220, 6643}, + { 238, 6923},{ 254, 7204},{ 275, 7475},{ 289, 7752} + } + } + }, + { + { + /*Y' qi=25 INTRA*/ + { + { 125, -28},{ 285, 1426},{ 582, 2540},{ 917, 3351}, + { 1244, 3997},{ 1569, 4570},{ 1903, 5071},{ 2258, 5498}, + { 2626, 5866},{ 3002, 6182},{ 3382, 6448},{ 3770, 6623}, + { 4162, 6760},{ 4528, 6934},{ 4882, 7144},{ 5249, 7328}, + { 5610, 7453},{ 5958, 7578},{ 6291, 7672},{ 6597, 7708}, + { 6928, 7715},{ 7258, 7737},{ 7575, 7781},{ 7950, 7829} + }, + /*Y' qi=25 INTER*/ + { + { 64, -16},{ 72, 1348},{ 139, 2832},{ 206, 4218}, + { 268, 5465},{ 322, 6659},{ 403, 7803},{ 540, 8838}, + { 747, 9734},{ 1044,10465},{ 1473,10981},{ 2048,11249}, + { 2717,11311},{ 3397,11257},{ 4025,11161},{ 4589,11052}, + { 5099,10947},{ 5560,10859},{ 5989,10786},{ 6389,10717}, + { 6753,10652},{ 7078,10592},{ 7389,10535},{ 
7697,10460} + } + }, + { + /*Cb qi=25 INTRA*/ + { + { 3, 3},{ 78, 368},{ 133, 745},{ 159, 1180}, + { 193, 1627},{ 242, 2046},{ 304, 2411},{ 381, 2714}, + { 456, 2983},{ 527, 3224},{ 598, 3437},{ 667, 3655}, + { 726, 3888},{ 776, 4117},{ 826, 4333},{ 883, 4543}, + { 954, 4727},{ 1019, 4878},{ 1095, 5014},{ 1171, 5187}, + { 1255, 5342},{ 1319, 5458},{ 1396, 5546},{ 1536, 5678} + }, + /*Cb qi=25 INTER*/ + { + { 117, 32},{ 89, 342},{ 67, 660},{ 64, 1037}, + { 77, 1441},{ 93, 1845},{ 105, 2243},{ 113, 2633}, + { 120, 3016},{ 125, 3387},{ 131, 3739},{ 137, 4080}, + { 144, 4416},{ 152, 4741},{ 160, 5057},{ 169, 5369}, + { 180, 5680},{ 193, 5990},{ 209, 6294},{ 227, 6594}, + { 249, 6888},{ 269, 7180},{ 294, 7467},{ 317, 7768} + } + }, + { + /*Cr qi=25 INTRA*/ + { + { 6, 6},{ 74, 380},{ 129, 770},{ 165, 1220}, + { 201, 1658},{ 253, 2061},{ 315, 2410},{ 388, 2676}, + { 462, 2920},{ 523, 3166},{ 584, 3404},{ 647, 3637}, + { 701, 3870},{ 769, 4086},{ 838, 4296},{ 898, 4491}, + { 980, 4627},{ 1065, 4759},{ 1126, 4920},{ 1187, 5058}, + { 1283, 5180},{ 1347, 5332},{ 1404, 5475},{ 1527, 5534} + }, + /*Cr qi=25 INTER*/ + { + { 92, 41},{ 75, 347},{ 64, 664},{ 70, 1045}, + { 85, 1448},{ 98, 1849},{ 105, 2245},{ 110, 2637}, + { 115, 3023},{ 120, 3395},{ 126, 3753},{ 131, 4102}, + { 136, 4439},{ 145, 4768},{ 156, 5094},{ 168, 5410}, + { 184, 5717},{ 203, 6010},{ 221, 6300},{ 239, 6577}, + { 262, 6847},{ 282, 7123},{ 303, 7390},{ 322, 7665} + } + } + }, + { + { + /*Y' qi=26 INTRA*/ + { + { 130, -24},{ 292, 1423},{ 594, 2525},{ 943, 3307}, + { 1289, 3921},{ 1633, 4467},{ 1991, 4943},{ 2368, 5348}, + { 2753, 5696},{ 3148, 5991},{ 3545, 6247},{ 3942, 6415}, + { 4342, 6535},{ 4726, 6690},{ 5093, 6883},{ 5466, 7047}, + { 5840, 7159},{ 6202, 7274},{ 6545, 7351},{ 6855, 7375}, + { 7186, 7384},{ 7517, 7416},{ 7840, 7447},{ 8238, 7450} + }, + /*Y' qi=26 INTER*/ + { + { 52, 16},{ 75, 1336},{ 143, 2815},{ 213, 4191}, + { 278, 5427},{ 339, 6611},{ 436, 7734},{ 600, 8732}, + { 843, 9579},{ 
1195,10243},{ 1702,10660},{ 2355,10825}, + { 3070,10820},{ 3755,10743},{ 4372,10643},{ 4925,10538}, + { 5426,10440},{ 5882,10354},{ 6296,10290},{ 6686,10224}, + { 7049,10163},{ 7380,10113},{ 7672,10062},{ 7937,10021} + } + }, + { + /*Cb qi=26 INTRA*/ + { + { 4, 3},{ 79, 368},{ 138, 745},{ 167, 1180}, + { 200, 1623},{ 252, 2034},{ 322, 2389},{ 403, 2682}, + { 480, 2941},{ 558, 3176},{ 631, 3393},{ 700, 3608}, + { 766, 3825},{ 819, 4046},{ 868, 4265},{ 926, 4472}, + { 1002, 4645},{ 1070, 4800},{ 1151, 4924},{ 1242, 5063}, + { 1325, 5221},{ 1393, 5338},{ 1464, 5431},{ 1595, 5559} + }, + /*Cb qi=26 INTER*/ + { + { 98, 33},{ 83, 343},{ 65, 662},{ 65, 1037}, + { 80, 1437},{ 96, 1839},{ 107, 2238},{ 115, 2628}, + { 122, 3007},{ 128, 3373},{ 134, 3722},{ 142, 4060}, + { 149, 4390},{ 158, 4713},{ 167, 5029},{ 178, 5341}, + { 191, 5647},{ 208, 5948},{ 227, 6244},{ 247, 6539}, + { 269, 6833},{ 295, 7114},{ 328, 7388},{ 369, 7658} + } + }, + { + /*Cr qi=26 INTRA*/ + { + { 5, 6},{ 75, 380},{ 133, 769},{ 172, 1217}, + { 212, 1652},{ 266, 2048},{ 333, 2384},{ 412, 2643}, + { 490, 2880},{ 552, 3124},{ 616, 3365},{ 681, 3594}, + { 739, 3816},{ 810, 4024},{ 880, 4224},{ 945, 4405}, + { 1029, 4538},{ 1114, 4674},{ 1183, 4822},{ 1254, 4946}, + { 1346, 5063},{ 1417, 5201},{ 1478, 5345},{ 1597, 5411} + }, + /*Cr qi=26 INTER*/ + { + { 97, 29},{ 75, 342},{ 62, 667},{ 70, 1047}, + { 87, 1447},{ 100, 1846},{ 107, 2242},{ 113, 2633}, + { 118, 3016},{ 123, 3382},{ 128, 3737},{ 135, 4082}, + { 142, 4417},{ 151, 4746},{ 162, 5066},{ 176, 5377}, + { 194, 5679},{ 217, 5963},{ 239, 6244},{ 260, 6522}, + { 284, 6789},{ 309, 7052},{ 335, 7313},{ 355, 7582} + } + } + }, + { + { + /*Y' qi=27 INTRA*/ + { + { 118, -10},{ 308, 1404},{ 630, 2473},{ 997, 3227}, + { 1360, 3819},{ 1719, 4354},{ 2086, 4829},{ 2470, 5233}, + { 2863, 5576},{ 3267, 5870},{ 3677, 6117},{ 4085, 6268}, + { 4499, 6376},{ 4888, 6521},{ 5257, 6705},{ 5638, 6865}, + { 6020, 6962},{ 6394, 7056},{ 6744, 7130},{ 7051, 7158}, + { 7386, 
7164},{ 7717, 7185},{ 8042, 7209},{ 8444, 7206} + }, + /*Y' qi=27 INTER*/ + { + { 54, 19},{ 77, 1333},{ 147, 2806},{ 221, 4166}, + { 290, 5390},{ 360, 6564},{ 474, 7665},{ 664, 8630}, + { 949, 9423},{ 1370,10002},{ 1958,10323},{ 2670,10414}, + { 3406,10375},{ 4086,10285},{ 4691,10182},{ 5233,10085}, + { 5724, 9994},{ 6169, 9918},{ 6582, 9863},{ 6962, 9813}, + { 7316, 9759},{ 7645, 9707},{ 7948, 9660},{ 8262, 9623} + } + }, + { + /*Cb qi=27 INTRA*/ + { + { 4, 3},{ 79, 368},{ 137, 745},{ 166, 1180}, + { 200, 1622},{ 253, 2030},{ 324, 2381},{ 407, 2671}, + { 487, 2925},{ 567, 3156},{ 640, 3372},{ 712, 3580}, + { 782, 3792},{ 833, 4015},{ 887, 4227},{ 954, 4422}, + { 1031, 4592},{ 1103, 4738},{ 1187, 4856},{ 1280, 4990}, + { 1371, 5135},{ 1442, 5244},{ 1520, 5321},{ 1684, 5398} + }, + /*Cb qi=27 INTER*/ + { + { 113, 20},{ 90, 338},{ 66, 661},{ 67, 1034}, + { 82, 1438},{ 97, 1842},{ 108, 2238},{ 115, 2624}, + { 123, 3000},{ 130, 3361},{ 138, 3708},{ 146, 4040}, + { 155, 4367},{ 164, 4688},{ 174, 4999},{ 186, 5306}, + { 203, 5609},{ 222, 5908},{ 243, 6202},{ 268, 6494}, + { 295, 6781},{ 326, 7058},{ 367, 7319},{ 420, 7551} + } + }, + { + /*Cr qi=27 INTRA*/ + { + { 5, 6},{ 75, 380},{ 133, 770},{ 173, 1217}, + { 214, 1650},{ 268, 2040},{ 337, 2375},{ 418, 2631}, + { 496, 2862},{ 558, 3104},{ 625, 3346},{ 692, 3571}, + { 753, 3786},{ 825, 3989},{ 896, 4182},{ 969, 4352}, + { 1059, 4479},{ 1144, 4614},{ 1212, 4757},{ 1284, 4871}, + { 1380, 4982},{ 1457, 5125},{ 1528, 5267},{ 1651, 5346} + }, + /*Cr qi=27 INTER*/ + { + { 92, 24},{ 74, 341},{ 61, 669},{ 71, 1049}, + { 88, 1448},{ 100, 1849},{ 107, 2243},{ 113, 2631}, + { 119, 3010},{ 125, 3373},{ 131, 3723},{ 137, 4064}, + { 146, 4396},{ 159, 4720},{ 172, 5033},{ 189, 5340}, + { 210, 5636},{ 233, 5920},{ 256, 6197},{ 282, 6465}, + { 310, 6730},{ 332, 7000},{ 359, 7259},{ 385, 7515} + } + } + }, + { + { + /*Y' qi=28 INTRA*/ + { + { 116, -8},{ 314, 1400},{ 640, 2458},{ 1013, 3197}, + { 1386, 3768},{ 1762, 4279},{ 2151, 4733},{ 
2558, 5117}, + { 2970, 5442},{ 3393, 5714},{ 3820, 5935},{ 4243, 6069}, + { 4671, 6161},{ 5074, 6289},{ 5456, 6457},{ 5849, 6598}, + { 6244, 6689},{ 6632, 6777},{ 6984, 6833},{ 7294, 6855}, + { 7625, 6862},{ 7961, 6875},{ 8302, 6890},{ 8720, 6883} + }, + /*Y' qi=28 INTER*/ + { + { 54, 8},{ 81, 1333},{ 154, 2793},{ 231, 4138}, + { 304, 5352},{ 384, 6512},{ 519, 7585},{ 743, 8508}, + { 1082, 9236},{ 1587, 9717},{ 2267, 9928},{ 3034, 9944}, + { 3775, 9878},{ 4438, 9786},{ 5031, 9686},{ 5563, 9601}, + { 6042, 9523},{ 6481, 9456},{ 6890, 9405},{ 7266, 9356}, + { 7614, 9313},{ 7933, 9265},{ 8238, 9220},{ 8545, 9193} + } + }, + { + /*Cb qi=28 INTRA*/ + { + { 3, 3},{ 80, 368},{ 138, 746},{ 168, 1179}, + { 208, 1615},{ 268, 2014},{ 345, 2354},{ 432, 2637}, + { 515, 2884},{ 595, 3108},{ 669, 3323},{ 745, 3533}, + { 818, 3740},{ 876, 3953},{ 932, 4160},{ 1003, 4349}, + { 1088, 4501},{ 1154, 4648},{ 1241, 4768},{ 1349, 4889}, + { 1441, 5023},{ 1524, 5113},{ 1611, 5187},{ 1783, 5283} + }, + /*Cb qi=28 INTER*/ + { + { 117, 29},{ 91, 341},{ 65, 663},{ 68, 1038}, + { 85, 1440},{ 100, 1841},{ 110, 2234},{ 119, 2616}, + { 127, 2985},{ 135, 3342},{ 142, 3685},{ 151, 4015}, + { 162, 4337},{ 174, 4652},{ 186, 4960},{ 201, 5264}, + { 218, 5567},{ 239, 5863},{ 266, 6149},{ 295, 6434}, + { 328, 6715},{ 371, 6976},{ 409, 7239},{ 460, 7477} + } + }, + { + /*Cr qi=28 INTRA*/ + { + { 6, 7},{ 79, 381},{ 138, 771},{ 178, 1215}, + { 222, 1644},{ 285, 2026},{ 359, 2347},{ 441, 2597}, + { 521, 2827},{ 588, 3066},{ 655, 3303},{ 725, 3523}, + { 791, 3728},{ 870, 3920},{ 950, 4103},{ 1030, 4265}, + { 1121, 4388},{ 1198, 4520},{ 1266, 4659},{ 1356, 4759}, + { 1461, 4865},{ 1540, 4993},{ 1619, 5115},{ 1786, 5160} + }, + /*Cr qi=28 INTER*/ + { + { 96, 18},{ 78, 340},{ 66, 672},{ 74, 1051}, + { 90, 1450},{ 103, 1845},{ 110, 2235},{ 116, 2619}, + { 122, 2995},{ 129, 3356},{ 137, 3702},{ 146, 4038}, + { 156, 4365},{ 168, 4684},{ 182, 4995},{ 203, 5297}, + { 227, 5588},{ 253, 5866},{ 282, 6131},{ 311, 
6394}, + { 339, 6664},{ 366, 6918},{ 400, 7171},{ 424, 7450} + } + } + }, + { + { + /*Y' qi=29 INTRA*/ + { + { 112, 7},{ 334, 1382},{ 681, 2410},{ 1081, 3112}, + { 1484, 3650},{ 1894, 4128},{ 2316, 4547},{ 2749, 4905}, + { 3188, 5208},{ 3634, 5458},{ 4079, 5666},{ 4517, 5791}, + { 4952, 5870},{ 5359, 5983},{ 5754, 6137},{ 6165, 6268}, + { 6568, 6351},{ 6958, 6423},{ 7320, 6471},{ 7638, 6490}, + { 7979, 6490},{ 8313, 6499},{ 8651, 6517},{ 9085, 6499} + }, + /*Y' qi=29 INTER*/ + { + { 55, 15},{ 85, 1336},{ 160, 2780},{ 242, 4104}, + { 323, 5302},{ 418, 6443},{ 586, 7480},{ 859, 8342}, + { 1278, 8982},{ 1888, 9347},{ 2658, 9457},{ 3457, 9425}, + { 4192, 9343},{ 4842, 9247},{ 5417, 9162},{ 5935, 9086}, + { 6404, 9011},{ 6841, 8952},{ 7241, 8907},{ 7609, 8867}, + { 7953, 8832},{ 8267, 8792},{ 8562, 8740},{ 8836, 8701} + } + }, + { + /*Cb qi=29 INTRA*/ + { + { 5, 3},{ 84, 368},{ 144, 746},{ 176, 1175}, + { 219, 1604},{ 285, 1991},{ 372, 2318},{ 462, 2591}, + { 546, 2833},{ 628, 3058},{ 704, 3274},{ 788, 3473}, + { 870, 3664},{ 935, 3865},{ 995, 4059},{ 1072, 4239}, + { 1167, 4388},{ 1248, 4518},{ 1334, 4634},{ 1429, 4765}, + { 1536, 4884},{ 1628, 4964},{ 1716, 5038},{ 1885, 5128} + }, + /*Cb qi=29 INTER*/ + { + { 126, 25},{ 95, 340},{ 69, 662},{ 71, 1039}, + { 88, 1440},{ 102, 1839},{ 113, 2227},{ 122, 2604}, + { 132, 2969},{ 141, 3320},{ 151, 3659},{ 161, 3985}, + { 172, 4301},{ 186, 4612},{ 200, 4917},{ 219, 5213}, + { 241, 5509},{ 265, 5800},{ 296, 6081},{ 329, 6360}, + { 369, 6633},{ 414, 6899},{ 465, 7148},{ 520, 7387} + } + }, + { + /*Cr qi=29 INTRA*/ + { + { 6, 7},{ 82, 382},{ 142, 772},{ 185, 1211}, + { 233, 1632},{ 303, 2000},{ 388, 2306},{ 475, 2550}, + { 556, 2779},{ 627, 3007},{ 707, 3237},{ 778, 3459}, + { 843, 3654},{ 927, 3834},{ 1012, 4012},{ 1101, 4152}, + { 1197, 4262},{ 1275, 4399},{ 1359, 4511},{ 1455, 4596}, + { 1562, 4708},{ 1644, 4833},{ 1719, 4954},{ 1888, 4988} + }, + /*Cr qi=29 INTER*/ + { + { 101, 28},{ 81, 343},{ 67, 673},{ 75, 1053}, + { 93, 
1450},{ 106, 1844},{ 113, 2230},{ 119, 2610}, + { 127, 2980},{ 135, 3334},{ 143, 3676},{ 153, 4007}, + { 165, 4330},{ 180, 4645},{ 201, 4951},{ 224, 5243}, + { 253, 5522},{ 284, 5794},{ 314, 6060},{ 345, 6322}, + { 381, 6578},{ 419, 6828},{ 455, 7073},{ 495, 7316} + } + } + }, + { + { + /*Y' qi=30 INTRA*/ + { + { 112, 8},{ 335, 1380},{ 682, 2401},{ 1083, 3093}, + { 1489, 3619},{ 1902, 4092},{ 2332, 4511},{ 2777, 4865}, + { 3231, 5156},{ 3693, 5394},{ 4153, 5585},{ 4605, 5689}, + { 5049, 5764},{ 5468, 5871},{ 5875, 6004},{ 6295, 6120}, + { 6706, 6201},{ 7099, 6273},{ 7461, 6311},{ 7785, 6320}, + { 8128, 6322},{ 8469, 6331},{ 8806, 6342},{ 9220, 6338} + }, + /*Y' qi=30 INTER*/ + { + { 58, 8},{ 90, 1340},{ 169, 2771},{ 257, 4079}, + { 345, 5266},{ 459, 6387},{ 660, 7383},{ 990, 8180}, + { 1496, 8726},{ 2203, 8992},{ 3029, 9038},{ 3833, 8984}, + { 4549, 8900},{ 5183, 8813},{ 5745, 8735},{ 6250, 8674}, + { 6715, 8619},{ 7138, 8565},{ 7529, 8528},{ 7899, 8495}, + { 8234, 8465},{ 8550, 8429},{ 8856, 8395},{ 9160, 8374} + } + }, + { + /*Cb qi=30 INTRA*/ + { + { 7, 3},{ 88, 369},{ 149, 747},{ 185, 1175}, + { 232, 1599},{ 304, 1976},{ 392, 2293},{ 486, 2557}, + { 573, 2797},{ 656, 3027},{ 735, 3243},{ 819, 3442}, + { 903, 3629},{ 966, 3828},{ 1025, 4027},{ 1105, 4204}, + { 1201, 4343},{ 1282, 4469},{ 1379, 4575},{ 1486, 4689}, + { 1588, 4813},{ 1678, 4900},{ 1767, 4969},{ 1911, 5080} + }, + /*Cb qi=30 INTER*/ + { + { 120, 23},{ 96, 336},{ 72, 661},{ 75, 1043}, + { 91, 1441},{ 105, 1837},{ 117, 2221},{ 127, 2592}, + { 137, 2953},{ 148, 3301},{ 159, 3635},{ 170, 3959}, + { 184, 4271},{ 199, 4578},{ 216, 4879},{ 238, 5175}, + { 262, 5466},{ 294, 5750},{ 332, 6027},{ 373, 6298}, + { 421, 6559},{ 473, 6805},{ 526, 7053},{ 587, 7298} + } + }, + { + /*Cr qi=30 INTRA*/ + { + { 10, 7},{ 89, 384},{ 147, 773},{ 192, 1211}, + { 245, 1627},{ 322, 1984},{ 412, 2280},{ 501, 2520}, + { 583, 2750},{ 654, 2982},{ 736, 3207},{ 810, 3419}, + { 873, 3614},{ 957, 3794},{ 1048, 3965},{ 1139, 
4102}, + { 1237, 4208},{ 1327, 4328},{ 1408, 4448},{ 1496, 4545}, + { 1604, 4652},{ 1699, 4760},{ 1780, 4877},{ 1937, 4942} + }, + /*Cr qi=30 INTER*/ + { + { 115, 26},{ 89, 342},{ 70, 672},{ 79, 1055}, + { 96, 1451},{ 108, 1841},{ 116, 2222},{ 124, 2599}, + { 132, 2965},{ 141, 3316},{ 151, 3655},{ 163, 3984}, + { 178, 4301},{ 197, 4609},{ 219, 4909},{ 247, 5195}, + { 280, 5469},{ 317, 5734},{ 351, 5991},{ 383, 6248}, + { 423, 6500},{ 467, 6744},{ 502, 6995},{ 558, 7226} + } + } + }, + { + { + /*Y' qi=31 INTRA*/ + { + { 116, 20},{ 359, 1361},{ 732, 2350},{ 1162, 3010}, + { 1597, 3507},{ 2042, 3950},{ 2503, 4339},{ 2974, 4670}, + { 3446, 4951},{ 3922, 5179},{ 4394, 5357},{ 4858, 5454}, + { 5313, 5519},{ 5734, 5626},{ 6154, 5755},{ 6585, 5859}, + { 7004, 5928},{ 7408, 5998},{ 7775, 6039},{ 8102, 6048}, + { 8442, 6051},{ 8790, 6054},{ 9136, 6057},{ 9554, 6041} + }, + /*Y' qi=31 INTER*/ + { + { 53, 12},{ 90, 1340},{ 169, 2765},{ 259, 4062}, + { 353, 5236},{ 483, 6340},{ 713, 7305},{ 1086, 8059}, + { 1651, 8548},{ 2423, 8751},{ 3288, 8754},{ 4106, 8674}, + { 4827, 8572},{ 5451, 8482},{ 6007, 8407},{ 6514, 8344}, + { 6970, 8282},{ 7397, 8225},{ 7795, 8193},{ 8159, 8161}, + { 8498, 8120},{ 8814, 8093},{ 9127, 8066},{ 9432, 8040} + } + }, + { + /*Cb qi=31 INTRA*/ + { + { 7, 3},{ 88, 369},{ 149, 746},{ 185, 1173}, + { 234, 1595},{ 308, 1967},{ 399, 2278},{ 494, 2537}, + { 583, 2774},{ 669, 2997},{ 755, 3204},{ 847, 3390}, + { 936, 3569},{ 1008, 3759},{ 1078, 3942},{ 1162, 4104}, + { 1262, 4238},{ 1352, 4364},{ 1442, 4470},{ 1557, 4567}, + { 1676, 4674},{ 1759, 4781},{ 1850, 4853},{ 2043, 4897} + }, + /*Cb qi=31 INTER*/ + { + { 121, 23},{ 96, 335},{ 72, 660},{ 74, 1043}, + { 90, 1440},{ 105, 1834},{ 116, 2217},{ 127, 2586}, + { 138, 2945},{ 148, 3293},{ 159, 3626},{ 172, 3945}, + { 185, 4256},{ 202, 4559},{ 223, 4856},{ 245, 5150}, + { 272, 5440},{ 306, 5719},{ 346, 5989},{ 391, 6253}, + { 443, 6511},{ 510, 6743},{ 583, 6965},{ 651, 7182} + } + }, + { + /*Cr qi=31 INTRA*/ + { 
+ { 10, 7},{ 88, 384},{ 147, 773},{ 192, 1209}, + { 247, 1622},{ 326, 1974},{ 417, 2262},{ 509, 2500}, + { 596, 2726},{ 670, 2949},{ 754, 3170},{ 836, 3370}, + { 912, 3548},{ 999, 3724},{ 1093, 3888},{ 1198, 4000}, + { 1304, 4095},{ 1384, 4230},{ 1470, 4347},{ 1577, 4422}, + { 1696, 4513},{ 1798, 4620},{ 1869, 4746},{ 1991, 4798} + }, + /*Cr qi=31 INTER*/ + { + { 113, 32},{ 88, 345},{ 69, 674},{ 79, 1055}, + { 96, 1451},{ 108, 1839},{ 115, 2218},{ 123, 2592}, + { 132, 2957},{ 141, 3308},{ 151, 3643},{ 163, 3968}, + { 179, 4285},{ 200, 4590},{ 225, 4886},{ 254, 5169}, + { 291, 5436},{ 330, 5696},{ 368, 5951},{ 409, 6200}, + { 452, 6448},{ 493, 6695},{ 536, 6940},{ 571, 7204} + } + } + }, + { + { + /*Y' qi=32 INTRA*/ + { + { 123, 26},{ 370, 1356},{ 756, 2321},{ 1211, 2944}, + { 1674, 3408},{ 2148, 3826},{ 2639, 4193},{ 3138, 4504}, + { 3634, 4765},{ 4133, 4973},{ 4625, 5137},{ 5101, 5225}, + { 5567, 5274},{ 6002, 5363},{ 6437, 5482},{ 6885, 5566}, + { 7312, 5625},{ 7723, 5686},{ 8101, 5721},{ 8429, 5732}, + { 8769, 5728},{ 9120, 5726},{ 9472, 5723},{ 9918, 5700} + }, + /*Y' qi=32 INTER*/ + { + { 54, -3},{ 95, 1343},{ 179, 2750},{ 276, 4027}, + { 382, 5185},{ 543, 6256},{ 830, 7161},{ 1301, 7815}, + { 2003, 8172},{ 2883, 8266},{ 3779, 8217},{ 4578, 8127}, + { 5274, 8035},{ 5886, 7952},{ 6430, 7887},{ 6929, 7835}, + { 7380, 7779},{ 7796, 7737},{ 8190, 7705},{ 8552, 7672}, + { 8896, 7640},{ 9210, 7612},{ 9510, 7589},{ 9746, 7552} + } + }, + { + /*Cb qi=32 INTRA*/ + { + { 6, 3},{ 89, 369},{ 153, 746},{ 193, 1167}, + { 247, 1577},{ 330, 1935},{ 429, 2236},{ 528, 2494}, + { 620, 2732},{ 712, 2948},{ 801, 3146},{ 898, 3325}, + { 999, 3489},{ 1078, 3664},{ 1155, 3832},{ 1251, 3985}, + { 1360, 4115},{ 1451, 4236},{ 1549, 4338},{ 1667, 4433}, + { 1797, 4522},{ 1891, 4613},{ 1989, 4687},{ 2162, 4776} + }, + /*Cb qi=32 INTER*/ + { + { 116, -1},{ 98, 321},{ 80, 656},{ 80, 1042}, + { 96, 1438},{ 110, 1827},{ 122, 2205},{ 133, 2570}, + { 144, 2925},{ 157, 3268},{ 170, 3597},{ 185, 
3911}, + { 202, 4216},{ 221, 4516},{ 244, 4809},{ 273, 5096}, + { 308, 5376},{ 350, 5644},{ 401, 5907},{ 459, 6160}, + { 520, 6401},{ 592, 6630},{ 676, 6837},{ 758, 7050} + } + }, + { + /*Cr qi=32 INTRA*/ + { + { 12, 7},{ 91, 386},{ 152, 773},{ 201, 1202}, + { 261, 1603},{ 347, 1942},{ 447, 2223},{ 540, 2460}, + { 626, 2684},{ 711, 2901},{ 801, 3115},{ 887, 3312}, + { 969, 3480},{ 1068, 3633},{ 1176, 3779},{ 1283, 3885}, + { 1392, 3969},{ 1485, 4090},{ 1573, 4206},{ 1686, 4274}, + { 1813, 4354},{ 1911, 4459},{ 2004, 4563},{ 2162, 4590} + }, + /*Cr qi=32 INTER*/ + { + { 129, 5},{ 98, 334},{ 75, 673},{ 84, 1055}, + { 101, 1448},{ 113, 1832},{ 121, 2206},{ 129, 2577}, + { 140, 2937},{ 151, 3282},{ 163, 3614},{ 179, 3932}, + { 198, 4240},{ 221, 4542},{ 252, 4830},{ 290, 5102}, + { 329, 5364},{ 373, 5618},{ 420, 5864},{ 468, 6105}, + { 513, 6351},{ 564, 6587},{ 624, 6810},{ 697, 7017} + } + } + }, + { + { + /*Y' qi=33 INTRA*/ + { + { 115, 36},{ 388, 1338},{ 791, 2289},{ 1258, 2899}, + { 1732, 3352},{ 2220, 3760},{ 2730, 4117},{ 3244, 4415}, + { 3751, 4662},{ 4261, 4858},{ 4766, 5012},{ 5249, 5094}, + { 5719, 5141},{ 6159, 5225},{ 6597, 5333},{ 7044, 5416}, + { 7474, 5472},{ 7893, 5531},{ 8268, 5570},{ 8591, 5580}, + { 8931, 5578},{ 9283, 5579},{ 9634, 5582},{10067, 5560} + }, + /*Y' qi=33 INTER*/ + { + { 65, -14},{ 102, 1345},{ 190, 2736},{ 294, 3999}, + { 411, 5146},{ 597, 6192},{ 934, 7045},{ 1488, 7622}, + { 2281, 7895},{ 3213, 7937},{ 4108, 7871},{ 4883, 7784}, + { 5556, 7709},{ 6150, 7643},{ 6685, 7585},{ 7176, 7539}, + { 7620, 7502},{ 8034, 7466},{ 8427, 7435},{ 8793, 7409}, + { 9136, 7386},{ 9446, 7364},{ 9743, 7339},{10025, 7303} + } + }, + { + /*Cb qi=33 INTRA*/ + { + { 5, 3},{ 92, 369},{ 159, 746},{ 203, 1163}, + { 263, 1564},{ 353, 1911},{ 458, 2204},{ 557, 2460}, + { 650, 2697},{ 744, 2913},{ 836, 3110},{ 934, 3292}, + { 1036, 3454},{ 1125, 3616},{ 1204, 3781},{ 1298, 3932}, + { 1410, 4058},{ 1507, 4170},{ 1606, 4265},{ 1725, 4358}, + { 1853, 4445},{ 1955, 
4535},{ 2067, 4597},{ 2258, 4663} + }, + /*Cb qi=33 INTER*/ + { + { 109, 37},{ 94, 343},{ 81, 662},{ 85, 1042}, + { 102, 1436},{ 116, 1823},{ 128, 2195},{ 141, 2554}, + { 154, 2906},{ 167, 3246},{ 183, 3570},{ 202, 3881}, + { 220, 4185},{ 241, 4482},{ 268, 4772},{ 302, 5053}, + { 341, 5328},{ 388, 5592},{ 446, 5846},{ 507, 6096}, + { 581, 6328},{ 670, 6534},{ 762, 6731},{ 842, 6922} + } + }, + { + /*Cr qi=33 INTRA*/ + { + { 11, 7},{ 93, 387},{ 158, 774},{ 211, 1197}, + { 278, 1589},{ 372, 1917},{ 475, 2191},{ 569, 2429}, + { 658, 2655},{ 744, 2868},{ 835, 3083},{ 926, 3271}, + { 1010, 3430},{ 1110, 3586},{ 1224, 3724},{ 1336, 3826}, + { 1449, 3908},{ 1547, 4021},{ 1636, 4136},{ 1751, 4200}, + { 1886, 4277},{ 1977, 4384},{ 2070, 4474},{ 2232, 4510} + }, + /*Cr qi=33 INTER*/ + { + { 77, 9},{ 90, 347},{ 80, 674},{ 91, 1053}, + { 107, 1444},{ 119, 1825},{ 127, 2196},{ 137, 2563}, + { 149, 2919},{ 161, 3259},{ 176, 3588},{ 194, 3905}, + { 217, 4209},{ 246, 4504},{ 280, 4786},{ 320, 5055}, + { 364, 5316},{ 409, 5565},{ 460, 5804},{ 517, 6039}, + { 578, 6264},{ 640, 6489},{ 701, 6721},{ 772, 6948} + } + } + }, + { + { + /*Y' qi=34 INTRA*/ + { + { 124, 40},{ 401, 1333},{ 823, 2262},{ 1318, 2842}, + { 1823, 3265},{ 2339, 3650},{ 2872, 3991},{ 3405, 4274}, + { 3926, 4513},{ 4448, 4704},{ 4961, 4845},{ 5450, 4921}, + { 5925, 4971},{ 6372, 5053},{ 6813, 5160},{ 7264, 5242}, + { 7704, 5291},{ 8124, 5346},{ 8500, 5382},{ 8831, 5384}, + { 9178, 5380},{ 9525, 5387},{ 9869, 5389},{10310, 5356} + }, + /*Y' qi=34 INTER*/ + { + { 64, -17},{ 101, 1344},{ 190, 2730},{ 299, 3981}, + { 430, 5110},{ 648, 6127},{ 1036, 6933},{ 1664, 7445}, + { 2535, 7652},{ 3504, 7653},{ 4402, 7572},{ 5173, 7479}, + { 5843, 7400},{ 6441, 7334},{ 6976, 7280},{ 7464, 7231}, + { 7910, 7189},{ 8332, 7157},{ 8730, 7125},{ 9091, 7103}, + { 9422, 7086},{ 9753, 7061},{10067, 7036},{10316, 7029} + } + }, + { + /*Cb qi=34 INTRA*/ + { + { 5, 3},{ 91, 369},{ 158, 746},{ 204, 1162}, + { 266, 1561},{ 358, 1903},{ 466, 
2189},{ 570, 2439}, + { 665, 2671},{ 765, 2880},{ 864, 3069},{ 970, 3238}, + { 1079, 3392},{ 1174, 3545},{ 1265, 3693},{ 1360, 3841}, + { 1471, 3968},{ 1572, 4083},{ 1675, 4181},{ 1804, 4255}, + { 1939, 4332},{ 2048, 4411},{ 2155, 4484},{ 2339, 4584} + }, + /*Cb qi=34 INTER*/ + { + { 99, 44},{ 92, 345},{ 82, 661},{ 86, 1043}, + { 101, 1436},{ 116, 1821},{ 128, 2191},{ 140, 2549}, + { 154, 2898},{ 168, 3235},{ 185, 3556},{ 203, 3865}, + { 224, 4166},{ 248, 4457},{ 278, 4741},{ 315, 5021}, + { 361, 5289},{ 416, 5546},{ 483, 5792},{ 559, 6025}, + { 651, 6237},{ 752, 6432},{ 849, 6626},{ 967, 6790} + } + }, + { + /*Cr qi=34 INTRA*/ + { + { 11, 7},{ 93, 387},{ 158, 773},{ 212, 1195}, + { 282, 1584},{ 378, 1909},{ 483, 2179},{ 578, 2414}, + { 671, 2633},{ 766, 2837},{ 866, 3038},{ 960, 3223}, + { 1049, 3376},{ 1158, 3520},{ 1285, 3644},{ 1400, 3740}, + { 1505, 3828},{ 1616, 3928},{ 1713, 4030},{ 1820, 4104}, + { 1957, 4185},{ 2063, 4280},{ 2160, 4355},{ 2320, 4341} + }, + /*Cr qi=34 INTER*/ + { + { 78, 11},{ 89, 347},{ 79, 674},{ 90, 1053}, + { 106, 1444},{ 117, 1823},{ 127, 2192},{ 137, 2558}, + { 149, 2912},{ 163, 3249},{ 178, 3574},{ 197, 3888}, + { 222, 4189},{ 252, 4481},{ 293, 4755},{ 341, 5013}, + { 386, 5268},{ 436, 5512},{ 498, 5743},{ 563, 5970}, + { 622, 6200},{ 694, 6415},{ 776, 6622},{ 871, 6818} + } + } + }, + { + { + /*Y' qi=35 INTRA*/ + { + { 116, 51},{ 433, 1312},{ 881, 2221},{ 1406, 2771}, + { 1948, 3156},{ 2511, 3501},{ 3085, 3811},{ 3654, 4066}, + { 4212, 4273},{ 4763, 4444},{ 5298, 4572},{ 5799, 4638}, + { 6285, 4678},{ 6747, 4746},{ 7203, 4838},{ 7673, 4905}, + { 8124, 4950},{ 8552, 5003},{ 8938, 5027},{ 9275, 5026}, + { 9628, 5019},{ 9981, 5024},{10331, 5030},{10795, 5000} + }, + /*Y' qi=35 INTER*/ + { + { 71, -10},{ 108, 1348},{ 203, 2710},{ 325, 3938}, + { 485, 5040},{ 766, 6000},{ 1267, 6706},{ 2048, 7089}, + { 3037, 7191},{ 4032, 7146},{ 4903, 7061},{ 5648, 6977}, + { 6301, 6912},{ 6884, 6857},{ 7413, 6812},{ 7898, 6775}, + { 8342, 6739},{ 
8764, 6710},{ 9160, 6688},{ 9519, 6668}, + { 9859, 6646},{10190, 6625},{10492, 6612},{10755, 6595} + } + }, + { + /*Cb qi=35 INTRA*/ + { + { 6, 3},{ 95, 369},{ 164, 746},{ 214, 1156}, + { 287, 1542},{ 390, 1869},{ 504, 2143},{ 611, 2388}, + { 712, 2613},{ 822, 2811},{ 937, 2987},{ 1055, 3147}, + { 1174, 3285},{ 1286, 3420},{ 1386, 3560},{ 1488, 3698}, + { 1604, 3814},{ 1714, 3916},{ 1825, 4008},{ 1958, 4088}, + { 2101, 4159},{ 2224, 4226},{ 2339, 4292},{ 2538, 4383} + }, + /*Cb qi=35 INTER*/ + { + { 98, 41},{ 90, 348},{ 86, 665},{ 92, 1042}, + { 108, 1432},{ 122, 1812},{ 136, 2175},{ 151, 2528}, + { 165, 2872},{ 182, 3202},{ 202, 3516},{ 225, 3819}, + { 251, 4112},{ 281, 4398},{ 320, 4675},{ 367, 4944}, + { 421, 5204},{ 493, 5450},{ 579, 5679},{ 672, 5892}, + { 785, 6082},{ 906, 6258},{ 1026, 6432},{ 1153, 6592} + } + }, + { + /*Cr qi=35 INTRA*/ + { + { 12, 7},{ 98, 388},{ 166, 773},{ 226, 1187}, + { 306, 1563},{ 411, 1874},{ 524, 2134},{ 622, 2365}, + { 721, 2577},{ 826, 2768},{ 947, 2946},{ 1066, 3106}, + { 1163, 3250},{ 1274, 3395},{ 1417, 3508},{ 1539, 3590}, + { 1639, 3671},{ 1754, 3765},{ 1865, 3855},{ 1979, 3921}, + { 2127, 3998},{ 2249, 4085},{ 2346, 4172},{ 2473, 4210} + }, + /*Cr qi=35 INTER*/ + { + { 86, 12},{ 94, 354},{ 85, 677},{ 96, 1052}, + { 113, 1439},{ 125, 1811},{ 135, 2177},{ 147, 2537}, + { 160, 2884},{ 177, 3215},{ 195, 3535},{ 219, 3842}, + { 252, 4133},{ 292, 4413},{ 339, 4680},{ 396, 4928}, + { 455, 5169},{ 514, 5408},{ 588, 5626},{ 672, 5835}, + { 750, 6051},{ 837, 6257},{ 943, 6442},{ 1073, 6595} + } + } + }, + { + { + /*Y' qi=36 INTRA*/ + { + { 116, 52},{ 432, 1312},{ 881, 2215},{ 1407, 2759}, + { 1948, 3140},{ 2511, 3484},{ 3090, 3789},{ 3672, 4036}, + { 4243, 4236},{ 4803, 4397},{ 5346, 4517},{ 5856, 4581}, + { 6350, 4614},{ 6821, 4675},{ 7286, 4763},{ 7754, 4832}, + { 8201, 4875},{ 8631, 4922},{ 9015, 4948},{ 9351, 4945}, + { 9706, 4941},{10061, 4948},{10408, 4949},{10878, 4923} + }, + /*Y' qi=36 INTER*/ + { + { 63, -16},{ 114, 
1332},{ 216, 2690},{ 343, 3914}, + { 515, 5009},{ 829, 5939},{ 1399, 6586},{ 2263, 6901}, + { 3290, 6967},{ 4272, 6920},{ 5115, 6847},{ 5839, 6779}, + { 6478, 6726},{ 7051, 6685},{ 7571, 6649},{ 8050, 6614}, + { 8495, 6587},{ 8908, 6567},{ 9298, 6550},{ 9673, 6530}, + {10005, 6512},{10324, 6499},{10640, 6483},{10936, 6487} + } + }, + { + /*Cb qi=36 INTRA*/ + { + { 6, 3},{ 98, 370},{ 170, 746},{ 225, 1150}, + { 306, 1527},{ 416, 1845},{ 534, 2116},{ 642, 2363}, + { 743, 2591},{ 851, 2794},{ 964, 2972},{ 1081, 3133}, + { 1198, 3275},{ 1311, 3410},{ 1411, 3547},{ 1519, 3680}, + { 1642, 3789},{ 1750, 3892},{ 1860, 3982},{ 1998, 4054}, + { 2141, 4129},{ 2256, 4204},{ 2372, 4278},{ 2567, 4356} + }, + /*Cb qi=36 INTER*/ + { + { 107, 30},{ 96, 346},{ 88, 667},{ 100, 1039}, + { 115, 1426},{ 128, 1804},{ 142, 2164},{ 158, 2512}, + { 176, 2851},{ 195, 3178},{ 218, 3491},{ 243, 3791}, + { 270, 4084},{ 307, 4365},{ 348, 4638},{ 397, 4908}, + { 464, 5157},{ 545, 5392},{ 635, 5620},{ 734, 5831}, + { 854, 6015},{ 993, 6170},{ 1124, 6327},{ 1234, 6502} + } + }, + { + /*Cr qi=36 INTRA*/ + { + { 12, 7},{ 102, 388},{ 172, 773},{ 239, 1182}, + { 328, 1546},{ 439, 1848},{ 554, 2106},{ 651, 2341}, + { 747, 2561},{ 850, 2757},{ 972, 2934},{ 1086, 3097}, + { 1182, 3245},{ 1302, 3382},{ 1447, 3491},{ 1572, 3567}, + { 1677, 3641},{ 1793, 3733},{ 1899, 3828},{ 2013, 3894}, + { 2163, 3967},{ 2283, 4059},{ 2387, 4142},{ 2559, 4145} + }, + /*Cr qi=36 INTER*/ + { + { 98, -10},{ 96, 347},{ 89, 676},{ 102, 1048}, + { 118, 1433},{ 130, 1804},{ 141, 2167},{ 154, 2523}, + { 171, 2866},{ 190, 3194},{ 212, 3508},{ 240, 3809}, + { 276, 4099},{ 320, 4377},{ 372, 4638},{ 428, 4887}, + { 492, 5122},{ 560, 5353},{ 638, 5572},{ 725, 5779}, + { 814, 5985},{ 902, 6192},{ 1013, 6377},{ 1155, 6527} + } + } + }, + { + { + /*Y' qi=37 INTRA*/ + { + { 109, 58},{ 445, 1302},{ 927, 2177},{ 1489, 2689}, + { 2053, 3052},{ 2632, 3387},{ 3230, 3683},{ 3830, 3922}, + { 4417, 4114},{ 4992, 4266},{ 5546, 4375},{ 6067, 4430}, 
+ { 6571, 4459},{ 7046, 4516},{ 7513, 4599},{ 7991, 4663}, + { 8445, 4706},{ 8883, 4749},{ 9273, 4771},{ 9612, 4770}, + { 9970, 4765},{10325, 4773},{10672, 4778},{11106, 4758} + }, + /*Y' qi=37 INTER*/ + { + { 56, -14},{ 114, 1333},{ 218, 2683},{ 354, 3894}, + { 550, 4966},{ 916, 5854},{ 1569, 6437},{ 2520, 6685}, + { 3596, 6704},{ 4585, 6635},{ 5424, 6556},{ 6147, 6489}, + { 6787, 6437},{ 7358, 6395},{ 7876, 6358},{ 8361, 6325}, + { 8807, 6294},{ 9229, 6271},{ 9631, 6253},{10002, 6238}, + {10356, 6228},{10678, 6212},{10975, 6197},{11274, 6185} + } + }, + { + /*Cb qi=37 INTRA*/ + { + { 6, 3},{ 99, 370},{ 171, 746},{ 227, 1149}, + { 309, 1522},{ 421, 1836},{ 541, 2104},{ 652, 2347}, + { 757, 2572},{ 871, 2768},{ 989, 2936},{ 1111, 3087}, + { 1238, 3223},{ 1357, 3352},{ 1465, 3486},{ 1576, 3612}, + { 1709, 3705},{ 1828, 3801},{ 1937, 3895},{ 2076, 3967}, + { 2220, 4035},{ 2345, 4104},{ 2466, 4173},{ 2680, 4265} + }, + /*Cb qi=37 INTER*/ + { + { 111, 27},{ 97, 344},{ 87, 667},{ 99, 1038}, + { 115, 1425},{ 128, 1802},{ 143, 2160},{ 159, 2506}, + { 176, 2843},{ 198, 3167},{ 220, 3477},{ 247, 3774}, + { 280, 4061},{ 321, 4338},{ 368, 4608},{ 427, 4867}, + { 501, 5109},{ 595, 5332},{ 701, 5544},{ 818, 5738}, + { 956, 5905},{ 1105, 6066},{ 1248, 6217},{ 1381, 6353} + } + }, + { + /*Cr qi=37 INTRA*/ + { + { 12, 7},{ 102, 388},{ 173, 773},{ 242, 1180}, + { 331, 1541},{ 444, 1839},{ 562, 2095},{ 662, 2326}, + { 763, 2540},{ 871, 2728},{ 1003, 2892},{ 1130, 3045}, + { 1230, 3188},{ 1350, 3321},{ 1503, 3418},{ 1634, 3492}, + { 1737, 3568},{ 1856, 3653},{ 1970, 3744},{ 2091, 3802}, + { 2247, 3871},{ 2371, 3962},{ 2477, 4041},{ 2655, 4052} + }, + /*Cr qi=37 INTER*/ + { + { 89, -9},{ 97, 347},{ 88, 677},{ 102, 1048}, + { 118, 1432},{ 130, 1802},{ 141, 2163},{ 154, 2517}, + { 172, 2857},{ 192, 3181},{ 216, 3494},{ 246, 3793}, + { 286, 4074},{ 337, 4343},{ 395, 4600},{ 464, 4837}, + { 534, 5066},{ 608, 5289},{ 694, 5501},{ 788, 5704}, + { 893, 5901},{ 1010, 6088},{ 1151, 6249},{ 
1331, 6374} + } + } + }, + { + { + /*Y' qi=38 INTRA*/ + { + { 107, 65},{ 476, 1286},{ 968, 2148},{ 1548, 2641}, + { 2141, 2979},{ 2757, 3289},{ 3390, 3564},{ 4020, 3784}, + { 4632, 3957},{ 5224, 4097},{ 5794, 4201},{ 6326, 4250}, + { 6828, 4274},{ 7309, 4322},{ 7790, 4401},{ 8271, 4463}, + { 8729, 4498},{ 9165, 4540},{ 9552, 4566},{ 9901, 4560}, + {10266, 4552},{10617, 4563},{10964, 4572},{11393, 4567} + }, + /*Y' qi=38 INTER*/ + { + { 57, -13},{ 118, 1332},{ 233, 2665},{ 386, 3856}, + { 620, 4899},{ 1070, 5722},{ 1849, 6211},{ 2898, 6384}, + { 3989, 6376},{ 4947, 6311},{ 5754, 6249},{ 6454, 6199}, + { 7077, 6161},{ 7640, 6132},{ 8159, 6101},{ 8639, 6076}, + { 9081, 6054},{ 9502, 6037},{ 9900, 6027},{10274, 6012}, + {10621, 5999},{10938, 5991},{11237, 5977},{11557, 5966} + } + }, + { + /*Cb qi=38 INTRA*/ + { + { 8, 3},{ 104, 370},{ 179, 744},{ 243, 1139}, + { 338, 1498},{ 458, 1801},{ 584, 2060},{ 700, 2297}, + { 812, 2514},{ 935, 2699},{ 1061, 2858},{ 1189, 3007}, + { 1321, 3141},{ 1446, 3266},{ 1563, 3388},{ 1684, 3512}, + { 1816, 3614},{ 1942, 3702},{ 2055, 3793},{ 2201, 3857}, + { 2357, 3923},{ 2477, 3994},{ 2593, 4061},{ 2768, 4178} + }, + /*Cb qi=38 INTER*/ + { + { 118, 24},{ 102, 342},{ 91, 663},{ 101, 1040}, + { 116, 1427},{ 131, 1799},{ 147, 2152},{ 168, 2491}, + { 191, 2822},{ 215, 3139},{ 244, 3441},{ 276, 3731}, + { 316, 4013},{ 363, 4286},{ 423, 4546},{ 495, 4795}, + { 584, 5028},{ 691, 5242},{ 814, 5439},{ 959, 5608}, + { 1119, 5759},{ 1277, 5906},{ 1449, 6035},{ 1655, 6144} + } + }, + { + /*Cr qi=38 INTRA*/ + { + { 12, 6},{ 106, 387},{ 182, 771},{ 261, 1168}, + { 364, 1514},{ 483, 1802},{ 603, 2053},{ 707, 2282}, + { 817, 2489},{ 933, 2670},{ 1074, 2825},{ 1210, 2967}, + { 1320, 3104},{ 1444, 3229},{ 1599, 3324},{ 1735, 3396}, + { 1846, 3464},{ 1971, 3547},{ 2086, 3646},{ 2206, 3711}, + { 2366, 3773},{ 2499, 3859},{ 2603, 3945},{ 2766, 3952} + }, + /*Cr qi=38 INTER*/ + { + { 86, -9},{ 91, 352},{ 85, 680},{ 102, 1053}, + { 119, 1435},{ 132, 1799},{ 
146, 2153},{ 162, 2501}, + { 183, 2835},{ 209, 3154},{ 240, 3458},{ 278, 3751}, + { 327, 4025},{ 388, 4284},{ 455, 4532},{ 529, 4766}, + { 616, 4980},{ 711, 5188},{ 815, 5386},{ 920, 5583}, + { 1042, 5770},{ 1186, 5936},{ 1348, 6080},{ 1542, 6196} + } + } + }, + { + { + /*Y' qi=39 INTRA*/ + { + { 103, 66},{ 479, 1283},{ 998, 2125},{ 1610, 2591}, + { 2223, 2913},{ 2855, 3214},{ 3501, 3482},{ 4146, 3698}, + { 4772, 3868},{ 5376, 3999},{ 5956, 4095},{ 6496, 4140}, + { 7008, 4162},{ 7499, 4209},{ 7987, 4282},{ 8478, 4338}, + { 8947, 4374},{ 9385, 4417},{ 9783, 4437},{10143, 4433}, + {10504, 4424},{10866, 4435},{11225, 4444},{11665, 4430} + }, + /*Y' qi=39 INTER*/ + { + { 56, 2},{ 118, 1332},{ 235, 2660},{ 395, 3843}, + { 653, 4867},{ 1153, 5652},{ 2003, 6089},{ 3113, 6214}, + { 4228, 6178},{ 5189, 6102},{ 6002, 6031},{ 6707, 5976}, + { 7336, 5936},{ 7901, 5900},{ 8424, 5870},{ 8915, 5844}, + { 9361, 5822},{ 9784, 5807},{10187, 5794},{10571, 5778}, + {10931, 5763},{11264, 5751},{11582, 5742},{11916, 5730} + } + }, + { + /*Cb qi=39 INTRA*/ + { + { 8, 3},{ 104, 370},{ 179, 744},{ 244, 1138}, + { 340, 1496},{ 461, 1796},{ 588, 2053},{ 705, 2288}, + { 820, 2503},{ 945, 2684},{ 1073, 2840},{ 1210, 2981}, + { 1352, 3106},{ 1480, 3225},{ 1603, 3342},{ 1728, 3464}, + { 1865, 3559},{ 1990, 3645},{ 2106, 3734},{ 2258, 3796}, + { 2413, 3856},{ 2540, 3920},{ 2667, 3986},{ 2887, 4060} + }, + /*Cb qi=39 INTER*/ + { + { 119, 19},{ 103, 340},{ 90, 664},{ 100, 1040}, + { 115, 1426},{ 131, 1797},{ 148, 2148},{ 169, 2486}, + { 192, 2816},{ 217, 3131},{ 247, 3432},{ 282, 3721}, + { 324, 3999},{ 374, 4268},{ 435, 4526},{ 520, 4766}, + { 621, 4990},{ 738, 5194},{ 878, 5376},{ 1035, 5543}, + { 1202, 5686},{ 1374, 5819},{ 1545, 5950},{ 1729, 6064} + } + }, + { + /*Cr qi=39 INTRA*/ + { + { 12, 6},{ 106, 387},{ 182, 771},{ 262, 1167}, + { 365, 1512},{ 486, 1798},{ 608, 2047},{ 713, 2274}, + { 824, 2479},{ 945, 2655},{ 1091, 2804},{ 1231, 2941}, + { 1346, 3073},{ 1475, 3194},{ 1633, 3282},{ 1778, 
3345}, + { 1891, 3414},{ 2013, 3501},{ 2138, 3584},{ 2266, 3640}, + { 2428, 3701},{ 2568, 3782},{ 2674, 3863},{ 2816, 3894} + }, + /*Cr qi=39 INTER*/ + { + { 88, -7},{ 92, 352},{ 85, 680},{ 102, 1053}, + { 119, 1434},{ 132, 1797},{ 146, 2151},{ 163, 2498}, + { 185, 2830},{ 211, 3147},{ 243, 3451},{ 285, 3735}, + { 337, 4005},{ 401, 4260},{ 477, 4499},{ 565, 4721}, + { 655, 4937},{ 749, 5148},{ 858, 5344},{ 979, 5529}, + { 1110, 5710},{ 1264, 5871},{ 1460, 5990},{ 1677, 6086} + } + } + }, + { + { + /*Y' qi=40 INTRA*/ + { + { 98, 71},{ 491, 1274},{ 1023, 2103},{ 1641, 2559}, + { 2257, 2877},{ 2898, 3171},{ 3566, 3429},{ 4233, 3629}, + { 4881, 3784},{ 5499, 3906},{ 6088, 3997},{ 6631, 4040}, + { 7145, 4060},{ 7640, 4107},{ 8128, 4178},{ 8618, 4233}, + { 9077, 4267},{ 9514, 4304},{ 9919, 4324},{10277, 4317}, + {10635, 4312},{10985, 4324},{11338, 4331},{11792, 4334} + }, + /*Y' qi=40 INTER*/ + { + { 63, -26},{ 125, 1331},{ 256, 2640},{ 439, 3801}, + { 757, 4782},{ 1391, 5474},{ 2399, 5805},{ 3582, 5870}, + { 4678, 5824},{ 5600, 5763},{ 6386, 5710},{ 7076, 5667}, + { 7693, 5637},{ 8252, 5610},{ 8775, 5586},{ 9255, 5571}, + { 9694, 5556},{10115, 5541},{10530, 5530},{10903, 5522}, + {11242, 5515},{11596, 5501},{11904, 5482},{12205, 5475} + } + }, + { + /*Cb qi=40 INTRA*/ + { + { 8, 3},{ 108, 371},{ 189, 743},{ 265, 1128}, + { 371, 1475},{ 499, 1767},{ 628, 2022},{ 746, 2256}, + { 864, 2467},{ 991, 2647},{ 1124, 2801},{ 1270, 2933}, + { 1412, 3054},{ 1547, 3165},{ 1677, 3277},{ 1804, 3393}, + { 1946, 3483},{ 2078, 3569},{ 2201, 3651},{ 2352, 3711}, + { 2513, 3766},{ 2643, 3826},{ 2775, 3880},{ 3025, 3919} + }, + /*Cb qi=40 INTER*/ + { + { 114, 35},{ 104, 349},{ 96, 667},{ 106, 1040}, + { 121, 1423},{ 138, 1789},{ 158, 2132},{ 184, 2464}, + { 212, 2787},{ 242, 3095},{ 279, 3389},{ 321, 3671}, + { 374, 3941},{ 438, 4199},{ 517, 4446},{ 617, 4673}, + { 740, 4881},{ 891, 5064},{ 1058, 5225},{ 1239, 5372}, + { 1441, 5499},{ 1638, 5610},{ 1840, 5719},{ 2076, 5814} + } + }, + { + 
/*Cr qi=40 INTRA*/ + { + { 14, 7},{ 114, 389},{ 193, 771},{ 283, 1156}, + { 399, 1488},{ 523, 1768},{ 643, 2018},{ 752, 2245}, + { 865, 2450},{ 984, 2626},{ 1139, 2763},{ 1290, 2887}, + { 1413, 3014},{ 1550, 3128},{ 1711, 3211},{ 1865, 3268}, + { 1981, 3334},{ 2103, 3415},{ 2237, 3486},{ 2365, 3543}, + { 2529, 3610},{ 2666, 3700},{ 2775, 3779},{ 2929, 3803} + }, + /*Cr qi=40 INTER*/ + { + { 89, -8},{ 95, 353},{ 90, 681},{ 107, 1053}, + { 124, 1430},{ 139, 1787},{ 156, 2136},{ 177, 2477}, + { 203, 2803},{ 237, 3112},{ 276, 3406},{ 329, 3683}, + { 395, 3942},{ 475, 4182},{ 567, 4407},{ 665, 4624}, + { 767, 4834},{ 879, 5032},{ 1011, 5213},{ 1169, 5375}, + { 1348, 5525},{ 1547, 5654},{ 1785, 5743},{ 2066, 5787} + } + } + }, + { + { + /*Y' qi=41 INTRA*/ + { + { 98, 71},{ 495, 1272},{ 1040, 2090},{ 1675, 2533}, + { 2302, 2842},{ 2953, 3132},{ 3631, 3381},{ 4309, 3574}, + { 4966, 3726},{ 5593, 3846},{ 6189, 3934},{ 6738, 3972}, + { 7256, 3991},{ 7754, 4036},{ 8250, 4099},{ 8747, 4150}, + { 9207, 4185},{ 9650, 4222},{10057, 4242},{10411, 4237}, + {10771, 4230},{11127, 4244},{11486, 4254},{11933, 4252} + }, + /*Y' qi=41 INTER*/ + { + { 65, -25},{ 125, 1331},{ 260, 2633},{ 457, 3782}, + { 807, 4740},{ 1499, 5397},{ 2562, 5693},{ 3766, 5743}, + { 4859, 5695},{ 5776, 5638},{ 6556, 5590},{ 7243, 5554}, + { 7859, 5529},{ 8417, 5506},{ 8935, 5486},{ 9419, 5473}, + { 9869, 5460},{10296, 5446},{10711, 5436},{11089, 5430}, + {11445, 5421},{11802, 5412},{12129, 5404},{12465, 5393} + } + }, + { + /*Cb qi=41 INTRA*/ + { + { 8, 3},{ 108, 371},{ 189, 743},{ 267, 1126}, + { 374, 1471},{ 504, 1760},{ 635, 2011},{ 758, 2241}, + { 881, 2447},{ 1013, 2621},{ 1147, 2773},{ 1293, 2906}, + { 1441, 3023},{ 1580, 3131},{ 1712, 3243},{ 1844, 3360}, + { 1985, 3451},{ 2114, 3532},{ 2240, 3613},{ 2390, 3680}, + { 2550, 3740},{ 2687, 3800},{ 2825, 3862},{ 3052, 3944} + }, + /*Cb qi=41 INTER*/ + { + { 104, 39},{ 100, 350},{ 95, 667},{ 105, 1040}, + { 121, 1422},{ 137, 1787},{ 159, 2129},{ 185, 2459}, + 
{ 216, 2778},{ 249, 3083},{ 287, 3374},{ 335, 3653}, + { 393, 3920},{ 462, 4175},{ 549, 4414},{ 660, 4636}, + { 791, 4839},{ 952, 5014},{ 1135, 5166},{ 1337, 5297}, + { 1552, 5411},{ 1752, 5530},{ 1972, 5634},{ 2224, 5724} + } + }, + { + /*Cr qi=41 INTRA*/ + { + { 15, 7},{ 115, 389},{ 193, 770},{ 284, 1154}, + { 401, 1484},{ 528, 1761},{ 652, 2005},{ 764, 2228}, + { 882, 2427},{ 1008, 2599},{ 1167, 2734},{ 1320, 2859}, + { 1443, 2990},{ 1580, 3103},{ 1743, 3181},{ 1894, 3241}, + { 2012, 3309},{ 2141, 3385},{ 2272, 3459},{ 2398, 3519}, + { 2566, 3584},{ 2707, 3680},{ 2816, 3762},{ 2991, 3770} + }, + /*Cr qi=41 INTER*/ + { + { 92, -9},{ 98, 354},{ 90, 682},{ 107, 1052}, + { 124, 1429},{ 139, 1786},{ 156, 2132},{ 178, 2471}, + { 207, 2794},{ 241, 3100},{ 285, 3391},{ 345, 3662}, + { 417, 3915},{ 503, 4151},{ 600, 4375},{ 703, 4589}, + { 815, 4791},{ 942, 4981},{ 1088, 5155},{ 1250, 5316}, + { 1432, 5462},{ 1653, 5575},{ 1930, 5639},{ 2250, 5655} + } + } + }, + { + { + /*Y' qi=42 INTRA*/ + { + { 109, 75},{ 534, 1257},{ 1114, 2047},{ 1793, 2456}, + { 2461, 2735},{ 3157, 2994},{ 3879, 3221},{ 4595, 3396}, + { 5282, 3531},{ 5931, 3638},{ 6546, 3714},{ 7105, 3749}, + { 7633, 3766},{ 8147, 3803},{ 8652, 3865},{ 9148, 3915}, + { 9613, 3946},{10075, 3976},{10489, 3997},{10835, 3994}, + {11195, 3985},{11553, 3997},{11909, 4004},{12369, 3990} + }, + /*Y' qi=42 INTER*/ + { + { 69, -23},{ 134, 1332},{ 287, 2611},{ 521, 3730}, + { 970, 4624},{ 1827, 5176},{ 3028, 5382},{ 4262, 5389}, + { 5325, 5338},{ 6214, 5291},{ 6976, 5255},{ 7651, 5228}, + { 8260, 5206},{ 8821, 5190},{ 9343, 5177},{ 9823, 5165}, + {10273, 5152},{10709, 5143},{11121, 5136},{11502, 5129}, + {11857, 5125},{12193, 5115},{12520, 5107},{12802, 5097} + } + }, + { + /*Cb qi=42 INTRA*/ + { + { 9, 3},{ 113, 371},{ 199, 743},{ 279, 1123}, + { 390, 1462},{ 525, 1743},{ 662, 1986},{ 789, 2208}, + { 916, 2406},{ 1057, 2571},{ 1204, 2712},{ 1362, 2835}, + { 1524, 2943},{ 1676, 3040},{ 1815, 3145},{ 1959, 3249}, + { 2117, 
3325},{ 2249, 3406},{ 2377, 3488},{ 2537, 3547}, + { 2706, 3597},{ 2854, 3646},{ 2999, 3705},{ 3236, 3759} + }, + /*Cb qi=42 INTER*/ + { + { 114, 44},{ 107, 353},{ 101, 670},{ 111, 1041}, + { 129, 1418},{ 148, 1775},{ 174, 2110},{ 208, 2432}, + { 244, 2746},{ 283, 3046},{ 330, 3330},{ 388, 3602}, + { 460, 3858},{ 546, 4101},{ 655, 4326},{ 793, 4530}, + { 966, 4703},{ 1165, 4851},{ 1388, 4980},{ 1630, 5088}, + { 1869, 5189},{ 2122, 5268},{ 2403, 5328},{ 2667, 5417} + } + }, + { + /*Cr qi=42 INTRA*/ + { + { 15, 7},{ 120, 390},{ 202, 771},{ 298, 1150}, + { 421, 1473},{ 553, 1743},{ 681, 1982},{ 796, 2199}, + { 923, 2388},{ 1062, 2547},{ 1225, 2678},{ 1392, 2792}, + { 1531, 2907},{ 1682, 3007},{ 1856, 3074},{ 2009, 3134}, + { 2138, 3192},{ 2274, 3257},{ 2407, 3333},{ 2536, 3393}, + { 2711, 3455},{ 2875, 3531},{ 3000, 3598},{ 3186, 3599} + }, + /*Cr qi=42 INTER*/ + { + { 87, -4},{ 95, 358},{ 97, 683},{ 113, 1052}, + { 131, 1423},{ 148, 1774},{ 170, 2116},{ 198, 2448}, + { 234, 2762},{ 276, 3062},{ 331, 3343},{ 404, 3603}, + { 494, 3844},{ 598, 4067},{ 715, 4276},{ 842, 4471}, + { 977, 4661},{ 1128, 4840},{ 1311, 4991},{ 1516, 5127}, + { 1759, 5233},{ 2050, 5300},{ 2377, 5323},{ 2710, 5304} + } + } + }, + { + { + /*Y' qi=43 INTRA*/ + { + { 99, 79},{ 557, 1244},{ 1175, 2016},{ 1882, 2408}, + { 2570, 2677},{ 3288, 2926},{ 4030, 3141},{ 4760, 3307}, + { 5458, 3435},{ 6115, 3537},{ 6743, 3608},{ 7312, 3636}, + { 7841, 3652},{ 8357, 3687},{ 8870, 3742},{ 9376, 3788}, + { 9850, 3821},{10315, 3853},{10734, 3873},{11084, 3870}, + {11442, 3862},{11800, 3874},{12160, 3879},{12618, 3876} + }, + /*Y' qi=43 INTER*/ + { + { 69, -22},{ 134, 1331},{ 294, 2601},{ 551, 3703}, + { 1056, 4563},{ 2003, 5061},{ 3276, 5215},{ 4534, 5194}, + { 5599, 5133},{ 6488, 5083},{ 7257, 5044},{ 7938, 5014}, + { 8556, 4992},{ 9124, 4975},{ 9648, 4960},{10138, 4948}, + {10594, 4939},{11039, 4926},{11462, 4919},{11847, 4912}, + {12216, 4904},{12570, 4896},{12883, 4889},{13189, 4879} + } + }, + { + /*Cb 
qi=43 INTRA*/ + { + { 9, 3},{ 114, 371},{ 202, 740},{ 294, 1110}, + { 417, 1440},{ 558, 1716},{ 700, 1956},{ 833, 2172}, + { 966, 2365},{ 1116, 2524},{ 1269, 2661},{ 1431, 2781}, + { 1599, 2885},{ 1756, 2980},{ 1902, 3082},{ 2051, 3185}, + { 2209, 3261},{ 2337, 3342},{ 2464, 3420},{ 2633, 3475}, + { 2809, 3525},{ 2948, 3579},{ 3094, 3633},{ 3347, 3678} + }, + /*Cb qi=43 INTER*/ + { + { 111, 44},{ 106, 353},{ 102, 670},{ 112, 1040}, + { 128, 1416},{ 148, 1771},{ 176, 2104},{ 211, 2424}, + { 250, 2734},{ 293, 3030},{ 347, 3309},{ 411, 3575}, + { 490, 3828},{ 589, 4064},{ 716, 4278},{ 869, 4472}, + { 1050, 4640},{ 1264, 4781},{ 1512, 4895},{ 1775, 4991}, + { 2042, 5069},{ 2310, 5141},{ 2593, 5207},{ 2912, 5239} + } + }, + { + /*Cr qi=43 INTRA*/ + { + { 15, 7},{ 121, 390},{ 208, 767},{ 315, 1135}, + { 449, 1449},{ 586, 1715},{ 718, 1950},{ 843, 2158}, + { 977, 2342},{ 1120, 2501},{ 1290, 2632},{ 1466, 2739}, + { 1613, 2845},{ 1763, 2945},{ 1937, 3015},{ 2093, 3070}, + { 2225, 3126},{ 2366, 3194},{ 2501, 3267},{ 2634, 3324}, + { 2815, 3385},{ 2964, 3466},{ 3087, 3538},{ 3263, 3555} + }, + /*Cr qi=43 INTER*/ + { + { 84, -4},{ 93, 358},{ 95, 683},{ 113, 1052}, + { 131, 1421},{ 148, 1770},{ 171, 2110},{ 201, 2439}, + { 240, 2750},{ 287, 3046},{ 348, 3322},{ 429, 3576}, + { 527, 3811},{ 641, 4029},{ 767, 4230},{ 904, 4422}, + { 1053, 4603},{ 1225, 4765},{ 1433, 4903},{ 1661, 5030}, + { 1928, 5121},{ 2252, 5160},{ 2604, 5164},{ 2979, 5125} + } + } + }, + { + { + /*Y' qi=44 INTRA*/ + { + { 103, 80},{ 560, 1244},{ 1183, 2009},{ 1891, 2391}, + { 2586, 2649},{ 3324, 2884},{ 4093, 3089},{ 4850, 3243}, + { 5575, 3358},{ 6252, 3452},{ 6886, 3518},{ 7459, 3546}, + { 7993, 3562},{ 8515, 3594},{ 9030, 3645},{ 9534, 3691}, + {10004, 3723},{10469, 3750},{10887, 3765},{11236, 3766}, + {11596, 3762},{11960, 3775},{12317, 3784},{12766, 3789} + }, + /*Y' qi=44 INTER*/ + { + { 77, -24},{ 145, 1332},{ 332, 2580},{ 642, 3649}, + { 1270, 4438},{ 2360, 4860},{ 3685, 4982},{ 4910, 4966}, + { 
5929, 4928},{ 6785, 4900},{ 7529, 4880},{ 8198, 4863}, + { 8804, 4850},{ 9361, 4842},{ 9882, 4836},{10371, 4830}, + {10827, 4822},{11262, 4816},{11672, 4811},{12052, 4807}, + {12431, 4806},{12780, 4798},{13095, 4792},{13401, 4791} + } + }, + { + /*Cb qi=44 INTRA*/ + { + { 9, 2},{ 122, 371},{ 214, 741},{ 307, 1109}, + { 433, 1432},{ 576, 1704},{ 718, 1939},{ 855, 2152}, + { 991, 2340},{ 1141, 2497},{ 1298, 2632},{ 1463, 2749}, + { 1636, 2851},{ 1796, 2944},{ 1947, 3041},{ 2101, 3140}, + { 2260, 3219},{ 2392, 3297},{ 2527, 3366},{ 2693, 3424}, + { 2872, 3477},{ 3025, 3525},{ 3175, 3584},{ 3451, 3626} + }, + /*Cb qi=44 INTER*/ + { + { 111, 14},{ 110, 339},{ 109, 671},{ 120, 1040}, + { 139, 1410},{ 162, 1758},{ 197, 2084},{ 243, 2397}, + { 291, 2702},{ 342, 2992},{ 405, 3265},{ 484, 3521}, + { 584, 3760},{ 705, 3983},{ 855, 4185},{ 1048, 4356}, + { 1274, 4500},{ 1531, 4617},{ 1816, 4707},{ 2111, 4783}, + { 2409, 4846},{ 2720, 4901},{ 3044, 4957},{ 3391, 4985} + } + }, + { + /*Cr qi=44 INTRA*/ + { + { 17, 7},{ 128, 392},{ 219, 770},{ 329, 1135}, + { 465, 1442},{ 601, 1703},{ 734, 1935},{ 862, 2142}, + { 998, 2325},{ 1147, 2482},{ 1321, 2606},{ 1496, 2710}, + { 1649, 2813},{ 1809, 2908},{ 1984, 2977},{ 2143, 3032}, + { 2279, 3087},{ 2423, 3152},{ 2559, 3225},{ 2684, 3288}, + { 2866, 3351},{ 3025, 3426},{ 3161, 3492},{ 3372, 3500} + }, + /*Cr qi=44 INTER*/ + { + { 89, 0},{ 101, 352},{ 104, 683},{ 121, 1051}, + { 141, 1414},{ 163, 1757},{ 192, 2092},{ 231, 2415}, + { 278, 2720},{ 336, 3007},{ 412, 3273},{ 510, 3516}, + { 633, 3733},{ 769, 3936},{ 914, 4130},{ 1076, 4307}, + { 1256, 4472},{ 1469, 4617},{ 1723, 4732},{ 2012, 4822}, + { 2347, 4871},{ 2716, 4875},{ 3082, 4866},{ 3422, 4826} + } + } + }, + { + { + /*Y' qi=45 INTRA*/ + { + { 119, 78},{ 610, 1226},{ 1271, 1965},{ 2026, 2319}, + { 2768, 2550},{ 3556, 2757},{ 4369, 2938},{ 5157, 3076}, + { 5901, 3182},{ 6598, 3268},{ 7253, 3326},{ 7844, 3343}, + { 8392, 3356},{ 8922, 3386},{ 9453, 3433},{ 9973, 3474}, + {10457, 
3503},{10929, 3530},{11351, 3543},{11709, 3541}, + {12068, 3537},{12434, 3547},{12805, 3555},{13268, 3563} + }, + /*Y' qi=45 INTER*/ + { + { 77, -20},{ 146, 1330},{ 342, 2566},{ 699, 3604}, + { 1439, 4332},{ 2669, 4672},{ 4075, 4727},{ 5318, 4679}, + { 6345, 4630},{ 7209, 4595},{ 7963, 4570},{ 8644, 4551}, + { 9262, 4535},{ 9831, 4525},{10370, 4515},{10872, 4506}, + {11334, 4500},{11783, 4492},{12219, 4489},{12617, 4483}, + {12995, 4477},{13350, 4472},{13674, 4466},{13968, 4468} + } + }, + { + /*Cb qi=45 INTRA*/ + { + { 9, 2},{ 122, 370},{ 219, 735},{ 324, 1096}, + { 465, 1414},{ 619, 1679},{ 771, 1905},{ 920, 2103}, + { 1070, 2276},{ 1236, 2419},{ 1410, 2539},{ 1595, 2644}, + { 1784, 2736},{ 1949, 2831},{ 2104, 2931},{ 2275, 3021}, + { 2443, 3092},{ 2586, 3166},{ 2735, 3234},{ 2904, 3288}, + { 3093, 3338},{ 3262, 3382},{ 3419, 3427},{ 3708, 3456} + }, + /*Cb qi=45 INTER*/ + { + { 103, 0},{ 109, 339},{ 109, 670},{ 119, 1039}, + { 137, 1408},{ 162, 1754},{ 199, 2076},{ 248, 2386}, + { 301, 2684},{ 360, 2967},{ 433, 3234},{ 525, 3481}, + { 640, 3713},{ 780, 3924},{ 956, 4110},{ 1176, 4266}, + { 1438, 4390},{ 1736, 4481},{ 2057, 4553},{ 2385, 4613}, + { 2718, 4656},{ 3056, 4698},{ 3416, 4733},{ 3799, 4755} + } + }, + { + /*Cr qi=45 INTRA*/ + { + { 16, 7},{ 128, 391},{ 225, 763},{ 350, 1120}, + { 500, 1420},{ 649, 1673},{ 792, 1893},{ 929, 2089}, + { 1084, 2257},{ 1250, 2401},{ 1440, 2518},{ 1633, 2614}, + { 1799, 2708},{ 1968, 2798},{ 2151, 2863},{ 2314, 2914}, + { 2453, 2968},{ 2611, 3025},{ 2759, 3095},{ 2887, 3160}, + { 3082, 3210},{ 3259, 3278},{ 3403, 3342},{ 3593, 3354} + }, + /*Cr qi=45 INTER*/ + { + { 92, 0},{ 101, 352},{ 103, 682},{ 120, 1049}, + { 140, 1412},{ 163, 1752},{ 193, 2083},{ 234, 2402}, + { 287, 2702},{ 353, 2983},{ 442, 3240},{ 557, 3471}, + { 694, 3680},{ 846, 3873},{ 1014, 4056},{ 1200, 4224}, + { 1414, 4369},{ 1664, 4495},{ 1946, 4595},{ 2278, 4654}, + { 2654, 4673},{ 3047, 4658},{ 3438, 4627},{ 3825, 4585} + } + } + }, + { + { + /*Y' qi=46 
INTRA*/ + { + { 119, 78},{ 610, 1227},{ 1277, 1960},{ 2043, 2309}, + { 2805, 2529},{ 3618, 2719},{ 4452, 2887},{ 5257, 3016}, + { 6017, 3115},{ 6727, 3195},{ 7392, 3248},{ 7984, 3267}, + { 8528, 3281},{ 9059, 3310},{ 9593, 3354},{10119, 3395}, + {10599, 3425},{11064, 3450},{11493, 3464},{11850, 3466}, + {12207, 3462},{12578, 3471},{12948, 3480},{13407, 3487} + }, + /*Y' qi=46 INTER*/ + { + { 74, -14},{ 149, 1326},{ 382, 2538},{ 807, 3541}, + { 1670, 4211},{ 3000, 4499},{ 4416, 4533},{ 5628, 4490}, + { 6628, 4453},{ 7479, 4425},{ 8228, 4406},{ 8902, 4393}, + { 9521, 4380},{10090, 4371},{10623, 4364},{11124, 4356}, + {11586, 4351},{12043, 4344},{12476, 4341},{12863, 4340}, + {13244, 4337},{13610, 4329},{13936, 4324},{14246, 4329} + } + }, + { + /*Cb qi=46 INTRA*/ + { + { 11, 2},{ 132, 371},{ 234, 737},{ 340, 1094}, + { 481, 1405},{ 637, 1667},{ 791, 1891},{ 944, 2084}, + { 1099, 2253},{ 1268, 2392},{ 1444, 2507},{ 1633, 2610}, + { 1825, 2700},{ 1990, 2794},{ 2147, 2895},{ 2321, 2984}, + { 2493, 3053},{ 2640, 3126},{ 2787, 3198},{ 2954, 3253}, + { 3146, 3297},{ 3313, 3344},{ 3473, 3393},{ 3757, 3434} + }, + /*Cb qi=46 INTER*/ + { + { 97, 0},{ 109, 339},{ 108, 669},{ 120, 1035}, + { 142, 1398},{ 173, 1737},{ 221, 2052},{ 281, 2353}, + { 345, 2646},{ 415, 2924},{ 504, 3183},{ 616, 3421}, + { 749, 3643},{ 914, 3842},{ 1123, 4012},{ 1379, 4150}, + { 1685, 4250},{ 2014, 4327},{ 2366, 4382},{ 2731, 4426}, + { 3083, 4470},{ 3445, 4490},{ 3805, 4511},{ 4146, 4539} + } + }, + { + /*Cr qi=46 INTRA*/ + { + { 19, 7},{ 137, 393},{ 237, 765},{ 364, 1116}, + { 516, 1411},{ 665, 1662},{ 809, 1880},{ 951, 2072}, + { 1109, 2236},{ 1278, 2378},{ 1474, 2491},{ 1669, 2584}, + { 1835, 2678},{ 2014, 2766},{ 2203, 2828},{ 2366, 2880}, + { 2506, 2933},{ 2661, 2988},{ 2810, 3053},{ 2941, 3116}, + { 3131, 3175},{ 3310, 3243},{ 3461, 3303},{ 3656, 3321} + }, + /*Cr qi=46 INTER*/ + { + { 91, 1},{ 103, 351},{ 104, 681},{ 121, 1046}, + { 144, 1401},{ 173, 1736},{ 213, 2060},{ 265, 2373}, + { 330, 
2666},{ 410, 2938},{ 517, 3185},{ 655, 3404}, + { 815, 3601},{ 989, 3784},{ 1183, 3951},{ 1400, 4104}, + { 1649, 4241},{ 1933, 4352},{ 2261, 4427},{ 2646, 4458}, + { 3057, 4446},{ 3453, 4418},{ 3820, 4385},{ 4171, 4352} + } + } + }, + { + { + /*Y' qi=47 INTRA*/ + { + { 117, 83},{ 670, 1205},{ 1408, 1904},{ 2239, 2219}, + { 3049, 2414},{ 3905, 2584},{ 4775, 2734},{ 5610, 2852}, + { 6393, 2944},{ 7121, 3017},{ 7804, 3066},{ 8407, 3081}, + { 8957, 3093},{ 9498, 3119},{10043, 3160},{10582, 3199}, + {11083, 3226},{11561, 3250},{11993, 3263},{12352, 3264}, + {12711, 3259},{13092, 3266},{13463, 3271},{13918, 3275} + }, + /*Y' qi=47 INTER*/ + { + { 74, -11},{ 148, 1325},{ 404, 2518},{ 910, 3478}, + { 1916, 4080},{ 3369, 4298},{ 4823, 4292},{ 6035, 4238}, + { 7037, 4197},{ 7894, 4168},{ 8650, 4146},{ 9337, 4129}, + { 9968, 4116},{10549, 4105},{11096, 4096},{11605, 4089}, + {12081, 4083},{12547, 4076},{12990, 4070},{13399, 4070}, + {13776, 4065},{14133, 4059},{14486, 4057},{14842, 4053} + } + }, + { + /*Cb qi=47 INTRA*/ + { + { 11, 2},{ 133, 370},{ 242, 731},{ 367, 1077}, + { 524, 1378},{ 692, 1630},{ 860, 1844},{ 1028, 2024}, + { 1203, 2178},{ 1393, 2305},{ 1582, 2413},{ 1787, 2507}, + { 1992, 2590},{ 2175, 2676},{ 2351, 2767},{ 2534, 2851}, + { 2707, 2923},{ 2862, 2994},{ 3021, 3060},{ 3193, 3111}, + { 3396, 3147},{ 3573, 3184},{ 3752, 3220},{ 4038, 3255} + }, + /*Cb qi=47 INTER*/ + { + { 101, 0},{ 107, 339},{ 108, 667},{ 120, 1033}, + { 142, 1394},{ 175, 1729},{ 227, 2040},{ 295, 2335}, + { 369, 2619},{ 452, 2888},{ 556, 3138},{ 686, 3368}, + { 850, 3574},{ 1050, 3758},{ 1299, 3910},{ 1605, 4024}, + { 1950, 4104},{ 2317, 4163},{ 2689, 4210},{ 3077, 4239}, + { 3466, 4258},{ 3840, 4278},{ 4205, 4298},{ 4515, 4340} + } + }, + { + /*Cr qi=47 INTRA*/ + { + { 19, 7},{ 138, 392},{ 248, 758},{ 396, 1094}, + { 563, 1378},{ 723, 1621},{ 881, 1829},{ 1037, 2011}, + { 1214, 2165},{ 1410, 2290},{ 1623, 2393},{ 1834, 2480}, + { 2016, 2564},{ 2203, 2647},{ 2405, 2707},{ 2569, 2757}, + { 
2709, 2810},{ 2871, 2860},{ 3027, 2924},{ 3178, 2980}, + { 3375, 3034},{ 3563, 3097},{ 3724, 3151},{ 3952, 3153} + }, + /*Cr qi=47 INTER*/ + { + { 91, 1},{ 100, 351},{ 102, 681},{ 120, 1043}, + { 144, 1397},{ 175, 1729},{ 219, 2049},{ 277, 2356}, + { 353, 2640},{ 451, 2902},{ 579, 3136},{ 739, 3342}, + { 926, 3525},{ 1125, 3698},{ 1343, 3859},{ 1595, 3998}, + { 1881, 4113},{ 2208, 4205},{ 2589, 4253},{ 3014, 4250}, + { 3444, 4220},{ 3838, 4183},{ 4196, 4147},{ 4521, 4116} + } + } + }, + { + { + /*Y' qi=48 INTRA*/ + { + { 107, 87},{ 681, 1200},{ 1456, 1883},{ 2306, 2193}, + { 3122, 2386},{ 3984, 2548},{ 4862, 2693},{ 5704, 2808}, + { 6495, 2899},{ 7232, 2970},{ 7915, 3018},{ 8524, 3034}, + { 9085, 3043},{ 9635, 3068},{10192, 3108},{10735, 3145}, + {11237, 3171},{11719, 3194},{12153, 3207},{12516, 3206}, + {12888, 3202},{13266, 3210},{13637, 3218},{14101, 3219} + }, + /*Y' qi=48 INTER*/ + { + { 83, -18},{ 147, 1328},{ 398, 2519},{ 923, 3468}, + { 1979, 4047},{ 3472, 4246},{ 4936, 4232},{ 6148, 4178}, + { 7150, 4139},{ 8007, 4111},{ 8765, 4091},{ 9458, 4076}, + {10090, 4063},{10676, 4054},{11226, 4045},{11742, 4038}, + {12223, 4033},{12686, 4029},{13127, 4022},{13527, 4015}, + {13915, 4012},{14277, 4007},{14619, 4004},{14966, 4001} + } + }, + { + /*Cb qi=48 INTRA*/ + { + { 11, 2},{ 134, 369},{ 245, 730},{ 373, 1075}, + { 531, 1374},{ 698, 1625},{ 865, 1839},{ 1033, 2019}, + { 1207, 2173},{ 1397, 2300},{ 1588, 2408},{ 1795, 2501}, + { 2003, 2581},{ 2187, 2666},{ 2362, 2757},{ 2548, 2841}, + { 2719, 2912},{ 2876, 2983},{ 3034, 3047},{ 3209, 3097}, + { 3409, 3137},{ 3589, 3178},{ 3762, 3216},{ 4004, 3252} + }, + /*Cb qi=48 INTER*/ + { + { 113, 26},{ 112, 344},{ 111, 668},{ 120, 1032}, + { 141, 1392},{ 173, 1727},{ 224, 2036},{ 290, 2330}, + { 363, 2612},{ 447, 2880},{ 551, 3130},{ 685, 3358}, + { 852, 3563},{ 1061, 3742},{ 1332, 3884},{ 1654, 3993}, + { 2011, 4068},{ 2394, 4120},{ 2782, 4160},{ 3172, 4186}, + { 3557, 4209},{ 3932, 4228},{ 4306, 4237},{ 4675, 4236} + } + 
}, + { + /*Cr qi=48 INTRA*/ + { + { 18, 7},{ 139, 389},{ 252, 755},{ 404, 1090}, + { 573, 1372},{ 732, 1615},{ 889, 1823},{ 1045, 2005}, + { 1222, 2159},{ 1417, 2285},{ 1631, 2387},{ 1843, 2474}, + { 2027, 2558},{ 2212, 2639},{ 2413, 2697},{ 2578, 2746}, + { 2720, 2798},{ 2887, 2852},{ 3040, 2913},{ 3181, 2970}, + { 3381, 3024},{ 3581, 3081},{ 3743, 3130},{ 3948, 3133} + }, + /*Cr qi=48 INTER*/ + { + { 89, 0},{ 106, 352},{ 105, 682},{ 120, 1044}, + { 144, 1395},{ 174, 1724},{ 215, 2044},{ 270, 2350}, + { 343, 2635},{ 441, 2895},{ 571, 3129},{ 735, 3334}, + { 926, 3518},{ 1139, 3684},{ 1371, 3836},{ 1628, 3977}, + { 1933, 4089},{ 2279, 4164},{ 2672, 4204},{ 3105, 4205}, + { 3533, 4176},{ 3931, 4135},{ 4290, 4089},{ 4624, 4057} + } + } + }, + { + { + /*Y' qi=49 INTRA*/ + { + { 120, 85},{ 706, 1194},{ 1485, 1875},{ 2348, 2187}, + { 3190, 2372},{ 4076, 2521},{ 4967, 2658},{ 5819, 2771}, + { 6611, 2861},{ 7345, 2936},{ 8026, 2990},{ 8626, 3013}, + { 9182, 3030},{ 9723, 3059},{10266, 3100},{10802, 3143}, + {11293, 3179},{11768, 3206},{12201, 3221},{12556, 3225}, + {12914, 3226},{13281, 3237},{13639, 3247},{14089, 3257} + }, + /*Y' qi=49 INTER*/ + { + { 72, -11},{ 155, 1320},{ 458, 2485},{ 1090, 3386}, + { 2284, 3907},{ 3835, 4075},{ 5272, 4064},{ 6449, 4026}, + { 7426, 4003},{ 8267, 3987},{ 9017, 3976},{ 9698, 3967}, + {10328, 3962},{10913, 3959},{11452, 3954},{11961, 3950}, + {12442, 3947},{12904, 3946},{13347, 3945},{13749, 3943}, + {14123, 3941},{14490, 3941},{14826, 3939},{15153, 3937} + } + }, + { + /*Cb qi=49 INTRA*/ + { + { 11, 2},{ 145, 369},{ 262, 729},{ 393, 1070}, + { 557, 1363},{ 731, 1607},{ 907, 1811},{ 1085, 1983}, + { 1268, 2130},{ 1465, 2251},{ 1658, 2359},{ 1868, 2454}, + { 2079, 2534},{ 2264, 2621},{ 2440, 2717},{ 2625, 2802}, + { 2792, 2878},{ 2945, 2954},{ 3106, 3021},{ 3277, 3075}, + { 3466, 3119},{ 3638, 3170},{ 3824, 3213},{ 4100, 3243} + }, + /*Cb qi=49 INTER*/ + { + { 98, -6},{ 113, 343},{ 110, 669},{ 122, 1029}, + { 149, 1380},{ 192, 1706},{ 
258, 2007},{ 340, 2293}, + { 426, 2569},{ 525, 2831},{ 653, 3071},{ 814, 3287}, + { 1013, 3478},{ 1262, 3637},{ 1575, 3761},{ 1936, 3851}, + { 2328, 3910},{ 2741, 3949},{ 3163, 3970},{ 3559, 3994}, + { 3936, 4025},{ 4300, 4050},{ 4655, 4060},{ 4962, 4062} + } + }, + { + /*Cr qi=49 INTRA*/ + { + { 19, 7},{ 151, 389},{ 270, 753},{ 427, 1084}, + { 602, 1360},{ 767, 1595},{ 933, 1794},{ 1098, 1968}, + { 1285, 2115},{ 1489, 2237},{ 1699, 2342},{ 1912, 2435}, + { 2101, 2519},{ 2288, 2601},{ 2486, 2663},{ 2651, 2715}, + { 2799, 2769},{ 2958, 2825},{ 3106, 2890},{ 3257, 2948}, + { 3452, 3007},{ 3634, 3075},{ 3786, 3136},{ 3959, 3164} + }, + /*Cr qi=49 INTER*/ + { + { 85, 1},{ 103, 352},{ 104, 681},{ 121, 1039}, + { 152, 1382},{ 195, 1702},{ 248, 2015},{ 316, 2316}, + { 403, 2595},{ 520, 2847},{ 676, 3068},{ 870, 3258}, + { 1091, 3429},{ 1329, 3585},{ 1597, 3725},{ 1894, 3849}, + { 2242, 3940},{ 2656, 3984},{ 3098, 3992},{ 3531, 3981}, + { 3936, 3950},{ 4304, 3915},{ 4646, 3879},{ 4915, 3861} + } + } + }, + { + { + /*Y' qi=50 INTRA*/ + { + { 122, 89},{ 798, 1170},{ 1682, 1812},{ 2613, 2096}, + { 3501, 2260},{ 4430, 2388},{ 5352, 2510},{ 6228, 2613}, + { 7043, 2698},{ 7793, 2770},{ 8486, 2823},{ 9092, 2846}, + { 9652, 2865},{10210, 2895},{10773, 2936},{11315, 2979}, + {11817, 3014},{12297, 3041},{12734, 3057},{13097, 3064}, + {13443, 3067},{13813, 3078},{14190, 3088},{14646, 3103} + }, + /*Y' qi=50 INTER*/ + { + { 73, -11},{ 154, 1318},{ 501, 2457},{ 1281, 3291}, + { 2685, 3719},{ 4356, 3810},{ 5811, 3769},{ 6988, 3726}, + { 7976, 3700},{ 8835, 3682},{ 9606, 3669},{10307, 3659}, + {10953, 3652},{11556, 3645},{12115, 3643},{12641, 3640}, + {13138, 3636},{13613, 3634},{14068, 3629},{14488, 3627}, + {14876, 3625},{15237, 3621},{15585, 3623},{15922, 3629} + } + }, + { + /*Cb qi=50 INTRA*/ + { + { 11, 2},{ 148, 368},{ 278, 724},{ 431, 1052}, + { 613, 1334},{ 806, 1567},{ 1004, 1756},{ 1203, 1915}, + { 1405, 2051},{ 1621, 2163},{ 1833, 2262},{ 2059, 2347}, + { 2280, 2424},{ 2476, 
2512},{ 2670, 2598},{ 2864, 2679}, + { 3037, 2754},{ 3201, 2826},{ 3376, 2887},{ 3562, 2936}, + { 3756, 2976},{ 3932, 3022},{ 4117, 3065},{ 4385, 3094} + }, + /*Cb qi=50 INTER*/ + { + { 92, -3},{ 112, 343},{ 109, 669},{ 121, 1027}, + { 149, 1375},{ 196, 1697},{ 270, 1992},{ 366, 2267}, + { 471, 2532},{ 594, 2782},{ 747, 3011},{ 942, 3212}, + { 1189, 3384},{ 1497, 3521},{ 1875, 3613},{ 2297, 3673}, + { 2739, 3710},{ 3195, 3725},{ 3644, 3737},{ 4057, 3751}, + { 4445, 3763},{ 4841, 3769},{ 5211, 3779},{ 5568, 3769} + } + }, + { + /*Cr qi=50 INTRA*/ + { + { 19, 7},{ 155, 388},{ 290, 744},{ 474, 1060}, + { 666, 1324},{ 847, 1549},{ 1033, 1737},{ 1219, 1898}, + { 1428, 2034},{ 1653, 2147},{ 1885, 2245},{ 2115, 2329}, + { 2316, 2410},{ 2517, 2486},{ 2730, 2539},{ 2901, 2586}, + { 3042, 2638},{ 3199, 2693},{ 3366, 2755},{ 3534, 2805}, + { 3738, 2858},{ 3934, 2916},{ 4079, 2975},{ 4257, 2992} + }, + /*Cr qi=50 INTER*/ + { + { 87, 1},{ 102, 353},{ 103, 680},{ 121, 1036}, + { 153, 1377},{ 199, 1694},{ 260, 1999},{ 339, 2291}, + { 446, 2559},{ 590, 2797},{ 780, 3003},{ 1010, 3176}, + { 1267, 3331},{ 1547, 3474},{ 1874, 3594},{ 2245, 3688}, + { 2666, 3742},{ 3130, 3758},{ 3594, 3748},{ 4028, 3711}, + { 4415, 3674},{ 4771, 3641},{ 5122, 3605},{ 5482, 3569} + } + } + }, + { + { + /*Y' qi=51 INTRA*/ + { + { 115, 93},{ 819, 1164},{ 1739, 1806},{ 2695, 2101}, + { 3612, 2257},{ 4552, 2374},{ 5479, 2490},{ 6352, 2593}, + { 7158, 2683},{ 7898, 2761},{ 8580, 2823},{ 9177, 2854}, + { 9728, 2880},{10268, 2917},{10816, 2966},{11350, 3016}, + {11834, 3058},{12311, 3089},{12741, 3109},{13092, 3119}, + {13434, 3126},{13791, 3142},{14156, 3155},{14590, 3171} + }, + /*Y' qi=51 INTER*/ + { + { 58, 0},{ 171, 1307},{ 610, 2407},{ 1563, 3175}, + { 3116, 3545},{ 4789, 3624},{ 6185, 3602},{ 7320, 3583}, + { 8282, 3574},{ 9124, 3569},{ 9878, 3567},{10569, 3565}, + {11207, 3563},{11801, 3564},{12359, 3566},{12884, 3567}, + {13373, 3568},{13841, 3567},{14289, 3566},{14699, 3568}, + {15086, 3568},{15446, 
3566},{15788, 3564},{16103, 3568} + } + }, + { + /*Cb qi=51 INTRA*/ + { + { 14, 3},{ 161, 369},{ 297, 722},{ 454, 1047}, + { 639, 1325},{ 833, 1554},{ 1033, 1742},{ 1236, 1897}, + { 1440, 2032},{ 1653, 2148},{ 1860, 2253},{ 2077, 2347}, + { 2288, 2432},{ 2476, 2525},{ 2661, 2621},{ 2841, 2714}, + { 3010, 2797},{ 3170, 2876},{ 3333, 2945},{ 3510, 3000}, + { 3696, 3054},{ 3865, 3114},{ 4046, 3164},{ 4317, 3200} + }, + /*Cb qi=51 INTER*/ + { + { 88, -11},{ 109, 341},{ 109, 668},{ 126, 1019}, + { 168, 1358},{ 233, 1670},{ 329, 1955},{ 451, 2219}, + { 584, 2472},{ 736, 2711},{ 931, 2923},{ 1179, 3104}, + { 1480, 3254},{ 1846, 3368},{ 2265, 3448},{ 2714, 3501}, + { 3180, 3524},{ 3638, 3529},{ 4074, 3543},{ 4485, 3560}, + { 4868, 3571},{ 5238, 3581},{ 5597, 3594},{ 5953, 3591} + } + }, + { + /*Cr qi=51 INTRA*/ + { + { 24, 7},{ 168, 388},{ 309, 742},{ 496, 1054}, + { 688, 1316},{ 873, 1538},{ 1063, 1723},{ 1252, 1882}, + { 1460, 2018},{ 1682, 2134},{ 1907, 2238},{ 2125, 2332}, + { 2317, 2422},{ 2507, 2510},{ 2705, 2575},{ 2869, 2630}, + { 3015, 2684},{ 3178, 2744},{ 3329, 2815},{ 3477, 2878}, + { 3667, 2945},{ 3848, 3016},{ 3997, 3082},{ 4174, 3121} + }, + /*Cr qi=51 INTER*/ + { + { 83, -2},{ 102, 351},{ 102, 680},{ 126, 1029}, + { 172, 1359},{ 238, 1665},{ 321, 1962},{ 422, 2246}, + { 552, 2505},{ 733, 2728},{ 970, 2912},{ 1247, 3069}, + { 1552, 3209},{ 1876, 3338},{ 2251, 3440},{ 2692, 3502}, + { 3161, 3529},{ 3637, 3525},{ 4084, 3509},{ 4487, 3479}, + { 4850, 3444},{ 5181, 3419},{ 5507, 3406},{ 5786, 3398} + } + } + }, + { + { + /*Y' qi=52 INTRA*/ + { + { 117, 93},{ 814, 1168},{ 1729, 1822},{ 2706, 2119}, + { 3655, 2262},{ 4604, 2374},{ 5528, 2490},{ 6394, 2596}, + { 7189, 2691},{ 7921, 2777},{ 8596, 2846},{ 9184, 2885}, + { 9728, 2918},{10260, 2961},{10796, 3014},{11316, 3069}, + {11793, 3115},{12267, 3150},{12692, 3172},{13037, 3185}, + {13367, 3196},{13717, 3214},{14087, 3227},{14521, 3249} + }, + /*Y' qi=52 INTER*/ + { + { 52, 0},{ 169, 1308},{ 668, 2382},{ 1735, 
3112}, + { 3384, 3451},{ 5077, 3519},{ 6461, 3506},{ 7587, 3496}, + { 8545, 3494},{ 9384, 3494},{10142, 3498},{10838, 3501}, + {11475, 3503},{12078, 3508},{12640, 3511},{13162, 3513}, + {13654, 3517},{14130, 3521},{14576, 3522},{14980, 3523}, + {15369, 3523},{15737, 3522},{16071, 3521},{16382, 3516} + } + }, + { + /*Cb qi=52 INTRA*/ + { + { 14, 3},{ 163, 369},{ 299, 722},{ 457, 1044}, + { 645, 1319},{ 843, 1545},{ 1050, 1728},{ 1261, 1879}, + { 1468, 2013},{ 1678, 2132},{ 1883, 2240},{ 2093, 2338}, + { 2301, 2428},{ 2488, 2523},{ 2667, 2619},{ 2843, 2718}, + { 3010, 2805},{ 3163, 2887},{ 3323, 2963},{ 3490, 3028}, + { 3665, 3087},{ 3841, 3145},{ 4011, 3197},{ 4289, 3230} + }, + /*Cb qi=52 INTER*/ + { + { 98, -7},{ 109, 342},{ 109, 668},{ 126, 1018}, + { 170, 1355},{ 242, 1663},{ 352, 1941},{ 490, 2195}, + { 642, 2439},{ 823, 2666},{ 1052, 2868},{ 1333, 3039}, + { 1670, 3178},{ 2074, 3280},{ 2524, 3348},{ 2996, 3390}, + { 3469, 3410},{ 3923, 3420},{ 4355, 3434},{ 4771, 3451}, + { 5166, 3468},{ 5532, 3483},{ 5885, 3499},{ 6263, 3501} + } + }, + { + /*Cr qi=52 INTRA*/ + { + { 25, 7},{ 170, 388},{ 312, 741},{ 500, 1051}, + { 694, 1310},{ 883, 1529},{ 1082, 1709},{ 1280, 1864}, + { 1491, 1998},{ 1710, 2117},{ 1932, 2225},{ 2143, 2324}, + { 2328, 2418},{ 2516, 2506},{ 2708, 2578},{ 2870, 2637}, + { 3017, 2693},{ 3170, 2758},{ 3312, 2835},{ 3455, 2901}, + { 3644, 2972},{ 3827, 3049},{ 3968, 3121},{ 4115, 3166} + }, + /*Cr qi=52 INTER*/ + { + { 86, -2},{ 101, 352},{ 100, 680},{ 126, 1028}, + { 175, 1356},{ 247, 1657},{ 341, 1948},{ 458, 2224}, + { 615, 2471},{ 828, 2681},{ 1091, 2857},{ 1395, 3008}, + { 1732, 3140},{ 2095, 3257},{ 2502, 3348},{ 2968, 3402}, + { 3457, 3420},{ 3926, 3413},{ 4360, 3388},{ 4759, 3357}, + { 5128, 3329},{ 5449, 3306},{ 5741, 3295},{ 6071, 3296} + } + } + }, + { + { + /*Y' qi=53 INTRA*/ + { + { 138, 93},{ 850, 1161},{ 1773, 1810},{ 2763, 2103}, + { 3722, 2245},{ 4675, 2360},{ 5600, 2483},{ 6464, 2597}, + { 7255, 2700},{ 7982, 2792},{ 8652, 
2867},{ 9237, 2913}, + { 9775, 2950},{10302, 2998},{10834, 3058},{11347, 3121}, + {11826, 3169},{12299, 3207},{12713, 3235},{13054, 3250}, + {13387, 3265},{13744, 3286},{14110, 3302},{14515, 3323} + }, + /*Y' qi=53 INTER*/ + { + { 52, 2},{ 169, 1308},{ 680, 2377},{ 1763, 3103}, + { 3410, 3450},{ 5094, 3531},{ 6469, 3526},{ 7590, 3525}, + { 8547, 3530},{ 9385, 3534},{10139, 3540},{10835, 3548}, + {11479, 3553},{12075, 3559},{12634, 3565},{13159, 3570}, + {13650, 3573},{14124, 3576},{14575, 3580},{14993, 3583}, + {15375, 3584},{15744, 3584},{16091, 3583},{16421, 3586} + } + }, + { + /*Cb qi=53 INTRA*/ + { + { 14, 3},{ 167, 367},{ 317, 717},{ 492, 1033}, + { 687, 1306},{ 887, 1531},{ 1095, 1715},{ 1309, 1866}, + { 1517, 2000},{ 1729, 2119},{ 1932, 2227},{ 2146, 2325}, + { 2358, 2414},{ 2544, 2511},{ 2724, 2611},{ 2902, 2711}, + { 3070, 2800},{ 3227, 2878},{ 3381, 2954},{ 3548, 3021}, + { 3724, 3077},{ 3888, 3140},{ 4065, 3196},{ 4359, 3225} + }, + /*Cb qi=53 INTER*/ + { + { 93, -8},{ 110, 342},{ 108, 668},{ 125, 1018}, + { 170, 1355},{ 242, 1663},{ 353, 1939},{ 494, 2192}, + { 651, 2433},{ 838, 2658},{ 1076, 2856},{ 1368, 3022}, + { 1716, 3158},{ 2123, 3260},{ 2575, 3330},{ 3042, 3373}, + { 3507, 3396},{ 3962, 3413},{ 4394, 3430},{ 4797, 3452}, + { 5169, 3476},{ 5547, 3496},{ 5914, 3510},{ 6235, 3525} + } + }, + { + /*Cr qi=53 INTRA*/ + { + { 25, 7},{ 175, 386},{ 335, 734},{ 541, 1037}, + { 737, 1296},{ 926, 1516},{ 1125, 1696},{ 1324, 1851}, + { 1540, 1984},{ 1763, 2102},{ 1989, 2210},{ 2202, 2310}, + { 2386, 2404},{ 2572, 2495},{ 2768, 2569},{ 2929, 2627}, + { 3071, 2684},{ 3231, 2749},{ 3374, 2825},{ 3514, 2894}, + { 3703, 2963},{ 3882, 3040},{ 4024, 3111},{ 4190, 3150} + }, + /*Cr qi=53 INTER*/ + { + { 87, -1},{ 99, 352},{ 100, 680},{ 125, 1027}, + { 175, 1355},{ 249, 1657},{ 343, 1946},{ 462, 2220}, + { 624, 2465},{ 844, 2671},{ 1122, 2841},{ 1435, 2989}, + { 1768, 3125},{ 2134, 3243},{ 2545, 3334},{ 3002, 3393}, + { 3490, 3412},{ 3965, 3405},{ 4401, 3384},{ 
4797, 3359}, + { 5156, 3328},{ 5482, 3297},{ 5800, 3292},{ 6135, 3293} + } + } + }, + { + { + /*Y' qi=54 INTRA*/ + { + { 184, 94},{ 902, 1151},{ 1876, 1776},{ 2881, 2057}, + { 3832, 2200},{ 4785, 2315},{ 5709, 2442},{ 6570, 2562}, + { 7362, 2672},{ 8092, 2771},{ 8760, 2852},{ 9337, 2901}, + { 9874, 2943},{10402, 2995},{10928, 3059},{11443, 3126}, + {11926, 3178},{12396, 3220},{12805, 3251},{13139, 3266}, + {13466, 3280},{13822, 3304},{14184, 3322},{14585, 3342} + }, + /*Y' qi=54 INTER*/ + { + { 60, 5},{ 169, 1308},{ 683, 2375},{ 1791, 3090}, + { 3478, 3412},{ 5184, 3470},{ 6568, 3455},{ 7697, 3446}, + { 8659, 3446},{ 9503, 3447},{10266, 3450},{10971, 3454}, + {11619, 3458},{12223, 3462},{12789, 3467},{13315, 3471}, + {13811, 3475},{14291, 3479},{14743, 3479},{15148, 3481}, + {15535, 3483},{15913, 3481},{16252, 3479},{16569, 3472} + } + }, + { + /*Cb qi=54 INTRA*/ + { + { 13, 2},{ 165, 367},{ 318, 715},{ 498, 1030}, + { 698, 1301},{ 906, 1523},{ 1121, 1703},{ 1336, 1853}, + { 1549, 1984},{ 1765, 2100},{ 1974, 2207},{ 2192, 2306}, + { 2402, 2396},{ 2587, 2493},{ 2773, 2591},{ 2953, 2691}, + { 3119, 2778},{ 3277, 2858},{ 3430, 2940},{ 3603, 3004}, + { 3788, 3059},{ 3950, 3121},{ 4128, 3173},{ 4398, 3215} + }, + /*Cb qi=54 INTER*/ + { + { 100, -3},{ 109, 343},{ 107, 668},{ 125, 1018}, + { 169, 1354},{ 241, 1662},{ 353, 1938},{ 496, 2190}, + { 655, 2431},{ 843, 2655},{ 1082, 2851},{ 1381, 3015}, + { 1739, 3146},{ 2154, 3243},{ 2610, 3310},{ 3094, 3344}, + { 3581, 3358},{ 4034, 3371},{ 4457, 3384},{ 4867, 3399}, + { 5255, 3413},{ 5630, 3425},{ 6003, 3440},{ 6346, 3440} + } + }, + { + /*Cr qi=54 INTRA*/ + { + { 23, 7},{ 174, 386},{ 338, 732},{ 549, 1034}, + { 751, 1289},{ 947, 1506},{ 1150, 1685},{ 1353, 1837}, + { 1572, 1969},{ 1800, 2087},{ 2031, 2192},{ 2248, 2291}, + { 2434, 2387},{ 2622, 2477},{ 2815, 2549},{ 2976, 2607}, + { 3126, 2663},{ 3286, 2727},{ 3427, 2807},{ 3569, 2877}, + { 3761, 2941},{ 3942, 3016},{ 4084, 3093},{ 4226, 3131} + }, + /*Cr qi=54 INTER*/ + { 
+ { 88, -2},{ 99, 351},{ 100, 680},{ 125, 1027}, + { 175, 1354},{ 248, 1656},{ 343, 1945},{ 463, 2219}, + { 626, 2463},{ 850, 2668},{ 1128, 2837},{ 1445, 2983}, + { 1791, 3111},{ 2168, 3224},{ 2597, 3309},{ 3075, 3351}, + { 3560, 3364},{ 4029, 3356},{ 4464, 3335},{ 4858, 3307}, + { 5218, 3275},{ 5547, 3256},{ 5850, 3247},{ 6171, 3214} + } + } + }, + { + { + /*Y' qi=55 INTRA*/ + { + { 178, 95},{ 968, 1137},{ 2000, 1747},{ 3013, 2027}, + { 3966, 2173},{ 4920, 2294},{ 5842, 2427},{ 6702, 2553}, + { 7489, 2668},{ 8213, 2773},{ 8875, 2858},{ 9452, 2913}, + { 9986, 2959},{10504, 3016},{11023, 3085},{11530, 3157}, + {12011, 3213},{12480, 3257},{12882, 3291},{13214, 3310}, + {13542, 3325},{13890, 3350},{14248, 3371},{14671, 3398} + }, + /*Y' qi=55 INTER*/ + { + { 59, 5},{ 170, 1307},{ 725, 2358},{ 1886, 3058}, + { 3589, 3385},{ 5284, 3459},{ 6654, 3458},{ 7771, 3461}, + { 8727, 3470},{ 9564, 3478},{10322, 3488},{11019, 3497}, + {11658, 3505},{12258, 3513},{12819, 3520},{13344, 3527}, + {13840, 3533},{14314, 3537},{14755, 3541},{15161, 3544}, + {15552, 3548},{15916, 3548},{16257, 3548},{16576, 3540} + } + }, + { + /*Cb qi=55 INTRA*/ + { + { 13, 2},{ 167, 366},{ 322, 714},{ 508, 1026}, + { 716, 1292},{ 930, 1511},{ 1148, 1690},{ 1366, 1839}, + { 1578, 1972},{ 1793, 2090},{ 2001, 2199},{ 2217, 2300}, + { 2427, 2393},{ 2609, 2495},{ 2784, 2600},{ 2961, 2704}, + { 3121, 2797},{ 3268, 2884},{ 3423, 2965},{ 3590, 3032}, + { 3764, 3096},{ 3926, 3165},{ 4101, 3223},{ 4405, 3258} + }, + /*Cb qi=55 INTER*/ + { + { 90, -4},{ 109, 344},{ 107, 668},{ 126, 1017}, + { 172, 1351},{ 249, 1657},{ 370, 1928},{ 527, 2174}, + { 702, 2407},{ 909, 2624},{ 1170, 2814},{ 1493, 2970}, + { 1869, 3097},{ 2292, 3192},{ 2752, 3258},{ 3232, 3295}, + { 3709, 3314},{ 4156, 3335},{ 4592, 3355},{ 5004, 3373}, + { 5377, 3389},{ 5737, 3411},{ 6092, 3432},{ 6473, 3423} + } + }, + { + /*Cr qi=55 INTRA*/ + { + { 23, 7},{ 175, 385},{ 342, 730},{ 561, 1028}, + { 771, 1279},{ 973, 1493},{ 1181, 1669},{ 1384, 1822}, 
+ { 1602, 1956},{ 1830, 2076},{ 2057, 2184},{ 2270, 2288}, + { 2452, 2389},{ 2637, 2484},{ 2823, 2559},{ 2983, 2621}, + { 3129, 2682},{ 3280, 2753},{ 3417, 2833},{ 3554, 2904}, + { 3743, 2977},{ 3921, 3060},{ 4055, 3137},{ 4185, 3186} + }, + /*Cr qi=55 INTER*/ + { + { 85, 0},{ 99, 352},{ 100, 679},{ 126, 1025}, + { 178, 1351},{ 256, 1650},{ 359, 1935},{ 493, 2202}, + { 675, 2439},{ 921, 2636},{ 1220, 2799},{ 1552, 2941}, + { 1910, 3068},{ 2303, 3177},{ 2735, 3262},{ 3206, 3311}, + { 3689, 3333},{ 4152, 3327},{ 4588, 3299},{ 4978, 3272}, + { 5325, 3243},{ 5651, 3221},{ 5969, 3210},{ 6218, 3185} + } + } + }, + { + { + /*Y' qi=56 INTRA*/ + { + { 137, 104},{ 1048, 1128},{ 2147, 1760},{ 3261, 2029}, + { 4319, 2131},{ 5310, 2234},{ 6245, 2351},{ 7101, 2464}, + { 7886, 2572},{ 8610, 2675},{ 9270, 2762},{ 9840, 2818}, + {10365, 2869},{10875, 2928},{11393, 2997},{11900, 3071}, + {12371, 3128},{12834, 3172},{13233, 3208},{13562, 3228}, + {13878, 3245},{14221, 3271},{14584, 3292},{15008, 3320} + }, + /*Y' qi=56 INTER*/ + { + { 19, 21},{ 207, 1292},{ 1031, 2252},{ 2553, 2846}, + { 4463, 3085},{ 6137, 3131},{ 7441, 3151},{ 8526, 3172}, + { 9468, 3193},{10301, 3209},{11059, 3224},{11760, 3237}, + {12405, 3249},{13008, 3261},{13570, 3270},{14100, 3278}, + {14597, 3284},{15074, 3289},{15524, 3297},{15929, 3302}, + {16314, 3306},{16675, 3307},{17004, 3305},{17288, 3301} + } + }, + { + /*Cb qi=56 INTRA*/ + { + { 16, 3},{ 188, 367},{ 353, 712},{ 546, 1017}, + { 765, 1275},{ 989, 1484},{ 1221, 1653},{ 1459, 1791}, + { 1681, 1920},{ 1893, 2046},{ 2102, 2160},{ 2323, 2257}, + { 2534, 2347},{ 2720, 2447},{ 2902, 2549},{ 3075, 2654}, + { 3239, 2749},{ 3392, 2835},{ 3544, 2920},{ 3712, 2988}, + { 3882, 3052},{ 4052, 3123},{ 4227, 3181},{ 4483, 3213} + }, + /*Cb qi=56 INTER*/ + { + { 92, -1},{ 111, 343},{ 114, 665},{ 148, 1003}, + { 224, 1321},{ 345, 1609},{ 526, 1858},{ 754, 2077}, + { 1009, 2281},{ 1319, 2464},{ 1702, 2614},{ 2145, 2732}, + { 2625, 2824},{ 3123, 2890},{ 3634, 2933},{ 
4137, 2954}, + { 4614, 2965},{ 5052, 2988},{ 5468, 3015},{ 5852, 3035}, + { 6213, 3060},{ 6557, 3081},{ 6906, 3094},{ 7243, 3112} + } + }, + { + /*Cr qi=56 INTRA*/ + { + { 28, 8},{ 195, 385},{ 373, 727},{ 598, 1019}, + { 816, 1263},{ 1033, 1465},{ 1260, 1630},{ 1482, 1773}, + { 1717, 1900},{ 1949, 2018},{ 2178, 2128},{ 2393, 2233}, + { 2570, 2338},{ 2749, 2435},{ 2937, 2514},{ 3097, 2577}, + { 3240, 2638},{ 3398, 2709},{ 3540, 2791},{ 3673, 2865}, + { 3869, 2938},{ 4049, 3019},{ 4179, 3095},{ 4330, 3137} + }, + /*Cr qi=56 INTER*/ + { + { 83, 0},{ 99, 353},{ 103, 676},{ 146, 1010}, + { 232, 1320},{ 355, 1601},{ 512, 1866},{ 713, 2109}, + { 988, 2312},{ 1344, 2471},{ 1750, 2602},{ 2180, 2719}, + { 2642, 2819},{ 3141, 2892},{ 3653, 2939},{ 4159, 2961}, + { 4636, 2961},{ 5072, 2945},{ 5464, 2917},{ 5813, 2895}, + { 6134, 2890},{ 6458, 2883},{ 6735, 2881},{ 6953, 2902} + } + } + }, + { + { + /*Y' qi=57 INTRA*/ + { + { 170, 106},{ 1106, 1120},{ 2246, 1740},{ 3399, 1993}, + { 4482, 2077},{ 5492, 2167},{ 6446, 2273},{ 7324, 2379}, + { 8130, 2482},{ 8866, 2578},{ 9537, 2661},{10119, 2715}, + {10646, 2762},{11161, 2820},{11694, 2886},{12214, 2957}, + {12693, 3013},{13166, 3053},{13569, 3087},{13897, 3106}, + {14224, 3122},{14568, 3148},{14931, 3167},{15390, 3192} + }, + /*Y' qi=57 INTER*/ + { + { 19, 20},{ 205, 1292},{ 1096, 2229},{ 2775, 2766}, + { 4811, 2943},{ 6512, 2964},{ 7832, 2976},{ 8940, 2990}, + { 9903, 3004},{10755, 3017},{11532, 3029},{12243, 3039}, + {12891, 3047},{13502, 3058},{14073, 3065},{14603, 3071}, + {15097, 3078},{15581, 3083},{16036, 3086},{16452, 3090}, + {16855, 3093},{17222, 3094},{17552, 3092},{17851, 3098} + } + }, + { + /*Cb qi=57 INTRA*/ + { + { 16, 3},{ 197, 365},{ 384, 704},{ 603, 1001}, + { 837, 1252},{ 1077, 1455},{ 1326, 1618},{ 1581, 1748}, + { 1819, 1871},{ 2042, 1993},{ 2264, 2104},{ 2500, 2196}, + { 2722, 2280},{ 2916, 2375},{ 3103, 2473},{ 3290, 2575}, + { 3456, 2667},{ 3612, 2748},{ 3775, 2829},{ 3958, 2896}, + { 4145, 2947},{ 4307, 
3012},{ 4476, 3070},{ 4733, 3110} + }, + /*Cb qi=57 INTER*/ + { + { 94, -1},{ 111, 344},{ 112, 665},{ 147, 1002}, + { 227, 1319},{ 353, 1604},{ 543, 1849},{ 785, 2062}, + { 1066, 2257},{ 1408, 2430},{ 1827, 2568},{ 2320, 2670}, + { 2848, 2743},{ 3386, 2791},{ 3934, 2812},{ 4453, 2820}, + { 4929, 2830},{ 5368, 2842},{ 5787, 2856},{ 6190, 2875}, + { 6554, 2896},{ 6895, 2913},{ 7229, 2927},{ 7572, 2932} + } + }, + { + /*Cr qi=57 INTRA*/ + { + { 28, 8},{ 207, 383},{ 413, 716},{ 661, 999}, + { 889, 1237},{ 1123, 1433},{ 1365, 1592},{ 1603, 1731}, + { 1853, 1852},{ 2103, 1965},{ 2345, 2072},{ 2571, 2173}, + { 2763, 2271},{ 2949, 2364},{ 3146, 2438},{ 3315, 2497}, + { 3459, 2552},{ 3618, 2616},{ 3767, 2697},{ 3906, 2773}, + { 4099, 2841},{ 4281, 2916},{ 4429, 2987},{ 4569, 3030} + }, + /*Cr qi=57 INTER*/ + { + { 85, 0},{ 99, 352},{ 102, 675},{ 147, 1008}, + { 235, 1317},{ 363, 1597},{ 529, 1858},{ 748, 2094}, + { 1050, 2287},{ 1439, 2436},{ 1877, 2557},{ 2352, 2660}, + { 2869, 2740},{ 3413, 2791},{ 3962, 2815},{ 4485, 2819}, + { 4955, 2816},{ 5382, 2800},{ 5769, 2772},{ 6107, 2748}, + { 6443, 2740},{ 6754, 2739},{ 7029, 2737},{ 7284, 2745} + } + } + }, + { + { + /*Y' qi=58 INTRA*/ + { + { 164, 109},{ 1198, 1111},{ 2396, 1737},{ 3606, 1978}, + { 4727, 2048},{ 5749, 2138},{ 6708, 2243},{ 7584, 2347}, + { 8388, 2449},{ 9122, 2549},{ 9784, 2635},{10354, 2691}, + {10876, 2740},{11385, 2800},{11912, 2869},{12429, 2941}, + {12902, 2997},{13375, 3040},{13779, 3075},{14103, 3096}, + {14435, 3112},{14783, 3140},{15141, 3160},{15599, 3186} + }, + /*Y' qi=58 INTER*/ + { + { 14, 23},{ 210, 1290},{ 1277, 2178},{ 3118, 2677}, + { 5207, 2834},{ 6902, 2857},{ 8218, 2878},{ 9323, 2900}, + {10285, 2919},{11132, 2934},{11899, 2949},{12599, 2961}, + {13235, 2971},{13835, 2982},{14394, 2991},{14917, 2997}, + {15412, 3005},{15882, 3009},{16325, 3013},{16735, 3016}, + {17131, 3018},{17501, 3021},{17824, 3021},{18125, 3016} + } + }, + { + /*Cb qi=58 INTRA*/ + { + { 17, 3},{ 200, 365},{ 389, 
703},{ 613, 996}, + { 853, 1243},{ 1095, 1445},{ 1349, 1604},{ 1613, 1731}, + { 1853, 1853},{ 2074, 1978},{ 2292, 2091},{ 2526, 2184}, + { 2750, 2266},{ 2945, 2360},{ 3134, 2458},{ 3320, 2561}, + { 3482, 2654},{ 3641, 2737},{ 3804, 2818},{ 3985, 2881}, + { 4168, 2935},{ 4331, 3003},{ 4499, 3060},{ 4751, 3100} + }, + /*Cb qi=58 INTER*/ + { + { 94, -1},{ 112, 345},{ 112, 665},{ 152, 998}, + { 247, 1307},{ 406, 1580},{ 644, 1810},{ 938, 2007}, + { 1271, 2189},{ 1668, 2348},{ 2151, 2470},{ 2691, 2558}, + { 3249, 2619},{ 3798, 2659},{ 4334, 2682},{ 4849, 2692}, + { 5314, 2700},{ 5747, 2721},{ 6167, 2742},{ 6547, 2765}, + { 6902, 2790},{ 7251, 2804},{ 7583, 2819},{ 7924, 2833} + } + }, + { + /*Cr qi=58 INTRA*/ + { + { 29, 8},{ 210, 382},{ 419, 714},{ 671, 993}, + { 903, 1229},{ 1141, 1422},{ 1390, 1578},{ 1635, 1713}, + { 1889, 1833},{ 2140, 1946},{ 2379, 2055},{ 2604, 2157}, + { 2794, 2256},{ 2977, 2349},{ 3174, 2422},{ 3339, 2482}, + { 3483, 2537},{ 3643, 2604},{ 3790, 2684},{ 3927, 2757}, + { 4112, 2826},{ 4294, 2900},{ 4451, 2975},{ 4600, 3011} + }, + /*Cr qi=58 INTER*/ + { + { 86, 0},{ 99, 352},{ 103, 675},{ 151, 1004}, + { 256, 1306},{ 417, 1573},{ 628, 1819},{ 901, 2040}, + { 1262, 2217},{ 1705, 2353},{ 2191, 2466},{ 2713, 2556}, + { 3268, 2622},{ 3831, 2664},{ 4374, 2682},{ 4881, 2686}, + { 5339, 2685},{ 5747, 2668},{ 6123, 2646},{ 6465, 2630}, + { 6783, 2618},{ 7082, 2623},{ 7366, 2632},{ 7673, 2654} + } + } + }, + { + { + /*Y' qi=59 INTRA*/ + { + { 142, 112},{ 1259, 1100},{ 2552, 1711},{ 3815, 1933}, + { 4955, 1987},{ 5983, 2068},{ 6949, 2165},{ 7832, 2263}, + { 8645, 2359},{ 9392, 2454},{10066, 2536},{10643, 2589}, + {11174, 2636},{11696, 2693},{12230, 2758},{12752, 2826}, + {13239, 2883},{13721, 2926},{14139, 2959},{14479, 2978}, + {14811, 2993},{15166, 3020},{15532, 3039},{16000, 3062} + }, + /*Y' qi=59 INTER*/ + { + { 8, 25},{ 211, 1289},{ 1394, 2144},{ 3421, 2580}, + { 5611, 2689},{ 7316, 2701},{ 8643, 2717},{ 9762, 2734}, + {10735, 2750},{11587, 
2763},{12353, 2775},{13056, 2785}, + {13693, 2793},{14288, 2805},{14843, 2814},{15361, 2821}, + {15857, 2827},{16328, 2831},{16763, 2834},{17171, 2838}, + {17568, 2840},{17941, 2842},{18285, 2843},{18586, 2839} + } + }, + { + /*Cb qi=59 INTRA*/ + { + { 17, 3},{ 224, 363},{ 441, 696},{ 689, 982}, + { 945, 1222},{ 1204, 1416},{ 1474, 1571},{ 1751, 1695}, + { 2001, 1816},{ 2228, 1941},{ 2453, 2055},{ 2693, 2147}, + { 2924, 2227},{ 3125, 2321},{ 3321, 2416},{ 3510, 2520}, + { 3676, 2616},{ 3839, 2699},{ 4008, 2778},{ 4193, 2842}, + { 4371, 2898},{ 4535, 2965},{ 4710, 3023},{ 4921, 3068} + }, + /*Cb qi=59 INTER*/ + { + { 95, -5},{ 111, 343},{ 112, 664},{ 157, 995}, + { 258, 1302},{ 429, 1569},{ 691, 1790},{ 1017, 1977}, + { 1387, 2148},{ 1832, 2294},{ 2368, 2401},{ 2961, 2472}, + { 3553, 2518},{ 4133, 2545},{ 4688, 2557},{ 5198, 2563}, + { 5663, 2574},{ 6100, 2590},{ 6511, 2608},{ 6898, 2621}, + { 7274, 2634},{ 7631, 2655},{ 7984, 2669},{ 8361, 2669} + } + }, + { + /*Cr qi=59 INTRA*/ + { + { 31, 8},{ 240, 379},{ 480, 706},{ 748, 978}, + { 993, 1208},{ 1250, 1394},{ 1519, 1543},{ 1779, 1674}, + { 2047, 1792},{ 2307, 1904},{ 2552, 2013},{ 2780, 2116}, + { 2973, 2216},{ 3165, 2309},{ 3362, 2383},{ 3528, 2444}, + { 3677, 2499},{ 3841, 2566},{ 3995, 2646},{ 4139, 2720}, + { 4324, 2793},{ 4504, 2867},{ 4658, 2939},{ 4806, 2975} + }, + /*Cr qi=59 INTER*/ + { + { 89, -3},{ 98, 352},{ 103, 674},{ 156, 1002}, + { 268, 1300},{ 441, 1562},{ 673, 1801},{ 980, 2010}, + { 1385, 2175},{ 1868, 2301},{ 2401, 2402},{ 2984, 2474}, + { 3591, 2520},{ 4179, 2545},{ 4729, 2555},{ 5232, 2553}, + { 5679, 2545},{ 6081, 2530},{ 6447, 2510},{ 6791, 2496}, + { 7101, 2487},{ 7393, 2489},{ 7684, 2499},{ 7950, 2501} + } + } + }, + { + { + /*Y' qi=60 INTRA*/ + { + { 92, 116},{ 1361, 1085},{ 2746, 1686},{ 4050, 1895}, + { 5209, 1939},{ 6244, 2012},{ 7213, 2103},{ 8105, 2197}, + { 8928, 2290},{ 9685, 2381},{10371, 2460},{10952, 2511}, + {11487, 2556},{12026, 2611},{12574, 2674},{13102, 2739}, + {13597, 
2793},{14092, 2831},{14523, 2862},{14862, 2881}, + {15198, 2897},{15568, 2923},{15949, 2941},{16416, 2964} + }, + /*Y' qi=60 INTER*/ + { + { 4, 30},{ 215, 1287},{ 1547, 2104},{ 3729, 2491}, + { 5973, 2568},{ 7672, 2577},{ 9001, 2591},{10123, 2606}, + {11094, 2620},{11943, 2632},{12709, 2643},{13409, 2652}, + {14044, 2660},{14641, 2669},{15193, 2677},{15709, 2684}, + {16201, 2689},{16675, 2693},{17118, 2696},{17522, 2701}, + {17920, 2704},{18293, 2706},{18620, 2702},{18923, 2700} + } + }, + { + /*Cb qi=60 INTRA*/ + { + { 18, 3},{ 227, 362},{ 447, 694},{ 708, 974}, + { 981, 1207},{ 1252, 1397},{ 1532, 1547},{ 1822, 1663}, + { 2082, 1780},{ 2316, 1903},{ 2548, 2013},{ 2794, 2101}, + { 3029, 2178},{ 3242, 2266},{ 3445, 2360},{ 3638, 2459}, + { 3816, 2547},{ 3980, 2628},{ 4146, 2708},{ 4344, 2766}, + { 4546, 2812},{ 4725, 2872},{ 4880, 2930},{ 5054, 2966} + }, + /*Cb qi=60 INTER*/ + { + { 97, -4},{ 112, 343},{ 114, 664},{ 162, 993}, + { 273, 1294},{ 472, 1553},{ 774, 1762},{ 1138, 1939}, + { 1543, 2102},{ 2034, 2236},{ 2620, 2329},{ 3244, 2389}, + { 3860, 2423},{ 4443, 2440},{ 4997, 2449},{ 5502, 2455}, + { 5962, 2458},{ 6413, 2466},{ 6836, 2485},{ 7217, 2506}, + { 7592, 2518},{ 7957, 2533},{ 8291, 2543},{ 8574, 2545} + } + }, + { + /*Cr qi=60 INTRA*/ + { + { 32, 8},{ 243, 379},{ 488, 702},{ 771, 968}, + { 1030, 1192},{ 1300, 1373},{ 1581, 1517},{ 1854, 1643}, + { 2127, 1757},{ 2393, 1864},{ 2645, 1968},{ 2879, 2068}, + { 3078, 2166},{ 3277, 2256},{ 3484, 2325},{ 3660, 2381}, + { 3808, 2433},{ 3970, 2496},{ 4138, 2571},{ 4288, 2643}, + { 4475, 2710},{ 4655, 2778},{ 4810, 2843},{ 4959, 2879} + }, + /*Cr qi=60 INTER*/ + { + { 86, -2},{ 99, 352},{ 103, 673},{ 160, 998}, + { 284, 1292},{ 484, 1546},{ 753, 1774},{ 1100, 1973}, + { 1546, 2129},{ 2072, 2246},{ 2652, 2334},{ 3279, 2392}, + { 3911, 2425},{ 4504, 2440},{ 5044, 2443},{ 5536, 2440}, + { 5979, 2430},{ 6381, 2413},{ 6735, 2397},{ 7062, 2382}, + { 7383, 2376},{ 7680, 2375},{ 7962, 2373},{ 8203, 2379} + } + } + }, + { 
+ { + /*Y' qi=61 INTRA*/ + { + { 54, 121},{ 1477, 1069},{ 3061, 1638},{ 4465, 1808}, + { 5649, 1827},{ 6710, 1884},{ 7716, 1958},{ 8648, 2037}, + { 9514, 2116},{10311, 2192},{11033, 2261},{11641, 2305}, + {12202, 2342},{12771, 2387},{13356, 2440},{13924, 2493}, + {14444, 2541},{14951, 2576},{15409, 2600},{15779, 2615}, + {16131, 2626},{16521, 2648},{16921, 2663},{17409, 2694} + }, + /*Y' qi=61 INTER*/ + { + { -1, 32},{ 216, 1286},{ 1806, 2036},{ 4279, 2327}, + { 6629, 2352},{ 8347, 2352},{ 9707, 2357},{10860, 2364}, + {11857, 2372},{12726, 2377},{13508, 2382},{14225, 2387}, + {14877, 2392},{15484, 2398},{16048, 2401},{16581, 2405}, + {17092, 2409},{17573, 2409},{18016, 2410},{18427, 2413}, + {18829, 2415},{19221, 2415},{19578, 2415},{19980, 2413} + } + }, + { + /*Cb qi=61 INTRA*/ + { + { 19, 3},{ 231, 362},{ 456, 693},{ 733, 965}, + { 1032, 1188},{ 1330, 1369},{ 1637, 1508},{ 1956, 1612}, + { 2241, 1718},{ 2496, 1832},{ 2750, 1932},{ 3019, 2007}, + { 3274, 2074},{ 3505, 2154},{ 3725, 2236},{ 3943, 2323}, + { 4138, 2403},{ 4323, 2476},{ 4505, 2543},{ 4706, 2592}, + { 4909, 2630},{ 5109, 2675},{ 5292, 2724},{ 5495, 2768} + }, + /*Cb qi=61 INTER*/ + { + { 91, -2},{ 111, 344},{ 114, 663},{ 166, 989}, + { 291, 1285},{ 522, 1534},{ 875, 1729},{ 1302, 1889}, + { 1786, 2031},{ 2368, 2141},{ 3042, 2207},{ 3734, 2243}, + { 4388, 2259},{ 4982, 2264},{ 5533, 2265},{ 6043, 2262}, + { 6524, 2264},{ 6982, 2274},{ 7422, 2283},{ 7831, 2295}, + { 8198, 2308},{ 8593, 2319},{ 8965, 2329},{ 9258, 2340} + } + }, + { + /*Cr qi=61 INTRA*/ + { + { 33, 9},{ 245, 378},{ 497, 699},{ 801, 958}, + { 1087, 1171},{ 1384, 1342},{ 1692, 1474},{ 1992, 1589}, + { 2290, 1692},{ 2576, 1789},{ 2852, 1884},{ 3109, 1973}, + { 3324, 2061},{ 3544, 2142},{ 3763, 2199},{ 3945, 2244}, + { 4103, 2292},{ 4283, 2349},{ 4469, 2413},{ 4635, 2476}, + { 4836, 2534},{ 5038, 2592},{ 5210, 2649},{ 5358, 2682} + }, + /*Cr qi=61 INTER*/ + { + { 82, 0},{ 97, 353},{ 104, 672},{ 165, 995}, + { 303, 1284},{ 532, 1529},{ 852, 
1742},{ 1273, 1921}, + { 1798, 2057},{ 2409, 2154},{ 3090, 2212},{ 3794, 2240}, + { 4460, 2251},{ 5057, 2249},{ 5596, 2249},{ 6085, 2245}, + { 6519, 2234},{ 6908, 2220},{ 7269, 2203},{ 7618, 2196}, + { 7949, 2198},{ 8269, 2195},{ 8554, 2196},{ 8928, 2217} + } + } + }, + { + { + /*Y' qi=62 INTRA*/ + { + { 29, 124},{ 1527, 1067},{ 3221, 1618},{ 4703, 1751}, + { 5909, 1744},{ 7001, 1779},{ 8057, 1829},{ 9049, 1885}, + { 9968, 1943},{10813, 1999},{11572, 2050},{12206, 2082}, + {12801, 2107},{13402, 2140},{14020, 2180},{14625, 2223}, + {15179, 2260},{15718, 2288},{16196, 2305},{16581, 2313}, + {16963, 2324},{17382, 2341},{17800, 2351},{18318, 2376} + }, + /*Y' qi=62 INTER*/ + { + { -8, 36},{ 218, 1284},{ 2073, 1965},{ 4814, 2159}, + { 7237, 2138},{ 8979, 2124},{10378, 2115},{11570, 2109}, + {12601, 2106},{13503, 2103},{14320, 2103},{15064, 2103}, + {15746, 2103},{16384, 2104},{16975, 2105},{17534, 2105}, + {18062, 2106},{18564, 2107},{19035, 2106},{19471, 2107}, + {19890, 2107},{20288, 2107},{20651, 2107},{21012, 2108} + } + }, + { + /*Cb qi=62 INTRA*/ + { + { 21, 3},{ 283, 360},{ 565, 683},{ 907, 938}, + { 1269, 1143},{ 1611, 1311},{ 1949, 1441},{ 2290, 1535}, + { 2596, 1632},{ 2877, 1738},{ 3162, 1828},{ 3458, 1893}, + { 3745, 1948},{ 4011, 2016},{ 4253, 2089},{ 4506, 2164}, + { 4734, 2233},{ 4943, 2294},{ 5162, 2353},{ 5381, 2393}, + { 5593, 2420},{ 5807, 2454},{ 6003, 2496},{ 6210, 2543} + }, + /*Cb qi=62 INTER*/ + { + { 91, -1},{ 110, 344},{ 113, 663},{ 169, 987}, + { 306, 1279},{ 562, 1519},{ 961, 1701},{ 1450, 1845}, + { 2013, 1967},{ 2686, 2053},{ 3437, 2095},{ 4171, 2109}, + { 4841, 2109},{ 5441, 2105},{ 6002, 2097},{ 6542, 2089}, + { 7028, 2087},{ 7491, 2088},{ 7949, 2090},{ 8377, 2089}, + { 8789, 2095},{ 9195, 2103},{ 9569, 2104},{ 9937, 2102} + } + }, + { + /*Cr qi=62 INTRA*/ + { + { 38, 8},{ 308, 374},{ 619, 685},{ 984, 925}, + { 1326, 1126},{ 1662, 1285},{ 1999, 1407},{ 2328, 1512}, + { 2659, 1604},{ 2976, 1691},{ 3285, 1774},{ 3570, 1853}, + { 3815, 
1931},{ 4068, 1998},{ 4304, 2044},{ 4491, 2082}, + { 4666, 2124},{ 4870, 2174},{ 5078, 2231},{ 5262, 2285}, + { 5480, 2335},{ 5703, 2378},{ 5905, 2423},{ 6075, 2454} + }, + /*Cr qi=62 INTER*/ + { + { 79, 1},{ 95, 353},{ 102, 671},{ 169, 992}, + { 318, 1277},{ 569, 1515},{ 936, 1716},{ 1428, 1876}, + { 2034, 1993},{ 2738, 2067},{ 3511, 2095},{ 4268, 2094}, + { 4943, 2087},{ 5543, 2079},{ 6074, 2074},{ 6552, 2069}, + { 6985, 2057},{ 7366, 2043},{ 7728, 2030},{ 8086, 2021}, + { 8423, 2017},{ 8752, 2016},{ 9057, 2014},{ 9376, 2008} + } + } + }, + { + { + /*Y' qi=63 INTRA*/ + { + { -59, 134},{ 1734, 1036},{ 3743, 1521},{ 5309, 1618}, + { 6520, 1597},{ 7664, 1609},{ 8809, 1630},{ 9894, 1657}, + {10907, 1687},{11838, 1717},{12673, 1744},{13379, 1758}, + {14038, 1767},{14698, 1784},{15379, 1806},{16062, 1831}, + {16694, 1852},{17300, 1867},{17827, 1878},{18250, 1881}, + {18702, 1884},{19199, 1892},{19665, 1896},{20273, 1908} + }, + /*Y' qi=63 INTER*/ + { + { -7, 33},{ 209, 1285},{ 2309, 1904},{ 5274, 2025}, + { 7801, 1966},{ 9637, 1924},{11126, 1892},{12403, 1868}, + {13515, 1849},{14491, 1834},{15380, 1822},{16197, 1814}, + {16944, 1806},{17645, 1799},{18303, 1794},{18916, 1789}, + {19494, 1785},{20056, 1782},{20568, 1779},{21047, 1776}, + {21508, 1775},{21925, 1772},{22327, 1770},{22678, 1771} + } + }, + { + /*Cb qi=63 INTRA*/ + { + { 20, 3},{ 294, 357},{ 608, 673},{ 1047, 908}, + { 1501, 1090},{ 1898, 1240},{ 2275, 1353},{ 2654, 1427}, + { 3014, 1502},{ 3366, 1579},{ 3726, 1637},{ 4084, 1674}, + { 4425, 1703},{ 4752, 1743},{ 5058, 1791},{ 5377, 1838}, + { 5676, 1877},{ 5946, 1912},{ 6213, 1945},{ 6458, 1969}, + { 6704, 1982},{ 6969, 1997},{ 7210, 2017},{ 7439, 2037} + }, + /*Cb qi=63 INTER*/ + { + { 86, 1},{ 108, 345},{ 111, 663},{ 168, 985}, + { 307, 1276},{ 577, 1513},{ 1007, 1688},{ 1550, 1819}, + { 2189, 1921},{ 2938, 1981},{ 3744, 2002},{ 4512, 2002}, + { 5199, 1996},{ 5824, 1986},{ 6419, 1971},{ 6978, 1954}, + { 7507, 1940},{ 8015, 1932},{ 8502, 1928},{ 8978, 
1920}, + { 9410, 1915},{ 9842, 1910},{10262, 1901},{10634, 1896} + } + }, + { + /*Cr qi=63 INTRA*/ + { + { 38, 7},{ 324, 367},{ 677, 670},{ 1136, 892}, + { 1562, 1070},{ 1951, 1209},{ 2326, 1313},{ 2694, 1399}, + { 3074, 1471},{ 3460, 1531},{ 3850, 1575},{ 4214, 1622}, + { 4522, 1679},{ 4819, 1723},{ 5089, 1749},{ 5315, 1769}, + { 5530, 1792},{ 5756, 1825},{ 6006, 1860},{ 6244, 1889}, + { 6514, 1924},{ 6792, 1946},{ 7026, 1962},{ 7191, 1971} + }, + /*Cr qi=63 INTER*/ + { + { 80, 2},{ 95, 354},{ 101, 671},{ 167, 990}, + { 321, 1274},{ 585, 1509},{ 984, 1702},{ 1534, 1849}, + { 2217, 1947},{ 3005, 1995},{ 3839, 1999},{ 4619, 1986}, + { 5310, 1973},{ 5933, 1961},{ 6486, 1952},{ 6988, 1942}, + { 7435, 1927},{ 7817, 1911},{ 8198, 1900},{ 8552, 1895}, + { 8881, 1890},{ 9253, 1883},{ 9598, 1876},{ 9923, 1859} + } + } + } +}; + +#endif diff --git a/thirdparty/libtheora/ocintrin.h b/thirdparty/libtheora/ocintrin.h new file mode 100644 index 0000000000..d49ebb2159 --- /dev/null +++ b/thirdparty/libtheora/ocintrin.h @@ -0,0 +1,128 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: ocintrin.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +/*Some common macros for potential platform-specific optimization.*/ +#include +#if !defined(_ocintrin_H) +# define _ocintrin_H (1) + +/*Some specific platforms may have optimized intrinsic or inline assembly + versions of these functions which can substantially improve performance. + We define macros for them to allow easy incorporation of these non-ANSI + features.*/ + +/*Note that we do not provide a macro for abs(), because it is provided as a + library function, which we assume is translated into an intrinsic to avoid + the function call overhead and then implemented in the smartest way for the + target platform. + With modern gcc (4.x), this is true: it uses cmov instructions if the + architecture supports it and branchless bit-twiddling if it does not (the + speed difference between the two approaches is not measurable). + Interestingly, the bit-twiddling method was patented in 2000 (US 6,073,150) + by Sun Microsystems, despite prior art dating back to at least 1996: + http://web.archive.org/web/19961201174141/www.x86.org/ftp/articles/pentopt/PENTOPT.TXT + On gcc 3.x, however, our assumption is not true, as abs() is translated to a + conditional jump, which is horrible on deeply piplined architectures (e.g., + all consumer architectures for the past decade or more). + Also be warned that -C*abs(x) where C is a constant is mis-optimized as + abs(C*x) on every gcc release before 4.2.3. 
+ See bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34130 */ + +/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if + given an appropriate architecture, but the branchless bit-twiddling versions + are just as fast, and do not require any special target architecture. + Earlier gcc versions (3.x) compiled both code to the same assembly + instructions, because of the way they represented ((_b)>(_a)) internally.*/ +#define OC_MAXI(_a,_b) ((_a)-((_a)-(_b)&-((_b)>(_a)))) +#define OC_MINI(_a,_b) ((_a)+((_b)-(_a)&-((_b)<(_a)))) +/*Clamps an integer into the given range. + If _a>_c, then the lower bound _a is respected over the upper bound _c (this + behavior is required to meet our documented API behavior). + _a: The lower bound. + _b: The value to clamp. + _c: The upper boud.*/ +#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c))) +#define OC_CLAMP255(_x) ((unsigned char)((((_x)<0)-1)&((_x)|-((_x)>255)))) +/*This has a chance of compiling branchless, and is just as fast as the + bit-twiddling method, which is slightly less portable, since it relies on a + sign-extended rightshift, which is not guaranteed by ANSI (but present on + every relevant platform).*/ +#define OC_SIGNI(_a) (((_a)>0)-((_a)<0)) +/*Slightly more portable than relying on a sign-extended right-shift (which is + not guaranteed by ANSI), and just as fast, since gcc (3.x and 4.x both) + compile it into the right-shift anyway.*/ +#define OC_SIGNMASK(_a) (-((_a)<0)) +/*Divides an integer by a power of two, truncating towards 0. + _dividend: The integer to divide. + _shift: The non-negative power of two to divide by. 
+ _rmask: (1<<_shift)-1*/ +#define OC_DIV_POW2(_dividend,_shift,_rmask)\ + ((_dividend)+(OC_SIGNMASK(_dividend)&(_rmask))>>(_shift)) +/*Divides _x by 65536, truncating towards 0.*/ +#define OC_DIV2_16(_x) OC_DIV_POW2(_x,16,0xFFFF) +/*Divides _x by 2, truncating towards 0.*/ +#define OC_DIV2(_x) OC_DIV_POW2(_x,1,0x1) +/*Divides _x by 8, truncating towards 0.*/ +#define OC_DIV8(_x) OC_DIV_POW2(_x,3,0x7) +/*Divides _x by 16, truncating towards 0.*/ +#define OC_DIV16(_x) OC_DIV_POW2(_x,4,0xF) +/*Right shifts _dividend by _shift, adding _rval, and subtracting one for + negative dividends first. + When _rval is (1<<_shift-1), this is equivalent to division with rounding + ties away from zero.*/ +#define OC_DIV_ROUND_POW2(_dividend,_shift,_rval)\ + ((_dividend)+OC_SIGNMASK(_dividend)+(_rval)>>(_shift)) +/*Divides a _x by 2, rounding towards even numbers.*/ +#define OC_DIV2_RE(_x) ((_x)+((_x)>>1&1)>>1) +/*Divides a _x by (1<<(_shift)), rounding towards even numbers.*/ +#define OC_DIV_POW2_RE(_x,_shift) \ + ((_x)+((_x)>>(_shift)&1)+((1<<(_shift))-1>>1)>>(_shift)) +/*Swaps two integers _a and _b if _a>_b.*/ +#define OC_SORT2I(_a,_b) \ + do{ \ + int t__; \ + t__=((_a)^(_b))&-((_b)<(_a)); \ + (_a)^=t__; \ + (_b)^=t__; \ + } \ + while(0) + +/*Accesses one of four (signed) bytes given an index. + This can be used to avoid small lookup tables.*/ +#define OC_BYTE_TABLE32(_a,_b,_c,_d,_i) \ + ((signed char) \ + (((_a)&0xFF|((_b)&0xFF)<<8|((_c)&0xFF)<<16|((_d)&0xFF)<<24)>>(_i)*8)) +/*Accesses one of eight (unsigned) nibbles given an index. 
+ This can be used to avoid small lookup tables.*/ +#define OC_UNIBBLE_TABLE32(_a,_b,_c,_d,_e,_f,_g,_h,_i) \ + ((((_a)&0xF|((_b)&0xF)<<4|((_c)&0xF)<<8|((_d)&0xF)<<12| \ + ((_e)&0xF)<<16|((_f)&0xF)<<20|((_g)&0xF)<<24|((_h)&0xF)<<28)>>(_i)*4)&0xF) + + + +/*All of these macros should expect floats as arguments.*/ +#define OC_MAXF(_a,_b) ((_a)<(_b)?(_b):(_a)) +#define OC_MINF(_a,_b) ((_a)>(_b)?(_b):(_a)) +#define OC_CLAMPF(_a,_b,_c) (OC_MINF(_a,OC_MAXF(_b,_c))) +#define OC_FABSF(_f) ((float)fabs(_f)) +#define OC_SQRTF(_f) ((float)sqrt(_f)) +#define OC_POWF(_b,_e) ((float)pow(_b,_e)) +#define OC_LOGF(_f) ((float)log(_f)) +#define OC_IFLOORF(_f) ((int)floor(_f)) +#define OC_ICEILF(_f) ((int)ceil(_f)) + +#endif diff --git a/thirdparty/libtheora/quant.c b/thirdparty/libtheora/quant.c new file mode 100644 index 0000000000..8359f5abea --- /dev/null +++ b/thirdparty/libtheora/quant.c @@ -0,0 +1,119 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: quant.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include +#include +#include +#include "quant.h" +#include "decint.h" + +static const unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2}; +static const unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2}; + +/*Initializes the dequantization tables from a set of quantizer info. 
+ Currently the dequantizer (and elsewhere enquantizer) tables are expected to + be initialized as pointing to the storage reserved for them in the + oc_theora_state (resp. oc_enc_ctx) structure. + If some tables are duplicates of others, the pointers will be adjusted to + point to a single copy of the tables, but the storage for them will not be + freed. + If you're concerned about the memory footprint, the obvious thing to do is + to move the storage out of its fixed place in the structures and allocate + it on demand. + However, a much, much better option is to only store the quantization + matrices being used for the current frame, and to recalculate these as the + qi values change between frames (this is what VP3 did).*/ +void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2], + int _pp_dc_scale[64],const th_quant_info *_qinfo){ + /*Coding mode: intra or inter.*/ + int qti; + /*Y', C_b, C_r*/ + int pli; + for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){ + /*Quality index.*/ + int qi; + /*Range iterator.*/ + int qri; + for(qi=0,qri=0;qri<=_qinfo->qi_ranges[qti][pli].nranges;qri++){ + th_quant_base base; + ogg_uint32_t q; + int qi_start; + int qi_end; + memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri], + sizeof(base)); + qi_start=qi; + if(qri==_qinfo->qi_ranges[qti][pli].nranges)qi_end=qi+1; + else qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri]; + /*Iterate over quality indicies in this range.*/ + for(;;){ + ogg_uint32_t qfac; + int zzi; + int ci; + /*In the original VP3.2 code, the rounding offset and the size of the + dead zone around 0 were controlled by a "sharpness" parameter. + The size of our dead zone is now controlled by the per-coefficient + quality thresholds returned by our HVS module. + We round down from a more accurate value when the quality of the + reconstruction does not fall below our threshold and it saves bits. 
+ Hence, all of that VP3.2 code is gone from here, and the remaining + floating point code has been implemented as equivalent integer code + with exact precision.*/ + qfac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0]; + /*For postprocessing, not dequantization.*/ + if(_pp_dc_scale!=NULL)_pp_dc_scale[qi]=(int)(qfac/160); + /*Scale DC the coefficient from the proper table.*/ + q=(qfac/100)<<2; + q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX); + _dequant[qi][pli][qti][0]=(ogg_uint16_t)q; + /*Now scale AC coefficients from the proper table.*/ + for(zzi=1;zzi<64;zzi++){ + q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[OC_FZIG_ZAG[zzi]]/100)<<2; + q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX); + _dequant[qi][pli][qti][zzi]=(ogg_uint16_t)q; + } + /*If this is a duplicate of a previous matrix, use that instead. + This simple check helps us improve cache coherency later.*/ + { + int dupe; + int qtj; + int plj; + dupe=0; + for(qtj=0;qtj<=qti;qtj++){ + for(plj=0;plj<(qtj=qi_end)break; + /*Interpolate the next base matrix.*/ + for(ci=0;ci<64;ci++){ + base[ci]=(unsigned char)( + (2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+ + (qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci]) + +_qinfo->qi_ranges[qti][pli].sizes[qri])/ + (2*_qinfo->qi_ranges[qti][pli].sizes[qri])); + } + } + } + } +} diff --git a/thirdparty/libtheora/quant.h b/thirdparty/libtheora/quant.h new file mode 100644 index 0000000000..49ce13a65c --- /dev/null +++ b/thirdparty/libtheora/quant.h @@ -0,0 +1,33 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: quant.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#if !defined(_quant_H) +# define _quant_H (1) +# include "theora/codec.h" +# include "ocintrin.h" + +typedef ogg_uint16_t oc_quant_table[64]; + + +/*Maximum scaled quantizer value.*/ +#define OC_QUANT_MAX (1024<<2) + + +void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2], + int _pp_dc_scale[64],const th_quant_info *_qinfo); + +#endif diff --git a/thirdparty/libtheora/rate.c b/thirdparty/libtheora/rate.c new file mode 100644 index 0000000000..4f43bb2e5f --- /dev/null +++ b/thirdparty/libtheora/rate.c @@ -0,0 +1,1137 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: rate.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ +#include +#include +#include "encint.h" + +/*A rough lookup table for tan(x), 0<=x>24; + if(i>=17)i=16; + t0=OC_ROUGH_TAN_LOOKUP[i]; + t1=OC_ROUGH_TAN_LOOKUP[i+1]; + d=_alpha*36-(i<<24); + return (int)(((ogg_int64_t)t0<<32)+(t1-t0<<8)*(ogg_int64_t)d>>32); +} + +/*Re-initialize the Bessel filter coefficients with the specified delay. 
+ This does not alter the x/y state, but changes the reaction time of the + filter. + Altering the time constant of a reactive filter without alterning internal + state is something that has to be done carefuly, but our design operates at + high enough delays and with small enough time constant changes to make it + safe.*/ +static void oc_iir_filter_reinit(oc_iir_filter *_f,int _delay){ + int alpha; + ogg_int64_t one48; + ogg_int64_t warp; + ogg_int64_t k1; + ogg_int64_t k2; + ogg_int64_t d; + ogg_int64_t a; + ogg_int64_t ik2; + ogg_int64_t b1; + ogg_int64_t b2; + /*This borrows some code from an unreleased version of Postfish. + See the recipe at http://unicorn.us.com/alex/2polefilters.html for details + on deriving the filter coefficients.*/ + /*alpha is Q24*/ + alpha=(1<<24)/_delay; + one48=(ogg_int64_t)1<<48; + /*warp is 7.12*/ + warp=OC_MAXI(oc_warp_alpha(alpha),1); + /*k1 is 9.12*/ + k1=3*warp; + /*k2 is 16.24.*/ + k2=k1*warp; + /*d is 16.15.*/ + d=((1<<12)+k1<<12)+k2+256>>9; + /*a is 0.32, since d is larger than both 1.0 and k2.*/ + a=(k2<<23)/d; + /*ik2 is 25.24.*/ + ik2=one48/k2; + /*b1 is Q56; in practice, the integer ranges between -2 and 2.*/ + b1=2*a*(ik2-(1<<24)); + /*b2 is Q56; in practice, the integer ranges between -2 and 2.*/ + b2=(one48<<8)-(4*a<<24)-b1; + /*All of the filter parameters are Q24.*/ + _f->c[0]=(ogg_int32_t)(b1+((ogg_int64_t)1<<31)>>32); + _f->c[1]=(ogg_int32_t)(b2+((ogg_int64_t)1<<31)>>32); + _f->g=(ogg_int32_t)(a+128>>8); +} + +/*Initialize a 2nd order low-pass Bessel filter with the corresponding delay + and initial value. 
+ _value is Q24.*/ +static void oc_iir_filter_init(oc_iir_filter *_f,int _delay,ogg_int32_t _value){ + oc_iir_filter_reinit(_f,_delay); + _f->y[1]=_f->y[0]=_f->x[1]=_f->x[0]=_value; +} + +static ogg_int64_t oc_iir_filter_update(oc_iir_filter *_f,ogg_int32_t _x){ + ogg_int64_t c0; + ogg_int64_t c1; + ogg_int64_t g; + ogg_int64_t x0; + ogg_int64_t x1; + ogg_int64_t y0; + ogg_int64_t y1; + ogg_int64_t ya; + c0=_f->c[0]; + c1=_f->c[1]; + g=_f->g; + x0=_f->x[0]; + x1=_f->x[1]; + y0=_f->y[0]; + y1=_f->y[1]; + ya=(_x+x0*2+x1)*g+y0*c0+y1*c1+(1<<23)>>24; + _f->x[1]=(ogg_int32_t)x0; + _f->x[0]=_x; + _f->y[1]=(ogg_int32_t)y0; + _f->y[0]=(ogg_int32_t)ya; + return ya; +} + + + +/*Search for the quantizer that matches the target most closely. + We don't assume a linear ordering, but when there are ties we pick the + quantizer closest to the old one.*/ +static int oc_enc_find_qi_for_target(oc_enc_ctx *_enc,int _qti,int _qi_old, + int _qi_min,ogg_int64_t _log_qtarget){ + ogg_int64_t best_qdiff; + int best_qi; + int qi; + best_qi=_qi_min; + best_qdiff=_enc->log_qavg[_qti][best_qi]-_log_qtarget; + best_qdiff=best_qdiff+OC_SIGNMASK(best_qdiff)^OC_SIGNMASK(best_qdiff); + for(qi=_qi_min+1;qi<64;qi++){ + ogg_int64_t qdiff; + qdiff=_enc->log_qavg[_qti][qi]-_log_qtarget; + qdiff=qdiff+OC_SIGNMASK(qdiff)^OC_SIGNMASK(qdiff); + if(qdiffstate.qis[0]; + /*If rate control is active, use the lambda for the _target_ quantizer. + This allows us to scale to rates slightly lower than we'd normally be able + to reach, and give the rate control a semblance of "fractional qi" + precision. + TODO: Add API for changing QI, and allow extra precision.*/ + if(_enc->state.info.target_bitrate>0)lq=_enc->rc.log_qtarget; + else lq=_enc->log_qavg[_qti][qi]; + /*The resulting lambda value is less than 0x500000.*/ + _enc->lambda=(int)oc_bexp64(2*lq-0x4780BD468D6B62BLL); + /*Select additional quantizers. 
+ The R-D optimal block AC quantizer statistics suggest that the distribution + is roughly Gaussian-like with a slight positive skew. + K-means clustering on log_qavg to select 3 quantizers produces cluster + centers of {log_qavg-0.6,log_qavg,log_qavg+0.7}. + Experiments confirm these are relatively good choices. + + Although we do greedy R-D optimization of the qii flags to avoid switching + too frequently, this becomes ineffective at low rates, either because we + do a poor job of predicting the actual R-D cost, or the greedy + optimization is not sufficient. + Therefore adaptive quantization is disabled above an (experimentally + suggested) threshold of log_qavg=7.00 (e.g., below INTRA qi=12 or + INTER qi=20 with current matrices). + This may need to be revised if the R-D cost estimation or qii flag + optimization strategies change.*/ + nqis=1; + if(lq<(OC_Q57(56)>>3)&&!_enc->vp3_compatible){ + qi1=oc_enc_find_qi_for_target(_enc,_qti,OC_MAXI(qi-1,0),0, + lq+(OC_Q57(7)+5)/10); + if(qi1!=qi)_enc->state.qis[nqis++]=qi1; + qi1=oc_enc_find_qi_for_target(_enc,_qti,OC_MINI(qi+1,63),0, + lq-(OC_Q57(6)+5)/10); + if(qi1!=qi&&qi1!=_enc->state.qis[nqis-1])_enc->state.qis[nqis++]=qi1; + } + _enc->state.nqis=nqis; +} + +/*Binary exponential of _log_scale with 24-bit fractional precision and + saturation. + _log_scale: A binary logarithm in Q24 format. + Return: The binary exponential in Q24 format, saturated to 2**47-1 if + _log_scale was too large.*/ +static ogg_int64_t oc_bexp_q24(ogg_int32_t _log_scale){ + if(_log_scale<(ogg_int32_t)23<<24){ + ogg_int64_t ret; + ret=oc_bexp64(((ogg_int64_t)_log_scale<<33)+OC_Q57(24)); + return ret<0x7FFFFFFFFFFFLL?ret:0x7FFFFFFFFFFFLL; + } + return 0x7FFFFFFFFFFFLL; +} + +/*Convenience function converts Q57 value to a clamped 32-bit Q24 value + _in: input in Q57 format. 
+ Return: same number in Q24 */ +static ogg_int32_t oc_q57_to_q24(ogg_int64_t _in){ + ogg_int64_t ret; + ret=_in+((ogg_int64_t)1<<32)>>33; + /*0x80000000 is automatically converted to unsigned on 32-bit systems. + -0x7FFFFFFF-1 is needed to avoid "promoting" the whole expression to + unsigned.*/ + return (ogg_int32_t)OC_CLAMPI(-0x7FFFFFFF-1,ret,0x7FFFFFFF); +} + +/*Binary exponential of _log_scale with 24-bit fractional precision and + saturation. + _log_scale: A binary logarithm in Q57 format. + Return: The binary exponential in Q24 format, saturated to 2**31-1 if + _log_scale was too large.*/ +static ogg_int32_t oc_bexp64_q24(ogg_int64_t _log_scale){ + if(_log_scalerc.bits_per_frame=(_enc->state.info.target_bitrate* + (ogg_int64_t)_enc->state.info.fps_denominator)/ + _enc->state.info.fps_numerator; + /*Insane framerates or frame sizes mean insane bitrates. + Let's not get carried away.*/ + if(_enc->rc.bits_per_frame>0x400000000000LL){ + _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL; + } + else if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32; + _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12); + _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay; + /*Start with a buffer fullness of 50% plus 25% of the amount we plan to spend + on a single keyframe interval. 
+ We can require fully half the bits in an interval for a keyframe, so this + initial level gives us maximum flexibility for over/under-shooting in + subsequent frames.*/ + _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)* + OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay); + _enc->rc.fullness=_enc->rc.target; + /*Pick exponents and initial scales for quantizer selection.*/ + npixels=_enc->state.info.frame_width* + (ogg_int64_t)_enc->state.info.frame_height; + _enc->rc.log_npixels=oc_blog64(npixels); + ibpp=npixels/_enc->rc.bits_per_frame; + if(ibpp<1){ + _enc->rc.exp[0]=59; + _enc->rc.log_scale[0]=oc_blog64(1997)-OC_Q57(8); + } + else if(ibpp<2){ + _enc->rc.exp[0]=55; + _enc->rc.log_scale[0]=oc_blog64(1604)-OC_Q57(8); + } + else{ + _enc->rc.exp[0]=48; + _enc->rc.log_scale[0]=oc_blog64(834)-OC_Q57(8); + } + if(ibpp<4){ + _enc->rc.exp[1]=100; + _enc->rc.log_scale[1]=oc_blog64(2249)-OC_Q57(8); + } + else if(ibpp<8){ + _enc->rc.exp[1]=95; + _enc->rc.log_scale[1]=oc_blog64(1751)-OC_Q57(8); + } + else{ + _enc->rc.exp[1]=73; + _enc->rc.log_scale[1]=oc_blog64(1260)-OC_Q57(8); + } + _enc->rc.prev_drop_count=0; + _enc->rc.log_drop_scale=OC_Q57(0); + /*Set up second order followers, initialized according to corresponding + time constants.*/ + oc_iir_filter_init(&_enc->rc.scalefilter[0],4, + oc_q57_to_q24(_enc->rc.log_scale[0])); + inter_delay=(_enc->rc.twopass? + OC_MAXI(_enc->keyframe_frequency_force,12):_enc->rc.buf_delay)>>1; + _enc->rc.inter_count=0; + /*We clamp the actual inter_delay to a minimum of 10 to work within the range + of values where later incrementing the delay works as designed. 
+ 10 is not an exact choice, but rather a good working trade-off.*/ + _enc->rc.inter_delay=10; + _enc->rc.inter_delay_target=inter_delay; + oc_iir_filter_init(&_enc->rc.scalefilter[1],_enc->rc.inter_delay, + oc_q57_to_q24(_enc->rc.log_scale[1])); + oc_iir_filter_init(&_enc->rc.vfrfilter,4, + oc_bexp64_q24(_enc->rc.log_drop_scale)); +} + +void oc_rc_state_init(oc_rc_state *_rc,oc_enc_ctx *_enc){ + _rc->twopass=0; + _rc->twopass_buffer_bytes=0; + _rc->twopass_force_kf=0; + _rc->frame_metrics=NULL; + _rc->rate_bias=0; + if(_enc->state.info.target_bitrate>0){ + /*The buffer size is set equal to the keyframe interval, clamped to the + range [12,256] frames. + The 12 frame minimum gives us some chance to distribute bit estimation + errors. + The 256 frame maximum means we'll require 8-10 seconds of pre-buffering + at 24-30 fps, which is not unreasonable.*/ + _rc->buf_delay=_enc->keyframe_frequency_force>256? + 256:_enc->keyframe_frequency_force; + /*By default, enforce all buffer constraints.*/ + _rc->drop_frames=1; + _rc->cap_overflow=1; + _rc->cap_underflow=0; + oc_enc_rc_reset(_enc); + } +} + +void oc_rc_state_clear(oc_rc_state *_rc){ + _ogg_free(_rc->frame_metrics); +} + +void oc_enc_rc_resize(oc_enc_ctx *_enc){ + /*If encoding has not yet begun, reset the buffer state.*/ + if(_enc->state.curframe_num<0)oc_enc_rc_reset(_enc); + else{ + int idt; + /*Otherwise, update the bounds on the buffer, but not the current + fullness.*/ + _enc->rc.bits_per_frame=(_enc->state.info.target_bitrate* + (ogg_int64_t)_enc->state.info.fps_denominator)/ + _enc->state.info.fps_numerator; + /*Insane framerates or frame sizes mean insane bitrates. 
+ Let's not get carried away.*/ + if(_enc->rc.bits_per_frame>0x400000000000LL){ + _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL; + } + else if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32; + _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12); + _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay; + _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)* + OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay); + /*Update the INTER-frame scale filter delay. + We jump to it immediately if we've already seen enough frames; otherwise + it is simply set as the new target.*/ + _enc->rc.inter_delay_target=idt=OC_MAXI(_enc->rc.buf_delay>>1,10); + if(idtrc.inter_delay,_enc->rc.inter_count)){ + oc_iir_filter_init(&_enc->rc.scalefilter[1],idt, + _enc->rc.scalefilter[1].y[0]); + _enc->rc.inter_delay=idt; + } + } + /*If we're in pass-2 mode, make sure the frame metrics array is big enough + to hold frame statistics for the full buffer.*/ + if(_enc->rc.twopass==2){ + int cfm; + int buf_delay; + int reset_window; + buf_delay=_enc->rc.buf_delay; + reset_window=_enc->rc.frame_metrics==NULL&&(_enc->rc.frames_total[0]==0|| + buf_delay<_enc->rc.frames_total[0]+_enc->rc.frames_total[1] + +_enc->rc.frames_total[2]); + cfm=_enc->rc.cframe_metrics; + /*Only try to resize the frame metrics buffer if a) it's too small and + b) we were using a finite buffer, or are about to start.*/ + if(cfmrc.frame_metrics!=NULL||reset_window)){ + oc_frame_metrics *fm; + int nfm; + int fmh; + fm=(oc_frame_metrics *)_ogg_realloc(_enc->rc.frame_metrics, + buf_delay*sizeof(*_enc->rc.frame_metrics)); + if(fm==NULL){ + /*We failed to allocate a finite buffer.*/ + /*If we don't have a valid 2-pass header yet, just return; we'll reset + the buffer size when we read the header.*/ + if(_enc->rc.frames_total[0]==0)return; + /*Otherwise revert to the largest finite buffer previously set, or to + whole-file buffering if we were still using that.*/ + 
_enc->rc.buf_delay=_enc->rc.frame_metrics!=NULL? + cfm:_enc->rc.frames_total[0]+_enc->rc.frames_total[1] + +_enc->rc.frames_total[2]; + oc_enc_rc_resize(_enc); + return; + } + _enc->rc.frame_metrics=fm; + _enc->rc.cframe_metrics=buf_delay; + /*Re-organize the circular buffer.*/ + fmh=_enc->rc.frame_metrics_head; + nfm=_enc->rc.nframe_metrics; + if(fmh+nfm>cfm){ + int shift; + shift=OC_MINI(fmh+nfm-cfm,buf_delay-cfm); + memcpy(fm+cfm,fm,OC_MINI(fmh+nfm-cfm,buf_delay-cfm)*sizeof(*fm)); + if(fmh+nfm>buf_delay)memmove(fm,fm+shift,fmh+nfm-buf_delay); + } + } + /*We were using whole-file buffering; now we're not.*/ + if(reset_window){ + _enc->rc.nframes[0]=_enc->rc.nframes[1]=_enc->rc.nframes[2]=0; + _enc->rc.scale_sum[0]=_enc->rc.scale_sum[1]=0; + _enc->rc.scale_window_end=_enc->rc.scale_window0= + _enc->state.curframe_num+_enc->prev_dup_count+1; + if(_enc->rc.twopass_buffer_bytes){ + int qti; + /*We already read the metrics for the first frame in the window.*/ + *(_enc->rc.frame_metrics)=*&_enc->rc.cur_metrics; + _enc->rc.nframe_metrics++; + qti=_enc->rc.cur_metrics.frame_type; + _enc->rc.nframes[qti]++; + _enc->rc.nframes[2]+=_enc->rc.cur_metrics.dup_count; + _enc->rc.scale_sum[qti]+=oc_bexp_q24(_enc->rc.cur_metrics.log_scale); + _enc->rc.scale_window_end+=_enc->rc.cur_metrics.dup_count+1; + if(_enc->rc.scale_window_end-_enc->rc.scale_window0rc.twopass_buffer_bytes=0; + } + } + } + /*Otherwise, we could shrink the size of the current window, if necessary, + but leaving it like it is lets us adapt to the new buffer size more + gracefully.*/ + } +} + +/*Scale the number of frames by the number of expected drops/duplicates.*/ +static int oc_rc_scale_drop(oc_rc_state *_rc,int _nframes){ + if(_rc->prev_drop_count>0||_rc->log_drop_scale>OC_Q57(0)){ + ogg_int64_t dup_scale; + dup_scale=oc_bexp64((_rc->log_drop_scale + +oc_blog64(_rc->prev_drop_count+1)>>1)+OC_Q57(8)); + if(dup_scale<_nframes<<8){ + int dup_scalei; + dup_scalei=(int)dup_scale; + 
if(dup_scalei>0)_nframes=((_nframes<<8)+dup_scalei-1)/dup_scalei; + } + else _nframes=!!_nframes; + } + return _nframes; +} + +int oc_enc_select_qi(oc_enc_ctx *_enc,int _qti,int _clamp){ + ogg_int64_t rate_total; + ogg_int64_t rate_bias; + int nframes[2]; + int buf_delay; + int buf_pad; + ogg_int64_t log_qtarget; + ogg_int64_t log_scale0; + ogg_int64_t log_cur_scale; + ogg_int64_t log_qexp; + int exp0; + int old_qi; + int qi; + /*Figure out how to re-distribute bits so that we hit our fullness target + before the last keyframe in our current buffer window (after the current + frame), or the end of the buffer window, whichever comes first.*/ + log_cur_scale=(ogg_int64_t)_enc->rc.scalefilter[_qti].y[0]<<33; + buf_pad=0; + switch(_enc->rc.twopass){ + default:{ + ogg_uint32_t next_key_frame; + /*Single pass mode: assume only forced keyframes and attempt to estimate + the drop count for VFR content.*/ + next_key_frame=_qti?_enc->keyframe_frequency_force + -(_enc->state.curframe_num-_enc->state.keyframe_num):0; + nframes[0]=(_enc->rc.buf_delay-OC_MINI(next_key_frame,_enc->rc.buf_delay) + +_enc->keyframe_frequency_force-1)/_enc->keyframe_frequency_force; + if(nframes[0]+_qti>1){ + nframes[0]--; + buf_delay=next_key_frame+nframes[0]*_enc->keyframe_frequency_force; + } + else buf_delay=_enc->rc.buf_delay; + nframes[1]=buf_delay-nframes[0]; + /*Downgrade the delta frame rate to correspond to the recent drop count + history.*/ + nframes[1]=oc_rc_scale_drop(&_enc->rc,nframes[1]); + }break; + case 1:{ + /*Pass 1 mode: use a fixed qi value.*/ + qi=_enc->state.qis[0]; + _enc->rc.log_qtarget=_enc->log_qavg[_qti][qi]; + return qi; + }break; + case 2:{ + ogg_int64_t scale_sum[2]; + int qti; + /*Pass 2 mode: we know exactly how much of each frame type there is in + the current buffer window, and have estimates for the scales.*/ + nframes[0]=_enc->rc.nframes[0]; + nframes[1]=_enc->rc.nframes[1]; + scale_sum[0]=_enc->rc.scale_sum[0]; + scale_sum[1]=_enc->rc.scale_sum[1]; + /*The window 
size can be slightly larger than the buffer window for VFR + content; clamp it down, if appropriate (the excess will all be dup + frames).*/ + buf_delay=OC_MINI(_enc->rc.scale_window_end-_enc->rc.scale_window0, + _enc->rc.buf_delay); + /*If we're approaching the end of the file, add some slack to keep us + from slamming into a rail. + Our rate accuracy goes down, but it keeps the result sensible. + We position the target where the first forced keyframe beyond the end + of the file would be (for consistency with 1-pass mode).*/ + buf_pad=OC_MINI(_enc->rc.buf_delay,_enc->state.keyframe_num + +_enc->keyframe_frequency_force-_enc->rc.scale_window0); + if(buf_delayrc.frame_metrics!=NULL){ + int fmi; + int fm_tail; + fm_tail=_enc->rc.frame_metrics_head+_enc->rc.nframe_metrics; + if(fm_tail>=_enc->rc.cframe_metrics)fm_tail-=_enc->rc.cframe_metrics; + for(fmi=fm_tail;;){ + oc_frame_metrics *m; + fmi--; + if(fmi<0)fmi+=_enc->rc.cframe_metrics; + /*Stop before we remove the first frame.*/ + if(fmi==_enc->rc.frame_metrics_head)break; + m=_enc->rc.frame_metrics+fmi; + /*If we find a keyframe, remove it and everything past it.*/ + if(m->frame_type==OC_INTRA_FRAME){ + do{ + qti=m->frame_type; + nframes[qti]--; + scale_sum[qti]-=oc_bexp_q24(m->log_scale); + buf_delay-=m->dup_count+1; + fmi++; + if(fmi>=_enc->rc.cframe_metrics)fmi=0; + m=_enc->rc.frame_metrics+fmi; + } + while(fmi!=fm_tail); + /*And stop scanning backwards.*/ + break; + } + } + } + } + /*If we're not using the same frame type as in pass 1 (because someone + changed the keyframe interval), remove that scale estimate. + We'll add in a replacement for the correct frame type below.*/ + qti=_enc->rc.cur_metrics.frame_type; + if(qti!=_qti){ + nframes[qti]--; + scale_sum[qti]-=oc_bexp_q24(_enc->rc.cur_metrics.log_scale); + } + /*Compute log_scale estimates for each frame type from the pass-1 scales + we measured in the current window.*/ + for(qti=0;qti<2;qti++){ + _enc->rc.log_scale[qti]=nframes[qti]>0? 
+ oc_blog64(scale_sum[qti])-oc_blog64(nframes[qti])-OC_Q57(24): + -_enc->rc.log_npixels; + } + /*If we're not using the same frame type as in pass 1, add a scale + estimate for the corresponding frame using the current low-pass + filter value. + This is mostly to ensure we have a valid estimate even when pass 1 had + no frames of this type in the buffer window. + TODO: We could also plan ahead and figure out how many keyframes we'll + be forced to add in the current buffer window.*/ + qti=_enc->rc.cur_metrics.frame_type; + if(qti!=_qti){ + ogg_int64_t scale; + scale=_enc->rc.log_scale[_qti]rc.log_scale[_qti]+OC_Q57(24)):0x7FFFFFFFFFFFLL; + scale*=nframes[_qti]; + nframes[_qti]++; + scale+=oc_bexp_q24(log_cur_scale>>33); + _enc->rc.log_scale[_qti]=oc_blog64(scale) + -oc_blog64(nframes[qti])-OC_Q57(24); + } + else log_cur_scale=(ogg_int64_t)_enc->rc.cur_metrics.log_scale<<33; + /*Add the padding from above. + This basically reverts to 1-pass estimations in the last keyframe + interval.*/ + if(buf_pad>0){ + ogg_int64_t scale; + int nextra_frames; + /*Extend the buffer.*/ + buf_delay+=buf_pad; + /*Add virtual delta frames according to the estimated drop count.*/ + nextra_frames=oc_rc_scale_drop(&_enc->rc,buf_pad); + /*And blend in the low-pass filtered scale according to how many frames + we added.*/ + scale= + oc_bexp64(_enc->rc.log_scale[1]+OC_Q57(24))*(ogg_int64_t)nframes[1] + +oc_bexp_q24(_enc->rc.scalefilter[1].y[0])*(ogg_int64_t)nextra_frames; + nframes[1]+=nextra_frames; + _enc->rc.log_scale[1]=oc_blog64(scale)-oc_blog64(nframes[1])-OC_Q57(24); + } + }break; + } + /*If we've been missing our target, add a penalty term.*/ + rate_bias=(_enc->rc.rate_bias/(_enc->state.curframe_num+1000))* + (buf_delay-buf_pad); + /*rate_total is the total bits available over the next buf_delay frames.*/ + rate_total=_enc->rc.fullness-_enc->rc.target+rate_bias + +buf_delay*_enc->rc.bits_per_frame; + log_scale0=_enc->rc.log_scale[_qti]+_enc->rc.log_npixels; + /*If there aren't enough 
bits to achieve our desired fullness level, use the + minimum quality permitted.*/ + if(rate_total<=buf_delay)log_qtarget=OC_QUANT_MAX_LOG; + else{ + static const ogg_int64_t LOG_KEY_RATIO=0x0137222BB70747BALL; + ogg_int64_t log_scale1; + ogg_int64_t rlo; + ogg_int64_t rhi; + log_scale1=_enc->rc.log_scale[1-_qti]+_enc->rc.log_npixels; + rlo=0; + rhi=(rate_total+nframes[_qti]-1)/nframes[_qti]; + while(rlo>1; + log_rpow=oc_blog64(curr)-log_scale0; + log_rpow=(log_rpow+(_enc->rc.exp[_qti]>>1))/_enc->rc.exp[_qti]; + if(_qti)log_rpow+=LOG_KEY_RATIO>>6; + else log_rpow-=LOG_KEY_RATIO>>6; + log_rpow*=_enc->rc.exp[1-_qti]; + rscale=nframes[1-_qti]*oc_bexp64(log_scale1+log_rpow); + rdiff=nframes[_qti]*curr+rscale-rate_total; + if(rdiff<0)rlo=curr+1; + else if(rdiff>0)rhi=curr-1; + else break; + } + log_qtarget=OC_Q57(2)-((oc_blog64(rlo)-log_scale0+(_enc->rc.exp[_qti]>>1))/ + _enc->rc.exp[_qti]<<6); + log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG); + } + /*The above allocation looks only at the total rate we'll accumulate in the + next buf_delay frames. + However, we could overflow the buffer on the very next frame, so check for + that here, if we're not using a soft target.*/ + exp0=_enc->rc.exp[_qti]; + if(_enc->rc.cap_overflow){ + ogg_int64_t margin; + ogg_int64_t soft_limit; + ogg_int64_t log_soft_limit; + /*Allow 3% of the buffer for prediction error. 
+ This should be plenty, and we don't mind if we go a bit over; we only + want to keep these bits from being completely wasted.*/ + margin=_enc->rc.max+31>>5; + /*We want to use at least this many bits next frame.*/ + soft_limit=_enc->rc.fullness+_enc->rc.bits_per_frame-(_enc->rc.max-margin); + log_soft_limit=oc_blog64(soft_limit); + /*If we're predicting we won't use that many...*/ + log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0; + if(log_scale0-log_qexp>32)* + ((OC_MINI(margin,soft_limit)<<32)/margin); + log_qtarget=((log_qexp+(exp0>>1))/exp0<<6)+OC_Q57(2); + } + } + /*If this was not one of the initial frames, limit the change in quality.*/ + old_qi=_enc->state.qis[0]; + if(_clamp){ + ogg_int64_t log_qmin; + ogg_int64_t log_qmax; + /*Clamp the target quantizer to within [0.8*Q,1.2*Q], where Q is the + current quantizer. + TODO: With user-specified quant matrices, we need to enlarge these limits + if they don't actually let us change qi values.*/ + log_qmin=_enc->log_qavg[_qti][old_qi]-0x00A4D3C25E68DC58LL; + log_qmax=_enc->log_qavg[_qti][old_qi]+0x00A4D3C25E68DC58LL; + log_qtarget=OC_CLAMPI(log_qmin,log_qtarget,log_qmax); + } + /*The above allocation looks only at the total rate we'll accumulate in the + next buf_delay frames. + However, we could bust the budget on the very next frame, so check for that + here, if we're not using a soft target.*/ + /* Disabled when our minimum qi > 0; if we saturate log_qtarget to + to the maximum possible size when we have a minimum qi, the + resulting lambda will interact very strangely with SKIP. The + resulting artifacts look like waterfalls. */ + if(_enc->state.info.quality==0){ + ogg_int64_t log_hard_limit; + /*Compute the maximum number of bits we can use in the next frame. + Allow 50% of the rate for a single frame for prediction error. 
+ This may not be enough for keyframes or sudden changes in complexity.*/ + log_hard_limit=oc_blog64(_enc->rc.fullness+(_enc->rc.bits_per_frame>>1)); + /*If we're predicting we'll use more than this...*/ + log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0; + if(log_scale0-log_qexp>log_hard_limit){ + /*Force the target to hit our limit exactly.*/ + log_qexp=log_scale0-log_hard_limit; + log_qtarget=((log_qexp+(exp0>>1))/exp0<<6)+OC_Q57(2); + /*If that target is unreasonable, oh well; we'll have to drop.*/ + log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG); + } + } + /*Compute a final estimate of the number of bits we plan to use.*/ + log_qexp=(log_qtarget-OC_Q57(2)>>6)*_enc->rc.exp[_qti]; + _enc->rc.rate_bias+=oc_bexp64(log_cur_scale+_enc->rc.log_npixels-log_qexp); + qi=oc_enc_find_qi_for_target(_enc,_qti,old_qi, + _enc->state.info.quality,log_qtarget); + /*Save the quantizer target for lambda calculations.*/ + _enc->rc.log_qtarget=log_qtarget; + return qi; +} + +int oc_enc_update_rc_state(oc_enc_ctx *_enc, + long _bits,int _qti,int _qi,int _trial,int _droppable){ + ogg_int64_t buf_delta; + ogg_int64_t log_scale; + int dropped; + dropped=0; + /* Drop frames also disabled for now in the case of infinite-buffer + two-pass mode */ + if(!_enc->rc.drop_frames||_enc->rc.twopass&&_enc->rc.frame_metrics==NULL){ + _droppable=0; + } + buf_delta=_enc->rc.bits_per_frame*(1+_enc->dup_count); + if(_bits<=0){ + /*We didn't code any blocks in this frame.*/ + log_scale=OC_Q57(-64); + _bits=0; + } + else{ + ogg_int64_t log_bits; + ogg_int64_t log_qexp; + /*Compute the estimated scale factor for this frame type.*/ + log_bits=oc_blog64(_bits); + log_qexp=_enc->rc.log_qtarget-OC_Q57(2); + log_qexp=(log_qexp>>6)*(_enc->rc.exp[_qti]); + log_scale=OC_MINI(log_bits-_enc->rc.log_npixels+log_qexp,OC_Q57(16)); + } + /*Special two-pass processing.*/ + switch(_enc->rc.twopass){ + case 1:{ + /*Pass 1 mode: save the metrics for this frame.*/ + _enc->rc.cur_metrics.log_scale=oc_q57_to_q24(log_scale); + 
_enc->rc.cur_metrics.dup_count=_enc->dup_count; + _enc->rc.cur_metrics.frame_type=_enc->state.frame_type; + _enc->rc.twopass_buffer_bytes=0; + }break; + case 2:{ + /*Pass 2 mode:*/ + if(!_trial){ + ogg_int64_t next_frame_num; + int qti; + /*Move the current metrics back one frame.*/ + *&_enc->rc.prev_metrics=*&_enc->rc.cur_metrics; + next_frame_num=_enc->state.curframe_num+_enc->dup_count+1; + /*Back out the last frame's statistics from the sliding window.*/ + qti=_enc->rc.prev_metrics.frame_type; + _enc->rc.frames_left[qti]--; + _enc->rc.frames_left[2]-=_enc->rc.prev_metrics.dup_count; + _enc->rc.nframes[qti]--; + _enc->rc.nframes[2]-=_enc->rc.prev_metrics.dup_count; + _enc->rc.scale_sum[qti]-=oc_bexp_q24(_enc->rc.prev_metrics.log_scale); + _enc->rc.scale_window0=(int)next_frame_num; + /*Free the corresponding entry in the circular buffer.*/ + if(_enc->rc.frame_metrics!=NULL){ + _enc->rc.nframe_metrics--; + _enc->rc.frame_metrics_head++; + if(_enc->rc.frame_metrics_head>=_enc->rc.cframe_metrics){ + _enc->rc.frame_metrics_head=0; + } + } + /*Mark us ready for the next 2-pass packet.*/ + _enc->rc.twopass_buffer_bytes=0; + /*Update state, so the user doesn't have to keep calling 2pass_in after + they've fed in all the data when we're using a finite buffer.*/ + _enc->prev_dup_count=_enc->dup_count; + oc_enc_rc_2pass_in(_enc,NULL,0); + } + }break; + } + /*Common to all passes:*/ + if(_bits>0){ + if(_trial){ + oc_iir_filter *f; + /*Use the estimated scale factor directly if this was a trial.*/ + f=_enc->rc.scalefilter+_qti; + f->y[1]=f->y[0]=f->x[1]=f->x[0]=oc_q57_to_q24(log_scale); + _enc->rc.log_scale[_qti]=log_scale; + } + else{ + /*Lengthen the time constant for the INTER filter as we collect more + frame statistics, until we reach our target.*/ + if(_enc->rc.inter_delay<_enc->rc.inter_delay_target&& + _enc->rc.inter_count>=_enc->rc.inter_delay&&_qti==OC_INTER_FRAME){ + oc_iir_filter_reinit(&_enc->rc.scalefilter[1],++_enc->rc.inter_delay); + } + /*Otherwise update 
the low-pass scale filter for this frame type, + regardless of whether or not we dropped this frame.*/ + _enc->rc.log_scale[_qti]=oc_iir_filter_update( + _enc->rc.scalefilter+_qti,oc_q57_to_q24(log_scale))<<33; + /*If this frame busts our budget, it must be dropped.*/ + if(_droppable&&_enc->rc.fullness+buf_delta<_bits){ + _enc->rc.prev_drop_count+=1+_enc->dup_count; + _bits=0; + dropped=1; + } + else{ + ogg_uint32_t drop_count; + /*Update a low-pass filter to estimate the "real" frame rate taking + drops and duplicates into account. + This is only done if the frame is coded, as it needs the final + count of dropped frames.*/ + drop_count=_enc->rc.prev_drop_count+1; + if(drop_count>0x7F)drop_count=0x7FFFFFFF; + else drop_count<<=24; + _enc->rc.log_drop_scale=oc_blog64(oc_iir_filter_update( + &_enc->rc.vfrfilter,drop_count))-OC_Q57(24); + /*Initialize the drop count for this frame to the user-requested dup + count. + It will be increased if we drop more frames.*/ + _enc->rc.prev_drop_count=_enc->dup_count; + } + } + /*Increment the INTER frame count, for filter adaptation purposes.*/ + if(_enc->rc.inter_countrc.inter_count+=_qti; + } + /*Increase the drop count.*/ + else _enc->rc.prev_drop_count+=1+_enc->dup_count; + /*And update the buffer fullness level.*/ + if(!_trial){ + _enc->rc.fullness+=buf_delta-_bits; + /*If we're too quick filling the buffer and overflow is capped, + that rate is lost forever.*/ + if(_enc->rc.cap_overflow&&_enc->rc.fullness>_enc->rc.max){ + _enc->rc.fullness=_enc->rc.max; + } + /*If we're too quick draining the buffer and underflow is capped, + don't try to make up that rate later.*/ + if(_enc->rc.cap_underflow&&_enc->rc.fullness<0){ + _enc->rc.fullness=0; + } + /*Adjust the bias for the real bits we've used.*/ + _enc->rc.rate_bias-=_bits; + } + return dropped; +} + +#define OC_RC_2PASS_VERSION (1) +#define OC_RC_2PASS_HDR_SZ (38) +#define OC_RC_2PASS_PACKET_SZ (8) + +static void oc_rc_buffer_val(oc_rc_state *_rc,ogg_int64_t _val,int 
_bytes){ + while(_bytes-->0){ + _rc->twopass_buffer[_rc->twopass_buffer_bytes++]=(unsigned char)(_val&0xFF); + _val>>=8; + } +} + +int oc_enc_rc_2pass_out(oc_enc_ctx *_enc,unsigned char **_buf){ + if(_enc->rc.twopass_buffer_bytes==0){ + if(_enc->rc.twopass==0){ + int qi; + /*Pick first-pass qi for scale calculations.*/ + qi=oc_enc_select_qi(_enc,0,0); + _enc->state.nqis=1; + _enc->state.qis[0]=qi; + _enc->rc.twopass=1; + _enc->rc.frames_total[0]=_enc->rc.frames_total[1]= + _enc->rc.frames_total[2]=0; + _enc->rc.scale_sum[0]=_enc->rc.scale_sum[1]=0; + /*Fill in dummy summary values.*/ + oc_rc_buffer_val(&_enc->rc,0x5032544F,4); + oc_rc_buffer_val(&_enc->rc,OC_RC_2PASS_VERSION,4); + oc_rc_buffer_val(&_enc->rc,0,OC_RC_2PASS_HDR_SZ-8); + } + else{ + int qti; + qti=_enc->rc.cur_metrics.frame_type; + _enc->rc.scale_sum[qti]+=oc_bexp_q24(_enc->rc.cur_metrics.log_scale); + _enc->rc.frames_total[qti]++; + _enc->rc.frames_total[2]+=_enc->rc.cur_metrics.dup_count; + oc_rc_buffer_val(&_enc->rc, + _enc->rc.cur_metrics.dup_count|_enc->rc.cur_metrics.frame_type<<31,4); + oc_rc_buffer_val(&_enc->rc,_enc->rc.cur_metrics.log_scale,4); + } + } + else if(_enc->packet_state==OC_PACKET_DONE&& + _enc->rc.twopass_buffer_bytes!=OC_RC_2PASS_HDR_SZ){ + _enc->rc.twopass_buffer_bytes=0; + oc_rc_buffer_val(&_enc->rc,0x5032544F,4); + oc_rc_buffer_val(&_enc->rc,OC_RC_2PASS_VERSION,4); + oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[0],4); + oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[1],4); + oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[2],4); + oc_rc_buffer_val(&_enc->rc,_enc->rc.exp[0],1); + oc_rc_buffer_val(&_enc->rc,_enc->rc.exp[1],1); + oc_rc_buffer_val(&_enc->rc,_enc->rc.scale_sum[0],8); + oc_rc_buffer_val(&_enc->rc,_enc->rc.scale_sum[1],8); + } + else{ + /*The data for this frame has already been retrieved.*/ + *_buf=NULL; + return 0; + } + *_buf=_enc->rc.twopass_buffer; + return _enc->rc.twopass_buffer_bytes; +} + +static size_t oc_rc_buffer_fill(oc_rc_state *_rc, + unsigned 
char *_buf,size_t _bytes,size_t _consumed,size_t _goal){ + while(_rc->twopass_buffer_fill<_goal&&_consumed<_bytes){ + _rc->twopass_buffer[_rc->twopass_buffer_fill++]=_buf[_consumed++]; + } + return _consumed; +} + +static ogg_int64_t oc_rc_unbuffer_val(oc_rc_state *_rc,int _bytes){ + ogg_int64_t ret; + int shift; + ret=0; + shift=0; + while(_bytes-->0){ + ret|=((ogg_int64_t)_rc->twopass_buffer[_rc->twopass_buffer_bytes++])<rc.twopass==0){ + _enc->rc.twopass=2; + _enc->rc.twopass_buffer_fill=0; + _enc->rc.frames_total[0]=0; + _enc->rc.nframe_metrics=0; + _enc->rc.cframe_metrics=0; + _enc->rc.frame_metrics_head=0; + _enc->rc.scale_window0=0; + _enc->rc.scale_window_end=0; + } + /*If we haven't got a valid summary header yet, try to parse one.*/ + if(_enc->rc.frames_total[0]==0){ + if(!_buf){ + int frames_needed; + /*If we're using a whole-file buffer, we just need the first frame. + Otherwise, we may need as many as one per buffer slot.*/ + frames_needed=_enc->rc.frame_metrics==NULL?1:_enc->rc.buf_delay; + return OC_RC_2PASS_HDR_SZ+frames_needed*OC_RC_2PASS_PACKET_SZ + -_enc->rc.twopass_buffer_fill; + } + consumed=oc_rc_buffer_fill(&_enc->rc, + _buf,_bytes,consumed,OC_RC_2PASS_HDR_SZ); + if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_HDR_SZ){ + ogg_int64_t scale_sum[2]; + int exp[2]; + int buf_delay; + /*Read the summary header data.*/ + /*Check the magic value and version number.*/ + if(oc_rc_unbuffer_val(&_enc->rc,4)!=0x5032544F|| + oc_rc_unbuffer_val(&_enc->rc,4)!=OC_RC_2PASS_VERSION){ + _enc->rc.twopass_buffer_bytes=0; + return TH_ENOTFORMAT; + } + _enc->rc.frames_total[0]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4); + _enc->rc.frames_total[1]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4); + _enc->rc.frames_total[2]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4); + exp[0]=(int)oc_rc_unbuffer_val(&_enc->rc,1); + exp[1]=(int)oc_rc_unbuffer_val(&_enc->rc,1); + scale_sum[0]=oc_rc_unbuffer_val(&_enc->rc,8); + scale_sum[1]=oc_rc_unbuffer_val(&_enc->rc,8); + /*Make sure 
the file claims to have at least one frame. + Otherwise we probably got the placeholder data from an aborted pass 1. + Also make sure the total frame count doesn't overflow an integer.*/ + buf_delay=_enc->rc.frames_total[0]+_enc->rc.frames_total[1] + +_enc->rc.frames_total[2]; + if(_enc->rc.frames_total[0]==0||buf_delay<0|| + (ogg_uint32_t)buf_delay<_enc->rc.frames_total[0]|| + (ogg_uint32_t)buf_delay<_enc->rc.frames_total[1]){ + _enc->rc.frames_total[0]=0; + _enc->rc.twopass_buffer_bytes=0; + return TH_EBADHEADER; + } + /*Got a valid header; set up pass 2.*/ + _enc->rc.frames_left[0]=_enc->rc.frames_total[0]; + _enc->rc.frames_left[1]=_enc->rc.frames_total[1]; + _enc->rc.frames_left[2]=_enc->rc.frames_total[2]; + /*If the user hasn't specified a buffer size, use the whole file.*/ + if(_enc->rc.frame_metrics==NULL){ + _enc->rc.buf_delay=buf_delay; + _enc->rc.nframes[0]=_enc->rc.frames_total[0]; + _enc->rc.nframes[1]=_enc->rc.frames_total[1]; + _enc->rc.nframes[2]=_enc->rc.frames_total[2]; + _enc->rc.scale_sum[0]=scale_sum[0]; + _enc->rc.scale_sum[1]=scale_sum[1]; + _enc->rc.scale_window_end=buf_delay; + oc_enc_rc_reset(_enc); + } + _enc->rc.exp[0]=exp[0]; + _enc->rc.exp[1]=exp[1]; + /*Clear the header data from the buffer to make room for packet data.*/ + _enc->rc.twopass_buffer_fill=0; + _enc->rc.twopass_buffer_bytes=0; + } + } + if(_enc->rc.frames_total[0]!=0){ + ogg_int64_t curframe_num; + int nframes_total; + curframe_num=_enc->state.curframe_num; + if(curframe_num>=0){ + /*We just encoded a frame; make sure things matched.*/ + if(_enc->rc.prev_metrics.dup_count!=_enc->prev_dup_count){ + _enc->rc.twopass_buffer_bytes=0; + return TH_EINVAL; + } + } + curframe_num+=_enc->prev_dup_count+1; + nframes_total=_enc->rc.frames_total[0]+_enc->rc.frames_total[1] + +_enc->rc.frames_total[2]; + if(curframe_num>=nframes_total){ + /*We don't want any more data after the last frame, and we don't want to + allow any more frames to be encoded.*/ + 
_enc->rc.twopass_buffer_bytes=0; + } + else if(_enc->rc.twopass_buffer_bytes==0){ + if(_enc->rc.frame_metrics==NULL){ + /*We're using a whole-file buffer:*/ + if(!_buf)return OC_RC_2PASS_PACKET_SZ-_enc->rc.twopass_buffer_fill; + consumed=oc_rc_buffer_fill(&_enc->rc, + _buf,_bytes,consumed,OC_RC_2PASS_PACKET_SZ); + if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_PACKET_SZ){ + ogg_uint32_t dup_count; + ogg_int32_t log_scale; + int qti; + int arg; + /*Read the metrics for the next frame.*/ + dup_count=oc_rc_unbuffer_val(&_enc->rc,4); + log_scale=oc_rc_unbuffer_val(&_enc->rc,4); + _enc->rc.cur_metrics.log_scale=log_scale; + qti=(dup_count&0x80000000)>>31; + _enc->rc.cur_metrics.dup_count=dup_count&0x7FFFFFFF; + _enc->rc.cur_metrics.frame_type=qti; + _enc->rc.twopass_force_kf=qti==OC_INTRA_FRAME; + /*"Helpfully" set the dup count back to what it was in pass 1.*/ + arg=_enc->rc.cur_metrics.dup_count; + th_encode_ctl(_enc,TH_ENCCTL_SET_DUP_COUNT,&arg,sizeof(arg)); + /*Clear the buffer for the next frame.*/ + _enc->rc.twopass_buffer_fill=0; + } + } + else{ + int frames_needed; + /*We're using a finite buffer:*/ + frames_needed=OC_CLAMPI(0,_enc->rc.buf_delay + -(_enc->rc.scale_window_end-_enc->rc.scale_window0), + _enc->rc.frames_left[0]+_enc->rc.frames_left[1] + -_enc->rc.nframes[0]-_enc->rc.nframes[1]); + while(frames_needed>0){ + if(!_buf){ + return OC_RC_2PASS_PACKET_SZ*frames_needed + -_enc->rc.twopass_buffer_fill; + } + consumed=oc_rc_buffer_fill(&_enc->rc, + _buf,_bytes,consumed,OC_RC_2PASS_PACKET_SZ); + if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_PACKET_SZ){ + oc_frame_metrics *m; + int fmi; + ogg_uint32_t dup_count; + ogg_int32_t log_scale; + int qti; + /*Read the metrics for the next frame.*/ + dup_count=oc_rc_unbuffer_val(&_enc->rc,4); + log_scale=oc_rc_unbuffer_val(&_enc->rc,4); + /*Add the to the circular buffer.*/ + fmi=_enc->rc.frame_metrics_head+_enc->rc.nframe_metrics++; + if(fmi>=_enc->rc.cframe_metrics)fmi-=_enc->rc.cframe_metrics; + 
m=_enc->rc.frame_metrics+fmi; + m->log_scale=log_scale; + qti=(dup_count&0x80000000)>>31; + m->dup_count=dup_count&0x7FFFFFFF; + m->frame_type=qti; + /*And accumulate the statistics over the window.*/ + _enc->rc.nframes[qti]++; + _enc->rc.nframes[2]+=m->dup_count; + _enc->rc.scale_sum[qti]+=oc_bexp_q24(m->log_scale); + _enc->rc.scale_window_end+=m->dup_count+1; + /*Compute an upper bound on the number of remaining packets needed + for the current window.*/ + frames_needed=OC_CLAMPI(0,_enc->rc.buf_delay + -(_enc->rc.scale_window_end-_enc->rc.scale_window0), + _enc->rc.frames_left[0]+_enc->rc.frames_left[1] + -_enc->rc.nframes[0]-_enc->rc.nframes[1]); + /*Clear the buffer for the next frame.*/ + _enc->rc.twopass_buffer_fill=0; + _enc->rc.twopass_buffer_bytes=0; + } + /*Go back for more data.*/ + else break; + } + /*If we've got all the frames we need, fill in the current metrics. + We're ready to go.*/ + if(frames_needed<=0){ + int arg; + *&_enc->rc.cur_metrics= + *(_enc->rc.frame_metrics+_enc->rc.frame_metrics_head); + _enc->rc.twopass_force_kf= + _enc->rc.cur_metrics.frame_type==OC_INTRA_FRAME; + /*"Helpfully" set the dup count back to what it was in pass 1.*/ + arg=_enc->rc.cur_metrics.dup_count; + th_encode_ctl(_enc,TH_ENCCTL_SET_DUP_COUNT,&arg,sizeof(arg)); + /*Mark us ready for the next frame.*/ + _enc->rc.twopass_buffer_bytes=1; + } + } + } + } + return (int)consumed; +} diff --git a/thirdparty/libtheora/state.c b/thirdparty/libtheora/state.c new file mode 100644 index 0000000000..42ed33a9a3 --- /dev/null +++ b/thirdparty/libtheora/state.c @@ -0,0 +1,1227 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: state.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include +#include +#include "internal.h" +#if defined(OC_X86_ASM) +#if defined(_MSC_VER) +# include "x86_vc/x86int.h" +#else +# include "x86/x86int.h" +#endif +#endif +#if defined(OC_DUMP_IMAGES) +# include +# include "png.h" +#endif + +/*Returns the fragment index of the top-left block in a macro block. + This can be used to test whether or not the whole macro block is valid. + _sb_map: The super block map. + _quadi: The quadrant number. + Return: The index of the fragment of the upper left block in the macro + block, or -1 if the block lies outside the coded frame.*/ +static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){ + /*It so happens that under the Hilbert curve ordering described below, the + upper-left block in each macro block is at index 0, except in macro block + 3, where it is at index 2.*/ + return _sb_map[_quadi][_quadi&_quadi<<1]; +} + +/*Fills in the mapping from block positions to fragment numbers for a single + color plane. + This function also fills in the "valid" flag of each quadrant in the super + block flags. + _sb_maps: The array of super block maps for the color plane. + _sb_flags: The array of super block flags for the color plane. + _frag0: The index of the first fragment in the plane. + _hfrags: The number of horizontal fragments in a coded frame. + _vfrags: The number of vertical fragments in a coded frame.*/ +static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[], + oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){ + /*Contains the (macro_block,block) indices for a 4x4 grid of + fragments. 
+ The pattern is a 4x4 Hilbert space-filling curve. + A Hilbert curve has the nice property that as the curve grows larger, its + fractal dimension approaches 2. + The intuition is that nearby blocks in the curve are also close spatially, + with the previous element always an immediate neighbor, so that runs of + blocks should be well correlated.*/ + static const int SB_MAP[4][4][2]={ + {{0,0},{0,1},{3,2},{3,3}}, + {{0,3},{0,2},{3,1},{3,0}}, + {{1,0},{1,3},{2,0},{2,3}}, + {{1,1},{1,2},{2,1},{2,2}} + }; + ptrdiff_t yfrag; + unsigned sbi; + int y; + sbi=0; + yfrag=_frag0; + for(y=0;;y+=4){ + int imax; + int x; + /*Figure out how many columns of blocks in this super block lie within the + image.*/ + imax=_vfrags-y; + if(imax>4)imax=4; + else if(imax<=0)break; + for(x=0;;x+=4,sbi++){ + ptrdiff_t xfrag; + int jmax; + int quadi; + int i; + /*Figure out how many rows of blocks in this super block lie within the + image.*/ + jmax=_hfrags-x; + if(jmax>4)jmax=4; + else if(jmax<=0)break; + /*By default, set all fragment indices to -1.*/ + memset(_sb_maps[sbi][0],0xFF,sizeof(_sb_maps[sbi])); + /*Fill in the fragment map for this super block.*/ + xfrag=yfrag+x; + for(i=0;i=0)<nhfrags+_xfrag0+j; + } +} + +/*Fills in the chroma plane fragment maps for a macro block. + This version is for use with chroma decimated in the X and Y directions + (4:2:0). + _mb_map: The macro block map to fill. + _fplanes: The descriptions of the fragment planes. + _xfrag0: The X location of the upper-left hand fragment in the luma plane. + _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ +static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3], + const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ + ptrdiff_t fragi; + _xfrag0>>=1; + _yfrag0>>=1; + fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; + _mb_map[1][0]=fragi+_fplanes[1].froffset; + _mb_map[2][0]=fragi+_fplanes[2].froffset; +} + +/*Fills in the chroma plane fragment maps for a macro block. 
+ This version is for use with chroma decimated in the Y direction. + _mb_map: The macro block map to fill. + _fplanes: The descriptions of the fragment planes. + _xfrag0: The X location of the upper-left hand fragment in the luma plane. + _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ +static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3], + const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ + ptrdiff_t fragi; + int j; + _yfrag0>>=1; + fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; + for(j=0;j<2;j++){ + _mb_map[1][j]=fragi+_fplanes[1].froffset; + _mb_map[2][j]=fragi+_fplanes[2].froffset; + fragi++; + } +} + +/*Fills in the chroma plane fragment maps for a macro block. + This version is for use with chroma decimated in the X direction (4:2:2). + _mb_map: The macro block map to fill. + _fplanes: The descriptions of the fragment planes. + _xfrag0: The X location of the upper-left hand fragment in the luma plane. + _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ +static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3], + const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ + ptrdiff_t fragi; + int i; + _xfrag0>>=1; + fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; + for(i=0;i<2;i++){ + _mb_map[1][i<<1]=fragi+_fplanes[1].froffset; + _mb_map[2][i<<1]=fragi+_fplanes[2].froffset; + fragi+=_fplanes[1].nhfrags; + } +} + +/*Fills in the chroma plane fragment maps for a macro block. + This version is for use with no chroma decimation (4:4:4). + This uses the already filled-in luma plane values. + _mb_map: The macro block map to fill. 
+ _fplanes: The descriptions of the fragment planes.*/ +static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3], + const oc_fragment_plane _fplanes[3]){ + int k; + for(k=0;k<4;k++){ + _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset; + _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset; + } +} + +/*The function type used to fill in the chroma plane fragment maps for a + macro block. + _mb_map: The macro block map to fill. + _fplanes: The descriptions of the fragment planes. + _xfrag0: The X location of the upper-left hand fragment in the luma plane. + _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ +typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3], + const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0); + +/*A table of functions used to fill in the chroma plane fragment maps for a + macro block for each type of chrominance decimation.*/ +static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={ + oc_mb_fill_cmapping00, + oc_mb_fill_cmapping01, + oc_mb_fill_cmapping10, + (oc_mb_fill_cmapping_func)oc_mb_fill_cmapping11 +}; + +/*Fills in the mapping from macro blocks to their corresponding fragment + numbers in each plane. + _mb_maps: The list of macro block maps. + _mb_modes: The list of macro block modes; macro blocks completely outside + the coded region are marked invalid. + _fplanes: The descriptions of the fragment planes. 
+ _pixel_fmt: The chroma decimation type.*/ +static void oc_mb_create_mapping(oc_mb_map _mb_maps[], + signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){ + oc_mb_fill_cmapping_func mb_fill_cmapping; + unsigned sbi; + int y; + mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt]; + /*Loop through the luma plane super blocks.*/ + for(sbi=y=0;y<_fplanes[0].nvfrags;y+=4){ + int x; + for(x=0;x<_fplanes[0].nhfrags;x+=4,sbi++){ + int ymb; + /*Loop through the macro blocks in each super block in display order.*/ + for(ymb=0;ymb<2;ymb++){ + int xmb; + for(xmb=0;xmb<2;xmb++){ + unsigned mbi; + int mbx; + int mby; + mbi=sbi<<2|OC_MB_MAP[ymb][xmb]; + mbx=x|xmb<<1; + mby=y|ymb<<1; + /*Initialize fragment indices to -1.*/ + memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi])); + /*Make sure this macro block is within the encoded region.*/ + if(mbx>=_fplanes[0].nhfrags||mby>=_fplanes[0].nvfrags){ + _mb_modes[mbi]=OC_MODE_INVALID; + continue; + } + /*Fill in the fragment indices for the luma plane.*/ + oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby); + /*Fill in the fragment indices for the chroma planes.*/ + (*mb_fill_cmapping)(_mb_maps[mbi],_fplanes,mbx,mby); + } + } + } + } +} + +/*Marks the fragments which fall all or partially outside the displayable + region of the frame. + _state: The Theora state containing the fragments to be marked.*/ +static void oc_state_border_init(oc_theora_state *_state){ + oc_fragment *frag; + oc_fragment *yfrag_end; + oc_fragment *xfrag_end; + oc_fragment_plane *fplane; + int crop_x0; + int crop_y0; + int crop_xf; + int crop_yf; + int pli; + int y; + int x; + /*The method we use here is slow, but the code is dead simple and handles + all the special cases easily. 
+ We only ever need to do it once.*/ + /*Loop through the fragments, marking those completely outside the + displayable region and constructing a border mask for those that straddle + the border.*/ + _state->nborders=0; + yfrag_end=frag=_state->frags; + for(pli=0;pli<3;pli++){ + fplane=_state->fplanes+pli; + /*Set up the cropping rectangle for this plane.*/ + crop_x0=_state->info.pic_x; + crop_xf=_state->info.pic_x+_state->info.pic_width; + crop_y0=_state->info.pic_y; + crop_yf=_state->info.pic_y+_state->info.pic_height; + if(pli>0){ + if(!(_state->info.pixel_fmt&1)){ + crop_x0=crop_x0>>1; + crop_xf=crop_xf+1>>1; + } + if(!(_state->info.pixel_fmt&2)){ + crop_y0=crop_y0>>1; + crop_yf=crop_yf+1>>1; + } + } + y=0; + for(yfrag_end+=fplane->nfrags;fragnhfrags;frag=crop_xf||crop_y0>=crop_yf){ + frag->invalid=1; + } + /*Otherwise, check to see if it straddles the border.*/ + else if(x=crop_x0&&x+j=crop_y0&&y+i=_state->nborders){ + _state->nborders++; + _state->borders[i].mask=mask; + _state->borders[i].npixels=npixels; + } + else if(_state->borders[i].mask!=mask)continue; + frag->borderi=i; + break; + } + } + else frag->borderi=-1; + } + } + } +} + +static int oc_state_frarray_init(oc_theora_state *_state){ + int yhfrags; + int yvfrags; + int chfrags; + int cvfrags; + ptrdiff_t yfrags; + ptrdiff_t cfrags; + ptrdiff_t nfrags; + unsigned yhsbs; + unsigned yvsbs; + unsigned chsbs; + unsigned cvsbs; + unsigned ysbs; + unsigned csbs; + unsigned nsbs; + size_t nmbs; + int hdec; + int vdec; + int pli; + /*Figure out the number of fragments in each plane.*/ + /*These parameters have already been validated to be multiples of 16.*/ + yhfrags=_state->info.frame_width>>3; + yvfrags=_state->info.frame_height>>3; + hdec=!(_state->info.pixel_fmt&1); + vdec=!(_state->info.pixel_fmt&2); + chfrags=yhfrags+hdec>>hdec; + cvfrags=yvfrags+vdec>>vdec; + yfrags=yhfrags*(ptrdiff_t)yvfrags; + cfrags=chfrags*(ptrdiff_t)cvfrags; + nfrags=yfrags+2*cfrags; + /*Figure out the number of super blocks in 
each plane.*/ + yhsbs=yhfrags+3>>2; + yvsbs=yvfrags+3>>2; + chsbs=chfrags+3>>2; + cvsbs=cvfrags+3>>2; + ysbs=yhsbs*yvsbs; + csbs=chsbs*cvsbs; + nsbs=ysbs+2*csbs; + nmbs=(size_t)ysbs<<2; + /*Check for overflow. + We support the ridiculous upper limits of the specification (1048560 by + 1048560, or 3 TB frames) if the target architecture has 64-bit pointers, + but for those with 32-bit pointers (or smaller!) we have to check. + If the caller wants to prevent denial-of-service by imposing a more + reasonable upper limit on the size of attempted allocations, they must do + so themselves; we have no platform independent way to determine how much + system memory there is nor an application-independent way to decide what a + "reasonable" allocation is.*/ + if(yfrags/yhfrags!=yvfrags||2*cfrags>2!=ysbs){ + return TH_EIMPL; + } + /*Initialize the fragment array.*/ + _state->fplanes[0].nhfrags=yhfrags; + _state->fplanes[0].nvfrags=yvfrags; + _state->fplanes[0].froffset=0; + _state->fplanes[0].nfrags=yfrags; + _state->fplanes[0].nhsbs=yhsbs; + _state->fplanes[0].nvsbs=yvsbs; + _state->fplanes[0].sboffset=0; + _state->fplanes[0].nsbs=ysbs; + _state->fplanes[1].nhfrags=_state->fplanes[2].nhfrags=chfrags; + _state->fplanes[1].nvfrags=_state->fplanes[2].nvfrags=cvfrags; + _state->fplanes[1].froffset=yfrags; + _state->fplanes[2].froffset=yfrags+cfrags; + _state->fplanes[1].nfrags=_state->fplanes[2].nfrags=cfrags; + _state->fplanes[1].nhsbs=_state->fplanes[2].nhsbs=chsbs; + _state->fplanes[1].nvsbs=_state->fplanes[2].nvsbs=cvsbs; + _state->fplanes[1].sboffset=ysbs; + _state->fplanes[2].sboffset=ysbs+csbs; + _state->fplanes[1].nsbs=_state->fplanes[2].nsbs=csbs; + _state->nfrags=nfrags; + _state->frags=_ogg_calloc(nfrags,sizeof(*_state->frags)); + _state->frag_mvs=_ogg_malloc(nfrags*sizeof(*_state->frag_mvs)); + _state->nsbs=nsbs; + _state->sb_maps=_ogg_malloc(nsbs*sizeof(*_state->sb_maps)); + _state->sb_flags=_ogg_calloc(nsbs,sizeof(*_state->sb_flags)); + _state->nhmbs=yhsbs<<1; + 
_state->nvmbs=yvsbs<<1; + _state->nmbs=nmbs; + _state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps)); + _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes)); + _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis)); + if(_state->frags==NULL||_state->frag_mvs==NULL||_state->sb_maps==NULL|| + _state->sb_flags==NULL||_state->mb_maps==NULL||_state->mb_modes==NULL|| + _state->coded_fragis==NULL){ + return TH_EFAULT; + } + /*Create the mapping from super blocks to fragments.*/ + for(pli=0;pli<3;pli++){ + oc_fragment_plane *fplane; + fplane=_state->fplanes+pli; + oc_sb_create_plane_mapping(_state->sb_maps+fplane->sboffset, + _state->sb_flags+fplane->sboffset,fplane->froffset, + fplane->nhfrags,fplane->nvfrags); + } + /*Create the mapping from macro blocks to fragments.*/ + oc_mb_create_mapping(_state->mb_maps,_state->mb_modes, + _state->fplanes,_state->info.pixel_fmt); + /*Initialize the invalid and borderi fields of each fragment.*/ + oc_state_border_init(_state); + return 0; +} + +static void oc_state_frarray_clear(oc_theora_state *_state){ + _ogg_free(_state->coded_fragis); + _ogg_free(_state->mb_modes); + _ogg_free(_state->mb_maps); + _ogg_free(_state->sb_flags); + _ogg_free(_state->sb_maps); + _ogg_free(_state->frag_mvs); + _ogg_free(_state->frags); +} + + +/*Initializes the buffers used for reconstructed frames. + These buffers are padded with 16 extra pixels on each side, to allow + unrestricted motion vectors without special casing the boundary. + If chroma is decimated in either direction, the padding is reduced by a + factor of 2 on the appropriate sides. 
+ _nrefs: The number of reference buffers to init; must be 3 or 4.*/ +static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){ + th_info *info; + unsigned char *ref_frame_data; + size_t ref_frame_data_sz; + size_t ref_frame_sz; + size_t yplane_sz; + size_t cplane_sz; + int yhstride; + int yheight; + int chstride; + int cheight; + ptrdiff_t yoffset; + ptrdiff_t coffset; + ptrdiff_t *frag_buf_offs; + ptrdiff_t fragi; + int hdec; + int vdec; + int rfi; + int pli; + if(_nrefs<3||_nrefs>4)return TH_EINVAL; + info=&_state->info; + /*Compute the image buffer parameters for each plane.*/ + hdec=!(info->pixel_fmt&1); + vdec=!(info->pixel_fmt&2); + yhstride=info->frame_width+2*OC_UMV_PADDING; + yheight=info->frame_height+2*OC_UMV_PADDING; + chstride=yhstride>>hdec; + cheight=yheight>>vdec; + yplane_sz=yhstride*(size_t)yheight; + cplane_sz=chstride*(size_t)cheight; + yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride; + coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride; + ref_frame_sz=yplane_sz+2*cplane_sz; + ref_frame_data_sz=_nrefs*ref_frame_sz; + /*Check for overflow. 
+ The same caveats apply as for oc_state_frarray_init().*/ + if(yplane_sz/yhstride!=yheight||2*cplane_szfrag_buf_offs= + _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs)); + if(ref_frame_data==NULL||frag_buf_offs==NULL){ + _ogg_free(frag_buf_offs); + _ogg_free(ref_frame_data); + return TH_EFAULT; + } + /*Set up the width, height and stride for the image buffers.*/ + _state->ref_frame_bufs[0][0].width=info->frame_width; + _state->ref_frame_bufs[0][0].height=info->frame_height; + _state->ref_frame_bufs[0][0].stride=yhstride; + _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width= + info->frame_width>>hdec; + _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height= + info->frame_height>>vdec; + _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride= + chstride; + for(rfi=1;rfi<_nrefs;rfi++){ + memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0], + sizeof(_state->ref_frame_bufs[0])); + } + /*Set up the data pointers for the image buffers.*/ + for(rfi=0;rfi<_nrefs;rfi++){ + _state->ref_frame_data[rfi]=ref_frame_data; + _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset; + ref_frame_data+=yplane_sz; + _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset; + ref_frame_data+=cplane_sz; + _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset; + ref_frame_data+=cplane_sz; + /*Flip the buffer upside down. 
+ This allows us to decode Theora's bottom-up frames in their natural + order, yet return a top-down buffer with a positive stride to the user.*/ + oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi], + _state->ref_frame_bufs[rfi]); + } + _state->ref_ystride[0]=-yhstride; + _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride; + /*Initialize the fragment buffer offsets.*/ + ref_frame_data=_state->ref_frame_data[0]; + fragi=0; + for(pli=0;pli<3;pli++){ + th_img_plane *iplane; + oc_fragment_plane *fplane; + unsigned char *vpix; + ptrdiff_t stride; + ptrdiff_t vfragi_end; + int nhfrags; + iplane=_state->ref_frame_bufs[0]+pli; + fplane=_state->fplanes+pli; + vpix=iplane->data; + vfragi_end=fplane->froffset+fplane->nfrags; + nhfrags=fplane->nhfrags; + stride=iplane->stride; + while(fragiref_frame_idx[OC_FRAME_GOLD]= + _state->ref_frame_idx[OC_FRAME_PREV]= + _state->ref_frame_idx[OC_FRAME_SELF]=-1; + _state->ref_frame_idx[OC_FRAME_IO]=_nrefs>3?3:-1; + return 0; +} + +static void oc_state_ref_bufs_clear(oc_theora_state *_state){ + _ogg_free(_state->frag_buf_offs); + _ogg_free(_state->ref_frame_data[0]); +} + + +void oc_state_vtable_init_c(oc_theora_state *_state){ + _state->opt_vtable.frag_copy=oc_frag_copy_c; + _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c; + _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c; + _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c; + _state->opt_vtable.idct8x8=oc_idct8x8_c; + _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c; + _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_c; + _state->opt_vtable.state_loop_filter_frag_rows= + oc_state_loop_filter_frag_rows_c; + _state->opt_vtable.restore_fpu=oc_restore_fpu_c; + _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG; +} + +/*Initialize the accelerated function pointers.*/ +void oc_state_vtable_init(oc_theora_state *_state){ +#if defined(OC_X86_ASM) + oc_state_vtable_init_x86(_state); +#else + oc_state_vtable_init_c(_state); +#endif +} + + +int 
oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){ + int ret; + /*First validate the parameters.*/ + if(_info==NULL)return TH_EFAULT; + /*The width and height of the encoded frame must be multiples of 16. + They must also, when divided by 16, fit into a 16-bit unsigned integer. + The displayable frame offset coordinates must fit into an 8-bit unsigned + integer. + Note that the offset Y in the API is specified on the opposite side from + how it is specified in the bitstream, because the Y axis is flipped in + the bitstream. + The displayable frame must fit inside the encoded frame. + The color space must be one known by the encoder.*/ + if((_info->frame_width&0xF)||(_info->frame_height&0xF)|| + _info->frame_width<=0||_info->frame_width>=0x100000|| + _info->frame_height<=0||_info->frame_height>=0x100000|| + _info->pic_x+_info->pic_width>_info->frame_width|| + _info->pic_y+_info->pic_height>_info->frame_height|| + _info->pic_x>255||_info->frame_height-_info->pic_height-_info->pic_y>255|| + /*Note: the following <0 comparisons may generate spurious warnings on + platforms where enums are unsigned. + We could cast them to unsigned and just use the following >= comparison, + but there are a number of compilers which will mis-optimize this. 
+ It's better to live with the spurious warnings.*/ + _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES|| + _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS){ + return TH_EINVAL; + } + memset(_state,0,sizeof(*_state)); + memcpy(&_state->info,_info,sizeof(*_info)); + /*Invert the sense of pic_y to match Theora's right-handed coordinate + system.*/ + _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y; + _state->frame_type=OC_UNKWN_FRAME; + oc_state_vtable_init(_state); + ret=oc_state_frarray_init(_state); + if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs); + if(ret<0){ + oc_state_frarray_clear(_state); + return ret; + } + /*If the keyframe_granule_shift is out of range, use the maximum allowable + value.*/ + if(_info->keyframe_granule_shift<0||_info->keyframe_granule_shift>31){ + _state->info.keyframe_granule_shift=31; + } + _state->keyframe_num=0; + _state->curframe_num=-1; + /*3.2.0 streams mark the frame index instead of the frame count. + This was changed with stream version 3.2.1 to conform to other Ogg + codecs. + We add an extra bias when computing granule positions for new streams.*/ + _state->granpos_bias=TH_VERSION_CHECK(_info,3,2,1); + return 0; +} + +void oc_state_clear(oc_theora_state *_state){ + oc_state_ref_bufs_clear(_state); + oc_state_frarray_clear(_state); +} + + +/*Duplicates the pixels on the border of the image plane out into the + surrounding padding for use by unrestricted motion vectors. + This function only adds the left and right borders, and only for the fragment + rows specified. + _refi: The index of the reference buffer to pad. + _pli: The color plane. + _y0: The Y coordinate of the first row to pad. 
+ _yend: The Y coordinate of the row to stop padding at.*/ +void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli, + int _y0,int _yend){ + th_img_plane *iplane; + unsigned char *apix; + unsigned char *bpix; + unsigned char *epix; + int stride; + int hpadding; + hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1)); + iplane=_state->ref_frame_bufs[_refi]+_pli; + stride=iplane->stride; + apix=iplane->data+_y0*(ptrdiff_t)stride; + bpix=apix+iplane->width-1; + epix=iplane->data+_yend*(ptrdiff_t)stride; + /*Note the use of != instead of <, which allows the stride to be negative.*/ + while(apix!=epix){ + memset(apix-hpadding,apix[0],hpadding); + memset(bpix+1,bpix[0],hpadding); + apix+=stride; + bpix+=stride; + } +} + +/*Duplicates the pixels on the border of the image plane out into the + surrounding padding for use by unrestricted motion vectors. + This function only adds the top and bottom borders, and must be called after + the left and right borders are added. + _refi: The index of the reference buffer to pad. + _pli: The color plane.*/ +void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){ + th_img_plane *iplane; + unsigned char *apix; + unsigned char *bpix; + unsigned char *epix; + int stride; + int hpadding; + int vpadding; + int fullw; + hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1)); + vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2)); + iplane=_state->ref_frame_bufs[_refi]+_pli; + stride=iplane->stride; + fullw=iplane->width+(hpadding<<1); + apix=iplane->data-hpadding; + bpix=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding; + epix=apix-stride*(ptrdiff_t)vpadding; + while(apix!=epix){ + memcpy(apix-stride,apix,fullw); + memcpy(bpix+stride,bpix,fullw); + apix-=stride; + bpix+=stride; + } +} + +/*Duplicates the pixels on the border of the given reference image out into + the surrounding padding for use by unrestricted motion vectors. 
+ _state: The context containing the reference buffers. + _refi: The index of the reference buffer to pad.*/ +void oc_state_borders_fill(oc_theora_state *_state,int _refi){ + int pli; + for(pli=0;pli<3;pli++){ + oc_state_borders_fill_rows(_state,_refi,pli,0, + _state->ref_frame_bufs[_refi][pli].height); + oc_state_borders_fill_caps(_state,_refi,pli); + } +} + +/*Determines the offsets in an image buffer to use for motion compensation. + _state: The Theora state the offsets are to be computed with. + _offsets: Returns the offset for the buffer(s). + _offsets[0] is always set. + _offsets[1] is set if the motion vector has non-zero fractional + components. + _pli: The color plane index. + _dx: The X component of the motion vector. + _dy: The Y component of the motion vector. + Return: The number of offsets returned: 1 or 2.*/ +int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2], + int _pli,int _dx,int _dy){ + /*Here is a brief description of how Theora handles motion vectors: + Motion vector components are specified to half-pixel accuracy in + undecimated directions of each plane, and quarter-pixel accuracy in + decimated directions. + Integer parts are extracted by dividing (not shifting) by the + appropriate amount, with truncation towards zero. + These integer values are used to calculate the first offset. + + If either of the fractional parts are non-zero, then a second offset is + computed. + No third or fourth offsets are computed, even if both components have + non-zero fractional parts. 
+ The second offset is computed by dividing (not shifting) by the + appropriate amount, always truncating _away_ from zero.*/ +#if 0 + /*This version of the code doesn't use any tables, but is slower.*/ + int ystride; + int xprec; + int yprec; + int xfrac; + int yfrac; + int offs; + ystride=_state->ref_ystride[_pli]; + /*These two variables decide whether we are in half- or quarter-pixel + precision in each component.*/ + xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1)); + yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2)); + /*These two variables are either 0 if all the fractional bits are zero or -1 + if any of them are non-zero.*/ + xfrac=OC_SIGNMASK(-(_dx&(xprec|1))); + yfrac=OC_SIGNMASK(-(_dy&(yprec|1))); + offs=(_dx>>xprec)+(_dy>>yprec)*ystride; + if(xfrac||yfrac){ + int xmask; + int ymask; + xmask=OC_SIGNMASK(_dx); + ymask=OC_SIGNMASK(_dy); + yfrac&=ystride; + _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask); + _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask); + return 2; + } + else{ + _offsets[0]=offs; + return 1; + } +#else + /*Using tables simplifies the code, and there's enough arithmetic to hide the + latencies of the memory references.*/ + static const signed char OC_MVMAP[2][64]={ + { + -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8, + -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, + 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, + 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15 + }, + { + -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, -4, -4, + -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7 + } + }; + static const signed char OC_MVMAP2[2][64]={ + { + -1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, + 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 + }, + { + -1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, + 
0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, + 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, + 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 + } + }; + int ystride; + int qpx; + int qpy; + int mx; + int my; + int mx2; + int my2; + int offs; + ystride=_state->ref_ystride[_pli]; + qpy=_pli!=0&&!(_state->info.pixel_fmt&2); + my=OC_MVMAP[qpy][_dy+31]; + my2=OC_MVMAP2[qpy][_dy+31]; + qpx=_pli!=0&&!(_state->info.pixel_fmt&1); + mx=OC_MVMAP[qpx][_dx+31]; + mx2=OC_MVMAP2[qpx][_dx+31]; + offs=my*ystride+mx; + if(mx2||my2){ + _offsets[1]=offs+my2*ystride+mx2; + _offsets[0]=offs; + return 2; + } + _offsets[0]=offs; + return 1; +#endif +} + +void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){ + _state->opt_vtable.state_frag_recon(_state,_fragi,_pli,_dct_coeffs, + _last_zzi,_dc_quant); +} + +void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){ + unsigned char *dst; + ptrdiff_t frag_buf_off; + int ystride; + int mb_mode; + /*Apply the inverse transform.*/ + /*Special case only having a DC component.*/ + if(_last_zzi<2){ + ogg_int16_t p; + int ci; + /*We round this dequant product (and not any of the others) because there's + no iDCT rounding.*/ + p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); + /*LOOP VECTORIZES.*/ + for(ci=0;ci<64;ci++)_dct_coeffs[ci]=p; + } + else{ + /*First, dequantize the DC coefficient.*/ + _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); + oc_idct8x8(_state,_dct_coeffs,_last_zzi); + } + /*Fill in the target buffer.*/ + frag_buf_off=_state->frag_buf_offs[_fragi]; + mb_mode=_state->frags[_fragi].mb_mode; + ystride=_state->ref_ystride[_pli]; + dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off; + if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra(_state,dst,ystride,_dct_coeffs); + else{ + const unsigned char 
*ref; + int mvoffsets[2]; + ref= + _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]] + +frag_buf_off; + if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, + _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){ + oc_frag_recon_inter2(_state, + dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,_dct_coeffs); + } + else oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,_dct_coeffs); + } +} + +/*Copies the fragments specified by the lists of fragment indices from one + frame to another. + _fragis: A pointer to a list of fragment indices. + _nfragis: The number of fragment indices to copy. + _dst_frame: The reference frame to copy to. + _src_frame: The reference frame to copy from. + _pli: The color plane the fragments lie in.*/ +void oc_state_frag_copy_list(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli){ + _state->opt_vtable.state_frag_copy_list(_state,_fragis,_nfragis,_dst_frame, + _src_frame,_pli); +} + +void oc_state_frag_copy_list_c(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli){ + const ptrdiff_t *frag_buf_offs; + const unsigned char *src_frame_data; + unsigned char *dst_frame_data; + ptrdiff_t fragii; + int ystride; + dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]]; + src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]]; + ystride=_state->ref_ystride[_pli]; + frag_buf_offs=_state->frag_buf_offs; + for(fragii=0;fragii<_nfragis;fragii++){ + ptrdiff_t frag_buf_off; + frag_buf_off=frag_buf_offs[_fragis[fragii]]; + oc_frag_copy(_state,dst_frame_data+frag_buf_off, + src_frame_data+frag_buf_off,ystride); + } +} + +static void loop_filter_h(unsigned char *_pix,int _ystride,int *_bv){ + int y; + _pix-=2; + for(y=0;y<8;y++){ + int f; + f=_pix[0]-_pix[3]+3*(_pix[2]-_pix[1]); + /*The _bv array is used to compute the function + 
f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0)); + where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/ + f=*(_bv+(f+4>>3)); + _pix[1]=OC_CLAMP255(_pix[1]+f); + _pix[2]=OC_CLAMP255(_pix[2]-f); + _pix+=_ystride; + } +} + +static void loop_filter_v(unsigned char *_pix,int _ystride,int *_bv){ + int x; + _pix-=_ystride*2; + for(x=0;x<8;x++){ + int f; + f=_pix[x]-_pix[_ystride*3+x]+3*(_pix[_ystride*2+x]-_pix[_ystride+x]); + /*The _bv array is used to compute the function + f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0)); + where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/ + f=*(_bv+(f+4>>3)); + _pix[_ystride+x]=OC_CLAMP255(_pix[_ystride+x]+f); + _pix[_ystride*2+x]=OC_CLAMP255(_pix[_ystride*2+x]-f); + } +} + +/*Initialize the bounding values array used by the loop filter. + _bv: Storage for the array. + Return: 0 on success, or a non-zero value if no filtering need be applied.*/ +int oc_state_loop_filter_init(oc_theora_state *_state,int _bv[256]){ + int flimit; + int i; + flimit=_state->loop_filter_limits[_state->qis[0]]; + if(flimit==0)return 1; + memset(_bv,0,sizeof(_bv[0])*256); + for(i=0;i=0)_bv[127-i-flimit]=i-flimit; + _bv[127-i]=-i; + _bv[127+i]=i; + if(127+i+flimit<256)_bv[127+i+flimit]=flimit-i; + } + return 0; +} + +/*Apply the loop filter to a given set of fragment rows in the given plane. + The filter may be run on the bottom edge, affecting pixels in the next row of + fragments, so this row also needs to be available. + _bv: The bounding values array. + _refi: The index of the frame buffer to filter. + _pli: The color plane to filter. + _fragy0: The Y coordinate of the first fragment row to filter. 
+ _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ +void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,int _bv[256], + int _refi,int _pli,int _fragy0,int _fragy_end){ + _state->opt_vtable.state_loop_filter_frag_rows(_state,_bv,_refi,_pli, + _fragy0,_fragy_end); +} + +void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,int *_bv, + int _refi,int _pli,int _fragy0,int _fragy_end){ + const oc_fragment_plane *fplane; + const oc_fragment *frags; + const ptrdiff_t *frag_buf_offs; + unsigned char *ref_frame_data; + ptrdiff_t fragi_top; + ptrdiff_t fragi_bot; + ptrdiff_t fragi0; + ptrdiff_t fragi0_end; + int ystride; + int nhfrags; + _bv+=127; + fplane=_state->fplanes+_pli; + nhfrags=fplane->nhfrags; + fragi_top=fplane->froffset; + fragi_bot=fragi_top+fplane->nfrags; + fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags; + fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags; + ystride=_state->ref_ystride[_pli]; + frags=_state->frags; + frag_buf_offs=_state->frag_buf_offs; + ref_frame_data=_state->ref_frame_data[_refi]; + /*The following loops are constructed somewhat non-intuitively on purpose. + The main idea is: if a block boundary has at least one coded fragment on + it, the filter is applied to it. 
+ However, the order that the filters are applied in matters, and VP3 chose + the somewhat strange ordering used below.*/ + while(fragi0fragi0)loop_filter_h(ref,ystride,_bv); + if(fragi0>fragi_top)loop_filter_v(ref,ystride,_bv); + if(fragi+1info.frame_width; + height=_state->info.frame_height; + iframe=_state->granpos>>_state->info.keyframe_granule_shift; + pframe=_state->granpos-(iframe<<_state->info.keyframe_granule_shift); + sprintf(fname,"%08i%s.png",(int)(iframe+pframe),_suf); + fp=fopen(fname,"wb"); + if(fp==NULL)return TH_EFAULT; + image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image)); + if(image==NULL){ + fclose(fp); + return TH_EFAULT; + } + png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL); + if(png==NULL){ + oc_free_2d(image); + fclose(fp); + return TH_EFAULT; + } + info=png_create_info_struct(png); + if(info==NULL){ + png_destroy_write_struct(&png,NULL); + oc_free_2d(image); + fclose(fp); + return TH_EFAULT; + } + if(setjmp(png_jmpbuf(png))){ + png_destroy_write_struct(&png,&info); + oc_free_2d(image); + fclose(fp); + return TH_EFAULT; + } + framei=_state->ref_frame_idx[_frame]; + y_row=_state->ref_frame_bufs[framei][0].data; + u_row=_state->ref_frame_bufs[framei][1].data; + v_row=_state->ref_frame_bufs[framei][2].data; + y_stride=_state->ref_frame_bufs[framei][0].stride; + u_stride=_state->ref_frame_bufs[framei][1].stride; + v_stride=_state->ref_frame_bufs[framei][2].stride; + /*Chroma up-sampling is just done with a box filter. + This is very likely what will actually be used in practice on a real + display, and also removes one more layer to search in for the source of + artifacts. 
+ As an added bonus, it's dead simple.*/ + for(imgi=height;imgi-->0;){ + int dc; + y=y_row; + u=u_row; + v=v_row; + for(imgj=0;imgj<6*width;){ + float yval; + float uval; + float vval; + unsigned rval; + unsigned gval; + unsigned bval; + /*This is intentionally slow and very accurate.*/ + yval=(*y-16)*(1.0F/219); + uval=(*u-128)*(2*(1-0.114F)/224); + vval=(*v-128)*(2*(1-0.299F)/224); + rval=OC_CLAMPI(0,(int)(65535*(yval+vval)+0.5F),65535); + gval=OC_CLAMPI(0,(int)(65535*( + yval-uval*(0.114F/0.587F)-vval*(0.299F/0.587F))+0.5F),65535); + bval=OC_CLAMPI(0,(int)(65535*(yval+uval)+0.5F),65535); + image[imgi][imgj++]=(unsigned char)(rval>>8); + image[imgi][imgj++]=(unsigned char)(rval&0xFF); + image[imgi][imgj++]=(unsigned char)(gval>>8); + image[imgi][imgj++]=(unsigned char)(gval&0xFF); + image[imgi][imgj++]=(unsigned char)(bval>>8); + image[imgi][imgj++]=(unsigned char)(bval&0xFF); + dc=(y-y_row&1)|(_state->info.pixel_fmt&1); + y++; + u+=dc; + v+=dc; + } + dc=-((height-1-imgi&1)|_state->info.pixel_fmt>>1); + y_row+=y_stride; + u_row+=dc&u_stride; + v_row+=dc&v_stride; + } + png_init_io(png,fp); + png_set_compression_level(png,Z_BEST_COMPRESSION); + png_set_IHDR(png,info,width,height,16,PNG_COLOR_TYPE_RGB, + PNG_INTERLACE_NONE,PNG_COMPRESSION_TYPE_DEFAULT,PNG_FILTER_TYPE_DEFAULT); + switch(_state->info.colorspace){ + case TH_CS_ITU_REC_470M:{ + png_set_gAMA(png,info,2.2); + png_set_cHRM_fixed(png,info,31006,31616, + 67000,32000,21000,71000,14000,8000); + }break; + case TH_CS_ITU_REC_470BG:{ + png_set_gAMA(png,info,2.67); + png_set_cHRM_fixed(png,info,31271,32902, + 64000,33000,29000,60000,15000,6000); + }break; + default:break; + } + png_set_pHYs(png,info,_state->info.aspect_numerator, + _state->info.aspect_denominator,0); + png_set_rows(png,info,image); + png_write_png(png,info,PNG_TRANSFORM_IDENTITY,NULL); + png_write_end(png,info); + png_destroy_write_struct(&png,&info); + oc_free_2d(image); + fclose(fp); + return 0; +} +#endif + + + +ogg_int64_t 
th_granule_frame(void *_encdec,ogg_int64_t _granpos){ + oc_theora_state *state; + state=(oc_theora_state *)_encdec; + if(_granpos>=0){ + ogg_int64_t iframe; + ogg_int64_t pframe; + iframe=_granpos>>state->info.keyframe_granule_shift; + pframe=_granpos-(iframe<<state->info.keyframe_granule_shift); + /*3.2.0 streams store the frame index in the granule position. + 3.2.1 and later store the frame count. + We return the index, so adjust the value if we have a 3.2.1 or later + stream.*/ + return iframe+pframe-TH_VERSION_CHECK(&state->info,3,2,1); + } + return -1; +} + +double th_granule_time(void *_encdec,ogg_int64_t _granpos){ + oc_theora_state *state; + state=(oc_theora_state *)_encdec; + if(_granpos>=0){ + return (th_granule_frame(_encdec, _granpos)+1)*( + (double)state->info.fps_denominator/state->info.fps_numerator); + } + return -1; +} diff --git a/thirdparty/libtheora/theora/codec.h b/thirdparty/libtheora/theora/codec.h new file mode 100644 index 0000000000..5c2669630c --- /dev/null +++ b/thirdparty/libtheora/theora/codec.h @@ -0,0 +1,591 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $ + + ********************************************************************/ + +/**\mainpage + * + * \section intro Introduction + * + * This is the documentation for libtheora C API. + * The current reference + * implementation for Theora, a free, + * patent-unencumbered video codec. 
+ * Theora is derived from On2's VP3 codec with additional features and + * integration with Ogg multimedia formats by + * the Xiph.Org Foundation. + * Complete documentation of the format itself is available in + * the Theora + * specification. + * + * \subsection Organization + * + * The functions documented here are actually subdivided into three + * separate libraries: + * - libtheoraenc contains the encoder interface, + * described in \ref encfuncs. + * - libtheoradec contains the decoder interface and + * routines shared with the encoder. + * You must also link to this if you link to libtheoraenc. + * The routines in this library are described in \ref decfuncs and + * \ref basefuncs. + * - libtheora contains the \ref oldfuncs. + * + * New code should link to libtheoradec and, if using encoder + * features, libtheoraenc. Together these two export both + * the standard and the legacy API, so this is all that is needed by + * any code. The older libtheora library is provided just for + * compatibility with older build configurations. + * + * In general the recommended 1.x API symbols can be distinguished + * by their th_ or TH_ namespace prefix. + * The older, legacy API uses theora_ or OC_ + * prefixes instead. + */ + +/**\file + * The shared libtheoradec and libtheoraenc C API. 
+ * You don't need to include this directly.*/ + +#if !defined(_O_THEORA_CODEC_H_) +# define _O_THEORA_CODEC_H_ (1) +# include <ogg/ogg.h> + +#if defined(__cplusplus) +extern "C" { +#endif + + + +/**\name Return codes*/ +/*@{*/ +/**An invalid pointer was provided.*/ +#define TH_EFAULT (-1) +/**An invalid argument was provided.*/ +#define TH_EINVAL (-10) +/**The contents of the header were incomplete, invalid, or unexpected.*/ +#define TH_EBADHEADER (-20) +/**The header does not belong to a Theora stream.*/ +#define TH_ENOTFORMAT (-21) +/**The bitstream version is too high.*/ +#define TH_EVERSION (-22) +/**The specified function is not implemented.*/ +#define TH_EIMPL (-23) +/**There were errors in the video data packet.*/ +#define TH_EBADPACKET (-24) +/**The decoded packet represented a dropped frame. + The player can continue to display the current frame, as the contents of the + decoded frame buffer have not changed.*/ +#define TH_DUPFRAME (1) +/*@}*/ + +/**The currently defined color space tags. + * See the Theora + * specification, Chapter 4, for exact details on the meaning + * of each of these color spaces.*/ +typedef enum{ + /**The color space was not specified at the encoder. + It may be conveyed by an external means.*/ + TH_CS_UNSPECIFIED, + /**A color space designed for NTSC content.*/ + TH_CS_ITU_REC_470M, + /**A color space designed for PAL/SECAM content.*/ + TH_CS_ITU_REC_470BG, + /**The total number of currently defined color spaces.*/ + TH_CS_NSPACES +}th_colorspace; + +/**The currently defined pixel format tags. + * See the Theora + * specification, Section 4.4, for details on the precise sample + * locations.*/ +typedef enum{ + /**Chroma decimation by 2 in both the X and Y directions (4:2:0). + The Cb and Cr chroma planes are half the width and half the + height of the luma plane.*/ + TH_PF_420, + /**Currently reserved.*/ + TH_PF_RSVD, + /**Chroma decimation by 2 in the X direction (4:2:2). 
+ The Cb and Cr chroma planes are half the width of the luma plane, but full + height.*/ + TH_PF_422, + /**No chroma decimation (4:4:4). + The Cb and Cr chroma planes are full width and full height.*/ + TH_PF_444, + /**The total number of currently defined pixel formats.*/ + TH_PF_NFORMATS +}th_pixel_fmt; + + + +/**A buffer for a single color plane in an uncompressed image. + * This contains the image data in a left-to-right, top-down format. + * Each row of pixels is stored contiguously in memory, but successive + * rows need not be. + * Use \a stride to compute the offset of the next row. + * The encoder accepts both positive \a stride values (top-down in memory) + * and negative (bottom-up in memory). + * The decoder currently always generates images with positive strides.*/ +typedef struct{ + /**The width of this plane.*/ + int width; + /**The height of this plane.*/ + int height; + /**The offset in bytes between successive rows.*/ + int stride; + /**A pointer to the beginning of the first row.*/ + unsigned char *data; +}th_img_plane; + +/**A complete image buffer for an uncompressed frame. + * The chroma planes may be decimated by a factor of two in either + * direction, as indicated by th_info#pixel_fmt. + * The width and height of the Y' plane must be multiples of 16. + * They may need to be cropped for display, using the rectangle + * specified by th_info#pic_x, th_info#pic_y, th_info#pic_width, + * and th_info#pic_height. + * All samples are 8 bits. + * \note The term YUV often used to describe a colorspace is ambiguous. + * The exact parameters of the RGB to YUV conversion process aside, in + * many contexts the U and V channels actually have opposite meanings. + * To avoid this confusion, we are explicit: the name of the color + * channels are Y'CbCr, and they appear in that order, always. + * The prime symbol denotes that the Y channel is non-linear. 
+ * Cb and Cr stand for "Chroma blue" and "Chroma red", respectively.*/ +typedef th_img_plane th_ycbcr_buffer[3]; + +/**Theora bitstream information. + * This contains the basic playback parameters for a stream, and corresponds to + * the initial 'info' header packet. + * To initialize an encoder, the application fills in this structure and + * passes it to th_encode_alloc(). + * A default encoding mode is chosen based on the values of the #quality and + * #target_bitrate fields. + * On decode, it is filled in by th_decode_headerin(), and then passed to + * th_decode_alloc(). + * + * Encoded Theora frames must be a multiple of 16 in size; + * this is what the #frame_width and #frame_height members represent. + * To handle arbitrary picture sizes, a crop rectangle is specified in the + * #pic_x, #pic_y, #pic_width and #pic_height members. + * + * All frame buffers contain pointers to the full, padded frame. + * However, the current encoder will not reference pixels outside of + * the cropped picture region, and the application does not need to fill them + * in. + * The decoder will allocate storage for a full frame, but the + * application should not rely on the padding containing sensible + * data. + * + * It is also generally recommended that the offsets and sizes should still be + * multiples of 2 to avoid chroma sampling shifts when chroma is sub-sampled. + * See the Theora + * specification, Section 4.4, for more details. + * + * Frame rate, in frames per second, is stored as a rational fraction, as is + * the pixel aspect ratio. + * Note that this refers to the aspect ratio of the individual pixels, not of + * the overall frame itself. + * The frame aspect ratio can be computed from pixel aspect ratio using the + * image dimensions.*/ +typedef struct{ + /**\name Theora version + * Bitstream version information.*/ + /*@{*/ + unsigned char version_major; + unsigned char version_minor; + unsigned char version_subminor; + /*@}*/ + /**The encoded frame width. 
+ * This must be a multiple of 16, and less than 1048576.*/ + ogg_uint32_t frame_width; + /**The encoded frame height. + * This must be a multiple of 16, and less than 1048576.*/ + ogg_uint32_t frame_height; + /**The displayed picture width. + * This must be no larger than #frame_width.*/ + ogg_uint32_t pic_width; + /**The displayed picture height. + * This must be no larger than #frame_height.*/ + ogg_uint32_t pic_height; + /**The X offset of the displayed picture. + * This must be no larger than #frame_width-#pic_width or 255, whichever is + * smaller.*/ + ogg_uint32_t pic_x; + /**The Y offset of the displayed picture. + * This must be no larger than #frame_height-#pic_height, and + * #frame_height-#pic_height-#pic_y must be no larger than 255. + * This slightly funny restriction is due to the fact that the offset is + * specified from the top of the image for consistency with the standard + * graphics left-handed coordinate system used throughout this API, while + * it is stored in the encoded stream as an offset from the bottom.*/ + ogg_uint32_t pic_y; + /**\name Frame rate + * The frame rate, as a fraction. + * If either is 0, the frame rate is undefined.*/ + /*@{*/ + ogg_uint32_t fps_numerator; + ogg_uint32_t fps_denominator; + /*@}*/ + /**\name Aspect ratio + * The aspect ratio of the pixels. + * If either value is zero, the aspect ratio is undefined. + * If not specified by any external means, 1:1 should be assumed. + * The aspect ratio of the full picture can be computed as + * \code + * aspect_numerator*pic_width/(aspect_denominator*pic_height). + * \endcode */ + /*@{*/ + ogg_uint32_t aspect_numerator; + ogg_uint32_t aspect_denominator; + /*@}*/ + /**The color space.*/ + th_colorspace colorspace; + /**The pixel format.*/ + th_pixel_fmt pixel_fmt; + /**The target bit-rate in bits per second. + If initializing an encoder with this struct, set this field to a non-zero + value to activate CBR encoding by default.*/ + int target_bitrate; + /**The target quality level. 
+ Valid values range from 0 to 63, inclusive, with higher values giving + higher quality. + If initializing an encoder with this struct, and #target_bitrate is set + to zero, VBR encoding at this quality will be activated by default.*/ + /*Currently this is set so that a qi of 0 corresponds to distortions of 24 + times the JND, and each increase by 16 halves that value. + This gives us fine discrimination at low qualities, yet effective rate + control at high qualities. + The qi value 63 is special, however. + For this, the highest quality, we use one half of a JND for our threshold. + Due to the lower bounds placed on allowable quantizers in Theora, we will + not actually be able to achieve quality this good, but this should + provide as close to visually lossless quality as Theora is capable of. + We could lift the quantizer restrictions without breaking VP3.1 + compatibility, but this would result in quantized coefficients that are + too large for the current bitstream to be able to store. + We'd have to redesign the token syntax to store these large coefficients, + which would make transcoding complex.*/ + int quality; + /**The amount to shift to extract the last keyframe number from the granule + * position. + * This can be at most 31. + * th_info_init() will set this to a default value (currently 6, + * which is good for streaming applications), but you can set it to 0 to + * make every frame a keyframe. + * The maximum distance between key frames is + * 1<<#keyframe_granule_shift. + * The keyframe frequency can be more finely controlled with + * #TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE, which can also be adjusted + * during encoding (for example, to force the next frame to be a keyframe), + * but it cannot be set larger than the amount permitted by this field after + * the headers have been output.*/ + int keyframe_granule_shift; +}th_info; + +/**The comment information. 
+ * + * This structure holds the in-stream metadata corresponding to + * the 'comment' header packet. + * The comment header is meant to be used much like someone jotting a quick + * note on the label of a video. + * It should be a short, to the point text note that can be more than a couple + * words, but not more than a short paragraph. + * + * The metadata is stored as a series of (tag, value) pairs, in + * length-encoded string vectors. + * The first occurrence of the '=' character delimits the tag and value. + * A particular tag may occur more than once, and order is significant. + * The character set encoding for the strings is always UTF-8, but the tag + * names are limited to ASCII, and treated as case-insensitive. + * See the Theora + * specification, Section 6.3.3 for details. + * + * In filling in this structure, th_decode_headerin() will null-terminate + * the user_comment strings for safety. + * However, the bitstream format itself treats them as 8-bit clean vectors, + * possibly containing null characters, and so the length array should be + * treated as their authoritative length. + */ +typedef struct th_comment{ + /**The array of comment string vectors.*/ + char **user_comments; + /**An array of the corresponding length of each vector, in bytes.*/ + int *comment_lengths; + /**The total number of comment strings.*/ + int comments; + /**The null-terminated vendor string. + This identifies the software used to encode the stream.*/ + char *vendor; +}th_comment; + + + +/**A single base matrix.*/ +typedef unsigned char th_quant_base[64]; + +/**A set of \a qi ranges.*/ +typedef struct{ + /**The number of ranges in the set.*/ + int nranges; + /**The size of each of the #nranges ranges. + These must sum to 63.*/ + const int *sizes; + /**#nranges +1 base matrices. + Matrices \a i and i+1 form the endpoints of range \a i.*/ + const th_quant_base *base_matrices; +}th_quant_ranges; + +/**A complete set of quantization parameters. 
+ The quantizer for each coefficient is calculated as: + \code + Q=MIN(MAX(qmin[qti][ci!=0],scale[ci!=0][qi]*base[qti][pli][qi][ci]/100), + 1024). + \endcode + + \a qti is the quantization type index: 0 for intra, 1 for inter. + ci!=0 is 0 for the DC coefficient and 1 for AC coefficients. + \a qi is the quality index, ranging between 0 (low quality) and 63 (high + quality). + \a pli is the color plane index: 0 for Y', 1 for Cb, 2 for Cr. + \a ci is the DCT coefficient index. + Coefficient indices correspond to the normal 2D DCT block + ordering--row-major with low frequencies first--\em not zig-zag order. + + Minimum quantizers are constant, and are given by: + \code + qmin[2][2]={{4,2},{8,4}}. + \endcode + + Parameters that can be stored in the bitstream are as follows: + - The two scale matrices ac_scale and dc_scale. + \code + scale[2][64]={dc_scale,ac_scale}. + \endcode + - The base matrices for each \a qi, \a qti and \a pli (up to 384 in all). + In order to avoid storing a full 384 base matrices, only a sparse set of + matrices are stored, and the rest are linearly interpolated. + This is done as follows. + For each \a qti and \a pli, a series of \a n \a qi ranges is defined. + The size of each \a qi range can vary arbitrarily, but they must sum to + 63. + Then, n+1 matrices are specified, one for each endpoint of the + ranges. + For interpolation purposes, each range's endpoints are the first \a qi + value it contains and one past the last \a qi value it contains. + Fractional values are rounded to the nearest integer, with ties rounded + away from zero. + + Base matrices are stored by reference, so if the same matrices are used + multiple times, they will only appear once in the bitstream. + The bitstream is also capable of omitting an entire set of ranges and + its associated matrices if they are the same as either the previous + set (indexed in row-major order) or if the inter set is the same as the + intra set. + + - Loop filter limit values. 
+ The same limits are used for the loop filter in all color planes, despite + potentially differing levels of quantization in each. + + For the current encoder, scale[ci!=0][qi] must be no greater + than scale[ci!=0][qi-1] and base[qti][pli][qi][ci] must + be no greater than base[qti][pli][qi-1][ci]. + These two conditions ensure that the actual quantizer for a given \a qti, + \a pli, and \a ci does not increase as \a qi increases. + This is not required by the decoder.*/ +typedef struct{ + /**The DC scaling factors.*/ + ogg_uint16_t dc_scale[64]; + /**The AC scaling factors.*/ + ogg_uint16_t ac_scale[64]; + /**The loop filter limit values.*/ + unsigned char loop_filter_limits[64]; + /**The \a qi ranges for each \a qti and \a pli.*/ + th_quant_ranges qi_ranges[2][3]; +}th_quant_info; + + + +/**The number of Huffman tables used by Theora.*/ +#define TH_NHUFFMAN_TABLES (80) +/**The number of DCT token values in each table.*/ +#define TH_NDCT_TOKENS (32) + +/**A Huffman code for a Theora DCT token. + * Each set of Huffman codes in a given table must form a complete, prefix-free + * code. + * There is no requirement that all the tokens in a table have a valid code, + * but the current encoder is not optimized to take advantage of this. + * If each of the five groups of 16 tables does not contain at least one table + * with a code for every token, then the encoder may fail to encode certain + * frames. + * The complete table in the first group of 16 does not have to be in the same + * place as the complete table in the other groups, but the complete tables in + * the remaining four groups must all be in the same place.*/ +typedef struct{ + /**The bit pattern for the code, with the LSbit of the pattern aligned in + * the LSbit of the word.*/ + ogg_uint32_t pattern; + /**The number of bits in the code. 
+ * This must be between 0 and 32, inclusive.*/ + int nbits; +}th_huff_code; + + + +/**\defgroup basefuncs Functions Shared by Encode and Decode*/ +/*@{*/ +/**\name Basic shared functions*/ +/*@{*/ +/**Retrieves a human-readable string to identify the library vendor and + * version. + * \return the version string.*/ +extern const char *th_version_string(void); +/**Retrieves the library version number. + * This is the highest bitstream version that the encoder library will produce, + * or that the decoder library can decode. + * This number is composed of a 16-bit major version, 8-bit minor version + * and 8 bit sub-version, composed as follows: + * \code + * (VERSION_MAJOR<<16)+(VERSION_MINOR<<8)+(VERSION_SUBMINOR) + * \endcode + * \return the version number.*/ +extern ogg_uint32_t th_version_number(void); +/**Converts a granule position to an absolute frame index, starting at + * 0. + * The granule position is interpreted in the context of a given + * #th_enc_ctx or #th_dec_ctx handle (either will suffice). + * \param _encdec A previously allocated #th_enc_ctx or #th_dec_ctx + * handle. + * \param _granpos The granule position to convert. + * \returns The absolute frame index corresponding to \a _granpos. + * \retval -1 The given granule position was invalid (i.e. negative).*/ +extern ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos); +/**Converts a granule position to an absolute time in seconds. + * The granule position is interpreted in the context of a given + * #th_enc_ctx or #th_dec_ctx handle (either will suffice). + * \param _encdec A previously allocated #th_enc_ctx or #th_dec_ctx + * handle. + * \param _granpos The granule position to convert. + * \return The absolute time in seconds corresponding to \a _granpos. + * This is the "end time" for the frame, or the latest time it should + * be displayed. + * It is not the presentation time. + * \retval -1 The given granule position was invalid (i.e. 
negative).*/ +extern double th_granule_time(void *_encdec,ogg_int64_t _granpos); +/**Determines whether a Theora packet is a header or not. + * This function does no verification beyond checking the packet type bit, so + * it should not be used for bitstream identification; use + * th_decode_headerin() for that. + * As per the Theora specification, an empty (0-byte) packet is treated as a + * data packet (a delta frame with no coded blocks). + * \param _op An ogg_packet containing encoded Theora data. + * \retval 1 The packet is a header packet + * \retval 0 The packet is a video data packet.*/ +extern int th_packet_isheader(ogg_packet *_op); +/**Determines whether a theora packet is a key frame or not. + * This function does no verification beyond checking the packet type and + * key frame bits, so it should not be used for bitstream identification; use + * th_decode_headerin() for that. + * As per the Theora specification, an empty (0-byte) packet is treated as a + * delta frame (with no coded blocks). + * \param _op An ogg_packet containing encoded Theora data. + * \retval 1 The packet contains a key frame. + * \retval 0 The packet contains a delta frame. + * \retval -1 The packet is not a video data packet.*/ +extern int th_packet_iskeyframe(ogg_packet *_op); +/*@}*/ + + +/**\name Functions for manipulating header data*/ +/*@{*/ +/**Initializes a th_info structure. + * This should be called on a freshly allocated #th_info structure before + * attempting to use it. + * \param _info The #th_info struct to initialize.*/ +extern void th_info_init(th_info *_info); +/**Clears a #th_info structure. + * This should be called on a #th_info structure after it is no longer + * needed. + * \param _info The #th_info struct to clear.*/ +extern void th_info_clear(th_info *_info); + +/**Initialize a #th_comment structure. + * This should be called on a freshly allocated #th_comment structure + * before attempting to use it. 
+ * \param _tc The #th_comment struct to initialize.*/ +extern void th_comment_init(th_comment *_tc); +/**Add a comment to an initialized #th_comment structure. + * \note Neither th_comment_add() nor th_comment_add_tag() support + * comments containing null values, although the bitstream format does + * support them. + * To add such comments you will need to manipulate the #th_comment + * structure directly. + * \param _tc The #th_comment struct to add the comment to. + * \param _comment Must be a null-terminated UTF-8 string containing the + * comment in "TAG=the value" form.*/ +extern void th_comment_add(th_comment *_tc, char *_comment); +/**Add a comment to an initialized #th_comment structure. + * \note Neither th_comment_add() nor th_comment_add_tag() support + * comments containing null values, although the bitstream format does + * support them. + * To add such comments you will need to manipulate the #th_comment + * structure directly. + * \param _tc The #th_comment struct to add the comment to. + * \param _tag A null-terminated string containing the tag associated with + * the comment. + * \param _val The corresponding value as a null-terminated string.*/ +extern void th_comment_add_tag(th_comment *_tc,char *_tag,char *_val); +/**Look up a comment value by its tag. + * \param _tc An initialized #th_comment structure. + * \param _tag The tag to look up. + * \param _count The instance of the tag. + * The same tag can appear multiple times, each with a distinct + * value, so an index is required to retrieve them all. + * The order in which these values appear is significant and + * should be preserved. + * Use th_comment_query_count() to get the legal range for + * the \a _count parameter. + * \return A pointer to the queried tag's value. + * This points directly to data in the #th_comment structure. + * It should not be modified or freed by the application, and + * modifications to the structure may invalidate the pointer. 
+ * \retval NULL If no matching tag is found.*/ +extern char *th_comment_query(th_comment *_tc,char *_tag,int _count); +/**Look up the number of instances of a tag. + * Call this first when querying for a specific tag and then iterate over the + * number of instances with separate calls to th_comment_query() to + * retrieve all the values for that tag in order. + * \param _tc An initialized #th_comment structure. + * \param _tag The tag to look up. + * \return The number of instances of this particular tag.*/ +extern int th_comment_query_count(th_comment *_tc,char *_tag); +/**Clears a #th_comment structure. + * This should be called on a #th_comment structure after it is no longer + * needed. + * It will free all memory used by the structure members. + * \param _tc The #th_comment struct to clear.*/ +extern void th_comment_clear(th_comment *_tc); +/*@}*/ +/*@}*/ + + + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/thirdparty/libtheora/theora/theora.h b/thirdparty/libtheora/theora/theora.h new file mode 100644 index 0000000000..af6eb6f380 --- /dev/null +++ b/thirdparty/libtheora/theora/theora.h @@ -0,0 +1,784 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: theora.h,v 1.17 2003/12/06 18:06:19 arc Exp $ + + ********************************************************************/ + +#ifndef _O_THEORA_H_ +#define _O_THEORA_H_ + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + +#include <stddef.h> /* for size_t */ + +#include <ogg/ogg.h> + +/** \file + * The libtheora pre-1.0 legacy C API. + * + * \ingroup oldfuncs + * + * \section intro Introduction + * + * This is the documentation for the libtheora legacy C API, declared in + * the theora.h header, which describes the old interface used before + * the 1.0 release. This API was widely deployed for several years and + * remains supported, but for new code we recommend the cleaner API + * declared in theoradec.h and theoraenc.h. + * + * libtheora is the reference implementation for + * Theora, a free video codec. + * Theora is derived from On2's VP3 codec with improved integration with + * Ogg multimedia formats by Xiph.Org. + * + * \section overview Overview + * + * This library will both decode and encode theora packets to/from raw YUV + * frames. In either case, the packets will most likely either come from or + * need to be embedded in an Ogg stream. Use + * libogg or + * liboggz + * to extract/package these packets. + * + * \section decoding Decoding Process + * + * Decoding can be separated into the following steps: + * -# initialise theora_info and theora_comment structures using + * theora_info_init() and theora_comment_init(): + \verbatim + theora_info info; + theora_comment comment; + + theora_info_init(&info); + theora_comment_init(&comment); + \endverbatim + * -# retrieve header packets from Ogg stream (there should be 3) and decode + * into theora_info and theora_comment structures using + * theora_decode_header(). 
See \ref identification for more information on + * identifying which packets are theora packets. + \verbatim + int i; + for (i = 0; i < 3; i++) + { + (get a theora packet "op" from the Ogg stream) + theora_decode_header(&info, &comment, op); + } + \endverbatim + * -# initialise the decoder based on the information retrieved into the + * theora_info struct by theora_decode_header(). You will need a + * theora_state struct. + \verbatim + theora_state state; + + theora_decode_init(&state, &info); + \endverbatim + * -# pass in packets and retrieve decoded frames! See the yuv_buffer + * documentation for information on how to retrieve raw YUV data. + \verbatim + yuv_buffer buffer; + while (last packet was not e_o_s) { + (get a theora packet "op" from the Ogg stream) + theora_decode_packetin(&state, op); + theora_decode_YUVout(&state, &buffer); + } + \endverbatim + * + * + * \subsection identification Identifying Theora Packets + * + * All streams inside an Ogg file have a unique serial_no attached to the + * stream. Typically, you will want to + * - retrieve the serial_no for each b_o_s (beginning of stream) page + * encountered within the Ogg file; + * - test the first (only) packet on that page to determine if it is a theora + * packet; + * - once you have found a theora b_o_s page then use the retrieved serial_no + * to identify future packets belonging to the same theora stream. + * + * Note that you \e cannot use theora_packet_isheader() to determine if a + * packet is a theora packet or not, as this function does not perform any + * checking beyond whether a header bit is present. Instead, use the + * theora_decode_header() function and check the return value; or examine the + * header bytes at the beginning of the Ogg page. + */ + + +/** \defgroup oldfuncs Legacy pre-1.0 C API */ +/* @{ */ + +/** + * A YUV buffer for passing uncompressed frames to and from the codec. + * This holds a Y'CbCr frame in planar format.
The CbCr planes can be + * subsampled and have their own separate dimensions and row stride + * offsets. Note that the strides may be negative in some + * configurations. For theora the width and height of the largest plane + * must be a multiple of 16. The actual meaningful picture size and + * offset are stored in the theora_info structure; frames returned by + * the decoder may need to be cropped for display. + * + * All samples are 8 bits. Within each plane samples are ordered by + * row from the top of the frame to the bottom. Within each row samples + * are ordered from left to right. + * + * During decode, the yuv_buffer struct is allocated by the user, but all + * fields (including luma and chroma pointers) are filled by the library. + * These pointers address library-internal memory and their contents should + * not be modified. + * + * Conversely, during encode the user allocates the struct and fills out all + * fields. The user also manages the data addressed by the luma and chroma + * pointers. See the encoder_example.c and dump_video.c example files in + * theora/examples/ for more information. + */ +typedef struct { + int y_width; /**< Width of the Y' luminance plane */ + int y_height; /**< Height of the luminance plane */ + int y_stride; /**< Offset in bytes between successive rows */ + + int uv_width; /**< Width of the Cb and Cr chroma planes */ + int uv_height; /**< Height of the chroma planes */ + int uv_stride; /**< Offset between successive chroma rows */ + unsigned char *y; /**< Pointer to start of luminance data */ + unsigned char *u; /**< Pointer to start of Cb data */ + unsigned char *v; /**< Pointer to start of Cr data */ + +} yuv_buffer; + +/** + * A Colorspace. 
+ */ +typedef enum { + OC_CS_UNSPECIFIED, /**< The colorspace is unknown or unspecified */ + OC_CS_ITU_REC_470M, /**< This is the best option for 'NTSC' content */ + OC_CS_ITU_REC_470BG, /**< This is the best option for 'PAL' content */ + OC_CS_NSPACES /**< This marks the end of the defined colorspaces */ +} theora_colorspace; + +/** + * A Chroma subsampling + * + * These enumerate the available chroma subsampling options supported + * by the theora format. See Section 4.4 of the specification for + * exact definitions. + */ +typedef enum { + OC_PF_420, /**< Chroma subsampling by 2 in each direction (4:2:0) */ + OC_PF_RSVD, /**< Reserved value */ + OC_PF_422, /**< Horizontal chroma subsampling by 2 (4:2:2) */ + OC_PF_444, /**< No chroma subsampling at all (4:4:4) */ +} theora_pixelformat; + +/** + * Theora bitstream info. + * Contains the basic playback parameters for a stream, + * corresponding to the initial 'info' header packet. + * + * Encoded theora frames must be a multiple of 16 in width and height. + * To handle other frame sizes, a crop rectangle is specified in + * frame_height and frame_width, offset_x and offset_y. The offset + * and size should still be a multiple of 2 to avoid chroma sampling + * shifts. Offset values in this structure are measured from the + * upper left of the image. + * + * Frame rate, in frames per second, is stored as a rational + * fraction. Aspect ratio is also stored as a rational fraction, and + * refers to the aspect ratio of the frame pixels, not of the + * overall frame itself. + * + * See + * examples/encoder_example.c for usage examples of the + * other parameters and good default settings for the encoder parameters.
+ */ +typedef struct { + ogg_uint32_t width; /**< encoded frame width */ + ogg_uint32_t height; /**< encoded frame height */ + ogg_uint32_t frame_width; /**< display frame width */ + ogg_uint32_t frame_height; /**< display frame height */ + ogg_uint32_t offset_x; /**< horizontal offset of the displayed frame */ + ogg_uint32_t offset_y; /**< vertical offset of the displayed frame */ + ogg_uint32_t fps_numerator; /**< frame rate numerator **/ + ogg_uint32_t fps_denominator; /**< frame rate denominator **/ + ogg_uint32_t aspect_numerator; /**< pixel aspect ratio numerator */ + ogg_uint32_t aspect_denominator; /**< pixel aspect ratio denominator */ + theora_colorspace colorspace; /**< colorspace */ + int target_bitrate; /**< nominal bitrate in bits per second */ + int quality; /**< Nominal quality setting, 0-63 */ + int quick_p; /**< Quick encode/decode */ + + /* decode only */ + unsigned char version_major; + unsigned char version_minor; + unsigned char version_subminor; + + void *codec_setup; + + /* encode only */ + int dropframes_p; + int keyframe_auto_p; + ogg_uint32_t keyframe_frequency; + ogg_uint32_t keyframe_frequency_force; /* also used for decode init to + get granpos shift correct */ + ogg_uint32_t keyframe_data_target_bitrate; + ogg_int32_t keyframe_auto_threshold; + ogg_uint32_t keyframe_mindistance; + ogg_int32_t noise_sensitivity; + ogg_int32_t sharpness; + + theora_pixelformat pixelformat; /**< chroma subsampling mode to expect */ + +} theora_info; + +/** Codec internal state and context. + */ +typedef struct{ + theora_info *i; + ogg_int64_t granulepos; + + void *internal_encode; + void *internal_decode; + +} theora_state; + +/** + * Comment header metadata. + * + * This structure holds the in-stream metadata corresponding to + * the 'comment' header packet. + * + * Meta data is stored as a series of (tag, value) pairs, in + * length-encoded string vectors. The first occurence of the + * '=' character delimits the tag and value. 
A particular tag + * may occur more than once. The character set encoding for + * the strings is always UTF-8, but the tag names are limited + * to case-insensitive ASCII. See the spec for details. + * + * In filling in this structure, theora_decode_header() will + * null-terminate the user_comment strings for safety. However, + * the bitstream format itself treats them as 8-bit clean, + * and so the length array should be treated as authoritative + * for their length. + */ +typedef struct theora_comment{ + char **user_comments; /**< An array of comment string vectors */ + int *comment_lengths; /**< An array of corresponding string vector lengths in bytes */ + int comments; /**< The total number of comment string vectors */ + char *vendor; /**< The vendor string identifying the encoder, null terminated */ + +} theora_comment; + + +/**\name theora_control() codes */ +/* \anchor decctlcodes_old + * These are the available request codes for theora_control() + * when called with a decoder instance. + * By convention decoder control codes are odd, to distinguish + * them from \ref encctlcodes_old "encoder control codes" which + * are even. + * + * Note that since the 1.0 release, both the legacy and the final + * implementation accept all the same control codes, but only the + * final API declares the newer codes. + * + * Keep any experimental or vendor-specific values above \c 0x8000.*/ + +/*@{*/ + +/**Get the maximum post-processing level. + * The decoder supports a post-processing filter that can improve + * the appearance of the decoded images. This returns the highest + * level setting for this post-processor, corresponding to maximum + * improvement and computational expense. + */ +#define TH_DECCTL_GET_PPLEVEL_MAX (1) + +/**Set the post-processing level. + * Sets the level of post-processing to use when decoding the + * compressed stream. This must be a value between zero (off) + * and the maximum returned by TH_DECCTL_GET_PPLEVEL_MAX. 
+ */ +#define TH_DECCTL_SET_PPLEVEL (3) + +/**Sets the maximum distance between key frames. + * This can be changed during an encode, but will be bounded by + * 1<<th_info#keyframe_granule_shift. + * If it is set before encoding begins, th_info#keyframe_granule_shift will + * be enlarged appropriately. + * + * \param[in] buf ogg_uint32_t: The maximum distance between key + * frames. + * \param[out] buf ogg_uint32_t: The actual maximum distance set. + * \retval OC_FAULT \a theora_state or \a buf is NULL. + * \retval OC_EINVAL \a buf_sz is not sizeof(ogg_uint32_t). + * \retval OC_IMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4) + +/**Set the granule position. + * Call this after a seek, to update the internal granulepos + * in the decoder, to ensure that subsequent frames are marked + * properly. If you track timestamps yourself and do not use + * the granule position returned by the decoder, then you do + * not need to use this control. + */ +#define TH_DECCTL_SET_GRANPOS (5) + +/**\anchor encctlcodes_old */ + +/**Sets the quantization parameters to use. + * The parameters are copied, not stored by reference, so they can be freed + * after this call. + * NULL may be specified to revert to the default parameters. + * + * \param[in] buf #th_quant_info + * \retval OC_FAULT \a theora_state is NULL. + * \retval OC_EINVAL Encoding has already begun, the quantization parameters + * are not acceptable to this version of the encoder, + * \a buf is NULL and \a buf_sz is not zero, + * or \a buf is non-NULL and \a buf_sz is + * not sizeof(#th_quant_info). + * \retval OC_IMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_QUANT_PARAMS (2) + +/**Disables any encoder features that would prevent lossless transcoding back + * to VP3. + * This primarily means disabling block-level QI values and not using 4MV mode + * when any of the luma blocks in a macro block are not coded.
+ * It also includes using the VP3 quantization tables and Huffman codes; if you + * set them explicitly after calling this function, the resulting stream will + * not be VP3-compatible. + * If you enable VP3-compatibility when encoding 4:2:2 or 4:4:4 source + * material, or when using a picture region smaller than the full frame (e.g. + * a non-multiple-of-16 width or height), then non-VP3 bitstream features will + * still be disabled, but the stream will still not be VP3-compatible, as VP3 + * was not capable of encoding such formats. + * If you call this after encoding has already begun, then the quantization + * tables and codebooks cannot be changed, but the frame-level features will + * be enabled or disabled as requested. + * + * \param[in] buf int: a non-zero value to enable VP3 compatibility, + * or 0 to disable it (the default). + * \param[out] buf int: 1 if all bitstream features required for + * VP3-compatibility could be set, and 0 otherwise. + * The latter will be returned if the pixel format is not + * 4:2:0, the picture region is smaller than the full frame, + * or if encoding has begun, preventing the quantization + * tables and codebooks from being set. + * \retval OC_FAULT \a theora_state or \a buf is NULL. + * \retval OC_EINVAL \a buf_sz is not sizeof(int). + * \retval OC_IMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_VP3_COMPATIBLE (10) + +/**Gets the maximum speed level. + * Higher speed levels favor quicker encoding over better quality per bit. + * Depending on the encoding mode, and the internal algorithms used, quality + * may actually improve, but in this case bitrate will also likely increase. + * In any case, overall rate/distortion performance will probably decrease. + * The maximum value, and the meaning of each value, may change depending on + * the current encoding mode (VBR vs. CQI, etc.). + * + * \param[out] buf int: The maximum encoding speed level. + * \retval OC_FAULT \a theora_state or \a buf is NULL. 
+ * \retval OC_EINVAL \a buf_sz is not sizeof(int). + * \retval OC_IMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_GET_SPLEVEL_MAX (12) + +/**Sets the speed level. + * By default a speed value of 1 is used. + * + * \param[in] buf int: The new encoding speed level. + * 0 is slowest, larger values use less CPU. + * \retval OC_FAULT \a theora_state or \a buf is NULL. + * \retval OC_EINVAL \a buf_sz is not sizeof(int), or the + * encoding speed level is out of bounds. + * The maximum encoding speed level may be + * implementation- and encoding mode-specific, and can be + * obtained via #TH_ENCCTL_GET_SPLEVEL_MAX. + * \retval OC_IMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_SET_SPLEVEL (14) + +/*@}*/ + +#define OC_FAULT -1 /**< General failure */ +#define OC_EINVAL -10 /**< Library encountered invalid internal data */ +#define OC_DISABLED -11 /**< Requested action is disabled */ +#define OC_BADHEADER -20 /**< Header packet was corrupt/invalid */ +#define OC_NOTFORMAT -21 /**< Packet is not a theora packet */ +#define OC_VERSION -22 /**< Bitstream version is not handled */ +#define OC_IMPL -23 /**< Feature or action not implemented */ +#define OC_BADPACKET -24 /**< Packet is corrupt */ +#define OC_NEWPACKET -25 /**< Packet is an (ignorable) unhandled extension */ +#define OC_DUPFRAME 1 /**< Packet is a dropped frame */ + +/** + * Retrieve a human-readable string to identify the encoder vendor and version. + * \returns A version string. + */ +extern const char *theora_version_string(void); + +/** + * Retrieve a 32-bit version number. + * This number is composed of a 16-bit major version, 8-bit minor version + * and 8 bit sub-version, composed as follows: +
+   (VERSION_MAJOR<<16) + (VERSION_MINOR<<8) + (VERSION_SUB)
+
+* \returns The version number. +*/ +extern ogg_uint32_t theora_version_number(void); + +/** + * Initialize the theora encoder. + * \param th The theora_state handle to initialize for encoding. + * \param ti A theora_info struct filled with the desired encoding parameters. + * \retval 0 Success + */ +extern int theora_encode_init(theora_state *th, theora_info *ti); + +/** + * Submit a YUV buffer to the theora encoder. + * \param t A theora_state handle previously initialized for encoding. + * \param yuv A buffer of YUV data to encode. Note that both the yuv_buffer + * struct and the luma/chroma buffers within should be allocated by + * the user. + * \retval OC_EINVAL Encoder is not ready, or is finished. + * \retval -1 The size of the given frame differs from those previously input + * \retval 0 Success + */ +extern int theora_encode_YUVin(theora_state *t, yuv_buffer *yuv); + +/** + * Request the next packet of encoded video. + * The encoded data is placed in a user-provided ogg_packet structure. + * \param t A theora_state handle previously initialized for encoding. + * \param last_p whether this is the last packet the encoder should produce. + * \param op An ogg_packet structure to fill. libtheora will set all + * elements of this structure, including a pointer to encoded + * data. The memory for the encoded data is owned by libtheora. + * \retval 0 No internal storage exists OR no packet is ready + * \retval -1 The encoding process has completed + * \retval 1 Success + */ +extern int theora_encode_packetout( theora_state *t, int last_p, + ogg_packet *op); + +/** + * Request a packet containing the initial header. + * A pointer to the header data is placed in a user-provided ogg_packet + * structure. + * \param t A theora_state handle previously initialized for encoding. + * \param op An ogg_packet structure to fill. libtheora will set all + * elements of this structure, including a pointer to the header + * data. 
The memory for the header data is owned by libtheora. + * \retval 0 Success + */ +extern int theora_encode_header(theora_state *t, ogg_packet *op); + +/** + * Request a comment header packet from provided metadata. + * A pointer to the comment data is placed in a user-provided ogg_packet + * structure. + * \param tc A theora_comment structure filled with the desired metadata + * \param op An ogg_packet structure to fill. libtheora will set all + * elements of this structure, including a pointer to the encoded + * comment data. The memory for the comment data is owned by + * libtheora. + * \retval 0 Success + */ +extern int theora_encode_comment(theora_comment *tc, ogg_packet *op); + +/** + * Request a packet containing the codebook tables for the stream. + * A pointer to the codebook data is placed in a user-provided ogg_packet + * structure. + * \param t A theora_state handle previously initialized for encoding. + * \param op An ogg_packet structure to fill. libtheora will set all + * elements of this structure, including a pointer to the codebook + * data. The memory for the header data is owned by libtheora. + * \retval 0 Success + */ +extern int theora_encode_tables(theora_state *t, ogg_packet *op); + +/** + * Decode an Ogg packet, with the expectation that the packet contains + * an initial header, comment data or codebook tables. + * + * \param ci A theora_info structure to fill. This must have been previously + * initialized with theora_info_init(). If \a op contains an initial + * header, theora_decode_header() will fill \a ci with the + * parsed header values. If \a op contains codebook tables, + * theora_decode_header() will parse these and attach an internal + * representation to \a ci->codec_setup. + * \param cc A theora_comment structure to fill. If \a op contains comment + * data, theora_decode_header() will fill \a cc with the parsed + * comments. 
+ * \param op An ogg_packet structure which you expect contains an initial + * header, comment data or codebook tables. + * + * \retval OC_BADHEADER \a op is NULL; OR the first byte of \a op->packet + * has the signature of an initial packet, but op is + * not a b_o_s packet; OR this packet has the signature + * of an initial header packet, but an initial header + * packet has already been seen; OR this packet has the + * signature of a comment packet, but the initial header + * has not yet been seen; OR this packet has the signature + * of a comment packet, but contains invalid data; OR + * this packet has the signature of codebook tables, + * but the initial header or comments have not yet + * been seen; OR this packet has the signature of codebook + * tables, but contains invalid data; + * OR the stream being decoded has a compatible version + * but this packet does not have the signature of a + * theora initial header, comments, or codebook packet + * \retval OC_VERSION The packet data of \a op is an initial header with + * a version which is incompatible with this version of + * libtheora. + * \retval OC_NEWPACKET the stream being decoded has an incompatible (future) + * version and contains an unknown signature. + * \retval 0 Success + * + * \note The normal usage is that theora_decode_header() be called on the + * first three packets of a theora logical bitstream in succession. + */ +extern int theora_decode_header(theora_info *ci, theora_comment *cc, + ogg_packet *op); + +/** + * Initialize a theora_state handle for decoding. + * \param th The theora_state handle to initialize. + * \param c A theora_info struct filled with the desired decoding parameters. + * This is of course usually obtained from a previous call to + * theora_decode_header(). + * \retval 0 Success + */ +extern int theora_decode_init(theora_state *th, theora_info *c); + +/** + * Input a packet containing encoded data into the theora decoder. 
+ * \param th A theora_state handle previously initialized for decoding. + * \param op An ogg_packet containing encoded theora data. + * \retval 0 Success + * \retval OC_BADPACKET \a op does not contain encoded video data + */ +extern int theora_decode_packetin(theora_state *th,ogg_packet *op); + +/** + * Output the next available frame of decoded YUV data. + * \param th A theora_state handle previously initialized for decoding. + * \param yuv A yuv_buffer in which libtheora should place the decoded data. + * Note that the buffer struct itself is allocated by the user, but + * that the luma and chroma pointers will be filled in by the + * library. Also note that these luma and chroma regions should be + * considered read-only by the user. + * \retval 0 Success + */ +extern int theora_decode_YUVout(theora_state *th,yuv_buffer *yuv); + +/** + * Report whether a theora packet is a header or not + * This function does no verification beyond checking the header + * flag bit so it should not be used for bitstream identification; + * use theora_decode_header() for that. + * + * \param op An ogg_packet containing encoded theora data. + * \retval 1 The packet is a header packet + * \retval 0 The packet is not a header packet (and so contains frame data) + * + * This function was added in the 1.0alpha4 release. + */ +extern int theora_packet_isheader(ogg_packet *op); + +/** + * Report whether a theora packet is a keyframe or not + * + * \param op An ogg_packet containing encoded theora data. + * \retval 1 The packet contains a keyframe image + * \retval 0 The packet contains an interframe delta + * \retval -1 The packet is not an image data packet at all + * + * This function was added in the 1.0alpha4 release. + */ +extern int theora_packet_iskeyframe(ogg_packet *op); + +/** + * Report the granulepos shift radix + * + * When embedded in Ogg, Theora uses a two-part granulepos, + * splitting the 64-bit field into two pieces.
The more-significant + * section represents the frame count at the last keyframe, + * and the less-significant section represents the count of + * frames since the last keyframe. In this way the overall + * field is still non-decreasing with time, but usefully encodes + * a pointer to the last keyframe, which is necessary for + * correctly restarting decode after a seek. + * + * This function reports the number of bits used to represent + * the distance to the last keyframe, and thus how the granulepos + * field must be shifted or masked to obtain the two parts. + * + * Since libtheora returns compressed data in an ogg_packet + * structure, this may be generally useful even if the Theora + * packets are not being used in an Ogg container. + * + * \param ti A previously initialized theora_info struct + * \returns The bit shift dividing the two granulepos fields + * + * This function was added in the 1.0alpha5 release. + */ +int theora_granule_shift(theora_info *ti); + +/** + * Convert a granulepos to an absolute frame index, starting at 0. + * The granulepos is interpreted in the context of a given theora_state handle. + * + * Note that while the granulepos encodes the frame count (i.e. starting + * from 1) this call returns the frame index, starting from zero. Thus + * One can calculate the presentation time by multiplying the index by + * the rate. + * + * \param th A previously initialized theora_state handle (encode or decode) + * \param granulepos The granulepos to convert. + * \returns The frame index corresponding to \a granulepos. + * \retval -1 The given granulepos is undefined (i.e. negative) + * + * Thus function was added in the 1.0alpha4 release. + */ +extern ogg_int64_t theora_granule_frame(theora_state *th,ogg_int64_t granulepos); + +/** + * Convert a granulepos to absolute time in seconds. The granulepos is + * interpreted in the context of a given theora_state handle, and gives + * the end time of a frame's presentation as used in Ogg mux ordering. 
+ * + * \param th A previously initialized theora_state handle (encode or decode) + * \param granulepos The granulepos to convert. + * \returns The absolute time in seconds corresponding to \a granulepos. + * This is the "end time" for the frame, or the latest time it should + * be displayed. + * It is not the presentation time. + * \retval -1. The given granulepos is undefined (i.e. negative), or + * \retval -1. The function has been disabled because floating + * point support is not available. + */ +extern double theora_granule_time(theora_state *th,ogg_int64_t granulepos); + +/** + * Initialize a theora_info structure. All values within the given theora_info + * structure are initialized, and space is allocated within libtheora for + * internal codec setup data. + * \param c A theora_info struct to initialize. + */ +extern void theora_info_init(theora_info *c); + +/** + * Clear a theora_info structure. All values within the given theora_info + * structure are cleared, and associated internal codec setup data is freed. + * \param c A theora_info struct to initialize. + */ +extern void theora_info_clear(theora_info *c); + +/** + * Free all internal data associated with a theora_state handle. + * \param t A theora_state handle. + */ +extern void theora_clear(theora_state *t); + +/** + * Initialize an allocated theora_comment structure + * \param tc An allocated theora_comment structure + **/ +extern void theora_comment_init(theora_comment *tc); + +/** + * Add a comment to an initialized theora_comment structure + * \param tc A previously initialized theora comment structure + * \param comment A null-terminated string encoding the comment in the form + * "TAG=the value" + * + * Neither theora_comment_add() nor theora_comment_add_tag() support + * comments containing null values, although the bitstream format + * supports this. To add such comments you will need to manipulate + * the theora_comment structure directly. 
+ **/ + +extern void theora_comment_add(theora_comment *tc, char *comment); + +/** + * Add a comment to an initialized theora_comment structure. + * \param tc A previously initialized theora comment structure + * \param tag A null-terminated string containing the tag + * associated with the comment. + * \param value The corresponding value as a null-terminated string + * + * Neither theora_comment_add() nor theora_comment_add_tag() support + * comments containing null values, although the bitstream format + * supports this. To add such comments you will need to manipulate + * the theora_comment structure directly. + **/ +extern void theora_comment_add_tag(theora_comment *tc, + char *tag, char *value); + +/** + * Look up a comment value by tag. + * \param tc An initialized theora_comment structure + * \param tag The tag to look up + * \param count The instance of the tag. The same tag can appear multiple + * times, each with a distinct and ordered value, so an index + * is required to retrieve them all. + * \returns A pointer to the queried tag's value + * \retval NULL No matching tag is found + * + * \note Use theora_comment_query_count() to get the legal range for the + * count parameter. + **/ + +extern char *theora_comment_query(theora_comment *tc, char *tag, int count); + +/** Look up the number of instances of a tag. + * \param tc An initialized theora_comment structure + * \param tag The tag to look up + * \returns The number of instances of a particular tag. + * + * Call this first when querying for a specific tag and then iterate + * over the number of instances with separate calls to + * theora_comment_query() to retrieve all instances in order. + **/ +extern int theora_comment_query_count(theora_comment *tc, char *tag); + +/** + * Clear an allocated theora_comment struct so that it can be freed. + * \param tc An allocated theora_comment structure. + **/ +extern void theora_comment_clear(theora_comment *tc); + +/**Encoder control function.
+ * This is used to provide advanced control of the encoding process. + * \param th A #theora_state handle. + * \param req The control code to process. + * See \ref encctlcodes_old "the list of available + * control codes" for details. + * \param buf The parameters for this control code. + * \param buf_sz The size of the parameter buffer.*/ +extern int theora_control(theora_state *th,int req,void *buf,size_t buf_sz); + +/* @} */ /* end oldfuncs doxygen group */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _O_THEORA_H_ */ diff --git a/thirdparty/libtheora/theora/theoradec.h b/thirdparty/libtheora/theora/theoradec.h new file mode 100644 index 0000000000..b20f0e3a64 --- /dev/null +++ b/thirdparty/libtheora/theora/theoradec.h @@ -0,0 +1,325 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $ + + ********************************************************************/ + +/**\file + * The libtheoradec C decoding API.*/ + +#if !defined(_O_THEORA_THEORADEC_H_) +# define _O_THEORA_THEORADEC_H_ (1) +# include <stddef.h> +# include <ogg/ogg.h> +# include "codec.h" + +#if defined(__cplusplus) +extern "C" { +#endif + + + +/**\name th_decode_ctl() codes + * \anchor decctlcodes + * These are the available request codes for th_decode_ctl(). + * By convention, these are odd, to distinguish them from the + * \ref encctlcodes "encoder control codes".
+ * Keep any experimental or vendor-specific values above \c 0x8000.*/ +/*@{*/ +/**Gets the maximum post-processing level. + * The decoder supports a post-processing filter that can improve + * the appearance of the decoded images. This returns the highest + * level setting for this post-processor, corresponding to maximum + * improvement and computational expense. + * + * \param[out] _buf int: The maximum post-processing level. + * \retval TH_EFAULT \a _dec_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int). + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_DECCTL_GET_PPLEVEL_MAX (1) +/**Sets the post-processing level. + * By default, post-processing is disabled. + * + * Sets the level of post-processing to use when decoding the + * compressed stream. This must be a value between zero (off) + * and the maximum returned by TH_DECCTL_GET_PPLEVEL_MAX. + * + * \param[in] _buf int: The new post-processing level. + * 0 to disable; larger values use more CPU. + * \retval TH_EFAULT \a _dec_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int), or the + * post-processing level is out of bounds. + * The maximum post-processing level may be + * implementation-specific, and can be obtained via + * #TH_DECCTL_GET_PPLEVEL_MAX. + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_DECCTL_SET_PPLEVEL (3) +/**Sets the granule position. + * Call this after a seek, before decoding the first frame, to ensure that the + * proper granule position is returned for all subsequent frames. + * If you track timestamps yourself and do not use the granule position + * returned by the decoder, then you need not call this function. + * + * \param[in] _buf ogg_int64_t: The granule position of the next + * frame. + * \retval TH_EFAULT \a _dec_ctx or \a _buf is NULL. 
+ * \retval TH_EINVAL \a _buf_sz is not sizeof(ogg_int64_t), or the + * granule position is negative.*/ +#define TH_DECCTL_SET_GRANPOS (5) +/**Sets the striped decode callback function. + * If set, this function will be called as each piece of a frame is fully + * decoded in th_decode_packetin(). + * You can pass in a #th_stripe_callback with + * th_stripe_callback#stripe_decoded set to NULL to disable the + * callbacks at any point. + * Enabling striped decode does not prevent you from calling + * th_decode_ycbcr_out() after the frame is fully decoded. + * + * \param[in] _buf #th_stripe_callback: The callback parameters. + * \retval TH_EFAULT \a _dec_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not + * sizeof(th_stripe_callback).*/ +#define TH_DECCTL_SET_STRIPE_CB (7) + +/**Enables telemetry and sets the macroblock display mode */ +#define TH_DECCTL_SET_TELEMETRY_MBMODE (9) +/**Enables telemetry and sets the motion vector display mode */ +#define TH_DECCTL_SET_TELEMETRY_MV (11) +/**Enables telemetry and sets the adaptive quantization display mode */ +#define TH_DECCTL_SET_TELEMETRY_QI (13) +/**Enables telemetry and sets the bitstream breakdown visualization mode */ +#define TH_DECCTL_SET_TELEMETRY_BITS (15) +/*@}*/ + + + +/**A callback function for striped decode. + * This is a function pointer to an application-provided function that will be + * called each time a section of the image is fully decoded in + * th_decode_packetin(). + * This allows the application to process the section immediately, while it is + * still in cache. + * Note that the frame is decoded bottom to top, so \a _yfrag0 will steadily + * decrease with each call until it reaches 0, at which point the full frame + * is decoded. + * The number of fragment rows made available in each call depends on the pixel + * format and the number of post-processing filters enabled, and may not even + * be constant for the entire frame. 
+ * If a non-NULL \a _granpos pointer is passed to + * th_decode_packetin(), the granule position for the frame will be stored + * in it before the first callback is made. + * If an entire frame is dropped (a 0-byte packet), then no callbacks will be + * made at all for that frame. + * \param _ctx An application-provided context pointer. + * \param _buf The image buffer for the decoded frame. + * \param _yfrag0 The Y coordinate of the first row of 8x8 fragments + * decoded. + * Multiply this by 8 to obtain the pixel row number in the + * luma plane. + * If the chroma planes are subsampled in the Y direction, + * this will always be divisible by two. + * \param _yfrag_end The Y coordinate of the first row of 8x8 fragments past + * the newly decoded section. + * If the chroma planes are subsampled in the Y direction, + * this will always be divisible by two. + * I.e., this section contains fragment rows + * \a _yfrag0 ...\a _yfrag_end -1.*/ +typedef void (*th_stripe_decoded_func)(void *_ctx,th_ycbcr_buffer _buf, + int _yfrag0,int _yfrag_end); + +/**The striped decode callback data to pass to #TH_DECCTL_SET_STRIPE_CB.*/ +typedef struct{ + /**An application-provided context pointer. + * This will be passed back verbatim to the application.*/ + void *ctx; + /**The callback function pointer.*/ + th_stripe_decoded_func stripe_decoded; +}th_stripe_callback; + + + +/**\name Decoder state + The following data structures are opaque, and their contents are not + publicly defined by this API. + Referring to their internals directly is unsupported, and may break without + warning.*/ +/*@{*/ +/**The decoder context.*/ +typedef struct th_dec_ctx th_dec_ctx; +/**Setup information. + This contains auxiliary information (Huffman tables and quantization + parameters) decoded from the setup header by th_decode_headerin() to be + passed to th_decode_alloc(). 
+ It can be re-used to initialize any number of decoders, and can be freed + via th_setup_free() at any time.*/ +typedef struct th_setup_info th_setup_info; +/*@}*/ + + + +/**\defgroup decfuncs Functions for Decoding*/ +/*@{*/ +/**\name Functions for decoding + * You must link to libtheoradec if you use any of the + * functions in this section. + * + * The functions are listed in the order they are used in a typical decode. + * The basic steps are: + * - Parse the header packets by repeatedly calling th_decode_headerin(). + * - Allocate a #th_dec_ctx handle with th_decode_alloc(). + * - Call th_setup_free() to free any memory used for codec setup + * information. + * - Perform any additional decoder configuration with th_decode_ctl(). + * - For each video data packet: + * - Submit the packet to the decoder via th_decode_packetin(). + * - Retrieve the uncompressed video data via th_decode_ycbcr_out(). + * - Call th_decode_free() to release all decoder memory.*/ +/*@{*/ +/**Decodes the header packets of a Theora stream. + * This should be called on the initial packets of the stream, in succession, + * until it returns 0, indicating that all headers have been + * processed, or an error is encountered. + * At least three header packets are required, and additional optional header + * packets may follow. + * This can be used on the first packet of any logical stream to determine if + * that stream is a Theora stream. + * \param _info A #th_info structure to fill in. + * This must have been previously initialized with + * th_info_init(). + * The application may immediately begin using the contents of + * this structure after the first header is decoded, though it + * must continue to be passed in on all subsequent calls. + * \param _tc A #th_comment structure to fill in. + * The application may immediately begin using the contents of + * this structure after the second header is decoded, though it + * must continue to be passed in on all subsequent calls. 
+ * \param _setup Returns a pointer to additional, private setup information + * needed by the decoder. + * The contents of this pointer must be initialized to + * NULL on the first call, and the returned value must + * continue to be passed in on all subsequent calls. + * \param _op An ogg_packet structure which contains one of the + * initial packets of an Ogg logical stream. + * \return A positive value indicates that a Theora header was successfully + * processed. + * \retval 0 The first video data packet was encountered after all + * required header packets were parsed. + * The packet just passed in on this call should be saved + * and fed to th_decode_packetin() to begin decoding + * video data. + * \retval TH_EFAULT One of \a _info, \a _tc, or \a _setup was + * NULL. + * \retval TH_EBADHEADER \a _op was NULL, the packet was not the next + * header packet in the expected sequence, or the format + * of the header data was invalid. + * \retval TH_EVERSION The packet data was a Theora info header, but for a + * bitstream version not decodable with this version of + * libtheoradec. + * \retval TH_ENOTFORMAT The packet was not a Theora header. + */ +extern int th_decode_headerin(th_info *_info,th_comment *_tc, + th_setup_info **_setup,ogg_packet *_op); +/**Allocates a decoder instance. + * + * Security Warning: The Theora format supports very large frame sizes, + * potentially even larger than the address space of a 32-bit machine, and + * creating a decoder context allocates the space for several frames of data. + * If the allocation fails here, your program will crash, possibly at some + * future point because the OS kernel returned a valid memory range and will + * only fail when it tries to map the pages in it the first time they are + * used. + * Even if it succeeds, you may experience a denial of service if the frame + * size is large enough to cause excessive paging. 
+ * If you are integrating libtheora in a larger application where such things + * are undesirable, it is highly recommended that you check the frame size in + * \a _info before calling this function and refuse to decode streams where it + * is larger than some reasonable maximum. + * libtheora will not check this for you, because there may be machines that + * can handle such streams and applications that wish to. + * \param _info A #th_info struct filled via th_decode_headerin(). + * \param _setup A #th_setup_info handle returned via + * th_decode_headerin(). + * \return The initialized #th_dec_ctx handle. + * \retval NULL If the decoding parameters were invalid.*/ +extern th_dec_ctx *th_decode_alloc(const th_info *_info, + const th_setup_info *_setup); +/**Releases all storage used for the decoder setup information. + * This should be called after you no longer want to create any decoders for + * a stream whose headers you have parsed with th_decode_headerin(). + * \param _setup The setup information to free. + * This can safely be NULL.*/ +extern void th_setup_free(th_setup_info *_setup); +/**Decoder control function. + * This is used to provide advanced control of the decoding process. + * \param _dec A #th_dec_ctx handle. + * \param _req The control code to process. + * See \ref decctlcodes "the list of available control codes" + * for details. + * \param _buf The parameters for this control code. + * \param _buf_sz The size of the parameter buffer.*/ +extern int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf, + size_t _buf_sz); +/**Submits a packet containing encoded video data to the decoder. + * \param _dec A #th_dec_ctx handle. + * \param _op An ogg_packet containing encoded video data. + * \param _granpos Returns the granule position of the decoded packet. + * If non-NULL, the granule position for this specific + * packet is stored in this location. + * This is computed incrementally from previously decoded + * packets. 
+ * After a seek, the correct granule position must be set via + * #TH_DECCTL_SET_GRANPOS for this to work properly. + * \retval 0 Success. + * A new decoded frame can be retrieved by calling + * th_decode_ycbcr_out(). + * \retval TH_DUPFRAME The packet represented a dropped (0-byte) frame. + * The player can skip the call to th_decode_ycbcr_out(), + * as the contents of the decoded frame buffer have not + * changed. + * \retval TH_EFAULT \a _dec or \a _op was NULL. + * \retval TH_EBADPACKET \a _op does not contain encoded video data. + * \retval TH_EIMPL The video data uses bitstream features which this + * library does not support.*/ +extern int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op, + ogg_int64_t *_granpos); +/**Outputs the next available frame of decoded Y'CbCr data. + * If a striped decode callback has been set with #TH_DECCTL_SET_STRIPE_CB, + * then the application does not need to call this function. + * \param _dec A #th_dec_ctx handle. + * \param _ycbcr A video buffer structure to fill in. + * libtheoradec will fill in all the members of this + * structure, including the pointers to the uncompressed video + * data. + * The memory for this video data is owned by + * libtheoradec. + * It may be freed or overwritten without notification when + * subsequent frames are decoded. + * \retval 0 Success + * \retval TH_EFAULT \a _dec or \a _ycbcr was NULL. + */ +extern int th_decode_ycbcr_out(th_dec_ctx *_dec, + th_ycbcr_buffer _ycbcr); +/**Frees an allocated decoder instance. 
+ * \param _dec A #th_dec_ctx handle.*/ +extern void th_decode_free(th_dec_ctx *_dec); +/*@}*/ +/*@}*/ + + + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/thirdparty/libtheora/theora/theoraenc.h b/thirdparty/libtheora/theora/theoraenc.h new file mode 100644 index 0000000000..fdf2ab21e2 --- /dev/null +++ b/thirdparty/libtheora/theora/theoraenc.h @@ -0,0 +1,486 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $ + + ********************************************************************/ + +/**\file + * The libtheoraenc C encoding API.*/ + +#if !defined(_O_THEORA_THEORAENC_H_) +# define _O_THEORA_THEORAENC_H_ (1) +# include <stddef.h> +# include <ogg/ogg.h> +# include "codec.h" + +#if defined(__cplusplus) +extern "C" { +#endif + + + +/**\name th_encode_ctl() codes + * \anchor encctlcodes + * These are the available request codes for th_encode_ctl(). + * By convention, these are even, to distinguish them from the + * \ref decctlcodes "decoder control codes". + * Keep any experimental or vendor-specific values above \c 0x8000.*/ +/*@{*/ +/**Sets the Huffman tables to use. + * The tables are copied, not stored by reference, so they can be freed after + * this call. + * NULL may be specified to revert to the default tables. + * + * \param[in] _buf #th_huff_code[#TH_NHUFFMAN_TABLES][#TH_NDCT_TOKENS] + * \retval TH_EFAULT \a _enc_ctx is NULL. 
+ * \retval TH_EINVAL Encoding has already begun or one or more of the given + * tables is not full or prefix-free, \a _buf is + * NULL and \a _buf_sz is not zero, or \a _buf is + * non-NULL and \a _buf_sz is not + * sizeof(#th_huff_code)*#TH_NHUFFMAN_TABLES*#TH_NDCT_TOKENS. + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_HUFFMAN_CODES (0) +/**Sets the quantization parameters to use. + * The parameters are copied, not stored by reference, so they can be freed + * after this call. + * NULL may be specified to revert to the default parameters. + * + * \param[in] _buf #th_quant_info + * \retval TH_EFAULT \a _enc_ctx is NULL. + * \retval TH_EINVAL Encoding has already begun, \a _buf is + * NULL and \a _buf_sz is not zero, + * or \a _buf is non-NULL and + * \a _buf_sz is not sizeof(#th_quant_info). + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_QUANT_PARAMS (2) +/**Sets the maximum distance between key frames. + * This can be changed during an encode, but will be bounded by + * 1<<th_info#keyframe_granule_shift. + * If it is set before encoding begins, th_info#keyframe_granule_shift will + * be enlarged appropriately. + * + * \param[in] _buf ogg_uint32_t: The maximum distance between key + * frames. + * \param[out] _buf ogg_uint32_t: The actual maximum distance set. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(ogg_uint32_t). + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4) +/**Disables any encoder features that would prevent lossless transcoding back + * to VP3. + * This primarily means disabling block-adaptive quantization and always coding + * all four luma blocks in a macro block when 4MV is used. + * It also includes using the VP3 quantization tables and Huffman codes; if you + * set them explicitly after calling this function, the resulting stream will + * not be VP3-compatible. 
+ * If you enable VP3-compatibility when encoding 4:2:2 or 4:4:4 source + * material, or when using a picture region smaller than the full frame (e.g. + * a non-multiple-of-16 width or height), then non-VP3 bitstream features will + * still be disabled, but the stream will still not be VP3-compatible, as VP3 + * was not capable of encoding such formats. + * If you call this after encoding has already begun, then the quantization + * tables and codebooks cannot be changed, but the frame-level features will + * be enabled or disabled as requested. + * + * \param[in] _buf int: a non-zero value to enable VP3 compatibility, + * or 0 to disable it (the default). + * \param[out] _buf int: 1 if all bitstream features required for + * VP3-compatibility could be set, and 0 otherwise. + * The latter will be returned if the pixel format is not + * 4:2:0, the picture region is smaller than the full frame, + * or if encoding has begun, preventing the quantization + * tables and codebooks from being set. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int). + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_VP3_COMPATIBLE (10) +/**Gets the maximum speed level. + * Higher speed levels favor quicker encoding over better quality per bit. + * Depending on the encoding mode, and the internal algorithms used, quality + * may actually improve, but in this case bitrate will also likely increase. + * In any case, overall rate/distortion performance will probably decrease. + * The maximum value, and the meaning of each value, may change depending on + * the current encoding mode (VBR vs. constant quality, etc.). + * + * \param[out] _buf int: The maximum encoding speed level. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int). 
+ * \retval TH_EIMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_GET_SPLEVEL_MAX (12) +/**Sets the speed level. + * The current speed level may be retrieved using #TH_ENCCTL_GET_SPLEVEL. + * + * \param[in] _buf int: The new encoding speed level. + * 0 is slowest, larger values use less CPU. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int), or the + * encoding speed level is out of bounds. + * The maximum encoding speed level may be + * implementation- and encoding mode-specific, and can be + * obtained via #TH_ENCCTL_GET_SPLEVEL_MAX. + * \retval TH_EIMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_SET_SPLEVEL (14) +/**Gets the current speed level. + * The default speed level may vary according to encoder implementation, but if + * this control code is not supported (it returns #TH_EIMPL), the default may + * be assumed to be the slowest available speed (0). + * The maximum encoding speed level may be implementation- and encoding + * mode-specific, and can be obtained via #TH_ENCCTL_GET_SPLEVEL_MAX. + * + * \param[out] _buf int: The current encoding speed level. + * 0 is slowest, larger values use less CPU. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int). + * \retval TH_EIMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_GET_SPLEVEL (16) +/**Sets the number of duplicates of the next frame to produce. + * Although libtheora can encode duplicate frames very cheaply, it costs some + * amount of CPU to detect them, and a run of duplicates cannot span a + * keyframe boundary. + * This control code tells the encoder to produce the specified number of extra + * duplicates of the next frame. 
+ * This allows the encoder to make smarter keyframe placement decisions and + * rate control decisions, and reduces CPU usage as well, when compared to + * just submitting the same frame for encoding multiple times. + * This setting only applies to the next frame submitted for encoding. + * You MUST call th_encode_packetout() repeatedly until it returns 0, or the + * extra duplicate frames will be lost. + * + * \param[in] _buf int: The number of duplicates to produce. + * If this is negative or zero, no duplicates will be produced. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int), or the + * number of duplicates is greater than or equal to the + * maximum keyframe interval. + * In the latter case, NO duplicate frames will be produced. + * You must ensure that the maximum keyframe interval is set + * larger than the maximum number of duplicates you will + * ever wish to insert prior to encoding. + * \retval TH_EIMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_SET_DUP_COUNT (18) +/**Modifies the default bitrate management behavior. + * Use to allow or disallow frame dropping, and to enable or disable capping + * bit reservoir overflows and underflows. + * See \ref ratectlflags "the list of available flags". + * The flags are set by default to + * #TH_RATECTL_DROP_FRAMES|#TH_RATECTL_CAP_OVERFLOW. + * + * \param[in] _buf int: Any combination of + * \ref ratectlflags "the available flags": + * - #TH_RATECTL_DROP_FRAMES: Enable frame dropping. + * - #TH_RATECTL_CAP_OVERFLOW: Don't bank excess bits for later + * use. + * - #TH_RATECTL_CAP_UNDERFLOW: Don't try to make up shortfalls + * later. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int) or rate control + * is not enabled. 
+ * \retval TH_EIMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_SET_RATE_FLAGS (20) +/**Sets the size of the bitrate management bit reservoir as a function + * of number of frames. + * The reservoir size affects how quickly bitrate management reacts to + * instantaneous changes in the video complexity. + * Larger reservoirs react more slowly, and provide better overall quality, but + * require more buffering by a client, adding more latency to live streams. + * By default, libtheora sets the reservoir to the maximum distance between + * keyframes, subject to a minimum and maximum limit. + * This call may be used to increase or decrease the reservoir, increasing or + * decreasing the allowed temporary variance in bitrate. + * An implementation may impose some limits on the size of a reservoir it can + * handle, in which case the actual reservoir size may not be exactly what was + * requested. + * The actual value set will be returned. + * + * \param[in] _buf int: Requested size of the reservoir measured in + * frames. + * \param[out] _buf int: The actual size of the reservoir set. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(int), or rate control + * is not enabled. The buffer has an implementation + * defined minimum and maximum size and the value in _buf + * will be adjusted to match the actual value set. + * \retval TH_EIMPL Not supported by this implementation in the current + * encoding mode.*/ +#define TH_ENCCTL_SET_RATE_BUFFER (22) +/**Enable pass 1 of two-pass encoding mode and retrieve the first pass metrics. + * Pass 1 mode must be enabled before the first frame is encoded, and a target + * bitrate must have already been specified to the encoder. + * Although this does not have to be the exact rate that will be used in the + * second pass, closer values may produce better results. 
+ * The first call returns the size of the two-pass header data, along with some + * placeholder content, and sets the encoder into pass 1 mode implicitly. + * This call sets the encoder to pass 1 mode implicitly. + * Then, a subsequent call must be made after each call to + * th_encode_ycbcr_in() to retrieve the metrics for that frame. + * An additional, final call must be made to retrieve the summary data, + * containing such information as the total number of frames, etc. + * This must be stored in place of the placeholder data that was returned + * in the first call, before the frame metrics data. + * All of this data must be presented back to the encoder during pass 2 using + * #TH_ENCCTL_2PASS_IN. + * + * \param[out] char *_buf: Returns a pointer to internal storage + * containing the two pass metrics data. + * This storage is only valid until the next call, or until the + * encoder context is freed, and must be copied by the + * application. + * \retval >=0 The number of bytes of metric data available in the + * returned buffer. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL \a _buf_sz is not sizeof(char *), no target + * bitrate has been set, or the first call was made after + * the first frame was submitted for encoding. + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_2PASS_OUT (24) +/**Submits two-pass encoding metric data collected the first encoding pass to + * the second pass. + * The first call must be made before the first frame is encoded, and a target + * bitrate must have already been specified to the encoder. + * It sets the encoder to pass 2 mode implicitly; this cannot be disabled. + * The encoder may require reading data from some or all of the frames in + * advance, depending on, e.g., the reservoir size used in the second pass. 
+ * You must call this function repeatedly before each frame to provide data + * until either a) it fails to consume all of the data presented or b) all of + * the pass 1 data has been consumed. + * In the first case, you must save the remaining data to be presented after + * the next frame. + * You can call this function with a NULL argument to get an upper bound on + * the number of bytes that will be required before the next frame. + * + * When pass 2 is first enabled, the default bit reservoir is set to the entire + * file; this gives maximum flexibility but can lead to very high peak rates. + * You can subsequently set it to another value with #TH_ENCCTL_SET_RATE_BUFFER + * (e.g., to set it to the keyframe interval for non-live streaming), however, + * you may then need to provide more data before the next frame. + * + * \param[in] _buf char[]: A buffer containing the data returned by + * #TH_ENCCTL_2PASS_OUT in pass 1. + * You may pass NULL for \a _buf to return an upper + * bound on the number of additional bytes needed before the + * next frame. + * The summary data returned at the end of pass 1 must be at + * the head of the buffer on the first call with a + * non-NULL \a _buf, and the placeholder data + * returned at the start of pass 1 should be omitted. + * After each call you should advance this buffer by the number + * of bytes consumed. + * \retval >0 The number of bytes of metric data required/consumed. + * \retval 0 No more data is required before the next frame. + * \retval TH_EFAULT \a _enc_ctx is NULL. + * \retval TH_EINVAL No target bitrate has been set, or the first call was + * made after the first frame was submitted for + * encoding. + * \retval TH_ENOTFORMAT The data did not appear to be pass 1 from a compatible + * implementation of this library. + * \retval TH_EBADHEADER The data was invalid; this may be returned when + * attempting to read an aborted pass 1 file that still + * has the placeholder data in place of the summary + * data. 
+ * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_2PASS_IN (26) +/**Sets the current encoding quality. + * This is only valid so long as no bitrate has been specified, either through + * the #th_info struct used to initialize the encoder or through + * #TH_ENCCTL_SET_BITRATE (this restriction may be relaxed in a future + * version). + * If it is set before the headers are emitted, the target quality encoded in + * them will be updated. + * + * \param[in] _buf int: The new target quality, in the range 0...63, + * inclusive. + * \retval 0 Success. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL A target bitrate has already been specified, or the + * quality index was not in the range 0...63. + * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_QUALITY (28) +/**Sets the current encoding bitrate. + * Once a bitrate is set, the encoder must use a rate-controlled mode for all + * future frames (this restriction may be relaxed in a future version). + * If it is set before the headers are emitted, the target bitrate encoded in + * them will be updated. + * Due to the buffer delay, the exact bitrate of each section of the encode is + * not guaranteed. + * The encoder may have already used more bits than allowed for the frames it + * has encoded, expecting to make them up in future frames, or it may have + * used fewer, holding the excess in reserve. + * The exact transition between the two bitrates is not well-defined by this + * API, but may be affected by flags set with #TH_ENCCTL_SET_RATE_FLAGS. + * After a number of frames equal to the buffer delay, one may expect further + * output to average at the target bitrate. + * + * \param[in] _buf long: The new target bitrate, in bits per second. + * \retval 0 Success. + * \retval TH_EFAULT \a _enc_ctx or \a _buf is NULL. + * \retval TH_EINVAL The target bitrate was not positive. 
+ * \retval TH_EIMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_BITRATE (30) + +/*@}*/ + + +/**\name TH_ENCCTL_SET_RATE_FLAGS flags + * \anchor ratectlflags + * These are the flags available for use with #TH_ENCCTL_SET_RATE_FLAGS.*/ +/*@{*/ +/**Drop frames to keep within bitrate buffer constraints. + * This can have a severe impact on quality, but is the only way to ensure that + * bitrate targets are met at low rates during sudden bursts of activity.*/ +#define TH_RATECTL_DROP_FRAMES (0x1) +/**Ignore bitrate buffer overflows. + * If the encoder uses so few bits that the reservoir of available bits + * overflows, ignore the excess. + * The encoder will not try to use these extra bits in future frames. + * At high rates this may cause the result to be undersized, but allows a + * client to play the stream using a finite buffer; it should normally be + * enabled.*/ +#define TH_RATECTL_CAP_OVERFLOW (0x2) +/**Ignore bitrate buffer underflows. + * If the encoder uses so many bits that the reservoir of available bits + * underflows, ignore the deficit. + * The encoder will not try to make up these extra bits in future frames. + * At low rates this may cause the result to be oversized; it should normally + * be disabled.*/ +#define TH_RATECTL_CAP_UNDERFLOW (0x4) +/*@}*/ + + + +/**The quantization parameters used by VP3.*/ +extern const th_quant_info TH_VP31_QUANT_INFO; + +/**The Huffman tables used by VP3.*/ +extern const th_huff_code + TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]; + + + +/**\name Encoder state + The following data structure is opaque, and its contents are not publicly + defined by this API. 
+ Referring to its internals directly is unsupported, and may break without + warning.*/ +/*@{*/ +/**The encoder context.*/ +typedef struct th_enc_ctx th_enc_ctx; +/*@}*/ + + + +/**\defgroup encfuncs Functions for Encoding*/ +/*@{*/ +/**\name Functions for encoding + * You must link to libtheoraenc and libtheoradec + * if you use any of the functions in this section. + * + * The functions are listed in the order they are used in a typical encode. + * The basic steps are: + * - Fill in a #th_info structure with details on the format of the video you + * wish to encode. + * - Allocate a #th_enc_ctx handle with th_encode_alloc(). + * - Perform any additional encoder configuration required with + * th_encode_ctl(). + * - Repeatedly call th_encode_flushheader() to retrieve all the header + * packets. + * - For each uncompressed frame: + * - Submit the uncompressed frame via th_encode_ycbcr_in() + * - Repeatedly call th_encode_packetout() to retrieve any video data packets + * that are ready. + * - Call th_encode_free() to release all encoder memory.*/ +/*@{*/ +/**Allocates an encoder instance. + * \param _info A #th_info struct filled with the desired encoding parameters. + * \return The initialized #th_enc_ctx handle. + * \retval NULL If the encoding parameters were invalid.*/ +extern th_enc_ctx *th_encode_alloc(const th_info *_info); +/**Encoder control function. + * This is used to provide advanced control of the encoding process. + * \param _enc A #th_enc_ctx handle. + * \param _req The control code to process. + * See \ref encctlcodes "the list of available control codes" + * for details. + * \param _buf The parameters for this control code. + * \param _buf_sz The size of the parameter buffer.*/ +extern int th_encode_ctl(th_enc_ctx *_enc,int _req,void *_buf,size_t _buf_sz); +/**Outputs the next header packet. 
+ * This should be called repeatedly after encoder initialization until it + * returns 0 in order to get all of the header packets, in order, before + * encoding actual video data. + * \param _enc A #th_enc_ctx handle. + * \param _comments The metadata to place in the comment header, when it is + * encoded. + * \param _op An ogg_packet structure to fill. + * All of the elements of this structure will be set, + * including a pointer to the header data. + * The memory for the header data is owned by + * libtheoraenc, and may be invalidated when the + * next encoder function is called. + * \return A positive value indicates that a header packet was successfully + * produced. + * \retval 0 No packet was produced, and no more header packets remain. + * \retval TH_EFAULT \a _enc, \a _comments, or \a _op was NULL.*/ +extern int th_encode_flushheader(th_enc_ctx *_enc, + th_comment *_comments,ogg_packet *_op); +/**Submits an uncompressed frame to the encoder. + * \param _enc A #th_enc_ctx handle. + * \param _ycbcr A buffer of Y'CbCr data to encode. + * \retval 0 Success. + * \retval TH_EFAULT \a _enc or \a _ycbcr is NULL. + * \retval TH_EINVAL The buffer size does not match the frame size the encoder + * was initialized with, or encoding has already + * completed.*/ +extern int th_encode_ycbcr_in(th_enc_ctx *_enc,th_ycbcr_buffer _ycbcr); +/**Retrieves encoded video data packets. + * This should be called repeatedly after each frame is submitted to flush any + * encoded packets, until it returns 0. + * The encoder will not buffer these packets as subsequent frames are + * compressed, so a failure to do so will result in lost video data. + * \note Currently the encoder operates in a one-frame-in, one-packet-out + * manner. + * However, this may be changed in the future. + * \param _enc A #th_enc_ctx handle. + * \param _last Set this flag to a non-zero value if no more uncompressed + * frames will be submitted. + * This ensures that a proper EOS flag is set on the last packet. 
+ * \param _op An ogg_packet structure to fill. + * All of the elements of this structure will be set, including a + * pointer to the video data. + * The memory for the video data is owned by + * libtheoraenc, and may be invalidated when the next + * encoder function is called. + * \return A positive value indicates that a video data packet was successfully + * produced. + * \retval 0 No packet was produced, and no more encoded video data + * remains. + * \retval TH_EFAULT \a _enc or \a _op was NULL.*/ +extern int th_encode_packetout(th_enc_ctx *_enc,int _last,ogg_packet *_op); +/**Frees an allocated encoder instance. + * \param _enc A #th_enc_ctx handle.*/ +extern void th_encode_free(th_enc_ctx *_enc); +/*@}*/ +/*@}*/ + + + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/thirdparty/libtheora/tokenize.c b/thirdparty/libtheora/tokenize.c new file mode 100644 index 0000000000..60574c3594 --- /dev/null +++ b/thirdparty/libtheora/tokenize.c @@ -0,0 +1,1072 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
*
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: tokenize.c 16503 2009-08-22 18:14:02Z giles $
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include "encint.h"
+
+
+
+/*Returns the EOB run token for a run of _run_count blocks.
+  Runs shorter than 4 blocks map to OC_DCT_EOB1_TOKEN+_run_count-1.
+  Longer runs map to OC_DCT_REPEAT_RUN0_TOKEN plus a size category that is
+   capped at 3.
+  The extra bits are not computed here; use oc_make_eob_token_full() when they
+   are needed.
+  _run_count: The length of the run (assumed to be positive).*/
+static int oc_make_eob_token(int _run_count){
+  if(_run_count<4)return OC_DCT_EOB1_TOKEN+_run_count-1;
+  else{
+    int cat;
+    cat=OC_ILOGNZ_32(_run_count)-3;
+    cat=OC_MINI(cat,3);
+    return OC_DCT_REPEAT_RUN0_TOKEN+cat;
+  }
+}
+
+/*Same as oc_make_eob_token(), but also produces the extra bits.
+  _run_count: The length of the run (assumed to be positive).
+  _eb:        Returns the extra bits: 0 for runs shorter than 4 blocks,
+               otherwise _run_count minus the base of the chosen category
+               (4, 8, 16, or 0 for the largest category).
+  Return: The EOB run token.*/
+static int oc_make_eob_token_full(int _run_count,int *_eb){
+  if(_run_count<4){
+    *_eb=0;
+    return OC_DCT_EOB1_TOKEN+_run_count-1;
+  }
+  else{
+    int cat;
+    cat=OC_ILOGNZ_32(_run_count)-3;
+    cat=OC_MINI(cat,3);
+    *_eb=_run_count-OC_BYTE_TABLE32(4,8,16,0,cat);
+    return OC_DCT_REPEAT_RUN0_TOKEN+cat;
+  }
+}
+
+/*Returns the number of blocks ended by an EOB token.
+  The constant packs one 5-bit base run length per token (1, 2, 3, 4, 8, 16,
+   and 0 for tokens 0 through 6), to which the extra bits are added.*/
+static int oc_decode_eob_token(int _token,int _eb){
+  return (0x20820C41U>>_token*5&0x1F)+_eb;
+}
+
+/*Builds the token for a coefficient of value _val that is preceded by
+   _zzj-_zzi zeros.
+  With no preceding zeros, a plain value token is produced.
+  Otherwise a combined run/value token is used when both the run and the
+   magnitude are small enough, and a zero run token (which codes only the
+   zeros, not _val) is used when they are not.
+  _zzi:   The zig-zag index at which the zero run starts.
+  _zzj:   The zig-zag index of the coefficient holding _val.
+  _val:   The (signed) coefficient value.
+  _eb:    Returns the extra bits that accompany the token.
+  Return: The token.*/
+/*TODO: This is now only used during DCT tokenization, and never for runs; it
+   should be simplified.*/
+static int oc_make_dct_token_full(int _zzi,int _zzj,int _val,int *_eb){
+  int neg;
+  int zero_run;
+  int token;
+  int eb;
+  neg=_val<0;
+  _val=abs(_val);
+  zero_run=_zzj-_zzi;
+  if(zero_run>0){
+    int adj;
+    /*Implement a minor restriction on stack 1 so that we know during DC fixups
+       that extending a dctrun token from stack 1 will never overflow.*/
+    adj=_zzi!=1;
+    if(_val<2&&zero_run<17+adj){
+      if(zero_run<6){
+        token=OC_DCT_RUN_CAT1A+zero_run-1;
+        eb=neg;
+      }
+      else if(zero_run<10){
+        token=OC_DCT_RUN_CAT1B;
+        eb=zero_run-6+(neg<<2);
+      }
+      else{
+        token=OC_DCT_RUN_CAT1C;
+        eb=zero_run-10+(neg<<3);
+      }
+    }
+    else if(_val<4&&zero_run<3+adj){
+      if(zero_run<2){
+        token=OC_DCT_RUN_CAT2A;
+        eb=_val-2+(neg<<1);
+      }
+      else{
+        token=OC_DCT_RUN_CAT2B;
+        eb=zero_run-2+(_val-2<<1)+(neg<<2);
+      }
+    }
+    else{
+      if(zero_run<9)token=OC_DCT_SHORT_ZRL_TOKEN;
+      else token=OC_DCT_ZRL_TOKEN;
+      eb=zero_run-1;
+    }
+  }
+  else if(_val<3){
+    token=OC_ONE_TOKEN+(_val-1<<1)+neg;
+    eb=0;
+  }
+  else if(_val<7){
+    token=OC_DCT_VAL_CAT2+_val-3;
+    eb=neg;
+  }
+  else if(_val<9){
+    token=OC_DCT_VAL_CAT3;
+    eb=_val-7+(neg<<1);
+  }
+  else if(_val<13){
+    token=OC_DCT_VAL_CAT4;
+    eb=_val-9+(neg<<2);
+  }
+  else if(_val<21){
+    token=OC_DCT_VAL_CAT5;
+    eb=_val-13+(neg<<3);
+  }
+  else if(_val<37){
+    token=OC_DCT_VAL_CAT6;
+    eb=_val-21+(neg<<4);
+  }
+  else if(_val<69){
+    token=OC_DCT_VAL_CAT7;
+    eb=_val-37+(neg<<5);
+  }
+  else{
+    token=OC_DCT_VAL_CAT8;
+    eb=_val-69+(neg<<9);
+  }
+  *_eb=eb;
+  return token;
+}
+
+/*Token logging to allow a few fragments of efficient rollback.
+  Late SKIP analysis is tied up in the tokenization process, so we need to be
+   able to undo a fragment's tokens on a whim.*/
+
+/*The offset added to a Huffman table index to select the table group used at
+   each zig-zag index: 0 for index 0, 16 for indices 1...5, 32 for 6...14,
+   48 for 15...27, and 64 for 28...63.
+  All 64 entries must be listed explicitly: any entry left out of the
+   initializer is zero, which would price tokens at that index with the
+   tables for index 0.*/
+static const unsigned char OC_ZZI_HUFF_OFFSET[64]={
+   0,16,16,16,16,16,32,32,
+  32,32,32,32,32,32,32,48,
+  48,48,48,48,48,48,48,48,
+  48,48,48,48,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64
+};
+
+/*Returns the number of bits needed to code _token at zig-zag index _zzi: the
+   length of its Huffman code in the table group for that index plus the
+   number of extra bits the token carries.
+  _huffi: The Huffman table index within a group.*/
+static int oc_token_bits(oc_enc_ctx *_enc,int _huffi,int _zzi,int _token){
+  return _enc->huff_codes[_huffi+OC_ZZI_HUFF_OFFSET[_zzi]][_token].nbits
+   +OC_DCT_TOKEN_EXTRA_BITS[_token];
+}
+
+/*Records the current EOB run and token count of token list (_pli,_zzi) in
+   _cp so that they can later be restored by oc_enc_tokenlog_rollback().*/
+static void oc_enc_tokenlog_checkpoint(oc_enc_ctx *_enc,
+ oc_token_checkpoint *_cp,int _pli,int _zzi){
+  _cp->pli=_pli;
+  _cp->zzi=_zzi;
+  _cp->eob_run=_enc->eob_run[_pli][_zzi];
+  _cp->ndct_tokens=_enc->ndct_tokens[_pli][_zzi];
+}
+
+/*Restores the EOB runs and token counts saved in the _n checkpoints of
+   _stack.
+  The checkpoints are applied from last to first, so when a list was
+   checkpointed more than once its oldest saved state wins.*/
+void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc,
+ const oc_token_checkpoint *_stack,int _n){
+  int i;
+  for(i=_n;i-->0;){
+    int pli;
+    int zzi;
+    pli=_stack[i].pli;
+    zzi=_stack[i].zzi;
+    _enc->eob_run[pli][zzi]=_stack[i].eob_run;
+    _enc->ndct_tokens[pli][zzi]=_stack[i].ndct_tokens;
+  }
+}
+
+/*Appends a token and its extra bits to token list (_pli,_zzi).*/
+static void oc_enc_token_log(oc_enc_ctx *_enc,
+ int _pli,int _zzi,int _token,int _eb){
+  ptrdiff_t ti;
+  ti=_enc->ndct_tokens[_pli][_zzi]++;
+  _enc->dct_tokens[_pli][_zzi][ti]=(unsigned char)_token;
+  _enc->extra_bits[_pli][_zzi][ti]=(ogg_uint16_t)_eb;
+}
+
+/*Appends the EOB token for a run of _run_count blocks to token list
+   (_pli,_zzi).
+  The caller is responsible for resetting eob_run[][] afterwards.*/
+static void oc_enc_eob_log(oc_enc_ctx *_enc,
+ int _pli,int _zzi,int _run_count){
+  int token;
+  int eb;
+  token=oc_make_eob_token_full(_run_count,&eb);
+  oc_enc_token_log(_enc,_pli,_zzi,token,eb);
+}
+
+
+/*Resets the tokenizer state: token counts, pending EOB runs, token list
+   start offsets, and the last DC predictors are all cleared to zero.*/
+void oc_enc_tokenize_start(oc_enc_ctx *_enc){
+  memset(_enc->ndct_tokens,0,sizeof(_enc->ndct_tokens));
+  memset(_enc->eob_run,0,sizeof(_enc->eob_run));
+  memset(_enc->dct_token_offs,0,sizeof(_enc->dct_token_offs));
+  memset(_enc->dc_pred_last,0,sizeof(_enc->dc_pred_last));
+}
+
+typedef struct oc_quant_token oc_quant_token;
+
+/*A single node in the Viterbi trellis.
+  We maintain up to 2 of these per coefficient:
+    - A token to code if the value is zero (EOB, zero run, or combo token).
+    - A token to code if the value is not zero (DCT value token).*/
+struct oc_quant_token{
+  /*The node that follows this one: (zig-zag index<<1)|node index.
+    A zig-zag index of 0 ends the walk.*/
+  unsigned char next;
+  /*The token to emit for this node.*/
+  signed char   token;
+  /*The extra bits that accompany the token.*/
+  ogg_int16_t   eb;
+  /*The cost of this node plus that of the nodes that follow it.*/
+  ogg_uint32_t  cost;
+  /*The bits used by this node plus those of the nodes that follow it.*/
+  int           bits;
+  /*The (signed) quantized value chosen for the coefficient the token codes.*/
+  int           qc;
+};
+
+/*Tokenizes the AC coefficients, possibly adjusting the quantization, and then
+   dequantizes and de-zig-zags the result.
+ The DC coefficient is not preserved; it should be restored by the caller.*/ +int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi, + ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct, + int _zzi,oc_token_checkpoint **_stack,int _acmin){ + oc_token_checkpoint *stack; + ogg_int64_t zflags; + ogg_int64_t nzflags; + ogg_int64_t best_flags; + ogg_uint32_t d2_accum[64]; + oc_quant_token tokens[64][2]; + ogg_uint16_t *eob_run; + const unsigned char *dct_fzig_zag; + ogg_uint32_t cost; + int bits; + int eob; + int token; + int eb; + int next; + int huffi; + int zzi; + int ti; + int zzj; + int qc; + huffi=_enc->huff_idxs[_enc->state.frame_type][1][_pli+1>>1]; + eob_run=_enc->eob_run[_pli]; + memset(tokens[0],0,sizeof(tokens[0])); + best_flags=nzflags=0; + zflags=1; + d2_accum[0]=0; + zzj=64; + for(zzi=OC_MINI(_zzi,63);zzi>0;zzi--){ + ogg_int32_t lambda; + ogg_uint32_t best_cost; + int best_bits=best_bits; + int best_next=best_next; + int best_token=best_token; + int best_eb=best_eb; + int best_qc=best_qc; + int flush_bits; + ogg_uint32_t d2; + int dq; + int e; + int c; + int s; + int tj; + lambda=_enc->lambda; + qc=_qdct[zzi]; + s=-(qc<0); + qc=qc+s^s; + c=_dct[OC_FZIG_ZAG[zzi]]; + if(qc<=1){ + ogg_uint32_t sum_d2; + int nzeros; + int dc_reserve; + /*The hard case: try a zero run.*/ + if(!qc){ + /*Skip runs that are already quantized to zeros.
+ If we considered each zero coefficient in turn, we might + theoretically find a better way to partition long zero runs (e.g., + a run of > 17 zeros followed by a 1 might be better coded as a short + zero run followed by a combo token, rather than the longer zero + token followed by a 1 value token), but zeros are so common that + this becomes very computationally expensive (quadratic instead of + linear in the number of coefficients), for a marginal gain.*/ + while(zzi>1&&!_qdct[zzi-1])zzi--; + /*The distortion of coefficients originally quantized to zero is + treated as zero (since we'll never quantize them to anything else).*/ + d2=0; + } + else{ + c=c+s^s; + d2=c*(ogg_int32_t)c; + } + eob=eob_run[zzi]; + nzeros=zzj-zzi; + zzj&=63; + sum_d2=d2+d2_accum[zzj]; + d2_accum[zzi]=sum_d2; + flush_bits=eob>0?oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob)):0; + /*We reserve 1 spot for combo run tokens that start in the 1st AC stack + to ensure they can be extended to include the DC coefficient if + necessary; this greatly simplifies stack-rewriting later on.*/ + dc_reserve=zzi+62>>6; + best_cost=0xFFFFFFFF; + for(;;){ + if(nzflags>>zzj&1){ + int cat; + int val; + int val_s; + int zzk; + int tk; + next=tokens[zzj][1].next; + tk=next&1; + zzk=next>>1; + /*Try a pure zero run to this point.*/ + cat=nzeros+55>>6; + token=OC_DCT_SHORT_ZRL_TOKEN+cat; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + d2=sum_d2-d2_accum[zzj]; + cost=d2+lambda*bits+tokens[zzj][1].cost; + if(cost<=best_cost){ + best_next=(zzj<<1)+1; + best_token=token; + best_eb=nzeros-1; + best_cost=cost; + best_bits=bits+tokens[zzj][1].bits; + best_qc=0; + } + if(nzeros<16+dc_reserve){ + val=_qdct[zzj]; + val_s=-(val<0); + val=val+val_s^val_s; + if(val<=2){ + /*Try a +/- 1 combo token.*/ + if(nzeros<6){ + token=OC_DCT_RUN_CAT1A+nzeros-1; + eb=-val_s; + } + else{ + cat=nzeros+54>>6; + token=OC_DCT_RUN_CAT1B+cat; + eb=(-val_s<>1; + token=OC_DCT_RUN_CAT2A+cat; +
bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + val=2+((val+val_s^val_s)>2); + e=(_dct[OC_FZIG_ZAG[zzj]]+val_s^val_s)-_dequant[zzj]*val; + d2=e*(ogg_int32_t)e+sum_d2-d2_accum[zzj]; + cost=d2+lambda*bits+tokens[zzk][tk].cost; + if(cost<=best_cost){ + best_cost=cost; + best_bits=bits+tokens[zzk][tk].bits; + best_next=next; + best_token=token; + best_eb=(-val_s<<1+cat)+(val-2<>1); + best_qc=val+val_s^val_s; + } + } + } + /*zzj can't be coded as a zero, so stop trying to extend the run.*/ + if(!(zflags>>zzj&1))break; + } + /*We could try to consider _all_ potentially non-zero coefficients, but + if we already found a bunch of them not worth coding, it's fairly + unlikely they would now be worth coding from this position; skipping + them saves a lot of work.*/ + zzj=(tokens[zzj][0].next>>1)-(tokens[zzj][0].qc!=0)&63; + if(zzj==0){ + /*We made it all the way to the end of the block; try an EOB token.*/ + if(eob<4095){ + bits=oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob+1)) + -flush_bits; + } + else bits=oc_token_bits(_enc,huffi,zzi,OC_DCT_EOB1_TOKEN); + cost=sum_d2+bits*lambda; + /*If the best route so far is still a pure zero run to the end of the + block, force coding it as an EOB.
+ Even if it's not optimal for this block, it has a good chance of + getting combined with an EOB token from subsequent blocks, saving + bits overall.*/ + if(cost<=best_cost||best_token<=OC_DCT_ZRL_TOKEN&&zzi+best_eb==63){ + best_next=0; + /*This token is just a marker; in reality we may not emit any + tokens, but update eob_run[] instead.*/ + best_token=OC_DCT_EOB1_TOKEN; + best_eb=0; + best_cost=cost; + best_bits=bits; + best_qc=0; + } + break; + } + nzeros=zzj-zzi; + } + tokens[zzi][0].next=(unsigned char)best_next; + tokens[zzi][0].token=(signed char)best_token; + tokens[zzi][0].eb=(ogg_int16_t)best_eb; + tokens[zzi][0].cost=best_cost; + tokens[zzi][0].bits=best_bits; + tokens[zzi][0].qc=best_qc; + zflags|=(ogg_int64_t)1<>zzj&1; + next=(zzj<<1)+tj; + tokens[zzi][1].next=(unsigned char)next; + tokens[zzi][1].token=(signed char)token; + tokens[zzi][1].eb=0; + tokens[zzi][1].cost=d2+lambda*bits+tokens[zzj][tj].cost; + tokens[zzi][1].bits=bits+tokens[zzj][tj].bits; + tokens[zzi][1].qc=1+s^s; + nzflags|=(ogg_int64_t)1<0?oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob)):0; + if(qc<=2){ + e=2*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_TWO_TOKEN-s; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e-=dq; + d2=e*(ogg_int32_t)e; + token=OC_ONE_TOKEN-s; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_bits=bits; + best_cost=cost; + qc--; + } + best_eb=0; + } + else if(qc<=3){ + e=3*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT2; + best_eb=-s; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e-=dq; + d2=e*(ogg_int32_t)e; + token=OC_TWO_TOKEN-s; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_eb=0; + best_bits=bits; + best_cost=cost; + qc--; + } + } + else if(qc<=6){ + e=qc*dq-c; + d2=e*(ogg_int32_t)e;
+ best_token=OC_DCT_VAL_CAT2+qc-3; + best_eb=-s; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e-=dq; + d2=e*(ogg_int32_t)e; + token=best_token-1; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_bits=bits; + best_cost=cost; + qc--; + } + } + else if(qc<=8){ + e=qc*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT3; + best_eb=(-s<<1)+qc-7; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e=6*dq-c; + d2=e*(ogg_int32_t)e; + token=OC_DCT_VAL_CAT2+3; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_eb=-s; + best_bits=bits; + best_cost=cost; + qc=6; + } + } + else if(qc<=12){ + e=qc*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT4; + best_eb=(-s<<2)+qc-9; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e=8*dq-c; + d2=e*(ogg_int32_t)e; + token=best_token-1; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_eb=(-s<<1)+1; + best_bits=bits; + best_cost=cost; + qc=8; + } + } + else if(qc<=20){ + e=qc*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT5; + best_eb=(-s<<3)+qc-13; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e=12*dq-c; + d2=e*(ogg_int32_t)e; + token=best_token-1; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_eb=(-s<<2)+3; + best_bits=bits; + best_cost=cost; + qc=12; + } + } + else if(qc<=36){ + e=qc*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT6; + best_eb=(-s<<4)+qc-21; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e=20*dq-c; + d2=e*(ogg_int32_t)e; +
token=best_token-1; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost<=best_cost){ + best_token=token; + best_eb=(-s<<3)+7; + best_bits=bits; + best_cost=cost; + qc=20; + } + } + else if(qc<=68){ + e=qc*dq-c; + d2=e*(ogg_int32_t)e; + best_token=OC_DCT_VAL_CAT7; + best_eb=(-s<<5)+qc-37; + best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); + best_cost=d2+lambda*best_bits; + e=36*dq-c; + d2=e*(ogg_int32_t)e; + token=best_token-1; + bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); + cost=d2+lambda*bits; + if(cost>zzj&1; + next=(zzj<<1)+tj; + tokens[zzi][1].next=(unsigned char)next; + tokens[zzi][1].token=(signed char)best_token; + tokens[zzi][1].eb=best_eb; + tokens[zzi][1].cost=best_cost+tokens[zzj][tj].cost; + tokens[zzi][1].bits=best_bits+tokens[zzj][tj].bits; + tokens[zzi][1].qc=qc+s^s; + nzflags|=(ogg_int64_t)1<state.opt_data.dct_fzig_zag; + zzi=1; + ti=best_flags>>1&1; + bits=tokens[zzi][ti].bits; + do{ + oc_enc_tokenlog_checkpoint(_enc,stack++,_pli,zzi); + eob=eob_run[zzi]; + if(tokens[zzi][ti].token=4095){ + oc_enc_eob_log(_enc,_pli,zzi,eob); + eob=0; + } + eob_run[zzi]=eob; + /*We don't include the actual EOB cost for this block in the return value.
+ It will be paid for by the fragment that terminates the EOB run.*/ + bits-=tokens[zzi][ti].bits; + zzi=_zzi; + break; + } + /*Emit pending EOB run if any.*/ + if(eob>0){ + oc_enc_eob_log(_enc,_pli,zzi,eob); + eob_run[zzi]=0; + } + oc_enc_token_log(_enc,_pli,zzi,tokens[zzi][ti].token,tokens[zzi][ti].eb); + next=tokens[zzi][ti].next; + qc=tokens[zzi][ti].qc; + zzj=(next>>1)-1&63; + /*TODO: It may be worth saving the dequantized coefficient in the trellis + above; we had to compute it to measure the error anyway.*/ + _qdct[dct_fzig_zag[zzj]]=(ogg_int16_t)(qc*(int)_dequant[zzj]); + zzi=next>>1; + ti=next&1; + } + while(zzi); + *_stack=stack; + return bits; +} + +/*Computes the DC prediction residual of every coded fragment in fragment rows + _fragy0 (inclusive) through _frag_yend (exclusive) of plane _pli. + Each predictor is formed from the DC values of those neighbors (l, ul, u, and + ur in the code) whose reference frame matches the fragment's own, falling + back to _enc->dc_pred_last[_pli][ref] when none of them match. + The residual frags[fragi].dc-pred is stored in _enc->frag_dc[fragi], and + dc_pred_last is then updated with the fragment's DC value.*/ +void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc, + int _pli,int _fragy0,int _frag_yend){ + const oc_fragment_plane *fplane; + const oc_fragment *frags; + ogg_int16_t *frag_dc; + ptrdiff_t fragi; + int *pred_last; + int nhfrags; + int fragx; + int fragy; + fplane=_enc->state.fplanes+_pli; + frags=_enc->state.frags; + frag_dc=_enc->frag_dc; + pred_last=_enc->dc_pred_last[_pli]; + nhfrags=fplane->nhfrags; + fragi=fplane->froffset+_fragy0*nhfrags; + for(fragy=_fragy0;fragy<_frag_yend;fragy++){ + if(fragy==0){ + /*For the first row, all of the cases reduce to just using the previous + predictor for the same reference frame.*/ + for(fragx=0;fragx=nhfrags)ur_ref=-1; + else{ + ur_ref=u_frags[fragi+1].coded? + OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1; + } + if(frags[fragi].coded){ + int pred; + int ref; + ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode); + /*We break out a separate case based on which of our neighbors use + the same reference frames.
+ This is somewhat faster than trying to make a generic case which + handles all of them, since it reduces lots of poorly predicted + jumps to one switch statement, and also lets a number of the + multiplications be optimized out by strength reduction.*/ + switch((l_ref==ref)|(ul_ref==ref)<<1| + (u_ref==ref)<<2|(ur_ref==ref)<<3){ + default:pred=pred_last[ref];break; + case 1: + case 3:pred=frags[fragi-1].dc;break; + case 2:pred=u_frags[fragi-1].dc;break; + case 4: + case 6: + case 12:pred=u_frags[fragi].dc;break; + case 5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break; + case 8:pred=u_frags[fragi+1].dc;break; + case 9: + case 11: + case 13:{ + pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128; + }break; + case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break; + case 14:{ + pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc) + +10*u_frags[fragi].dc)/16; + }break; + case 7: + case 15:{ + int p0; + int p1; + int p2; + p0=frags[fragi-1].dc; + p1=u_frags[fragi-1].dc; + p2=u_frags[fragi].dc; + pred=(29*(p0+p2)-26*p1)/32; + if(abs(pred-p2)>128)pred=p2; + else if(abs(pred-p0)>128)pred=p0; + else if(abs(pred-p1)>128)pred=p1; + }break; + } + frag_dc[fragi]=(ogg_int16_t)(frags[fragi].dc-pred); + pred_last[ref]=frags[fragi].dc; + l_ref=ref; + } + else l_ref=-1; + ul_ref=u_ref; + u_ref=ur_ref; + } + } + } +} + +/*Produces the DC tokens (token stack 0) of plane _pli for the _ncoded_fragis + fragments listed in _coded_fragis. + Stack 1 (the first AC coefficient) is re-read starting at token index + _prev_ndct_tokens1 and rewritten in place, so that a stack 1 entry can be + merged into stack 0 wherever the DC value is zero. + _prev_eob_run1 is the part of the first stack 1 EOB run that belongs to + fragments that came before this list; it is skipped rather than consumed. 
+ NOTE(review): the values being tokenized are presumably read from + _enc->frag_dc; confirm against the upstream source.*/ +void oc_enc_tokenize_dc_frag_list(oc_enc_ctx *_enc,int _pli, + const ptrdiff_t *_coded_fragis,ptrdiff_t _ncoded_fragis, + int _prev_ndct_tokens1,int _prev_eob_run1){ + const ogg_int16_t *frag_dc; + ptrdiff_t fragii; + unsigned char *dct_tokens0; + unsigned char *dct_tokens1; + ogg_uint16_t *extra_bits0; + ogg_uint16_t *extra_bits1; + ptrdiff_t ti0; + ptrdiff_t ti1r; + ptrdiff_t ti1w; + int eob_run0; + int eob_run1; + int neobs1; + int token; + int eb; + int token1=token1; + int eb1=eb1; + /*Return immediately if there are no coded fragments; otherwise we'd flush + any trailing EOB run into the AC 1 list and never read it back out.*/ +
if(_ncoded_fragis<=0)return; + frag_dc=_enc->frag_dc; + dct_tokens0=_enc->dct_tokens[_pli][0]; + dct_tokens1=_enc->dct_tokens[_pli][1]; + extra_bits0=_enc->extra_bits[_pli][0]; + extra_bits1=_enc->extra_bits[_pli][1]; + ti0=_enc->ndct_tokens[_pli][0]; + ti1w=ti1r=_prev_ndct_tokens1; + eob_run0=_enc->eob_run[_pli][0]; + /*Flush any trailing EOB run for the 1st AC coefficient. + This is needed to allow us to track tokens to the end of the list.*/ + eob_run1=_enc->eob_run[_pli][1]; + if(eob_run1>0)oc_enc_eob_log(_enc,_pli,1,eob_run1); + /*If there was an active EOB run at the start of the 1st AC stack, read it + in and decode it.*/ + if(_prev_eob_run1>0){ + token1=dct_tokens1[ti1r]; + eb1=extra_bits1[ti1r]; + ti1r++; + eob_run1=oc_decode_eob_token(token1,eb1); + /*Consume the portion of the run that came before these fragments.*/ + neobs1=eob_run1-_prev_eob_run1; + } + else eob_run1=neobs1=0; + for(fragii=0;fragii<_ncoded_fragis;fragii++){ + int val; + /*All tokens in the 1st AC coefficient stack are regenerated as the DC + coefficients are produced. + This can be done in-place; stack 1 cannot get larger.*/ + if(!neobs1){ + /*There's no active EOB run in stack 1; read the next token.*/ + token1=dct_tokens1[ti1r]; + eb1=extra_bits1[ti1r]; + ti1r++; + if(token10){ + token=oc_make_eob_token_full(eob_run0,&eb); + dct_tokens0[ti0]=(unsigned char)token; + extra_bits0[ti0]=(ogg_uint16_t)eb; + ti0++; + eob_run0=0; + } + token=oc_make_dct_token_full(0,0,val,&eb); + dct_tokens0[ti0]=(unsigned char)token; + extra_bits0[ti0]=(ogg_uint16_t)eb; + ti0++; + } + else{ + /*Zero DC value; that means the entry in stack 1 might need to be coded + from stack 0. + This requires a stack 1 fixup.*/ + if(neobs1>0){ + /*We're in the middle of an active EOB run in stack 1.
+ Move it to stack 0.*/ + if(++eob_run0>=4095){ + token=oc_make_eob_token_full(eob_run0,&eb); + dct_tokens0[ti0]=(unsigned char)token; + extra_bits0[ti0]=(ogg_uint16_t)eb; + ti0++; + eob_run0=0; + } + eob_run1--; + } + else{ + /*No active EOB run in stack 1, so we can't extend one in stack 0. + Flush it if we've got it.*/ + if(eob_run0>0){ + token=oc_make_eob_token_full(eob_run0,&eb); + dct_tokens0[ti0]=(unsigned char)token; + extra_bits0[ti0]=(ogg_uint16_t)eb; + ti0++; + eob_run0=0; + } + /*Stack 1 token is one of: a pure zero run token, a single + coefficient token, or a zero run/coefficient combo token. + A zero run token is expanded and moved to token stack 0, and the + stack 1 entry dropped. + A single coefficient value may be transformed into combo token that + is moved to stack 0, or if it cannot be combined, it is left alone + and a single length-1 zero run is emitted in stack 0. + A combo token is extended and moved to stack 0. + During AC coding, we restrict the run lengths on combo tokens for + stack 1 to guarantee we can extend them.*/ + switch(token1){ + case OC_DCT_SHORT_ZRL_TOKEN:{ + if(eb1<7){ + dct_tokens0[ti0]=OC_DCT_SHORT_ZRL_TOKEN; + extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); + ti0++; + /*Don't write the AC coefficient back out.*/ + continue; + } + /*Fall through.*/ + } + case OC_DCT_ZRL_TOKEN:{ + dct_tokens0[ti0]=OC_DCT_ZRL_TOKEN; + extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_ONE_TOKEN: + case OC_MINUS_ONE_TOKEN:{ + dct_tokens0[ti0]=OC_DCT_RUN_CAT1A; + extra_bits0[ti0]=(ogg_uint16_t)(token1-OC_ONE_TOKEN); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_TWO_TOKEN: + case OC_MINUS_TWO_TOKEN:{ + dct_tokens0[ti0]=OC_DCT_RUN_CAT2A; + extra_bits0[ti0]=(ogg_uint16_t)(token1-OC_TWO_TOKEN<<1); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_DCT_VAL_CAT2:{ + dct_tokens0[ti0]=OC_DCT_RUN_CAT2A; +
extra_bits0[ti0]=(ogg_uint16_t)((eb1<<1)+1); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_DCT_RUN_CAT1A: + case OC_DCT_RUN_CAT1A+1: + case OC_DCT_RUN_CAT1A+2: + case OC_DCT_RUN_CAT1A+3:{ + dct_tokens0[ti0]=(unsigned char)(token1+1); + extra_bits0[ti0]=(ogg_uint16_t)eb1; + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_DCT_RUN_CAT1A+4:{ + dct_tokens0[ti0]=OC_DCT_RUN_CAT1B; + extra_bits0[ti0]=(ogg_uint16_t)(eb1<<2); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_DCT_RUN_CAT1B:{ + if((eb1&3)<3){ + dct_tokens0[ti0]=OC_DCT_RUN_CAT1B; + extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); + ti0++; + /*Don't write the AC coefficient back out.*/ + continue; + } + eb1=((eb1&4)<<1)-1; + /*Fall through.*/ + } + case OC_DCT_RUN_CAT1C:{ + dct_tokens0[ti0]=OC_DCT_RUN_CAT1C; + extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + case OC_DCT_RUN_CAT2A:{ + eb1=(eb1<<1)-1; + /*Fall through.*/ + } + case OC_DCT_RUN_CAT2B:{ + dct_tokens0[ti0]=OC_DCT_RUN_CAT2B; + extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); + ti0++; + /*Don't write the AC coefficient back out.*/ + }continue; + } + /*We can't merge tokens, write a short zero run and keep going.*/ + dct_tokens0[ti0]=OC_DCT_SHORT_ZRL_TOKEN; + extra_bits0[ti0]=0; + ti0++; + } + } + if(!neobs1){ + /*Flush any (inactive) EOB run.*/ + if(eob_run1>0){ + token=oc_make_eob_token_full(eob_run1,&eb); + dct_tokens1[ti1w]=(unsigned char)token; + extra_bits1[ti1w]=(ogg_uint16_t)eb; + ti1w++; + eob_run1=0; + } + /*There's no active EOB run, so log the current token.*/ + dct_tokens1[ti1w]=(unsigned char)token1; + extra_bits1[ti1w]=(ogg_uint16_t)eb1; + ti1w++; + } + else{ + /*Otherwise consume one EOB from the current run.*/ + neobs1--; + /*If we have more than 4095 EOBs outstanding in stack1, flush the run.*/ + if(eob_run1-neobs1>=4095){ + token=oc_make_eob_token_full(4095,&eb); + dct_tokens1[ti1w]=(unsigned
char)token; + extra_bits1[ti1w]=(ogg_uint16_t)eb; + ti1w++; + eob_run1-=4095; + } + } + } + /*Save the current state.*/ + _enc->ndct_tokens[_pli][0]=ti0; + _enc->ndct_tokens[_pli][1]=ti1w; + _enc->eob_run[_pli][0]=eob_run0; + _enc->eob_run[_pli][1]=eob_run1; +} + +/*Final EOB run welding.*/ +void oc_enc_tokenize_finish(oc_enc_ctx *_enc){ + int pli; + int zzi; + /*Emit final EOB runs.*/ + for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){ + int eob_run; + eob_run=_enc->eob_run[pli][zzi]; + if(eob_run>0)oc_enc_eob_log(_enc,pli,zzi,eob_run); + } + /*Merge the final EOB run of one token list with the start of the next, if + possible.*/ + for(zzi=0;zzi<64;zzi++)for(pli=0;pli<3;pli++){ + int old_tok1; + int old_tok2; + int old_eb1; + int old_eb2; + int new_tok; + int new_eb; + int zzj; + int plj; + ptrdiff_t ti=ti; + int run_count; + /*Make sure this coefficient has tokens at all.*/ + if(_enc->ndct_tokens[pli][zzi]<=0)continue; + /*Ensure the first token is an EOB run.*/ + old_tok2=_enc->dct_tokens[pli][zzi][0]; + if(old_tok2>=OC_NDCT_EOB_TOKEN_MAX)continue; + /*Search for a previous coefficient that has any tokens at all.*/ + old_tok1=OC_NDCT_EOB_TOKEN_MAX; + for(zzj=zzi,plj=pli;zzj>=0;zzj--){ + while(plj-->0){ + ti=_enc->ndct_tokens[plj][zzj]-1; + if(ti>=_enc->dct_token_offs[plj][zzj]){ + old_tok1=_enc->dct_tokens[plj][zzj][ti]; + break; + } + } + if(plj>=0)break; + plj=3; + } + /*Ensure its last token was an EOB run.*/ + if(old_tok1>=OC_NDCT_EOB_TOKEN_MAX)continue; + /*Pull off the associated extra bits, if any, and decode the runs.*/ + old_eb1=_enc->extra_bits[plj][zzj][ti]; + old_eb2=_enc->extra_bits[pli][zzi][0]; + run_count=oc_decode_eob_token(old_tok1,old_eb1) + +oc_decode_eob_token(old_tok2,old_eb2); + /*We can't possibly combine these into one run.
+ It might be possible to split them more optimally, but we'll just leave + them as-is.*/ + if(run_count>=4096)continue; + /*We CAN combine them into one run.*/ + new_tok=oc_make_eob_token_full(run_count,&new_eb); + _enc->dct_tokens[plj][zzj][ti]=(unsigned char)new_tok; + _enc->extra_bits[plj][zzj][ti]=(ogg_uint16_t)new_eb; + _enc->dct_token_offs[pli][zzi]++; + } +} diff --git a/thirdparty/libtheora/x86/mmxencfrag.c b/thirdparty/libtheora/x86/mmxencfrag.c new file mode 100644 index 0000000000..c79ff01fcc --- /dev/null +++ b/thirdparty/libtheora/x86/mmxencfrag.c @@ -0,0 +1,900 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: dsp_mmx.c 14579 2008-03-12 06:42:40Z xiphmont $ + + ********************************************************************/ +#include +#include "x86enc.h" + +#if defined(OC_X86_ASM) + +/*Returns the sum of absolute differences between the 8x8 pixel blocks at _src + and _ref, whose rows are both _ystride bytes apart. 
+ Each of the 8 rows is loaded with an 8-byte movq and reduced with psadbw; the + per-row sums are accumulated in %mm0.*/ +unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride){ + ptrdiff_t ystride3; + ptrdiff_t ret; + __asm__ __volatile__( + /*Load the first 4 rows of each block.*/ + "movq (%[src]),%%mm0\n\t" + "movq (%[ref]),%%mm1\n\t" + "movq (%[src],%[ystride]),%%mm2\n\t" + "movq (%[ref],%[ystride]),%%mm3\n\t" + "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" + "movq (%[src],%[ystride],2),%%mm4\n\t" + "movq (%[ref],%[ystride],2),%%mm5\n\t" + "movq (%[src],%[ystride3]),%%mm6\n\t" + "movq (%[ref],%[ystride3]),%%mm7\n\t" + /*Compute their SADs and add them in %%mm0*/ + "psadbw %%mm1,%%mm0\n\t" + "psadbw %%mm3,%%mm2\n\t" +
"lea (%[src],%[ystride],4),%[src]\n\t" + "paddw %%mm2,%%mm0\n\t" + "lea (%[ref],%[ystride],4),%[ref]\n\t" + /*Load the next 3 rows as registers become available.*/ + "movq (%[src]),%%mm2\n\t" + "movq (%[ref]),%%mm3\n\t" + "psadbw %%mm5,%%mm4\n\t" + "psadbw %%mm7,%%mm6\n\t" + "paddw %%mm4,%%mm0\n\t" + "movq (%[ref],%[ystride]),%%mm5\n\t" + "movq (%[src],%[ystride]),%%mm4\n\t" + "paddw %%mm6,%%mm0\n\t" + "movq (%[ref],%[ystride],2),%%mm7\n\t" + "movq (%[src],%[ystride],2),%%mm6\n\t" + /*Start adding their SADs to %%mm0*/ + "psadbw %%mm3,%%mm2\n\t" + "psadbw %%mm5,%%mm4\n\t" + "paddw %%mm2,%%mm0\n\t" + "psadbw %%mm7,%%mm6\n\t" + /*Load last row as registers become available.*/ + "movq (%[src],%[ystride3]),%%mm2\n\t" + "movq (%[ref],%[ystride3]),%%mm3\n\t" + /*And finish adding up their SADs.*/ + "paddw %%mm4,%%mm0\n\t" + "psadbw %%mm3,%%mm2\n\t" + "paddw %%mm6,%%mm0\n\t" + "paddw %%mm2,%%mm0\n\t" + "movd %%mm0,%[ret]\n\t" + :[ret]"=a"(ret),[src]"+%r"(_src),[ref]"+r"(_ref),[ystride3]"=&r"(ystride3) + :[ystride]"r"((ptrdiff_t)_ystride) + ); + return (unsigned)ret; +} + +/*Returns the same value as oc_enc_frag_sad_mmxext(). + _thresh is accepted but ignored, so the full SAD is always computed.*/ +unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh){ + /*Early termination is for suckers.*/ + return oc_enc_frag_sad_mmxext(_src,_ref,_ystride); +} + +/*Assumes the first two rows of %[ref1] and %[ref2] are in %%mm0...%%mm3, the + first two rows of %[src] are in %%mm4,%%mm5, and {1}x8 is in %%mm7. + We pre-load the next two rows of data as registers become available.*/ +#define OC_SAD2_LOOP \ + "#OC_SAD2_LOOP\n\t" \ + /*We want to compute (%%mm0+%%mm1>>1) on unsigned bytes without overflow, but \ + pavgb computes (%%mm0+%%mm1+1>>1). \ + The latter is exactly 1 too large when the low bit of two corresponding \ + bytes is only set in one of them.
\ + Therefore we pxor the operands, pand to mask out the low bits, and psubb to \ + correct the output of pavgb.*/ \ + "movq %%mm0,%%mm6\n\t" \ + "lea (%[ref1],%[ystride],2),%[ref1]\n\t" \ + "pxor %%mm1,%%mm0\n\t" \ + "pavgb %%mm1,%%mm6\n\t" \ + "lea (%[ref2],%[ystride],2),%[ref2]\n\t" \ + "movq %%mm2,%%mm1\n\t" \ + "pand %%mm7,%%mm0\n\t" \ + "pavgb %%mm3,%%mm2\n\t" \ + "pxor %%mm3,%%mm1\n\t" \ + "movq (%[ref2],%[ystride]),%%mm3\n\t" \ + "psubb %%mm0,%%mm6\n\t" \ + "movq (%[ref1]),%%mm0\n\t" \ + "pand %%mm7,%%mm1\n\t" \ + "psadbw %%mm6,%%mm4\n\t" \ + "movd %[ret],%%mm6\n\t" \ + "psubb %%mm1,%%mm2\n\t" \ + "movq (%[ref2]),%%mm1\n\t" \ + "lea (%[src],%[ystride],2),%[src]\n\t" \ + "psadbw %%mm2,%%mm5\n\t" \ + "movq (%[ref1],%[ystride]),%%mm2\n\t" \ + "paddw %%mm4,%%mm5\n\t" \ + "movq (%[src]),%%mm4\n\t" \ + "paddw %%mm5,%%mm6\n\t" \ + "movq (%[src],%[ystride]),%%mm5\n\t" \ + "movd %%mm6,%[ret]\n\t" \ + +/*Same as above, but does not pre-load the next two rows.*/ +#define OC_SAD2_TAIL \ + "#OC_SAD2_TAIL\n\t" \ + "movq %%mm0,%%mm6\n\t" \ + "pavgb %%mm1,%%mm0\n\t" \ + "pxor %%mm1,%%mm6\n\t" \ + "movq %%mm2,%%mm1\n\t" \ + "pand %%mm7,%%mm6\n\t" \ + "pavgb %%mm3,%%mm2\n\t" \ + "pxor %%mm3,%%mm1\n\t" \ + "psubb %%mm6,%%mm0\n\t" \ + "pand %%mm7,%%mm1\n\t" \ + "psadbw %%mm0,%%mm4\n\t" \ + "psubb %%mm1,%%mm2\n\t" \ + "movd %[ret],%%mm6\n\t" \ + "psadbw %%mm2,%%mm5\n\t" \ + "paddw %%mm4,%%mm5\n\t" \ + "paddw %%mm5,%%mm6\n\t" \ + "movd %%mm6,%[ret]\n\t" \ + +/*Returns the sum of absolute differences between the 8x8 block at _src and + the per-byte truncating average (a+b>>1) of the 8x8 blocks at _ref1 and + _ref2; all three blocks use a row stride of _ystride bytes. + Two rows are processed by each OC_SAD2_LOOP/OC_SAD2_TAIL step, with the + running total kept in %[ret]. 
+ _thresh is not referenced.*/ +unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh){ + ptrdiff_t ret; + __asm__ __volatile__( + "movq (%[ref1]),%%mm0\n\t" + "movq (%[ref2]),%%mm1\n\t" + "movq (%[ref1],%[ystride]),%%mm2\n\t" + "movq (%[ref2],%[ystride]),%%mm3\n\t" + "xor %[ret],%[ret]\n\t" + "movq (%[src]),%%mm4\n\t" + "pxor %%mm7,%%mm7\n\t" + "pcmpeqb %%mm6,%%mm6\n\t" + "movq (%[src],%[ystride]),%%mm5\n\t" + "psubb %%mm6,%%mm7\n\t" + OC_SAD2_LOOP +
OC_SAD2_LOOP + OC_SAD2_LOOP + OC_SAD2_TAIL + :[ret]"=&a"(ret),[src]"+r"(_src),[ref1]"+%r"(_ref1),[ref2]"+r"(_ref2) + :[ystride]"r"((ptrdiff_t)_ystride) + ); + return (unsigned)ret; +} + +/*Load an 8x4 array of pixel values from %[src] and %[ref] and compute their + 16-bit difference in %%mm0...%%mm7.*/ +#define OC_LOAD_SUB_8x4(_off) \ + "#OC_LOAD_SUB_8x4\n\t" \ + "movd "_off"(%[src]),%%mm0\n\t" \ + "movd "_off"(%[ref]),%%mm4\n\t" \ + "movd "_off"(%[src],%[src_ystride]),%%mm1\n\t" \ + "lea (%[src],%[src_ystride],2),%[src]\n\t" \ + "movd "_off"(%[ref],%[ref_ystride]),%%mm5\n\t" \ + "lea (%[ref],%[ref_ystride],2),%[ref]\n\t" \ + "movd "_off"(%[src]),%%mm2\n\t" \ + "movd "_off"(%[ref]),%%mm7\n\t" \ + "movd "_off"(%[src],%[src_ystride]),%%mm3\n\t" \ + "movd "_off"(%[ref],%[ref_ystride]),%%mm6\n\t" \ + "punpcklbw %%mm4,%%mm0\n\t" \ + "lea (%[src],%[src_ystride],2),%[src]\n\t" \ + "punpcklbw %%mm4,%%mm4\n\t" \ + "lea (%[ref],%[ref_ystride],2),%[ref]\n\t" \ + "psubw %%mm4,%%mm0\n\t" \ + "movd "_off"(%[src]),%%mm4\n\t" \ + "movq %%mm0,"_off"*2(%[buf])\n\t" \ + "movd "_off"(%[ref]),%%mm0\n\t" \ + "punpcklbw %%mm5,%%mm1\n\t" \ + "punpcklbw %%mm5,%%mm5\n\t" \ + "psubw %%mm5,%%mm1\n\t" \ + "movd "_off"(%[src],%[src_ystride]),%%mm5\n\t" \ + "punpcklbw %%mm7,%%mm2\n\t" \ + "punpcklbw %%mm7,%%mm7\n\t" \ + "psubw %%mm7,%%mm2\n\t" \ + "movd "_off"(%[ref],%[ref_ystride]),%%mm7\n\t" \ + "punpcklbw %%mm6,%%mm3\n\t" \ + "lea (%[src],%[src_ystride],2),%[src]\n\t" \ + "punpcklbw %%mm6,%%mm6\n\t" \ + "psubw %%mm6,%%mm3\n\t" \ + "movd "_off"(%[src]),%%mm6\n\t" \ + "punpcklbw %%mm0,%%mm4\n\t" \ + "lea (%[ref],%[ref_ystride],2),%[ref]\n\t" \ + "punpcklbw %%mm0,%%mm0\n\t" \ + "lea (%[src],%[src_ystride],2),%[src]\n\t" \ + "psubw %%mm0,%%mm4\n\t" \ + "movd "_off"(%[ref]),%%mm0\n\t" \ + "punpcklbw %%mm7,%%mm5\n\t" \ + "neg %[src_ystride]\n\t" \ + "punpcklbw %%mm7,%%mm7\n\t" \ + "psubw %%mm7,%%mm5\n\t" \ + "movd "_off"(%[src],%[src_ystride]),%%mm7\n\t" \ + "punpcklbw %%mm0,%%mm6\n\t" \ + "lea
(%[ref],%[ref_ystride],2),%[ref]\n\t" \ + "punpcklbw %%mm0,%%mm0\n\t" \ + "neg %[ref_ystride]\n\t" \ + "psubw %%mm0,%%mm6\n\t" \ + "movd "_off"(%[ref],%[ref_ystride]),%%mm0\n\t" \ + "lea (%[src],%[src_ystride],8),%[src]\n\t" \ + "punpcklbw %%mm0,%%mm7\n\t" \ + "neg %[src_ystride]\n\t" \ + "punpcklbw %%mm0,%%mm0\n\t" \ + "lea (%[ref],%[ref_ystride],8),%[ref]\n\t" \ + "psubw %%mm0,%%mm7\n\t" \ + "neg %[ref_ystride]\n\t" \ + "movq "_off"*2(%[buf]),%%mm0\n\t" \ + +/*Load an 8x4 array of pixel values from %[src] into %%mm0...%%mm7.*/ +#define OC_LOAD_8x4(_off) \ + "#OC_LOAD_8x4\n\t" \ + "movd "_off"(%[src]),%%mm0\n\t" \ + "movd "_off"(%[src],%[ystride]),%%mm1\n\t" \ + "movd "_off"(%[src],%[ystride],2),%%mm2\n\t" \ + "pxor %%mm7,%%mm7\n\t" \ + "movd "_off"(%[src],%[ystride3]),%%mm3\n\t" \ + "punpcklbw %%mm7,%%mm0\n\t" \ + "movd "_off"(%[src4]),%%mm4\n\t" \ + "punpcklbw %%mm7,%%mm1\n\t" \ + "movd "_off"(%[src4],%[ystride]),%%mm5\n\t" \ + "punpcklbw %%mm7,%%mm2\n\t" \ + "movd "_off"(%[src4],%[ystride],2),%%mm6\n\t" \ + "punpcklbw %%mm7,%%mm3\n\t" \ + "movd "_off"(%[src4],%[ystride3]),%%mm7\n\t" \ + "punpcklbw %%mm4,%%mm4\n\t" \ + "punpcklbw %%mm5,%%mm5\n\t" \ + "psrlw $8,%%mm4\n\t" \ + "psrlw $8,%%mm5\n\t" \ + "punpcklbw %%mm6,%%mm6\n\t" \ + "punpcklbw %%mm7,%%mm7\n\t" \ + "psrlw $8,%%mm6\n\t" \ + "psrlw $8,%%mm7\n\t" \ + +/*Performs the first two stages of an 8-point 1-D Hadamard transform. + The transform is performed in place, except that outputs 0-3 are swapped with + outputs 4-7.
+ Outputs 2, 3, 6 and 7 from the second stage are negated (which allows us to + perform this stage in place with no temporary registers).*/ +#define OC_HADAMARD_AB_8x4 \ + "#OC_HADAMARD_AB_8x4\n\t" \ + /*Stage A: \ + Outputs 0-3 are swapped with 4-7 here.*/ \ + "paddw %%mm1,%%mm5\n\t" \ + "paddw %%mm2,%%mm6\n\t" \ + "paddw %%mm1,%%mm1\n\t" \ + "paddw %%mm2,%%mm2\n\t" \ + "psubw %%mm5,%%mm1\n\t" \ + "psubw %%mm6,%%mm2\n\t" \ + "paddw %%mm3,%%mm7\n\t" \ + "paddw %%mm0,%%mm4\n\t" \ + "paddw %%mm3,%%mm3\n\t" \ + "paddw %%mm0,%%mm0\n\t" \ + "psubw %%mm7,%%mm3\n\t" \ + "psubw %%mm4,%%mm0\n\t" \ + /*Stage B:*/ \ + "paddw %%mm2,%%mm0\n\t" \ + "paddw %%mm3,%%mm1\n\t" \ + "paddw %%mm6,%%mm4\n\t" \ + "paddw %%mm7,%%mm5\n\t" \ + "paddw %%mm2,%%mm2\n\t" \ + "paddw %%mm3,%%mm3\n\t" \ + "paddw %%mm6,%%mm6\n\t" \ + "paddw %%mm7,%%mm7\n\t" \ + "psubw %%mm0,%%mm2\n\t" \ + "psubw %%mm1,%%mm3\n\t" \ + "psubw %%mm4,%%mm6\n\t" \ + "psubw %%mm5,%%mm7\n\t" \ + +/*Performs the last stage of an 8-point 1-D Hadamard transform in place. + Outputs 1, 3, 5, and 7 are negated (which allows us to perform this stage in + place with no temporary registers).*/ +#define OC_HADAMARD_C_8x4 \ + "#OC_HADAMARD_C_8x4\n\t" \ + /*Stage C:*/ \ + "paddw %%mm1,%%mm0\n\t" \ + "paddw %%mm3,%%mm2\n\t" \ + "paddw %%mm5,%%mm4\n\t" \ + "paddw %%mm7,%%mm6\n\t" \ + "paddw %%mm1,%%mm1\n\t" \ + "paddw %%mm3,%%mm3\n\t" \ + "paddw %%mm5,%%mm5\n\t" \ + "paddw %%mm7,%%mm7\n\t" \ + "psubw %%mm0,%%mm1\n\t" \ + "psubw %%mm2,%%mm3\n\t" \ + "psubw %%mm4,%%mm5\n\t" \ + "psubw %%mm6,%%mm7\n\t" \ + +/*Performs an 8-point 1-D Hadamard transform. + The transform is performed in place, except that outputs 0-3 are swapped with + outputs 4-7. + Outputs 1, 2, 5 and 6 are negated (which allows us to perform the transform + in place with no temporary registers).*/ +#define OC_HADAMARD_8x4 \ + OC_HADAMARD_AB_8x4 \ + OC_HADAMARD_C_8x4 \ + +/*Performs the first part of the final stage of the Hadamard transform and + summing of absolute values. 
+ At the end of this part, %%mm1 will contain the DC coefficient of the + transform.*/ +#define OC_HADAMARD_C_ABS_ACCUM_A_8x4(_r6,_r7) \ + /*We use the fact that \ + (abs(a+b)+abs(a-b))/2=max(abs(a),abs(b)) \ + to merge the final butterfly with the abs and the first stage of \ + accumulation. \ + Thus we can avoid using pabsw, which is not available until SSSE3. \ + Emulating pabsw takes 3 instructions, so the straightforward MMXEXT \ + implementation would be (3+3)*8+7=55 instructions (+4 for spilling \ + registers). \ + Even with pabsw, it would be (3+1)*8+7=39 instructions (with no spills). \ + This implementation is only 26 (+4 for spilling registers).*/ \ + "#OC_HADAMARD_C_ABS_ACCUM_A_8x4\n\t" \ + "movq %%mm7,"_r7"(%[buf])\n\t" \ + "movq %%mm6,"_r6"(%[buf])\n\t" \ + /*mm7={0x7FFF}x4 \ + mm0=max(abs(mm0),abs(mm1))-0x7FFF*/ \ + "pcmpeqb %%mm7,%%mm7\n\t" \ + "movq %%mm0,%%mm6\n\t" \ + "psrlw $1,%%mm7\n\t" \ + "paddw %%mm1,%%mm6\n\t" \ + "pmaxsw %%mm1,%%mm0\n\t" \ + "paddsw %%mm7,%%mm6\n\t" \ + "psubw %%mm6,%%mm0\n\t" \ + /*mm2=max(abs(mm2),abs(mm3))-0x7FFF \ + mm4=max(abs(mm4),abs(mm5))-0x7FFF*/ \ + "movq %%mm2,%%mm6\n\t" \ + "movq %%mm4,%%mm1\n\t" \ + "pmaxsw %%mm3,%%mm2\n\t" \ + "pmaxsw %%mm5,%%mm4\n\t" \ + "paddw %%mm3,%%mm6\n\t" \ + "paddw %%mm5,%%mm1\n\t" \ + "movq "_r7"(%[buf]),%%mm3\n\t" \ + +/*Performs the second part of the final stage of the Hadamard transform and + summing of absolute values.*/ +#define OC_HADAMARD_C_ABS_ACCUM_B_8x4(_r6,_r7) \ + "#OC_HADAMARD_C_ABS_ACCUM_B_8x4\n\t" \ + "paddsw %%mm7,%%mm6\n\t" \ + "movq "_r6"(%[buf]),%%mm5\n\t" \ + "paddsw %%mm7,%%mm1\n\t" \ + "psubw %%mm6,%%mm2\n\t" \ + "psubw %%mm1,%%mm4\n\t" \ + /*mm7={1}x4 (needed for the horizontal add that follows) \ + mm0+=mm2+mm4+max(abs(mm3),abs(mm5))-0x7FFF*/ \ + "movq %%mm3,%%mm6\n\t" \ + "pmaxsw %%mm5,%%mm3\n\t" \ + "paddw %%mm2,%%mm0\n\t" \ + "paddw %%mm5,%%mm6\n\t" \ + "paddw %%mm4,%%mm0\n\t" \ + "paddsw %%mm7,%%mm6\n\t" \ + "paddw %%mm3,%%mm0\n\t" \ + "psrlw 
$14,%%mm7\n\t" \ + "psubw %%mm6,%%mm0\n\t" \ + +/*Performs the last stage of an 8-point 1-D Hadamard transform, takes the + absolute value of each component, and accumulates everything into mm0. + This is the only portion of SATD which requires MMXEXT (we could use plain + MMX, but it takes 4 instructions and an extra register to work around the + lack of a pmaxsw, which is a pretty serious penalty).*/ +#define OC_HADAMARD_C_ABS_ACCUM_8x4(_r6,_r7) \ + OC_HADAMARD_C_ABS_ACCUM_A_8x4(_r6,_r7) \ + OC_HADAMARD_C_ABS_ACCUM_B_8x4(_r6,_r7) \ + +/*Performs an 8-point 1-D Hadamard transform, takes the absolute value of each + component, and accumulates everything into mm0. + Note that mm0 will have an extra 4 added to each column, and that after + removing this value, the remainder will be half the conventional value.*/ +#define OC_HADAMARD_ABS_ACCUM_8x4(_r6,_r7) \ + OC_HADAMARD_AB_8x4 \ + OC_HADAMARD_C_ABS_ACCUM_8x4(_r6,_r7) + +/*Performs two 4x4 transposes (mostly) in place. + On input, {mm0,mm1,mm2,mm3} contains rows {e,f,g,h}, and {mm4,mm5,mm6,mm7} + contains rows {a,b,c,d}. 
+ On output, {0x40,0x50,0x60,0x70}+_off(%[buf]) contains {e,f,g,h}^T, and + {mm4,mm5,mm6,mm7} contains the transposed rows {a,b,c,d}^T.*/ +#define OC_TRANSPOSE_4x4x2(_off) \ + "#OC_TRANSPOSE_4x4x2\n\t" \ + /*First 4x4 transpose:*/ \ + "movq %%mm5,0x10+"_off"(%[buf])\n\t" \ + /*mm0 = e3 e2 e1 e0 \ + mm1 = f3 f2 f1 f0 \ + mm2 = g3 g2 g1 g0 \ + mm3 = h3 h2 h1 h0*/ \ + "movq %%mm2,%%mm5\n\t" \ + "punpcklwd %%mm3,%%mm2\n\t" \ + "punpckhwd %%mm3,%%mm5\n\t" \ + "movq %%mm0,%%mm3\n\t" \ + "punpcklwd %%mm1,%%mm0\n\t" \ + "punpckhwd %%mm1,%%mm3\n\t" \ + /*mm0 = f1 e1 f0 e0 \ + mm3 = f3 e3 f2 e2 \ + mm2 = h1 g1 h0 g0 \ + mm5 = h3 g3 h2 g2*/ \ + "movq %%mm0,%%mm1\n\t" \ + "punpckldq %%mm2,%%mm0\n\t" \ + "punpckhdq %%mm2,%%mm1\n\t" \ + "movq %%mm3,%%mm2\n\t" \ + "punpckhdq %%mm5,%%mm3\n\t" \ + "movq %%mm0,0x40+"_off"(%[buf])\n\t" \ + "punpckldq %%mm5,%%mm2\n\t" \ + /*mm0 = h0 g0 f0 e0 \ + mm1 = h1 g1 f1 e1 \ + mm2 = h2 g2 f2 e2 \ + mm3 = h3 g3 f3 e3*/ \ + "movq 0x10+"_off"(%[buf]),%%mm5\n\t" \ + /*Second 4x4 transpose:*/ \ + /*mm4 = a3 a2 a1 a0 \ + mm5 = b3 b2 b1 b0 \ + mm6 = c3 c2 c1 c0 \ + mm7 = d3 d2 d1 d0*/ \ + "movq %%mm6,%%mm0\n\t" \ + "punpcklwd %%mm7,%%mm6\n\t" \ + "movq %%mm1,0x50+"_off"(%[buf])\n\t" \ + "punpckhwd %%mm7,%%mm0\n\t" \ + "movq %%mm4,%%mm7\n\t" \ + "punpcklwd %%mm5,%%mm4\n\t" \ + "movq %%mm2,0x60+"_off"(%[buf])\n\t" \ + "punpckhwd %%mm5,%%mm7\n\t" \ + /*mm4 = b1 a1 b0 a0 \ + mm7 = b3 a3 b2 a2 \ + mm6 = d1 c1 d0 c0 \ + mm0 = d3 c3 d2 c2*/ \ + "movq %%mm4,%%mm5\n\t" \ + "punpckldq %%mm6,%%mm4\n\t" \ + "movq %%mm3,0x70+"_off"(%[buf])\n\t" \ + "punpckhdq %%mm6,%%mm5\n\t" \ + "movq %%mm7,%%mm6\n\t" \ + "punpckhdq %%mm0,%%mm7\n\t" \ + "punpckldq %%mm0,%%mm6\n\t" \ + /*mm4 = d0 c0 b0 a0 \ + mm5 = d1 c1 b1 a1 \ + mm6 = d2 c2 b2 a2 \ + mm7 = d3 c3 b3 a3*/ \ + +static unsigned oc_int_frag_satd_thresh_mmxext(const unsigned char *_src, + int _src_ystride,const unsigned char *_ref,int _ref_ystride,unsigned _thresh){ + OC_ALIGN8(ogg_int16_t buf[64]); + ogg_int16_t 
*bufp; + unsigned ret; + unsigned ret2; + bufp=buf; + __asm__ __volatile__( + OC_LOAD_SUB_8x4("0x00") + OC_HADAMARD_8x4 + OC_TRANSPOSE_4x4x2("0x00") + /*Finish swapping out this 8x4 block to make room for the next one. + mm0...mm3 have been swapped out already.*/ + "movq %%mm4,0x00(%[buf])\n\t" + "movq %%mm5,0x10(%[buf])\n\t" + "movq %%mm6,0x20(%[buf])\n\t" + "movq %%mm7,0x30(%[buf])\n\t" + OC_LOAD_SUB_8x4("0x04") + OC_HADAMARD_8x4 + OC_TRANSPOSE_4x4x2("0x08") + /*Here the first 4x4 block of output from the last transpose is the second + 4x4 block of input for the next transform. + We have cleverly arranged that it already be in the appropriate place, so + we only have to do half the loads.*/ + "movq 0x10(%[buf]),%%mm1\n\t" + "movq 0x20(%[buf]),%%mm2\n\t" + "movq 0x30(%[buf]),%%mm3\n\t" + "movq 0x00(%[buf]),%%mm0\n\t" + OC_HADAMARD_ABS_ACCUM_8x4("0x28","0x38") + /*Up to this point, everything fit in 16 bits (8 input + 1 for the + difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1 + for the factor of two we dropped + 3 for the vertical accumulation). + Now we finally have to promote things to dwords. 
+ We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long + latency of pmaddwd by starting the next series of loads now.*/ + "mov %[thresh],%[ret2]\n\t" + "pmaddwd %%mm7,%%mm0\n\t" + "movq 0x50(%[buf]),%%mm1\n\t" + "movq 0x58(%[buf]),%%mm5\n\t" + "movq %%mm0,%%mm4\n\t" + "movq 0x60(%[buf]),%%mm2\n\t" + "punpckhdq %%mm0,%%mm0\n\t" + "movq 0x68(%[buf]),%%mm6\n\t" + "paddd %%mm0,%%mm4\n\t" + "movq 0x70(%[buf]),%%mm3\n\t" + "movd %%mm4,%[ret]\n\t" + "movq 0x78(%[buf]),%%mm7\n\t" + /*The sums produced by OC_HADAMARD_ABS_ACCUM_8x4 each have an extra 4 + added to them, and a factor of two removed; correct the final sum here.*/ + "lea -32(%[ret],%[ret]),%[ret]\n\t" + "movq 0x40(%[buf]),%%mm0\n\t" + "cmp %[ret2],%[ret]\n\t" + "movq 0x48(%[buf]),%%mm4\n\t" + "jae 1f\n\t" + OC_HADAMARD_ABS_ACCUM_8x4("0x68","0x78") + "pmaddwd %%mm7,%%mm0\n\t" + /*There isn't much to stick in here to hide the latency this time, but the + alternative to pmaddwd is movq->punpcklwd->punpckhwd->paddd, whose + latency is even worse.*/ + "sub $32,%[ret]\n\t" + "movq %%mm0,%%mm4\n\t" + "punpckhdq %%mm0,%%mm0\n\t" + "paddd %%mm0,%%mm4\n\t" + "movd %%mm4,%[ret2]\n\t" + "lea (%[ret],%[ret2],2),%[ret]\n\t" + ".p2align 4,,15\n\t" + "1:\n\t" + /*Although it looks like we're using 7 registers here, gcc can alias %[ret] + and %[ret2] with some of the inputs, since for once we don't write to + them until after we're done using everything but %[buf] (which is also + listed as an output to ensure gcc _doesn't_ alias them against it).*/ + /*Note that _src_ystride and _ref_ystride must be given non-overlapping + constraints, otherwise if gcc can prove they're equal it will allocate + them to the same register (which is bad); _src and _ref face a similar + problem, though those are never actually the same.*/ + :[ret]"=a"(ret),[ret2]"=r"(ret2),[buf]"+r"(bufp) + :[src]"r"(_src),[src_ystride]"c"((ptrdiff_t)_src_ystride), + [ref]"r"(_ref),[ref_ystride]"d"((ptrdiff_t)_ref_ystride), + [thresh]"m"(_thresh) + 
/*We have to use neg, so we actually clobber the condition codes for once + (not to mention cmp, sub, and add).*/ + :"cc" + ); + return ret; +} + +unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh){ + return oc_int_frag_satd_thresh_mmxext(_src,_ystride,_ref,_ystride,_thresh); +} + +/*Our internal implementation of frag_copy2 takes an extra stride parameter so + we can share code with oc_enc_frag_satd2_thresh_mmxext().*/ +static void oc_int_frag_copy2_mmxext(unsigned char *_dst,int _dst_ystride, + const unsigned char *_src1,const unsigned char *_src2,int _src_ystride){ + __asm__ __volatile__( + /*Load the first 3 rows.*/ + "movq (%[src1]),%%mm0\n\t" + "movq (%[src2]),%%mm1\n\t" + "movq (%[src1],%[src_ystride]),%%mm2\n\t" + "lea (%[src1],%[src_ystride],2),%[src1]\n\t" + "movq (%[src2],%[src_ystride]),%%mm3\n\t" + "lea (%[src2],%[src_ystride],2),%[src2]\n\t" + "pxor %%mm7,%%mm7\n\t" + "movq (%[src1]),%%mm4\n\t" + "pcmpeqb %%mm6,%%mm6\n\t" + "movq (%[src2]),%%mm5\n\t" + /*mm7={1}x8.*/ + "psubb %%mm6,%%mm7\n\t" + /*Start averaging %%mm0 and %%mm1 into %%mm6.*/ + "movq %%mm0,%%mm6\n\t" + "pxor %%mm1,%%mm0\n\t" + "pavgb %%mm1,%%mm6\n\t" + /*%%mm1 is free, start averaging %%mm3 into %%mm2 using %%mm1.*/ + "movq %%mm2,%%mm1\n\t" + "pand %%mm7,%%mm0\n\t" + "pavgb %%mm3,%%mm2\n\t" + "pxor %%mm3,%%mm1\n\t" + /*%%mm3 is free.*/ + "psubb %%mm0,%%mm6\n\t" + /*%%mm0 is free, start loading the next row.*/ + "movq (%[src1],%[src_ystride]),%%mm0\n\t" + /*Start averaging %%mm5 and %%mm4 using %%mm3.*/ + "movq %%mm4,%%mm3\n\t" + /*%%mm6 (row 0) is done; write it out.*/ + "movq %%mm6,(%[dst])\n\t" + "pand %%mm7,%%mm1\n\t" + "pavgb %%mm5,%%mm4\n\t" + "psubb %%mm1,%%mm2\n\t" + /*%%mm1 is free, continue loading the next row.*/ + "movq (%[src2],%[src_ystride]),%%mm1\n\t" + "pxor %%mm5,%%mm3\n\t" + "lea (%[src1],%[src_ystride],2),%[src1]\n\t" + /*%%mm2 (row 1) is done; write it out.*/ + "movq 
%%mm2,(%[dst],%[dst_ystride])\n\t" + "pand %%mm7,%%mm3\n\t" + /*Start loading the next row.*/ + "movq (%[src1]),%%mm2\n\t" + "lea (%[dst],%[dst_ystride],2),%[dst]\n\t" + "psubb %%mm3,%%mm4\n\t" + "lea (%[src2],%[src_ystride],2),%[src2]\n\t" + /*%%mm4 (row 2) is done; write it out.*/ + "movq %%mm4,(%[dst])\n\t" + /*Continue loading the next row.*/ + "movq (%[src2]),%%mm3\n\t" + /*Start averaging %%mm0 and %%mm1 into %%mm6.*/ + "movq %%mm0,%%mm6\n\t" + "pxor %%mm1,%%mm0\n\t" + /*Start loading the next row.*/ + "movq (%[src1],%[src_ystride]),%%mm4\n\t" + "pavgb %%mm1,%%mm6\n\t" + /*%%mm1 is free; start averaging %%mm3 into %%mm2 using %%mm1.*/ + "movq %%mm2,%%mm1\n\t" + "pand %%mm7,%%mm0\n\t" + /*Continue loading the next row.*/ + "movq (%[src2],%[src_ystride]),%%mm5\n\t" + "pavgb %%mm3,%%mm2\n\t" + "lea (%[src1],%[src_ystride],2),%[src1]\n\t" + "pxor %%mm3,%%mm1\n\t" + /*%%mm3 is free.*/ + "psubb %%mm0,%%mm6\n\t" + /*%%mm0 is free, start loading the next row.*/ + "movq (%[src1]),%%mm0\n\t" + /*Start averaging %%mm5 into %%mm4 using %%mm3.*/ + "movq %%mm4,%%mm3\n\t" + /*%%mm6 (row 3) is done; write it out.*/ + "movq %%mm6,(%[dst],%[dst_ystride])\n\t" + "pand %%mm7,%%mm1\n\t" + "lea (%[src2],%[src_ystride],2),%[src2]\n\t" + "pavgb %%mm5,%%mm4\n\t" + "lea (%[dst],%[dst_ystride],2),%[dst]\n\t" + "psubb %%mm1,%%mm2\n\t" + /*%%mm1 is free; continue loading the next row.*/ + "movq (%[src2]),%%mm1\n\t" + "pxor %%mm5,%%mm3\n\t" + /*%%mm2 (row 4) is done; write it out.*/ + "movq %%mm2,(%[dst])\n\t" + "pand %%mm7,%%mm3\n\t" + /*Start loading the next row.*/ + "movq (%[src1],%[src_ystride]),%%mm2\n\t" + "psubb %%mm3,%%mm4\n\t" + /*Start averaging %%mm0 and %%mm1 into %%mm6.*/ + "movq %%mm0,%%mm6\n\t" + /*Continue loading the next row.*/ + "movq (%[src2],%[src_ystride]),%%mm3\n\t" + /*%%mm4 (row 5) is done; write it out.*/ + "movq %%mm4,(%[dst],%[dst_ystride])\n\t" + "pxor %%mm1,%%mm0\n\t" + "pavgb %%mm1,%%mm6\n\t" + /*%%mm4 is free; start averaging %%mm3 into %%mm2 using 
%%mm4.*/ + "movq %%mm2,%%mm4\n\t" + "pand %%mm7,%%mm0\n\t" + "pavgb %%mm3,%%mm2\n\t" + "pxor %%mm3,%%mm4\n\t" + "lea (%[dst],%[dst_ystride],2),%[dst]\n\t" + "psubb %%mm0,%%mm6\n\t" + "pand %%mm7,%%mm4\n\t" + /*%%mm6 (row 6) is done, write it out.*/ + "movq %%mm6,(%[dst])\n\t" + "psubb %%mm4,%%mm2\n\t" + /*%%mm2 (row 7) is done, write it out.*/ + "movq %%mm2,(%[dst],%[dst_ystride])\n\t" + :[dst]"+r"(_dst),[src1]"+%r"(_src1),[src2]"+r"(_src2) + :[dst_ystride]"r"((ptrdiff_t)_dst_ystride), + [src_ystride]"r"((ptrdiff_t)_src_ystride) + :"memory" + ); +} + +unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh){ + OC_ALIGN8(unsigned char ref[64]); + oc_int_frag_copy2_mmxext(ref,8,_ref1,_ref2,_ystride); + return oc_int_frag_satd_thresh_mmxext(_src,_ystride,ref,8,_thresh); +} + +unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src, + int _ystride){ + OC_ALIGN8(ogg_int16_t buf[64]); + ogg_int16_t *bufp; + unsigned ret; + unsigned ret2; + bufp=buf; + __asm__ __volatile__( + OC_LOAD_8x4("0x00") + OC_HADAMARD_8x4 + OC_TRANSPOSE_4x4x2("0x00") + /*Finish swapping out this 8x4 block to make room for the next one. + mm0...mm3 have been swapped out already.*/ + "movq %%mm4,0x00(%[buf])\n\t" + "movq %%mm5,0x10(%[buf])\n\t" + "movq %%mm6,0x20(%[buf])\n\t" + "movq %%mm7,0x30(%[buf])\n\t" + OC_LOAD_8x4("0x04") + OC_HADAMARD_8x4 + OC_TRANSPOSE_4x4x2("0x08") + /*Here the first 4x4 block of output from the last transpose is the second + 4x4 block of input for the next transform. 
+ We have cleverly arranged that it already be in the appropriate place, so + we only have to do half the loads.*/ + "movq 0x10(%[buf]),%%mm1\n\t" + "movq 0x20(%[buf]),%%mm2\n\t" + "movq 0x30(%[buf]),%%mm3\n\t" + "movq 0x00(%[buf]),%%mm0\n\t" + /*We split out the stages here so we can save the DC coefficient in the + middle.*/ + OC_HADAMARD_AB_8x4 + OC_HADAMARD_C_ABS_ACCUM_A_8x4("0x28","0x38") + "movd %%mm1,%[ret]\n\t" + OC_HADAMARD_C_ABS_ACCUM_B_8x4("0x28","0x38") + /*Up to this point, everything fit in 16 bits (8 input + 1 for the + difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1 + for the factor of two we dropped + 3 for the vertical accumulation). + Now we finally have to promote things to dwords. + We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long + latency of pmaddwd by starting the next series of loads now.*/ + "pmaddwd %%mm7,%%mm0\n\t" + "movq 0x50(%[buf]),%%mm1\n\t" + "movq 0x58(%[buf]),%%mm5\n\t" + "movq 0x60(%[buf]),%%mm2\n\t" + "movq %%mm0,%%mm4\n\t" + "movq 0x68(%[buf]),%%mm6\n\t" + "punpckhdq %%mm0,%%mm0\n\t" + "movq 0x70(%[buf]),%%mm3\n\t" + "paddd %%mm0,%%mm4\n\t" + "movq 0x78(%[buf]),%%mm7\n\t" + "movd %%mm4,%[ret2]\n\t" + "movq 0x40(%[buf]),%%mm0\n\t" + "movq 0x48(%[buf]),%%mm4\n\t" + OC_HADAMARD_ABS_ACCUM_8x4("0x68","0x78") + "pmaddwd %%mm7,%%mm0\n\t" + /*We assume that the DC coefficient is always positive (which is true, + because the input to the INTRA transform was not a difference).*/ + "movzx %w[ret],%[ret]\n\t" + "add %[ret2],%[ret2]\n\t" + "sub %[ret],%[ret2]\n\t" + "movq %%mm0,%%mm4\n\t" + "punpckhdq %%mm0,%%mm0\n\t" + "paddd %%mm0,%%mm4\n\t" + "movd %%mm4,%[ret]\n\t" + "lea -64(%[ret2],%[ret],2),%[ret]\n\t" + /*Although it looks like we're using 7 registers here, gcc can alias %[ret] + and %[ret2] with some of the inputs, since for once we don't write to + them until after we're done using everything but %[buf] (which is also + listed as an output to ensure gcc _doesn't_ alias them against 
it).*/ + :[ret]"=a"(ret),[ret2]"=r"(ret2),[buf]"+r"(bufp) + :[src]"r"(_src),[src4]"r"(_src+4*_ystride), + [ystride]"r"((ptrdiff_t)_ystride),[ystride3]"r"((ptrdiff_t)3*_ystride) + /*We have to use sub, so we actually clobber the condition codes for once + (not to mention add).*/ + :"cc" + ); + return ret; +} + +void oc_enc_frag_sub_mmx(ogg_int16_t _residue[64], + const unsigned char *_src,const unsigned char *_ref,int _ystride){ + int i; + __asm__ __volatile__("pxor %%mm7,%%mm7\n\t"::); + for(i=4;i-->0;){ + __asm__ __volatile__( + /*mm0=[src]*/ + "movq (%[src]),%%mm0\n\t" + /*mm1=[ref]*/ + "movq (%[ref]),%%mm1\n\t" + /*mm4=[src+ystride]*/ + "movq (%[src],%[ystride]),%%mm4\n\t" + /*mm5=[ref+ystride]*/ + "movq (%[ref],%[ystride]),%%mm5\n\t" + /*Compute [src]-[ref].*/ + "movq %%mm0,%%mm2\n\t" + "punpcklbw %%mm7,%%mm0\n\t" + "movq %%mm1,%%mm3\n\t" + "punpckhbw %%mm7,%%mm2\n\t" + "punpcklbw %%mm7,%%mm1\n\t" + "punpckhbw %%mm7,%%mm3\n\t" + "psubw %%mm1,%%mm0\n\t" + "psubw %%mm3,%%mm2\n\t" + /*Compute [src+ystride]-[ref+ystride].*/ + "movq %%mm4,%%mm1\n\t" + "punpcklbw %%mm7,%%mm4\n\t" + "movq %%mm5,%%mm3\n\t" + "punpckhbw %%mm7,%%mm1\n\t" + "lea (%[src],%[ystride],2),%[src]\n\t" + "punpcklbw %%mm7,%%mm5\n\t" + "lea (%[ref],%[ystride],2),%[ref]\n\t" + "punpckhbw %%mm7,%%mm3\n\t" + "psubw %%mm5,%%mm4\n\t" + "psubw %%mm3,%%mm1\n\t" + /*Write the answer out.*/ + "movq %%mm0,0x00(%[residue])\n\t" + "movq %%mm2,0x08(%[residue])\n\t" + "movq %%mm4,0x10(%[residue])\n\t" + "movq %%mm1,0x18(%[residue])\n\t" + "lea 0x20(%[residue]),%[residue]\n\t" + :[residue]"+r"(_residue),[src]"+r"(_src),[ref]"+r"(_ref) + :[ystride]"r"((ptrdiff_t)_ystride) + :"memory" + ); + } +} + +void oc_enc_frag_sub_128_mmx(ogg_int16_t _residue[64], + const unsigned char *_src,int _ystride){ + ptrdiff_t ystride3; + __asm__ __volatile__( + /*mm0=[src]*/ + "movq (%[src]),%%mm0\n\t" + /*mm1=[src+ystride]*/ + "movq (%[src],%[ystride]),%%mm1\n\t" + /*mm6={-1}x4*/ + "pcmpeqw %%mm6,%%mm6\n\t" + 
/*mm2=[src+2*ystride]*/ + "movq (%[src],%[ystride],2),%%mm2\n\t" + /*[ystride3]=3*[ystride]*/ + "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" + /*mm6={1}x4*/ + "psllw $15,%%mm6\n\t" + /*mm3=[src+3*ystride]*/ + "movq (%[src],%[ystride3]),%%mm3\n\t" + /*mm6={128}x4*/ + "psrlw $8,%%mm6\n\t" + /*mm7=0*/ + "pxor %%mm7,%%mm7\n\t" + /*[src]=[src]+4*[ystride]*/ + "lea (%[src],%[ystride],4),%[src]\n\t" + /*Compute [src]-128 and [src+ystride]-128*/ + "movq %%mm0,%%mm4\n\t" + "punpcklbw %%mm7,%%mm0\n\t" + "movq %%mm1,%%mm5\n\t" + "punpckhbw %%mm7,%%mm4\n\t" + "psubw %%mm6,%%mm0\n\t" + "punpcklbw %%mm7,%%mm1\n\t" + "psubw %%mm6,%%mm4\n\t" + "punpckhbw %%mm7,%%mm5\n\t" + "psubw %%mm6,%%mm1\n\t" + "psubw %%mm6,%%mm5\n\t" + /*Write the answer out.*/ + "movq %%mm0,0x00(%[residue])\n\t" + "movq %%mm4,0x08(%[residue])\n\t" + "movq %%mm1,0x10(%[residue])\n\t" + "movq %%mm5,0x18(%[residue])\n\t" + /*mm0=[src+4*ystride]*/ + "movq (%[src]),%%mm0\n\t" + /*mm1=[src+5*ystride]*/ + "movq (%[src],%[ystride]),%%mm1\n\t" + /*Compute [src+2*ystride]-128 and [src+3*ystride]-128*/ + "movq %%mm2,%%mm4\n\t" + "punpcklbw %%mm7,%%mm2\n\t" + "movq %%mm3,%%mm5\n\t" + "punpckhbw %%mm7,%%mm4\n\t" + "psubw %%mm6,%%mm2\n\t" + "punpcklbw %%mm7,%%mm3\n\t" + "psubw %%mm6,%%mm4\n\t" + "punpckhbw %%mm7,%%mm5\n\t" + "psubw %%mm6,%%mm3\n\t" + "psubw %%mm6,%%mm5\n\t" + /*Write the answer out.*/ + "movq %%mm2,0x20(%[residue])\n\t" + "movq %%mm4,0x28(%[residue])\n\t" + "movq %%mm3,0x30(%[residue])\n\t" + "movq %%mm5,0x38(%[residue])\n\t" + /*mm2=[src+6*ystride]*/ + "movq (%[src],%[ystride],2),%%mm2\n\t" + /*mm3=[src+7*ystride]*/ + "movq (%[src],%[ystride3]),%%mm3\n\t" + /*Compute [src+4*ystride]-128 and [src+5*ystride]-128*/ + "movq %%mm0,%%mm4\n\t" + "punpcklbw %%mm7,%%mm0\n\t" + "movq %%mm1,%%mm5\n\t" + "punpckhbw %%mm7,%%mm4\n\t" + "psubw %%mm6,%%mm0\n\t" + "punpcklbw %%mm7,%%mm1\n\t" + "psubw %%mm6,%%mm4\n\t" + "punpckhbw %%mm7,%%mm5\n\t" + "psubw %%mm6,%%mm1\n\t" + "psubw %%mm6,%%mm5\n\t" + /*Write the answer 
out.*/ + "movq %%mm0,0x40(%[residue])\n\t" + "movq %%mm4,0x48(%[residue])\n\t" + "movq %%mm1,0x50(%[residue])\n\t" + "movq %%mm5,0x58(%[residue])\n\t" + /*Compute [src+6*ystride]-128 and [src+7*ystride]-128*/ + "movq %%mm2,%%mm4\n\t" + "punpcklbw %%mm7,%%mm2\n\t" + "movq %%mm3,%%mm5\n\t" + "punpckhbw %%mm7,%%mm4\n\t" + "psubw %%mm6,%%mm2\n\t" + "punpcklbw %%mm7,%%mm3\n\t" + "psubw %%mm6,%%mm4\n\t" + "punpckhbw %%mm7,%%mm5\n\t" + "psubw %%mm6,%%mm3\n\t" + "psubw %%mm6,%%mm5\n\t" + /*Write the answer out.*/ + "movq %%mm2,0x60(%[residue])\n\t" + "movq %%mm4,0x68(%[residue])\n\t" + "movq %%mm3,0x70(%[residue])\n\t" + "movq %%mm5,0x78(%[residue])\n\t" + :[src]"+r"(_src),[ystride3]"=&r"(ystride3) + :[residue]"r"(_residue),[ystride]"r"((ptrdiff_t)_ystride) + :"memory" + ); +} + +void oc_enc_frag_copy2_mmxext(unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride){ + oc_int_frag_copy2_mmxext(_dst,_ystride,_src1,_src2,_ystride); +} + +#endif diff --git a/thirdparty/libtheora/x86/mmxfdct.c b/thirdparty/libtheora/x86/mmxfdct.c new file mode 100644 index 0000000000..211875255e --- /dev/null +++ b/thirdparty/libtheora/x86/mmxfdct.c @@ -0,0 +1,665 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 1999-2006 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ********************************************************************/ +/*MMX fDCT implementation for x86_32*/ +/*$Id: fdct_ses2.c 14579 2008-03-12 06:42:40Z xiphmont $*/ +#include "x86enc.h" + +#if defined(OC_X86_ASM) + +# define OC_FDCT_STAGE1_8x4 \ + "#OC_FDCT_STAGE1_8x4\n\t" \ + /*Stage 1:*/ \ + /*mm0=t7'=t0-t7*/ \ + "psubw %%mm7,%%mm0\n\t" \ + "paddw %%mm7,%%mm7\n\t" \ + /*mm1=t6'=t1-t6*/ \ + "psubw %%mm6,%%mm1\n\t" \ + "paddw %%mm6,%%mm6\n\t" \ + /*mm2=t5'=t2-t5*/ \ + "psubw %%mm5,%%mm2\n\t" \ + "paddw %%mm5,%%mm5\n\t" \ + /*mm3=t4'=t3-t4*/ \ + "psubw %%mm4,%%mm3\n\t" \ + "paddw %%mm4,%%mm4\n\t" \ + /*mm7=t0'=t0+t7*/ \ + "paddw %%mm0,%%mm7\n\t" \ + /*mm6=t1'=t1+t6*/ \ + "paddw %%mm1,%%mm6\n\t" \ + /*mm5=t2'=t2+t5*/ \ + "paddw %%mm2,%%mm5\n\t" \ + /*mm4=t3'=t3+t4*/ \ + "paddw %%mm3,%%mm4\n\t" \ + +# define OC_FDCT8x4(_r0,_r1,_r2,_r3,_r4,_r5,_r6,_r7) \ + "#OC_FDCT8x4\n\t" \ + /*Stage 2:*/ \ + /*mm7=t3''=t0'-t3'*/ \ + "psubw %%mm4,%%mm7\n\t" \ + "paddw %%mm4,%%mm4\n\t" \ + /*mm6=t2''=t1'-t2'*/ \ + "psubw %%mm5,%%mm6\n\t" \ + "movq %%mm7,"_r6"(%[y])\n\t" \ + "paddw %%mm5,%%mm5\n\t" \ + /*mm1=t5''=t6'-t5'*/ \ + "psubw %%mm2,%%mm1\n\t" \ + "movq %%mm6,"_r2"(%[y])\n\t" \ + /*mm4=t0''=t0'+t3'*/ \ + "paddw %%mm7,%%mm4\n\t" \ + "paddw %%mm2,%%mm2\n\t" \ + /*mm5=t1''=t1'+t2'*/ \ + "movq %%mm4,"_r0"(%[y])\n\t" \ + "paddw %%mm6,%%mm5\n\t" \ + /*mm2=t6''=t6'+t5'*/ \ + "paddw %%mm1,%%mm2\n\t" \ + "movq %%mm5,"_r4"(%[y])\n\t" \ + /*mm0=t7', mm1=t5'', mm2=t6'', mm3=t4'.*/ \ + /*mm4, mm5, mm6, mm7 are free.*/ \ + /*Stage 3:*/ \ + /*mm6={2}x4, mm7={27146,0xB500>>1}x2*/ \ + "mov $0x5A806A0A,%[a]\n\t" \ + "pcmpeqb %%mm6,%%mm6\n\t" \ + "movd %[a],%%mm7\n\t" \ + "psrlw $15,%%mm6\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + "paddw %%mm6,%%mm6\n\t" \ + /*mm0=0, m2={-1}x4 \ + mm5:mm4=t5''*27146+0xB500*/ \ + "movq %%mm1,%%mm4\n\t" \ + "movq %%mm1,%%mm5\n\t" \ + 
"punpcklwd %%mm6,%%mm4\n\t" \ + "movq %%mm2,"_r3"(%[y])\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "movq %%mm0,"_r7"(%[y])\n\t" \ + "punpckhwd %%mm6,%%mm5\n\t" \ + "pxor %%mm0,%%mm0\n\t" \ + "pmaddwd %%mm7,%%mm5\n\t" \ + "pcmpeqb %%mm2,%%mm2\n\t" \ + /*mm2=t6'', mm1=t5''+(t5''!=0) \ + mm4=(t5''*27146+0xB500>>16)*/ \ + "pcmpeqw %%mm1,%%mm0\n\t" \ + "psrad $16,%%mm4\n\t" \ + "psubw %%mm2,%%mm0\n\t" \ + "movq "_r3"(%[y]),%%mm2\n\t" \ + "psrad $16,%%mm5\n\t" \ + "paddw %%mm0,%%mm1\n\t" \ + "packssdw %%mm5,%%mm4\n\t" \ + /*mm4=s=(t5''*27146+0xB500>>16)+t5''+(t5''!=0)>>1*/ \ + "paddw %%mm1,%%mm4\n\t" \ + "movq "_r7"(%[y]),%%mm0\n\t" \ + "psraw $1,%%mm4\n\t" \ + "movq %%mm3,%%mm1\n\t" \ + /*mm3=t4''=t4'+s*/ \ + "paddw %%mm4,%%mm3\n\t" \ + /*mm1=t5'''=t4'-s*/ \ + "psubw %%mm4,%%mm1\n\t" \ + /*mm1=0, mm3={-1}x4 \ + mm5:mm4=t6''*27146+0xB500*/ \ + "movq %%mm2,%%mm4\n\t" \ + "movq %%mm2,%%mm5\n\t" \ + "punpcklwd %%mm6,%%mm4\n\t" \ + "movq %%mm1,"_r5"(%[y])\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "movq %%mm3,"_r1"(%[y])\n\t" \ + "punpckhwd %%mm6,%%mm5\n\t" \ + "pxor %%mm1,%%mm1\n\t" \ + "pmaddwd %%mm7,%%mm5\n\t" \ + "pcmpeqb %%mm3,%%mm3\n\t" \ + /*mm2=t6''+(t6''!=0), mm4=(t6''*27146+0xB500>>16)*/ \ + "psrad $16,%%mm4\n\t" \ + "pcmpeqw %%mm2,%%mm1\n\t" \ + "psrad $16,%%mm5\n\t" \ + "psubw %%mm3,%%mm1\n\t" \ + "packssdw %%mm5,%%mm4\n\t" \ + "paddw %%mm1,%%mm2\n\t" \ + /*mm1=t1'' \ + mm4=s=(t6''*27146+0xB500>>16)+t6''+(t6''!=0)>>1*/ \ + "paddw %%mm2,%%mm4\n\t" \ + "movq "_r4"(%[y]),%%mm1\n\t" \ + "psraw $1,%%mm4\n\t" \ + "movq %%mm0,%%mm2\n\t" \ + /*mm7={54491-0x7FFF,0x7FFF}x2 \ + mm0=t7''=t7'+s*/ \ + "paddw %%mm4,%%mm0\n\t" \ + /*mm2=t6'''=t7'-s*/ \ + "psubw %%mm4,%%mm2\n\t" \ + /*Stage 4:*/ \ + /*mm0=0, mm2=t0'' \ + mm5:mm4=t1''*27146+0xB500*/ \ + "movq %%mm1,%%mm4\n\t" \ + "movq %%mm1,%%mm5\n\t" \ + "punpcklwd %%mm6,%%mm4\n\t" \ + "movq %%mm2,"_r3"(%[y])\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "movq "_r0"(%[y]),%%mm2\n\t" \ + "punpckhwd %%mm6,%%mm5\n\t" \ + "movq 
%%mm0,"_r7"(%[y])\n\t" \ + "pmaddwd %%mm7,%%mm5\n\t" \ + "pxor %%mm0,%%mm0\n\t" \ + /*mm7={27146,0x4000>>1}x2 \ + mm0=s=(t1''*27146+0xB500>>16)+t1''+(t1''!=0)*/ \ + "psrad $16,%%mm4\n\t" \ + "mov $0x20006A0A,%[a]\n\t" \ + "pcmpeqw %%mm1,%%mm0\n\t" \ + "movd %[a],%%mm7\n\t" \ + "psrad $16,%%mm5\n\t" \ + "psubw %%mm3,%%mm0\n\t" \ + "packssdw %%mm5,%%mm4\n\t" \ + "paddw %%mm1,%%mm0\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + "paddw %%mm4,%%mm0\n\t" \ + /*mm6={0x00000E3D}x2 \ + mm1=-(t0''==0), mm5:mm4=t0''*27146+0x4000*/ \ + "movq %%mm2,%%mm4\n\t" \ + "movq %%mm2,%%mm5\n\t" \ + "punpcklwd %%mm6,%%mm4\n\t" \ + "mov $0x0E3D,%[a]\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "punpckhwd %%mm6,%%mm5\n\t" \ + "movd %[a],%%mm6\n\t" \ + "pmaddwd %%mm7,%%mm5\n\t" \ + "pxor %%mm1,%%mm1\n\t" \ + "punpckldq %%mm6,%%mm6\n\t" \ + "pcmpeqw %%mm2,%%mm1\n\t" \ + /*mm4=r=(t0''*27146+0x4000>>16)+t0''+(t0''!=0)*/ \ + "psrad $16,%%mm4\n\t" \ + "psubw %%mm3,%%mm1\n\t" \ + "psrad $16,%%mm5\n\t" \ + "paddw %%mm1,%%mm2\n\t" \ + "packssdw %%mm5,%%mm4\n\t" \ + "movq "_r5"(%[y]),%%mm1\n\t" \ + "paddw %%mm2,%%mm4\n\t" \ + /*mm2=t6'', mm0=_y[0]=u=r+s>>1 \ + The naive implementation could cause overflow, so we use \ + u=(r&s)+((r^s)>>1).*/ \ + "movq "_r3"(%[y]),%%mm2\n\t" \ + "movq %%mm0,%%mm7\n\t" \ + "pxor %%mm4,%%mm0\n\t" \ + "pand %%mm4,%%mm7\n\t" \ + "psraw $1,%%mm0\n\t" \ + "mov $0x7FFF54DC,%[a]\n\t" \ + "paddw %%mm7,%%mm0\n\t" \ + "movd %[a],%%mm7\n\t" \ + /*mm7={54491-0x7FFF,0x7FFF}x2 \ + mm4=_y[4]=v=r-u*/ \ + "psubw %%mm0,%%mm4\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + "movq %%mm4,"_r4"(%[y])\n\t" \ + /*mm0=0, mm7={36410}x4 \ + mm1=(t5'''!=0), mm5:mm4=54491*t5'''+0x0E3D*/ \ + "movq %%mm1,%%mm4\n\t" \ + "movq %%mm1,%%mm5\n\t" \ + "punpcklwd %%mm1,%%mm4\n\t" \ + "mov $0x8E3A8E3A,%[a]\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "movq %%mm0,"_r0"(%[y])\n\t" \ + "punpckhwd %%mm1,%%mm5\n\t" \ + "pxor %%mm0,%%mm0\n\t" \ + "pmaddwd %%mm7,%%mm5\n\t" \ + "pcmpeqw %%mm0,%%mm1\n\t" \ + "movd %[a],%%mm7\n\t" \ + "psubw 
%%mm3,%%mm1\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + "paddd %%mm6,%%mm4\n\t" \ + "paddd %%mm6,%%mm5\n\t" \ + /*mm0=0 \ + mm3:mm1=36410*t6'''+((t5'''!=0)<<16)*/ \ + "movq %%mm2,%%mm6\n\t" \ + "movq %%mm2,%%mm3\n\t" \ + "pmulhw %%mm7,%%mm6\n\t" \ + "paddw %%mm2,%%mm1\n\t" \ + "pmullw %%mm7,%%mm3\n\t" \ + "pxor %%mm0,%%mm0\n\t" \ + "paddw %%mm1,%%mm6\n\t" \ + "movq %%mm3,%%mm1\n\t" \ + "punpckhwd %%mm6,%%mm3\n\t" \ + "punpcklwd %%mm6,%%mm1\n\t" \ + /*mm3={-1}x4, mm6={1}x4 \ + mm4=_y[5]=u=(54491*t5'''+36410*t6'''+0x0E3D>>16)+(t5'''!=0)*/ \ + "paddd %%mm3,%%mm5\n\t" \ + "paddd %%mm1,%%mm4\n\t" \ + "psrad $16,%%mm5\n\t" \ + "pxor %%mm6,%%mm6\n\t" \ + "psrad $16,%%mm4\n\t" \ + "pcmpeqb %%mm3,%%mm3\n\t" \ + "packssdw %%mm5,%%mm4\n\t" \ + "psubw %%mm3,%%mm6\n\t" \ + /*mm1=t7'', mm7={26568,0x3400}x2 \ + mm2=s=t6'''-(36410*u>>16)*/ \ + "movq %%mm4,%%mm1\n\t" \ + "mov $0x340067C8,%[a]\n\t" \ + "pmulhw %%mm7,%%mm4\n\t" \ + "movd %[a],%%mm7\n\t" \ + "movq %%mm1,"_r5"(%[y])\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + "paddw %%mm1,%%mm4\n\t" \ + "movq "_r7"(%[y]),%%mm1\n\t" \ + "psubw %%mm4,%%mm2\n\t" \ + /*mm6={0x00007B1B}x2 \ + mm0=(s!=0), mm5:mm4=s*26568+0x3400*/ \ + "movq %%mm2,%%mm4\n\t" \ + "movq %%mm2,%%mm5\n\t" \ + "punpcklwd %%mm6,%%mm4\n\t" \ + "pcmpeqw %%mm2,%%mm0\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "mov $0x7B1B,%[a]\n\t" \ + "punpckhwd %%mm6,%%mm5\n\t" \ + "movd %[a],%%mm6\n\t" \ + "pmaddwd %%mm7,%%mm5\n\t" \ + "psubw %%mm3,%%mm0\n\t" \ + "punpckldq %%mm6,%%mm6\n\t" \ + /*mm7={64277-0x7FFF,0x7FFF}x2 \ + mm2=_y[3]=v=(s*26568+0x3400>>17)+s+(s!=0)*/ \ + "psrad $17,%%mm4\n\t" \ + "paddw %%mm0,%%mm2\n\t" \ + "psrad $17,%%mm5\n\t" \ + "mov $0x7FFF7B16,%[a]\n\t" \ + "packssdw %%mm5,%%mm4\n\t" \ + "movd %[a],%%mm7\n\t" \ + "paddw %%mm4,%%mm2\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + /*mm0=0, mm7={12785}x4 \ + mm1=(t7''!=0), mm2=t4'', mm5:mm4=64277*t7''+0x7B1B*/ \ + "movq %%mm1,%%mm4\n\t" \ + "movq %%mm1,%%mm5\n\t" \ + "movq %%mm2,"_r3"(%[y])\n\t" \ + "punpcklwd %%mm1,%%mm4\n\t" 
\ + "movq "_r1"(%[y]),%%mm2\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "mov $0x31F131F1,%[a]\n\t" \ + "punpckhwd %%mm1,%%mm5\n\t" \ + "pxor %%mm0,%%mm0\n\t" \ + "pmaddwd %%mm7,%%mm5\n\t" \ + "pcmpeqw %%mm0,%%mm1\n\t" \ + "movd %[a],%%mm7\n\t" \ + "psubw %%mm3,%%mm1\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + "paddd %%mm6,%%mm4\n\t" \ + "paddd %%mm6,%%mm5\n\t" \ + /*mm3:mm1=12785*t4'''+((t7''!=0)<<16)*/ \ + "movq %%mm2,%%mm6\n\t" \ + "movq %%mm2,%%mm3\n\t" \ + "pmulhw %%mm7,%%mm6\n\t" \ + "pmullw %%mm7,%%mm3\n\t" \ + "paddw %%mm1,%%mm6\n\t" \ + "movq %%mm3,%%mm1\n\t" \ + "punpckhwd %%mm6,%%mm3\n\t" \ + "punpcklwd %%mm6,%%mm1\n\t" \ + /*mm3={-1}x4, mm6={1}x4 \ + mm4=_y[1]=u=(12785*t4'''+64277*t7''+0x7B1B>>16)+(t7''!=0)*/ \ + "paddd %%mm3,%%mm5\n\t" \ + "paddd %%mm1,%%mm4\n\t" \ + "psrad $16,%%mm5\n\t" \ + "pxor %%mm6,%%mm6\n\t" \ + "psrad $16,%%mm4\n\t" \ + "pcmpeqb %%mm3,%%mm3\n\t" \ + "packssdw %%mm5,%%mm4\n\t" \ + "psubw %%mm3,%%mm6\n\t" \ + /*mm1=t3'', mm7={20539,0x3000}x2 \ + mm4=s=(12785*u>>16)-t4''*/ \ + "movq %%mm4,"_r1"(%[y])\n\t" \ + "pmulhw %%mm7,%%mm4\n\t" \ + "mov $0x3000503B,%[a]\n\t" \ + "movq "_r6"(%[y]),%%mm1\n\t" \ + "movd %[a],%%mm7\n\t" \ + "psubw %%mm2,%%mm4\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + /*mm6={0x00006CB7}x2 \ + mm0=(s!=0), mm5:mm4=s*20539+0x3000*/ \ + "movq %%mm4,%%mm5\n\t" \ + "movq %%mm4,%%mm2\n\t" \ + "punpcklwd %%mm6,%%mm4\n\t" \ + "pcmpeqw %%mm2,%%mm0\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "mov $0x6CB7,%[a]\n\t" \ + "punpckhwd %%mm6,%%mm5\n\t" \ + "movd %[a],%%mm6\n\t" \ + "pmaddwd %%mm7,%%mm5\n\t" \ + "psubw %%mm3,%%mm0\n\t" \ + "punpckldq %%mm6,%%mm6\n\t" \ + /*mm7={60547-0x7FFF,0x7FFF}x2 \ + mm2=_y[7]=v=(s*20539+0x3000>>20)+s+(s!=0)*/ \ + "psrad $20,%%mm4\n\t" \ + "paddw %%mm0,%%mm2\n\t" \ + "psrad $20,%%mm5\n\t" \ + "mov $0x7FFF6C84,%[a]\n\t" \ + "packssdw %%mm5,%%mm4\n\t" \ + "movd %[a],%%mm7\n\t" \ + "paddw %%mm4,%%mm2\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + /*mm0=0, mm7={25080}x4 \ + mm2=t2'', mm5:mm4=60547*t3''+0x6CB7*/ \ + 
"movq %%mm1,%%mm4\n\t" \ + "movq %%mm1,%%mm5\n\t" \ + "movq %%mm2,"_r7"(%[y])\n\t" \ + "punpcklwd %%mm1,%%mm4\n\t" \ + "movq "_r2"(%[y]),%%mm2\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "mov $0x61F861F8,%[a]\n\t" \ + "punpckhwd %%mm1,%%mm5\n\t" \ + "pxor %%mm0,%%mm0\n\t" \ + "pmaddwd %%mm7,%%mm5\n\t" \ + "movd %[a],%%mm7\n\t" \ + "pcmpeqw %%mm0,%%mm1\n\t" \ + "psubw %%mm3,%%mm1\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + "paddd %%mm6,%%mm4\n\t" \ + "paddd %%mm6,%%mm5\n\t" \ + /*mm3:mm1=25080*t2''+((t3''!=0)<<16)*/ \ + "movq %%mm2,%%mm6\n\t" \ + "movq %%mm2,%%mm3\n\t" \ + "pmulhw %%mm7,%%mm6\n\t" \ + "pmullw %%mm7,%%mm3\n\t" \ + "paddw %%mm1,%%mm6\n\t" \ + "movq %%mm3,%%mm1\n\t" \ + "punpckhwd %%mm6,%%mm3\n\t" \ + "punpcklwd %%mm6,%%mm1\n\t" \ + /*mm1={-1}x4 \ + mm4=u=(25080*t2''+60547*t3''+0x6CB7>>16)+(t3''!=0)*/ \ + "paddd %%mm3,%%mm5\n\t" \ + "paddd %%mm1,%%mm4\n\t" \ + "psrad $16,%%mm5\n\t" \ + "mov $0x28005460,%[a]\n\t" \ + "psrad $16,%%mm4\n\t" \ + "pcmpeqb %%mm1,%%mm1\n\t" \ + "packssdw %%mm5,%%mm4\n\t" \ + /*mm5={1}x4, mm6=_y[2]=u, mm7={21600,0x2800}x2 \ + mm4=s=(25080*u>>16)-t2''*/ \ + "movq %%mm4,%%mm6\n\t" \ + "pmulhw %%mm7,%%mm4\n\t" \ + "pxor %%mm5,%%mm5\n\t" \ + "movd %[a],%%mm7\n\t" \ + "psubw %%mm1,%%mm5\n\t" \ + "punpckldq %%mm7,%%mm7\n\t" \ + "psubw %%mm2,%%mm4\n\t" \ + /*mm2=s+(s!=0) \ + mm4:mm3=s*21600+0x2800*/ \ + "movq %%mm4,%%mm3\n\t" \ + "movq %%mm4,%%mm2\n\t" \ + "punpckhwd %%mm5,%%mm4\n\t" \ + "pcmpeqw %%mm2,%%mm0\n\t" \ + "pmaddwd %%mm7,%%mm4\n\t" \ + "psubw %%mm1,%%mm0\n\t" \ + "punpcklwd %%mm5,%%mm3\n\t" \ + "paddw %%mm0,%%mm2\n\t" \ + "pmaddwd %%mm7,%%mm3\n\t" \ + /*mm0=_y[4], mm1=_y[7], mm4=_y[0], mm5=_y[5] \ + mm3=_y[6]=v=(s*21600+0x2800>>18)+s+(s!=0)*/ \ + "movq "_r4"(%[y]),%%mm0\n\t" \ + "psrad $18,%%mm4\n\t" \ + "movq "_r5"(%[y]),%%mm5\n\t" \ + "psrad $18,%%mm3\n\t" \ + "movq "_r7"(%[y]),%%mm1\n\t" \ + "packssdw %%mm4,%%mm3\n\t" \ + "movq "_r0"(%[y]),%%mm4\n\t" \ + "paddw %%mm2,%%mm3\n\t" \ + +/*On input, mm4=_y[0], mm6=_y[2], mm0=_y[4], 
mm5=_y[5], mm3=_y[6], mm1=_y[7]. + On output, {_y[4],mm1,mm2,mm3} contains the transpose of _y[4...7] and + {mm4,mm5,mm6,mm7} contains the transpose of _y[0...3].*/ +# define OC_TRANSPOSE8x4(_r0,_r1,_r2,_r3,_r4,_r5,_r6,_r7) \ + "#OC_TRANSPOSE8x4\n\t" \ + /*First 4x4 transpose:*/ \ + /*mm0 = e3 e2 e1 e0 \ + mm5 = f3 f2 f1 f0 \ + mm3 = g3 g2 g1 g0 \ + mm1 = h3 h2 h1 h0*/ \ + "movq %%mm0,%%mm2\n\t" \ + "punpcklwd %%mm5,%%mm0\n\t" \ + "punpckhwd %%mm5,%%mm2\n\t" \ + "movq %%mm3,%%mm5\n\t" \ + "punpcklwd %%mm1,%%mm3\n\t" \ + "punpckhwd %%mm1,%%mm5\n\t" \ + /*mm0 = f1 e1 f0 e0 \ + mm2 = f3 e3 f2 e2 \ + mm3 = h1 g1 h0 g0 \ + mm5 = h3 g3 h2 g2*/ \ + "movq %%mm0,%%mm1\n\t" \ + "punpckldq %%mm3,%%mm0\n\t" \ + "movq %%mm0,"_r4"(%[y])\n\t" \ + "punpckhdq %%mm3,%%mm1\n\t" \ + "movq "_r1"(%[y]),%%mm0\n\t" \ + "movq %%mm2,%%mm3\n\t" \ + "punpckldq %%mm5,%%mm2\n\t" \ + "punpckhdq %%mm5,%%mm3\n\t" \ + "movq "_r3"(%[y]),%%mm5\n\t" \ + /*_y[4] = h0 g0 f0 e0 \ + mm1 = h1 g1 f1 e1 \ + mm2 = h2 g2 f2 e2 \ + mm3 = h3 g3 f3 e3*/ \ + /*Second 4x4 transpose:*/ \ + /*mm4 = a3 a2 a1 a0 \ + mm0 = b3 b2 b1 b0 \ + mm6 = c3 c2 c1 c0 \ + mm5 = d3 d2 d1 d0*/ \ + "movq %%mm4,%%mm7\n\t" \ + "punpcklwd %%mm0,%%mm4\n\t" \ + "punpckhwd %%mm0,%%mm7\n\t" \ + "movq %%mm6,%%mm0\n\t" \ + "punpcklwd %%mm5,%%mm6\n\t" \ + "punpckhwd %%mm5,%%mm0\n\t" \ + /*mm4 = b1 a1 b0 a0 \ + mm7 = b3 a3 b2 a2 \ + mm6 = d1 c1 d0 c0 \ + mm0 = d3 c3 d2 c2*/ \ + "movq %%mm4,%%mm5\n\t" \ + "punpckldq %%mm6,%%mm4\n\t" \ + "punpckhdq %%mm6,%%mm5\n\t" \ + "movq %%mm7,%%mm6\n\t" \ + "punpckhdq %%mm0,%%mm7\n\t" \ + "punpckldq %%mm0,%%mm6\n\t" \ + /*mm4 = d0 c0 b0 a0 \ + mm5 = d1 c1 b1 a1 \ + mm6 = d2 c2 b2 a2 \ + mm7 = d3 c3 b3 a3*/ \ + +/*MMX implementation of the fDCT.*/ +void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + ptrdiff_t a; + __asm__ __volatile__( + /*Add two extra bits of working precision to improve accuracy; any more and + we could overflow.*/ + /*We also add biases to correct for some systematic 
error that remains in + the full fDCT->iDCT round trip.*/ + "movq 0x00(%[x]),%%mm0\n\t" + "movq 0x10(%[x]),%%mm1\n\t" + "movq 0x20(%[x]),%%mm2\n\t" + "movq 0x30(%[x]),%%mm3\n\t" + "pcmpeqb %%mm4,%%mm4\n\t" + "pxor %%mm7,%%mm7\n\t" + "movq %%mm0,%%mm5\n\t" + "psllw $2,%%mm0\n\t" + "pcmpeqw %%mm7,%%mm5\n\t" + "movq 0x70(%[x]),%%mm7\n\t" + "psllw $2,%%mm1\n\t" + "psubw %%mm4,%%mm5\n\t" + "psllw $2,%%mm2\n\t" + "mov $1,%[a]\n\t" + "pslld $16,%%mm5\n\t" + "movd %[a],%%mm6\n\t" + "psllq $16,%%mm5\n\t" + "mov $0x10001,%[a]\n\t" + "psllw $2,%%mm3\n\t" + "movd %[a],%%mm4\n\t" + "punpckhwd %%mm6,%%mm5\n\t" + "psubw %%mm6,%%mm1\n\t" + "movq 0x60(%[x]),%%mm6\n\t" + "paddw %%mm5,%%mm0\n\t" + "movq 0x50(%[x]),%%mm5\n\t" + "paddw %%mm4,%%mm0\n\t" + "movq 0x40(%[x]),%%mm4\n\t" + /*We inline stage1 of the transform here so we can get better instruction + scheduling with the shifts.*/ + /*mm0=t7'=t0-t7*/ + "psllw $2,%%mm7\n\t" + "psubw %%mm7,%%mm0\n\t" + "psllw $2,%%mm6\n\t" + "paddw %%mm7,%%mm7\n\t" + /*mm1=t6'=t1-t6*/ + "psllw $2,%%mm5\n\t" + "psubw %%mm6,%%mm1\n\t" + "psllw $2,%%mm4\n\t" + "paddw %%mm6,%%mm6\n\t" + /*mm2=t5'=t2-t5*/ + "psubw %%mm5,%%mm2\n\t" + "paddw %%mm5,%%mm5\n\t" + /*mm3=t4'=t3-t4*/ + "psubw %%mm4,%%mm3\n\t" + "paddw %%mm4,%%mm4\n\t" + /*mm7=t0'=t0+t7*/ + "paddw %%mm0,%%mm7\n\t" + /*mm6=t1'=t1+t6*/ + "paddw %%mm1,%%mm6\n\t" + /*mm5=t2'=t2+t5*/ + "paddw %%mm2,%%mm5\n\t" + /*mm4=t3'=t3+t4*/ + "paddw %%mm3,%%mm4\n\t" + OC_FDCT8x4("0x00","0x10","0x20","0x30","0x40","0x50","0x60","0x70") + OC_TRANSPOSE8x4("0x00","0x10","0x20","0x30","0x40","0x50","0x60","0x70") + /*Swap out this 8x4 block for the next one.*/ + "movq 0x08(%[x]),%%mm0\n\t" + "movq %%mm7,0x30(%[y])\n\t" + "movq 0x78(%[x]),%%mm7\n\t" + "movq %%mm1,0x50(%[y])\n\t" + "movq 0x18(%[x]),%%mm1\n\t" + "movq %%mm6,0x20(%[y])\n\t" + "movq 0x68(%[x]),%%mm6\n\t" + "movq %%mm2,0x60(%[y])\n\t" + "movq 0x28(%[x]),%%mm2\n\t" + "movq %%mm5,0x10(%[y])\n\t" + "movq 0x58(%[x]),%%mm5\n\t" + "movq %%mm3,0x70(%[y])\n\t" + 
"movq 0x38(%[x]),%%mm3\n\t" + /*And increase its working precision, too.*/ + "psllw $2,%%mm0\n\t" + "movq %%mm4,0x00(%[y])\n\t" + "psllw $2,%%mm7\n\t" + "movq 0x48(%[x]),%%mm4\n\t" + /*We inline stage1 of the transform here so we can get better instruction + scheduling with the shifts.*/ + /*mm0=t7'=t0-t7*/ + "psubw %%mm7,%%mm0\n\t" + "psllw $2,%%mm1\n\t" + "paddw %%mm7,%%mm7\n\t" + "psllw $2,%%mm6\n\t" + /*mm1=t6'=t1-t6*/ + "psubw %%mm6,%%mm1\n\t" + "psllw $2,%%mm2\n\t" + "paddw %%mm6,%%mm6\n\t" + "psllw $2,%%mm5\n\t" + /*mm2=t5'=t2-t5*/ + "psubw %%mm5,%%mm2\n\t" + "psllw $2,%%mm3\n\t" + "paddw %%mm5,%%mm5\n\t" + "psllw $2,%%mm4\n\t" + /*mm3=t4'=t3-t4*/ + "psubw %%mm4,%%mm3\n\t" + "paddw %%mm4,%%mm4\n\t" + /*mm7=t0'=t0+t7*/ + "paddw %%mm0,%%mm7\n\t" + /*mm6=t1'=t1+t6*/ + "paddw %%mm1,%%mm6\n\t" + /*mm5=t2'=t2+t5*/ + "paddw %%mm2,%%mm5\n\t" + /*mm4=t3'=t3+t4*/ + "paddw %%mm3,%%mm4\n\t" + OC_FDCT8x4("0x08","0x18","0x28","0x38","0x48","0x58","0x68","0x78") + OC_TRANSPOSE8x4("0x08","0x18","0x28","0x38","0x48","0x58","0x68","0x78") + /*Here the first 4x4 block of output from the last transpose is the second + 4x4 block of input for the next transform. 
+ We have cleverly arranged that it already be in the appropriate place, + so we only have to do half the stores and loads.*/ + "movq 0x00(%[y]),%%mm0\n\t" + "movq %%mm1,0x58(%[y])\n\t" + "movq 0x10(%[y]),%%mm1\n\t" + "movq %%mm2,0x68(%[y])\n\t" + "movq 0x20(%[y]),%%mm2\n\t" + "movq %%mm3,0x78(%[y])\n\t" + "movq 0x30(%[y]),%%mm3\n\t" + OC_FDCT_STAGE1_8x4 + OC_FDCT8x4("0x00","0x10","0x20","0x30","0x08","0x18","0x28","0x38") + OC_TRANSPOSE8x4("0x00","0x10","0x20","0x30","0x08","0x18","0x28","0x38") + /*mm0={-2}x4*/ + "pcmpeqw %%mm0,%%mm0\n\t" + "paddw %%mm0,%%mm0\n\t" + /*Round the results.*/ + "psubw %%mm0,%%mm1\n\t" + "psubw %%mm0,%%mm2\n\t" + "psraw $2,%%mm1\n\t" + "psubw %%mm0,%%mm3\n\t" + "movq %%mm1,0x18(%[y])\n\t" + "psraw $2,%%mm2\n\t" + "psubw %%mm0,%%mm4\n\t" + "movq 0x08(%[y]),%%mm1\n\t" + "psraw $2,%%mm3\n\t" + "psubw %%mm0,%%mm5\n\t" + "psraw $2,%%mm4\n\t" + "psubw %%mm0,%%mm6\n\t" + "psraw $2,%%mm5\n\t" + "psubw %%mm0,%%mm7\n\t" + "psraw $2,%%mm6\n\t" + "psubw %%mm0,%%mm1\n\t" + "psraw $2,%%mm7\n\t" + "movq 0x40(%[y]),%%mm0\n\t" + "psraw $2,%%mm1\n\t" + "movq %%mm7,0x30(%[y])\n\t" + "movq 0x78(%[y]),%%mm7\n\t" + "movq %%mm1,0x08(%[y])\n\t" + "movq 0x50(%[y]),%%mm1\n\t" + "movq %%mm6,0x20(%[y])\n\t" + "movq 0x68(%[y]),%%mm6\n\t" + "movq %%mm2,0x28(%[y])\n\t" + "movq 0x60(%[y]),%%mm2\n\t" + "movq %%mm5,0x10(%[y])\n\t" + "movq 0x58(%[y]),%%mm5\n\t" + "movq %%mm3,0x38(%[y])\n\t" + "movq 0x70(%[y]),%%mm3\n\t" + "movq %%mm4,0x00(%[y])\n\t" + "movq 0x48(%[y]),%%mm4\n\t" + OC_FDCT_STAGE1_8x4 + OC_FDCT8x4("0x40","0x50","0x60","0x70","0x48","0x58","0x68","0x78") + OC_TRANSPOSE8x4("0x40","0x50","0x60","0x70","0x48","0x58","0x68","0x78") + /*mm0={-2}x4*/ + "pcmpeqw %%mm0,%%mm0\n\t" + "paddw %%mm0,%%mm0\n\t" + /*Round the results.*/ + "psubw %%mm0,%%mm1\n\t" + "psubw %%mm0,%%mm2\n\t" + "psraw $2,%%mm1\n\t" + "psubw %%mm0,%%mm3\n\t" + "movq %%mm1,0x58(%[y])\n\t" + "psraw $2,%%mm2\n\t" + "psubw %%mm0,%%mm4\n\t" + "movq 0x48(%[y]),%%mm1\n\t" + "psraw $2,%%mm3\n\t" + 
"psubw %%mm0,%%mm5\n\t" + "movq %%mm2,0x68(%[y])\n\t" + "psraw $2,%%mm4\n\t" + "psubw %%mm0,%%mm6\n\t" + "movq %%mm3,0x78(%[y])\n\t" + "psraw $2,%%mm5\n\t" + "psubw %%mm0,%%mm7\n\t" + "movq %%mm4,0x40(%[y])\n\t" + "psraw $2,%%mm6\n\t" + "psubw %%mm0,%%mm1\n\t" + "movq %%mm5,0x50(%[y])\n\t" + "psraw $2,%%mm7\n\t" + "movq %%mm6,0x60(%[y])\n\t" + "psraw $2,%%mm1\n\t" + "movq %%mm7,0x70(%[y])\n\t" + "movq %%mm1,0x48(%[y])\n\t" + :[a]"=&r"(a) + :[y]"r"(_y),[x]"r"(_x) + :"memory" + ); +} + +#endif diff --git a/thirdparty/libtheora/x86/mmxfrag.c b/thirdparty/libtheora/x86/mmxfrag.c new file mode 100644 index 0000000000..2c732939c3 --- /dev/null +++ b/thirdparty/libtheora/x86/mmxfrag.c @@ -0,0 +1,293 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: mmxfrag.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +/*MMX acceleration of fragment reconstruction for motion compensation. + Originally written by Rudolf Marek. + Additional optimization by Nils Pipenbrinck. + Note: Loops are unrolled for best performance. 
+ The iteration each instruction belongs to is marked in the comments as #i.*/ +#include +#include "x86int.h" +#include "mmxfrag.h" + +#if defined(OC_X86_ASM) + +/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes + between rows.*/ +void oc_frag_copy_mmx(unsigned char *_dst, + const unsigned char *_src,int _ystride){ + OC_FRAG_COPY_MMX(_dst,_src,_ystride); +} + +/*Reconstructs an 8x8 intra block from its residue alone. + Each 16-bit residue is biased by 0x0080 with signed saturation, packed to an + unsigned byte with saturation, and the eight 8-byte rows are stored to + _dst, _ystride bytes apart. + _residue is read as 64 consecutive 16-bit values (8 per row).*/ +void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride, + const ogg_int16_t *_residue){ + __asm__ __volatile__( + /*Set mm0 to 0xFFFFFFFFFFFFFFFF.*/ + "pcmpeqw %%mm0,%%mm0\n\t" + /*#0 Load low residue.*/ + "movq 0*8(%[residue]),%%mm1\n\t" + /*#0 Load high residue.*/ + "movq 1*8(%[residue]),%%mm2\n\t" + /*Set mm0 to 0x8000800080008000.*/ + "psllw $15,%%mm0\n\t" + /*#1 Load low residue.*/ + "movq 2*8(%[residue]),%%mm3\n\t" + /*#1 Load high residue.*/ + "movq 3*8(%[residue]),%%mm4\n\t" + /*Set mm0 to 0x0080008000800080.*/ + "psrlw $8,%%mm0\n\t" + /*#2 Load low residue.*/ + "movq 4*8(%[residue]),%%mm5\n\t" + /*#2 Load high residue.*/ + "movq 5*8(%[residue]),%%mm6\n\t" + /*#0 Bias low residue.*/ + "paddsw %%mm0,%%mm1\n\t" + /*#0 Bias high residue.*/ + "paddsw %%mm0,%%mm2\n\t" + /*#0 Pack to byte.*/ + "packuswb %%mm2,%%mm1\n\t" + /*#1 Bias low residue.*/ + "paddsw %%mm0,%%mm3\n\t" + /*#1 Bias high residue.*/ + "paddsw %%mm0,%%mm4\n\t" + /*#1 Pack to byte.*/ + "packuswb %%mm4,%%mm3\n\t" + /*#2 Bias low residue.*/ + "paddsw %%mm0,%%mm5\n\t" + /*#2 Bias high residue.*/ + "paddsw %%mm0,%%mm6\n\t" + /*#2 Pack to byte.*/ + "packuswb %%mm6,%%mm5\n\t" + /*#0 Write row.*/ + "movq %%mm1,(%[dst])\n\t" + /*#1 Write row.*/ + "movq %%mm3,(%[dst],%[ystride])\n\t" + /*#2 Write row.*/ + "movq %%mm5,(%[dst],%[ystride],2)\n\t" + /*#3 Load low residue.*/ + "movq 6*8(%[residue]),%%mm1\n\t" + /*#3 Load high residue.*/ + "movq 7*8(%[residue]),%%mm2\n\t" + /*#4 Load low residue.*/ + "movq 8*8(%[residue]),%%mm3\n\t" + /*#4 Load high residue.*/ + "movq 9*8(%[residue]),%%mm4\n\t" + /*#5 Load low residue.*/ +
"movq 10*8(%[residue]),%%mm5\n\t" + /*#5 Load high residue.*/ + "movq 11*8(%[residue]),%%mm6\n\t" + /*#3 Bias low residue.*/ + "paddsw %%mm0,%%mm1\n\t" + /*#3 Bias high residue.*/ + "paddsw %%mm0,%%mm2\n\t" + /*#3 Pack to byte.*/ + "packuswb %%mm2,%%mm1\n\t" + /*#4 Bias low residue.*/ + "paddsw %%mm0,%%mm3\n\t" + /*#4 Bias high residue.*/ + "paddsw %%mm0,%%mm4\n\t" + /*#4 Pack to byte.*/ + "packuswb %%mm4,%%mm3\n\t" + /*#5 Bias low residue.*/ + "paddsw %%mm0,%%mm5\n\t" + /*#5 Bias high residue.*/ + "paddsw %%mm0,%%mm6\n\t" + /*#5 Pack to byte.*/ + "packuswb %%mm6,%%mm5\n\t" + /*#3 Write row.*/ + "movq %%mm1,(%[dst],%[ystride3])\n\t" + /*#4 Write row.*/ + "movq %%mm3,(%[dst4])\n\t" + /*#5 Write row.*/ + "movq %%mm5,(%[dst4],%[ystride])\n\t" + /*#6 Load low residue.*/ + "movq 12*8(%[residue]),%%mm1\n\t" + /*#6 Load high residue.*/ + "movq 13*8(%[residue]),%%mm2\n\t" + /*#7 Load low residue.*/ + "movq 14*8(%[residue]),%%mm3\n\t" + /*#7 Load high residue.*/ + "movq 15*8(%[residue]),%%mm4\n\t" + /*#6 Bias low residue.*/ + "paddsw %%mm0,%%mm1\n\t" + /*#6 Bias high residue.*/ + "paddsw %%mm0,%%mm2\n\t" + /*#6 Pack to byte.*/ + "packuswb %%mm2,%%mm1\n\t" + /*#7 Bias low residue.*/ + "paddsw %%mm0,%%mm3\n\t" + /*#7 Bias high residue.*/ + "paddsw %%mm0,%%mm4\n\t" + /*#7 Pack to byte.*/ + "packuswb %%mm4,%%mm3\n\t" + /*#6 Write row.*/ + "movq %%mm1,(%[dst4],%[ystride],2)\n\t" + /*#7 Write row.*/ + "movq %%mm3,(%[dst4],%[ystride3])\n\t" + : + /*The row-4 offset is scaled as a ptrdiff_t multiply, like ystride3: + left-shifting a negative int stride would be undefined behavior.*/ + :[residue]"r"(_residue), + [dst]"r"(_dst), + [dst4]"r"(_dst+(ptrdiff_t)_ystride*4), + [ystride]"r"((ptrdiff_t)_ystride), + [ystride3]"r"((ptrdiff_t)_ystride*3) + :"memory" + ); +} + +void oc_frag_recon_inter_mmx(unsigned char *_dst,const unsigned char *_src, + int _ystride,const ogg_int16_t *_residue){ + int i; + /*Zero mm0.*/ + __asm__ __volatile__("pxor %%mm0,%%mm0\n\t"::); + for(i=4;i-->0;){ + __asm__ __volatile__( + /*#0 Load source.*/ + "movq (%[src]),%%mm3\n\t" + /*#1 Load source.*/ + "movq (%[src],%[ystride]),%%mm7\n\t" + /*#0 Get copy of
src.*/ + "movq %%mm3,%%mm4\n\t" + /*#0 Expand high source.*/ + "punpckhbw %%mm0,%%mm4\n\t" + /*#0 Expand low source.*/ + "punpcklbw %%mm0,%%mm3\n\t" + /*#0 Add residue high.*/ + "paddsw 8(%[residue]),%%mm4\n\t" + /*#1 Get copy of src.*/ + "movq %%mm7,%%mm2\n\t" + /*#0 Add residue low.*/ + "paddsw (%[residue]), %%mm3\n\t" + /*#1 Expand high source.*/ + "punpckhbw %%mm0,%%mm2\n\t" + /*#0 Pack final row pixels.*/ + "packuswb %%mm4,%%mm3\n\t" + /*#1 Expand low source.*/ + "punpcklbw %%mm0,%%mm7\n\t" + /*#1 Add residue low.*/ + "paddsw 16(%[residue]),%%mm7\n\t" + /*#1 Add residue high.*/ + "paddsw 24(%[residue]),%%mm2\n\t" + /*Advance residue.*/ + "lea 32(%[residue]),%[residue]\n\t" + /*#1 Pack final row pixels.*/ + "packuswb %%mm2,%%mm7\n\t" + /*Advance src.*/ + "lea (%[src],%[ystride],2),%[src]\n\t" + /*#0 Write row.*/ + "movq %%mm3,(%[dst])\n\t" + /*#1 Write row.*/ + "movq %%mm7,(%[dst],%[ystride])\n\t" + /*Advance dst.*/ + "lea (%[dst],%[ystride],2),%[dst]\n\t" + :[residue]"+r"(_residue),[dst]"+r"(_dst),[src]"+r"(_src) + :[ystride]"r"((ptrdiff_t)_ystride) + :"memory" + ); + } +} + +/*Reconstructs an 8x8 inter block from two predictors. + Each output pixel is ((_src1+_src2)>>1)+residue, saturated to an unsigned + byte; rows of _dst, _src1 and _src2 are _ystride bytes apart. + Two rows are produced per loop iteration, over four iterations.*/ +void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1, + const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue){ + int i; + /*Zero mm7.*/ + __asm__ __volatile__("pxor %%mm7,%%mm7\n\t"::); + for(i=4;i-->0;){ + __asm__ __volatile__( + /*#0 Load src1.*/ + "movq (%[src1]),%%mm0\n\t" + /*#0 Load src2.*/ + "movq (%[src2]),%%mm2\n\t" + /*#0 Copy src1.*/ + "movq %%mm0,%%mm1\n\t" + /*#0 Copy src2.*/ + "movq %%mm2,%%mm3\n\t" + /*#1 Load src1.*/ + "movq (%[src1],%[ystride]),%%mm4\n\t" + /*#0 Unpack lower src1.*/ + "punpcklbw %%mm7,%%mm0\n\t" + /*#1 Load src2.*/ + "movq (%[src2],%[ystride]),%%mm5\n\t" + /*#0 Unpack higher src1.*/ + "punpckhbw %%mm7,%%mm1\n\t" + /*#0 Unpack lower src2.*/ + "punpcklbw %%mm7,%%mm2\n\t" + /*#0 Unpack higher src2.*/ + "punpckhbw %%mm7,%%mm3\n\t" + /*Advance src1 ptr.*/ + "lea (%[src1],%[ystride],2),%[src1]\n\t" + /*Advance src2 ptr.*/
+ "lea (%[src2],%[ystride],2),%[src2]\n\t" + /*#0 Lower src1+src2.*/ + "paddsw %%mm2,%%mm0\n\t" + /*#0 Higher src1+src2.*/ + "paddsw %%mm3,%%mm1\n\t" + /*#1 Copy src1.*/ + "movq %%mm4,%%mm2\n\t" + /*#0 Build lo average.*/ + "psraw $1,%%mm0\n\t" + /*#1 Copy src2.*/ + "movq %%mm5,%%mm3\n\t" + /*#1 Unpack lower src1.*/ + "punpcklbw %%mm7,%%mm4\n\t" + /*#0 Build hi average.*/ + "psraw $1,%%mm1\n\t" + /*#1 Unpack higher src1.*/ + "punpckhbw %%mm7,%%mm2\n\t" + /*#0 low+=residue.*/ + "paddsw (%[residue]),%%mm0\n\t" + /*#1 Unpack lower src2.*/ + "punpcklbw %%mm7,%%mm5\n\t" + /*#0 high+=residue.*/ + "paddsw 8(%[residue]),%%mm1\n\t" + /*#1 Unpack higher src2.*/ + "punpckhbw %%mm7,%%mm3\n\t" + /*#1 Lower src1+src2.*/ + "paddsw %%mm4,%%mm5\n\t" + /*#0 Pack and saturate.*/ + "packuswb %%mm1,%%mm0\n\t" + /*#1 Higher src1+src2.*/ + "paddsw %%mm2,%%mm3\n\t" + /*#0 Write row.*/ + "movq %%mm0,(%[dst])\n\t" + /*#1 Build lo average.*/ + "psraw $1,%%mm5\n\t" + /*#1 Build hi average.*/ + "psraw $1,%%mm3\n\t" + /*#1 low+=residue.*/ + "paddsw 16(%[residue]),%%mm5\n\t" + /*#1 high+=residue.*/ + "paddsw 24(%[residue]),%%mm3\n\t" + /*#1 Pack and saturate.*/ + "packuswb %%mm3,%%mm5\n\t" + /*#1 Write row.*/ + "movq %%mm5,(%[dst],%[ystride])\n\t" + /*Advance residue ptr.*/ + "add $32,%[residue]\n\t" + /*Advance dest ptr.*/ + "lea (%[dst],%[ystride],2),%[dst]\n\t" + /*src1 is a plain read-write register operand: the '%' commutative + modifier is only valid on read-only operands, so it is not used.*/ + :[dst]"+r"(_dst),[residue]"+r"(_residue), + [src1]"+r"(_src1),[src2]"+r"(_src2) + :[ystride]"r"((ptrdiff_t)_ystride) + :"memory" + ); + } +} + +void oc_restore_fpu_mmx(void){ + __asm__ __volatile__("emms\n\t"); +} +#endif diff --git a/thirdparty/libtheora/x86/mmxfrag.h b/thirdparty/libtheora/x86/mmxfrag.h new file mode 100644 index 0000000000..a398427629 --- /dev/null +++ b/thirdparty/libtheora/x86/mmxfrag.h @@ -0,0 +1,64 @@ +#if !defined(_x86_mmxfrag_H) +# define _x86_mmxfrag_H (1) +# include +# include "x86int.h" + +#if defined(OC_X86_ASM) + +/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes + between
rows.*/ +#define OC_FRAG_COPY_MMX(_dst,_src,_ystride) \ + do{ \ + const unsigned char *src; \ + unsigned char *dst; \ + ptrdiff_t ystride3; \ + src=(_src); \ + dst=(_dst); \ + __asm__ __volatile__( \ + /*src+0*ystride*/ \ + "movq (%[src]),%%mm0\n\t" \ + /*src+1*ystride*/ \ + "movq (%[src],%[ystride]),%%mm1\n\t" \ + /*ystride3=ystride*3*/ \ + "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" \ + /*src+2*ystride*/ \ + "movq (%[src],%[ystride],2),%%mm2\n\t" \ + /*src+3*ystride*/ \ + "movq (%[src],%[ystride3]),%%mm3\n\t" \ + /*dst+0*ystride*/ \ + "movq %%mm0,(%[dst])\n\t" \ + /*dst+1*ystride*/ \ + "movq %%mm1,(%[dst],%[ystride])\n\t" \ + /*Pointer to next 4.*/ \ + "lea (%[src],%[ystride],4),%[src]\n\t" \ + /*dst+2*ystride*/ \ + "movq %%mm2,(%[dst],%[ystride],2)\n\t" \ + /*dst+3*ystride*/ \ + "movq %%mm3,(%[dst],%[ystride3])\n\t" \ + /*Pointer to next 4.*/ \ + "lea (%[dst],%[ystride],4),%[dst]\n\t" \ + /*src+0*ystride*/ \ + "movq (%[src]),%%mm0\n\t" \ + /*src+1*ystride*/ \ + "movq (%[src],%[ystride]),%%mm1\n\t" \ + /*src+2*ystride*/ \ + "movq (%[src],%[ystride],2),%%mm2\n\t" \ + /*src+3*ystride*/ \ + "movq (%[src],%[ystride3]),%%mm3\n\t" \ + /*dst+0*ystride*/ \ + "movq %%mm0,(%[dst])\n\t" \ + /*dst+1*ystride*/ \ + "movq %%mm1,(%[dst],%[ystride])\n\t" \ + /*dst+2*ystride*/ \ + "movq %%mm2,(%[dst],%[ystride],2)\n\t" \ + /*dst+3*ystride*/ \ + "movq %%mm3,(%[dst],%[ystride3])\n\t" \ + :[dst]"+r"(dst),[src]"+r"(src),[ystride3]"=&r"(ystride3) \ + :[ystride]"r"((ptrdiff_t)(_ystride)) \ + :"memory" \ + ); \ + } \ + while(0) + +# endif +#endif diff --git a/thirdparty/libtheora/x86/mmxidct.c b/thirdparty/libtheora/x86/mmxidct.c new file mode 100644 index 0000000000..76424e6364 --- /dev/null +++ b/thirdparty/libtheora/x86/mmxidct.c @@ -0,0 +1,564 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. 
* + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: mmxidct.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +/*MMX acceleration of Theora's iDCT. + Originally written by Rudolf Marek, based on code from On2's VP3.*/ +#include "x86int.h" +#include "../dct.h" + +#if defined(OC_X86_ASM) + +/*These are offsets into the table of constants below.*/ +/*7 rows of cosines, in order: pi/16 * (1 ... 7).*/ +#define OC_COSINE_OFFSET (0) +/*A row of 8's.*/ +#define OC_EIGHT_OFFSET (56) + + + +/*A table of constants used by the MMX routines.*/ +static const ogg_uint16_t __attribute__((aligned(8),used)) + OC_IDCT_CONSTS[(7+1)*4]={ + (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7, + (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7, + (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6, + (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6, + (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5, + (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5, + (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4, + (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4, + (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3, + (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3, + (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2, + (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2, + (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1, + (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1, + 8, 8, 8, 8 +}; + +/*Converts the expression in the argument to a string.*/ +#define OC_M2STR(_s) #_s + +/*38 cycles*/ +#define OC_IDCT_BEGIN \ + "#OC_IDCT_BEGIN\n\t" \ + "movq "OC_I(3)",%%mm2\n\t" \ + "movq "OC_C(3)",%%mm6\n\t" \ + "movq %%mm2,%%mm4\n\t" \ 
+ "movq "OC_J(5)",%%mm7\n\t" \ + "pmulhw %%mm6,%%mm4\n\t" \ + "movq "OC_C(5)",%%mm1\n\t" \ + "pmulhw %%mm7,%%mm6\n\t" \ + "movq %%mm1,%%mm5\n\t" \ + "pmulhw %%mm2,%%mm1\n\t" \ + "movq "OC_I(1)",%%mm3\n\t" \ + "pmulhw %%mm7,%%mm5\n\t" \ + "movq "OC_C(1)",%%mm0\n\t" \ + "paddw %%mm2,%%mm4\n\t" \ + "paddw %%mm7,%%mm6\n\t" \ + "paddw %%mm1,%%mm2\n\t" \ + "movq "OC_J(7)",%%mm1\n\t" \ + "paddw %%mm5,%%mm7\n\t" \ + "movq %%mm0,%%mm5\n\t" \ + "pmulhw %%mm3,%%mm0\n\t" \ + "paddw %%mm7,%%mm4\n\t" \ + "pmulhw %%mm1,%%mm5\n\t" \ + "movq "OC_C(7)",%%mm7\n\t" \ + "psubw %%mm2,%%mm6\n\t" \ + "paddw %%mm3,%%mm0\n\t" \ + "pmulhw %%mm7,%%mm3\n\t" \ + "movq "OC_I(2)",%%mm2\n\t" \ + "pmulhw %%mm1,%%mm7\n\t" \ + "paddw %%mm1,%%mm5\n\t" \ + "movq %%mm2,%%mm1\n\t" \ + "pmulhw "OC_C(2)",%%mm2\n\t" \ + "psubw %%mm5,%%mm3\n\t" \ + "movq "OC_J(6)",%%mm5\n\t" \ + "paddw %%mm7,%%mm0\n\t" \ + "movq %%mm5,%%mm7\n\t" \ + "psubw %%mm4,%%mm0\n\t" \ + "pmulhw "OC_C(2)",%%mm5\n\t" \ + "paddw %%mm1,%%mm2\n\t" \ + "pmulhw "OC_C(6)",%%mm1\n\t" \ + "paddw %%mm4,%%mm4\n\t" \ + "paddw %%mm0,%%mm4\n\t" \ + "psubw %%mm6,%%mm3\n\t" \ + "paddw %%mm7,%%mm5\n\t" \ + "paddw %%mm6,%%mm6\n\t" \ + "pmulhw "OC_C(6)",%%mm7\n\t" \ + "paddw %%mm3,%%mm6\n\t" \ + "movq %%mm4,"OC_I(1)"\n\t" \ + "psubw %%mm5,%%mm1\n\t" \ + "movq "OC_C(4)",%%mm4\n\t" \ + "movq %%mm3,%%mm5\n\t" \ + "pmulhw %%mm4,%%mm3\n\t" \ + "paddw %%mm2,%%mm7\n\t" \ + "movq %%mm6,"OC_I(2)"\n\t" \ + "movq %%mm0,%%mm2\n\t" \ + "movq "OC_I(0)",%%mm6\n\t" \ + "pmulhw %%mm4,%%mm0\n\t" \ + "paddw %%mm3,%%mm5\n\t" \ + "movq "OC_J(4)",%%mm3\n\t" \ + "psubw %%mm1,%%mm5\n\t" \ + "paddw %%mm0,%%mm2\n\t" \ + "psubw %%mm3,%%mm6\n\t" \ + "movq %%mm6,%%mm0\n\t" \ + "pmulhw %%mm4,%%mm6\n\t" \ + "paddw %%mm3,%%mm3\n\t" \ + "paddw %%mm1,%%mm1\n\t" \ + "paddw %%mm0,%%mm3\n\t" \ + "paddw %%mm5,%%mm1\n\t" \ + "pmulhw %%mm3,%%mm4\n\t" \ + "paddw %%mm0,%%mm6\n\t" \ + "psubw %%mm2,%%mm6\n\t" \ + "paddw %%mm2,%%mm2\n\t" \ + "movq "OC_I(1)",%%mm0\n\t" \ + "paddw %%mm6,%%mm2\n\t" \ 
+ "paddw %%mm3,%%mm4\n\t" \ + "psubw %%mm1,%%mm2\n\t" \ + "#end OC_IDCT_BEGIN\n\t" \ + +/*38+8=46 cycles.*/ +#define OC_ROW_IDCT \ + "#OC_ROW_IDCT\n" \ + OC_IDCT_BEGIN \ + /*r3=D'*/ \ + "movq "OC_I(2)",%%mm3\n\t" \ + /*r4=E'=E-G*/ \ + "psubw %%mm7,%%mm4\n\t" \ + /*r1=H'+H'*/ \ + "paddw %%mm1,%%mm1\n\t" \ + /*r7=G+G*/ \ + "paddw %%mm7,%%mm7\n\t" \ + /*r1=R1=A''+H'*/ \ + "paddw %%mm2,%%mm1\n\t" \ + /*r7=G'=E+G*/ \ + "paddw %%mm4,%%mm7\n\t" \ + /*r4=R4=E'-D'*/ \ + "psubw %%mm3,%%mm4\n\t" \ + "paddw %%mm3,%%mm3\n\t" \ + /*r6=R6=F'-B''*/ \ + "psubw %%mm5,%%mm6\n\t" \ + "paddw %%mm5,%%mm5\n\t" \ + /*r3=R3=E'+D'*/ \ + "paddw %%mm4,%%mm3\n\t" \ + /*r5=R5=F'+B''*/ \ + "paddw %%mm6,%%mm5\n\t" \ + /*r7=R7=G'-C'*/ \ + "psubw %%mm0,%%mm7\n\t" \ + "paddw %%mm0,%%mm0\n\t" \ + /*Save R1.*/ \ + "movq %%mm1,"OC_I(1)"\n\t" \ + /*r0=R0=G.+C.*/ \ + "paddw %%mm7,%%mm0\n\t" \ + "#end OC_ROW_IDCT\n\t" \ + +/*The following macro does two 4x4 transposes in place. + At entry, we assume: + r0 = a3 a2 a1 a0 + I(1) = b3 b2 b1 b0 + r2 = c3 c2 c1 c0 + r3 = d3 d2 d1 d0 + + r4 = e3 e2 e1 e0 + r5 = f3 f2 f1 f0 + r6 = g3 g2 g1 g0 + r7 = h3 h2 h1 h0 + + At exit, we have: + I(0) = d0 c0 b0 a0 + I(1) = d1 c1 b1 a1 + I(2) = d2 c2 b2 a2 + I(3) = d3 c3 b3 a3 + + J(4) = h0 g0 f0 e0 + J(5) = h1 g1 f1 e1 + J(6) = h2 g2 f2 e2 + J(7) = h3 g3 f3 e3 + + I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3. + J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7. 
+ + Since r1 is free at entry, we calculate the Js first.*/ +/*19 cycles.*/ +#define OC_TRANSPOSE \ + "#OC_TRANSPOSE\n\t" \ + "movq %%mm4,%%mm1\n\t" \ + "punpcklwd %%mm5,%%mm4\n\t" \ + "movq %%mm0,"OC_I(0)"\n\t" \ + "punpckhwd %%mm5,%%mm1\n\t" \ + "movq %%mm6,%%mm0\n\t" \ + "punpcklwd %%mm7,%%mm6\n\t" \ + "movq %%mm4,%%mm5\n\t" \ + "punpckldq %%mm6,%%mm4\n\t" \ + "punpckhdq %%mm6,%%mm5\n\t" \ + "movq %%mm1,%%mm6\n\t" \ + "movq %%mm4,"OC_J(4)"\n\t" \ + "punpckhwd %%mm7,%%mm0\n\t" \ + "movq %%mm5,"OC_J(5)"\n\t" \ + "punpckhdq %%mm0,%%mm6\n\t" \ + "movq "OC_I(0)",%%mm4\n\t" \ + "punpckldq %%mm0,%%mm1\n\t" \ + "movq "OC_I(1)",%%mm5\n\t" \ + "movq %%mm4,%%mm0\n\t" \ + "movq %%mm6,"OC_J(7)"\n\t" \ + "punpcklwd %%mm5,%%mm0\n\t" \ + "movq %%mm1,"OC_J(6)"\n\t" \ + "punpckhwd %%mm5,%%mm4\n\t" \ + "movq %%mm2,%%mm5\n\t" \ + "punpcklwd %%mm3,%%mm2\n\t" \ + "movq %%mm0,%%mm1\n\t" \ + "punpckldq %%mm2,%%mm0\n\t" \ + "punpckhdq %%mm2,%%mm1\n\t" \ + "movq %%mm4,%%mm2\n\t" \ + "movq %%mm0,"OC_I(0)"\n\t" \ + "punpckhwd %%mm3,%%mm5\n\t" \ + "movq %%mm1,"OC_I(1)"\n\t" \ + "punpckhdq %%mm5,%%mm4\n\t" \ + "punpckldq %%mm5,%%mm2\n\t" \ + "movq %%mm4,"OC_I(3)"\n\t" \ + "movq %%mm2,"OC_I(2)"\n\t" \ + "#end OC_TRANSPOSE\n\t" \ + +/*38+19=57 cycles.*/ +#define OC_COLUMN_IDCT \ + "#OC_COLUMN_IDCT\n" \ + OC_IDCT_BEGIN \ + "paddw "OC_8",%%mm2\n\t" \ + /*r1=H'+H'*/ \ + "paddw %%mm1,%%mm1\n\t" \ + /*r1=R1=A''+H'*/ \ + "paddw %%mm2,%%mm1\n\t" \ + /*r2=NR2*/ \ + "psraw $4,%%mm2\n\t" \ + /*r4=E'=E-G*/ \ + "psubw %%mm7,%%mm4\n\t" \ + /*r1=NR1*/ \ + "psraw $4,%%mm1\n\t" \ + /*r3=D'*/ \ + "movq "OC_I(2)",%%mm3\n\t" \ + /*r7=G+G*/ \ + "paddw %%mm7,%%mm7\n\t" \ + /*Store NR2 at I(2).*/ \ + "movq %%mm2,"OC_I(2)"\n\t" \ + /*r7=G'=E+G*/ \ + "paddw %%mm4,%%mm7\n\t" \ + /*Store NR1 at I(1).*/ \ + "movq %%mm1,"OC_I(1)"\n\t" \ + /*r4=R4=E'-D'*/ \ + "psubw %%mm3,%%mm4\n\t" \ + "paddw "OC_8",%%mm4\n\t" \ + /*r3=D'+D'*/ \ + "paddw %%mm3,%%mm3\n\t" \ + /*r3=R3=E'+D'*/ \ + "paddw %%mm4,%%mm3\n\t" \ + /*r4=NR4*/ \ + 
"psraw $4,%%mm4\n\t" \ + /*r6=R6=F'-B''*/ \ + "psubw %%mm5,%%mm6\n\t" \ + /*r3=NR3*/ \ + "psraw $4,%%mm3\n\t" \ + "paddw "OC_8",%%mm6\n\t" \ + /*r5=B''+B''*/ \ + "paddw %%mm5,%%mm5\n\t" \ + /*r5=R5=F'+B''*/ \ + "paddw %%mm6,%%mm5\n\t" \ + /*r6=NR6*/ \ + "psraw $4,%%mm6\n\t" \ + /*Store NR4 at J(4).*/ \ + "movq %%mm4,"OC_J(4)"\n\t" \ + /*r5=NR5*/ \ + "psraw $4,%%mm5\n\t" \ + /*Store NR3 at I(3).*/ \ + "movq %%mm3,"OC_I(3)"\n\t" \ + /*r7=R7=G'-C'*/ \ + "psubw %%mm0,%%mm7\n\t" \ + "paddw "OC_8",%%mm7\n\t" \ + /*r0=C'+C'*/ \ + "paddw %%mm0,%%mm0\n\t" \ + /*r0=R0=G'+C'*/ \ + "paddw %%mm7,%%mm0\n\t" \ + /*r7=NR7*/ \ + "psraw $4,%%mm7\n\t" \ + /*Store NR6 at J(6).*/ \ + "movq %%mm6,"OC_J(6)"\n\t" \ + /*r0=NR0*/ \ + "psraw $4,%%mm0\n\t" \ + /*Store NR5 at J(5).*/ \ + "movq %%mm5,"OC_J(5)"\n\t" \ + /*Store NR7 at J(7).*/ \ + "movq %%mm7,"OC_J(7)"\n\t" \ + /*Store NR0 at I(0).*/ \ + "movq %%mm0,"OC_I(0)"\n\t" \ + "#end OC_COLUMN_IDCT\n\t" \ + +#define OC_MID(_m,_i) OC_M2STR(_m+(_i)*8)"(%[c])" +#define OC_C(_i) OC_MID(OC_COSINE_OFFSET,_i-1) +#define OC_8 OC_MID(OC_EIGHT_OFFSET,0) + +static void oc_idct8x8_slow(ogg_int16_t _y[64]){ + /*This routine accepts an 8x8 matrix, but in partially transposed form. 
+ Every 4x4 block is transposed. The transform is done in place: the asm stores its results back through %[y], hence the "memory" clobber.*/ + __asm__ __volatile__( +#define OC_I(_k) OC_M2STR((_k*16))"(%[y])" +#define OC_J(_k) OC_M2STR(((_k-4)*16)+8)"(%[y])" + OC_ROW_IDCT + OC_TRANSPOSE +#undef OC_I +#undef OC_J +#define OC_I(_k) OC_M2STR((_k*16)+64)"(%[y])" +#define OC_J(_k) OC_M2STR(((_k-4)*16)+72)"(%[y])" + OC_ROW_IDCT + OC_TRANSPOSE +#undef OC_I +#undef OC_J +#define OC_I(_k) OC_M2STR((_k*16))"(%[y])" +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT +#undef OC_I +#undef OC_J +#define OC_I(_k) OC_M2STR((_k*16)+8)"(%[y])" +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT +#undef OC_I +#undef OC_J + : + :[y]"r"(_y),[c]"r"(OC_IDCT_CONSTS):"memory" + ); +} + +/*25 cycles.*/ +#define OC_IDCT_BEGIN_10 \ + "#OC_IDCT_BEGIN_10\n\t" \ + "movq "OC_I(3)",%%mm2\n\t" \ + "nop\n\t" \ + "movq "OC_C(3)",%%mm6\n\t" \ + "movq %%mm2,%%mm4\n\t" \ + "movq "OC_C(5)",%%mm1\n\t" \ + "pmulhw %%mm6,%%mm4\n\t" \ + "movq "OC_I(1)",%%mm3\n\t" \ + "pmulhw %%mm2,%%mm1\n\t" \ + "movq "OC_C(1)",%%mm0\n\t" \ + "paddw %%mm2,%%mm4\n\t" \ + "pxor %%mm6,%%mm6\n\t" \ + "paddw %%mm1,%%mm2\n\t" \ + "movq "OC_I(2)",%%mm5\n\t" \ + "pmulhw %%mm3,%%mm0\n\t" \ + "movq %%mm5,%%mm1\n\t" \ + "paddw %%mm3,%%mm0\n\t" \ + "pmulhw "OC_C(7)",%%mm3\n\t" \ + "psubw %%mm2,%%mm6\n\t" \ + "pmulhw "OC_C(2)",%%mm5\n\t" \ + "psubw %%mm4,%%mm0\n\t" \ + "movq "OC_I(2)",%%mm7\n\t" \ + "paddw %%mm4,%%mm4\n\t" \ + "paddw %%mm5,%%mm7\n\t" \ + "paddw %%mm0,%%mm4\n\t" \ + "pmulhw "OC_C(6)",%%mm1\n\t" \ + "psubw %%mm6,%%mm3\n\t" \ + "movq %%mm4,"OC_I(1)"\n\t" \ + "paddw %%mm6,%%mm6\n\t" \ + "movq "OC_C(4)",%%mm4\n\t" \ + "paddw %%mm3,%%mm6\n\t" \ + "movq %%mm3,%%mm5\n\t" \ + "pmulhw %%mm4,%%mm3\n\t" \ + "movq %%mm6,"OC_I(2)"\n\t" \ + "movq %%mm0,%%mm2\n\t" \ + "movq "OC_I(0)",%%mm6\n\t" \ + "pmulhw %%mm4,%%mm0\n\t" \ + "paddw %%mm3,%%mm5\n\t" \ + "paddw %%mm0,%%mm2\n\t" \ + "psubw %%mm1,%%mm5\n\t" \ + "pmulhw %%mm4,%%mm6\n\t" \ + "paddw "OC_I(0)",%%mm6\n\t" \ + "paddw %%mm1,%%mm1\n\t" \ + "movq %%mm6,%%mm4\n\t" \ + "paddw %%mm5,%%mm1\n\t" \ + "psubw
%%mm2,%%mm6\n\t" \ + "paddw %%mm2,%%mm2\n\t" \ + "movq "OC_I(1)",%%mm0\n\t" \ + "paddw %%mm6,%%mm2\n\t" \ + "psubw %%mm1,%%mm2\n\t" \ + "nop\n\t" \ + "#end OC_IDCT_BEGIN_10\n\t" \ + +/*25+8=33 cycles.*/ +#define OC_ROW_IDCT_10 \ + "#OC_ROW_IDCT_10\n\t" \ + OC_IDCT_BEGIN_10 \ + /*r3=D'*/ \ + "movq "OC_I(2)",%%mm3\n\t" \ + /*r4=E'=E-G*/ \ + "psubw %%mm7,%%mm4\n\t" \ + /*r1=H'+H'*/ \ + "paddw %%mm1,%%mm1\n\t" \ + /*r7=G+G*/ \ + "paddw %%mm7,%%mm7\n\t" \ + /*r1=R1=A''+H'*/ \ + "paddw %%mm2,%%mm1\n\t" \ + /*r7=G'=E+G*/ \ + "paddw %%mm4,%%mm7\n\t" \ + /*r4=R4=E'-D'*/ \ + "psubw %%mm3,%%mm4\n\t" \ + "paddw %%mm3,%%mm3\n\t" \ + /*r6=R6=F'-B''*/ \ + "psubw %%mm5,%%mm6\n\t" \ + "paddw %%mm5,%%mm5\n\t" \ + /*r3=R3=E'+D'*/ \ + "paddw %%mm4,%%mm3\n\t" \ + /*r5=R5=F'+B''*/ \ + "paddw %%mm6,%%mm5\n\t" \ + /*r7=R7=G'-C'*/ \ + "psubw %%mm0,%%mm7\n\t" \ + "paddw %%mm0,%%mm0\n\t" \ + /*Save R1.*/ \ + "movq %%mm1,"OC_I(1)"\n\t" \ + /*r0=R0=G'+C'*/ \ + "paddw %%mm7,%%mm0\n\t" \ + "#end OC_ROW_IDCT_10\n\t" \ + +/*25+19=44 cycles'*/ +#define OC_COLUMN_IDCT_10 \ + "#OC_COLUMN_IDCT_10\n\t" \ + OC_IDCT_BEGIN_10 \ + "paddw "OC_8",%%mm2\n\t" \ + /*r1=H'+H'*/ \ + "paddw %%mm1,%%mm1\n\t" \ + /*r1=R1=A''+H'*/ \ + "paddw %%mm2,%%mm1\n\t" \ + /*r2=NR2*/ \ + "psraw $4,%%mm2\n\t" \ + /*r4=E'=E-G*/ \ + "psubw %%mm7,%%mm4\n\t" \ + /*r1=NR1*/ \ + "psraw $4,%%mm1\n\t" \ + /*r3=D'*/ \ + "movq "OC_I(2)",%%mm3\n\t" \ + /*r7=G+G*/ \ + "paddw %%mm7,%%mm7\n\t" \ + /*Store NR2 at I(2).*/ \ + "movq %%mm2,"OC_I(2)"\n\t" \ + /*r7=G'=E+G*/ \ + "paddw %%mm4,%%mm7\n\t" \ + /*Store NR1 at I(1).*/ \ + "movq %%mm1,"OC_I(1)"\n\t" \ + /*r4=R4=E'-D'*/ \ + "psubw %%mm3,%%mm4\n\t" \ + "paddw "OC_8",%%mm4\n\t" \ + /*r3=D'+D'*/ \ + "paddw %%mm3,%%mm3\n\t" \ + /*r3=R3=E'+D'*/ \ + "paddw %%mm4,%%mm3\n\t" \ + /*r4=NR4*/ \ + "psraw $4,%%mm4\n\t" \ + /*r6=R6=F'-B''*/ \ + "psubw %%mm5,%%mm6\n\t" \ + /*r3=NR3*/ \ + "psraw $4,%%mm3\n\t" \ + "paddw "OC_8",%%mm6\n\t" \ + /*r5=B''+B''*/ \ + "paddw %%mm5,%%mm5\n\t" \ + /*r5=R5=F'+B''*/ \ 
+ "paddw %%mm6,%%mm5\n\t" \ + /*r6=NR6*/ \ + "psraw $4,%%mm6\n\t" \ + /*Store NR4 at J(4).*/ \ + "movq %%mm4,"OC_J(4)"\n\t" \ + /*r5=NR5*/ \ + "psraw $4,%%mm5\n\t" \ + /*Store NR3 at I(3).*/ \ + "movq %%mm3,"OC_I(3)"\n\t" \ + /*r7=R7=G'-C'*/ \ + "psubw %%mm0,%%mm7\n\t" \ + "paddw "OC_8",%%mm7\n\t" \ + /*r0=C'+C'*/ \ + "paddw %%mm0,%%mm0\n\t" \ + /*r0=R0=G'+C'*/ \ + "paddw %%mm7,%%mm0\n\t" \ + /*r7=NR7*/ \ + "psraw $4,%%mm7\n\t" \ + /*Store NR6 at J(6).*/ \ + "movq %%mm6,"OC_J(6)"\n\t" \ + /*r0=NR0*/ \ + "psraw $4,%%mm0\n\t" \ + /*Store NR5 at J(5).*/ \ + "movq %%mm5,"OC_J(5)"\n\t" \ + /*Store NR7 at J(7).*/ \ + "movq %%mm7,"OC_J(7)"\n\t" \ + /*Store NR0 at I(0).*/ \ + "movq %%mm0,"OC_I(0)"\n\t" \ + "#end OC_COLUMN_IDCT_10\n\t" \ + +static void oc_idct8x8_10(ogg_int16_t _y[64]){ + __asm__ __volatile__( +#define OC_I(_k) OC_M2STR((_k*16))"(%[y])" +#define OC_J(_k) OC_M2STR(((_k-4)*16)+8)"(%[y])" + /*Done with dequant, descramble, and partial transpose. + Now do the iDCT itself, in place: the results are stored back through %[y], hence the "memory" clobber.*/ + OC_ROW_IDCT_10 + OC_TRANSPOSE +#undef OC_I +#undef OC_J +#define OC_I(_k) OC_M2STR((_k*16))"(%[y])" +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT_10 +#undef OC_I +#undef OC_J +#define OC_I(_k) OC_M2STR((_k*16)+8)"(%[y])" +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT_10 +#undef OC_I +#undef OC_J + : + :[y]"r"(_y),[c]"r"(OC_IDCT_CONSTS):"memory" + ); +} + +/*Performs an inverse 8x8 Type-II DCT transform. + The input is assumed to be scaled by a factor of 4 relative to orthonormal + version of the transform.*/ +void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi){ + /*_last_zzi is subtly different from an actual count of the number of + coefficients we decoded for this block. + It contains the value of zzi BEFORE the final token in the block was + decoded. + In most cases this is an EOB token (the continuation of an EOB run from a + previous block counts), and so this is the same as the coefficient count.
+ However, in the case that the last token was NOT an EOB token, but filled + the block up with exactly 64 coefficients, _last_zzi will be less than 64. + Provided the last token was not a pure zero run, the minimum value it can + be is 46, and so that doesn't affect any of the cases in this routine. + However, if the last token WAS a pure zero run of length 63, then _last_zzi + will be 1 while the number of coefficients decoded is 64. + Thus, we will trigger the following special case, where the real + coefficient count would not. + Note also that a zero run of length 64 will give _last_zzi a value of 0, + but we still process the DC coefficient, which might have a non-zero value + due to DC prediction. + Although convoluted, this is arguably the correct behavior: it allows us to + use a smaller transform when the block ends with a long zero run instead + of a normal EOB token. + It could be smarter... multiple separate zero runs at the end of a block + will fool it, but an encoder that generates these really deserves what it + gets. + Needless to say we inherited this approach from VP3.*/ + /*Then perform the iDCT.*/ + if(_last_zzi<10)oc_idct8x8_10(_y); + else oc_idct8x8_slow(_y); +} + +#endif diff --git a/thirdparty/libtheora/x86/mmxloop.h b/thirdparty/libtheora/x86/mmxloop.h new file mode 100644 index 0000000000..2e870c795d --- /dev/null +++ b/thirdparty/libtheora/x86/mmxloop.h @@ -0,0 +1,215 @@ +#if !defined(_x86_mmxloop_H) +# define _x86_mmxloop_H (1) +# include <stddef.h> +# include "x86int.h" + +#if defined(OC_X86_ASM) + +/*On entry, mm0={a0,...,a7}, mm1={b0,...,b7}, mm2={c0,...,c7}, mm3={d0,...,d7}.
+ On exit, mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)} and + mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}; mm0 and mm3 are clobbered.*/ +#define OC_LOOP_FILTER8_MMX \ + "#OC_LOOP_FILTER8_MMX\n\t" \ + /*mm7=0 (mm4 through mm7 are used as scratch registers throughout this macro).*/ \ + "pxor %%mm7,%%mm7\n\t" \ + /*mm6:mm0={a0,...,a7}*/ \ + "movq %%mm0,%%mm6\n\t" \ + "punpcklbw %%mm7,%%mm0\n\t" \ + "punpckhbw %%mm7,%%mm6\n\t" \ + /*mm3:mm5={d0,...,d7}*/ \ + "movq %%mm3,%%mm5\n\t" \ + "punpcklbw %%mm7,%%mm3\n\t" \ + "punpckhbw %%mm7,%%mm5\n\t" \ + /*mm6:mm0={a0-d0,...,a7-d7}*/ \ + "psubw %%mm3,%%mm0\n\t" \ + "psubw %%mm5,%%mm6\n\t" \ + /*mm3:mm1={b0,...,b7}*/ \ + "movq %%mm1,%%mm3\n\t" \ + "punpcklbw %%mm7,%%mm1\n\t" \ + "movq %%mm2,%%mm4\n\t" \ + "punpckhbw %%mm7,%%mm3\n\t" \ + /*mm5:mm4={c0,...,c7}*/ \ + "movq %%mm2,%%mm5\n\t" \ + "punpcklbw %%mm7,%%mm4\n\t" \ + "punpckhbw %%mm7,%%mm5\n\t" \ + /*mm7={3}x4 \ + mm5:mm4={c0-b0,...,c7-b7}*/ \ + "pcmpeqw %%mm7,%%mm7\n\t" \ + "psubw %%mm1,%%mm4\n\t" \ + "psrlw $14,%%mm7\n\t" \ + "psubw %%mm3,%%mm5\n\t" \ + /*Scale by 3.*/ \ + "pmullw %%mm7,%%mm4\n\t" \ + "pmullw %%mm7,%%mm5\n\t" \ + /*mm7={4}x4 \ + mm5:mm4=f={a0-d0+3*(c0-b0),...,a7-d7+3*(c7-b7)}*/ \ + "psrlw $1,%%mm7\n\t" \ + "paddw %%mm0,%%mm4\n\t" \ + "psllw $2,%%mm7\n\t" \ + "movq (%[ll]),%%mm0\n\t" \ + "paddw %%mm6,%%mm5\n\t" \ + /*R_i has the range [-127,128], so we compute -R_i instead. \ + mm4=-R_i=-(f+4>>3)=0xFF^(f-4>>3)*/ \ + "psubw %%mm7,%%mm4\n\t" \ + "psubw %%mm7,%%mm5\n\t" \ + "psraw $3,%%mm4\n\t" \ + "psraw $3,%%mm5\n\t" \ + "pcmpeqb %%mm7,%%mm7\n\t" \ + "packsswb %%mm5,%%mm4\n\t" \ + "pxor %%mm6,%%mm6\n\t" \ + "pxor %%mm7,%%mm4\n\t" \ + "packuswb %%mm3,%%mm1\n\t" \ + /*Now compute lflim of -mm4 cf. Section 7.10 of the spec.*/ \ + /*There's no unsigned byte+signed byte with unsigned saturation op code, so \ + we have to split things by sign (the other option is to work in 16 bits, \ + but working in 8 bits gives much better parallelism). \ + We compute abs(R_i), but save a mask of which terms were negative in mm6.
\ + Then we compute mm4=abs(lflim(R_i,L))=min(abs(R_i),max(2*L-abs(R_i),0)). \ + Finally, we split mm4 into positive and negative pieces using the mask in \ + mm6, and add and subtract them as appropriate.*/ \ + /*mm4=abs(-R_i)*/ \ + /*mm7=255-2*L*/ \ + "pcmpgtb %%mm4,%%mm6\n\t" \ + "psubb %%mm0,%%mm7\n\t" \ + "pxor %%mm6,%%mm4\n\t" \ + "psubb %%mm0,%%mm7\n\t" \ + "psubb %%mm6,%%mm4\n\t" \ + /*mm7=255-max(2*L-abs(R_i),0)*/ \ + "paddusb %%mm4,%%mm7\n\t" \ + /*mm4=min(abs(R_i),max(2*L-abs(R_i),0))*/ \ + "paddusb %%mm7,%%mm4\n\t" \ + "psubusb %%mm7,%%mm4\n\t" \ + /*Now split mm4 by the original sign of -R_i.*/ \ + "movq %%mm4,%%mm5\n\t" \ + "pand %%mm6,%%mm4\n\t" \ + "pandn %%mm5,%%mm6\n\t" \ + /*mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)}*/ \ + /*mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}*/ \ + "paddusb %%mm4,%%mm1\n\t" \ + "psubusb %%mm4,%%mm2\n\t" \ + "psubusb %%mm6,%%mm1\n\t" \ + "paddusb %%mm6,%%mm2\n\t" \ + +#define OC_LOOP_FILTER_V_MMX(_pix,_ystride,_ll) \ + do{ \ + ptrdiff_t ystride3__; \ + __asm__ __volatile__( \ + /*mm0={a0,...,a7}*/ \ + "movq (%[pix]),%%mm0\n\t" \ + /*ystride3=_ystride*3*/ \ + "lea (%[ystride],%[ystride],2),%[ystride3]\n\t" \ + /*mm3={d0,...,d7}*/ \ + "movq (%[pix],%[ystride3]),%%mm3\n\t" \ + /*mm1={b0,...,b7}*/ \ + "movq (%[pix],%[ystride]),%%mm1\n\t" \ + /*mm2={c0,...,c7}*/ \ + "movq (%[pix],%[ystride],2),%%mm2\n\t" \ + OC_LOOP_FILTER8_MMX \ + /*Write it back out.*/ \ + "movq %%mm1,(%[pix],%[ystride])\n\t" \ + "movq %%mm2,(%[pix],%[ystride],2)\n\t" \ + :[ystride3]"=&r"(ystride3__) \ + :[pix]"r"(_pix-_ystride*2),[ystride]"r"((ptrdiff_t)(_ystride)), \ + [ll]"r"(_ll) \ + :"memory" \ + ); \ + } \ + while(0) + +#define OC_LOOP_FILTER_H_MMX(_pix,_ystride,_ll) \ + do{ \ + unsigned char *pix__; \ + ptrdiff_t ystride3__; \ + ptrdiff_t d__; \ + pix__=(_pix)-2; \ + __asm__ __volatile__( \ + /*x x x x d0 c0 b0 a0*/ \ + "movd (%[pix]),%%mm0\n\t" \ + /*x x x x d1 c1 b1 a1*/ \ + "movd (%[pix],%[ystride]),%%mm1\n\t" \ + /*ystride3=_ystride*3*/ \ + "lea 
(%[ystride],%[ystride],2),%[ystride3]\n\t" \ + /*x x x x d2 c2 b2 a2*/ \ + "movd (%[pix],%[ystride],2),%%mm2\n\t" \ + /*x x x x d3 c3 b3 a3*/ \ + "lea (%[pix],%[ystride],4),%[d]\n\t" \ + "movd (%[pix],%[ystride3]),%%mm3\n\t" \ + /*x x x x d4 c4 b4 a4*/ \ + "movd (%[d]),%%mm4\n\t" \ + /*x x x x d5 c5 b5 a5*/ \ + "movd (%[d],%[ystride]),%%mm5\n\t" \ + /*x x x x d6 c6 b6 a6*/ \ + "movd (%[d],%[ystride],2),%%mm6\n\t" \ + /*x x x x d7 c7 b7 a7*/ \ + "movd (%[d],%[ystride3]),%%mm7\n\t" \ + /*mm0=d1 d0 c1 c0 b1 b0 a1 a0*/ \ + "punpcklbw %%mm1,%%mm0\n\t" \ + /*mm2=d3 d2 c3 c2 b3 b2 a3 a2*/ \ + "punpcklbw %%mm3,%%mm2\n\t" \ + /*mm3=d1 d0 c1 c0 b1 b0 a1 a0*/ \ + "movq %%mm0,%%mm3\n\t" \ + /*mm0=b3 b2 b1 b0 a3 a2 a1 a0*/ \ + "punpcklwd %%mm2,%%mm0\n\t" \ + /*mm3=d3 d2 d1 d0 c3 c2 c1 c0*/ \ + "punpckhwd %%mm2,%%mm3\n\t" \ + /*mm1=b3 b2 b1 b0 a3 a2 a1 a0*/ \ + "movq %%mm0,%%mm1\n\t" \ + /*mm4=d5 d4 c5 c4 b5 b4 a5 a4*/ \ + "punpcklbw %%mm5,%%mm4\n\t" \ + /*mm6=d7 d6 c7 c6 b7 b6 a7 a6*/ \ + "punpcklbw %%mm7,%%mm6\n\t" \ + /*mm5=d5 d4 c5 c4 b5 b4 a5 a4*/ \ + "movq %%mm4,%%mm5\n\t" \ + /*mm4=b7 b6 b5 b4 a7 a6 a5 a4*/ \ + "punpcklwd %%mm6,%%mm4\n\t" \ + /*mm5=d7 d6 d5 d4 c7 c6 c5 c4*/ \ + "punpckhwd %%mm6,%%mm5\n\t" \ + /*mm2=d3 d2 d1 d0 c3 c2 c1 c0*/ \ + "movq %%mm3,%%mm2\n\t" \ + /*mm0=a7 a6 a5 a4 a3 a2 a1 a0*/ \ + "punpckldq %%mm4,%%mm0\n\t" \ + /*mm1=b7 b6 b5 b4 b3 b2 b1 b0*/ \ + "punpckhdq %%mm4,%%mm1\n\t" \ + /*mm2=c7 c6 c5 c4 c3 c2 c1 c0*/ \ + "punpckldq %%mm5,%%mm2\n\t" \ + /*mm3=d7 d6 d5 d4 d3 d2 d1 d0*/ \ + "punpckhdq %%mm5,%%mm3\n\t" \ + OC_LOOP_FILTER8_MMX \ + /*mm2={b0+R_0'',...,b7+R_7''}*/ \ + "movq %%mm1,%%mm0\n\t" \ + /*mm1={b0+R_0'',c0-R_0'',...,b3+R_3'',c3-R_3''}*/ \ + "punpcklbw %%mm2,%%mm1\n\t" \ + /*mm2={b4+R_4'',c4-R_4'',...,b7+R_7'',c7-R_7''}*/ \ + "punpckhbw %%mm2,%%mm0\n\t" \ + /*[d]=c1 b1 c0 b0*/ \ + "movd %%mm1,%[d]\n\t" \ + "movw %w[d],1(%[pix])\n\t" \ + "psrlq $32,%%mm1\n\t" \ + "shr $16,%[d]\n\t" \ + "movw %w[d],1(%[pix],%[ystride])\n\t" \ + /*[d]=c3 b3 
c2 b2*/ \ + "movd %%mm1,%[d]\n\t" \ + "movw %w[d],1(%[pix],%[ystride],2)\n\t" \ + "shr $16,%[d]\n\t" \ + "movw %w[d],1(%[pix],%[ystride3])\n\t" \ + "lea (%[pix],%[ystride],4),%[pix]\n\t" \ + /*[d]=c5 b5 c4 b4*/ \ + "movd %%mm0,%[d]\n\t" \ + "movw %w[d],1(%[pix])\n\t" \ + "psrlq $32,%%mm0\n\t" \ + "shr $16,%[d]\n\t" \ + "movw %w[d],1(%[pix],%[ystride])\n\t" \ + /*[d]=c7 b7 c6 b6*/ \ + "movd %%mm0,%[d]\n\t" \ + "movw %w[d],1(%[pix],%[ystride],2)\n\t" \ + "shr $16,%[d]\n\t" \ + "movw %w[d],1(%[pix],%[ystride3])\n\t" \ + :[pix]"+r"(pix__),[ystride3]"=&r"(ystride3__),[d]"=&r"(d__) \ + :[ystride]"r"((ptrdiff_t)(_ystride)),[ll]"r"(_ll) \ + :"memory" \ + ); \ + } \ + while(0) + +# endif +#endif diff --git a/thirdparty/libtheora/x86/mmxstate.c b/thirdparty/libtheora/x86/mmxstate.c new file mode 100644 index 0000000000..808b0a789b --- /dev/null +++ b/thirdparty/libtheora/x86/mmxstate.c @@ -0,0 +1,188 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: mmxstate.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +/*MMX acceleration of complete fragment reconstruction algorithm. 
+ Originally written by Rudolf Marek.*/ +#include <string.h> +#include "x86int.h" +#include "mmxfrag.h" +#include "mmxloop.h" + +#if defined(OC_X86_ASM) + +void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){ + unsigned char *dst; + ptrdiff_t frag_buf_off; + int ystride; + int mb_mode; + /*Apply the inverse transform.*/ + /*Special case only having a DC component: all 64 outputs get the same value.*/ + if(_last_zzi<2){ + /*Note that this value must be unsigned, to keep the __asm__ block from + sign-extending it when it puts it in a register.*/ + ogg_uint16_t p; + /*We round this dequant product (and not any of the others) because there's + no iDCT rounding.*/ + p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); + /*Fill _dct_coeffs with p.*/ + __asm__ __volatile__( + /*mm0=0000 0000 0000 AAAA*/ + "movd %[p],%%mm0\n\t" + /*mm0=0000 0000 AAAA AAAA*/ + "punpcklwd %%mm0,%%mm0\n\t" + /*mm0=AAAA AAAA AAAA AAAA*/ + "punpckldq %%mm0,%%mm0\n\t" + "movq %%mm0,(%[y])\n\t" + "movq %%mm0,8(%[y])\n\t" + "movq %%mm0,16(%[y])\n\t" + "movq %%mm0,24(%[y])\n\t" + "movq %%mm0,32(%[y])\n\t" + "movq %%mm0,40(%[y])\n\t" + "movq %%mm0,48(%[y])\n\t" + "movq %%mm0,56(%[y])\n\t" + "movq %%mm0,64(%[y])\n\t" + "movq %%mm0,72(%[y])\n\t" + "movq %%mm0,80(%[y])\n\t" + "movq %%mm0,88(%[y])\n\t" + "movq %%mm0,96(%[y])\n\t" + "movq %%mm0,104(%[y])\n\t" + "movq %%mm0,112(%[y])\n\t" + "movq %%mm0,120(%[y])\n\t" + : + :[y]"r"(_dct_coeffs),[p]"r"((unsigned)p) + :"memory" + ); + } + else{ + /*Dequantize the DC coefficient.*/ + _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); + oc_idct8x8_mmx(_dct_coeffs,_last_zzi); + } + /*Fill in the target buffer.*/ + frag_buf_off=_state->frag_buf_offs[_fragi]; + mb_mode=_state->frags[_fragi].mb_mode; + ystride=_state->ref_ystride[_pli]; + dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off; +
if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs); + else{ + const unsigned char *ref; + int mvoffsets[2]; + ref= + _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]] + +frag_buf_off; + if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, + _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){ + oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, + _dct_coeffs); + } + else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs); + } +} + +/*We copy these entire function to inline the actual MMX routines so that we + use only a single indirect call.*/ + +/*Copies the fragments specified by the lists of fragment indices from one + frame to another. + _fragis: A pointer to a list of fragment indices. + _nfragis: The number of fragment indices to copy. + _dst_frame: The reference frame to copy to. + _src_frame: The reference frame to copy from. + _pli: The color plane the fragments lie in.*/ +void oc_state_frag_copy_list_mmx(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli){ + const ptrdiff_t *frag_buf_offs; + const unsigned char *src_frame_data; + unsigned char *dst_frame_data; + ptrdiff_t fragii; + int ystride; + dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]]; + src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]]; + ystride=_state->ref_ystride[_pli]; + frag_buf_offs=_state->frag_buf_offs; + for(fragii=0;fragii<_nfragis;fragii++){ + ptrdiff_t frag_buf_off; + frag_buf_off=frag_buf_offs[_fragis[fragii]]; + OC_FRAG_COPY_MMX(dst_frame_data+frag_buf_off, + src_frame_data+frag_buf_off,ystride); + } +} + +/*Apply the loop filter to a given set of fragment rows in the given plane. + The filter may be run on the bottom edge, affecting pixels in the next row of + fragments, so this row also needs to be available. + _bv: The bounding values array. 
+ _refi: The index of the frame buffer to filter. + _pli: The color plane to filter. + _fragy0: The Y coordinate of the first fragment row to filter. + _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ +void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){ + OC_ALIGN8(unsigned char ll[8]); + const oc_fragment_plane *fplane; + const oc_fragment *frags; + const ptrdiff_t *frag_buf_offs; + unsigned char *ref_frame_data; + ptrdiff_t fragi_top; + ptrdiff_t fragi_bot; + ptrdiff_t fragi0; + ptrdiff_t fragi0_end; + int ystride; + int nhfrags; + memset(ll,_state->loop_filter_limits[_state->qis[0]],sizeof(ll)); + fplane=_state->fplanes+_pli; + nhfrags=fplane->nhfrags; + fragi_top=fplane->froffset; + fragi_bot=fragi_top+fplane->nfrags; + fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags; + fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags; + ystride=_state->ref_ystride[_pli]; + frags=_state->frags; + frag_buf_offs=_state->frag_buf_offs; + ref_frame_data=_state->ref_frame_data[_refi]; + /*The following loops are constructed somewhat non-intuitively on purpose. + The main idea is: if a block boundary has at least one coded fragment on + it, the filter is applied to it. 
+ However, the order that the filters are applied in matters, and VP3 chose + the somewhat strange ordering used below.*/ + while(fragi0fragi0)OC_LOOP_FILTER_H_MMX(ref,ystride,ll); + if(fragi0>fragi_top)OC_LOOP_FILTER_V_MMX(ref,ystride,ll); + if(fragi+1 +#include "x86enc.h" + +#if defined(OC_X86_64_ASM) + +# define OC_FDCT8x8 \ + /*Note: xmm15={0}x8 and xmm14={-1}x8.*/ \ + "#OC_FDCT8x8\n\t" \ + /*Stage 1:*/ \ + "movdqa %%xmm0,%%xmm11\n\t" \ + "movdqa %%xmm1,%%xmm10\n\t" \ + "movdqa %%xmm2,%%xmm9\n\t" \ + "movdqa %%xmm3,%%xmm8\n\t" \ + /*xmm11=t7'=t0-t7*/ \ + "psubw %%xmm7,%%xmm11\n\t" \ + /*xmm10=t6'=t1-t6*/ \ + "psubw %%xmm6,%%xmm10\n\t" \ + /*xmm9=t5'=t2-t5*/ \ + "psubw %%xmm5,%%xmm9\n\t" \ + /*xmm8=t4'=t3-t4*/ \ + "psubw %%xmm4,%%xmm8\n\t" \ + /*xmm0=t0'=t0+t7*/ \ + "paddw %%xmm7,%%xmm0\n\t" \ + /*xmm1=t1'=t1+t6*/ \ + "paddw %%xmm6,%%xmm1\n\t" \ + /*xmm5=t2'=t2+t5*/ \ + "paddw %%xmm2,%%xmm5\n\t" \ + /*xmm4=t3'=t3+t4*/ \ + "paddw %%xmm3,%%xmm4\n\t" \ + /*xmm2,3,6,7 are now free.*/ \ + /*Stage 2:*/ \ + "movdqa %%xmm0,%%xmm3\n\t" \ + "mov $0x5A806A0A,%[a]\n\t" \ + "movdqa %%xmm1,%%xmm2\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "movdqa %%xmm10,%%xmm6\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + /*xmm2=t2''=t1'-t2'*/ \ + "psubw %%xmm5,%%xmm2\n\t" \ + "pxor %%xmm12,%%xmm12\n\t" \ + /*xmm3=t3''=t0'-t3'*/ \ + "psubw %%xmm4,%%xmm3\n\t" \ + "psubw %%xmm14,%%xmm12\n\t" \ + /*xmm10=t5''=t6'-t5'*/ \ + "psubw %%xmm9,%%xmm10\n\t" \ + "paddw %%xmm12,%%xmm12\n\t" \ + /*xmm4=t0''=t0'+t3'*/ \ + "paddw %%xmm0,%%xmm4\n\t" \ + /*xmm1=t1''=t1'+t2'*/ \ + "paddw %%xmm5,%%xmm1\n\t" \ + /*xmm6=t6''=t6'+t5'*/ \ + "paddw %%xmm9,%%xmm6\n\t" \ + /*xmm0,xmm5,xmm9 are now free.*/ \ + /*Stage 3:*/ \ + /*xmm10:xmm5=t5''*27146+0xB500 \ + xmm0=t5''*/ \ + "movdqa %%xmm10,%%xmm5\n\t" \ + "movdqa %%xmm10,%%xmm0\n\t" \ + "punpckhwd %%xmm12,%%xmm10\n\t" \ + "pmaddwd %%xmm13,%%xmm10\n\t" \ + "punpcklwd %%xmm12,%%xmm5\n\t" \ + "pmaddwd %%xmm13,%%xmm5\n\t" \ + /*xmm5=(t5''*27146+0xB500>>16)+t5''*/ \ + "psrad 
$16,%%xmm10\n\t" \ + "psrad $16,%%xmm5\n\t" \ + "packssdw %%xmm10,%%xmm5\n\t" \ + "paddw %%xmm0,%%xmm5\n\t" \ + /*xmm0=s=(t5''*27146+0xB500>>16)+t5''+(t5''!=0)>>1*/ \ + "pcmpeqw %%xmm15,%%xmm0\n\t" \ + "psubw %%xmm14,%%xmm0\n\t" \ + "paddw %%xmm5,%%xmm0\n\t" \ + "movdqa %%xmm8,%%xmm5\n\t" \ + "psraw $1,%%xmm0\n\t" \ + /*xmm5=t5'''=t4'-s*/ \ + "psubw %%xmm0,%%xmm5\n\t" \ + /*xmm8=t4''=t4'+s*/ \ + "paddw %%xmm0,%%xmm8\n\t" \ + /*xmm0,xmm7,xmm9,xmm10 are free.*/ \ + /*xmm7:xmm9=t6''*27146+0xB500*/ \ + "movdqa %%xmm6,%%xmm7\n\t" \ + "movdqa %%xmm6,%%xmm9\n\t" \ + "punpckhwd %%xmm12,%%xmm7\n\t" \ + "pmaddwd %%xmm13,%%xmm7\n\t" \ + "punpcklwd %%xmm12,%%xmm9\n\t" \ + "pmaddwd %%xmm13,%%xmm9\n\t" \ + /*xmm9=(t6''*27146+0xB500>>16)+t6''*/ \ + "psrad $16,%%xmm7\n\t" \ + "psrad $16,%%xmm9\n\t" \ + "packssdw %%xmm7,%%xmm9\n\t" \ + "paddw %%xmm6,%%xmm9\n\t" \ + /*xmm9=s=(t6''*27146+0xB500>>16)+t6''+(t6''!=0)>>1*/ \ + "pcmpeqw %%xmm15,%%xmm6\n\t" \ + "psubw %%xmm14,%%xmm6\n\t" \ + "paddw %%xmm6,%%xmm9\n\t" \ + "movdqa %%xmm11,%%xmm7\n\t" \ + "psraw $1,%%xmm9\n\t" \ + /*xmm7=t6'''=t7'-s*/ \ + "psubw %%xmm9,%%xmm7\n\t" \ + /*xmm9=t7''=t7'+s*/ \ + "paddw %%xmm11,%%xmm9\n\t" \ + /*xmm0,xmm6,xmm10,xmm11 are free.*/ \ + /*Stage 4:*/ \ + /*xmm10:xmm0=t1''*27146+0xB500*/ \ + "movdqa %%xmm1,%%xmm0\n\t" \ + "movdqa %%xmm1,%%xmm10\n\t" \ + "punpcklwd %%xmm12,%%xmm0\n\t" \ + "pmaddwd %%xmm13,%%xmm0\n\t" \ + "punpckhwd %%xmm12,%%xmm10\n\t" \ + "pmaddwd %%xmm13,%%xmm10\n\t" \ + /*xmm0=(t1''*27146+0xB500>>16)+t1''*/ \ + "psrad $16,%%xmm0\n\t" \ + "psrad $16,%%xmm10\n\t" \ + "mov $0x20006A0A,%[a]\n\t" \ + "packssdw %%xmm10,%%xmm0\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "paddw %%xmm1,%%xmm0\n\t" \ + /*xmm0=s=(t1''*27146+0xB500>>16)+t1''+(t1''!=0)*/ \ + "pcmpeqw %%xmm15,%%xmm1\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + "psubw %%xmm14,%%xmm1\n\t" \ + "paddw %%xmm1,%%xmm0\n\t" \ + /*xmm10:xmm4=t0''*27146+0x4000*/ \ + "movdqa %%xmm4,%%xmm1\n\t" \ + "movdqa %%xmm4,%%xmm10\n\t" \ + "punpcklwd 
%%xmm12,%%xmm4\n\t" \ + "pmaddwd %%xmm13,%%xmm4\n\t" \ + "punpckhwd %%xmm12,%%xmm10\n\t" \ + "pmaddwd %%xmm13,%%xmm10\n\t" \ + /*xmm4=(t0''*27146+0x4000>>16)+t0''*/ \ + "psrad $16,%%xmm4\n\t" \ + "psrad $16,%%xmm10\n\t" \ + "mov $0x6CB7,%[a]\n\t" \ + "packssdw %%xmm10,%%xmm4\n\t" \ + "movd %[a],%%xmm12\n\t" \ + "paddw %%xmm1,%%xmm4\n\t" \ + /*xmm4=r=(t0''*27146+0x4000>>16)+t0''+(t0''!=0)*/ \ + "pcmpeqw %%xmm15,%%xmm1\n\t" \ + "pshufd $00,%%xmm12,%%xmm12\n\t" \ + "psubw %%xmm14,%%xmm1\n\t" \ + "mov $0x7FFF6C84,%[a]\n\t" \ + "paddw %%xmm1,%%xmm4\n\t" \ + /*xmm0=_y[0]=u=r+s>>1 \ + The naive implementation could cause overflow, so we use \ + u=(r&s)+((r^s)>>1).*/ \ + "movdqa %%xmm0,%%xmm6\n\t" \ + "pxor %%xmm4,%%xmm0\n\t" \ + "pand %%xmm4,%%xmm6\n\t" \ + "psraw $1,%%xmm0\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "paddw %%xmm6,%%xmm0\n\t" \ + /*xmm4=_y[4]=v=r-u*/ \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + "psubw %%xmm0,%%xmm4\n\t" \ + /*xmm1,xmm6,xmm10,xmm11 are free.*/ \ + /*xmm6:xmm10=60547*t3''+0x6CB7*/ \ + "movdqa %%xmm3,%%xmm10\n\t" \ + "movdqa %%xmm3,%%xmm6\n\t" \ + "punpcklwd %%xmm3,%%xmm10\n\t" \ + "pmaddwd %%xmm13,%%xmm10\n\t" \ + "mov $0x61F861F8,%[a]\n\t" \ + "punpckhwd %%xmm3,%%xmm6\n\t" \ + "pmaddwd %%xmm13,%%xmm6\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "paddd %%xmm12,%%xmm10\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + "paddd %%xmm12,%%xmm6\n\t" \ + /*xmm1:xmm2=25080*t2'' \ + xmm12=t2''*/ \ + "movdqa %%xmm2,%%xmm11\n\t" \ + "movdqa %%xmm2,%%xmm12\n\t" \ + "pmullw %%xmm13,%%xmm2\n\t" \ + "pmulhw %%xmm13,%%xmm11\n\t" \ + "movdqa %%xmm2,%%xmm1\n\t" \ + "punpcklwd %%xmm11,%%xmm2\n\t" \ + "punpckhwd %%xmm11,%%xmm1\n\t" \ + /*xmm10=u=(25080*t2''+60547*t3''+0x6CB7>>16)+(t3''!=0)*/ \ + "paddd %%xmm2,%%xmm10\n\t" \ + "paddd %%xmm1,%%xmm6\n\t" \ + "psrad $16,%%xmm10\n\t" \ + "pcmpeqw %%xmm15,%%xmm3\n\t" \ + "psrad $16,%%xmm6\n\t" \ + "psubw %%xmm14,%%xmm3\n\t" \ + "packssdw %%xmm6,%%xmm10\n\t" \ + "paddw %%xmm3,%%xmm10\n\t" \ + /*xmm2=_y[2]=u \ + xmm10=s=(25080*u>>16)-t2''*/ 
\ + "movdqa %%xmm10,%%xmm2\n\t" \ + "pmulhw %%xmm13,%%xmm10\n\t" \ + "psubw %%xmm12,%%xmm10\n\t" \ + /*xmm1:xmm6=s*21600+0x2800*/ \ + "pxor %%xmm12,%%xmm12\n\t" \ + "psubw %%xmm14,%%xmm12\n\t" \ + "mov $0x28005460,%[a]\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + "movdqa %%xmm10,%%xmm6\n\t" \ + "movdqa %%xmm10,%%xmm1\n\t" \ + "punpcklwd %%xmm12,%%xmm6\n\t" \ + "pmaddwd %%xmm13,%%xmm6\n\t" \ + "mov $0x0E3D,%[a]\n\t" \ + "punpckhwd %%xmm12,%%xmm1\n\t" \ + "pmaddwd %%xmm13,%%xmm1\n\t" \ + /*xmm6=(s*21600+0x2800>>18)+s*/ \ + "psrad $18,%%xmm6\n\t" \ + "psrad $18,%%xmm1\n\t" \ + "movd %[a],%%xmm12\n\t" \ + "packssdw %%xmm1,%%xmm6\n\t" \ + "pshufd $00,%%xmm12,%%xmm12\n\t" \ + "paddw %%xmm10,%%xmm6\n\t" \ + /*xmm6=_y[6]=v=(s*21600+0x2800>>18)+s+(s!=0)*/ \ + "mov $0x7FFF54DC,%[a]\n\t" \ + "pcmpeqw %%xmm15,%%xmm10\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "psubw %%xmm14,%%xmm10\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + "paddw %%xmm10,%%xmm6\n\t " \ + /*xmm1,xmm3,xmm10,xmm11 are free.*/ \ + /*xmm11:xmm10=54491*t5'''+0x0E3D*/ \ + "movdqa %%xmm5,%%xmm10\n\t" \ + "movdqa %%xmm5,%%xmm11\n\t" \ + "punpcklwd %%xmm5,%%xmm10\n\t" \ + "pmaddwd %%xmm13,%%xmm10\n\t" \ + "mov $0x8E3A8E3A,%[a]\n\t" \ + "punpckhwd %%xmm5,%%xmm11\n\t" \ + "pmaddwd %%xmm13,%%xmm11\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "paddd %%xmm12,%%xmm10\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + "paddd %%xmm12,%%xmm11\n\t" \ + /*xmm7:xmm12=36410*t6''' \ + xmm1=t6'''*/ \ + "movdqa %%xmm7,%%xmm3\n\t" \ + "movdqa %%xmm7,%%xmm1\n\t" \ + "pmulhw %%xmm13,%%xmm3\n\t" \ + "pmullw %%xmm13,%%xmm7\n\t" \ + "paddw %%xmm1,%%xmm3\n\t" \ + "movdqa %%xmm7,%%xmm12\n\t" \ + "punpckhwd %%xmm3,%%xmm7\n\t" \ + "punpcklwd %%xmm3,%%xmm12\n\t" \ + /*xmm10=u=(54491*t5'''+36410*t6'''+0x0E3D>>16)+(t5'''!=0)*/ \ + "paddd %%xmm12,%%xmm10\n\t" \ + "paddd %%xmm7,%%xmm11\n\t" \ + "psrad $16,%%xmm10\n\t" \ + "pcmpeqw %%xmm15,%%xmm5\n\t" \ + "psrad $16,%%xmm11\n\t" \ + "psubw %%xmm14,%%xmm5\n\t" \ + "packssdw 
%%xmm11,%%xmm10\n\t" \ + "pxor %%xmm12,%%xmm12\n\t" \ + "paddw %%xmm5,%%xmm10\n\t" \ + /*xmm5=_y[5]=u \ + xmm1=s=t6'''-(36410*u>>16)*/ \ + "psubw %%xmm14,%%xmm12\n\t" \ + "movdqa %%xmm10,%%xmm5\n\t" \ + "mov $0x340067C8,%[a]\n\t" \ + "pmulhw %%xmm13,%%xmm10\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "paddw %%xmm5,%%xmm10\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + "psubw %%xmm10,%%xmm1\n\t" \ + /*xmm11:xmm3=s*26568+0x3400*/ \ + "movdqa %%xmm1,%%xmm3\n\t" \ + "movdqa %%xmm1,%%xmm11\n\t" \ + "punpcklwd %%xmm12,%%xmm3\n\t" \ + "pmaddwd %%xmm13,%%xmm3\n\t" \ + "mov $0x7B1B,%[a]\n\t" \ + "punpckhwd %%xmm12,%%xmm11\n\t" \ + "pmaddwd %%xmm13,%%xmm11\n\t" \ + /*xmm3=(s*26568+0x3400>>17)+s*/ \ + "psrad $17,%%xmm3\n\t" \ + "psrad $17,%%xmm11\n\t" \ + "movd %[a],%%xmm12\n\t" \ + "packssdw %%xmm11,%%xmm3\n\t" \ + "pshufd $00,%%xmm12,%%xmm12\n\t" \ + "paddw %%xmm1,%%xmm3\n\t" \ + /*xmm3=_y[3]=v=(s*26568+0x3400>>17)+s+(s!=0)*/ \ + "mov $0x7FFF7B16,%[a]\n\t" \ + "pcmpeqw %%xmm15,%%xmm1\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "psubw %%xmm14,%%xmm1\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + "paddw %%xmm1,%%xmm3\n\t " \ + /*xmm1,xmm7,xmm10,xmm11 are free.*/ \ + /*xmm11:xmm10=64277*t7''+0x7B1B*/ \ + "movdqa %%xmm9,%%xmm10\n\t" \ + "movdqa %%xmm9,%%xmm11\n\t" \ + "punpcklwd %%xmm9,%%xmm10\n\t" \ + "pmaddwd %%xmm13,%%xmm10\n\t" \ + "mov $0x31F131F1,%[a]\n\t" \ + "punpckhwd %%xmm9,%%xmm11\n\t" \ + "pmaddwd %%xmm13,%%xmm11\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "paddd %%xmm12,%%xmm10\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + "paddd %%xmm12,%%xmm11\n\t" \ + /*xmm12:xmm7=12785*t4''*/ \ + "movdqa %%xmm8,%%xmm7\n\t" \ + "movdqa %%xmm8,%%xmm1\n\t" \ + "pmullw %%xmm13,%%xmm7\n\t" \ + "pmulhw %%xmm13,%%xmm1\n\t" \ + "movdqa %%xmm7,%%xmm12\n\t" \ + "punpcklwd %%xmm1,%%xmm7\n\t" \ + "punpckhwd %%xmm1,%%xmm12\n\t" \ + /*xmm10=u=(12785*t4''+64277*t7''+0x7B1B>>16)+(t7''!=0)*/ \ + "paddd %%xmm7,%%xmm10\n\t" \ + "paddd %%xmm12,%%xmm11\n\t" \ + "psrad $16,%%xmm10\n\t" \ + "pcmpeqw %%xmm15,%%xmm9\n\t" \ + 
"psrad $16,%%xmm11\n\t" \ + "psubw %%xmm14,%%xmm9\n\t" \ + "packssdw %%xmm11,%%xmm10\n\t" \ + "pxor %%xmm12,%%xmm12\n\t" \ + "paddw %%xmm9,%%xmm10\n\t" \ + /*xmm1=_y[1]=u \ + xmm10=s=(12785*u>>16)-t4''*/ \ + "psubw %%xmm14,%%xmm12\n\t" \ + "movdqa %%xmm10,%%xmm1\n\t" \ + "mov $0x3000503B,%[a]\n\t" \ + "pmulhw %%xmm13,%%xmm10\n\t" \ + "movd %[a],%%xmm13\n\t" \ + "psubw %%xmm8,%%xmm10\n\t" \ + "pshufd $00,%%xmm13,%%xmm13\n\t" \ + /*xmm8:xmm7=s*20539+0x3000*/ \ + "movdqa %%xmm10,%%xmm7\n\t" \ + "movdqa %%xmm10,%%xmm8\n\t" \ + "punpcklwd %%xmm12,%%xmm7\n\t" \ + "pmaddwd %%xmm13,%%xmm7\n\t" \ + "punpckhwd %%xmm12,%%xmm8\n\t" \ + "pmaddwd %%xmm13,%%xmm8\n\t" \ + /*xmm7=(s*20539+0x3000>>20)+s*/ \ + "psrad $20,%%xmm7\n\t" \ + "psrad $20,%%xmm8\n\t" \ + "packssdw %%xmm8,%%xmm7\n\t" \ + "paddw %%xmm10,%%xmm7\n\t" \ + /*xmm7=_y[7]=v=(s*20539+0x3000>>20)+s+(s!=0)*/ \ + "pcmpeqw %%xmm15,%%xmm10\n\t" \ + "psubw %%xmm14,%%xmm10\n\t" \ + "paddw %%xmm10,%%xmm7\n\t " \ + +# define OC_TRANSPOSE8x8 \ + "#OC_TRANSPOSE8x8\n\t" \ + "movdqa %%xmm4,%%xmm8\n\t" \ + /*xmm4 = f3 e3 f2 e2 f1 e1 f0 e0*/ \ + "punpcklwd %%xmm5,%%xmm4\n\t" \ + /*xmm8 = f7 e7 f6 e6 f5 e5 f4 e4*/ \ + "punpckhwd %%xmm5,%%xmm8\n\t" \ + /*xmm5 is free.*/ \ + "movdqa %%xmm0,%%xmm5\n\t" \ + /*xmm0 = b3 a3 b2 a2 b1 a1 b0 a0*/ \ + "punpcklwd %%xmm1,%%xmm0\n\t" \ + /*xmm5 = b7 a7 b6 a6 b5 a5 b4 a4*/ \ + "punpckhwd %%xmm1,%%xmm5\n\t" \ + /*xmm1 is free.*/ \ + "movdqa %%xmm6,%%xmm1\n\t" \ + /*xmm6 = h3 g3 h2 g2 h1 g1 h0 g0*/ \ + "punpcklwd %%xmm7,%%xmm6\n\t" \ + /*xmm1 = h7 g7 h6 g6 h5 g5 h4 g4*/ \ + "punpckhwd %%xmm7,%%xmm1\n\t" \ + /*xmm7 is free.*/ \ + "movdqa %%xmm2,%%xmm7\n\t" \ + /*xmm7 = d3 c3 d2 c2 d1 c1 d0 c0*/ \ + "punpcklwd %%xmm3,%%xmm7\n\t" \ + /*xmm2 = d7 c7 d6 c6 d5 c5 d4 c4*/ \ + "punpckhwd %%xmm3,%%xmm2\n\t" \ + /*xmm3 is free.*/ \ + "movdqa %%xmm0,%%xmm3\n\t" \ + /*xmm0 = d1 c1 b1 a1 d0 c0 b0 a0*/ \ + "punpckldq %%xmm7,%%xmm0\n\t" \ + /*xmm3 = d3 c3 b3 a3 d2 c2 b2 a2*/ \ + "punpckhdq %%xmm7,%%xmm3\n\t" \ + 
/*xmm7 is free.*/ \ + "movdqa %%xmm5,%%xmm7\n\t" \ + /*xmm5 = d5 c5 b5 a5 d4 c4 b4 a4*/ \ + "punpckldq %%xmm2,%%xmm5\n\t" \ + /*xmm7 = d7 c7 b7 a7 d6 c6 b6 a6*/ \ + "punpckhdq %%xmm2,%%xmm7\n\t" \ + /*xmm2 is free.*/ \ + "movdqa %%xmm4,%%xmm2\n\t" \ + /*xmm2 = h1 g1 f1 e1 h0 g0 f0 e0*/ \ + "punpckldq %%xmm6,%%xmm2\n\t" \ + /*xmm4 = h3 g3 f3 e3 h2 g2 f2 e2*/ \ + "punpckhdq %%xmm6,%%xmm4\n\t" \ + /*xmm6 is free.*/ \ + "movdqa %%xmm8,%%xmm6\n\t" \ + /*xmm6 = h5 g5 f5 e5 h4 g4 f4 e4*/ \ + "punpckldq %%xmm1,%%xmm6\n\t" \ + /*xmm8 = h7 g7 f7 e7 h6 g6 f6 e6*/ \ + "punpckhdq %%xmm1,%%xmm8\n\t" \ + /*xmm1 is free.*/ \ + "movdqa %%xmm0,%%xmm1\n\t" \ + /*xmm0 = h0 g0 f0 e0 d0 c0 b0 a0*/ \ + "punpcklqdq %%xmm2,%%xmm0\n\t" \ + /*xmm1 = h1 g1 f1 e1 d1 c1 b1 a1*/ \ + "punpckhqdq %%xmm2,%%xmm1\n\t" \ + /*xmm2 is free.*/ \ + "movdqa %%xmm3,%%xmm2\n\t" \ + /*xmm2 = h2 g2 f2 e2 d2 c2 b2 a2*/ \ + "punpcklqdq %%xmm4,%%xmm2\n\t" \ + /*xmm3 = h3 g3 f3 e3 d3 c3 b3 a3*/ \ + "punpckhqdq %%xmm4,%%xmm3\n\t" \ + /*xmm4 is free.*/ \ + "movdqa %%xmm5,%%xmm4\n\t" \ + /*xmm4 = h4 g4 f4 e4 d4 c4 b4 a4*/ \ + "punpcklqdq %%xmm6,%%xmm4\n\t" \ + /*xmm5 = h5 g5 f5 e5 d5 c5 b5 a5*/ \ + "punpckhqdq %%xmm6,%%xmm5\n\t" \ + /*xmm6 is free.*/ \ + "movdqa %%xmm7,%%xmm6\n\t" \ + /*xmm6 = h6 g6 f6 e6 d6 c6 b6 a6*/ \ + "punpcklqdq %%xmm8,%%xmm6\n\t" \ + /*xmm7 = h7 g7 f7 e7 d7 c7 b7 a7*/ \ + "punpckhqdq %%xmm8,%%xmm7\n\t" \ + /*xmm8 is free.*/ \ + +/*SSE2 implementation of the fDCT for x86-64 only. 
+ Because of the 8 extra XMM registers on x86-64, this version can operate + without any temporary stack access at all.*/ +void oc_enc_fdct8x8_x86_64sse2(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + ptrdiff_t a; + __asm__ __volatile__( + /*Load the input.*/ + "movdqa 0x00(%[x]),%%xmm0\n\t" + "movdqa 0x10(%[x]),%%xmm1\n\t" + "movdqa 0x20(%[x]),%%xmm2\n\t" + "movdqa 0x30(%[x]),%%xmm3\n\t" + "movdqa 0x40(%[x]),%%xmm4\n\t" + "movdqa 0x50(%[x]),%%xmm5\n\t" + "movdqa 0x60(%[x]),%%xmm6\n\t" + "movdqa 0x70(%[x]),%%xmm7\n\t" + /*Add two extra bits of working precision to improve accuracy; any more and + we could overflow.*/ + /*We also add a few biases to correct for some systematic error that + remains in the full fDCT->iDCT round trip.*/ + /*xmm15={0}x8*/ + "pxor %%xmm15,%%xmm15\n\t" + /*xmm14={-1}x8*/ + "pcmpeqb %%xmm14,%%xmm14\n\t" + "psllw $2,%%xmm0\n\t" + /*xmm8=xmm0*/ + "movdqa %%xmm0,%%xmm8\n\t" + "psllw $2,%%xmm1\n\t" + /*xmm8={_x[7...0]==0}*/ + "pcmpeqw %%xmm15,%%xmm8\n\t" + "psllw $2,%%xmm2\n\t" + /*xmm8={_x[7...0]!=0}*/ + "psubw %%xmm14,%%xmm8\n\t" + "psllw $2,%%xmm3\n\t" + /*%[a]=1*/ + "mov $1,%[a]\n\t" + /*xmm8={_x[6]!=0,0,_x[4]!=0,0,_x[2]!=0,0,_x[0]!=0,0}*/ + "pslld $16,%%xmm8\n\t" + "psllw $2,%%xmm4\n\t" + /*xmm9={0,0,0,0,0,0,0,1}*/ + "movd %[a],%%xmm9\n\t" + /*xmm8={0,0,_x[2]!=0,0,_x[0]!=0,0}*/ + "pshufhw $0x00,%%xmm8,%%xmm8\n\t" + "psllw $2,%%xmm5\n\t" + /*%[a]={1}x2*/ + "mov $0x10001,%[a]\n\t" + /*xmm8={0,0,0,0,0,0,0,_x[0]!=0}*/ + "pshuflw $0x01,%%xmm8,%%xmm8\n\t" + "psllw $2,%%xmm6\n\t" + /*xmm10={0,0,0,0,0,0,1,1}*/ + "movd %[a],%%xmm10\n\t" + /*xmm0=_x[7...0]+{0,0,0,0,0,0,0,_x[0]!=0}*/ + "paddw %%xmm8,%%xmm0\n\t" + "psllw $2,%%xmm7\n\t" + /*xmm0=_x[7...0]+{0,0,0,0,0,0,1,(_x[0]!=0)+1}*/ + "paddw %%xmm10,%%xmm0\n\t" + /*xmm1=_x[15...8]-{0,0,0,0,0,0,0,1}*/ + "psubw %%xmm9,%%xmm1\n\t" + /*Transform columns.*/ + OC_FDCT8x8 + /*Transform rows.*/ + OC_TRANSPOSE8x8 + OC_FDCT8x8 + /*TODO: zig-zag ordering?*/ + OC_TRANSPOSE8x8 + 
/*xmm14={-2,-2,-2,-2,-2,-2,-2,-2}*/ + "paddw %%xmm14,%%xmm14\n\t" + "psubw %%xmm14,%%xmm0\n\t" + "psubw %%xmm14,%%xmm1\n\t" + "psraw $2,%%xmm0\n\t" + "psubw %%xmm14,%%xmm2\n\t" + "psraw $2,%%xmm1\n\t" + "psubw %%xmm14,%%xmm3\n\t" + "psraw $2,%%xmm2\n\t" + "psubw %%xmm14,%%xmm4\n\t" + "psraw $2,%%xmm3\n\t" + "psubw %%xmm14,%%xmm5\n\t" + "psraw $2,%%xmm4\n\t" + "psubw %%xmm14,%%xmm6\n\t" + "psraw $2,%%xmm5\n\t" + "psubw %%xmm14,%%xmm7\n\t" + "psraw $2,%%xmm6\n\t" + "psraw $2,%%xmm7\n\t" + /*Store the result.*/ + "movdqa %%xmm0,0x00(%[y])\n\t" + "movdqa %%xmm1,0x10(%[y])\n\t" + "movdqa %%xmm2,0x20(%[y])\n\t" + "movdqa %%xmm3,0x30(%[y])\n\t" + "movdqa %%xmm4,0x40(%[y])\n\t" + "movdqa %%xmm5,0x50(%[y])\n\t" + "movdqa %%xmm6,0x60(%[y])\n\t" + "movdqa %%xmm7,0x70(%[y])\n\t" + :[a]"=&r"(a) + :[y]"r"(_y),[x]"r"(_x) + :"memory" + ); +} +#endif diff --git a/thirdparty/libtheora/x86/x86enc.c b/thirdparty/libtheora/x86/x86enc.c new file mode 100644 index 0000000000..43b7be3ea3 --- /dev/null +++ b/thirdparty/libtheora/x86/x86enc.c @@ -0,0 +1,49 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: x86state.c 15675 2009-02-06 09:43:27Z tterribe $ + + ********************************************************************/ +#include "x86enc.h" + +#if defined(OC_X86_ASM) + +#include "../cpu.c" +/*Fills in the encoder's optimized function table for x86. + The table is first populated by oc_enc_vtable_init_c(); individual entries + are then replaced according to the flags returned by oc_cpu_flags_get(): + MMX routines when OC_CPU_X86_MMX is set and MMXEXT routines when + OC_CPU_X86_MMXEXT is set.*/ +void oc_enc_vtable_init_x86(oc_enc_ctx *_enc){ + ogg_uint32_t cpu_flags; + cpu_flags=oc_cpu_flags_get(); + oc_enc_vtable_init_c(_enc); + if(cpu_flags&OC_CPU_X86_MMX){ + _enc->opt_vtable.frag_sub=oc_enc_frag_sub_mmx; + _enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_mmx; + _enc->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx; + _enc->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx; + _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_mmx; + } + if(cpu_flags&OC_CPU_X86_MMXEXT){ + _enc->opt_vtable.frag_sad=oc_enc_frag_sad_mmxext; + _enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_mmxext; + _enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_mmxext; + _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_mmxext; + _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_mmxext; + _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_mmxext; + _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_mmxext; + } + if(cpu_flags&OC_CPU_X86_SSE2){ +# if defined(OC_X86_64_ASM) + /*The x86-64 SSE2 fDCT is not installed: the assignment below is commented + out, so fdct8x8 keeps whichever entry was chosen above. + _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_x86_64sse2;*/ +# endif + } +} +#endif diff --git a/thirdparty/libtheora/x86/x86enc.h b/thirdparty/libtheora/x86/x86enc.h new file mode 100644 index 0000000000..06c3908bcd --- /dev/null +++ b/thirdparty/libtheora/x86/x86enc.h @@ -0,0 +1,47 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.
* + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: x86int.h 15675 2009-02-06 09:43:27Z tterribe $ + + ********************************************************************/ + +#if !defined(_x86_x86enc_H) +# define _x86_x86enc_H (1) +# include "../encint.h" +# include "x86int.h" + +void oc_enc_vtable_init_x86(oc_enc_ctx *_enc); + +unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride); +unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh); +unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh); +unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh); +unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh); +unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,int _ystride); +void oc_enc_frag_sub_mmx(ogg_int16_t _diff[64], + const unsigned char *_x,const unsigned char *_y,int _stride); +void oc_enc_frag_sub_128_mmx(ogg_int16_t _diff[64], + const unsigned char *_x,int _stride); +void oc_enc_frag_copy2_mmxext(unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride); +void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]); +void oc_enc_fdct8x8_x86_64sse2(ogg_int16_t _y[64],const ogg_int16_t _x[64]); + +#endif diff --git 
a/thirdparty/libtheora/x86/x86int.h b/thirdparty/libtheora/x86/x86int.h new file mode 100644 index 0000000000..ede724f5aa --- /dev/null +++ b/thirdparty/libtheora/x86/x86int.h @@ -0,0 +1,42 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: x86int.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#if !defined(_x86_x86int_H) +# define _x86_x86int_H (1) +# include "../internal.h" + +void oc_state_vtable_init_x86(oc_theora_state *_state); + +void oc_frag_copy_mmx(unsigned char *_dst, + const unsigned char *_src,int _ystride); +void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride, + const ogg_int16_t *_residue); +void oc_frag_recon_inter_mmx(unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t *_residue); +void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1, + const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue); +void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi); +void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); +void oc_state_frag_copy_list_mmx(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli); +void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int _fragy0,int 
_fragy_end); +void oc_restore_fpu_mmx(void); + +#endif diff --git a/thirdparty/libtheora/x86/x86state.c b/thirdparty/libtheora/x86/x86state.c new file mode 100644 index 0000000000..a786bec284 --- /dev/null +++ b/thirdparty/libtheora/x86/x86state.c @@ -0,0 +1,62 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: x86state.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include "x86int.h" + +#if defined(OC_X86_ASM) + +#include "../cpu.c" + +/*This table has been modified from OC_FZIG_ZAG by baking a 4x4 transpose into + each quadrant of the destination. + Only the first 64 entries are distinct; entries 64...127 all hold 64.*/ +static const unsigned char OC_FZIG_ZAG_MMX[128]={ + 0, 8, 1, 2, 9,16,24,17, + 10, 3,32,11,18,25, 4,12, + 5,26,19,40,33,34,41,48, + 27, 6,13,20,28,21,14, 7, + 56,49,42,35,43,50,57,36, + 15,22,29,30,23,44,37,58, + 51,59,38,45,52,31,60,53, + 46,39,47,54,61,62,55,63, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, +}; +/*Stores the result of oc_cpu_flags_get() in _state->cpu_flags and fills in the + shared function table. + When OC_CPU_X86_MMX is set, every entry assigned below points at an MMX + routine and opt_data.dct_fzig_zag is set to OC_FZIG_ZAG_MMX; otherwise the + whole job is delegated to oc_state_vtable_init_c().*/ +void oc_state_vtable_init_x86(oc_theora_state *_state){ + _state->cpu_flags=oc_cpu_flags_get(); + if(_state->cpu_flags&OC_CPU_X86_MMX){ + _state->opt_vtable.frag_copy=oc_frag_copy_mmx; + _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx; + _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx; +
_state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_mmx; + _state->opt_vtable.idct8x8=oc_idct8x8_mmx; + _state->opt_vtable.state_frag_recon=oc_state_frag_recon_mmx; + _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_mmx; + _state->opt_vtable.state_loop_filter_frag_rows= + oc_state_loop_filter_frag_rows_mmx; + _state->opt_vtable.restore_fpu=oc_restore_fpu_mmx; + _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_MMX; + } + else oc_state_vtable_init_c(_state); +} +#endif diff --git a/thirdparty/libtheora/x86_vc/mmxencfrag.c b/thirdparty/libtheora/x86_vc/mmxencfrag.c new file mode 100644 index 0000000000..ac9dacf377 --- /dev/null +++ b/thirdparty/libtheora/x86_vc/mmxencfrag.c @@ -0,0 +1,969 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: dsp_mmx.c 14579 2008-03-12 06:42:40Z xiphmont $ + + ********************************************************************/ +#include <stddef.h> +#include "x86enc.h" + +#if defined(OC_X86_ASM) + +/*Returns the sum of absolute differences between the 8x8 byte blocks at _src + and _ref, whose rows are both _ystride bytes apart. + ptrdiff_t (from <stddef.h>) is used for the local that receives the sum.*/ +unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride){ + ptrdiff_t ret; + __asm{ +#define SRC esi +#define REF edx +#define YSTRIDE ecx +#define YSTRIDE3 edi + mov YSTRIDE,_ystride + mov SRC,_src + mov REF,_ref + /*Load the first 4 rows of each block.*/ + movq mm0,[SRC] + movq mm1,[REF] + movq mm2,[SRC][YSTRIDE] + movq mm3,[REF][YSTRIDE] + lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] + movq mm4,[SRC+YSTRIDE*2] + movq mm5,[REF+YSTRIDE*2] + movq mm6,[SRC+YSTRIDE3] + movq mm7,[REF+YSTRIDE3] + /*Compute their SADs and add them in mm0*/ + psadbw mm0,mm1 + psadbw mm2,mm3 + lea SRC,[SRC+YSTRIDE*4] + paddw mm0,mm2 + lea REF,[REF+YSTRIDE*4] + /*Load the next 3 rows as registers become available.*/ + movq mm2,[SRC] + movq mm3,[REF] + psadbw mm4,mm5 + psadbw mm6,mm7 + paddw mm0,mm4 + movq mm5,[REF+YSTRIDE] + movq mm4,[SRC+YSTRIDE] + paddw mm0,mm6 + movq mm7,[REF+YSTRIDE*2] + movq mm6,[SRC+YSTRIDE*2] + /*Start adding their SADs to mm0*/ + psadbw mm2,mm3 + psadbw mm4,mm5 + paddw mm0,mm2 + psadbw mm6,mm7 + /*Load last row as registers become available.*/ + movq mm2,[SRC+YSTRIDE3] + movq mm3,[REF+YSTRIDE3] + /*And finish adding up their SADs.*/ + paddw mm0,mm4 + psadbw mm2,mm3 + paddw mm0,mm6 + paddw mm0,mm2 + movd [ret],mm0 +#undef SRC +#undef REF +#undef YSTRIDE +#undef YSTRIDE3 + } + return (unsigned)ret; +} + +/*Same result as oc_enc_frag_sad_mmxext(): _thresh is unused, so the full SAD + is always computed and returned.*/ +unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh){ + /*Early termination is for suckers.*/ + return oc_enc_frag_sad_mmxext(_src,_ref,_ystride); +} + +#define
OC_SAD2_LOOP __asm{ \ + /*We want to compute (mm0+mm1>>1) on unsigned bytes without overflow, but \ + pavgb computes (mm0+mm1+1>>1). \ + The latter is exactly 1 too large when the low bit of two corresponding \ + bytes is only set in one of them. \ + Therefore we pxor the operands, pand to mask out the low bits, and psubb to \ + correct the output of pavgb.*/ \ + __asm movq mm6,mm0 \ + __asm lea REF1,[REF1+YSTRIDE*2] \ + __asm pxor mm0,mm1 \ + __asm pavgb mm6,mm1 \ + __asm lea REF2,[REF2+YSTRIDE*2] \ + __asm movq mm1,mm2 \ + __asm pand mm0,mm7 \ + __asm pavgb mm2,mm3 \ + __asm pxor mm1,mm3 \ + __asm movq mm3,[REF2+YSTRIDE] \ + __asm psubb mm6,mm0 \ + __asm movq mm0,[REF1] \ + __asm pand mm1,mm7 \ + __asm psadbw mm4,mm6 \ + __asm movd mm6,RET \ + __asm psubb mm2,mm1 \ + __asm movq mm1,[REF2] \ + __asm lea SRC,[SRC+YSTRIDE*2] \ + __asm psadbw mm5,mm2 \ + __asm movq mm2,[REF1+YSTRIDE] \ + __asm paddw mm5,mm4 \ + __asm movq mm4,[SRC] \ + __asm paddw mm6,mm5 \ + __asm movq mm5,[SRC+YSTRIDE] \ + __asm movd RET,mm6 \ +} + +/*Same as above, but does not pre-load the next two rows.*/ +#define OC_SAD2_TAIL __asm{ \ + __asm movq mm6,mm0 \ + __asm pavgb mm0,mm1 \ + __asm pxor mm6,mm1 \ + __asm movq mm1,mm2 \ + __asm pand mm6,mm7 \ + __asm pavgb mm2,mm3 \ + __asm pxor mm1,mm3 \ + __asm psubb mm0,mm6 \ + __asm pand mm1,mm7 \ + __asm psadbw mm4,mm0 \ + __asm psubb mm2,mm1 \ + __asm movd mm6,RET \ + __asm psadbw mm5,mm2 \ + __asm paddw mm5,mm4 \ + __asm paddw mm6,mm5 \ + __asm movd RET,mm6 \ +} + +unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh){ + ptrdiff_t ret; + __asm{ +#define REF1 ecx +#define REF2 edi +#define YSTRIDE esi +#define SRC edx +#define RET eax + mov YSTRIDE,_ystride + mov SRC,_src + mov REF1,_ref1 + mov REF2,_ref2 + movq mm0,[REF1] + movq mm1,[REF2] + movq mm2,[REF1+YSTRIDE] + movq mm3,[REF2+YSTRIDE] + xor RET,RET + movq mm4,[SRC] + pxor mm7,mm7 + pcmpeqb 
mm6,mm6 + movq mm5,[SRC+YSTRIDE] + psubb mm7,mm6 + OC_SAD2_LOOP + OC_SAD2_LOOP + OC_SAD2_LOOP + OC_SAD2_TAIL + mov [ret],RET +#undef REF1 +#undef REF2 +#undef YSTRIDE +#undef SRC +#undef RET + } + return (unsigned)ret; +} + +/*Load an 8x4 array of pixel values from %[src] and %[ref] and compute their + 16-bit difference in mm0...mm7.*/ +#define OC_LOAD_SUB_8x4(_off) __asm{ \ + __asm movd mm0,[_off+SRC] \ + __asm movd mm4,[_off+REF] \ + __asm movd mm1,[_off+SRC+SRC_YSTRIDE] \ + __asm lea SRC,[SRC+SRC_YSTRIDE*2] \ + __asm movd mm5,[_off+REF+REF_YSTRIDE] \ + __asm lea REF,[REF+REF_YSTRIDE*2] \ + __asm movd mm2,[_off+SRC] \ + __asm movd mm7,[_off+REF] \ + __asm movd mm3,[_off+SRC+SRC_YSTRIDE] \ + __asm movd mm6,[_off+REF+REF_YSTRIDE] \ + __asm punpcklbw mm0,mm4 \ + __asm lea SRC,[SRC+SRC_YSTRIDE*2] \ + __asm punpcklbw mm4,mm4 \ + __asm lea REF,[REF+REF_YSTRIDE*2] \ + __asm psubw mm0,mm4 \ + __asm movd mm4,[_off+SRC] \ + __asm movq [_off*2+BUF],mm0 \ + __asm movd mm0,[_off+REF] \ + __asm punpcklbw mm1,mm5 \ + __asm punpcklbw mm5,mm5 \ + __asm psubw mm1,mm5 \ + __asm movd mm5,[_off+SRC+SRC_YSTRIDE] \ + __asm punpcklbw mm2,mm7 \ + __asm punpcklbw mm7,mm7 \ + __asm psubw mm2,mm7 \ + __asm movd mm7,[_off+REF+REF_YSTRIDE] \ + __asm punpcklbw mm3,mm6 \ + __asm lea SRC,[SRC+SRC_YSTRIDE*2] \ + __asm punpcklbw mm6,mm6 \ + __asm psubw mm3,mm6 \ + __asm movd mm6,[_off+SRC] \ + __asm punpcklbw mm4,mm0 \ + __asm lea REF,[REF+REF_YSTRIDE*2] \ + __asm punpcklbw mm0,mm0 \ + __asm lea SRC,[SRC+SRC_YSTRIDE*2] \ + __asm psubw mm4,mm0 \ + __asm movd mm0,[_off+REF] \ + __asm punpcklbw mm5,mm7 \ + __asm neg SRC_YSTRIDE \ + __asm punpcklbw mm7,mm7 \ + __asm psubw mm5,mm7 \ + __asm movd mm7,[_off+SRC+SRC_YSTRIDE] \ + __asm punpcklbw mm6,mm0 \ + __asm lea REF,[REF+REF_YSTRIDE*2] \ + __asm punpcklbw mm0,mm0 \ + __asm neg REF_YSTRIDE \ + __asm psubw mm6,mm0 \ + __asm movd mm0,[_off+REF+REF_YSTRIDE] \ + __asm lea SRC,[SRC+SRC_YSTRIDE*8] \ + __asm punpcklbw mm7,mm0 \ + __asm neg SRC_YSTRIDE \ + 
__asm punpcklbw mm0,mm0 \ + __asm lea REF,[REF+REF_YSTRIDE*8] \ + __asm psubw mm7,mm0 \ + __asm neg REF_YSTRIDE \ + __asm movq mm0,[_off*2+BUF] \ +} + +/*Load an 8x4 array of pixel values from %[src] into %%mm0...%%mm7.*/ +#define OC_LOAD_8x4(_off) __asm{ \ + __asm movd mm0,[_off+SRC] \ + __asm movd mm1,[_off+SRC+YSTRIDE] \ + __asm movd mm2,[_off+SRC+YSTRIDE*2] \ + __asm pxor mm7,mm7 \ + __asm movd mm3,[_off+SRC+YSTRIDE3] \ + __asm punpcklbw mm0,mm7 \ + __asm movd mm4,[_off+SRC4] \ + __asm punpcklbw mm1,mm7 \ + __asm movd mm5,[_off+SRC4+YSTRIDE] \ + __asm punpcklbw mm2,mm7 \ + __asm movd mm6,[_off+SRC4+YSTRIDE*2] \ + __asm punpcklbw mm3,mm7 \ + __asm movd mm7,[_off+SRC4+YSTRIDE3] \ + __asm punpcklbw mm4,mm4 \ + __asm punpcklbw mm5,mm5 \ + __asm psrlw mm4,8 \ + __asm psrlw mm5,8 \ + __asm punpcklbw mm6,mm6 \ + __asm punpcklbw mm7,mm7 \ + __asm psrlw mm6,8 \ + __asm psrlw mm7,8 \ +} + +/*Performs the first two stages of an 8-point 1-D Hadamard transform. + The transform is performed in place, except that outputs 0-3 are swapped with + outputs 4-7. + Outputs 2, 3, 6 and 7 from the second stage are negated (which allows us to + perform this stage in place with no temporary registers).*/ +#define OC_HADAMARD_AB_8x4 __asm{ \ + /*Stage A: \ + Outputs 0-3 are swapped with 4-7 here.*/ \ + __asm paddw mm5,mm1 \ + __asm paddw mm6,mm2 \ + __asm paddw mm1,mm1 \ + __asm paddw mm2,mm2 \ + __asm psubw mm1,mm5 \ + __asm psubw mm2,mm6 \ + __asm paddw mm7,mm3 \ + __asm paddw mm4,mm0 \ + __asm paddw mm3,mm3 \ + __asm paddw mm0,mm0 \ + __asm psubw mm3,mm7 \ + __asm psubw mm0,mm4 \ + /*Stage B:*/ \ + __asm paddw mm0,mm2 \ + __asm paddw mm1,mm3 \ + __asm paddw mm4,mm6 \ + __asm paddw mm5,mm7 \ + __asm paddw mm2,mm2 \ + __asm paddw mm3,mm3 \ + __asm paddw mm6,mm6 \ + __asm paddw mm7,mm7 \ + __asm psubw mm2,mm0 \ + __asm psubw mm3,mm1 \ + __asm psubw mm6,mm4 \ + __asm psubw mm7,mm5 \ +} + +/*Performs the last stage of an 8-point 1-D Hadamard transform in place. 
+ Outputs 1, 3, 5, and 7 are negated (which allows us to perform this stage in + place with no temporary registers).*/ +#define OC_HADAMARD_C_8x4 __asm{ \ + /*Stage C:*/ \ + __asm paddw mm0,mm1 \ + __asm paddw mm2,mm3 \ + __asm paddw mm4,mm5 \ + __asm paddw mm6,mm7 \ + __asm paddw mm1,mm1 \ + __asm paddw mm3,mm3 \ + __asm paddw mm5,mm5 \ + __asm paddw mm7,mm7 \ + __asm psubw mm1,mm0 \ + __asm psubw mm3,mm2 \ + __asm psubw mm5,mm4 \ + __asm psubw mm7,mm6 \ +} + +/*Performs an 8-point 1-D Hadamard transform. + The transform is performed in place, except that outputs 0-3 are swapped with + outputs 4-7. + Outputs 1, 2, 5 and 6 are negated (which allows us to perform the transform + in place with no temporary registers).*/ +#define OC_HADAMARD_8x4 __asm{ \ + OC_HADAMARD_AB_8x4 \ + OC_HADAMARD_C_8x4 \ +} + +/*Performs the first part of the final stage of the Hadamard transform and + summing of absolute values. + At the end of this part, mm1 will contain the DC coefficient of the + transform.*/ +#define OC_HADAMARD_C_ABS_ACCUM_A_8x4(_r6,_r7) __asm{ \ + /*We use the fact that \ + (abs(a+b)+abs(a-b))/2=max(abs(a),abs(b)) \ + to merge the final butterfly with the abs and the first stage of \ + accumulation. \ + Thus we can avoid using pabsw, which is not available until SSSE3. \ + Emulating pabsw takes 3 instructions, so the straightforward MMXEXT \ + implementation would be (3+3)*8+7=55 instructions (+4 for spilling \ + registers). \ + Even with pabsw, it would be (3+1)*8+7=39 instructions (with no spills).
\ + This implementation is only 26 (+4 for spilling registers).*/ \ + __asm movq [_r7+BUF],mm7 \ + __asm movq [_r6+BUF],mm6 \ + /*mm7={0x7FFF}x4 \ + mm0=max(abs(mm0),abs(mm1))-0x7FFF*/ \ + __asm pcmpeqb mm7,mm7 \ + __asm movq mm6,mm0 \ + __asm psrlw mm7,1 \ + __asm paddw mm6,mm1 \ + __asm pmaxsw mm0,mm1 \ + __asm paddsw mm6,mm7 \ + __asm psubw mm0,mm6 \ + /*mm2=max(abs(mm2),abs(mm3))-0x7FFF \ + mm4=max(abs(mm4),abs(mm5))-0x7FFF*/ \ + __asm movq mm6,mm2 \ + __asm movq mm1,mm4 \ + __asm pmaxsw mm2,mm3 \ + __asm pmaxsw mm4,mm5 \ + __asm paddw mm6,mm3 \ + __asm paddw mm1,mm5 \ + __asm movq mm3,[_r7+BUF] \ +} + +/*Performs the second part of the final stage of the Hadamard transform and + summing of absolute values.*/ +#define OC_HADAMARD_C_ABS_ACCUM_B_8x4(_r6,_r7) __asm{ \ + __asm paddsw mm6,mm7 \ + __asm movq mm5,[_r6+BUF] \ + __asm paddsw mm1,mm7 \ + __asm psubw mm2,mm6 \ + __asm psubw mm4,mm1 \ + /*mm7={1}x4 (needed for the horizontal add that follows) \ + mm0+=mm2+mm4+max(abs(mm3),abs(mm5))-0x7FFF*/ \ + __asm movq mm6,mm3 \ + __asm pmaxsw mm3,mm5 \ + __asm paddw mm0,mm2 \ + __asm paddw mm6,mm5 \ + __asm paddw mm0,mm4 \ + __asm paddsw mm6,mm7 \ + __asm paddw mm0,mm3 \ + __asm psrlw mm7,14 \ + __asm psubw mm0,mm6 \ +} + +/*Performs the last stage of an 8-point 1-D Hadamard transform, takes the + absolute value of each component, and accumulates everything into mm0. + This is the only portion of SATD which requires MMXEXT (we could use plain + MMX, but it takes 4 instructions and an extra register to work around the + lack of a pmaxsw, which is a pretty serious penalty).*/ +#define OC_HADAMARD_C_ABS_ACCUM_8x4(_r6,_r7) __asm{ \ + OC_HADAMARD_C_ABS_ACCUM_A_8x4(_r6,_r7) \ + OC_HADAMARD_C_ABS_ACCUM_B_8x4(_r6,_r7) \ +} + +/*Performs an 8-point 1-D Hadamard transform, takes the absolute value of each + component, and accumulates everything into mm0. 
+ Note that mm0 will have an extra 4 added to each column, and that after + removing this value, the remainder will be half the conventional value.*/ +#define OC_HADAMARD_ABS_ACCUM_8x4(_r6,_r7) __asm{ \ + OC_HADAMARD_AB_8x4 \ + OC_HADAMARD_C_ABS_ACCUM_8x4(_r6,_r7) \ +} + +/*Performs two 4x4 transposes (mostly) in place. + On input, {mm0,mm1,mm2,mm3} contains rows {e,f,g,h}, and {mm4,mm5,mm6,mm7} + contains rows {a,b,c,d}. + On output, {0x40,0x50,0x60,0x70}+_off+BUF contains {e,f,g,h}^T, and + {mm4,mm5,mm6,mm7} contains the transposed rows {a,b,c,d}^T.*/ +#define OC_TRANSPOSE_4x4x2(_off) __asm{ \ + /*First 4x4 transpose:*/ \ + __asm movq [0x10+_off+BUF],mm5 \ + /*mm0 = e3 e2 e1 e0 \ + mm1 = f3 f2 f1 f0 \ + mm2 = g3 g2 g1 g0 \ + mm3 = h3 h2 h1 h0*/ \ + __asm movq mm5,mm2 \ + __asm punpcklwd mm2,mm3 \ + __asm punpckhwd mm5,mm3 \ + __asm movq mm3,mm0 \ + __asm punpcklwd mm0,mm1 \ + __asm punpckhwd mm3,mm1 \ + /*mm0 = f1 e1 f0 e0 \ + mm3 = f3 e3 f2 e2 \ + mm2 = h1 g1 h0 g0 \ + mm5 = h3 g3 h2 g2*/ \ + __asm movq mm1,mm0 \ + __asm punpckldq mm0,mm2 \ + __asm punpckhdq mm1,mm2 \ + __asm movq mm2,mm3 \ + __asm punpckhdq mm3,mm5 \ + __asm movq [0x40+_off+BUF],mm0 \ + __asm punpckldq mm2,mm5 \ + /*mm0 = h0 g0 f0 e0 \ + mm1 = h1 g1 f1 e1 \ + mm2 = h2 g2 f2 e2 \ + mm3 = h3 g3 f3 e3*/ \ + __asm movq mm5,[0x10+_off+BUF] \ + /*Second 4x4 transpose:*/ \ + /*mm4 = a3 a2 a1 a0 \ + mm5 = b3 b2 b1 b0 \ + mm6 = c3 c2 c1 c0 \ + mm7 = d3 d2 d1 d0*/ \ + __asm movq mm0,mm6 \ + __asm punpcklwd mm6,mm7 \ + __asm movq [0x50+_off+BUF],mm1 \ + __asm punpckhwd mm0,mm7 \ + __asm movq mm7,mm4 \ + __asm punpcklwd mm4,mm5 \ + __asm movq [0x60+_off+BUF],mm2 \ + __asm punpckhwd mm7,mm5 \ + /*mm4 = b1 a1 b0 a0 \ + mm7 = b3 a3 b2 a2 \ + mm6 = d1 c1 d0 c0 \ + mm0 = d3 c3 d2 c2*/ \ + __asm movq mm5,mm4 \ + __asm punpckldq mm4,mm6 \ + __asm movq [0x70+_off+BUF],mm3 \ + __asm punpckhdq mm5,mm6 \ + __asm movq mm6,mm7 \ + __asm punpckhdq mm7,mm0 \ + __asm punpckldq mm6,mm0 \ + /*mm4 = d0 c0 b0 a0 \ + mm5 = d1 
c1 b1 a1 \ + mm6 = d2 c2 b2 a2 \ + mm7 = d3 c3 b3 a3*/ \ +} + +static unsigned oc_int_frag_satd_thresh_mmxext(const unsigned char *_src, + int _src_ystride,const unsigned char *_ref,int _ref_ystride,unsigned _thresh){ + OC_ALIGN8(ogg_int16_t buf[64]); + ogg_int16_t *bufp; + unsigned ret1; + unsigned ret2; + bufp=buf; + __asm{ +#define SRC esi +#define REF eax +#define SRC_YSTRIDE ecx +#define REF_YSTRIDE edx +#define BUF edi +#define RET eax +#define RET2 edx + mov SRC,_src + mov SRC_YSTRIDE,_src_ystride + mov REF,_ref + mov REF_YSTRIDE,_ref_ystride + mov BUF,bufp + OC_LOAD_SUB_8x4(0x00) + OC_HADAMARD_8x4 + OC_TRANSPOSE_4x4x2(0x00) + /*Finish swapping out this 8x4 block to make room for the next one. + mm0...mm3 have been swapped out already.*/ + movq [0x00+BUF],mm4 + movq [0x10+BUF],mm5 + movq [0x20+BUF],mm6 + movq [0x30+BUF],mm7 + OC_LOAD_SUB_8x4(0x04) + OC_HADAMARD_8x4 + OC_TRANSPOSE_4x4x2(0x08) + /*Here the first 4x4 block of output from the last transpose is the second + 4x4 block of input for the next transform. + We have cleverly arranged that it already be in the appropriate place, so + we only have to do half the loads.*/ + movq mm1,[0x10+BUF] + movq mm2,[0x20+BUF] + movq mm3,[0x30+BUF] + movq mm0,[0x00+BUF] + OC_HADAMARD_ABS_ACCUM_8x4(0x28,0x38) + /*Up to this point, everything fit in 16 bits (8 input + 1 for the + difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1 + for the factor of two we dropped + 3 for the vertical accumulation). + Now we finally have to promote things to dwords. 
+ We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long + latency of pmaddwd by starting the next series of loads now.*/ + mov RET2,_thresh + pmaddwd mm0,mm7 + movq mm1,[0x50+BUF] + movq mm5,[0x58+BUF] + movq mm4,mm0 + movq mm2,[0x60+BUF] + punpckhdq mm0,mm0 + movq mm6,[0x68+BUF] + paddd mm4,mm0 + movq mm3,[0x70+BUF] + movd RET,mm4 + movq mm7,[0x78+BUF] + /*The sums produced by OC_HADAMARD_ABS_ACCUM_8x4 each have an extra 4 + added to them, and a factor of two removed; correct the final sum here.*/ + lea RET,[RET+RET-32] + movq mm0,[0x40+BUF] + cmp RET,RET2 + movq mm4,[0x48+BUF] + jae at_end + OC_HADAMARD_ABS_ACCUM_8x4(0x68,0x78) + pmaddwd mm0,mm7 + /*There isn't much to stick in here to hide the latency this time, but the + alternative to pmaddwd is movq->punpcklwd->punpckhwd->paddd, whose + latency is even worse.*/ + sub RET,32 + movq mm4,mm0 + punpckhdq mm0,mm0 + paddd mm4,mm0 + movd RET2,mm4 + lea RET,[RET+RET2*2] + align 16 +at_end: + mov ret1,RET +#undef SRC +#undef REF +#undef SRC_YSTRIDE +#undef REF_YSTRIDE +#undef BUF +#undef RET +#undef RET2 + } + return ret1; +} + +unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh){ + return oc_int_frag_satd_thresh_mmxext(_src,_ystride,_ref,_ystride,_thresh); +} + + +/*Our internal implementation of frag_copy2 takes an extra stride parameter so + we can share code with oc_enc_frag_satd2_thresh_mmxext().*/ +static void oc_int_frag_copy2_mmxext(unsigned char *_dst,int _dst_ystride, + const unsigned char *_src1,const unsigned char *_src2,int _src_ystride){ + __asm{ + /*Load the first 3 rows.*/ +#define DST_YSTRIDE edi +#define SRC_YSTRIDE esi +#define DST eax +#define SRC1 edx +#define SRC2 ecx + mov DST_YSTRIDE,_dst_ystride + mov SRC_YSTRIDE,_src_ystride + mov DST,_dst + mov SRC1,_src1 + mov SRC2,_src2 + movq mm0,[SRC1] + movq mm1,[SRC2] + movq mm2,[SRC1+SRC_YSTRIDE] + lea SRC1,[SRC1+SRC_YSTRIDE*2] + movq mm3,[SRC2+SRC_YSTRIDE] + lea 
SRC2,[SRC2+SRC_YSTRIDE*2] + pxor mm7,mm7 + movq mm4,[SRC1] + pcmpeqb mm6,mm6 + movq mm5,[SRC2] + /*mm7={1}x8.*/ + psubb mm7,mm6 + /*Start averaging mm0 and mm1 into mm6.*/ + movq mm6,mm0 + pxor mm0,mm1 + pavgb mm6,mm1 + /*mm1 is free, start averaging mm3 into mm2 using mm1.*/ + movq mm1,mm2 + pand mm0,mm7 + pavgb mm2,mm3 + pxor mm1,mm3 + /*mm3 is free.*/ + psubb mm6,mm0 + /*mm0 is free, start loading the next row.*/ + movq mm0,[SRC1+SRC_YSTRIDE] + /*Start averaging mm5 and mm4 using mm3.*/ + movq mm3,mm4 + /*mm6 [row 0] is done; write it out.*/ + movq [DST],mm6 + pand mm1,mm7 + pavgb mm4,mm5 + psubb mm2,mm1 + /*mm1 is free, continue loading the next row.*/ + movq mm1,[SRC2+SRC_YSTRIDE] + pxor mm3,mm5 + lea SRC1,[SRC1+SRC_YSTRIDE*2] + /*mm2 [row 1] is done; write it out.*/ + movq [DST+DST_YSTRIDE],mm2 + pand mm3,mm7 + /*Start loading the next row.*/ + movq mm2,[SRC1] + lea DST,[DST+DST_YSTRIDE*2] + psubb mm4,mm3 + lea SRC2,[SRC2+SRC_YSTRIDE*2] + /*mm4 [row 2] is done; write it out.*/ + movq [DST],mm4 + /*Continue loading the next row.*/ + movq mm3,[SRC2] + /*Start averaging mm0 and mm1 into mm6.*/ + movq mm6,mm0 + pxor mm0,mm1 + /*Start loading the next row.*/ + movq mm4,[SRC1+SRC_YSTRIDE] + pavgb mm6,mm1 + /*mm1 is free; start averaging mm3 into mm2 using mm1.*/ + movq mm1,mm2 + pand mm0,mm7 + /*Continue loading the next row.*/ + movq mm5,[SRC2+SRC_YSTRIDE] + pavgb mm2,mm3 + lea SRC1,[SRC1+SRC_YSTRIDE*2] + pxor mm1,mm3 + /*mm3 is free.*/ + psubb mm6,mm0 + /*mm0 is free, start loading the next row.*/ + movq mm0,[SRC1] + /*Start averaging mm5 into mm4 using mm3.*/ + movq mm3,mm4 + /*mm6 [row 3] is done; write it out.*/ + movq [DST+DST_YSTRIDE],mm6 + pand mm1,mm7 + lea SRC2,[SRC2+SRC_YSTRIDE*2] + pavgb mm4,mm5 + lea DST,[DST+DST_YSTRIDE*2] + psubb mm2,mm1 + /*mm1 is free; continue loading the next row.*/ + movq mm1,[SRC2] + pxor mm3,mm5 + /*mm2 [row 4] is done; write it out.*/ + movq [DST],mm2 + pand mm3,mm7 + /*Start loading the next row.*/ + movq 
mm2,[SRC1+SRC_YSTRIDE] + psubb mm4,mm3 + /*Start averaging mm0 and mm1 into mm6.*/ + movq mm6,mm0 + /*Continue loading the next row.*/ + movq mm3,[SRC2+SRC_YSTRIDE] + /*mm4 [row 5] is done; write it out.*/ + movq [DST+DST_YSTRIDE],mm4 + pxor mm0,mm1 + pavgb mm6,mm1 + /*mm4 is free; start averaging mm3 into mm2 using mm4.*/ + movq mm4,mm2 + pand mm0,mm7 + pavgb mm2,mm3 + pxor mm4,mm3 + lea DST,[DST+DST_YSTRIDE*2] + psubb mm6,mm0 + pand mm4,mm7 + /*mm6 [row 6] is done, write it out.*/ + movq [DST],mm6 + psubb mm2,mm4 + /*mm2 [row 7] is done, write it out.*/ + movq [DST+DST_YSTRIDE],mm2 +#undef SRC1 +#undef SRC2 +#undef SRC_YSTRIDE +#undef DST_YSTRIDE +#undef DST + } +} + +unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh){ + OC_ALIGN8(unsigned char ref[64]); + oc_int_frag_copy2_mmxext(ref,8,_ref1,_ref2,_ystride); + return oc_int_frag_satd_thresh_mmxext(_src,_ystride,ref,8,_thresh); +} + +unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src, + int _ystride){ + OC_ALIGN8(ogg_int16_t buf[64]); + ogg_int16_t *bufp; + unsigned ret1; + unsigned ret2; + bufp=buf; + __asm{ +#define SRC eax +#define SRC4 esi +#define BUF edi +#define RET eax +#define RET_WORD ax +#define RET2 ecx +#define YSTRIDE edx +#define YSTRIDE3 ecx + mov SRC,_src + mov BUF,bufp + mov YSTRIDE,_ystride + /* src4 = src+4*ystride */ + lea SRC4,[SRC+YSTRIDE*4] + /* ystride3 = 3*ystride */ + lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] + OC_LOAD_8x4(0x00) + OC_HADAMARD_8x4 + OC_TRANSPOSE_4x4x2(0x00) + /*Finish swapping out this 8x4 block to make room for the next one. + mm0...mm3 have been swapped out already.*/ + movq [0x00+BUF],mm4 + movq [0x10+BUF],mm5 + movq [0x20+BUF],mm6 + movq [0x30+BUF],mm7 + OC_LOAD_8x4(0x04) + OC_HADAMARD_8x4 + OC_TRANSPOSE_4x4x2(0x08) + /*Here the first 4x4 block of output from the last transpose is the second + 4x4 block of input for the next transform. 
+ We have cleverly arranged that it already be in the appropriate place, so + we only have to do half the loads.*/ + movq mm1,[0x10+BUF] + movq mm2,[0x20+BUF] + movq mm3,[0x30+BUF] + movq mm0,[0x00+BUF] + /*We split out the stages here so we can save the DC coefficient in the + middle.*/ + OC_HADAMARD_AB_8x4 + OC_HADAMARD_C_ABS_ACCUM_A_8x4(0x28,0x38) + movd RET,mm1 + OC_HADAMARD_C_ABS_ACCUM_B_8x4(0x28,0x38) + /*Up to this point, everything fit in 16 bits (8 input + 1 for the + difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1 + for the factor of two we dropped + 3 for the vertical accumulation). + Now we finally have to promote things to dwords. + We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long + latency of pmaddwd by starting the next series of loads now.*/ + pmaddwd mm0,mm7 + movq mm1,[0x50+BUF] + movq mm5,[0x58+BUF] + movq mm2,[0x60+BUF] + movq mm4,mm0 + movq mm6,[0x68+BUF] + punpckhdq mm0,mm0 + movq mm3,[0x70+BUF] + paddd mm4,mm0 + movq mm7,[0x78+BUF] + movd RET2,mm4 + movq mm0,[0x40+BUF] + movq mm4,[0x48+BUF] + OC_HADAMARD_ABS_ACCUM_8x4(0x68,0x78) + pmaddwd mm0,mm7 + /*We assume that the DC coefficient is always positive (which is true, + because the input to the INTRA transform was not a difference).*/ + movzx RET,RET_WORD + add RET2,RET2 + sub RET2,RET + movq mm4,mm0 + punpckhdq mm0,mm0 + paddd mm4,mm0 + movd RET,mm4 + lea RET,[-64+RET2+RET*2] + mov [ret1],RET +#undef SRC +#undef SRC4 +#undef BUF +#undef RET +#undef RET_WORD +#undef RET2 +#undef YSTRIDE +#undef YSTRIDE3 + } + return ret1; +} + +void oc_enc_frag_sub_mmx(ogg_int16_t _residue[64], + const unsigned char *_src, const unsigned char *_ref,int _ystride){ + int i; + __asm pxor mm7,mm7 + for(i=4;i-->0;){ + __asm{ +#define SRC edx +#define YSTRIDE esi +#define RESIDUE eax +#define REF ecx + mov YSTRIDE,_ystride + mov RESIDUE,_residue + mov SRC,_src + mov REF,_ref + /*mm0=[src]*/ + movq mm0,[SRC] + /*mm1=[ref]*/ + movq mm1,[REF] + /*mm4=[src+ystride]*/ + movq 
mm4,[SRC+YSTRIDE] + /*mm5=[ref+ystride]*/ + movq mm5,[REF+YSTRIDE] + /*Compute [src]-[ref].*/ + movq mm2,mm0 + punpcklbw mm0,mm7 + movq mm3,mm1 + punpckhbw mm2,mm7 + punpcklbw mm1,mm7 + punpckhbw mm3,mm7 + psubw mm0,mm1 + psubw mm2,mm3 + /*Compute [src+ystride]-[ref+ystride].*/ + movq mm1,mm4 + punpcklbw mm4,mm7 + movq mm3,mm5 + punpckhbw mm1,mm7 + lea SRC,[SRC+YSTRIDE*2] + punpcklbw mm5,mm7 + lea REF,[REF+YSTRIDE*2] + punpckhbw mm3,mm7 + psubw mm4,mm5 + psubw mm1,mm3 + /*Write the answer out.*/ + movq [RESIDUE+0x00],mm0 + movq [RESIDUE+0x08],mm2 + movq [RESIDUE+0x10],mm4 + movq [RESIDUE+0x18],mm1 + lea RESIDUE,[RESIDUE+0x20] + mov _residue,RESIDUE + mov _src,SRC + mov _ref,REF +#undef SRC +#undef YSTRIDE +#undef RESIDUE +#undef REF + } + } +} + +void oc_enc_frag_sub_128_mmx(ogg_int16_t _residue[64], + const unsigned char *_src,int _ystride){ + __asm{ +#define YSTRIDE edx +#define YSTRIDE3 edi +#define RESIDUE ecx +#define SRC eax + mov YSTRIDE,_ystride + mov RESIDUE,_residue + mov SRC,_src + /*mm0=[src]*/ + movq mm0,[SRC] + /*mm1=[src+ystride]*/ + movq mm1,[SRC+YSTRIDE] + /*mm6={-1}x4*/ + pcmpeqw mm6,mm6 + /*mm2=[src+2*ystride]*/ + movq mm2,[SRC+YSTRIDE*2] + /*[ystride3]=3*[ystride]*/ + lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] + /*mm6={0x8000}x4*/ + psllw mm6,15 + /*mm3=[src+3*ystride]*/ + movq mm3,[SRC+YSTRIDE3] + /*mm6={128}x4*/ + psrlw mm6,8 + /*mm7=0*/ + pxor mm7,mm7 + /*[src]=[src]+4*[ystride]*/ + lea SRC,[SRC+YSTRIDE*4] + /*Compute [src]-128 and [src+ystride]-128*/ + movq mm4,mm0 + punpcklbw mm0,mm7 + movq mm5,mm1 + punpckhbw mm4,mm7 + psubw mm0,mm6 + punpcklbw mm1,mm7 + psubw mm4,mm6 + punpckhbw mm5,mm7 + psubw mm1,mm6 + psubw mm5,mm6 + /*Write the answer out.*/ + movq [RESIDUE+0x00],mm0 + movq [RESIDUE+0x08],mm4 + movq [RESIDUE+0x10],mm1 + movq [RESIDUE+0x18],mm5 + /*mm0=[src+4*ystride]*/ + movq mm0,[SRC] + /*mm1=[src+5*ystride]*/ + movq mm1,[SRC+YSTRIDE] + /*Compute [src+2*ystride]-128 and [src+3*ystride]-128*/ + movq mm4,mm2 + punpcklbw mm2,mm7 + movq mm5,mm3 +
punpckhbw mm4,mm7 + psubw mm2,mm6 + punpcklbw mm3,mm7 + psubw mm4,mm6 + punpckhbw mm5,mm7 + psubw mm3,mm6 + psubw mm5,mm6 + /*Write the answer out.*/ + movq [RESIDUE+0x20],mm2 + movq [RESIDUE+0x28],mm4 + movq [RESIDUE+0x30],mm3 + movq [RESIDUE+0x38],mm5 + /*Compute [src+4*ystride]-128 and [src+5*ystride]-128*/ + movq mm2,[SRC+YSTRIDE*2] + movq mm3,[SRC+YSTRIDE3] + movq mm4,mm0 + punpcklbw mm0,mm7 + movq mm5,mm1 + punpckhbw mm4,mm7 + psubw mm0,mm6 + punpcklbw mm1,mm7 + psubw mm4,mm6 + punpckhbw mm5,mm7 + psubw mm1,mm6 + psubw mm5,mm6 + /*Write the answer out.*/ + movq [RESIDUE+0x40],mm0 + movq [RESIDUE+0x48],mm4 + movq [RESIDUE+0x50],mm1 + movq [RESIDUE+0x58],mm5 + /*Compute [src+6*ystride]-128 and [src+7*ystride]-128*/ + movq mm4,mm2 + punpcklbw mm2,mm7 + movq mm5,mm3 + punpckhbw mm4,mm7 + psubw mm2,mm6 + punpcklbw mm3,mm7 + psubw mm4,mm6 + punpckhbw mm5,mm7 + psubw mm3,mm6 + psubw mm5,mm6 + /*Write the answer out.*/ + movq [RESIDUE+0x60],mm2 + movq [RESIDUE+0x68],mm4 + movq [RESIDUE+0x70],mm3 + movq [RESIDUE+0x78],mm5 +#undef YSTRIDE +#undef YSTRIDE3 +#undef RESIDUE +#undef SRC + } +} + +void oc_enc_frag_copy2_mmxext(unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride){ + oc_int_frag_copy2_mmxext(_dst,_ystride,_src1,_src2,_ystride); +} + +#endif diff --git a/thirdparty/libtheora/x86_vc/mmxfdct.c b/thirdparty/libtheora/x86_vc/mmxfdct.c new file mode 100644 index 0000000000..dcf17c9fa7 --- /dev/null +++ b/thirdparty/libtheora/x86_vc/mmxfdct.c @@ -0,0 +1,670 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 1999-2006 * + * by the Xiph.Org Foundation http://www.xiph.org/ * + * * + ********************************************************************/ + /*MMX fDCT implementation for x86_32*/ +/*$Id: fdct_ses2.c 14579 2008-03-12 06:42:40Z xiphmont $*/ +#include "x86enc.h" + +#if defined(OC_X86_ASM) + +#define OC_FDCT_STAGE1_8x4 __asm{ \ + /*Stage 1:*/ \ + /*mm0=t7'=t0-t7*/ \ + __asm psubw mm0,mm7 \ + __asm paddw mm7,mm7 \ + /*mm1=t6'=t1-t6*/ \ + __asm psubw mm1, mm6 \ + __asm paddw mm6,mm6 \ + /*mm2=t5'=t2-t5*/ \ + __asm psubw mm2,mm5 \ + __asm paddw mm5,mm5 \ + /*mm3=t4'=t3-t4*/ \ + __asm psubw mm3,mm4 \ + __asm paddw mm4,mm4 \ + /*mm7=t0'=t0+t7*/ \ + __asm paddw mm7,mm0 \ + /*mm6=t1'=t1+t6*/ \ + __asm paddw mm6,mm1 \ + /*mm5=t2'=t2+t5*/ \ + __asm paddw mm5,mm2 \ + /*mm4=t3'=t3+t4*/ \ + __asm paddw mm4,mm3\ +} + +#define OC_FDCT8x4(_r0,_r1,_r2,_r3,_r4,_r5,_r6,_r7) __asm{ \ + /*Stage 2:*/ \ + /*mm7=t3''=t0'-t3'*/ \ + __asm psubw mm7,mm4 \ + __asm paddw mm4,mm4 \ + /*mm6=t2''=t1'-t2'*/ \ + __asm psubw mm6,mm5 \ + __asm movq [Y+_r6],mm7 \ + __asm paddw mm5,mm5 \ + /*mm1=t5''=t6'-t5'*/ \ + __asm psubw mm1,mm2 \ + __asm movq [Y+_r2],mm6 \ + /*mm4=t0''=t0'+t3'*/ \ + __asm paddw mm4,mm7 \ + __asm paddw mm2,mm2 \ + /*mm5=t1''=t1'+t2'*/ \ + __asm movq [Y+_r0],mm4 \ + __asm paddw mm5,mm6 \ + /*mm2=t6''=t6'+t5'*/ \ + __asm paddw mm2,mm1 \ + __asm movq [Y+_r4],mm5 \ + /*mm0=t7', mm1=t5'', mm2=t6'', mm3=t4'.*/ \ + /*mm4, mm5, mm6, mm7 are free.*/ \ + /*Stage 3:*/ \ + /*mm6={2}x4, mm7={27146,0xB500>>1}x2*/ \ + __asm mov A,0x5A806A0A \ + __asm pcmpeqb mm6,mm6 \ + __asm movd mm7,A \ + __asm psrlw mm6,15 \ + __asm punpckldq mm7,mm7 \ + __asm paddw mm6,mm6 \ + /*mm0=0, m2={-1}x4 \ + mm5:mm4=t5''*27146+0xB500*/ \ + __asm movq mm4,mm1 \ + __asm movq mm5,mm1 \ + __asm punpcklwd mm4,mm6 \ + __asm movq [Y+_r3],mm2 \ + __asm pmaddwd mm4,mm7 \ + __asm movq [Y+_r7],mm0 \ + __asm punpckhwd mm5,mm6 \ + __asm pxor mm0,mm0 \ + __asm pmaddwd mm5,mm7 
\ + __asm pcmpeqb mm2,mm2 \ + /*mm2=t6'', mm1=t5''+(t5''!=0) \ + mm4=(t5''*27146+0xB500>>16)*/ \ + __asm pcmpeqw mm0,mm1 \ + __asm psrad mm4,16 \ + __asm psubw mm0,mm2 \ + __asm movq mm2, [Y+_r3] \ + __asm psrad mm5,16 \ + __asm paddw mm1,mm0 \ + __asm packssdw mm4,mm5 \ + /*mm4=s=(t5''*27146+0xB500>>16)+t5''+(t5''!=0)>>1*/ \ + __asm paddw mm4,mm1 \ + __asm movq mm0, [Y+_r7] \ + __asm psraw mm4,1 \ + __asm movq mm1,mm3 \ + /*mm3=t4''=t4'+s*/ \ + __asm paddw mm3,mm4 \ + /*mm1=t5'''=t4'-s*/ \ + __asm psubw mm1,mm4 \ + /*mm1=0, mm3={-1}x4 \ + mm5:mm4=t6''*27146+0xB500*/ \ + __asm movq mm4,mm2 \ + __asm movq mm5,mm2 \ + __asm punpcklwd mm4,mm6 \ + __asm movq [Y+_r5],mm1 \ + __asm pmaddwd mm4,mm7 \ + __asm movq [Y+_r1],mm3 \ + __asm punpckhwd mm5,mm6 \ + __asm pxor mm1,mm1 \ + __asm pmaddwd mm5,mm7 \ + __asm pcmpeqb mm3,mm3 \ + /*mm2=t6''+(t6''!=0), mm4=(t6''*27146+0xB500>>16)*/ \ + __asm psrad mm4,16 \ + __asm pcmpeqw mm1,mm2 \ + __asm psrad mm5,16 \ + __asm psubw mm1,mm3 \ + __asm packssdw mm4,mm5 \ + __asm paddw mm2,mm1 \ + /*mm1=t1'' \ + mm4=s=(t6''*27146+0xB500>>16)+t6''+(t6''!=0)>>1*/ \ + __asm paddw mm4,mm2 \ + __asm movq mm1,[Y+_r4] \ + __asm psraw mm4,1 \ + __asm movq mm2,mm0 \ + /*mm7={54491-0x7FFF,0x7FFF}x2 \ + mm0=t7''=t7'+s*/ \ + __asm paddw mm0,mm4 \ + /*mm2=t6'''=t7'-s*/ \ + __asm psubw mm2,mm4 \ + /*Stage 4:*/ \ + /*mm0=0, mm2=t0'' \ + mm5:mm4=t1''*27146+0xB500*/ \ + __asm movq mm4,mm1 \ + __asm movq mm5,mm1 \ + __asm punpcklwd mm4,mm6 \ + __asm movq [Y+_r3],mm2 \ + __asm pmaddwd mm4,mm7 \ + __asm movq mm2,[Y+_r0] \ + __asm punpckhwd mm5,mm6 \ + __asm movq [Y+_r7],mm0 \ + __asm pmaddwd mm5,mm7 \ + __asm pxor mm0,mm0 \ + /*mm7={27146,0x4000>>1}x2 \ + mm0=s=(t1''*27146+0xB500>>16)+t1''+(t1''!=0)*/ \ + __asm psrad mm4,16 \ + __asm mov A,0x20006A0A \ + __asm pcmpeqw mm0,mm1 \ + __asm movd mm7,A \ + __asm psrad mm5,16 \ + __asm psubw mm0,mm3 \ + __asm packssdw mm4,mm5 \ + __asm paddw mm0,mm1 \ + __asm punpckldq mm7,mm7 \ + __asm paddw mm0,mm4 \ + 
/*mm6={0x00000E3D}x2 \ + mm1=-(t0''==0), mm5:mm4=t0''*27146+0x4000*/ \ + __asm movq mm4,mm2 \ + __asm movq mm5,mm2 \ + __asm punpcklwd mm4,mm6 \ + __asm mov A,0x0E3D \ + __asm pmaddwd mm4,mm7 \ + __asm punpckhwd mm5,mm6 \ + __asm movd mm6,A \ + __asm pmaddwd mm5,mm7 \ + __asm pxor mm1,mm1 \ + __asm punpckldq mm6,mm6 \ + __asm pcmpeqw mm1,mm2 \ + /*mm4=r=(t0''*27146+0x4000>>16)+t0''+(t0''!=0)*/ \ + __asm psrad mm4,16 \ + __asm psubw mm1,mm3 \ + __asm psrad mm5,16 \ + __asm paddw mm2,mm1 \ + __asm packssdw mm4,mm5 \ + __asm movq mm1,[Y+_r5] \ + __asm paddw mm4,mm2 \ + /*mm2=t6'', mm0=_y[0]=u=r+s>>1 \ + The naive implementation could cause overflow, so we use \ + u=(r&s)+((r^s)>>1).*/ \ + __asm movq mm2,[Y+_r3] \ + __asm movq mm7,mm0 \ + __asm pxor mm0,mm4 \ + __asm pand mm7,mm4 \ + __asm psraw mm0,1 \ + __asm mov A,0x7FFF54DC \ + __asm paddw mm0,mm7 \ + __asm movd mm7,A \ + /*mm7={54491-0x7FFF,0x7FFF}x2 \ + mm4=_y[4]=v=r-u*/ \ + __asm psubw mm4,mm0 \ + __asm punpckldq mm7,mm7 \ + __asm movq [Y+_r4],mm4 \ + /*mm0=0, mm7={36410}x4 \ + mm1=(t5'''!=0), mm5:mm4=54491*t5'''+0x0E3D*/ \ + __asm movq mm4,mm1 \ + __asm movq mm5,mm1 \ + __asm punpcklwd mm4,mm1 \ + __asm mov A,0x8E3A8E3A \ + __asm pmaddwd mm4,mm7 \ + __asm movq [Y+_r0],mm0 \ + __asm punpckhwd mm5,mm1 \ + __asm pxor mm0,mm0 \ + __asm pmaddwd mm5,mm7 \ + __asm pcmpeqw mm1,mm0 \ + __asm movd mm7,A \ + __asm psubw mm1,mm3 \ + __asm punpckldq mm7,mm7 \ + __asm paddd mm4,mm6 \ + __asm paddd mm5,mm6 \ + /*mm0=0 \ + mm3:mm1=36410*t6'''+((t5'''!=0)<<16)*/ \ + __asm movq mm6,mm2 \ + __asm movq mm3,mm2 \ + __asm pmulhw mm6,mm7 \ + __asm paddw mm1,mm2 \ + __asm pmullw mm3,mm7 \ + __asm pxor mm0,mm0 \ + __asm paddw mm6,mm1 \ + __asm movq mm1,mm3 \ + __asm punpckhwd mm3,mm6 \ + __asm punpcklwd mm1,mm6 \ + /*mm3={-1}x4, mm6={1}x4 \ + mm4=_y[5]=u=(54491*t5'''+36410*t6'''+0x0E3D>>16)+(t5'''!=0)*/ \ + __asm paddd mm5,mm3 \ + __asm paddd mm4,mm1 \ + __asm psrad mm5,16 \ + __asm pxor mm6,mm6 \ + __asm psrad mm4,16 \ + __asm pcmpeqb 
mm3,mm3 \ + __asm packssdw mm4,mm5 \ + __asm psubw mm6,mm3 \ + /*mm1=t7'', mm7={26568,0x3400}x2 \ + mm2=s=t6'''-(36410*u>>16)*/ \ + __asm movq mm1,mm4 \ + __asm mov A,0x340067C8 \ + __asm pmulhw mm4,mm7 \ + __asm movd mm7,A \ + __asm movq [Y+_r5],mm1 \ + __asm punpckldq mm7,mm7 \ + __asm paddw mm4,mm1 \ + __asm movq mm1,[Y+_r7] \ + __asm psubw mm2,mm4 \ + /*mm6={0x00007B1B}x2 \ + mm0=(s!=0), mm5:mm4=s*26568+0x3400*/ \ + __asm movq mm4,mm2 \ + __asm movq mm5,mm2 \ + __asm punpcklwd mm4,mm6 \ + __asm pcmpeqw mm0,mm2 \ + __asm pmaddwd mm4,mm7 \ + __asm mov A,0x7B1B \ + __asm punpckhwd mm5,mm6 \ + __asm movd mm6,A \ + __asm pmaddwd mm5,mm7 \ + __asm psubw mm0,mm3 \ + __asm punpckldq mm6,mm6 \ + /*mm7={64277-0x7FFF,0x7FFF}x2 \ + mm2=_y[3]=v=(s*26568+0x3400>>17)+s+(s!=0)*/ \ + __asm psrad mm4,17 \ + __asm paddw mm2,mm0 \ + __asm psrad mm5,17 \ + __asm mov A,0x7FFF7B16 \ + __asm packssdw mm4,mm5 \ + __asm movd mm7,A \ + __asm paddw mm2,mm4 \ + __asm punpckldq mm7,mm7 \ + /*mm0=0, mm7={12785}x4 \ + mm1=(t7''!=0), mm2=t4'', mm5:mm4=64277*t7''+0x7B1B*/ \ + __asm movq mm4,mm1 \ + __asm movq mm5,mm1 \ + __asm movq [Y+_r3],mm2 \ + __asm punpcklwd mm4,mm1 \ + __asm movq mm2,[Y+_r1] \ + __asm pmaddwd mm4,mm7 \ + __asm mov A,0x31F131F1 \ + __asm punpckhwd mm5,mm1 \ + __asm pxor mm0,mm0 \ + __asm pmaddwd mm5,mm7 \ + __asm pcmpeqw mm1,mm0 \ + __asm movd mm7,A \ + __asm psubw mm1,mm3 \ + __asm punpckldq mm7,mm7 \ + __asm paddd mm4,mm6 \ + __asm paddd mm5,mm6 \ + /*mm3:mm1=12785*t4'''+((t7''!=0)<<16)*/ \ + __asm movq mm6,mm2 \ + __asm movq mm3,mm2 \ + __asm pmulhw mm6,mm7 \ + __asm pmullw mm3,mm7 \ + __asm paddw mm6,mm1 \ + __asm movq mm1,mm3 \ + __asm punpckhwd mm3,mm6 \ + __asm punpcklwd mm1,mm6 \ + /*mm3={-1}x4, mm6={1}x4 \ + mm4=_y[1]=u=(12785*t4'''+64277*t7''+0x7B1B>>16)+(t7''!=0)*/ \ + __asm paddd mm5,mm3 \ + __asm paddd mm4,mm1 \ + __asm psrad mm5,16 \ + __asm pxor mm6,mm6 \ + __asm psrad mm4,16 \ + __asm pcmpeqb mm3,mm3 \ + __asm packssdw mm4,mm5 \ + __asm psubw mm6,mm3 \ + 
/*mm1=t3'', mm7={20539,0x3000}x2 \ + mm4=s=(12785*u>>16)-t4''*/ \ + __asm movq [Y+_r1],mm4 \ + __asm pmulhw mm4,mm7 \ + __asm mov A,0x3000503B \ + __asm movq mm1,[Y+_r6] \ + __asm movd mm7,A \ + __asm psubw mm4,mm2 \ + __asm punpckldq mm7,mm7 \ + /*mm6={0x00006CB7}x2 \ + mm0=(s!=0), mm5:mm4=s*20539+0x3000*/ \ + __asm movq mm5,mm4 \ + __asm movq mm2,mm4 \ + __asm punpcklwd mm4,mm6 \ + __asm pcmpeqw mm0,mm2 \ + __asm pmaddwd mm4,mm7 \ + __asm mov A,0x6CB7 \ + __asm punpckhwd mm5,mm6 \ + __asm movd mm6,A \ + __asm pmaddwd mm5,mm7 \ + __asm psubw mm0,mm3 \ + __asm punpckldq mm6,mm6 \ + /*mm7={60547-0x7FFF,0x7FFF}x2 \ + mm2=_y[7]=v=(s*20539+0x3000>>20)+s+(s!=0)*/ \ + __asm psrad mm4,20 \ + __asm paddw mm2,mm0 \ + __asm psrad mm5,20 \ + __asm mov A,0x7FFF6C84 \ + __asm packssdw mm4,mm5 \ + __asm movd mm7,A \ + __asm paddw mm2,mm4 \ + __asm punpckldq mm7,mm7 \ + /*mm0=0, mm7={25080}x4 \ + mm2=t2'', mm5:mm4=60547*t3''+0x6CB7*/ \ + __asm movq mm4,mm1 \ + __asm movq mm5,mm1 \ + __asm movq [Y+_r7],mm2 \ + __asm punpcklwd mm4,mm1 \ + __asm movq mm2,[Y+_r2] \ + __asm pmaddwd mm4,mm7 \ + __asm mov A,0x61F861F8 \ + __asm punpckhwd mm5,mm1 \ + __asm pxor mm0,mm0 \ + __asm pmaddwd mm5,mm7 \ + __asm movd mm7,A \ + __asm pcmpeqw mm1,mm0 \ + __asm psubw mm1,mm3 \ + __asm punpckldq mm7,mm7 \ + __asm paddd mm4,mm6 \ + __asm paddd mm5,mm6 \ + /*mm3:mm1=25080*t2''+((t3''!=0)<<16)*/ \ + __asm movq mm6,mm2 \ + __asm movq mm3,mm2 \ + __asm pmulhw mm6,mm7 \ + __asm pmullw mm3,mm7 \ + __asm paddw mm6,mm1 \ + __asm movq mm1,mm3 \ + __asm punpckhwd mm3,mm6 \ + __asm punpcklwd mm1,mm6 \ + /*mm1={-1}x4 \ + mm4=u=(25080*t2''+60547*t3''+0x6CB7>>16)+(t3''!=0)*/ \ + __asm paddd mm5,mm3 \ + __asm paddd mm4,mm1 \ + __asm psrad mm5,16 \ + __asm mov A,0x28005460 \ + __asm psrad mm4,16 \ + __asm pcmpeqb mm1,mm1 \ + __asm packssdw mm4,mm5 \ + /*mm5={1}x4, mm6=_y[2]=u, mm7={21600,0x2800}x2 \ + mm4=s=(25080*u>>16)-t2''*/ \ + __asm movq mm6,mm4 \ + __asm pmulhw mm4,mm7 \ + __asm pxor mm5,mm5 \ + __asm movd 
mm7,A \ + __asm psubw mm5,mm1 \ + __asm punpckldq mm7,mm7 \ + __asm psubw mm4,mm2 \ + /*mm2=s+(s!=0) \ + mm4:mm3=s*21600+0x2800*/ \ + __asm movq mm3,mm4 \ + __asm movq mm2,mm4 \ + __asm punpckhwd mm4,mm5 \ + __asm pcmpeqw mm0,mm2 \ + __asm pmaddwd mm4,mm7 \ + __asm psubw mm0,mm1 \ + __asm punpcklwd mm3,mm5 \ + __asm paddw mm2,mm0 \ + __asm pmaddwd mm3,mm7 \ + /*mm0=_y[4], mm1=_y[7], mm4=_y[0], mm5=_y[5] \ + mm3=_y[6]=v=(s*21600+0x2800>>18)+s+(s!=0)*/ \ + __asm movq mm0,[Y+_r4] \ + __asm psrad mm4,18 \ + __asm movq mm5,[Y+_r5] \ + __asm psrad mm3,18 \ + __asm movq mm1,[Y+_r7] \ + __asm packssdw mm3,mm4 \ + __asm movq mm4,[Y+_r0] \ + __asm paddw mm3,mm2 \ +} + +/*On input, mm4=_y[0], mm6=_y[2], mm0=_y[4], mm5=_y[5], mm3=_y[6], mm1=_y[7]. + On output, {_y[4],mm1,mm2,mm3} contains the transpose of _y[4...7] and + {mm4,mm5,mm6,mm7} contains the transpose of _y[0...3].*/ +#define OC_TRANSPOSE8x4(_r0,_r1,_r2,_r3,_r4,_r5,_r6,_r7) __asm{ \ + /*First 4x4 transpose:*/ \ + /*mm0 = e3 e2 e1 e0 \ + mm5 = f3 f2 f1 f0 \ + mm3 = g3 g2 g1 g0 \ + mm1 = h3 h2 h1 h0*/ \ + __asm movq mm2,mm0 \ + __asm punpcklwd mm0,mm5 \ + __asm punpckhwd mm2,mm5 \ + __asm movq mm5,mm3 \ + __asm punpcklwd mm3,mm1 \ + __asm punpckhwd mm5,mm1 \ + /*mm0 = f1 e1 f0 e0 \ + mm2 = f3 e3 f2 e2 \ + mm3 = h1 g1 h0 g0 \ + mm5 = h3 g3 h2 g2*/ \ + __asm movq mm1,mm0 \ + __asm punpckldq mm0,mm3 \ + __asm movq [Y+_r4],mm0 \ + __asm punpckhdq mm1,mm3 \ + __asm movq mm0,[Y+_r1] \ + __asm movq mm3,mm2 \ + __asm punpckldq mm2,mm5 \ + __asm punpckhdq mm3,mm5 \ + __asm movq mm5,[Y+_r3] \ + /*_y[4] = h0 g0 f0 e0 \ + mm1 = h1 g1 f1 e1 \ + mm2 = h2 g2 f2 e2 \ + mm3 = h3 g3 f3 e3*/ \ + /*Second 4x4 transpose:*/ \ + /*mm4 = a3 a2 a1 a0 \ + mm0 = b3 b2 b1 b0 \ + mm6 = c3 c2 c1 c0 \ + mm5 = d3 d2 d1 d0*/ \ + __asm movq mm7,mm4 \ + __asm punpcklwd mm4,mm0 \ + __asm punpckhwd mm7,mm0 \ + __asm movq mm0,mm6 \ + __asm punpcklwd mm6,mm5 \ + __asm punpckhwd mm0,mm5 \ + /*mm4 = b1 a1 b0 a0 \ + mm7 = b3 a3 b2 a2 \ + mm6 = d1 c1 d0 c0 \ + 
mm0 = d3 c3 d2 c2*/ \ + __asm movq mm5,mm4 \ + __asm punpckldq mm4,mm6 \ + __asm punpckhdq mm5,mm6 \ + __asm movq mm6,mm7 \ + __asm punpckhdq mm7,mm0 \ + __asm punpckldq mm6,mm0 \ + /*mm4 = d0 c0 b0 a0 \ + mm5 = d1 c1 b1 a1 \ + mm6 = d2 c2 b2 a2 \ + mm7 = d3 c3 b3 a3*/ \ +} + +/*MMX implementation of the fDCT.*/ +void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]){ + ptrdiff_t a; + __asm{ +#define Y eax +#define A ecx +#define X edx + /*Add two extra bits of working precision to improve accuracy; any more and + we could overflow.*/ + /*We also add biases to correct for some systematic error that remains in + the full fDCT->iDCT round trip.*/ + mov X, _x + mov Y, _y + movq mm0,[0x00+X] + movq mm1,[0x10+X] + movq mm2,[0x20+X] + movq mm3,[0x30+X] + pcmpeqb mm4,mm4 + pxor mm7,mm7 + movq mm5,mm0 + psllw mm0,2 + pcmpeqw mm5,mm7 + movq mm7,[0x70+X] + psllw mm1,2 + psubw mm5,mm4 + psllw mm2,2 + mov A,1 + pslld mm5,16 + movd mm6,A + psllq mm5,16 + mov A,0x10001 + psllw mm3,2 + movd mm4,A + punpckhwd mm5,mm6 + psubw mm1,mm6 + movq mm6,[0x60+X] + paddw mm0,mm5 + movq mm5,[0x50+X] + paddw mm0,mm4 + movq mm4,[0x40+X] + /*We inline stage1 of the transform here so we can get better instruction + scheduling with the shifts.*/ + /*mm0=t7'=t0-t7*/ + psllw mm7,2 + psubw mm0,mm7 + psllw mm6,2 + paddw mm7,mm7 + /*mm1=t6'=t1-t6*/ + psllw mm5,2 + psubw mm1,mm6 + psllw mm4,2 + paddw mm6,mm6 + /*mm2=t5'=t2-t5*/ + psubw mm2,mm5 + paddw mm5,mm5 + /*mm3=t4'=t3-t4*/ + psubw mm3,mm4 + paddw mm4,mm4 + /*mm7=t0'=t0+t7*/ + paddw mm7,mm0 + /*mm6=t1'=t1+t6*/ + paddw mm6,mm1 + /*mm5=t2'=t2+t5*/ + paddw mm5,mm2 + /*mm4=t3'=t3+t4*/ + paddw mm4,mm3 + OC_FDCT8x4(0x00,0x10,0x20,0x30,0x40,0x50,0x60,0x70) + OC_TRANSPOSE8x4(0x00,0x10,0x20,0x30,0x40,0x50,0x60,0x70) + /*Swap out this 8x4 block for the next one.*/ + movq mm0,[0x08+X] + movq [0x30+Y],mm7 + movq mm7,[0x78+X] + movq [0x50+Y],mm1 + movq mm1,[0x18+X] + movq [0x20+Y],mm6 + movq mm6,[0x68+X] + movq [0x60+Y],mm2 + movq mm2,[0x28+X] + 
movq [0x10+Y],mm5 + movq mm5,[0x58+X] + movq [0x70+Y],mm3 + movq mm3,[0x38+X] + /*And increase its working precision, too.*/ + psllw mm0,2 + movq [0x00+Y],mm4 + psllw mm7,2 + movq mm4,[0x48+X] + /*We inline stage1 of the transform here so we can get better instruction + scheduling with the shifts.*/ + /*mm0=t7'=t0-t7*/ + psubw mm0,mm7 + psllw mm1,2 + paddw mm7,mm7 + psllw mm6,2 + /*mm1=t6'=t1-t6*/ + psubw mm1,mm6 + psllw mm2,2 + paddw mm6,mm6 + psllw mm5,2 + /*mm2=t5'=t2-t5*/ + psubw mm2,mm5 + psllw mm3,2 + paddw mm5,mm5 + psllw mm4,2 + /*mm3=t4'=t3-t4*/ + psubw mm3,mm4 + paddw mm4,mm4 + /*mm7=t0'=t0+t7*/ + paddw mm7,mm0 + /*mm6=t1'=t1+t6*/ + paddw mm6,mm1 + /*mm5=t2'=t2+t5*/ + paddw mm5,mm2 + /*mm4=t3'=t3+t4*/ + paddw mm4,mm3 + OC_FDCT8x4(0x08,0x18,0x28,0x38,0x48,0x58,0x68,0x78) + OC_TRANSPOSE8x4(0x08,0x18,0x28,0x38,0x48,0x58,0x68,0x78) + /*Here the first 4x4 block of output from the last transpose is the second + 4x4 block of input for the next transform. + We have cleverly arranged that it already be in the appropriate place, + so we only have to do half the stores and loads.*/ + movq mm0,[0x00+Y] + movq [0x58+Y],mm1 + movq mm1,[0x10+Y] + movq [0x68+Y],mm2 + movq mm2,[0x20+Y] + movq [0x78+Y],mm3 + movq mm3,[0x30+Y] + OC_FDCT_STAGE1_8x4 + OC_FDCT8x4(0x00,0x10,0x20,0x30,0x08,0x18,0x28,0x38) + OC_TRANSPOSE8x4(0x00,0x10,0x20,0x30,0x08,0x18,0x28,0x38) + /*mm0={-2}x4*/ + pcmpeqw mm0,mm0 + paddw mm0,mm0 + /*Round the results.*/ + psubw mm1,mm0 + psubw mm2,mm0 + psraw mm1,2 + psubw mm3,mm0 + movq [0x18+Y],mm1 + psraw mm2,2 + psubw mm4,mm0 + movq mm1,[0x08+Y] + psraw mm3,2 + psubw mm5,mm0 + psraw mm4,2 + psubw mm6,mm0 + psraw mm5,2 + psubw mm7,mm0 + psraw mm6,2 + psubw mm1,mm0 + psraw mm7,2 + movq mm0,[0x40+Y] + psraw mm1,2 + movq [0x30+Y],mm7 + movq mm7,[0x78+Y] + movq [0x08+Y],mm1 + movq mm1,[0x50+Y] + movq [0x20+Y],mm6 + movq mm6,[0x68+Y] + movq [0x28+Y],mm2 + movq mm2,[0x60+Y] + movq [0x10+Y],mm5 + movq mm5,[0x58+Y] + movq [0x38+Y],mm3 + movq mm3,[0x70+Y] + movq 
[0x00+Y],mm4 + movq mm4,[0x48+Y] + OC_FDCT_STAGE1_8x4 + OC_FDCT8x4(0x40,0x50,0x60,0x70,0x48,0x58,0x68,0x78) + OC_TRANSPOSE8x4(0x40,0x50,0x60,0x70,0x48,0x58,0x68,0x78) + /*mm0={-2}x4*/ + pcmpeqw mm0,mm0 + paddw mm0,mm0 + /*Round the results.*/ + psubw mm1,mm0 + psubw mm2,mm0 + psraw mm1,2 + psubw mm3,mm0 + movq [0x58+Y],mm1 + psraw mm2,2 + psubw mm4,mm0 + movq mm1,[0x48+Y] + psraw mm3,2 + psubw mm5,mm0 + movq [0x68+Y],mm2 + psraw mm4,2 + psubw mm6,mm0 + movq [0x78+Y],mm3 + psraw mm5,2 + psubw mm7,mm0 + movq [0x40+Y],mm4 + psraw mm6,2 + psubw mm1,mm0 + movq [0x50+Y],mm5 + psraw mm7,2 + movq [0x60+Y],mm6 + psraw mm1,2 + movq [0x70+Y],mm7 + movq [0x48+Y],mm1 +#undef Y +#undef A +#undef X + } +} + +#endif diff --git a/thirdparty/libtheora/x86_vc/mmxfrag.c b/thirdparty/libtheora/x86_vc/mmxfrag.c new file mode 100644 index 0000000000..4eb2084dc6 --- /dev/null +++ b/thirdparty/libtheora/x86_vc/mmxfrag.c @@ -0,0 +1,337 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: mmxfrag.c 16578 2009-09-25 19:50:48Z cristianadam $ + + ********************************************************************/ + +/*MMX acceleration of fragment reconstruction for motion compensation. + Originally written by Rudolf Marek. + Additional optimization by Nils Pipenbrinck. + Note: Loops are unrolled for best performance. 
+ The iteration each instruction belongs to is marked in the comments as #i.*/ +#include +#include "x86int.h" +#include "mmxfrag.h" + +#if defined(OC_X86_ASM) + +/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes + between rows.*/ +void oc_frag_copy_mmx(unsigned char *_dst, + const unsigned char *_src,int _ystride){ +#define SRC edx +#define DST eax +#define YSTRIDE ecx +#define YSTRIDE3 esi + OC_FRAG_COPY_MMX(_dst,_src,_ystride); +#undef SRC +#undef DST +#undef YSTRIDE +#undef YSTRIDE3 +} + +void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride, + const ogg_int16_t *_residue){ + __asm{ +#define DST edx +#define DST4 esi +#define YSTRIDE eax +#define YSTRIDE3 edi +#define RESIDUE ecx + mov DST,_dst + mov YSTRIDE,_ystride + mov RESIDUE,_residue + lea DST4,[DST+YSTRIDE*4] + lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] + /*Set mm0 to 0xFFFFFFFFFFFFFFFF.*/ + pcmpeqw mm0,mm0 + /*#0 Load low residue.*/ + movq mm1,[0*8+RESIDUE] + /*#0 Load high residue.*/ + movq mm2,[1*8+RESIDUE] + /*Set mm0 to 0x8000800080008000.*/ + psllw mm0,15 + /*#1 Load low residue.*/ + movq mm3,[2*8+RESIDUE] + /*#1 Load high residue.*/ + movq mm4,[3*8+RESIDUE] + /*Set mm0 to 0x0080008000800080.*/ + psrlw mm0,8 + /*#2 Load low residue.*/ + movq mm5,[4*8+RESIDUE] + /*#2 Load high residue.*/ + movq mm6,[5*8+RESIDUE] + /*#0 Bias low residue.*/ + paddsw mm1,mm0 + /*#0 Bias high residue.*/ + paddsw mm2,mm0 + /*#0 Pack to byte.*/ + packuswb mm1,mm2 + /*#1 Bias low residue.*/ + paddsw mm3,mm0 + /*#1 Bias high residue.*/ + paddsw mm4,mm0 + /*#1 Pack to byte.*/ + packuswb mm3,mm4 + /*#2 Bias low residue.*/ + paddsw mm5,mm0 + /*#2 Bias high residue.*/ + paddsw mm6,mm0 + /*#2 Pack to byte.*/ + packuswb mm5,mm6 + /*#0 Write row.*/ + movq [DST],mm1 + /*#1 Write row.*/ + movq [DST+YSTRIDE],mm3 + /*#2 Write row.*/ + movq [DST+YSTRIDE*2],mm5 + /*#3 Load low residue.*/ + movq mm1,[6*8+RESIDUE] + /*#3 Load high residue.*/ + movq mm2,[7*8+RESIDUE] + /*#4 Load low residue.*/ + movq
mm3,[8*8+RESIDUE] + /*#4 Load high residue.*/ + movq mm4,[9*8+RESIDUE] + /*#5 Load low residue.*/ + movq mm5,[10*8+RESIDUE] + /*#5 Load high residue.*/ + movq mm6,[11*8+RESIDUE] + /*#3 Bias low residue.*/ + paddsw mm1,mm0 + /*#3 Bias high residue.*/ + paddsw mm2,mm0 + /*#3 Pack to byte.*/ + packuswb mm1,mm2 + /*#4 Bias low residue.*/ + paddsw mm3,mm0 + /*#4 Bias high residue.*/ + paddsw mm4,mm0 + /*#4 Pack to byte.*/ + packuswb mm3,mm4 + /*#5 Bias low residue.*/ + paddsw mm5,mm0 + /*#5 Bias high residue.*/ + paddsw mm6,mm0 + /*#5 Pack to byte.*/ + packuswb mm5,mm6 + /*#3 Write row.*/ + movq [DST+YSTRIDE3],mm1 + /*#4 Write row.*/ + movq [DST4],mm3 + /*#5 Write row.*/ + movq [DST4+YSTRIDE],mm5 + /*#6 Load low residue.*/ + movq mm1,[12*8+RESIDUE] + /*#6 Load high residue.*/ + movq mm2,[13*8+RESIDUE] + /*#7 Load low residue.*/ + movq mm3,[14*8+RESIDUE] + /*#7 Load high residue.*/ + movq mm4,[15*8+RESIDUE] + /*#6 Bias low residue.*/ + paddsw mm1,mm0 + /*#6 Bias high residue.*/ + paddsw mm2,mm0 + /*#6 Pack to byte.*/ + packuswb mm1,mm2 + /*#7 Bias low residue.*/ + paddsw mm3,mm0 + /*#7 Bias high residue.*/ + paddsw mm4,mm0 + /*#7 Pack to byte.*/ + packuswb mm3,mm4 + /*#6 Write row.*/ + movq [DST4+YSTRIDE*2],mm1 + /*#7 Write row.*/ + movq [DST4+YSTRIDE3],mm3 +#undef DST +#undef DST4 +#undef YSTRIDE +#undef YSTRIDE3 +#undef RESIDUE + } +} + +void oc_frag_recon_inter_mmx(unsigned char *_dst,const unsigned char *_src, + int _ystride,const ogg_int16_t *_residue){ + int i; + /*Zero mm0.*/ + __asm pxor mm0,mm0; + for(i=4;i-->0;){ + __asm{ +#define DST edx +#define SRC ecx +#define YSTRIDE edi +#define RESIDUE eax + mov DST,_dst + mov SRC,_src + mov YSTRIDE,_ystride + mov RESIDUE,_residue + /*#0 Load source.*/ + movq mm3,[SRC] + /*#1 Load source.*/ + movq mm7,[SRC+YSTRIDE] + /*#0 Get copy of src.*/ + movq mm4,mm3 + /*#0 Expand high source.*/ + punpckhbw mm4,mm0 + /*#0 Expand low source.*/ + punpcklbw mm3,mm0 + /*#0 Add residue high.*/ + paddsw mm4,[8+RESIDUE] + /*#1 Get copy of
src.*/ + movq mm2,mm7 + /*#0 Add residue low.*/ + paddsw mm3,[RESIDUE] + /*#1 Expand high source.*/ + punpckhbw mm2,mm0 + /*#0 Pack final row pixels.*/ + packuswb mm3,mm4 + /*#1 Expand low source.*/ + punpcklbw mm7,mm0 + /*#1 Add residue low.*/ + paddsw mm7,[16+RESIDUE] + /*#1 Add residue high.*/ + paddsw mm2,[24+RESIDUE] + /*Advance residue.*/ + lea RESIDUE,[32+RESIDUE] + /*#1 Pack final row pixels.*/ + packuswb mm7,mm2 + /*Advance src.*/ + lea SRC,[SRC+YSTRIDE*2] + /*#0 Write row.*/ + movq [DST],mm3 + /*#1 Write row.*/ + movq [DST+YSTRIDE],mm7 + /*Advance dst.*/ + lea DST,[DST+YSTRIDE*2] + mov _residue,RESIDUE + mov _dst,DST + mov _src,SRC +#undef DST +#undef SRC +#undef YSTRIDE +#undef RESIDUE + } + } +} + +void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1, + const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue){ + int i; + /*Zero mm7.*/ + __asm pxor mm7,mm7; + for(i=4;i-->0;){ + __asm{ +#define SRC1 ecx +#define SRC2 edi +#define YSTRIDE esi +#define RESIDUE edx +#define DST eax + mov YSTRIDE,_ystride + mov DST,_dst + mov RESIDUE,_residue + mov SRC1,_src1 + mov SRC2,_src2 + /*#0 Load src1.*/ + movq mm0,[SRC1] + /*#0 Load src2.*/ + movq mm2,[SRC2] + /*#0 Copy src1.*/ + movq mm1,mm0 + /*#0 Copy src2.*/ + movq mm3,mm2 + /*#1 Load src1.*/ + movq mm4,[SRC1+YSTRIDE] + /*#0 Unpack lower src1.*/ + punpcklbw mm0,mm7 + /*#1 Load src2.*/ + movq mm5,[SRC2+YSTRIDE] + /*#0 Unpack higher src1.*/ + punpckhbw mm1,mm7 + /*#0 Unpack lower src2.*/ + punpcklbw mm2,mm7 + /*#0 Unpack higher src2.*/ + punpckhbw mm3,mm7 + /*Advance src1 ptr.*/ + lea SRC1,[SRC1+YSTRIDE*2] + /*Advance src2 ptr.*/ + lea SRC2,[SRC2+YSTRIDE*2] + /*#0 Lower src1+src2.*/ + paddsw mm0,mm2 + /*#0 Higher src1+src2.*/ + paddsw mm1,mm3 + /*#1 Copy src1.*/ + movq mm2,mm4 + /*#0 Build lo average.*/ + psraw mm0,1 + /*#1 Copy src2.*/ + movq mm3,mm5 + /*#1 Unpack lower src1.*/ + punpcklbw mm4,mm7 + /*#0 Build hi average.*/ + psraw mm1,1 + /*#1 Unpack higher src1.*/ + punpckhbw 
mm2,mm7 + /*#0 low+=residue.*/ + paddsw mm0,[RESIDUE] + /*#1 Unpack lower src2.*/ + punpcklbw mm5,mm7 + /*#0 high+=residue.*/ + paddsw mm1,[8+RESIDUE] + /*#1 Unpack higher src2.*/ + punpckhbw mm3,mm7 + /*#1 Lower src1+src2.*/ + paddsw mm5,mm4 + /*#0 Pack and saturate.*/ + packuswb mm0,mm1 + /*#1 Higher src1+src2.*/ + paddsw mm3,mm2 + /*#0 Write row.*/ + movq [DST],mm0 + /*#1 Build lo average.*/ + psraw mm5,1 + /*#1 Build hi average.*/ + psraw mm3,1 + /*#1 low+=residue.*/ + paddsw mm5,[16+RESIDUE] + /*#1 high+=residue.*/ + paddsw mm3,[24+RESIDUE] + /*#1 Pack and saturate.*/ + packuswb mm5,mm3 + /*#1 Write row ptr.*/ + movq [DST+YSTRIDE],mm5 + /*Advance residue ptr.*/ + add RESIDUE,32 + /*Advance dest ptr.*/ + lea DST,[DST+YSTRIDE*2] + mov _dst,DST + mov _residue,RESIDUE + mov _src1,SRC1 + mov _src2,SRC2 +#undef SRC1 +#undef SRC2 +#undef YSTRIDE +#undef RESIDUE +#undef DST + } + } +} + +void oc_restore_fpu_mmx(void){ + __asm emms; +} + +#endif diff --git a/thirdparty/libtheora/x86_vc/mmxfrag.h b/thirdparty/libtheora/x86_vc/mmxfrag.h new file mode 100644 index 0000000000..45ee93e777 --- /dev/null +++ b/thirdparty/libtheora/x86_vc/mmxfrag.h @@ -0,0 +1,61 @@ +#if !defined(_x86_vc_mmxfrag_H) +# define _x86_vc_mmxfrag_H (1) +# include +# include "x86int.h" + +#if defined(OC_X86_ASM) + +/*Copies an 8x8 block of pixels from _src to _dst, assuming _ystride bytes + between rows.*/ +#define OC_FRAG_COPY_MMX(_dst,_src,_ystride) \ + do{ \ + const unsigned char *src; \ + unsigned char *dst; \ + src=(_src); \ + dst=(_dst); \ + __asm mov SRC,src \ + __asm mov DST,dst \ + __asm mov YSTRIDE,_ystride \ + /*src+0*ystride*/ \ + __asm movq mm0,[SRC] \ + /*src+1*ystride*/ \ + __asm movq mm1,[SRC+YSTRIDE] \ + /*ystride3=ystride*3*/ \ + __asm lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] \ + /*src+2*ystride*/ \ + __asm movq mm2,[SRC+YSTRIDE*2] \ + /*src+3*ystride*/ \ + __asm movq mm3,[SRC+YSTRIDE3] \ + /*dst+0*ystride*/ \ + __asm movq [DST],mm0 \ + /*dst+1*ystride*/ \ + __asm movq [DST+YSTRIDE],mm1 \ + 
/*Pointer to next 4.*/ \ + __asm lea SRC,[SRC+YSTRIDE*4] \ + /*dst+2*ystride*/ \ + __asm movq [DST+YSTRIDE*2],mm2 \ + /*dst+3*ystride*/ \ + __asm movq [DST+YSTRIDE3],mm3 \ + /*Pointer to next 4.*/ \ + __asm lea DST,[DST+YSTRIDE*4] \ + /*src+0*ystride*/ \ + __asm movq mm0,[SRC] \ + /*src+1*ystride*/ \ + __asm movq mm1,[SRC+YSTRIDE] \ + /*src+2*ystride*/ \ + __asm movq mm2,[SRC+YSTRIDE*2] \ + /*src+3*ystride*/ \ + __asm movq mm3,[SRC+YSTRIDE3] \ + /*dst+0*ystride*/ \ + __asm movq [DST],mm0 \ + /*dst+1*ystride*/ \ + __asm movq [DST+YSTRIDE],mm1 \ + /*dst+2*ystride*/ \ + __asm movq [DST+YSTRIDE*2],mm2 \ + /*dst+3*ystride*/ \ + __asm movq [DST+YSTRIDE3],mm3 \ + } \ + while(0) + +# endif +#endif diff --git a/thirdparty/libtheora/x86_vc/mmxidct.c b/thirdparty/libtheora/x86_vc/mmxidct.c new file mode 100644 index 0000000000..8f5ff6803c --- /dev/null +++ b/thirdparty/libtheora/x86_vc/mmxidct.c @@ -0,0 +1,562 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: mmxidct.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +/*MMX acceleration of Theora's iDCT. + Originally written by Rudolf Marek, based on code from On2's VP3.*/ +#include "x86int.h" +#include "../dct.h" + +#if defined(OC_X86_ASM) + +/*These are offsets into the table of constants below.*/ +/*7 rows of cosines, in order: pi/16 * (1 ... 
7).*/ +#define OC_COSINE_OFFSET (0) +/*A row of 8's.*/ +#define OC_EIGHT_OFFSET (56) + + + +/*A table of constants used by the MMX routines.*/ +static const __declspec(align(16))ogg_uint16_t + OC_IDCT_CONSTS[(7+1)*4]={ + (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7, + (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7, + (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6, + (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6, + (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5, + (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5, + (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4, + (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4, + (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3, + (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3, + (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2, + (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2, + (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1, + (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1, + 8, 8, 8, 8 +}; + +/*38 cycles*/ +#define OC_IDCT_BEGIN __asm{ \ + __asm movq mm2,OC_I(3) \ + __asm movq mm6,OC_C(3) \ + __asm movq mm4,mm2 \ + __asm movq mm7,OC_J(5) \ + __asm pmulhw mm4,mm6 \ + __asm movq mm1,OC_C(5) \ + __asm pmulhw mm6,mm7 \ + __asm movq mm5,mm1 \ + __asm pmulhw mm1,mm2 \ + __asm movq mm3,OC_I(1) \ + __asm pmulhw mm5,mm7 \ + __asm movq mm0,OC_C(1) \ + __asm paddw mm4,mm2 \ + __asm paddw mm6,mm7 \ + __asm paddw mm2,mm1 \ + __asm movq mm1,OC_J(7) \ + __asm paddw mm7,mm5 \ + __asm movq mm5,mm0 \ + __asm pmulhw mm0,mm3 \ + __asm paddw mm4,mm7 \ + __asm pmulhw mm5,mm1 \ + __asm movq mm7,OC_C(7) \ + __asm psubw mm6,mm2 \ + __asm paddw mm0,mm3 \ + __asm pmulhw mm3,mm7 \ + __asm movq mm2,OC_I(2) \ + __asm pmulhw mm7,mm1 \ + __asm paddw mm5,mm1 \ + __asm movq mm1,mm2 \ + __asm pmulhw mm2,OC_C(2) \ + __asm psubw mm3,mm5 \ + __asm movq mm5,OC_J(6) \ + __asm paddw mm0,mm7 \ + __asm movq mm7,mm5 \ + __asm psubw mm0,mm4 \ + __asm pmulhw mm5,OC_C(2) \ + __asm paddw mm2,mm1 \ + __asm pmulhw mm1,OC_C(6) \ + __asm paddw mm4,mm4 \ + __asm paddw mm4,mm0 \ + __asm psubw mm3,mm6 \ + __asm paddw mm5,mm7 \ 
+ __asm paddw mm6,mm6 \ + __asm pmulhw mm7,OC_C(6) \ + __asm paddw mm6,mm3 \ + __asm movq OC_I(1),mm4 \ + __asm psubw mm1,mm5 \ + __asm movq mm4,OC_C(4) \ + __asm movq mm5,mm3 \ + __asm pmulhw mm3,mm4 \ + __asm paddw mm7,mm2 \ + __asm movq OC_I(2),mm6 \ + __asm movq mm2,mm0 \ + __asm movq mm6,OC_I(0) \ + __asm pmulhw mm0,mm4 \ + __asm paddw mm5,mm3 \ + __asm movq mm3,OC_J(4) \ + __asm psubw mm5,mm1 \ + __asm paddw mm2,mm0 \ + __asm psubw mm6,mm3 \ + __asm movq mm0,mm6 \ + __asm pmulhw mm6,mm4 \ + __asm paddw mm3,mm3 \ + __asm paddw mm1,mm1 \ + __asm paddw mm3,mm0 \ + __asm paddw mm1,mm5 \ + __asm pmulhw mm4,mm3 \ + __asm paddw mm6,mm0 \ + __asm psubw mm6,mm2 \ + __asm paddw mm2,mm2 \ + __asm movq mm0,OC_I(1) \ + __asm paddw mm2,mm6 \ + __asm paddw mm4,mm3 \ + __asm psubw mm2,mm1 \ +} + +/*38+8=46 cycles.*/ +#define OC_ROW_IDCT __asm{ \ + OC_IDCT_BEGIN \ + /*r3=D'*/ \ + __asm movq mm3,OC_I(2) \ + /*r4=E'=E-G*/ \ + __asm psubw mm4,mm7 \ + /*r1=H'+H'*/ \ + __asm paddw mm1,mm1 \ + /*r7=G+G*/ \ + __asm paddw mm7,mm7 \ + /*r1=R1=A''+H'*/ \ + __asm paddw mm1,mm2 \ + /*r7=G'=E+G*/ \ + __asm paddw mm7,mm4 \ + /*r4=R4=E'-D'*/ \ + __asm psubw mm4,mm3 \ + __asm paddw mm3,mm3 \ + /*r6=R6=F'-B''*/ \ + __asm psubw mm6,mm5 \ + __asm paddw mm5,mm5 \ + /*r3=R3=E'+D'*/ \ + __asm paddw mm3,mm4 \ + /*r5=R5=F'+B''*/ \ + __asm paddw mm5,mm6 \ + /*r7=R7=G'-C'*/ \ + __asm psubw mm7,mm0 \ + __asm paddw mm0,mm0 \ + /*Save R1.*/ \ + __asm movq OC_I(1),mm1 \ + /*r0=R0=G.+C.*/ \ + __asm paddw mm0,mm7 \ +} + +/*The following macro does two 4x4 transposes in place. + At entry, we assume: + r0 = a3 a2 a1 a0 + I(1) = b3 b2 b1 b0 + r2 = c3 c2 c1 c0 + r3 = d3 d2 d1 d0 + + r4 = e3 e2 e1 e0 + r5 = f3 f2 f1 f0 + r6 = g3 g2 g1 g0 + r7 = h3 h2 h1 h0 + + At exit, we have: + I(0) = d0 c0 b0 a0 + I(1) = d1 c1 b1 a1 + I(2) = d2 c2 b2 a2 + I(3) = d3 c3 b3 a3 + + J(4) = h0 g0 f0 e0 + J(5) = h1 g1 f1 e1 + J(6) = h2 g2 f2 e2 + J(7) = h3 g3 f3 e3 + + I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3. 
+ J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7. + + Since r1 is free at entry, we calculate the Js first.*/ +/*19 cycles.*/ +#define OC_TRANSPOSE __asm{ \ + __asm movq mm1,mm4 \ + __asm punpcklwd mm4,mm5 \ + __asm movq OC_I(0),mm0 \ + __asm punpckhwd mm1,mm5 \ + __asm movq mm0,mm6 \ + __asm punpcklwd mm6,mm7 \ + __asm movq mm5,mm4 \ + __asm punpckldq mm4,mm6 \ + __asm punpckhdq mm5,mm6 \ + __asm movq mm6,mm1 \ + __asm movq OC_J(4),mm4 \ + __asm punpckhwd mm0,mm7 \ + __asm movq OC_J(5),mm5 \ + __asm punpckhdq mm6,mm0 \ + __asm movq mm4,OC_I(0) \ + __asm punpckldq mm1,mm0 \ + __asm movq mm5,OC_I(1) \ + __asm movq mm0,mm4 \ + __asm movq OC_J(7),mm6 \ + __asm punpcklwd mm0,mm5 \ + __asm movq OC_J(6),mm1 \ + __asm punpckhwd mm4,mm5 \ + __asm movq mm5,mm2 \ + __asm punpcklwd mm2,mm3 \ + __asm movq mm1,mm0 \ + __asm punpckldq mm0,mm2 \ + __asm punpckhdq mm1,mm2 \ + __asm movq mm2,mm4 \ + __asm movq OC_I(0),mm0 \ + __asm punpckhwd mm5,mm3 \ + __asm movq OC_I(1),mm1 \ + __asm punpckhdq mm4,mm5 \ + __asm punpckldq mm2,mm5 \ + __asm movq OC_I(3),mm4 \ + __asm movq OC_I(2),mm2 \ +} + +/*38+19=57 cycles.*/ +#define OC_COLUMN_IDCT __asm{ \ + OC_IDCT_BEGIN \ + __asm paddw mm2,OC_8 \ + /*r1=H'+H'*/ \ + __asm paddw mm1,mm1 \ + /*r1=R1=A''+H'*/ \ + __asm paddw mm1,mm2 \ + /*r2=NR2*/ \ + __asm psraw mm2,4 \ + /*r4=E'=E-G*/ \ + __asm psubw mm4,mm7 \ + /*r1=NR1*/ \ + __asm psraw mm1,4 \ + /*r3=D'*/ \ + __asm movq mm3,OC_I(2) \ + /*r7=G+G*/ \ + __asm paddw mm7,mm7 \ + /*Store NR2 at I(2).*/ \ + __asm movq OC_I(2),mm2 \ + /*r7=G'=E+G*/ \ + __asm paddw mm7,mm4 \ + /*Store NR1 at I(1).*/ \ + __asm movq OC_I(1),mm1 \ + /*r4=R4=E'-D'*/ \ + __asm psubw mm4,mm3 \ + __asm paddw mm4,OC_8 \ + /*r3=D'+D'*/ \ + __asm paddw mm3,mm3 \ + /*r3=R3=E'+D'*/ \ + __asm paddw mm3,mm4 \ + /*r4=NR4*/ \ + __asm psraw mm4,4 \ + /*r6=R6=F'-B''*/ \ + __asm psubw mm6,mm5 \ + /*r3=NR3*/ \ + __asm psraw mm3,4 \ + __asm paddw mm6,OC_8 \ + /*r5=B''+B''*/ \ + __asm paddw mm5,mm5 \ + /*r5=R5=F'+B''*/ \ + __asm 
paddw mm5,mm6 \ + /*r6=NR6*/ \ + __asm psraw mm6,4 \ + /*Store NR4 at J(4).*/ \ + __asm movq OC_J(4),mm4 \ + /*r5=NR5*/ \ + __asm psraw mm5,4 \ + /*Store NR3 at I(3).*/ \ + __asm movq OC_I(3),mm3 \ + /*r7=R7=G'-C'*/ \ + __asm psubw mm7,mm0 \ + __asm paddw mm7,OC_8 \ + /*r0=C'+C'*/ \ + __asm paddw mm0,mm0 \ + /*r0=R0=G'+C'*/ \ + __asm paddw mm0,mm7 \ + /*r7=NR7*/ \ + __asm psraw mm7,4 \ + /*Store NR6 at J(6).*/ \ + __asm movq OC_J(6),mm6 \ + /*r0=NR0*/ \ + __asm psraw mm0,4 \ + /*Store NR5 at J(5).*/ \ + __asm movq OC_J(5),mm5 \ + /*Store NR7 at J(7).*/ \ + __asm movq OC_J(7),mm7 \ + /*Store NR0 at I(0).*/ \ + __asm movq OC_I(0),mm0 \ +} + +#define OC_MID(_m,_i) [CONSTS+_m+(_i)*8] +#define OC_C(_i) OC_MID(OC_COSINE_OFFSET,_i-1) +#define OC_8 OC_MID(OC_EIGHT_OFFSET,0) + +static void oc_idct8x8_slow(ogg_int16_t _y[64]){ + /*This routine accepts an 8x8 matrix, but in partially transposed form. + Every 4x4 block is transposed.*/ + __asm{ +#define CONSTS eax +#define Y edx + mov CONSTS,offset OC_IDCT_CONSTS + mov Y,_y +#define OC_I(_k) [Y+_k*16] +#define OC_J(_k) [Y+(_k-4)*16+8] + OC_ROW_IDCT + OC_TRANSPOSE +#undef OC_I +#undef OC_J +#define OC_I(_k) [Y+(_k*16)+64] +#define OC_J(_k) [Y+(_k-4)*16+72] + OC_ROW_IDCT + OC_TRANSPOSE +#undef OC_I +#undef OC_J +#define OC_I(_k) [Y+_k*16] +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT +#undef OC_I +#undef OC_J +#define OC_I(_k) [Y+_k*16+8] +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT +#undef OC_I +#undef OC_J +#undef CONSTS +#undef Y + } +} + +/*25 cycles.*/ +#define OC_IDCT_BEGIN_10 __asm{ \ + __asm movq mm2,OC_I(3) \ + __asm nop \ + __asm movq mm6,OC_C(3) \ + __asm movq mm4,mm2 \ + __asm movq mm1,OC_C(5) \ + __asm pmulhw mm4,mm6 \ + __asm movq mm3,OC_I(1) \ + __asm pmulhw mm1,mm2 \ + __asm movq mm0,OC_C(1) \ + __asm paddw mm4,mm2 \ + __asm pxor mm6,mm6 \ + __asm paddw mm2,mm1 \ + __asm movq mm5,OC_I(2) \ + __asm pmulhw mm0,mm3 \ + __asm movq mm1,mm5 \ + __asm paddw mm0,mm3 \ + __asm pmulhw mm3,OC_C(7) \ + __asm psubw mm6,mm2 \ + 
__asm pmulhw mm5,OC_C(2) \ + __asm psubw mm0,mm4 \ + __asm movq mm7,OC_I(2) \ + __asm paddw mm4,mm4 \ + __asm paddw mm7,mm5 \ + __asm paddw mm4,mm0 \ + __asm pmulhw mm1,OC_C(6) \ + __asm psubw mm3,mm6 \ + __asm movq OC_I(1),mm4 \ + __asm paddw mm6,mm6 \ + __asm movq mm4,OC_C(4) \ + __asm paddw mm6,mm3 \ + __asm movq mm5,mm3 \ + __asm pmulhw mm3,mm4 \ + __asm movq OC_I(2),mm6 \ + __asm movq mm2,mm0 \ + __asm movq mm6,OC_I(0) \ + __asm pmulhw mm0,mm4 \ + __asm paddw mm5,mm3 \ + __asm paddw mm2,mm0 \ + __asm psubw mm5,mm1 \ + __asm pmulhw mm6,mm4 \ + __asm paddw mm6,OC_I(0) \ + __asm paddw mm1,mm1 \ + __asm movq mm4,mm6 \ + __asm paddw mm1,mm5 \ + __asm psubw mm6,mm2 \ + __asm paddw mm2,mm2 \ + __asm movq mm0,OC_I(1) \ + __asm paddw mm2,mm6 \ + __asm psubw mm2,mm1 \ + __asm nop \ +} + +/*25+8=33 cycles.*/ +#define OC_ROW_IDCT_10 __asm{ \ + OC_IDCT_BEGIN_10 \ + /*r3=D'*/ \ + __asm movq mm3,OC_I(2) \ + /*r4=E'=E-G*/ \ + __asm psubw mm4,mm7 \ + /*r1=H'+H'*/ \ + __asm paddw mm1,mm1 \ + /*r7=G+G*/ \ + __asm paddw mm7,mm7 \ + /*r1=R1=A''+H'*/ \ + __asm paddw mm1,mm2 \ + /*r7=G'=E+G*/ \ + __asm paddw mm7,mm4 \ + /*r4=R4=E'-D'*/ \ + __asm psubw mm4,mm3 \ + __asm paddw mm3,mm3 \ + /*r6=R6=F'-B''*/ \ + __asm psubw mm6,mm5 \ + __asm paddw mm5,mm5 \ + /*r3=R3=E'+D'*/ \ + __asm paddw mm3,mm4 \ + /*r5=R5=F'+B''*/ \ + __asm paddw mm5,mm6 \ + /*r7=R7=G'-C'*/ \ + __asm psubw mm7,mm0 \ + __asm paddw mm0,mm0 \ + /*Save R1.*/ \ + __asm movq OC_I(1),mm1 \ + /*r0=R0=G'+C'*/ \ + __asm paddw mm0,mm7 \ +} + +/*25+19=44 cycles'*/ +#define OC_COLUMN_IDCT_10 __asm{ \ + OC_IDCT_BEGIN_10 \ + __asm paddw mm2,OC_8 \ + /*r1=H'+H'*/ \ + __asm paddw mm1,mm1 \ + /*r1=R1=A''+H'*/ \ + __asm paddw mm1,mm2 \ + /*r2=NR2*/ \ + __asm psraw mm2,4 \ + /*r4=E'=E-G*/ \ + __asm psubw mm4,mm7 \ + /*r1=NR1*/ \ + __asm psraw mm1,4 \ + /*r3=D'*/ \ + __asm movq mm3,OC_I(2) \ + /*r7=G+G*/ \ + __asm paddw mm7,mm7 \ + /*Store NR2 at I(2).*/ \ + __asm movq OC_I(2),mm2 \ + /*r7=G'=E+G*/ \ + __asm paddw mm7,mm4 \ + /*Store 
NR1 at I(1).*/ \ + __asm movq OC_I(1),mm1 \ + /*r4=R4=E'-D'*/ \ + __asm psubw mm4,mm3 \ + __asm paddw mm4,OC_8 \ + /*r3=D'+D'*/ \ + __asm paddw mm3,mm3 \ + /*r3=R3=E'+D'*/ \ + __asm paddw mm3,mm4 \ + /*r4=NR4*/ \ + __asm psraw mm4,4 \ + /*r6=R6=F'-B''*/ \ + __asm psubw mm6,mm5 \ + /*r3=NR3*/ \ + __asm psraw mm3,4 \ + __asm paddw mm6,OC_8 \ + /*r5=B''+B''*/ \ + __asm paddw mm5,mm5 \ + /*r5=R5=F'+B''*/ \ + __asm paddw mm5,mm6 \ + /*r6=NR6*/ \ + __asm psraw mm6,4 \ + /*Store NR4 at J(4).*/ \ + __asm movq OC_J(4),mm4 \ + /*r5=NR5*/ \ + __asm psraw mm5,4 \ + /*Store NR3 at I(3).*/ \ + __asm movq OC_I(3),mm3 \ + /*r7=R7=G'-C'*/ \ + __asm psubw mm7,mm0 \ + __asm paddw mm7,OC_8 \ + /*r0=C'+C'*/ \ + __asm paddw mm0,mm0 \ + /*r0=R0=G'+C'*/ \ + __asm paddw mm0,mm7 \ + /*r7=NR7*/ \ + __asm psraw mm7,4 \ + /*Store NR6 at J(6).*/ \ + __asm movq OC_J(6),mm6 \ + /*r0=NR0*/ \ + __asm psraw mm0,4 \ + /*Store NR5 at J(5).*/ \ + __asm movq OC_J(5),mm5 \ + /*Store NR7 at J(7).*/ \ + __asm movq OC_J(7),mm7 \ + /*Store NR0 at I(0).*/ \ + __asm movq OC_I(0),mm0 \ +} + +static void oc_idct8x8_10(ogg_int16_t _y[64]){ + __asm{ +#define CONSTS eax +#define Y edx + mov CONSTS,offset OC_IDCT_CONSTS + mov Y,_y +#define OC_I(_k) [Y+_k*16] +#define OC_J(_k) [Y+(_k-4)*16+8] + /*Done with dequant, descramble, and partial transpose. + Now do the iDCT itself.*/ + OC_ROW_IDCT_10 + OC_TRANSPOSE +#undef OC_I +#undef OC_J +#define OC_I(_k) [Y+_k*16] +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT_10 +#undef OC_I +#undef OC_J +#define OC_I(_k) [Y+_k*16+8] +#define OC_J(_k) OC_I(_k) + OC_COLUMN_IDCT_10 +#undef OC_I +#undef OC_J +#undef CONSTS +#undef Y + } +} + +/*Performs an inverse 8x8 Type-II DCT transform. + The input is assumed to be scaled by a factor of 4 relative to orthonormal + version of the transform.*/ +void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi){ + /*_last_zzi is subtly different from an actual count of the number of + coefficients we decoded for this block. 
+ It contains the value of zzi BEFORE the final token in the block was + decoded. + In most cases this is an EOB token (the continuation of an EOB run from a + previous block counts), and so this is the same as the coefficient count. + However, in the case that the last token was NOT an EOB token, but filled + the block up with exactly 64 coefficients, _last_zzi will be less than 64. + Provided the last token was not a pure zero run, the minimum value it can + be is 46, and so that doesn't affect any of the cases in this routine. + However, if the last token WAS a pure zero run of length 63, then _last_zzi + will be 1 while the number of coefficients decoded is 64. + Thus, we will trigger the following special case, where the real + coefficient count would not. + Note also that a zero run of length 64 will give _last_zzi a value of 0, + but we still process the DC coefficient, which might have a non-zero value + due to DC prediction. + Although convoluted, this is arguably the correct behavior: it allows us to + use a smaller transform when the block ends with a long zero run instead + of a normal EOB token. + It could be smarter... multiple separate zero runs at the end of a block + will fool it, but an encoder that generates these really deserves what it + gets. + Needless to say we inherited this approach from VP3.*/ + /*Perform the iDCT.*/ + if(_last_zzi<10)oc_idct8x8_10(_y); + else oc_idct8x8_slow(_y); +} + +#endif diff --git a/thirdparty/libtheora/x86_vc/mmxloop.h b/thirdparty/libtheora/x86_vc/mmxloop.h new file mode 100644 index 0000000000..2561fca2ae --- /dev/null +++ b/thirdparty/libtheora/x86_vc/mmxloop.h @@ -0,0 +1,219 @@ +#if !defined(_x86_vc_mmxloop_H) +# define _x86_vc_mmxloop_H (1) +# include +# include "x86int.h" + +#if defined(OC_X86_ASM) + +/*On entry, mm0={a0,...,a7}, mm1={b0,...,b7}, mm2={c0,...,c7}, mm3={d0,...d7}. 
+ On exit, mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)} and + mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}; mm0 and mm3 are clobbered.*/ +#define OC_LOOP_FILTER8_MMX __asm{ \ + /*mm7=0*/ \ + __asm pxor mm7,mm7 \ + /*mm6:mm0={a0,...,a7}*/ \ + __asm movq mm6,mm0 \ + __asm punpcklbw mm0,mm7 \ + __asm punpckhbw mm6,mm7 \ + /*mm3:mm5={d0,...,d7}*/ \ + __asm movq mm5,mm3 \ + __asm punpcklbw mm3,mm7 \ + __asm punpckhbw mm5,mm7 \ + /*mm6:mm0={a0-d0,...,a7-d7}*/ \ + __asm psubw mm0,mm3 \ + __asm psubw mm6,mm5 \ + /*mm3:mm1={b0,...,b7}*/ \ + __asm movq mm3,mm1 \ + __asm punpcklbw mm1,mm7 \ + __asm movq mm4,mm2 \ + __asm punpckhbw mm3,mm7 \ + /*mm5:mm4={c0,...,c7}*/ \ + __asm movq mm5,mm2 \ + __asm punpcklbw mm4,mm7 \ + __asm punpckhbw mm5,mm7 \ + /*mm7={3}x4 \ + mm5:mm4={c0-b0,...,c7-b7}*/ \ + __asm pcmpeqw mm7,mm7 \ + __asm psubw mm4,mm1 \ + __asm psrlw mm7,14 \ + __asm psubw mm5,mm3 \ + /*Scale by 3.*/ \ + __asm pmullw mm4,mm7 \ + __asm pmullw mm5,mm7 \ + /*mm7={4}x4 \ + mm5:mm4=f={a0-d0+3*(c0-b0),...,a7-d7+3*(c7-b7)}*/ \ + __asm psrlw mm7,1 \ + __asm paddw mm4,mm0 \ + __asm psllw mm7,2 \ + __asm movq mm0,[LL] \ + __asm paddw mm5,mm6 \ + /*R_i has the range [-127,128], so we compute -R_i instead. \ + mm4=-R_i=-(f+4>>3)=0xFF^(f-4>>3)*/ \ + __asm psubw mm4,mm7 \ + __asm psubw mm5,mm7 \ + __asm psraw mm4,3 \ + __asm psraw mm5,3 \ + __asm pcmpeqb mm7,mm7 \ + __asm packsswb mm4,mm5 \ + __asm pxor mm6,mm6 \ + __asm pxor mm4,mm7 \ + __asm packuswb mm1,mm3 \ + /*Now compute lflim of -mm4 cf. Section 7.10 of the sepc.*/ \ + /*There's no unsigned byte+signed byte with unsigned saturation op code, so \ + we have to split things by sign (the other option is to work in 16 bits, \ + but working in 8 bits gives much better parallelism). \ + We compute abs(R_i), but save a mask of which terms were negative in mm6. \ + Then we compute mm4=abs(lflim(R_i,L))=min(abs(R_i),max(2*L-abs(R_i),0)). 
\ + Finally, we split mm4 into positive and negative pieces using the mask in \ + mm6, and add and subtract them as appropriate.*/ \ + /*mm4=abs(-R_i)*/ \ + /*mm7=255-2*L*/ \ + __asm pcmpgtb mm6,mm4 \ + __asm psubb mm7,mm0 \ + __asm pxor mm4,mm6 \ + __asm psubb mm7,mm0 \ + __asm psubb mm4,mm6 \ + /*mm7=255-max(2*L-abs(R_i),0)*/ \ + __asm paddusb mm7,mm4 \ + /*mm4=min(abs(R_i),max(2*L-abs(R_i),0))*/ \ + __asm paddusb mm4,mm7 \ + __asm psubusb mm4,mm7 \ + /*Now split mm4 by the original sign of -R_i.*/ \ + __asm movq mm5,mm4 \ + __asm pand mm4,mm6 \ + __asm pandn mm6,mm5 \ + /*mm1={b0+lflim(R_0,L),...,b7+lflim(R_7,L)}*/ \ + /*mm2={c0-lflim(R_0,L),...,c7-lflim(R_7,L)}*/ \ + __asm paddusb mm1,mm4 \ + __asm psubusb mm2,mm4 \ + __asm psubusb mm1,mm6 \ + __asm paddusb mm2,mm6 \ +} + +#define OC_LOOP_FILTER_V_MMX(_pix,_ystride,_ll) \ + do{ \ + /*Used local variable pix__ in order to fix compilation errors like: \ + "error C2425: 'SHL' : non-constant expression in 'second operand'".*/ \ + unsigned char *pix__; \ + unsigned char *ll__; \ + ll__=(_ll); \ + pix__=(_pix); \ + __asm mov YSTRIDE,_ystride \ + __asm mov LL,ll__ \ + __asm mov PIX,pix__ \ + __asm sub PIX,YSTRIDE \ + __asm sub PIX,YSTRIDE \ + /*mm0={a0,...,a7}*/ \ + __asm movq mm0,[PIX] \ + /*ystride3=_ystride*3*/ \ + __asm lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] \ + /*mm3={d0,...,d7}*/ \ + __asm movq mm3,[PIX+YSTRIDE3] \ + /*mm1={b0,...,b7}*/ \ + __asm movq mm1,[PIX+YSTRIDE] \ + /*mm2={c0,...,c7}*/ \ + __asm movq mm2,[PIX+YSTRIDE*2] \ + OC_LOOP_FILTER8_MMX \ + /*Write it back out.*/ \ + __asm movq [PIX+YSTRIDE],mm1 \ + __asm movq [PIX+YSTRIDE*2],mm2 \ + } \ + while(0) + +#define OC_LOOP_FILTER_H_MMX(_pix,_ystride,_ll) \ + do{ \ + /*Used local variable ll__ in order to fix compilation errors like: \ + "error C2443: operand size conflict".*/ \ + unsigned char *ll__; \ + unsigned char *pix__; \ + ll__=(_ll); \ + pix__=(_pix)-2; \ + __asm mov PIX,pix__ \ + __asm mov YSTRIDE,_ystride \ + __asm mov LL,ll__ \ + /*x x x x d0 c0 b0 
a0*/ \ + __asm movd mm0,[PIX] \ + /*x x x x d1 c1 b1 a1*/ \ + __asm movd mm1,[PIX+YSTRIDE] \ + /*ystride3=_ystride*3*/ \ + __asm lea YSTRIDE3,[YSTRIDE+YSTRIDE*2] \ + /*x x x x d2 c2 b2 a2*/ \ + __asm movd mm2,[PIX+YSTRIDE*2] \ + /*x x x x d3 c3 b3 a3*/ \ + __asm lea D,[PIX+YSTRIDE*4] \ + __asm movd mm3,[PIX+YSTRIDE3] \ + /*x x x x d4 c4 b4 a4*/ \ + __asm movd mm4,[D] \ + /*x x x x d5 c5 b5 a5*/ \ + __asm movd mm5,[D+YSTRIDE] \ + /*x x x x d6 c6 b6 a6*/ \ + __asm movd mm6,[D+YSTRIDE*2] \ + /*x x x x d7 c7 b7 a7*/ \ + __asm movd mm7,[D+YSTRIDE3] \ + /*mm0=d1 d0 c1 c0 b1 b0 a1 a0*/ \ + __asm punpcklbw mm0,mm1 \ + /*mm2=d3 d2 c3 c2 b3 b2 a3 a2*/ \ + __asm punpcklbw mm2,mm3 \ + /*mm3=d1 d0 c1 c0 b1 b0 a1 a0*/ \ + __asm movq mm3,mm0 \ + /*mm0=b3 b2 b1 b0 a3 a2 a1 a0*/ \ + __asm punpcklwd mm0,mm2 \ + /*mm3=d3 d2 d1 d0 c3 c2 c1 c0*/ \ + __asm punpckhwd mm3,mm2 \ + /*mm1=b3 b2 b1 b0 a3 a2 a1 a0*/ \ + __asm movq mm1,mm0 \ + /*mm4=d5 d4 c5 c4 b5 b4 a5 a4*/ \ + __asm punpcklbw mm4,mm5 \ + /*mm6=d7 d6 c7 c6 b7 b6 a7 a6*/ \ + __asm punpcklbw mm6,mm7 \ + /*mm5=d5 d4 c5 c4 b5 b4 a5 a4*/ \ + __asm movq mm5,mm4 \ + /*mm4=b7 b6 b5 b4 a7 a6 a5 a4*/ \ + __asm punpcklwd mm4,mm6 \ + /*mm5=d7 d6 d5 d4 c7 c6 c5 c4*/ \ + __asm punpckhwd mm5,mm6 \ + /*mm2=d3 d2 d1 d0 c3 c2 c1 c0*/ \ + __asm movq mm2,mm3 \ + /*mm0=a7 a6 a5 a4 a3 a2 a1 a0*/ \ + __asm punpckldq mm0,mm4 \ + /*mm1=b7 b6 b5 b4 b3 b2 b1 b0*/ \ + __asm punpckhdq mm1,mm4 \ + /*mm2=c7 c6 c5 c4 c3 c2 c1 c0*/ \ + __asm punpckldq mm2,mm5 \ + /*mm3=d7 d6 d5 d4 d3 d2 d1 d0*/ \ + __asm punpckhdq mm3,mm5 \ + OC_LOOP_FILTER8_MMX \ + /*mm2={b0+R_0'',...,b7+R_7''}*/ \ + __asm movq mm0,mm1 \ + /*mm1={b0+R_0'',c0-R_0'',...,b3+R_3'',c3-R_3''}*/ \ + __asm punpcklbw mm1,mm2 \ + /*mm2={b4+R_4'',c4-R_4'',...,b7+R_7'',c7-R_7''}*/ \ + __asm punpckhbw mm0,mm2 \ + /*[d]=c1 b1 c0 b0*/ \ + __asm movd D,mm1 \ + __asm mov [PIX+1],D_WORD \ + __asm psrlq mm1,32 \ + __asm shr D,16 \ + __asm mov [PIX+YSTRIDE+1],D_WORD \ + /*[d]=c3 b3 c2 b2*/ \ + __asm movd D,mm1 
\ + __asm mov [PIX+YSTRIDE*2+1],D_WORD \ + __asm shr D,16 \ + __asm mov [PIX+YSTRIDE3+1],D_WORD \ + __asm lea PIX,[PIX+YSTRIDE*4] \ + /*[d]=c5 b5 c4 b4*/ \ + __asm movd D,mm0 \ + __asm mov [PIX+1],D_WORD \ + __asm psrlq mm0,32 \ + __asm shr D,16 \ + __asm mov [PIX+YSTRIDE+1],D_WORD \ + /*[d]=c7 b7 c6 b6*/ \ + __asm movd D,mm0 \ + __asm mov [PIX+YSTRIDE*2+1],D_WORD \ + __asm shr D,16 \ + __asm mov [PIX+YSTRIDE3+1],D_WORD \ + } \ + while(0) + +# endif +#endif diff --git a/thirdparty/libtheora/x86_vc/mmxstate.c b/thirdparty/libtheora/x86_vc/mmxstate.c new file mode 100644 index 0000000000..73bd1981cf --- /dev/null +++ b/thirdparty/libtheora/x86_vc/mmxstate.c @@ -0,0 +1,211 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: mmxstate.c 16584 2009-09-26 19:35:55Z tterribe $ + + ********************************************************************/ + +/*MMX acceleration of complete fragment reconstruction algorithm. 
+ Originally written by Rudolf Marek.*/ +#include +#include "x86int.h" +#include "mmxfrag.h" +#include "mmxloop.h" + +#if defined(OC_X86_ASM) + +void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){ + unsigned char *dst; + ptrdiff_t frag_buf_off; + int ystride; + int mb_mode; + /*Apply the inverse transform.*/ + /*Special case only having a DC component.*/ + if(_last_zzi<2){ + /*Note that this value must be unsigned, to keep the __asm__ block from + sign-extending it when it puts it in a register.*/ + ogg_uint16_t p; + /*We round this dequant product (and not any of the others) because there's + no iDCT rounding.*/ + p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); + /*Fill _dct_coeffs with p.*/ + __asm{ +#define Y eax +#define P ecx + mov Y,_dct_coeffs + movzx P,p + /*mm0=0000 0000 0000 AAAA*/ + movd mm0,P + /*mm0=0000 0000 AAAA AAAA*/ + punpcklwd mm0,mm0 + /*mm0=AAAA AAAA AAAA AAAA*/ + punpckldq mm0,mm0 + movq [Y],mm0 + movq [8+Y],mm0 + movq [16+Y],mm0 + movq [24+Y],mm0 + movq [32+Y],mm0 + movq [40+Y],mm0 + movq [48+Y],mm0 + movq [56+Y],mm0 + movq [64+Y],mm0 + movq [72+Y],mm0 + movq [80+Y],mm0 + movq [88+Y],mm0 + movq [96+Y],mm0 + movq [104+Y],mm0 + movq [112+Y],mm0 + movq [120+Y],mm0 +#undef Y +#undef P + } + } + else{ + /*Dequantize the DC coefficient.*/ + _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); + oc_idct8x8_mmx(_dct_coeffs,_last_zzi); + } + /*Fill in the target buffer.*/ + frag_buf_off=_state->frag_buf_offs[_fragi]; + mb_mode=_state->frags[_fragi].mb_mode; + ystride=_state->ref_ystride[_pli]; + dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off; + if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs); + else{ + const unsigned char *ref; + int mvoffsets[2]; + ref= + _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]] + +frag_buf_off; + 
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, + _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){ + oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, + _dct_coeffs); + } + else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs); + } +} + +/*We copy these entire function to inline the actual MMX routines so that we + use only a single indirect call.*/ + +/*Copies the fragments specified by the lists of fragment indices from one + frame to another. + _fragis: A pointer to a list of fragment indices. + _nfragis: The number of fragment indices to copy. + _dst_frame: The reference frame to copy to. + _src_frame: The reference frame to copy from. + _pli: The color plane the fragments lie in.*/ +void oc_state_frag_copy_list_mmx(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli){ + const ptrdiff_t *frag_buf_offs; + const unsigned char *src_frame_data; + unsigned char *dst_frame_data; + ptrdiff_t fragii; + int ystride; + dst_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_dst_frame]]; + src_frame_data=_state->ref_frame_data[_state->ref_frame_idx[_src_frame]]; + ystride=_state->ref_ystride[_pli]; + frag_buf_offs=_state->frag_buf_offs; + for(fragii=0;fragii<_nfragis;fragii++){ + ptrdiff_t frag_buf_off; + frag_buf_off=frag_buf_offs[_fragis[fragii]]; +#define SRC edx +#define DST eax +#define YSTRIDE ecx +#define YSTRIDE3 edi + OC_FRAG_COPY_MMX(dst_frame_data+frag_buf_off, + src_frame_data+frag_buf_off,ystride); +#undef SRC +#undef DST +#undef YSTRIDE +#undef YSTRIDE3 + } +} + +/*Apply the loop filter to a given set of fragment rows in the given plane. + The filter may be run on the bottom edge, affecting pixels in the next row of + fragments, so this row also needs to be available. + _bv: The bounding values array. + _refi: The index of the frame buffer to filter. + _pli: The color plane to filter. 
+ _fragy0: The Y coordinate of the first fragment row to filter. + _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ +void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){ + OC_ALIGN8(unsigned char ll[8]); + const oc_fragment_plane *fplane; + const oc_fragment *frags; + const ptrdiff_t *frag_buf_offs; + unsigned char *ref_frame_data; + ptrdiff_t fragi_top; + ptrdiff_t fragi_bot; + ptrdiff_t fragi0; + ptrdiff_t fragi0_end; + int ystride; + int nhfrags; + memset(ll,_state->loop_filter_limits[_state->qis[0]],sizeof(ll)); + fplane=_state->fplanes+_pli; + nhfrags=fplane->nhfrags; + fragi_top=fplane->froffset; + fragi_bot=fragi_top+fplane->nfrags; + fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags; + fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags; + ystride=_state->ref_ystride[_pli]; + frags=_state->frags; + frag_buf_offs=_state->frag_buf_offs; + ref_frame_data=_state->ref_frame_data[_refi]; + /*The following loops are constructed somewhat non-intuitively on purpose. + The main idea is: if a block boundary has at least one coded fragment on + it, the filter is applied to it. 
+ However, the order that the filters are applied in matters, and VP3 chose + the somewhat strange ordering used below.*/ + while(fragi0fragi0)OC_LOOP_FILTER_H_MMX(ref,ystride,ll); + if(fragi0>fragi_top)OC_LOOP_FILTER_V_MMX(ref,ystride,ll); + if(fragi+1opt_vtable.frag_sub=oc_enc_frag_sub_mmx; + _enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_mmx; + _enc->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx; + _enc->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx; + _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_mmx; + } + if(cpu_flags&OC_CPU_X86_MMXEXT){ + _enc->opt_vtable.frag_sad=oc_enc_frag_sad_mmxext; + _enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_mmxext; + _enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_mmxext; + _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_mmxext; + _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_mmxext; + _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_mmxext; + _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_mmxext; + } + if(cpu_flags&OC_CPU_X86_SSE2){ +# if defined(OC_X86_64_ASM) + _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_x86_64sse2; +# endif + } +} +#endif diff --git a/thirdparty/libtheora/x86_vc/x86enc.h b/thirdparty/libtheora/x86_vc/x86enc.h new file mode 100644 index 0000000000..581484641f --- /dev/null +++ b/thirdparty/libtheora/x86_vc/x86enc.h @@ -0,0 +1,47 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: x86int.h 15675 2009-02-06 09:43:27Z tterribe $ + + ********************************************************************/ + +#if !defined(_x86_vc_x86enc_H) +# define _x86_vc_x86enc_H (1) +# include "../encint.h" +# include "x86int.h" + +void oc_enc_vtable_init_x86(oc_enc_ctx *_enc); + +unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride); +unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh); +unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh); +unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref,int _ystride,unsigned _thresh); +unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src, + const unsigned char *_ref1,const unsigned char *_ref2,int _ystride, + unsigned _thresh); +unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,int _ystride); +void oc_enc_frag_sub_mmx(ogg_int16_t _diff[64], + const unsigned char *_x,const unsigned char *_y,int _stride); +void oc_enc_frag_sub_128_mmx(ogg_int16_t _diff[64], + const unsigned char *_x,int _stride); +void oc_enc_frag_copy2_mmxext(unsigned char *_dst, + const unsigned char *_src1,const unsigned char *_src2,int _ystride); +void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]); +void oc_enc_fdct8x8_x86_64sse2(ogg_int16_t _y[64],const ogg_int16_t _x[64]); + +#endif diff --git a/thirdparty/libtheora/x86_vc/x86int.h b/thirdparty/libtheora/x86_vc/x86int.h new file mode 100644 index 0000000000..4cca485311 --- /dev/null +++ b/thirdparty/libtheora/x86_vc/x86int.h @@ -0,0 +1,42 @@ 
+/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: x86int.h 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#if !defined(_x86_vc_x86int_H) +# define _x86_vc_x86int_H (1) +# include "../internal.h" + +void oc_state_vtable_init_x86(oc_theora_state *_state); + +void oc_frag_copy_mmx(unsigned char *_dst, + const unsigned char *_src,int _ystride); +void oc_frag_recon_intra_mmx(unsigned char *_dst,int _ystride, + const ogg_int16_t *_residue); +void oc_frag_recon_inter_mmx(unsigned char *_dst, + const unsigned char *_src,int _ystride,const ogg_int16_t *_residue); +void oc_frag_recon_inter2_mmx(unsigned char *_dst,const unsigned char *_src1, + const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue); +void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi); +void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi, + int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant); +void oc_state_frag_copy_list_mmx(const oc_theora_state *_state, + const ptrdiff_t *_fragis,ptrdiff_t _nfragis, + int _dst_frame,int _src_frame,int _pli); +void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state, + int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end); +void oc_restore_fpu_mmx(void); + +#endif diff --git a/thirdparty/libtheora/x86_vc/x86state.c b/thirdparty/libtheora/x86_vc/x86state.c new file mode 100644 index 
0000000000..a786bec284 --- /dev/null +++ b/thirdparty/libtheora/x86_vc/x86state.c @@ -0,0 +1,62 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: x86state.c 16503 2009-08-22 18:14:02Z giles $ + + ********************************************************************/ + +#include "x86int.h" + +#if defined(OC_X86_ASM) /*Everything below is compiled only when OC_X86_ASM is defined.*/ + +#include "../cpu.c" /*NOTE(review): textually includes a .c file; presumably this is what supplies oc_cpu_flags_get() - confirm cpu.c is not also compiled as its own unit, or its symbols may be defined twice.*/ + +/*This table has been modified from OC_FZIG_ZAG by baking a 4x4 transpose into + each quadrant of the destination.*/ +static const unsigned char OC_FZIG_ZAG_MMX[128]={ /*Entries 0..63 are a permutation of 0..63; entries 64..127 are all 64 (presumably a catch-all slot for indices past the last coefficient - confirm against the consumer of dct_fzig_zag).*/ + 0, 8, 1, 2, 9,16,24,17, + 10, 3,32,11,18,25, 4,12, + 5,26,19,40,33,34,41,48, + 27, 6,13,20,28,21,14, 7, + 56,49,42,35,43,50,57,36, + 15,22,29,30,23,44,37,58, + 51,59,38,45,52,31,60,53, + 46,39,47,54,61,62,55,63, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, + 64,64,64,64,64,64,64,64, +}; + +void oc_state_vtable_init_x86(oc_theora_state *_state){ /*Records the CPU flags in _state, then either installs the MMX routines and OC_FZIG_ZAG_MMX (when OC_CPU_X86_MMX is set) or defers to oc_state_vtable_init_c().*/ + _state->cpu_flags=oc_cpu_flags_get(); /*Stored unconditionally, i.e. also on the fallback path below.*/ + if(_state->cpu_flags&OC_CPU_X86_MMX){ + _state->opt_vtable.frag_copy=oc_frag_copy_mmx; + _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx; + _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx; + _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_mmx; + _state->opt_vtable.idct8x8=oc_idct8x8_mmx; + _state->opt_vtable.state_frag_recon=oc_state_frag_recon_mmx; + 
_state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_mmx; + _state->opt_vtable.state_loop_filter_frag_rows= + oc_state_loop_filter_frag_rows_mmx; + _state->opt_vtable.restore_fpu=oc_restore_fpu_mmx; + _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_MMX; /*Selects the transposed zig-zag table defined above.*/ + } + else oc_state_vtable_init_c(_state); /*No MMX reported: this function sets none of the opt_vtable/opt_data fields itself.*/ +} +#endif /*OC_X86_ASM*/ -- cgit v1.2.3