diff options
634 files changed, 6 insertions, 176465 deletions
diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt index 8ad6997463..ca7f065bd6 100644 --- a/COPYRIGHT.txt +++ b/COPYRIGHT.txt @@ -235,16 +235,6 @@ Copyright: 1995-2019, The PNG Reference Library Authors. 1995-1996, Guy Eric Schalnat, Group 42, Inc. License: Zlib -Files: ./thirdparty/libsimplewebm/ -Comment: libsimplewebm -Copyright: 2016, Błażej Szczygieł -License: Expat - -Files: ./thirdparty/libsimplewebm/libwebm/ -Comment: The WebM Project -Copyright: 2010, Google Inc. -License: BSD-3-clause - Files: ./thirdparty/libtheora/ Comment: OggTheora Copyright: 2002-2009, Xiph.org Foundation @@ -255,17 +245,6 @@ Comment: OggVorbis Copyright: 2002-2015, Xiph.org Foundation License: BSD-3-clause -Files: ./thirdparty/libvpx/ -Comment: The WebM Project -Copyright: 2010, The WebM Project authors. -License: BSD-3-clause - -Files: ./thirdparty/libvpx/third_party/android/cpu-features.c - ./thirdparty/libvpx/third_party/android/cpu-features.h -Comment: The Android Open Source Project -Copyright: 2010, The Android Open Source Project -License: BSD-2-clause - Files: ./thirdparty/libwebp/ Comment: WebP codec Copyright: 2010, Google Inc. @@ -388,14 +367,6 @@ Comment: Intel Open Image Denoise Copyright: 2009-2019, Intel Corporation License: Apache-2.0 -Files: ./thirdparty/opus/ -Comment: Opus -Copyright: 2001-2011, Xiph.Org, Skype Limited, Octasic, - Jean-Marc Valin, Timothy B. 
Terriberry, - CSIRO, Gregory Maxwell, Mark Borgerding, - Erik de Castro Lopo -License: BSD-3-clause - Files: ./thirdparty/pcre2/ Comment: PCRE2 Copyright: 1997-2020, University of Cambridge diff --git a/SConstruct b/SConstruct index ed57a66cb7..9e3930f756 100644 --- a/SConstruct +++ b/SConstruct @@ -166,12 +166,10 @@ opts.Add(BoolVariable("builtin_libogg", "Use the built-in libogg library", True) opts.Add(BoolVariable("builtin_libpng", "Use the built-in libpng library", True)) opts.Add(BoolVariable("builtin_libtheora", "Use the built-in libtheora library", True)) opts.Add(BoolVariable("builtin_libvorbis", "Use the built-in libvorbis library", True)) -opts.Add(BoolVariable("builtin_libvpx", "Use the built-in libvpx library", True)) opts.Add(BoolVariable("builtin_libwebp", "Use the built-in libwebp library", True)) opts.Add(BoolVariable("builtin_wslay", "Use the built-in wslay library", True)) opts.Add(BoolVariable("builtin_mbedtls", "Use the built-in mbedTLS library", True)) opts.Add(BoolVariable("builtin_miniupnpc", "Use the built-in miniupnpc library", True)) -opts.Add(BoolVariable("builtin_opus", "Use the built-in Opus library", True)) opts.Add(BoolVariable("builtin_pcre2", "Use the built-in PCRE2 library", True)) opts.Add(BoolVariable("builtin_pcre2_with_jit", "Use JIT compiler for the built-in PCRE2 library", True)) opts.Add(BoolVariable("builtin_recast", "Use the built-in Recast library", True)) diff --git a/doc/classes/VideoPlayer.xml b/doc/classes/VideoPlayer.xml index 4f60b9d567..c8590988f5 100644 --- a/doc/classes/VideoPlayer.xml +++ b/doc/classes/VideoPlayer.xml @@ -5,9 +5,9 @@ </brief_description> <description> Control node for playing video streams using [VideoStream] resources. - Supported video formats are [url=https://www.webmproject.org/]WebM[/url] ([code].webm[/code], [VideoStreamWebm]), [url=https://www.theora.org/]Ogg Theora[/url] ([code].ogv[/code], [VideoStreamTheora]), and any format exposed via a GDNative plugin using [VideoStreamGDNative]. 
+ Supported video formats are [url=https://www.theora.org/]Ogg Theora[/url] ([code].ogv[/code], [VideoStreamTheora]) and any format exposed via a GDNative plugin using [VideoStreamGDNative]. [b]Note:[/b] Due to a bug, VideoPlayer does not support localization remapping yet. - [b]Warning:[/b] On HTML5, video playback [i]will[/i] perform poorly due to missing architecture-specific assembly optimizations, especially for VP8/VP9. + [b]Warning:[/b] On HTML5, video playback [i]will[/i] perform poorly due to missing architecture-specific assembly optimizations. </description> <tutorials> </tutorials> diff --git a/modules/ogg/SCsub b/modules/ogg/SCsub index e415d92498..f15525648f 100644 --- a/modules/ogg/SCsub +++ b/modules/ogg/SCsub @@ -3,9 +3,6 @@ Import("env") Import("env_modules") -# Only kept to build the thirdparty library used by the theora and webm -# modules. - env_ogg = env_modules.Clone() # Thirdparty source files diff --git a/modules/opus/SCsub b/modules/opus/SCsub deleted file mode 100644 index 1437cd86df..0000000000 --- a/modules/opus/SCsub +++ /dev/null @@ -1,252 +0,0 @@ -#!/usr/bin/env python - -Import("env") -Import("env_modules") - -# Only kept to build the thirdparty library used by the webm module. -# AudioStreamOpus was dropped in 3.0 due to incompatibility with the new audio -# engine. If you want to port it, fetch it from the Git history. 
- -env_opus = env_modules.Clone() - -# Thirdparty source files - -thirdparty_obj = [] - -# Thirdparty source files -if env["builtin_opus"]: - thirdparty_dir = "#thirdparty/opus/" - - thirdparty_sources = [ - # Sync with opus_sources.mk - "opus.c", - "opus_decoder.c", - "opus_encoder.c", - "opus_multistream.c", - "opus_multistream_encoder.c", - "opus_multistream_decoder.c", - "repacketizer.c", - "analysis.c", - "mlp.c", - "mlp_data.c", - # Sync with libopusfile Makefile.am - "info.c", - "internal.c", - "opusfile.c", - "stream.c", - # Sync with celt_sources.mk - "celt/bands.c", - "celt/celt.c", - "celt/celt_encoder.c", - "celt/celt_decoder.c", - "celt/cwrs.c", - "celt/entcode.c", - "celt/entdec.c", - "celt/entenc.c", - "celt/kiss_fft.c", - "celt/laplace.c", - "celt/mathops.c", - "celt/mdct.c", - "celt/modes.c", - "celt/pitch.c", - "celt/celt_lpc.c", - "celt/quant_bands.c", - "celt/rate.c", - "celt/vq.c", - # "celt/arm/arm_celt_map.c", - # "celt/arm/armcpu.c", - # "celt/arm/celt_ne10_fft.c", - # "celt/arm/celt_ne10_mdct.c", - # "celt/arm/celt_neon_intr.c", - # Sync with silk_sources.mk - "silk/CNG.c", - "silk/code_signs.c", - "silk/init_decoder.c", - "silk/decode_core.c", - "silk/decode_frame.c", - "silk/decode_parameters.c", - "silk/decode_indices.c", - "silk/decode_pulses.c", - "silk/decoder_set_fs.c", - "silk/dec_API.c", - "silk/enc_API.c", - "silk/encode_indices.c", - "silk/encode_pulses.c", - "silk/gain_quant.c", - "silk/interpolate.c", - "silk/LP_variable_cutoff.c", - "silk/NLSF_decode.c", - "silk/NSQ.c", - "silk/NSQ_del_dec.c", - "silk/PLC.c", - "silk/shell_coder.c", - "silk/tables_gain.c", - "silk/tables_LTP.c", - "silk/tables_NLSF_CB_NB_MB.c", - "silk/tables_NLSF_CB_WB.c", - "silk/tables_other.c", - "silk/tables_pitch_lag.c", - "silk/tables_pulses_per_block.c", - "silk/VAD.c", - "silk/control_audio_bandwidth.c", - "silk/quant_LTP_gains.c", - "silk/VQ_WMat_EC.c", - "silk/HP_variable_cutoff.c", - "silk/NLSF_encode.c", - "silk/NLSF_VQ.c", - "silk/NLSF_unpack.c", 
- "silk/NLSF_del_dec_quant.c", - "silk/process_NLSFs.c", - "silk/stereo_LR_to_MS.c", - "silk/stereo_MS_to_LR.c", - "silk/check_control_input.c", - "silk/control_SNR.c", - "silk/init_encoder.c", - "silk/control_codec.c", - "silk/A2NLSF.c", - "silk/ana_filt_bank_1.c", - "silk/biquad_alt.c", - "silk/bwexpander_32.c", - "silk/bwexpander.c", - "silk/debug.c", - "silk/decode_pitch.c", - "silk/inner_prod_aligned.c", - "silk/lin2log.c", - "silk/log2lin.c", - "silk/LPC_analysis_filter.c", - "silk/LPC_inv_pred_gain.c", - "silk/table_LSF_cos.c", - "silk/NLSF2A.c", - "silk/NLSF_stabilize.c", - "silk/NLSF_VQ_weights_laroia.c", - "silk/pitch_est_tables.c", - "silk/resampler.c", - "silk/resampler_down2_3.c", - "silk/resampler_down2.c", - "silk/resampler_private_AR2.c", - "silk/resampler_private_down_FIR.c", - "silk/resampler_private_IIR_FIR.c", - "silk/resampler_private_up2_HQ.c", - "silk/resampler_rom.c", - "silk/sigm_Q15.c", - "silk/sort.c", - "silk/sum_sqr_shift.c", - "silk/stereo_decode_pred.c", - "silk/stereo_encode_pred.c", - "silk/stereo_find_predictor.c", - "silk/stereo_quant_pred.c", - ] - - opus_sources_silk = [] - - if env["platform"] in ["android", "iphone", "javascript"]: - env_opus.Append(CPPDEFINES=["FIXED_POINT"]) - opus_sources_silk = [ - "silk/fixed/LTP_analysis_filter_FIX.c", - "silk/fixed/LTP_scale_ctrl_FIX.c", - "silk/fixed/corrMatrix_FIX.c", - "silk/fixed/encode_frame_FIX.c", - "silk/fixed/find_LPC_FIX.c", - "silk/fixed/find_LTP_FIX.c", - "silk/fixed/find_pitch_lags_FIX.c", - "silk/fixed/find_pred_coefs_FIX.c", - "silk/fixed/noise_shape_analysis_FIX.c", - "silk/fixed/prefilter_FIX.c", - "silk/fixed/process_gains_FIX.c", - "silk/fixed/regularize_correlations_FIX.c", - "silk/fixed/residual_energy16_FIX.c", - "silk/fixed/residual_energy_FIX.c", - "silk/fixed/solve_LS_FIX.c", - "silk/fixed/warped_autocorrelation_FIX.c", - "silk/fixed/apply_sine_window_FIX.c", - "silk/fixed/autocorr_FIX.c", - "silk/fixed/burg_modified_FIX.c", - "silk/fixed/k2a_FIX.c", - 
"silk/fixed/k2a_Q16_FIX.c", - "silk/fixed/pitch_analysis_core_FIX.c", - "silk/fixed/vector_ops_FIX.c", - "silk/fixed/schur64_FIX.c", - "silk/fixed/schur_FIX.c", - ] - else: - opus_sources_silk = [ - "silk/float/apply_sine_window_FLP.c", - "silk/float/corrMatrix_FLP.c", - "silk/float/encode_frame_FLP.c", - "silk/float/find_LPC_FLP.c", - "silk/float/find_LTP_FLP.c", - "silk/float/find_pitch_lags_FLP.c", - "silk/float/find_pred_coefs_FLP.c", - "silk/float/LPC_analysis_filter_FLP.c", - "silk/float/LTP_analysis_filter_FLP.c", - "silk/float/LTP_scale_ctrl_FLP.c", - "silk/float/noise_shape_analysis_FLP.c", - "silk/float/prefilter_FLP.c", - "silk/float/process_gains_FLP.c", - "silk/float/regularize_correlations_FLP.c", - "silk/float/residual_energy_FLP.c", - "silk/float/solve_LS_FLP.c", - "silk/float/warped_autocorrelation_FLP.c", - "silk/float/wrappers_FLP.c", - "silk/float/autocorrelation_FLP.c", - "silk/float/burg_modified_FLP.c", - "silk/float/bwexpander_FLP.c", - "silk/float/energy_FLP.c", - "silk/float/inner_product_FLP.c", - "silk/float/k2a_FLP.c", - "silk/float/levinsondurbin_FLP.c", - "silk/float/LPC_inv_pred_gain_FLP.c", - "silk/float/pitch_analysis_core_FLP.c", - "silk/float/scale_copy_vector_FLP.c", - "silk/float/scale_vector_FLP.c", - "silk/float/schur_FLP.c", - "silk/float/sort_FLP.c", - ] - - thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources + opus_sources_silk] - - # also requires libogg - if env["builtin_libogg"]: - env_opus.Prepend(CPPPATH=["#thirdparty/libogg"]) - - env_opus.Append(CPPDEFINES=["HAVE_CONFIG_H"]) - - thirdparty_include_paths = [ - "", - "celt", - "opus", - "silk", - "silk/fixed", - "silk/float", - ] - env_opus.Prepend(CPPPATH=[thirdparty_dir + "/" + dir for dir in thirdparty_include_paths]) - - if env["platform"] == "android": - if "android_arch" in env and env["android_arch"] == "armv7": - env_opus.Append(CPPDEFINES=["OPUS_ARM_OPT"]) - elif "android_arch" in env and env["android_arch"] == "arm64v8": - 
env_opus.Append(CPPDEFINES=["OPUS_ARM64_OPT"]) - elif env["platform"] == "iphone": - if "arch" in env and env["arch"] == "arm": - env_opus.Append(CPPDEFINES=["OPUS_ARM_OPT"]) - elif "arch" in env and env["arch"] == "arm64": - env_opus.Append(CPPDEFINES=["OPUS_ARM64_OPT"]) - elif env["platform"] == "osx": - if "arch" in env and env["arch"] == "arm64": - env_opus.Append(CPPDEFINES=["OPUS_ARM64_OPT"]) - - env_thirdparty = env_opus.Clone() - env_thirdparty.disable_warnings() - env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources) - env.modules_sources += thirdparty_obj - - -# Godot source files - -module_obj = [] - -env_opus.add_source_files(module_obj, "*.cpp") -env.modules_sources += module_obj - -# Needed to force rebuilding the module files when the thirdparty library is updated. -env.Depends(module_obj, thirdparty_obj) diff --git a/modules/opus/config.py b/modules/opus/config.py deleted file mode 100644 index 9ff7b2dece..0000000000 --- a/modules/opus/config.py +++ /dev/null @@ -1,6 +0,0 @@ -def can_build(env, platform): - return env.module_check_dependencies("opus", ["ogg"]) - - -def configure(env): - pass diff --git a/modules/opus/register_types.cpp b/modules/opus/register_types.cpp deleted file mode 100644 index 02874a9a4b..0000000000 --- a/modules/opus/register_types.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/*************************************************************************/ -/* register_types.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). 
*/ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ - -#include "register_types.h" - -// Dummy module as libvorbis is needed by other modules (theora ...) - -void register_opus_types() {} - -void unregister_opus_types() {} diff --git a/modules/opus/register_types.h b/modules/opus/register_types.h deleted file mode 100644 index af889cf809..0000000000 --- a/modules/opus/register_types.h +++ /dev/null @@ -1,37 +0,0 @@ -/*************************************************************************/ -/* register_types.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. 
*/ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ - -#ifndef OPUS_REGISTER_TYPES_H -#define OPUS_REGISTER_TYPES_H - -void register_opus_types(); -void unregister_opus_types(); - -#endif // OPUS_REGISTER_TYPES_H diff --git a/modules/theora/doc_classes/VideoStreamTheora.xml b/modules/theora/doc_classes/VideoStreamTheora.xml index 2dfcd27dff..725f87b046 100644 --- a/modules/theora/doc_classes/VideoStreamTheora.xml +++ b/modules/theora/doc_classes/VideoStreamTheora.xml @@ -4,7 +4,7 @@ [VideoStream] resource for Ogg Theora videos. </brief_description> <description> - [VideoStream] resource handling the [url=https://www.theora.org/]Ogg Theora[/url] video format with [code].ogv[/code] extension. 
The Theora codec is less efficient than [VideoStreamWebm]'s VP8 and VP9, but it requires less CPU resources to decode. The Theora codec is decoded on the CPU. + [VideoStream] resource handling the [url=https://www.theora.org/]Ogg Theora[/url] video format with [code].ogv[/code] extension. The Theora codec is decoded on the CPU. [b]Note:[/b] While Ogg Theora videos can also have an [code].ogg[/code] extension, you will have to rename the extension to [code].ogv[/code] to use those videos within Godot. </description> <tutorials> diff --git a/modules/theora/video_stream_theora.cpp b/modules/theora/video_stream_theora.cpp index 8e80dfffca..ef434f107e 100644 --- a/modules/theora/video_stream_theora.cpp +++ b/modules/theora/video_stream_theora.cpp @@ -603,7 +603,7 @@ float VideoStreamPlaybackTheora::get_playback_position() const { }; void VideoStreamPlaybackTheora::seek(float p_time) { - WARN_PRINT_ONCE("Seeking in Theora and WebM videos is not implemented yet (it's only supported for GDNative-provided video streams)."); + WARN_PRINT_ONCE("Seeking in Theora videos is not implemented yet (it's only supported for GDNative-provided video streams)."); } void VideoStreamPlaybackTheora::set_mix_callback(AudioMixCallback p_callback, void *p_userdata) { diff --git a/modules/webm/SCsub b/modules/webm/SCsub deleted file mode 100644 index 44e80e2870..0000000000 --- a/modules/webm/SCsub +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python - -Import("env") -Import("env_modules") - -env_webm = env_modules.Clone() - -# Thirdparty source files - -thirdparty_obj = [] - -thirdparty_dir = "#thirdparty/libsimplewebm/" -thirdparty_sources = [ - "libwebm/mkvparser/mkvparser.cc", - "OpusVorbisDecoder.cpp", - "VPXDecoder.cpp", - "WebMDemuxer.cpp", -] -thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] - -env_webm.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "libwebm/"]) - -# also requires libogg, libvorbis and libopus -if env["builtin_libogg"]: - 
env_webm.Prepend(CPPPATH=["#thirdparty/libogg"]) -if env["builtin_libvorbis"]: - env_webm.Prepend(CPPPATH=["#thirdparty/libvorbis"]) -if env["builtin_opus"]: - env_webm.Prepend(CPPPATH=["#thirdparty/opus"]) - -if env["builtin_libvpx"]: - env_webm.Prepend(CPPPATH=["#thirdparty/libvpx"]) - SConscript("libvpx/SCsub") - -env_thirdparty = env_webm.Clone() -env_thirdparty.disable_warnings() -env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources) -env.modules_sources += thirdparty_obj - -# Godot source files - -module_obj = [] - -env_webm.add_source_files(module_obj, "*.cpp") -env.modules_sources += module_obj - -# Needed to force rebuilding the module files when the thirdparty library is updated. -env.Depends(module_obj, thirdparty_obj) diff --git a/modules/webm/config.py b/modules/webm/config.py deleted file mode 100644 index 99f8ace114..0000000000 --- a/modules/webm/config.py +++ /dev/null @@ -1,19 +0,0 @@ -def can_build(env, platform): - if platform in ["iphone"]: - return False - - return env.module_check_dependencies("webm", ["ogg", "opus", "vorbis"]) - - -def configure(env): - pass - - -def get_doc_classes(): - return [ - "VideoStreamWebm", - ] - - -def get_doc_path(): - return "doc_classes" diff --git a/modules/webm/doc_classes/VideoStreamWebm.xml b/modules/webm/doc_classes/VideoStreamWebm.xml deleted file mode 100644 index e04d02d6ab..0000000000 --- a/modules/webm/doc_classes/VideoStreamWebm.xml +++ /dev/null @@ -1,28 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<class name="VideoStreamWebm" inherits="VideoStream" version="4.0"> - <brief_description> - [VideoStream] resource for WebM videos. - </brief_description> - <description> - [VideoStream] resource handling the [url=https://www.webmproject.org/]WebM[/url] video format with [code].webm[/code] extension. Both the VP8 and VP9 codecs are supported. The VP8 and VP9 codecs are more efficient than [VideoStreamTheora], but they require more CPU resources to decode (especially VP9). 
Both the VP8 and VP9 codecs are decoded on the CPU. - [b]Note:[/b] Alpha channel (also known as transparency) is not supported. The video will always appear to have a black background, even if it originally contains an alpha channel. - [b]Note:[/b] There are known bugs and performance issues with WebM video playback in Godot. If you run into problems, try using the Ogg Theora format instead: [VideoStreamTheora] - </description> - <tutorials> - </tutorials> - <methods> - <method name="get_file"> - <return type="String" /> - <description> - Returns the WebM video file handled by this [VideoStreamWebm]. - </description> - </method> - <method name="set_file"> - <return type="void" /> - <argument index="0" name="file" type="String" /> - <description> - Sets the WebM video file that this [VideoStreamWebm] resource handles. The [code]file[/code] name should have the [code].webm[/code] extension. - </description> - </method> - </methods> -</class> diff --git a/modules/webm/libvpx/SCsub b/modules/webm/libvpx/SCsub deleted file mode 100644 index 4334cf732b..0000000000 --- a/modules/webm/libvpx/SCsub +++ /dev/null @@ -1,382 +0,0 @@ -#!/usr/bin/env python - -Import("env") -Import("env_modules") - -# Thirdparty sources - -libvpx_dir = "#thirdparty/libvpx/" - -libvpx_sources = [ - "vp8/vp8_dx_iface.c", - "vp8/common/generic/systemdependent.c", - "vp8/common/alloccommon.c", - "vp8/common/blockd.c", - "vp8/common/copy_c.c", - "vp8/common/debugmodes.c", - "vp8/common/dequantize.c", - "vp8/common/entropy.c", - "vp8/common/entropymode.c", - "vp8/common/entropymv.c", - "vp8/common/extend.c", - "vp8/common/filter.c", - "vp8/common/findnearmv.c", - "vp8/common/idct_blk.c", - "vp8/common/idctllm.c", - "vp8/common/loopfilter_filters.c", - "vp8/common/mbpitch.c", - "vp8/common/modecont.c", - "vp8/common/quant_common.c", - "vp8/common/reconinter.c", - "vp8/common/reconintra.c", - "vp8/common/reconintra4x4.c", - "vp8/common/rtcd.c", - "vp8/common/setupintrarecon.c", - 
"vp8/common/swapyv12buffer.c", - "vp8/common/treecoder.c", - "vp8/common/vp8_loopfilter.c", - "vp8/decoder/dboolhuff.c", - "vp8/decoder/decodeframe.c", - "vp8/decoder/decodemv.c", - "vp8/decoder/detokenize.c", - "vp8/decoder/onyxd_if.c", - "vp9/vp9_dx_iface.c", - "vp9/common/vp9_alloccommon.c", - "vp9/common/vp9_blockd.c", - "vp9/common/vp9_common_data.c", - "vp9/common/vp9_debugmodes.c", - "vp9/common/vp9_entropy.c", - "vp9/common/vp9_entropymode.c", - "vp9/common/vp9_entropymv.c", - "vp9/common/vp9_filter.c", - "vp9/common/vp9_frame_buffers.c", - "vp9/common/vp9_idct.c", - "vp9/common/vp9_loopfilter.c", - "vp9/common/vp9_mvref_common.c", - "vp9/common/vp9_pred_common.c", - "vp9/common/vp9_quant_common.c", - "vp9/common/vp9_reconinter.c", - "vp9/common/vp9_reconintra.c", - "vp9/common/vp9_rtcd.c", - "vp9/common/vp9_scale.c", - "vp9/common/vp9_scan.c", - "vp9/common/vp9_seg_common.c", - "vp9/common/vp9_thread_common.c", - "vp9/common/vp9_tile_common.c", - "vp9/decoder/vp9_decodeframe.c", - "vp9/decoder/vp9_decodemv.c", - "vp9/decoder/vp9_decoder.c", - "vp9/decoder/vp9_detokenize.c", - "vp9/decoder/vp9_dsubexp.c", - "vp9/decoder/vp9_dthread.c", - "vpx/src/vpx_codec.c", - "vpx/src/vpx_decoder.c", - "vpx/src/vpx_image.c", - "vpx/src/vpx_psnr.c", - "vpx_dsp/bitreader.c", - "vpx_dsp/bitreader_buffer.c", - "vpx_dsp/intrapred.c", - "vpx_dsp/inv_txfm.c", - "vpx_dsp/loopfilter.c", - "vpx_dsp/prob.c", - "vpx_dsp/vpx_convolve.c", - "vpx_dsp/vpx_dsp_rtcd.c", - "vpx_mem/vpx_mem.c", - "vpx_scale/vpx_scale_rtcd.c", - "vpx_scale/generic/yv12config.c", - "vpx_scale/generic/yv12extend.c", - "vpx_util/vpx_thread.c", -] - -libvpx_sources_mt = [ - "vp8/decoder/threading.c", -] - -libvpx_sources_intrin_x86 = [ - "vp8/common/x86/filter_x86.c", - "vp8/common/x86/loopfilter_x86.c", - "vp8/common/x86/vp8_asm_stubs.c", - "vpx_dsp/x86/vpx_asm_stubs.c", -] -libvpx_sources_intrin_x86_mmx = [ - "vp8/common/x86/idct_blk_mmx.c", -] -libvpx_sources_intrin_x86_sse2 = [ - 
"vp8/common/x86/idct_blk_sse2.c", - "vp9/common/x86/vp9_idct_intrin_sse2.c", - "vpx_dsp/x86/inv_txfm_sse2.c", - "vpx_dsp/x86/loopfilter_sse2.c", -] -libvpx_sources_intrin_x86_ssse3 = [ - "vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c", -] -libvpx_sources_intrin_x86_avx2 = [ - "vpx_dsp/x86/loopfilter_avx2.c", - "vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c", -] -libvpx_sources_x86asm = [ - "vp8/common/x86/copy_sse2.asm", - "vp8/common/x86/copy_sse3.asm", - "vp8/common/x86/dequantize_mmx.asm", - "vp8/common/x86/idctllm_mmx.asm", - "vp8/common/x86/idctllm_sse2.asm", - "vp8/common/x86/iwalsh_mmx.asm", - "vp8/common/x86/iwalsh_sse2.asm", - "vp8/common/x86/loopfilter_sse2.asm", - "vp8/common/x86/recon_mmx.asm", - "vp8/common/x86/recon_sse2.asm", - "vp8/common/x86/subpixel_mmx.asm", - "vp8/common/x86/subpixel_sse2.asm", - "vp8/common/x86/subpixel_ssse3.asm", - "vp8/common/x86/vp8_loopfilter_mmx.asm", - "vpx_dsp/x86/intrapred_sse2.asm", - "vpx_dsp/x86/intrapred_ssse3.asm", - "vpx_dsp/x86/inv_wht_sse2.asm", - "vpx_dsp/x86/vpx_convolve_copy_sse2.asm", - "vpx_dsp/x86/vpx_subpixel_8t_sse2.asm", - "vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm", - "vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm", - "vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm", - "vpx_ports/emms.asm", -] -libvpx_sources_x86_64asm = [ - "vp8/common/x86/loopfilter_block_sse2_x86_64.asm", - "vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm", -] - -libvpx_sources_arm = [ - "vpx_ports/arm_cpudetect.c", - "vp8/common/arm/loopfilter_arm.c", -] -libvpx_sources_arm_neon = [ - "vp8/common/arm/neon/bilinearpredict_neon.c", - "vp8/common/arm/neon/copymem_neon.c", - "vp8/common/arm/neon/dc_only_idct_add_neon.c", - "vp8/common/arm/neon/dequant_idct_neon.c", - "vp8/common/arm/neon/dequantizeb_neon.c", - "vp8/common/arm/neon/idct_blk_neon.c", - "vp8/common/arm/neon/idct_dequant_0_2x_neon.c", - "vp8/common/arm/neon/idct_dequant_full_2x_neon.c", - "vp8/common/arm/neon/iwalsh_neon.c", - "vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c", - 
"vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c", - "vp8/common/arm/neon/mbloopfilter_neon.c", - "vp8/common/arm/neon/shortidct4x4llm_neon.c", - "vp8/common/arm/neon/sixtappredict_neon.c", - "vp8/common/arm/neon/vp8_loopfilter_neon.c", - "vp9/common/arm/neon/vp9_iht4x4_add_neon.c", - "vp9/common/arm/neon/vp9_iht8x8_add_neon.c", - "vpx_dsp/arm/idct16x16_1_add_neon.c", - "vpx_dsp/arm/idct16x16_add_neon.c", - "vpx_dsp/arm/idct16x16_neon.c", - "vpx_dsp/arm/idct32x32_1_add_neon.c", - "vpx_dsp/arm/idct32x32_add_neon.c", - "vpx_dsp/arm/idct4x4_1_add_neon.c", - "vpx_dsp/arm/idct4x4_add_neon.c", - "vpx_dsp/arm/idct8x8_1_add_neon.c", - "vpx_dsp/arm/idct8x8_add_neon.c", - "vpx_dsp/arm/intrapred_neon.c", - "vpx_dsp/arm/loopfilter_16_neon.c", - "vpx_dsp/arm/loopfilter_4_neon.c", - "vpx_dsp/arm/loopfilter_8_neon.c", - "vpx_dsp/arm/loopfilter_neon.c", - "vpx_dsp/arm/vpx_convolve8_avg_neon.c", - "vpx_dsp/arm/vpx_convolve8_neon.c", - "vpx_dsp/arm/vpx_convolve_avg_neon.c", - "vpx_dsp/arm/vpx_convolve_copy_neon.c", - "vpx_dsp/arm/vpx_convolve_neon.c", -] -libvpx_sources_arm_neon_gas = [ - "vpx_dsp/arm/gas/intrapred_neon_asm.s", - "vpx_dsp/arm/gas/loopfilter_mb_neon.s", - "vpx_dsp/arm/gas/save_reg_neon.s", -] -libvpx_sources_arm_neon_armasm_ms = [ - "vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm", - "vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm", - "vpx_dsp/arm/armasm_ms/save_reg_neon.asm", -] -libvpx_sources_arm_neon_gas_apple = [ - "vpx_dsp/arm/gas_apple/intrapred_neon_asm.s", - "vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s", - "vpx_dsp/arm/gas_apple/save_reg_neon.s", -] - -libvpx_sources = [libvpx_dir + file for file in libvpx_sources] -libvpx_sources_mt = [libvpx_dir + file for file in libvpx_sources_mt] -libvpx_sources_intrin_x86 = [libvpx_dir + file for file in libvpx_sources_intrin_x86] -libvpx_sources_intrin_x86_mmx = [libvpx_dir + file for file in libvpx_sources_intrin_x86_mmx] -libvpx_sources_intrin_x86_sse2 = [libvpx_dir + file for file in 
libvpx_sources_intrin_x86_sse2] -libvpx_sources_intrin_x86_ssse3 = [libvpx_dir + file for file in libvpx_sources_intrin_x86_ssse3] -libvpx_sources_intrin_x86_avx2 = [libvpx_dir + file for file in libvpx_sources_intrin_x86_avx2] -libvpx_sources_x86asm = [libvpx_dir + file for file in libvpx_sources_x86asm] -libvpx_sources_x86_64asm = [libvpx_dir + file for file in libvpx_sources_x86_64asm] -libvpx_sources_arm = [libvpx_dir + file for file in libvpx_sources_arm] -libvpx_sources_arm_neon = [libvpx_dir + file for file in libvpx_sources_arm_neon] -libvpx_sources_arm_neon_gas = [libvpx_dir + file for file in libvpx_sources_arm_neon_gas] -libvpx_sources_arm_neon_armasm_ms = [libvpx_dir + file for file in libvpx_sources_arm_neon_armasm_ms] -libvpx_sources_arm_neon_gas_apple = [libvpx_dir + file for file in libvpx_sources_arm_neon_gas_apple] - - -env_libvpx = env_modules.Clone() -env_libvpx.disable_warnings() -env_libvpx.Prepend(CPPPATH=[libvpx_dir]) - -webm_multithread = env["platform"] != "javascript" - -cpu_bits = env["bits"] -webm_cpu_x86 = False -webm_cpu_arm = False -if env["platform"] == "uwp": - if "arm" in env["PROGSUFFIX"]: - webm_cpu_arm = True - else: - webm_cpu_x86 = True -else: - import platform - - is_x11_or_server_arm = env["platform"] == "linuxbsd" and ( - platform.machine().startswith("arm") or platform.machine().startswith("aarch") - ) - is_macos_x86 = env["platform"] == "osx" and ("arch" in env and (env["arch"] != "arm64")) - is_ios_x86 = env["platform"] == "iphone" and ("arch" in env and env["arch"].startswith("x86")) - is_android_x86 = env["platform"] == "android" and env["android_arch"].startswith("x86") - if is_android_x86: - cpu_bits = "32" if env["android_arch"] == "x86" else "64" - webm_cpu_x86 = ( - not is_x11_or_server_arm - and (cpu_bits == "32" or cpu_bits == "64") - and ( - env["platform"] == "windows" - or env["platform"] == "linuxbsd" - or env["platform"] == "haiku" - or is_macos_x86 - or is_android_x86 - or is_ios_x86 - ) - ) - 
webm_cpu_arm = ( - is_x11_or_server_arm - or (not is_macos_x86 and env["platform"] == "osx") - or (not is_ios_x86 and env["platform"] == "iphone") - or (not is_android_x86 and env["platform"] == "android") - ) - -if webm_cpu_x86: - import subprocess - import os - - yasm_paths = [ - "yasm", - "../../../yasm", - ] - - yasm_found = False - - devnull = open(os.devnull) - for yasm_path in yasm_paths: - try: - yasm_found = True - subprocess.Popen([yasm_path, "--version"], stdout=devnull, stderr=devnull).communicate() - except Exception: - yasm_found = False - if yasm_found: - break - - if not yasm_found: - webm_cpu_x86 = False - print("YASM is necessary for WebM SIMD optimizations.") - -webm_simd_optimizations = False - -if webm_cpu_x86: - if env["platform"] == "windows" or env["platform"] == "uwp": - env_libvpx["ASFORMAT"] = "win" - elif env["platform"] == "osx" or env["platform"] == "iphone": - env_libvpx["ASFORMAT"] = "macho" - else: - env_libvpx["ASFORMAT"] = "elf" - env_libvpx["ASFORMAT"] += cpu_bits - - env_libvpx["AS"] = "yasm" - env_libvpx["ASFLAGS"] = "-I" + libvpx_dir[1:] + " -f $ASFORMAT -D $ASCPU" - env_libvpx["ASCOM"] = "$AS $ASFLAGS -o $TARGET $SOURCES" - - if cpu_bits == "32": - env_libvpx["ASCPU"] = "X86_32" - elif cpu_bits == "64": - env_libvpx["ASCPU"] = "X86_64" - - env_libvpx.Append(CPPDEFINES=["WEBM_X86ASM"]) - - webm_simd_optimizations = True - -if webm_cpu_arm: - if env["platform"] == "iphone": - env_libvpx["ASFLAGS"] = "-arch armv7" - elif env["platform"] == "android" and env["android_arch"] == "armv7" or env["platform"] == "linuxbsd": - env_libvpx["ASFLAGS"] = "-mfpu=neon" - elif env["platform"] == "uwp": - env_libvpx["AS"] = "armasm" - env_libvpx["ASFLAGS"] = "" - env_libvpx["ASCOM"] = "$AS $ASFLAGS -o $TARGET $SOURCES" - - env_libvpx.Append(CPPDEFINES=["WEBM_ARMASM"]) - - webm_simd_optimizations = True - -if webm_simd_optimizations == False: - print("WebM SIMD optimizations are disabled. 
Check if your CPU architecture, CPU bits or platform are supported!") - -env_libvpx.add_source_files(env.modules_sources, libvpx_sources) - -if webm_multithread: - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_mt) - -if webm_cpu_x86: - is_clang_or_gcc = ( - ("gcc" in os.path.basename(env["CC"])) or ("clang" in os.path.basename(env["CC"])) or ("osxcross" in env) - ) - - env_libvpx_mmx = env_libvpx.Clone() - if cpu_bits == "32" and is_clang_or_gcc: - env_libvpx_mmx.Append(CCFLAGS=["-mmmx"]) - env_libvpx_mmx.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_mmx) - - env_libvpx_sse2 = env_libvpx.Clone() - if cpu_bits == "32" and is_clang_or_gcc: - env_libvpx_sse2.Append(CCFLAGS=["-msse2"]) - env_libvpx_sse2.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_sse2) - - env_libvpx_ssse3 = env_libvpx.Clone() - if is_clang_or_gcc: - env_libvpx_ssse3.Append(CCFLAGS=["-mssse3"]) - env_libvpx_ssse3.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_ssse3) - - env_libvpx_avx2 = env_libvpx.Clone() - if is_clang_or_gcc: - env_libvpx_avx2.Append(CCFLAGS=["-mavx2"]) - env_libvpx_avx2.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_avx2) - - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_intrin_x86) - - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_x86asm) - if cpu_bits == "64": - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_x86_64asm) -elif webm_cpu_arm: - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm) - if env["platform"] == "android": - env_libvpx.Prepend(CPPPATH=[libvpx_dir + "third_party/android"]) - env_libvpx.add_source_files(env.modules_sources, [libvpx_dir + "third_party/android/cpu-features.c"]) - - env_libvpx_neon = env_libvpx.Clone() - env_libvpx_neon.add_source_files(env.modules_sources, libvpx_sources_arm_neon) - - if env["platform"] == "uwp": - env_libvpx.add_source_files(env.modules_sources, 
libvpx_sources_arm_neon_armasm_ms) - elif env["platform"] == "iphone": - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm_neon_gas_apple) - elif (is_x11_or_server_arm and cpu_bits == "32") or ( - env["platform"] == "android" and not env["android_arch"] == "arm64v8" - ): - env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm_neon_gas) diff --git a/modules/webm/register_types.cpp b/modules/webm/register_types.cpp deleted file mode 100644 index 8f690a6892..0000000000 --- a/modules/webm/register_types.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/*************************************************************************/ -/* register_types.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. 
*/ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ - -#include "register_types.h" - -#include "video_stream_webm.h" - -static Ref<ResourceFormatLoaderWebm> resource_loader_webm; - -void register_webm_types() { - resource_loader_webm.instantiate(); - ResourceLoader::add_resource_format_loader(resource_loader_webm, true); - - GDREGISTER_CLASS(VideoStreamWebm); -} - -void unregister_webm_types() { - ResourceLoader::remove_resource_format_loader(resource_loader_webm); - resource_loader_webm.unref(); -} diff --git a/modules/webm/register_types.h b/modules/webm/register_types.h deleted file mode 100644 index d090fe745b..0000000000 --- a/modules/webm/register_types.h +++ /dev/null @@ -1,37 +0,0 @@ -/*************************************************************************/ -/* register_types.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). 
*/ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/*************************************************************************/ - -#ifndef WEBM_REGISTER_TYPES_H -#define WEBM_REGISTER_TYPES_H - -void register_webm_types(); -void unregister_webm_types(); - -#endif // WEBM_REGISTER_TYPES_H diff --git a/modules/webm/video_stream_webm.cpp b/modules/webm/video_stream_webm.cpp deleted file mode 100644 index 187a27b6c2..0000000000 --- a/modules/webm/video_stream_webm.cpp +++ /dev/null @@ -1,469 +0,0 @@ -/*************************************************************************/ -/* video_stream_webm.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. 
*/ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ - -#include "video_stream_webm.h" - -#include "core/config/project_settings.h" -#include "core/io/file_access.h" -#include "core/os/os.h" -#include "servers/audio_server.h" - -#include "thirdparty/misc/yuv2rgb.h" - -// libsimplewebm -#include <OpusVorbisDecoder.hpp> -#include <VPXDecoder.hpp> - -// libvpx -#include <vpx/vpx_image.h> - -// libwebm -#include <mkvparser/mkvparser.h> - -class MkvReader : public mkvparser::IMkvReader { -public: - MkvReader(const String &p_file) { - file = FileAccess::open(p_file, FileAccess::READ); - - ERR_FAIL_COND_MSG(!file, "Failed loading resource: '" + p_file + "'."); - } - ~MkvReader() { - if (file) { - memdelete(file); - } - } - - virtual int Read(long long pos, long len, unsigned char *buf) { - if (file) { - if (file->get_position() != (uint64_t)pos) { - file->seek(pos); - } - if (file->get_buffer(buf, len) == (uint64_t)len) { - return 0; - } - } - return -1; - } - - virtual int Length(long long *total, long long *available) { - if (file) { - const uint64_t len = file->get_length(); - if (total) { - *total = len; - } - if (available) { - *available = len; - } - return 0; - } - return -1; - } - -private: - FileAccess *file; -}; - -/**/ - -VideoStreamPlaybackWebm::VideoStreamPlaybackWebm() : - - texture(memnew(ImageTexture)) {} -VideoStreamPlaybackWebm::~VideoStreamPlaybackWebm() { - delete_pointers(); -} - -bool 
VideoStreamPlaybackWebm::open_file(const String &p_file) { - file_name = p_file; - webm = memnew(WebMDemuxer(new MkvReader(file_name), 0, audio_track)); - if (webm->isOpen()) { - video = memnew(VPXDecoder(*webm, OS::get_singleton()->get_processor_count())); - if (video->isOpen()) { - audio = memnew(OpusVorbisDecoder(*webm)); - if (audio->isOpen()) { - audio_frame = memnew(WebMFrame); - pcm = (float *)memalloc(sizeof(float) * audio->getBufferSamples() * webm->getChannels()); - } else { - memdelete(audio); - audio = nullptr; - } - - frame_data.resize((webm->getWidth() * webm->getHeight()) << 2); - Ref<Image> img; - img.instantiate(); - img->create(webm->getWidth(), webm->getHeight(), false, Image::FORMAT_RGBA8); - texture->create_from_image(img); - - return true; - } - memdelete(video); - video = nullptr; - } - memdelete(webm); - webm = nullptr; - return false; -} - -void VideoStreamPlaybackWebm::stop() { - if (playing) { - delete_pointers(); - - pcm = nullptr; - - audio_frame = nullptr; - video_frames = nullptr; - - video = nullptr; - audio = nullptr; - - open_file(file_name); //Should not fail here... 
- - video_frames_capacity = video_frames_pos = 0; - num_decoded_samples = 0; - samples_offset = -1; - video_frame_delay = video_pos = 0.0; - } - time = 0.0; - playing = false; -} - -void VideoStreamPlaybackWebm::play() { - stop(); - - delay_compensation = ProjectSettings::get_singleton()->get("audio/video/video_delay_compensation_ms"); - delay_compensation /= 1000.0; - - playing = true; -} - -bool VideoStreamPlaybackWebm::is_playing() const { - return playing; -} - -void VideoStreamPlaybackWebm::set_paused(bool p_paused) { - paused = p_paused; -} - -bool VideoStreamPlaybackWebm::is_paused() const { - return paused; -} - -void VideoStreamPlaybackWebm::set_loop(bool p_enable) { - //Empty -} - -bool VideoStreamPlaybackWebm::has_loop() const { - return false; -} - -float VideoStreamPlaybackWebm::get_length() const { - if (webm) { - return webm->getLength(); - } - return 0.0f; -} - -float VideoStreamPlaybackWebm::get_playback_position() const { - return video_pos; -} - -void VideoStreamPlaybackWebm::seek(float p_time) { - WARN_PRINT_ONCE("Seeking in Theora and WebM videos is not implemented yet (it's only supported for GDNative-provided video streams)."); -} - -void VideoStreamPlaybackWebm::set_audio_track(int p_idx) { - audio_track = p_idx; -} - -Ref<Texture2D> VideoStreamPlaybackWebm::get_texture() const { - return texture; -} - -void VideoStreamPlaybackWebm::update(float p_delta) { - if ((!playing || paused) || !video) { - return; - } - - time += p_delta; - - if (time < video_pos) { - return; - } - - bool audio_buffer_full = false; - - if (samples_offset > -1) { - //Mix remaining samples - const int to_read = num_decoded_samples - samples_offset; - const int mixed = mix_callback(mix_udata, pcm + samples_offset * webm->getChannels(), to_read); - if (mixed != to_read) { - samples_offset += mixed; - audio_buffer_full = true; - } else { - samples_offset = -1; - } - } - - const bool hasAudio = (audio && mix_callback); - while ((hasAudio && !audio_buffer_full && 
!has_enough_video_frames()) || - (!hasAudio && video_frames_pos == 0)) { - if (hasAudio && !audio_buffer_full && audio_frame->isValid() && - audio->getPCMF(*audio_frame, pcm, num_decoded_samples) && num_decoded_samples > 0) { - const int mixed = mix_callback(mix_udata, pcm, num_decoded_samples); - - if (mixed != num_decoded_samples) { - samples_offset = mixed; - audio_buffer_full = true; - } - } - - WebMFrame *video_frame; - if (video_frames_pos >= video_frames_capacity) { - WebMFrame **video_frames_new = (WebMFrame **)memrealloc(video_frames, ++video_frames_capacity * sizeof(void *)); - ERR_FAIL_COND(!video_frames_new); //Out of memory - (video_frames = video_frames_new)[video_frames_capacity - 1] = memnew(WebMFrame); - } - video_frame = video_frames[video_frames_pos]; - - if (!webm->readFrame(video_frame, audio_frame)) { //This will invalidate frames - break; //Can't demux, EOS? - } - - if (video_frame->isValid()) { - ++video_frames_pos; - } - }; - - bool video_frame_done = false; - while (video_frames_pos > 0 && !video_frame_done) { - WebMFrame *video_frame = video_frames[0]; - - // It seems VPXDecoder::decode has to be executed even though we might skip this frame - if (video->decode(*video_frame)) { - VPXDecoder::IMAGE_ERROR err; - VPXDecoder::Image image; - - if (should_process(*video_frame)) { - if ((err = video->getImage(image)) != VPXDecoder::NO_FRAME) { - if (err == VPXDecoder::NO_ERROR && image.w == webm->getWidth() && image.h == webm->getHeight()) { - uint8_t *w = frame_data.ptrw(); - bool converted = false; - - if (image.chromaShiftW == 0 && image.chromaShiftH == 0 && image.cs == VPX_CS_SRGB) { - uint8_t *wp = w; - unsigned char *rRow = image.planes[2]; - unsigned char *gRow = image.planes[0]; - unsigned char *bRow = image.planes[1]; - for (int i = 0; i < image.h; i++) { - for (int j = 0; j < image.w; j++) { - *wp++ = rRow[j]; - *wp++ = gRow[j]; - *wp++ = bRow[j]; - *wp++ = 255; - } - rRow += image.linesize[2]; - gRow += image.linesize[0]; - bRow += 
image.linesize[1]; - } - converted = true; - } else if (image.chromaShiftW == 1 && image.chromaShiftH == 1) { - yuv420_2_rgb8888(w, image.planes[0], image.planes[1], image.planes[2], image.w, image.h, image.linesize[0], image.linesize[1], image.w << 2); - //libyuv::I420ToARGB(image.planes[0], image.linesize[0], image.planes[2], image.linesize[2], image.planes[1], image.linesize[1], w.ptr(), image.w << 2, image.w, image.h); - converted = true; - } else if (image.chromaShiftW == 1 && image.chromaShiftH == 0) { - yuv422_2_rgb8888(w, image.planes[0], image.planes[1], image.planes[2], image.w, image.h, image.linesize[0], image.linesize[1], image.w << 2); - //libyuv::I422ToARGB(image.planes[0], image.linesize[0], image.planes[2], image.linesize[2], image.planes[1], image.linesize[1], w.ptr(), image.w << 2, image.w, image.h); - converted = true; - } else if (image.chromaShiftW == 0 && image.chromaShiftH == 0) { - yuv444_2_rgb8888(w, image.planes[0], image.planes[1], image.planes[2], image.w, image.h, image.linesize[0], image.linesize[1], image.w << 2); - //libyuv::I444ToARGB(image.planes[0], image.linesize[0], image.planes[2], image.linesize[2], image.planes[1], image.linesize[1], w.ptr(), image.w << 2, image.w, image.h); - converted = true; - } else if (image.chromaShiftW == 2 && image.chromaShiftH == 0) { - //libyuv::I411ToARGB(image.planes[0], image.linesize[0], image.planes[2], image.linesize[2] image.planes[1], image.linesize[1], w.ptr(), image.w << 2, image.w, image.h); - //converted = true; - } - - if (converted) { - Ref<Image> img = memnew(Image(image.w, image.h, 0, Image::FORMAT_RGBA8, frame_data)); - texture->update(img); //Zero copy send to rendering server - video_frame_done = true; - } - } - } - } - } - - video_pos = video_frame->time; - memmove(video_frames, video_frames + 1, (--video_frames_pos) * sizeof(void *)); - video_frames[video_frames_pos] = video_frame; - } - - if (video_frames_pos == 0 && webm->isEOS()) { - stop(); - } -} - -void 
VideoStreamPlaybackWebm::set_mix_callback(VideoStreamPlayback::AudioMixCallback p_callback, void *p_userdata) { - mix_callback = p_callback; - mix_udata = p_userdata; -} - -int VideoStreamPlaybackWebm::get_channels() const { - if (audio) { - return webm->getChannels(); - } - return 0; -} - -int VideoStreamPlaybackWebm::get_mix_rate() const { - if (audio) { - return webm->getSampleRate(); - } - return 0; -} - -inline bool VideoStreamPlaybackWebm::has_enough_video_frames() const { - if (video_frames_pos > 0) { - // FIXME: AudioServer output latency was fixed in af9bb0e, previously it used to - // systematically return 0. Now that it gives a proper latency, it broke this - // code where the delay compensation likely never really worked. - //const double audio_delay = AudioServer::get_singleton()->get_output_latency(); - const double video_time = video_frames[video_frames_pos - 1]->time; - return video_time >= time + /* audio_delay + */ delay_compensation; - } - return false; -} - -bool VideoStreamPlaybackWebm::should_process(WebMFrame &video_frame) { - // FIXME: AudioServer output latency was fixed in af9bb0e, previously it used to - // systematically return 0. Now that it gives a proper latency, it broke this - // code where the delay compensation likely never really worked. 
- //const double audio_delay = AudioServer::get_singleton()->get_output_latency(); - return video_frame.time >= time + /* audio_delay + */ delay_compensation; -} - -void VideoStreamPlaybackWebm::delete_pointers() { - if (pcm) { - memfree(pcm); - } - - if (audio_frame) { - memdelete(audio_frame); - } - if (video_frames) { - for (int i = 0; i < video_frames_capacity; ++i) { - memdelete(video_frames[i]); - } - memfree(video_frames); - } - - if (video) { - memdelete(video); - } - if (audio) { - memdelete(audio); - } - - if (webm) { - memdelete(webm); - } -} - -/**/ - -VideoStreamWebm::VideoStreamWebm() {} - -Ref<VideoStreamPlayback> VideoStreamWebm::instance_playback() { - Ref<VideoStreamPlaybackWebm> pb = memnew(VideoStreamPlaybackWebm); - pb->set_audio_track(audio_track); - if (pb->open_file(file)) { - return pb; - } - return nullptr; -} - -void VideoStreamWebm::set_file(const String &p_file) { - file = p_file; -} - -String VideoStreamWebm::get_file() { - return file; -} - -void VideoStreamWebm::_bind_methods() { - ClassDB::bind_method(D_METHOD("set_file", "file"), &VideoStreamWebm::set_file); - ClassDB::bind_method(D_METHOD("get_file"), &VideoStreamWebm::get_file); - - ADD_PROPERTY(PropertyInfo(Variant::STRING, "file", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NOEDITOR | PROPERTY_USAGE_INTERNAL), "set_file", "get_file"); -} - -void VideoStreamWebm::set_audio_track(int p_track) { - audio_track = p_track; -} - -//////////// - -RES ResourceFormatLoaderWebm::load(const String &p_path, const String &p_original_path, Error *r_error, bool p_use_sub_threads, float *r_progress, CacheMode p_cache_mode) { - FileAccess *f = FileAccess::open(p_path, FileAccess::READ); - if (!f) { - if (r_error) { - *r_error = ERR_CANT_OPEN; - } - return RES(); - } - - VideoStreamWebm *stream = memnew(VideoStreamWebm); - stream->set_file(p_path); - - Ref<VideoStreamWebm> webm_stream = Ref<VideoStreamWebm>(stream); - - if (r_error) { - *r_error = OK; - } - - f->close(); - memdelete(f); - return 
webm_stream; -} - -void ResourceFormatLoaderWebm::get_recognized_extensions(List<String> *p_extensions) const { - p_extensions->push_back("webm"); -} - -bool ResourceFormatLoaderWebm::handles_type(const String &p_type) const { - return ClassDB::is_parent_class(p_type, "VideoStream"); -} - -String ResourceFormatLoaderWebm::get_resource_type(const String &p_path) const { - String el = p_path.get_extension().to_lower(); - if (el == "webm") { - return "VideoStreamWebm"; - } - return ""; -} diff --git a/modules/webm/video_stream_webm.h b/modules/webm/video_stream_webm.h deleted file mode 100644 index 60e02ab38b..0000000000 --- a/modules/webm/video_stream_webm.h +++ /dev/null @@ -1,135 +0,0 @@ -/*************************************************************************/ -/* video_stream_webm.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. 
*/ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ - -#ifndef VIDEO_STREAM_WEBM_H -#define VIDEO_STREAM_WEBM_H - -#include "core/io/resource_loader.h" -#include "scene/resources/video_stream.h" - -class WebMFrame; -class WebMDemuxer; -class VPXDecoder; -class OpusVorbisDecoder; - -class VideoStreamPlaybackWebm : public VideoStreamPlayback { - GDCLASS(VideoStreamPlaybackWebm, VideoStreamPlayback); - - String file_name; - int audio_track = 0; - - WebMDemuxer *webm = nullptr; - VPXDecoder *video = nullptr; - OpusVorbisDecoder *audio = nullptr; - - WebMFrame **video_frames = nullptr, *audio_frame = nullptr; - int video_frames_pos = 0, video_frames_capacity = 0; - - int num_decoded_samples = 0, samples_offset = -1; - AudioMixCallback mix_callback = nullptr; - void *mix_udata = nullptr; - - bool playing = false, paused = false; - double delay_compensation = 0.0; - double time = 0.0, video_frame_delay = 0.0, video_pos = 0.0; - - Vector<uint8_t> frame_data; - Ref<ImageTexture> texture; - - float *pcm = nullptr; - -public: - VideoStreamPlaybackWebm(); - ~VideoStreamPlaybackWebm(); - - bool open_file(const String &p_file); - - virtual void stop() override; - virtual void play() override; - - virtual bool is_playing() const override; - - virtual void set_paused(bool p_paused) override; - virtual bool is_paused() const override; - - virtual void set_loop(bool p_enable) override; - virtual bool has_loop() const override; - - virtual float 
get_length() const override; - - virtual float get_playback_position() const override; - virtual void seek(float p_time) override; - - virtual void set_audio_track(int p_idx) override; - - virtual Ref<Texture2D> get_texture() const override; - virtual void update(float p_delta) override; - - virtual void set_mix_callback(AudioMixCallback p_callback, void *p_userdata) override; - virtual int get_channels() const override; - virtual int get_mix_rate() const override; - -private: - inline bool has_enough_video_frames() const; - bool should_process(WebMFrame &video_frame); - - void delete_pointers(); -}; - -/**/ - -class VideoStreamWebm : public VideoStream { - GDCLASS(VideoStreamWebm, VideoStream); - - String file; - int audio_track = 0; - -protected: - static void _bind_methods(); - -public: - VideoStreamWebm(); - - virtual Ref<VideoStreamPlayback> instance_playback() override; - - virtual void set_file(const String &p_file); - String get_file(); - virtual void set_audio_track(int p_track) override; -}; - -class ResourceFormatLoaderWebm : public ResourceFormatLoader { -public: - virtual RES load(const String &p_path, const String &p_original_path = "", Error *r_error = nullptr, bool p_use_sub_threads = false, float *r_progress = nullptr, CacheMode p_cache_mode = CACHE_MODE_REUSE); - virtual void get_recognized_extensions(List<String> *p_extensions) const; - virtual bool handles_type(const String &p_type) const; - virtual String get_resource_type(const String &p_path) const; -}; - -#endif // VIDEO_STREAM_WEBM_H diff --git a/platform/linuxbsd/detect.py b/platform/linuxbsd/detect.py index afb7c7b2ab..7ce0f77c51 100644 --- a/platform/linuxbsd/detect.py +++ b/platform/linuxbsd/detect.py @@ -291,17 +291,10 @@ def configure(env): if any(platform.machine() in s for s in list_of_x86): env["x86_libtheora_opt_gcc"] = True - if not env["builtin_libvpx"]: - env.ParseConfig("pkg-config vpx --cflags --libs") - if not env["builtin_libvorbis"]: env["builtin_libogg"] = False # Needed 
to link against system libvorbis env.ParseConfig("pkg-config vorbis vorbisfile --cflags --libs") - if not env["builtin_opus"]: - env["builtin_libogg"] = False # Needed to link against system opus - env.ParseConfig("pkg-config opus opusfile --cflags --libs") - if not env["builtin_libogg"]: env.ParseConfig("pkg-config ogg --cflags --libs") diff --git a/platform/osx/detect.py b/platform/osx/detect.py index 10cf2b591e..83523dde66 100644 --- a/platform/osx/detect.py +++ b/platform/osx/detect.py @@ -97,7 +97,6 @@ def configure(env): env["AR"] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-ar" env["RANLIB"] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-ranlib" env["AS"] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-as" - env.Append(CPPDEFINES=["__MACPORTS__"]) # hack to fix libvpx MM256_BROADCASTSI128_SI256 define else: env["CC"] = "clang" env["CXX"] = "clang++" @@ -125,7 +124,6 @@ def configure(env): env["AR"] = basecmd + "ar" env["RANLIB"] = basecmd + "ranlib" env["AS"] = basecmd + "as" - env.Append(CPPDEFINES=["__MACPORTS__"]) # hack to fix libvpx MM256_BROADCASTSI128_SI256 define if env["use_ubsan"] or env["use_asan"] or env["use_tsan"]: env.extra_suffix += "s" diff --git a/scene/gui/video_player.cpp b/scene/gui/video_player.cpp index 8734037a57..989aabc549 100644 --- a/scene/gui/video_player.cpp +++ b/scene/gui/video_player.cpp @@ -29,9 +29,9 @@ /*************************************************************************/ #include "video_player.h" -#include "scene/scene_string_names.h" #include "core/os/os.h" +#include "scene/scene_string_names.h" #include "servers/audio_server.h" int VideoPlayer::sp_get_channel_count() const { @@ -55,7 +55,7 @@ bool VideoPlayer::mix(AudioFrame *p_buffer, int p_frames) { return false; } -// Called from main thread (eg VideoStreamPlaybackWebm::update) +// Called from main thread (e.g. VideoStreamPlaybackTheora::update). 
int VideoPlayer::_audio_mix_callback(void *p_udata, const float *p_data, int p_frames) { ERR_FAIL_NULL_V(p_udata, 0); ERR_FAIL_NULL_V(p_data, 0); diff --git a/thirdparty/README.md b/thirdparty/README.md index 157622a2d1..964ac6246e 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -255,25 +255,6 @@ Files extracted from upstream source: - `LICENSE` -## libsimplewebm - -- Upstream: https://github.com/zaps166/libsimplewebm -- Version: git (fe57fd3cfe6c0af4c6af110b1f84a90cf191d943, 2019) -- License: MIT (main), BSD-3-Clause (libwebm) - -This contains libwebm, but the version in use is updated from the one used by libsimplewebm, -and may have *unmarked* alterations from that. - -Files extracted from upstream source: - -- all the .cpp, .hpp files in the main folder except `example.cpp` -- LICENSE - -Important: Some files have Godot-made changes. -They are marked with `// -- GODOT start --` and `// -- GODOT end --` -comments. - - ## libtheora - Upstream: https://www.theora.org @@ -303,23 +284,6 @@ Files extracted from upstream source: - COPYING -## libvpx - -- Upstream: https://chromium.googlesource.com/webm/libvpx/ -- Version: 1.6.0 (2016) -- License: BSD-3-Clause - -Files extracted from upstream source: - -TODO. - -Important: File `libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c` has -Godot-made change marked with `// -- GODOT --` comments. - -The files `libvpx/third_party/android/cpu-features.{c,h}` were copied -from the Android NDK r18. - - ## libwebp - Upstream: https://chromium.googlesource.com/webm/libwebp/ @@ -531,23 +495,6 @@ Patch files are provided in `oidn/patches/`. 
- scripts/resource_to_cpp.py (used in modules/denoise/resource_to_cpp.py) -## opus - -- Upstream: https://opus-codec.org -- Version: 1.1.5 (opus) and 0.8 (opusfile) (2017) -- License: BSD-3-Clause - -Files extracted from upstream source: - -- all .c and .h files in src/ (both opus and opusfile) -- all .h files in include/ (both opus and opusfile) as opus/ -- remove unused `opus_demo.c`, -- remove `http.c`, `wincerts.c` and `winerrno.h` (part of - unused libopusurl) -- celt/ and silk/ subfolders -- COPYING - - ## pcre2 - Upstream: http://www.pcre.org diff --git a/thirdparty/libsimplewebm/LICENSE b/thirdparty/libsimplewebm/LICENSE deleted file mode 100644 index 058633ac18..0000000000 --- a/thirdparty/libsimplewebm/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2016 Błażej Szczygieł - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/thirdparty/libsimplewebm/OpusVorbisDecoder.cpp b/thirdparty/libsimplewebm/OpusVorbisDecoder.cpp deleted file mode 100644 index b5824b17be..0000000000 --- a/thirdparty/libsimplewebm/OpusVorbisDecoder.cpp +++ /dev/null @@ -1,264 +0,0 @@ -/* - MIT License - - Copyright (c) 2016 Błażej Szczygieł - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
-*/ - -#include "OpusVorbisDecoder.hpp" - -#include <vorbis/codec.h> -#include <opus/opus.h> - -#include <string.h> - -struct VorbisDecoder -{ - vorbis_info info; - vorbis_dsp_state dspState; - vorbis_block block; - ogg_packet op; - - bool hasDSPState, hasBlock; -}; - -/**/ - -OpusVorbisDecoder::OpusVorbisDecoder(const WebMDemuxer &demuxer) : - m_vorbis(NULL), m_opus(NULL), - m_numSamples(0) -{ - switch (demuxer.getAudioCodec()) - { - case WebMDemuxer::AUDIO_VORBIS: - m_channels = demuxer.getChannels(); - if (openVorbis(demuxer)) - return; - break; - case WebMDemuxer::AUDIO_OPUS: - m_channels = demuxer.getChannels(); - if (openOpus(demuxer)) - return; - break; - default: - return; - } - close(); -} -OpusVorbisDecoder::~OpusVorbisDecoder() -{ - close(); -} - -bool OpusVorbisDecoder::isOpen() const -{ - return (m_vorbis || m_opus); -} - -bool OpusVorbisDecoder::getPCMS16(WebMFrame &frame, short *buffer, int &numOutSamples) -{ - if (m_vorbis) - { - m_vorbis->op.packet = frame.buffer; - m_vorbis->op.bytes = frame.bufferSize; - - if (vorbis_synthesis(&m_vorbis->block, &m_vorbis->op)) - return false; - if (vorbis_synthesis_blockin(&m_vorbis->dspState, &m_vorbis->block)) - return false; - - const int maxSamples = getBufferSamples(); - int samplesCount, count = 0; - float **pcm; - while ((samplesCount = vorbis_synthesis_pcmout(&m_vorbis->dspState, &pcm))) - { - const int toConvert = samplesCount <= maxSamples ? 
samplesCount : maxSamples; - for (int c = 0; c < m_channels; ++c) - { - float *samples = pcm[c]; - for (int i = 0, j = c; i < toConvert; ++i, j += m_channels) - { - int sample = samples[i] * 32767.0f; - if (sample > 32767) - sample = 32767; - else if (sample < -32768) - sample = -32768; - buffer[count + j] = sample; - } - } - vorbis_synthesis_read(&m_vorbis->dspState, toConvert); - count += toConvert; - } - - numOutSamples = count; - return true; - } - else if (m_opus) - { - const int samples = opus_decode(m_opus, frame.buffer, frame.bufferSize, buffer, m_numSamples, 0); - if (samples >= 0) - { - numOutSamples = samples; - return true; - } - } - return false; -} - -// -- GODOT begin -- -bool OpusVorbisDecoder::getPCMF(WebMFrame &frame, float *buffer, int &numOutSamples) { - if (m_vorbis) { - m_vorbis->op.packet = frame.buffer; - m_vorbis->op.bytes = frame.bufferSize; - - if (vorbis_synthesis(&m_vorbis->block, &m_vorbis->op)) - return false; - if (vorbis_synthesis_blockin(&m_vorbis->dspState, &m_vorbis->block)) - return false; - - const int maxSamples = getBufferSamples(); - int samplesCount, count = 0; - float **pcm; - while ((samplesCount = vorbis_synthesis_pcmout(&m_vorbis->dspState, &pcm))) { - const int toConvert = samplesCount <= maxSamples ? 
samplesCount : maxSamples; - for (int c = 0; c < m_channels; ++c) { - float *samples = pcm[c]; - for (int i = 0, j = c; i < toConvert; ++i, j += m_channels) { - buffer[count + j] = samples[i]; - } - } - vorbis_synthesis_read(&m_vorbis->dspState, toConvert); - count += toConvert; - } - - numOutSamples = count; - return true; - } else if (m_opus) { - const int samples = opus_decode_float(m_opus, frame.buffer, frame.bufferSize, buffer, m_numSamples, 0); - if (samples >= 0) { - numOutSamples = samples; - return true; - } - } - return false; -} -// -- GODOT end -- - -bool OpusVorbisDecoder::openVorbis(const WebMDemuxer &demuxer) -{ - size_t extradataSize = 0; - const unsigned char *extradata = demuxer.getAudioExtradata(extradataSize); - - if (extradataSize < 3 || !extradata || extradata[0] != 2) - return false; - - size_t headerSize[3] = {0}; - size_t offset = 1; - - /* Calculate three headers sizes */ - for (int i = 0; i < 2; ++i) - { - for (;;) - { - if (offset >= extradataSize) - return false; - headerSize[i] += extradata[offset]; - if (extradata[offset++] < 0xFF) - break; - } - } - headerSize[2] = extradataSize - (headerSize[0] + headerSize[1] + offset); - - if (headerSize[0] + headerSize[1] + headerSize[2] + offset != extradataSize) - return false; - - ogg_packet op[3]; - memset(op, 0, sizeof op); - - op[0].packet = (unsigned char *)extradata + offset; - op[0].bytes = headerSize[0]; - op[0].b_o_s = 1; - - op[1].packet = (unsigned char *)extradata + offset + headerSize[0]; - op[1].bytes = headerSize[1]; - - op[2].packet = (unsigned char *)extradata + offset + headerSize[0] + headerSize[1]; - op[2].bytes = headerSize[2]; - - m_vorbis = new VorbisDecoder; - m_vorbis->hasDSPState = m_vorbis->hasBlock = false; - vorbis_info_init(&m_vorbis->info); - - /* Upload three Vorbis headers into libvorbis */ - vorbis_comment vc; - vorbis_comment_init(&vc); - for (int i = 0; i < 3; ++i) - { - if (vorbis_synthesis_headerin(&m_vorbis->info, &vc, &op[i])) - { - 
vorbis_comment_clear(&vc); - return false; - } - } - vorbis_comment_clear(&vc); - - if (vorbis_synthesis_init(&m_vorbis->dspState, &m_vorbis->info)) - return false; - m_vorbis->hasDSPState = true; - - if (m_vorbis->info.channels != m_channels || m_vorbis->info.rate != demuxer.getSampleRate()) - return false; - - if (vorbis_block_init(&m_vorbis->dspState, &m_vorbis->block)) - return false; - m_vorbis->hasBlock = true; - - memset(&m_vorbis->op, 0, sizeof m_vorbis->op); - - m_numSamples = 4096 / m_channels; - - return true; -} -bool OpusVorbisDecoder::openOpus(const WebMDemuxer &demuxer) -{ - int opusErr = 0; - m_opus = opus_decoder_create(demuxer.getSampleRate(), m_channels, &opusErr); - if (!opusErr) - { - m_numSamples = demuxer.getSampleRate() * 0.06 + 0.5; //Maximum frame size (for 60 ms frame) - return true; - } - return false; -} - -void OpusVorbisDecoder::close() -{ - if (m_vorbis) - { - if (m_vorbis->hasBlock) - vorbis_block_clear(&m_vorbis->block); - if (m_vorbis->hasDSPState) - vorbis_dsp_clear(&m_vorbis->dspState); - vorbis_info_clear(&m_vorbis->info); - delete m_vorbis; - } - if (m_opus) - opus_decoder_destroy(m_opus); -} diff --git a/thirdparty/libsimplewebm/OpusVorbisDecoder.hpp b/thirdparty/libsimplewebm/OpusVorbisDecoder.hpp deleted file mode 100644 index f285b3fbd6..0000000000 --- a/thirdparty/libsimplewebm/OpusVorbisDecoder.hpp +++ /dev/null @@ -1,65 +0,0 @@ -/* - MIT License - - Copyright (c) 2016 Błażej Szczygieł - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or 
substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#ifndef OPUSVORBISDECODER_HPP -#define OPUSVORBISDECODER_HPP - -#include "WebMDemuxer.hpp" - -struct VorbisDecoder; -struct OpusDecoder; - -class OpusVorbisDecoder -{ - OpusVorbisDecoder(const OpusVorbisDecoder &); - void operator =(const OpusVorbisDecoder &); -public: - OpusVorbisDecoder(const WebMDemuxer &demuxer); - ~OpusVorbisDecoder(); - - bool isOpen() const; - - inline int getBufferSamples() const - { - return m_numSamples; - } - bool getPCMS16(WebMFrame &frame, short *buffer, int &numOutSamples); -// -- GODOT begin -- - bool getPCMF(WebMFrame &frame, float *buffer, int &numOutSamples); -// -- GODOT end -- - -private: - bool openVorbis(const WebMDemuxer &demuxer); - bool openOpus(const WebMDemuxer &demuxer); - - void close(); - - VorbisDecoder *m_vorbis; - OpusDecoder *m_opus; - int m_numSamples; - int m_channels; - -}; - -#endif // OPUSVORBISDECODER_HPP diff --git a/thirdparty/libsimplewebm/VPXDecoder.cpp b/thirdparty/libsimplewebm/VPXDecoder.cpp deleted file mode 100644 index e2606f83ba..0000000000 --- a/thirdparty/libsimplewebm/VPXDecoder.cpp +++ /dev/null @@ -1,154 +0,0 @@ -/* - MIT License - - Copyright (c) 2016 Błażej Szczygieł - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - 
copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#include "VPXDecoder.hpp" - -#include <vpx/vpx_decoder.h> -#include <vpx/vp8dx.h> - -#include <stdlib.h> -#include <string.h> - -VPXDecoder::VPXDecoder(const WebMDemuxer &demuxer, unsigned threads) : - m_ctx(NULL), - m_iter(NULL), - m_delay(0), - m_last_space(VPX_CS_UNKNOWN) -{ - if (threads > 8) - threads = 8; - else if (threads < 1) - threads = 1; - - const vpx_codec_dec_cfg_t codecCfg = { - threads, - 0, - 0 - }; - vpx_codec_iface_t *codecIface = NULL; - - switch (demuxer.getVideoCodec()) - { - case WebMDemuxer::VIDEO_VP8: - codecIface = vpx_codec_vp8_dx(); - break; - case WebMDemuxer::VIDEO_VP9: - codecIface = vpx_codec_vp9_dx(); - m_delay = threads - 1; - break; - default: - return; - } - - m_ctx = new vpx_codec_ctx_t; - if (vpx_codec_dec_init(m_ctx, codecIface, &codecCfg, m_delay > 0 ? 
VPX_CODEC_USE_FRAME_THREADING : 0)) - { - delete m_ctx; - m_ctx = NULL; - } -} -VPXDecoder::~VPXDecoder() -{ - if (m_ctx) - { - vpx_codec_destroy(m_ctx); - delete m_ctx; - } -} - -bool VPXDecoder::decode(const WebMFrame &frame) -{ - m_iter = NULL; - return !vpx_codec_decode(m_ctx, frame.buffer, frame.bufferSize, NULL, 0); -} -VPXDecoder::IMAGE_ERROR VPXDecoder::getImage(Image &image) -{ - IMAGE_ERROR err = NO_FRAME; - if (vpx_image_t *img = vpx_codec_get_frame(m_ctx, &m_iter)) - { - // It seems to be a common problem that UNKNOWN comes up a lot, yet FFMPEG is somehow getting accurate colour-space information. - // After checking FFMPEG code, *they're* getting colour-space information, so I'm assuming something like this is going on. - // It appears to work, at least. - if (img->cs != VPX_CS_UNKNOWN) - m_last_space = img->cs; - if ((img->fmt & VPX_IMG_FMT_PLANAR) && !(img->fmt & (VPX_IMG_FMT_HAS_ALPHA | VPX_IMG_FMT_HIGHBITDEPTH))) - { - if (img->stride[0] && img->stride[1] && img->stride[2]) - { - const int uPlane = !!(img->fmt & VPX_IMG_FMT_UV_FLIP) + 1; - const int vPlane = !(img->fmt & VPX_IMG_FMT_UV_FLIP) + 1; - - image.w = img->d_w; - image.h = img->d_h; - image.cs = m_last_space; - image.chromaShiftW = img->x_chroma_shift; - image.chromaShiftH = img->y_chroma_shift; - - image.planes[0] = img->planes[0]; - image.planes[1] = img->planes[uPlane]; - image.planes[2] = img->planes[vPlane]; - - image.linesize[0] = img->stride[0]; - image.linesize[1] = img->stride[uPlane]; - image.linesize[2] = img->stride[vPlane]; - - err = NO_ERROR; - } - } - else - { - err = UNSUPPORTED_FRAME; - } - } - return err; -} - -/**/ - -// -- GODOT begin -- -#if 0 -// -- GODOT end -- - -static inline int ceilRshift(int val, int shift) -{ - return (val + (1 << shift) - 1) >> shift; -} - -int VPXDecoder::Image::getWidth(int plane) const -{ - if (!plane) - return w; - return ceilRshift(w, chromaShiftW); -} -int VPXDecoder::Image::getHeight(int plane) const -{ - if (!plane) - return h; - 
return ceilRshift(h, chromaShiftH); -} - -// -- GODOT begin -- -#endif -// -- GODOT end -- - diff --git a/thirdparty/libsimplewebm/VPXDecoder.hpp b/thirdparty/libsimplewebm/VPXDecoder.hpp deleted file mode 100644 index 5071b069cb..0000000000 --- a/thirdparty/libsimplewebm/VPXDecoder.hpp +++ /dev/null @@ -1,86 +0,0 @@ -/* - MIT License - - Copyright (c) 2016 Błażej Szczygieł - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
-*/ - -#ifndef VPXDECODER_HPP -#define VPXDECODER_HPP - -#include "WebMDemuxer.hpp" - -struct vpx_codec_ctx; - -class VPXDecoder -{ - VPXDecoder(const VPXDecoder &); - void operator =(const VPXDecoder &); -public: - class Image - { - public: -// -- GODOT begin -- -#if 0 -// -- GODOT end -- - int getWidth(int plane) const; - int getHeight(int plane) const; -// -- GODOT begin -- -#endif -// -- GODOT end -- - - int w, h; - int cs; - int chromaShiftW, chromaShiftH; - unsigned char *planes[3]; - int linesize[3]; - }; - - enum IMAGE_ERROR - { - UNSUPPORTED_FRAME = -1, - NO_ERROR, - NO_FRAME - }; - - VPXDecoder(const WebMDemuxer &demuxer, unsigned threads = 1); - ~VPXDecoder(); - - inline bool isOpen() const - { - return (bool)m_ctx; - } - - inline int getFramesDelay() const - { - return m_delay; - } - - bool decode(const WebMFrame &frame); - IMAGE_ERROR getImage(Image &image); //The data is NOT copied! Only 3-plane, 8-bit images are supported. - -private: - vpx_codec_ctx *m_ctx; - const void *m_iter; - int m_delay; - int m_last_space; -}; - -#endif // VPXDECODER_HPP diff --git a/thirdparty/libsimplewebm/WebMDemuxer.cpp b/thirdparty/libsimplewebm/WebMDemuxer.cpp deleted file mode 100644 index cb63deccd5..0000000000 --- a/thirdparty/libsimplewebm/WebMDemuxer.cpp +++ /dev/null @@ -1,241 +0,0 @@ -/* - MIT License - - Copyright (c) 2016 Błażej Szczygieł - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#include "WebMDemuxer.hpp" - -#include "mkvparser/mkvparser.h" - -#include <assert.h> -#include <stdlib.h> -#include <string.h> - -WebMFrame::WebMFrame() : - bufferSize(0), bufferCapacity(0), - buffer(NULL), - time(0), - key(false) -{} -WebMFrame::~WebMFrame() -{ - free(buffer); -} - -/**/ - -WebMDemuxer::WebMDemuxer(mkvparser::IMkvReader *reader, int videoTrack, int audioTrack) : - m_reader(reader), - m_segment(NULL), - m_cluster(NULL), m_block(NULL), m_blockEntry(NULL), - m_blockFrameIndex(0), - m_videoTrack(NULL), m_vCodec(NO_VIDEO), - m_audioTrack(NULL), m_aCodec(NO_AUDIO), - m_isOpen(false), - m_eos(false) -{ - long long pos = 0; - if (mkvparser::EBMLHeader().Parse(m_reader, pos)) - return; - - if (mkvparser::Segment::CreateInstance(m_reader, pos, m_segment)) - return; - - if (m_segment->Load() < 0) - return; - - const mkvparser::Tracks *tracks = m_segment->GetTracks(); - const unsigned long tracksCount = tracks->GetTracksCount(); - int currVideoTrack = -1, currAudioTrack = -1; - for (unsigned long i = 0; i < tracksCount; ++i) - { - const mkvparser::Track *track = tracks->GetTrackByIndex(i); - if (const char *codecId = track->GetCodecId()) - { - if ((!m_videoTrack || currVideoTrack != videoTrack) && track->GetType() == mkvparser::Track::kVideo) - { - if (!strcmp(codecId, "V_VP8")) - m_vCodec = VIDEO_VP8; - else if (!strcmp(codecId, "V_VP9")) - m_vCodec = VIDEO_VP9; - if (m_vCodec != NO_VIDEO) - m_videoTrack = static_cast<const mkvparser::VideoTrack *>(track); - ++currVideoTrack; - } - 
if ((!m_audioTrack || currAudioTrack != audioTrack) && track->GetType() == mkvparser::Track::kAudio) - { - if (!strcmp(codecId, "A_VORBIS")) - m_aCodec = AUDIO_VORBIS; - else if (!strcmp(codecId, "A_OPUS")) - m_aCodec = AUDIO_OPUS; - if (m_aCodec != NO_AUDIO) - m_audioTrack = static_cast<const mkvparser::AudioTrack *>(track); - ++currAudioTrack; - } - } - } - if (!m_videoTrack && !m_audioTrack) - return; - - m_isOpen = true; -} -WebMDemuxer::~WebMDemuxer() -{ - delete m_segment; - delete m_reader; -} - -double WebMDemuxer::getLength() const -{ - return m_segment->GetDuration() / 1e9; -} - -WebMDemuxer::VIDEO_CODEC WebMDemuxer::getVideoCodec() const -{ - return m_vCodec; -} -int WebMDemuxer::getWidth() const -{ - return m_videoTrack->GetWidth(); -} -int WebMDemuxer::getHeight() const -{ - return m_videoTrack->GetHeight(); -} - -WebMDemuxer::AUDIO_CODEC WebMDemuxer::getAudioCodec() const -{ - return m_aCodec; -} -const unsigned char *WebMDemuxer::getAudioExtradata(size_t &size) const -{ - return m_audioTrack->GetCodecPrivate(size); -} -double WebMDemuxer::getSampleRate() const -{ - return m_audioTrack->GetSamplingRate(); -} -int WebMDemuxer::getChannels() const -{ - return m_audioTrack->GetChannels(); -} -int WebMDemuxer::getAudioDepth() const -{ - return m_audioTrack->GetBitDepth(); -} - -bool WebMDemuxer::readFrame(WebMFrame *videoFrame, WebMFrame *audioFrame) -{ - const long videoTrackNumber = (videoFrame && m_videoTrack) ? m_videoTrack->GetNumber() : 0; - const long audioTrackNumber = (audioFrame && m_audioTrack) ? 
m_audioTrack->GetNumber() : 0; - bool blockEntryEOS = false; - - if (videoFrame) - videoFrame->bufferSize = 0; - if (audioFrame) - audioFrame->bufferSize = 0; - - if (videoTrackNumber == 0 && audioTrackNumber == 0) - return false; - - if (m_eos) - return false; - - if (!m_cluster) - m_cluster = m_segment->GetFirst(); - - do - { - bool getNewBlock = false; - long status = 0; - if (!m_blockEntry && !blockEntryEOS) - { - status = m_cluster->GetFirst(m_blockEntry); - getNewBlock = true; - } - else if (blockEntryEOS || m_blockEntry->EOS()) - { - m_cluster = m_segment->GetNext(m_cluster); - if (!m_cluster || m_cluster->EOS()) - { - m_eos = true; - return false; - } - status = m_cluster->GetFirst(m_blockEntry); - blockEntryEOS = false; - getNewBlock = true; - } - else if (!m_block || m_blockFrameIndex == m_block->GetFrameCount() || notSupportedTrackNumber(videoTrackNumber, audioTrackNumber)) - { - status = m_cluster->GetNext(m_blockEntry, m_blockEntry); - if (!m_blockEntry || m_blockEntry->EOS()) - { - blockEntryEOS = true; - continue; - } - getNewBlock = true; - } - if (status || !m_blockEntry) - return false; - if (getNewBlock) - { - m_block = m_blockEntry->GetBlock(); - m_blockFrameIndex = 0; - } - } while (blockEntryEOS || notSupportedTrackNumber(videoTrackNumber, audioTrackNumber)); - - WebMFrame *frame = NULL; - - const long trackNumber = m_block->GetTrackNumber(); - if (trackNumber == videoTrackNumber) - frame = videoFrame; - else if (trackNumber == audioTrackNumber) - frame = audioFrame; - else - { - //Should not be possible - assert(trackNumber == videoTrackNumber || trackNumber == audioTrackNumber); - return false; - } - - const mkvparser::Block::Frame &blockFrame = m_block->GetFrame(m_blockFrameIndex++); - if (blockFrame.len > frame->bufferCapacity) - { - unsigned char *newBuff = (unsigned char *)realloc(frame->buffer, frame->bufferCapacity = blockFrame.len); - if (newBuff) - frame->buffer = newBuff; - else // Out of memory - return false; - } - 
frame->bufferSize = blockFrame.len; - - frame->time = m_block->GetTime(m_cluster) / 1e9; - frame->key = m_block->IsKey(); - - return !blockFrame.Read(m_reader, frame->buffer); -} - -inline bool WebMDemuxer::notSupportedTrackNumber(long videoTrackNumber, long audioTrackNumber) const -{ - const long trackNumber = m_block->GetTrackNumber(); - return (trackNumber != videoTrackNumber && trackNumber != audioTrackNumber); -} diff --git a/thirdparty/libsimplewebm/WebMDemuxer.hpp b/thirdparty/libsimplewebm/WebMDemuxer.hpp deleted file mode 100644 index a45ddb3f26..0000000000 --- a/thirdparty/libsimplewebm/WebMDemuxer.hpp +++ /dev/null @@ -1,125 +0,0 @@ -/* - MIT License - - Copyright (c) 2016 Błażej Szczygieł - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
-*/ - -#ifndef WEBMDEMUXER_HPP -#define WEBMDEMUXER_HPP - -#include <stddef.h> - -namespace mkvparser { - class IMkvReader; - class Segment; - class Cluster; - class Block; - class BlockEntry; - class VideoTrack; - class AudioTrack; -} - -class WebMFrame -{ - WebMFrame(const WebMFrame &); - void operator =(const WebMFrame &); -public: - WebMFrame(); - ~WebMFrame(); - - inline bool isValid() const - { - return bufferSize > 0; - } - - long bufferSize, bufferCapacity; - unsigned char *buffer; - double time; - bool key; -}; - -class WebMDemuxer -{ - WebMDemuxer(const WebMDemuxer &); - void operator =(const WebMDemuxer &); -public: - enum VIDEO_CODEC - { - NO_VIDEO, - VIDEO_VP8, - VIDEO_VP9 - }; - enum AUDIO_CODEC - { - NO_AUDIO, - AUDIO_VORBIS, - AUDIO_OPUS - }; - - WebMDemuxer(mkvparser::IMkvReader *reader, int videoTrack = 0, int audioTrack = 0); - ~WebMDemuxer(); - - inline bool isOpen() const - { - return m_isOpen; - } - inline bool isEOS() const - { - return m_eos; - } - - double getLength() const; - - VIDEO_CODEC getVideoCodec() const; - int getWidth() const; - int getHeight() const; - - AUDIO_CODEC getAudioCodec() const; - const unsigned char *getAudioExtradata(size_t &size) const; // Needed for Vorbis - double getSampleRate() const; - int getChannels() const; - int getAudioDepth() const; - - bool readFrame(WebMFrame *videoFrame, WebMFrame *audioFrame); - -private: - inline bool notSupportedTrackNumber(long videoTrackNumber, long audioTrackNumber) const; - - mkvparser::IMkvReader *m_reader; - mkvparser::Segment *m_segment; - - const mkvparser::Cluster *m_cluster; - const mkvparser::Block *m_block; - const mkvparser::BlockEntry *m_blockEntry; - - int m_blockFrameIndex; - - const mkvparser::VideoTrack *m_videoTrack; - VIDEO_CODEC m_vCodec; - - const mkvparser::AudioTrack *m_audioTrack; - AUDIO_CODEC m_aCodec; - - bool m_isOpen; - bool m_eos; -}; - -#endif // WEBMDEMUXER_HPP diff --git a/thirdparty/libsimplewebm/libwebm/AUTHORS.TXT 
b/thirdparty/libsimplewebm/libwebm/AUTHORS.TXT deleted file mode 100644 index 9686ac13eb..0000000000 --- a/thirdparty/libsimplewebm/libwebm/AUTHORS.TXT +++ /dev/null @@ -1,4 +0,0 @@ -# Names should be added to this file like so: -# Name or Organization <email address> - -Google Inc. diff --git a/thirdparty/libsimplewebm/libwebm/LICENSE.TXT b/thirdparty/libsimplewebm/libwebm/LICENSE.TXT deleted file mode 100644 index 7a6f99547d..0000000000 --- a/thirdparty/libsimplewebm/libwebm/LICENSE.TXT +++ /dev/null @@ -1,30 +0,0 @@ -Copyright (c) 2010, Google Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - * Neither the name of Google nor the names of its contributors may - be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/thirdparty/libsimplewebm/libwebm/PATENTS.TXT b/thirdparty/libsimplewebm/libwebm/PATENTS.TXT deleted file mode 100644 index caedf607e9..0000000000 --- a/thirdparty/libsimplewebm/libwebm/PATENTS.TXT +++ /dev/null @@ -1,23 +0,0 @@ -Additional IP Rights Grant (Patents) ------------------------------------- - -"These implementations" means the copyrightable works that implement the WebM -codecs distributed by Google as part of the WebM Project. - -Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge, -royalty-free, irrevocable (except as stated in this section) patent license to -make, have made, use, offer to sell, sell, import, transfer, and otherwise -run, modify and propagate the contents of these implementations of WebM, where -such license applies only to those patent claims, both currently owned by -Google and acquired in the future, licensable by Google that are necessarily -infringed by these implementations of WebM. This grant does not include claims -that would be infringed only as a consequence of further modification of these -implementations. 
If you or your agent or exclusive licensee institute or order -or agree to the institution of patent litigation or any other patent -enforcement activity against any entity (including a cross-claim or -counterclaim in a lawsuit) alleging that any of these implementations of WebM -or any code incorporated within any of these implementations of WebM -constitute direct or contributory patent infringement, or inducement of -patent infringement, then any patent rights granted to you under this License -for these implementations of WebM shall terminate as of the date such -litigation is filed. diff --git a/thirdparty/libsimplewebm/libwebm/README.libvpx b/thirdparty/libsimplewebm/libwebm/README.libvpx deleted file mode 100644 index 1aa93b75aa..0000000000 --- a/thirdparty/libsimplewebm/libwebm/README.libvpx +++ /dev/null @@ -1,11 +0,0 @@ -URL: https://chromium.googlesource.com/webm/libwebm -Version: d7c62173ff6b4a5e0a2f86683a5b67db98cf09bf -License: BSD -License File: LICENSE.txt - -Description: -libwebm is used to handle WebM container I/O. - -Local Changes: -* Removed: "mkvmuxer", "hdr_util", "file_util", "mkv_reader". -* Make "~IMkvRerader()" public. diff --git a/thirdparty/libsimplewebm/libwebm/common/webmids.h b/thirdparty/libsimplewebm/libwebm/common/webmids.h deleted file mode 100644 index 89d722a71b..0000000000 --- a/thirdparty/libsimplewebm/libwebm/common/webmids.h +++ /dev/null @@ -1,192 +0,0 @@ -// Copyright (c) 2012 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. 
- -#ifndef COMMON_WEBMIDS_H_ -#define COMMON_WEBMIDS_H_ - -namespace libwebm { - -enum MkvId { - kMkvEBML = 0x1A45DFA3, - kMkvEBMLVersion = 0x4286, - kMkvEBMLReadVersion = 0x42F7, - kMkvEBMLMaxIDLength = 0x42F2, - kMkvEBMLMaxSizeLength = 0x42F3, - kMkvDocType = 0x4282, - kMkvDocTypeVersion = 0x4287, - kMkvDocTypeReadVersion = 0x4285, - kMkvVoid = 0xEC, - kMkvSignatureSlot = 0x1B538667, - kMkvSignatureAlgo = 0x7E8A, - kMkvSignatureHash = 0x7E9A, - kMkvSignaturePublicKey = 0x7EA5, - kMkvSignature = 0x7EB5, - kMkvSignatureElements = 0x7E5B, - kMkvSignatureElementList = 0x7E7B, - kMkvSignedElement = 0x6532, - // segment - kMkvSegment = 0x18538067, - // Meta Seek Information - kMkvSeekHead = 0x114D9B74, - kMkvSeek = 0x4DBB, - kMkvSeekID = 0x53AB, - kMkvSeekPosition = 0x53AC, - // Segment Information - kMkvInfo = 0x1549A966, - kMkvTimecodeScale = 0x2AD7B1, - kMkvDuration = 0x4489, - kMkvDateUTC = 0x4461, - kMkvTitle = 0x7BA9, - kMkvMuxingApp = 0x4D80, - kMkvWritingApp = 0x5741, - // Cluster - kMkvCluster = 0x1F43B675, - kMkvTimecode = 0xE7, - kMkvPrevSize = 0xAB, - kMkvBlockGroup = 0xA0, - kMkvBlock = 0xA1, - kMkvBlockDuration = 0x9B, - kMkvReferenceBlock = 0xFB, - kMkvLaceNumber = 0xCC, - kMkvSimpleBlock = 0xA3, - kMkvBlockAdditions = 0x75A1, - kMkvBlockMore = 0xA6, - kMkvBlockAddID = 0xEE, - kMkvBlockAdditional = 0xA5, - kMkvDiscardPadding = 0x75A2, - // Track - kMkvTracks = 0x1654AE6B, - kMkvTrackEntry = 0xAE, - kMkvTrackNumber = 0xD7, - kMkvTrackUID = 0x73C5, - kMkvTrackType = 0x83, - kMkvFlagEnabled = 0xB9, - kMkvFlagDefault = 0x88, - kMkvFlagForced = 0x55AA, - kMkvFlagLacing = 0x9C, - kMkvDefaultDuration = 0x23E383, - kMkvMaxBlockAdditionID = 0x55EE, - kMkvName = 0x536E, - kMkvLanguage = 0x22B59C, - kMkvCodecID = 0x86, - kMkvCodecPrivate = 0x63A2, - kMkvCodecName = 0x258688, - kMkvCodecDelay = 0x56AA, - kMkvSeekPreRoll = 0x56BB, - // video - kMkvVideo = 0xE0, - kMkvFlagInterlaced = 0x9A, - kMkvStereoMode = 0x53B8, - kMkvAlphaMode = 0x53C0, - kMkvPixelWidth = 0xB0, 
- kMkvPixelHeight = 0xBA, - kMkvPixelCropBottom = 0x54AA, - kMkvPixelCropTop = 0x54BB, - kMkvPixelCropLeft = 0x54CC, - kMkvPixelCropRight = 0x54DD, - kMkvDisplayWidth = 0x54B0, - kMkvDisplayHeight = 0x54BA, - kMkvDisplayUnit = 0x54B2, - kMkvAspectRatioType = 0x54B3, - kMkvFrameRate = 0x2383E3, - // end video - // colour - kMkvColour = 0x55B0, - kMkvMatrixCoefficients = 0x55B1, - kMkvBitsPerChannel = 0x55B2, - kMkvChromaSubsamplingHorz = 0x55B3, - kMkvChromaSubsamplingVert = 0x55B4, - kMkvCbSubsamplingHorz = 0x55B5, - kMkvCbSubsamplingVert = 0x55B6, - kMkvChromaSitingHorz = 0x55B7, - kMkvChromaSitingVert = 0x55B8, - kMkvRange = 0x55B9, - kMkvTransferCharacteristics = 0x55BA, - kMkvPrimaries = 0x55BB, - kMkvMaxCLL = 0x55BC, - kMkvMaxFALL = 0x55BD, - // mastering metadata - kMkvMasteringMetadata = 0x55D0, - kMkvPrimaryRChromaticityX = 0x55D1, - kMkvPrimaryRChromaticityY = 0x55D2, - kMkvPrimaryGChromaticityX = 0x55D3, - kMkvPrimaryGChromaticityY = 0x55D4, - kMkvPrimaryBChromaticityX = 0x55D5, - kMkvPrimaryBChromaticityY = 0x55D6, - kMkvWhitePointChromaticityX = 0x55D7, - kMkvWhitePointChromaticityY = 0x55D8, - kMkvLuminanceMax = 0x55D9, - kMkvLuminanceMin = 0x55DA, - // end mastering metadata - // end colour - // projection - kMkvProjection = 0x7670, - kMkvProjectionType = 0x7671, - kMkvProjectionPrivate = 0x7672, - kMkvProjectionPoseYaw = 0x7673, - kMkvProjectionPosePitch = 0x7674, - kMkvProjectionPoseRoll = 0x7675, - // end projection - // audio - kMkvAudio = 0xE1, - kMkvSamplingFrequency = 0xB5, - kMkvOutputSamplingFrequency = 0x78B5, - kMkvChannels = 0x9F, - kMkvBitDepth = 0x6264, - // end audio - // ContentEncodings - kMkvContentEncodings = 0x6D80, - kMkvContentEncoding = 0x6240, - kMkvContentEncodingOrder = 0x5031, - kMkvContentEncodingScope = 0x5032, - kMkvContentEncodingType = 0x5033, - kMkvContentCompression = 0x5034, - kMkvContentCompAlgo = 0x4254, - kMkvContentCompSettings = 0x4255, - kMkvContentEncryption = 0x5035, - kMkvContentEncAlgo = 0x47E1, - 
kMkvContentEncKeyID = 0x47E2, - kMkvContentSignature = 0x47E3, - kMkvContentSigKeyID = 0x47E4, - kMkvContentSigAlgo = 0x47E5, - kMkvContentSigHashAlgo = 0x47E6, - kMkvContentEncAESSettings = 0x47E7, - kMkvAESSettingsCipherMode = 0x47E8, - kMkvAESSettingsCipherInitData = 0x47E9, - // end ContentEncodings - // Cueing Data - kMkvCues = 0x1C53BB6B, - kMkvCuePoint = 0xBB, - kMkvCueTime = 0xB3, - kMkvCueTrackPositions = 0xB7, - kMkvCueTrack = 0xF7, - kMkvCueClusterPosition = 0xF1, - kMkvCueBlockNumber = 0x5378, - // Chapters - kMkvChapters = 0x1043A770, - kMkvEditionEntry = 0x45B9, - kMkvChapterAtom = 0xB6, - kMkvChapterUID = 0x73C4, - kMkvChapterStringUID = 0x5654, - kMkvChapterTimeStart = 0x91, - kMkvChapterTimeEnd = 0x92, - kMkvChapterDisplay = 0x80, - kMkvChapString = 0x85, - kMkvChapLanguage = 0x437C, - kMkvChapCountry = 0x437E, - // Tags - kMkvTags = 0x1254C367, - kMkvTag = 0x7373, - kMkvSimpleTag = 0x67C8, - kMkvTagName = 0x45A3, - kMkvTagString = 0x4487 -}; - -} // namespace libwebm - -#endif // COMMON_WEBMIDS_H_ diff --git a/thirdparty/libsimplewebm/libwebm/mkvmuxer/mkvmuxertypes.h b/thirdparty/libsimplewebm/libwebm/mkvmuxer/mkvmuxertypes.h deleted file mode 100644 index e5db121605..0000000000 --- a/thirdparty/libsimplewebm/libwebm/mkvmuxer/mkvmuxertypes.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2012 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. 
- -#ifndef MKVMUXER_MKVMUXERTYPES_H_ -#define MKVMUXER_MKVMUXERTYPES_H_ - -namespace mkvmuxer { -typedef unsigned char uint8; -typedef short int16; -typedef int int32; -typedef unsigned int uint32; -typedef long long int64; -typedef unsigned long long uint64; -} // namespace mkvmuxer - -// Copied from Chromium basictypes.h -// A macro to disallow the copy constructor and operator= functions -// This should be used in the private: declarations for a class -#define LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&); \ - void operator=(const TypeName&) - -#endif // MKVMUXER_MKVMUXERTYPES_HPP_ diff --git a/thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.cc b/thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.cc deleted file mode 100644 index e7b76f7da1..0000000000 --- a/thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.cc +++ /dev/null @@ -1,8049 +0,0 @@ -// Copyright (c) 2012 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. 
-#include "mkvparser/mkvparser.h" - -#if defined(_MSC_VER) && _MSC_VER < 1800 -#include <float.h> // _isnan() / _finite() -#define MSC_COMPAT -#endif - -#include <cassert> -#include <cfloat> -#include <climits> -#include <cmath> -#include <cstring> -#include <memory> -#include <new> - -#include "common/webmids.h" - -namespace mkvparser { -const long long kStringElementSizeLimit = 20 * 1000 * 1000; -const float MasteringMetadata::kValueNotPresent = FLT_MAX; -const long long Colour::kValueNotPresent = LLONG_MAX; -const float Projection::kValueNotPresent = FLT_MAX; - -#ifdef MSC_COMPAT -inline bool isnan(double val) { return !!_isnan(val); } -inline bool isinf(double val) { return !_finite(val); } -#else -inline bool isnan(double val) { return std::isnan(val); } -inline bool isinf(double val) { return std::isinf(val); } -#endif // MSC_COMPAT - -IMkvReader::~IMkvReader() {} - -template <typename Type> -Type* SafeArrayAlloc(unsigned long long num_elements, - unsigned long long element_size) { - if (num_elements == 0 || element_size == 0) - return NULL; - - const size_t kMaxAllocSize = 0x80000000; // 2GiB - const unsigned long long num_bytes = num_elements * element_size; - if (element_size > (kMaxAllocSize / num_elements)) - return NULL; - if (num_bytes != static_cast<size_t>(num_bytes)) - return NULL; - - return new (std::nothrow) Type[static_cast<size_t>(num_bytes)]; -} - -void GetVersion(int& major, int& minor, int& build, int& revision) { - major = 1; - minor = 0; - build = 0; - revision = 30; -} - -long long ReadUInt(IMkvReader* pReader, long long pos, long& len) { - if (!pReader || pos < 0) - return E_FILE_FORMAT_INVALID; - - len = 1; - unsigned char b; - int status = pReader->Read(pos, 1, &b); - - if (status < 0) // error or underflow - return status; - - if (status > 0) // interpreted as "underflow" - return E_BUFFER_NOT_FULL; - - if (b == 0) // we can't handle u-int values larger than 8 bytes - return E_FILE_FORMAT_INVALID; - - unsigned char m = 0x80; - - while 
(!(b & m)) { - m >>= 1; - ++len; - } - - long long result = b & (~m); - ++pos; - - for (int i = 1; i < len; ++i) { - status = pReader->Read(pos, 1, &b); - - if (status < 0) { - len = 1; - return status; - } - - if (status > 0) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result <<= 8; - result |= b; - - ++pos; - } - - return result; -} - -// Reads an EBML ID and returns it. -// An ID must at least 1 byte long, cannot exceed 4, and its value must be -// greater than 0. -// See known EBML values and EBMLMaxIDLength: -// http://www.matroska.org/technical/specs/index.html -// Returns the ID, or a value less than 0 to report an error while reading the -// ID. -long long ReadID(IMkvReader* pReader, long long pos, long& len) { - if (pReader == NULL || pos < 0) - return E_FILE_FORMAT_INVALID; - - // Read the first byte. The length in bytes of the ID is determined by - // finding the first set bit in the first byte of the ID. - unsigned char temp_byte = 0; - int read_status = pReader->Read(pos, 1, &temp_byte); - - if (read_status < 0) - return E_FILE_FORMAT_INVALID; - else if (read_status > 0) // No data to read. - return E_BUFFER_NOT_FULL; - - if (temp_byte == 0) // ID length > 8 bytes; invalid file. - return E_FILE_FORMAT_INVALID; - - int bit_pos = 0; - const int kMaxIdLengthInBytes = 4; - const int kCheckByte = 0x80; - - // Find the first bit that's set. - bool found_bit = false; - for (; bit_pos < kMaxIdLengthInBytes; ++bit_pos) { - if ((kCheckByte >> bit_pos) & temp_byte) { - found_bit = true; - break; - } - } - - if (!found_bit) { - // The value is too large to be a valid ID. - return E_FILE_FORMAT_INVALID; - } - - // Read the remaining bytes of the ID (if any). 
- const int id_length = bit_pos + 1; - long long ebml_id = temp_byte; - for (int i = 1; i < id_length; ++i) { - ebml_id <<= 8; - read_status = pReader->Read(pos + i, 1, &temp_byte); - - if (read_status < 0) - return E_FILE_FORMAT_INVALID; - else if (read_status > 0) - return E_BUFFER_NOT_FULL; - - ebml_id |= temp_byte; - } - - len = id_length; - return ebml_id; -} - -long long GetUIntLength(IMkvReader* pReader, long long pos, long& len) { - if (!pReader || pos < 0) - return E_FILE_FORMAT_INVALID; - - long long total, available; - - int status = pReader->Length(&total, &available); - if (status < 0 || (total >= 0 && available > total)) - return E_FILE_FORMAT_INVALID; - - len = 1; - - if (pos >= available) - return pos; // too few bytes available - - unsigned char b; - - status = pReader->Read(pos, 1, &b); - - if (status != 0) - return status; - - if (b == 0) // we can't handle u-int values larger than 8 bytes - return E_FILE_FORMAT_INVALID; - - unsigned char m = 0x80; - - while (!(b & m)) { - m >>= 1; - ++len; - } - - return 0; // success -} - -// TODO(vigneshv): This function assumes that unsigned values never have their -// high bit set. 
-long long UnserializeUInt(IMkvReader* pReader, long long pos, long long size) { - if (!pReader || pos < 0 || (size <= 0) || (size > 8)) - return E_FILE_FORMAT_INVALID; - - long long result = 0; - - for (long long i = 0; i < size; ++i) { - unsigned char b; - - const long status = pReader->Read(pos, 1, &b); - - if (status < 0) - return status; - - result <<= 8; - result |= b; - - ++pos; - } - - return result; -} - -long UnserializeFloat(IMkvReader* pReader, long long pos, long long size_, - double& result) { - if (!pReader || pos < 0 || ((size_ != 4) && (size_ != 8))) - return E_FILE_FORMAT_INVALID; - - const long size = static_cast<long>(size_); - - unsigned char buf[8]; - - const int status = pReader->Read(pos, size, buf); - - if (status < 0) // error - return status; - - if (size == 4) { - union { - float f; - unsigned long ff; - }; - - ff = 0; - - for (int i = 0;;) { - ff |= buf[i]; - - if (++i >= 4) - break; - - ff <<= 8; - } - - result = f; - } else { - union { - double d; - unsigned long long dd; - }; - - dd = 0; - - for (int i = 0;;) { - dd |= buf[i]; - - if (++i >= 8) - break; - - dd <<= 8; - } - - result = d; - } - - if (mkvparser::isinf(result) || mkvparser::isnan(result)) - return E_FILE_FORMAT_INVALID; - - return 0; -} - -long UnserializeInt(IMkvReader* pReader, long long pos, long long size, - long long& result_ref) { - if (!pReader || pos < 0 || size < 1 || size > 8) - return E_FILE_FORMAT_INVALID; - - signed char first_byte = 0; - const long status = pReader->Read(pos, 1, (unsigned char*)&first_byte); - - if (status < 0) - return status; - - unsigned long long result = first_byte; - ++pos; - - for (long i = 1; i < size; ++i) { - unsigned char b; - - const long status = pReader->Read(pos, 1, &b); - - if (status < 0) - return status; - - result <<= 8; - result |= b; - - ++pos; - } - - result_ref = static_cast<long long>(result); - return 0; -} - -long UnserializeString(IMkvReader* pReader, long long pos, long long size, - char*& str) { - delete[] str; 
- str = NULL; - - if (size >= LONG_MAX || size < 0 || size > kStringElementSizeLimit) - return E_FILE_FORMAT_INVALID; - - // +1 for '\0' terminator - const long required_size = static_cast<long>(size) + 1; - - str = SafeArrayAlloc<char>(1, required_size); - if (str == NULL) - return E_FILE_FORMAT_INVALID; - - unsigned char* const buf = reinterpret_cast<unsigned char*>(str); - - const long status = pReader->Read(pos, static_cast<long>(size), buf); - - if (status) { - delete[] str; - str = NULL; - - return status; - } - - str[required_size - 1] = '\0'; - return 0; -} - -long ParseElementHeader(IMkvReader* pReader, long long& pos, long long stop, - long long& id, long long& size) { - if (stop >= 0 && pos >= stop) - return E_FILE_FORMAT_INVALID; - - long len; - - id = ReadID(pReader, pos, len); - - if (id < 0) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume id - - if (stop >= 0 && pos >= stop) - return E_FILE_FORMAT_INVALID; - - size = ReadUInt(pReader, pos, len); - - if (size < 0 || len < 1 || len > 8) { - // Invalid: Negative payload size, negative or 0 length integer, or integer - // larger than 64 bits (libwebm cannot handle them). - return E_FILE_FORMAT_INVALID; - } - - // Avoid rolling over pos when very close to LLONG_MAX. 
- const unsigned long long rollover_check = - static_cast<unsigned long long>(pos) + len; - if (rollover_check > LLONG_MAX) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume length of size - - // pos now designates payload - - if (stop >= 0 && pos > stop) - return E_FILE_FORMAT_INVALID; - - return 0; // success -} - -bool Match(IMkvReader* pReader, long long& pos, unsigned long expected_id, - long long& val) { - if (!pReader || pos < 0) - return false; - - long long total = 0; - long long available = 0; - - const long status = pReader->Length(&total, &available); - if (status < 0 || (total >= 0 && available > total)) - return false; - - long len = 0; - - const long long id = ReadID(pReader, pos, len); - if (id < 0 || (available - pos) > len) - return false; - - if (static_cast<unsigned long>(id) != expected_id) - return false; - - pos += len; // consume id - - const long long size = ReadUInt(pReader, pos, len); - if (size < 0 || size > 8 || len < 1 || len > 8 || (available - pos) > len) - return false; - - pos += len; // consume length of size of payload - - val = UnserializeUInt(pReader, pos, size); - if (val < 0) - return false; - - pos += size; // consume size of payload - - return true; -} - -bool Match(IMkvReader* pReader, long long& pos, unsigned long expected_id, - unsigned char*& buf, size_t& buflen) { - if (!pReader || pos < 0) - return false; - - long long total = 0; - long long available = 0; - - long status = pReader->Length(&total, &available); - if (status < 0 || (total >= 0 && available > total)) - return false; - - long len = 0; - const long long id = ReadID(pReader, pos, len); - if (id < 0 || (available - pos) > len) - return false; - - if (static_cast<unsigned long>(id) != expected_id) - return false; - - pos += len; // consume id - - const long long size = ReadUInt(pReader, pos, len); - if (size < 0 || len <= 0 || len > 8 || (available - pos) > len) - return false; - - unsigned long long rollover_check = - static_cast<unsigned long 
long>(pos) + len; - if (rollover_check > LLONG_MAX) - return false; - - pos += len; // consume length of size of payload - - rollover_check = static_cast<unsigned long long>(pos) + size; - if (rollover_check > LLONG_MAX) - return false; - - if ((pos + size) > available) - return false; - - if (size >= LONG_MAX) - return false; - - const long buflen_ = static_cast<long>(size); - - buf = SafeArrayAlloc<unsigned char>(1, buflen_); - if (!buf) - return false; - - status = pReader->Read(pos, buflen_, buf); - if (status != 0) - return false; - - buflen = buflen_; - - pos += size; // consume size of payload - return true; -} - -EBMLHeader::EBMLHeader() : m_docType(NULL) { Init(); } - -EBMLHeader::~EBMLHeader() { delete[] m_docType; } - -void EBMLHeader::Init() { - m_version = 1; - m_readVersion = 1; - m_maxIdLength = 4; - m_maxSizeLength = 8; - - if (m_docType) { - delete[] m_docType; - m_docType = NULL; - } - - m_docTypeVersion = 1; - m_docTypeReadVersion = 1; -} - -long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) { - if (!pReader) - return E_FILE_FORMAT_INVALID; - - long long total, available; - - long status = pReader->Length(&total, &available); - - if (status < 0) // error - return status; - - pos = 0; - - // Scan until we find what looks like the first byte of the EBML header. - const long long kMaxScanBytes = (available >= 1024) ? 1024 : available; - const unsigned char kEbmlByte0 = 0x1A; - unsigned char scan_byte = 0; - - while (pos < kMaxScanBytes) { - status = pReader->Read(pos, 1, &scan_byte); - - if (status < 0) // error - return status; - else if (status > 0) - return E_BUFFER_NOT_FULL; - - if (scan_byte == kEbmlByte0) - break; - - ++pos; - } - - long len = 0; - const long long ebml_id = ReadID(pReader, pos, len); - - if (ebml_id == E_BUFFER_NOT_FULL) - return E_BUFFER_NOT_FULL; - - if (len != 4 || ebml_id != libwebm::kMkvEBML) - return E_FILE_FORMAT_INVALID; - - // Move read pos forward to the EBML header size field. 
- pos += 4; - - // Read length of size field. - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return E_FILE_FORMAT_INVALID; - else if (result > 0) // need more data - return E_BUFFER_NOT_FULL; - - if (len < 1 || len > 8) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && ((total - pos) < len)) - return E_FILE_FORMAT_INVALID; - - if ((available - pos) < len) - return pos + len; // try again later - - // Read the EBML header size. - result = ReadUInt(pReader, pos, len); - - if (result < 0) // error - return result; - - pos += len; // consume size field - - // pos now designates start of payload - - if ((total >= 0) && ((total - pos) < result)) - return E_FILE_FORMAT_INVALID; - - if ((available - pos) < result) - return pos + result; - - const long long end = pos + result; - - Init(); - - while (pos < end) { - long long id, size; - - status = ParseElementHeader(pReader, pos, end, id, size); - - if (status < 0) // error - return status; - - if (size == 0) - return E_FILE_FORMAT_INVALID; - - if (id == libwebm::kMkvEBMLVersion) { - m_version = UnserializeUInt(pReader, pos, size); - - if (m_version <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvEBMLReadVersion) { - m_readVersion = UnserializeUInt(pReader, pos, size); - - if (m_readVersion <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvEBMLMaxIDLength) { - m_maxIdLength = UnserializeUInt(pReader, pos, size); - - if (m_maxIdLength <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvEBMLMaxSizeLength) { - m_maxSizeLength = UnserializeUInt(pReader, pos, size); - - if (m_maxSizeLength <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvDocType) { - if (m_docType) - return E_FILE_FORMAT_INVALID; - - status = UnserializeString(pReader, pos, size, m_docType); - - if (status) // error - return status; - } else if (id == libwebm::kMkvDocTypeVersion) { - m_docTypeVersion = UnserializeUInt(pReader, pos, size); 
- - if (m_docTypeVersion <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvDocTypeReadVersion) { - m_docTypeReadVersion = UnserializeUInt(pReader, pos, size); - - if (m_docTypeReadVersion <= 0) - return E_FILE_FORMAT_INVALID; - } - - pos += size; - } - - if (pos != end) - return E_FILE_FORMAT_INVALID; - - // Make sure DocType, DocTypeReadVersion, and DocTypeVersion are valid. - if (m_docType == NULL || m_docTypeReadVersion <= 0 || m_docTypeVersion <= 0) - return E_FILE_FORMAT_INVALID; - - // Make sure EBMLMaxIDLength and EBMLMaxSizeLength are valid. - if (m_maxIdLength <= 0 || m_maxIdLength > 4 || m_maxSizeLength <= 0 || - m_maxSizeLength > 8) - return E_FILE_FORMAT_INVALID; - - return 0; -} - -Segment::Segment(IMkvReader* pReader, long long elem_start, - // long long elem_size, - long long start, long long size) - : m_pReader(pReader), - m_element_start(elem_start), - // m_element_size(elem_size), - m_start(start), - m_size(size), - m_pos(start), - m_pUnknownSize(0), - m_pSeekHead(NULL), - m_pInfo(NULL), - m_pTracks(NULL), - m_pCues(NULL), - m_pChapters(NULL), - m_pTags(NULL), - m_clusters(NULL), - m_clusterCount(0), - m_clusterPreloadCount(0), - m_clusterSize(0) {} - -Segment::~Segment() { - const long count = m_clusterCount + m_clusterPreloadCount; - - Cluster** i = m_clusters; - Cluster** j = m_clusters + count; - - while (i != j) { - Cluster* const p = *i++; - delete p; - } - - delete[] m_clusters; - - delete m_pTracks; - delete m_pInfo; - delete m_pCues; - delete m_pChapters; - delete m_pTags; - delete m_pSeekHead; -} - -long long Segment::CreateInstance(IMkvReader* pReader, long long pos, - Segment*& pSegment) { - if (pReader == NULL || pos < 0) - return E_PARSE_FAILED; - - pSegment = NULL; - - long long total, available; - - const long status = pReader->Length(&total, &available); - - if (status < 0) // error - return status; - - if (available < 0) - return -1; - - if ((total >= 0) && (available > total)) - return -1; - - // I would 
assume that in practice this loop would execute - // exactly once, but we allow for other elements (e.g. Void) - // to immediately follow the EBML header. This is fine for - // the source filter case (since the entire file is available), - // but in the splitter case over a network we should probably - // just give up early. We could for example decide only to - // execute this loop a maximum of, say, 10 times. - // TODO: - // There is an implied "give up early" by only parsing up - // to the available limit. We do do that, but only if the - // total file size is unknown. We could decide to always - // use what's available as our limit (irrespective of whether - // we happen to know the total file length). This would have - // as its sense "parse this much of the file before giving up", - // which a slightly different sense from "try to parse up to - // 10 EMBL elements before giving up". - - for (;;) { - if ((total >= 0) && (pos >= total)) - return E_FILE_FORMAT_INVALID; - - // Read ID - long len; - long long result = GetUIntLength(pReader, pos, len); - - if (result) // error, or too few available bytes - return result; - - if ((total >= 0) && ((pos + len) > total)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > available) - return pos + len; - - const long long idpos = pos; - const long long id = ReadID(pReader, pos, len); - - if (id < 0) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume ID - - // Read Size - - result = GetUIntLength(pReader, pos, len); - - if (result) // error, or too few available bytes - return result; - - if ((total >= 0) && ((pos + len) > total)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > available) - return pos + len; - - long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return size; - - pos += len; // consume length of size of element - - // Pos now points to start of payload - - // Handle "unknown size" for live streaming of webm files. 
- const long long unknown_size = (1LL << (7 * len)) - 1; - - if (id == libwebm::kMkvSegment) { - if (size == unknown_size) - size = -1; - - else if (total < 0) - size = -1; - - else if ((pos + size) > total) - size = -1; - - pSegment = new (std::nothrow) Segment(pReader, idpos, pos, size); - if (pSegment == NULL) - return E_PARSE_FAILED; - - return 0; // success - } - - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && ((pos + size) > total)) - return E_FILE_FORMAT_INVALID; - - if ((pos + size) > available) - return pos + size; - - pos += size; // consume payload - } -} - -long long Segment::ParseHeaders() { - // Outermost (level 0) segment object has been constructed, - // and pos designates start of payload. We need to find the - // inner (level 1) elements. - long long total, available; - - const int status = m_pReader->Length(&total, &available); - - if (status < 0) // error - return status; - - if (total > 0 && available > total) - return E_FILE_FORMAT_INVALID; - - const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; - - if ((segment_stop >= 0 && total >= 0 && segment_stop > total) || - (segment_stop >= 0 && m_pos > segment_stop)) { - return E_FILE_FORMAT_INVALID; - } - - for (;;) { - if ((total >= 0) && (m_pos >= total)) - break; - - if ((segment_stop >= 0) && (m_pos >= segment_stop)) - break; - - long long pos = m_pos; - const long long element_start = pos; - - // Avoid rolling over pos when very close to LLONG_MAX. - unsigned long long rollover_check = pos + 1ULL; - if (rollover_check > LLONG_MAX) - return E_FILE_FORMAT_INVALID; - - if ((pos + 1) > available) - return (pos + 1); - - long len; - long long result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return result; - - if (result > 0) { - // MkvReader doesn't have enough data to satisfy this read attempt. 
- return (pos + 1); - } - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > available) - return pos + len; - - const long long idpos = pos; - const long long id = ReadID(m_pReader, idpos, len); - - if (id < 0) - return E_FILE_FORMAT_INVALID; - - if (id == libwebm::kMkvCluster) - break; - - pos += len; // consume ID - - if ((pos + 1) > available) - return (pos + 1); - - // Read Size - result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return result; - - if (result > 0) { - // MkvReader doesn't have enough data to satisfy this read attempt. - return (pos + 1); - } - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > available) - return pos + len; - - const long long size = ReadUInt(m_pReader, pos, len); - - if (size < 0 || len < 1 || len > 8) { - // TODO(tomfinegan): ReadUInt should return an error when len is < 1 or - // len > 8 is true instead of checking this _everywhere_. - return size; - } - - pos += len; // consume length of size of element - - // Avoid rolling over pos when very close to LLONG_MAX. - rollover_check = static_cast<unsigned long long>(pos) + size; - if (rollover_check > LLONG_MAX) - return E_FILE_FORMAT_INVALID; - - const long long element_size = size + pos - element_start; - - // Pos now points to start of payload - - if ((segment_stop >= 0) && ((pos + size) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - // We read EBML elements either in total or nothing at all. 
- - if ((pos + size) > available) - return pos + size; - - if (id == libwebm::kMkvInfo) { - if (m_pInfo) - return E_FILE_FORMAT_INVALID; - - m_pInfo = new (std::nothrow) - SegmentInfo(this, pos, size, element_start, element_size); - - if (m_pInfo == NULL) - return -1; - - const long status = m_pInfo->Parse(); - - if (status) - return status; - } else if (id == libwebm::kMkvTracks) { - if (m_pTracks) - return E_FILE_FORMAT_INVALID; - - m_pTracks = new (std::nothrow) - Tracks(this, pos, size, element_start, element_size); - - if (m_pTracks == NULL) - return -1; - - const long status = m_pTracks->Parse(); - - if (status) - return status; - } else if (id == libwebm::kMkvCues) { - if (m_pCues == NULL) { - m_pCues = new (std::nothrow) - Cues(this, pos, size, element_start, element_size); - - if (m_pCues == NULL) - return -1; - } - } else if (id == libwebm::kMkvSeekHead) { - if (m_pSeekHead == NULL) { - m_pSeekHead = new (std::nothrow) - SeekHead(this, pos, size, element_start, element_size); - - if (m_pSeekHead == NULL) - return -1; - - const long status = m_pSeekHead->Parse(); - - if (status) - return status; - } - } else if (id == libwebm::kMkvChapters) { - if (m_pChapters == NULL) { - m_pChapters = new (std::nothrow) - Chapters(this, pos, size, element_start, element_size); - - if (m_pChapters == NULL) - return -1; - - const long status = m_pChapters->Parse(); - - if (status) - return status; - } - } else if (id == libwebm::kMkvTags) { - if (m_pTags == NULL) { - m_pTags = new (std::nothrow) - Tags(this, pos, size, element_start, element_size); - - if (m_pTags == NULL) - return -1; - - const long status = m_pTags->Parse(); - - if (status) - return status; - } - } - - m_pos = pos + size; // consume payload - } - - if (segment_stop >= 0 && m_pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - if (m_pInfo == NULL) // TODO: liberalize this behavior - return E_FILE_FORMAT_INVALID; - - if (m_pTracks == NULL) - return E_FILE_FORMAT_INVALID; - - return 0; // success -} - 
-long Segment::LoadCluster(long long& pos, long& len) { - for (;;) { - const long result = DoLoadCluster(pos, len); - - if (result <= 1) - return result; - } -} - -long Segment::DoLoadCluster(long long& pos, long& len) { - if (m_pos < 0) - return DoLoadClusterUnknownSize(pos, len); - - long long total, avail; - - long status = m_pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - if (total >= 0 && avail > total) - return E_FILE_FORMAT_INVALID; - - const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; - - long long cluster_off = -1; // offset relative to start of segment - long long cluster_size = -1; // size of cluster payload - - for (;;) { - if ((total >= 0) && (m_pos >= total)) - return 1; // no more clusters - - if ((segment_stop >= 0) && (m_pos >= segment_stop)) - return 1; // no more clusters - - pos = m_pos; - - // Read ID - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long idpos = pos; - const long long id = ReadID(m_pReader, idpos, len); - - if (id < 0) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume ID - - // Read Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(m_pReader, pos, len); - - if (size < 0) // error - return static_cast<long>(size); - - pos += len; // consume 
length of size of element - - // pos now points to start of payload - - if (size == 0) { - // Missing element payload: move on. - m_pos = pos; - continue; - } - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if ((segment_stop >= 0) && (size != unknown_size) && - ((pos + size) > segment_stop)) { - return E_FILE_FORMAT_INVALID; - } - - if (id == libwebm::kMkvCues) { - if (size == unknown_size) { - // Cues element of unknown size: Not supported. - return E_FILE_FORMAT_INVALID; - } - - if (m_pCues == NULL) { - const long long element_size = (pos - idpos) + size; - - m_pCues = new (std::nothrow) Cues(this, pos, size, idpos, element_size); - if (m_pCues == NULL) - return -1; - } - - m_pos = pos + size; // consume payload - continue; - } - - if (id != libwebm::kMkvCluster) { - // Besides the Segment, Libwebm allows only cluster elements of unknown - // size. Fail the parse upon encountering a non-cluster element reporting - // unknown size. - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - m_pos = pos + size; // consume payload - continue; - } - - // We have a cluster. - - cluster_off = idpos - m_start; // relative pos - - if (size != unknown_size) - cluster_size = size; - - break; - } - - if (cluster_off < 0) { - // No cluster, die. - return E_FILE_FORMAT_INVALID; - } - - long long pos_; - long len_; - - status = Cluster::HasBlockEntries(this, cluster_off, pos_, len_); - - if (status < 0) { // error, or underflow - pos = pos_; - len = len_; - - return status; - } - - // status == 0 means "no block entries found" - // status > 0 means "found at least one block entry" - - // TODO: - // The issue here is that the segment increments its own - // pos ptr past the most recent cluster parsed, and then - // starts from there to parse the next cluster. If we - // don't know the size of the current cluster, then we - // must either parse its payload (as we do below), looking - // for the cluster (or cues) ID to terminate the parse. 
- // This isn't really what we want: rather, we really need - // a way to create the curr cluster object immediately. - // The pity is that cluster::parse can determine its own - // boundary, and we largely duplicate that same logic here. - // - // Maybe we need to get rid of our look-ahead preloading - // in source::parse??? - // - // As we're parsing the blocks in the curr cluster - //(in cluster::parse), we should have some way to signal - // to the segment that we have determined the boundary, - // so it can adjust its own segment::m_pos member. - // - // The problem is that we're asserting in asyncreadinit, - // because we adjust the pos down to the curr seek pos, - // and the resulting adjusted len is > 2GB. I'm suspicious - // that this is even correct, but even if it is, we can't - // be loading that much data in the cache anyway. - - const long idx = m_clusterCount; - - if (m_clusterPreloadCount > 0) { - if (idx >= m_clusterSize) - return E_FILE_FORMAT_INVALID; - - Cluster* const pCluster = m_clusters[idx]; - if (pCluster == NULL || pCluster->m_index >= 0) - return E_FILE_FORMAT_INVALID; - - const long long off = pCluster->GetPosition(); - if (off < 0) - return E_FILE_FORMAT_INVALID; - - if (off == cluster_off) { // preloaded already - if (status == 0) // no entries found - return E_FILE_FORMAT_INVALID; - - if (cluster_size >= 0) - pos += cluster_size; - else { - const long long element_size = pCluster->GetElementSize(); - - if (element_size <= 0) - return E_FILE_FORMAT_INVALID; // TODO: handle this case - - pos = pCluster->m_element_start + element_size; - } - - pCluster->m_index = idx; // move from preloaded to loaded - ++m_clusterCount; - --m_clusterPreloadCount; - - m_pos = pos; // consume payload - if (segment_stop >= 0 && m_pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - return 0; // success - } - } - - if (status == 0) { // no entries found - if (cluster_size >= 0) - pos += cluster_size; - - if ((total >= 0) && (pos >= total)) { - m_pos = 
total; - return 1; // no more clusters - } - - if ((segment_stop >= 0) && (pos >= segment_stop)) { - m_pos = segment_stop; - return 1; // no more clusters - } - - m_pos = pos; - return 2; // try again - } - - // status > 0 means we have an entry - - Cluster* const pCluster = Cluster::Create(this, idx, cluster_off); - if (pCluster == NULL) - return -1; - - if (!AppendCluster(pCluster)) { - delete pCluster; - return -1; - } - - if (cluster_size >= 0) { - pos += cluster_size; - - m_pos = pos; - - if (segment_stop > 0 && m_pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - return 0; - } - - m_pUnknownSize = pCluster; - m_pos = -pos; - - return 0; // partial success, since we have a new cluster - - // status == 0 means "no block entries found" - // pos designates start of payload - // m_pos has NOT been adjusted yet (in case we need to come back here) -} - -long Segment::DoLoadClusterUnknownSize(long long& pos, long& len) { - if (m_pos >= 0 || m_pUnknownSize == NULL) - return E_PARSE_FAILED; - - const long status = m_pUnknownSize->Parse(pos, len); - - if (status < 0) // error or underflow - return status; - - if (status == 0) // parsed a block - return 2; // continue parsing - - const long long start = m_pUnknownSize->m_element_start; - const long long size = m_pUnknownSize->GetElementSize(); - - if (size < 0) - return E_FILE_FORMAT_INVALID; - - pos = start + size; - m_pos = pos; - - m_pUnknownSize = 0; - - return 2; // continue parsing -} - -bool Segment::AppendCluster(Cluster* pCluster) { - if (pCluster == NULL || pCluster->m_index < 0) - return false; - - const long count = m_clusterCount + m_clusterPreloadCount; - - long& size = m_clusterSize; - const long idx = pCluster->m_index; - - if (size < count || idx != m_clusterCount) - return false; - - if (count >= size) { - const long n = (size <= 0) ? 
2048 : 2 * size; - - Cluster** const qq = new (std::nothrow) Cluster*[n]; - if (qq == NULL) - return false; - - Cluster** q = qq; - Cluster** p = m_clusters; - Cluster** const pp = p + count; - - while (p != pp) - *q++ = *p++; - - delete[] m_clusters; - - m_clusters = qq; - size = n; - } - - if (m_clusterPreloadCount > 0) { - Cluster** const p = m_clusters + m_clusterCount; - if (*p == NULL || (*p)->m_index >= 0) - return false; - - Cluster** q = p + m_clusterPreloadCount; - if (q >= (m_clusters + size)) - return false; - - for (;;) { - Cluster** const qq = q - 1; - if ((*qq)->m_index >= 0) - return false; - - *q = *qq; - q = qq; - - if (q == p) - break; - } - } - - m_clusters[idx] = pCluster; - ++m_clusterCount; - return true; -} - -bool Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx) { - if (pCluster == NULL || pCluster->m_index >= 0 || idx < m_clusterCount) - return false; - - const long count = m_clusterCount + m_clusterPreloadCount; - - long& size = m_clusterSize; - if (size < count) - return false; - - if (count >= size) { - const long n = (size <= 0) ? 2048 : 2 * size; - - Cluster** const qq = new (std::nothrow) Cluster*[n]; - if (qq == NULL) - return false; - Cluster** q = qq; - - Cluster** p = m_clusters; - Cluster** const pp = p + count; - - while (p != pp) - *q++ = *p++; - - delete[] m_clusters; - - m_clusters = qq; - size = n; - } - - if (m_clusters == NULL) - return false; - - Cluster** const p = m_clusters + idx; - - Cluster** q = m_clusters + count; - if (q < p || q >= (m_clusters + size)) - return false; - - while (q > p) { - Cluster** const qq = q - 1; - - if ((*qq)->m_index >= 0) - return false; - - *q = *qq; - q = qq; - } - - m_clusters[idx] = pCluster; - ++m_clusterPreloadCount; - return true; -} - -long Segment::Load() { - if (m_clusters != NULL || m_clusterSize != 0 || m_clusterCount != 0) - return E_PARSE_FAILED; - - // Outermost (level 0) segment object has been constructed, - // and pos designates start of payload. 
We need to find the - // inner (level 1) elements. - - const long long header_status = ParseHeaders(); - - if (header_status < 0) // error - return static_cast<long>(header_status); - - if (header_status > 0) // underflow - return E_BUFFER_NOT_FULL; - - if (m_pInfo == NULL || m_pTracks == NULL) - return E_FILE_FORMAT_INVALID; - - for (;;) { - const long status = LoadCluster(); - - if (status < 0) // error - return status; - - if (status >= 1) // no more clusters - return 0; - } -} - -SeekHead::Entry::Entry() : id(0), pos(0), element_start(0), element_size(0) {} - -SeekHead::SeekHead(Segment* pSegment, long long start, long long size_, - long long element_start, long long element_size) - : m_pSegment(pSegment), - m_start(start), - m_size(size_), - m_element_start(element_start), - m_element_size(element_size), - m_entries(0), - m_entry_count(0), - m_void_elements(0), - m_void_element_count(0) {} - -SeekHead::~SeekHead() { - delete[] m_entries; - delete[] m_void_elements; -} - -long SeekHead::Parse() { - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long pos = m_start; - const long long stop = m_start + m_size; - - // first count the seek head entries - - int entry_count = 0; - int void_element_count = 0; - - while (pos < stop) { - long long id, size; - - const long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (id == libwebm::kMkvSeek) - ++entry_count; - else if (id == libwebm::kMkvVoid) - ++void_element_count; - - pos += size; // consume payload - - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - if (entry_count > 0) { - m_entries = new (std::nothrow) Entry[entry_count]; - - if (m_entries == NULL) - return -1; - } - - if (void_element_count > 0) { - m_void_elements = new (std::nothrow) VoidElement[void_element_count]; - - if (m_void_elements == NULL) - return -1; - } - - // now parse the entries and void elements - - Entry* 
pEntry = m_entries; - VoidElement* pVoidElement = m_void_elements; - - pos = m_start; - - while (pos < stop) { - const long long idpos = pos; - - long long id, size; - - const long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (id == libwebm::kMkvSeek && entry_count > 0) { - if (ParseEntry(pReader, pos, size, pEntry)) { - Entry& e = *pEntry++; - - e.element_start = idpos; - e.element_size = (pos + size) - idpos; - } - } else if (id == libwebm::kMkvVoid && void_element_count > 0) { - VoidElement& e = *pVoidElement++; - - e.element_start = idpos; - e.element_size = (pos + size) - idpos; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - ptrdiff_t count_ = ptrdiff_t(pEntry - m_entries); - assert(count_ >= 0); - assert(count_ <= entry_count); - - m_entry_count = static_cast<int>(count_); - - count_ = ptrdiff_t(pVoidElement - m_void_elements); - assert(count_ >= 0); - assert(count_ <= void_element_count); - - m_void_element_count = static_cast<int>(count_); - - return 0; -} - -int SeekHead::GetCount() const { return m_entry_count; } - -const SeekHead::Entry* SeekHead::GetEntry(int idx) const { - if (idx < 0) - return 0; - - if (idx >= m_entry_count) - return 0; - - return m_entries + idx; -} - -int SeekHead::GetVoidElementCount() const { return m_void_element_count; } - -const SeekHead::VoidElement* SeekHead::GetVoidElement(int idx) const { - if (idx < 0) - return 0; - - if (idx >= m_void_element_count) - return 0; - - return m_void_elements + idx; -} - -long Segment::ParseCues(long long off, long long& pos, long& len) { - if (m_pCues) - return 0; // success - - if (off < 0) - return -1; - - long long total, avail; - - const int status = m_pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - assert((total < 0) || (avail <= total)); - - pos = m_start + off; - - if ((total < 0) 
|| (pos >= total)) - return 1; // don't bother parsing cues - - const long long element_start = pos; - const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // underflow (weird) - { - len = 1; - return E_BUFFER_NOT_FULL; - } - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long idpos = pos; - - const long long id = ReadID(m_pReader, idpos, len); - - if (id != libwebm::kMkvCues) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume ID - assert((segment_stop < 0) || (pos <= segment_stop)); - - // Read Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // underflow (weird) - { - len = 1; - return E_BUFFER_NOT_FULL; - } - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(m_pReader, pos, len); - - if (size < 0) // error - return static_cast<long>(size); - - if (size == 0) // weird, although technically not illegal - return 1; // done - - pos += len; // consume length of size of element - assert((segment_stop < 0) || (pos <= segment_stop)); - - // Pos now points to start of payload - - const long long element_stop = pos + size; - - if ((segment_stop >= 0) && (element_stop > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && (element_stop > total)) - return 1; // don't bother parsing anymore - - len = static_cast<long>(size); - - if (element_stop > avail) - return E_BUFFER_NOT_FULL; - - const long long element_size = 
element_stop - element_start; - - m_pCues = - new (std::nothrow) Cues(this, pos, size, element_start, element_size); - if (m_pCues == NULL) - return -1; - - return 0; // success -} - -bool SeekHead::ParseEntry(IMkvReader* pReader, long long start, long long size_, - Entry* pEntry) { - if (size_ <= 0) - return false; - - long long pos = start; - const long long stop = start + size_; - - long len; - - // parse the container for the level-1 element ID - - const long long seekIdId = ReadID(pReader, pos, len); - if (seekIdId < 0) - return false; - - if (seekIdId != libwebm::kMkvSeekID) - return false; - - if ((pos + len) > stop) - return false; - - pos += len; // consume SeekID id - - const long long seekIdSize = ReadUInt(pReader, pos, len); - - if (seekIdSize <= 0) - return false; - - if ((pos + len) > stop) - return false; - - pos += len; // consume size of field - - if ((pos + seekIdSize) > stop) - return false; - - pEntry->id = ReadID(pReader, pos, len); // payload - - if (pEntry->id <= 0) - return false; - - if (len != seekIdSize) - return false; - - pos += seekIdSize; // consume SeekID payload - - const long long seekPosId = ReadID(pReader, pos, len); - - if (seekPosId != libwebm::kMkvSeekPosition) - return false; - - if ((pos + len) > stop) - return false; - - pos += len; // consume id - - const long long seekPosSize = ReadUInt(pReader, pos, len); - - if (seekPosSize <= 0) - return false; - - if ((pos + len) > stop) - return false; - - pos += len; // consume size - - if ((pos + seekPosSize) > stop) - return false; - - pEntry->pos = UnserializeUInt(pReader, pos, seekPosSize); - - if (pEntry->pos < 0) - return false; - - pos += seekPosSize; // consume payload - - if (pos != stop) - return false; - - return true; -} - -Cues::Cues(Segment* pSegment, long long start_, long long size_, - long long element_start, long long element_size) - : m_pSegment(pSegment), - m_start(start_), - m_size(size_), - m_element_start(element_start), - m_element_size(element_size), - 
m_cue_points(NULL), - m_count(0), - m_preload_count(0), - m_pos(start_) {} - -Cues::~Cues() { - const long n = m_count + m_preload_count; - - CuePoint** p = m_cue_points; - CuePoint** const q = p + n; - - while (p != q) { - CuePoint* const pCP = *p++; - assert(pCP); - - delete pCP; - } - - delete[] m_cue_points; -} - -long Cues::GetCount() const { - if (m_cue_points == NULL) - return -1; - - return m_count; // TODO: really ignore preload count? -} - -bool Cues::DoneParsing() const { - const long long stop = m_start + m_size; - return (m_pos >= stop); -} - -bool Cues::Init() const { - if (m_cue_points) - return true; - - if (m_count != 0 || m_preload_count != 0) - return false; - - IMkvReader* const pReader = m_pSegment->m_pReader; - - const long long stop = m_start + m_size; - long long pos = m_start; - - long cue_points_size = 0; - - while (pos < stop) { - const long long idpos = pos; - - long len; - - const long long id = ReadID(pReader, pos, len); - if (id < 0 || (pos + len) > stop) { - return false; - } - - pos += len; // consume ID - - const long long size = ReadUInt(pReader, pos, len); - if (size < 0 || (pos + len > stop)) { - return false; - } - - pos += len; // consume Size field - if (pos + size > stop) { - return false; - } - - if (id == libwebm::kMkvCuePoint) { - if (!PreloadCuePoint(cue_points_size, idpos)) - return false; - } - - pos += size; // skip payload - } - return true; -} - -bool Cues::PreloadCuePoint(long& cue_points_size, long long pos) const { - if (m_count != 0) - return false; - - if (m_preload_count >= cue_points_size) { - const long n = (cue_points_size <= 0) ? 
2048 : 2 * cue_points_size; - - CuePoint** const qq = new (std::nothrow) CuePoint*[n]; - if (qq == NULL) - return false; - - CuePoint** q = qq; // beginning of target - - CuePoint** p = m_cue_points; // beginning of source - CuePoint** const pp = p + m_preload_count; // end of source - - while (p != pp) - *q++ = *p++; - - delete[] m_cue_points; - - m_cue_points = qq; - cue_points_size = n; - } - - CuePoint* const pCP = new (std::nothrow) CuePoint(m_preload_count, pos); - if (pCP == NULL) - return false; - - m_cue_points[m_preload_count++] = pCP; - return true; -} - -bool Cues::LoadCuePoint() const { - const long long stop = m_start + m_size; - - if (m_pos >= stop) - return false; // nothing else to do - - if (!Init()) { - m_pos = stop; - return false; - } - - IMkvReader* const pReader = m_pSegment->m_pReader; - - while (m_pos < stop) { - const long long idpos = m_pos; - - long len; - - const long long id = ReadID(pReader, m_pos, len); - if (id < 0 || (m_pos + len) > stop) - return false; - - m_pos += len; // consume ID - - const long long size = ReadUInt(pReader, m_pos, len); - if (size < 0 || (m_pos + len) > stop) - return false; - - m_pos += len; // consume Size field - if ((m_pos + size) > stop) - return false; - - if (id != libwebm::kMkvCuePoint) { - m_pos += size; // consume payload - if (m_pos > stop) - return false; - - continue; - } - - if (m_preload_count < 1) - return false; - - CuePoint* const pCP = m_cue_points[m_count]; - if (!pCP || (pCP->GetTimeCode() < 0 && (-pCP->GetTimeCode() != idpos))) - return false; - - if (!pCP->Load(pReader)) { - m_pos = stop; - return false; - } - ++m_count; - --m_preload_count; - - m_pos += size; // consume payload - if (m_pos > stop) - return false; - - return true; // yes, we loaded a cue point - } - - return false; // no, we did not load a cue point -} - -bool Cues::Find(long long time_ns, const Track* pTrack, const CuePoint*& pCP, - const CuePoint::TrackPosition*& pTP) const { - if (time_ns < 0 || pTrack == NULL || 
m_cue_points == NULL || m_count == 0) - return false; - - CuePoint** const ii = m_cue_points; - CuePoint** i = ii; - - CuePoint** const jj = ii + m_count; - CuePoint** j = jj; - - pCP = *i; - if (pCP == NULL) - return false; - - if (time_ns <= pCP->GetTime(m_pSegment)) { - pTP = pCP->Find(pTrack); - return (pTP != NULL); - } - - while (i < j) { - // INVARIANT: - //[ii, i) <= time_ns - //[i, j) ? - //[j, jj) > time_ns - - CuePoint** const k = i + (j - i) / 2; - if (k >= jj) - return false; - - CuePoint* const pCP = *k; - if (pCP == NULL) - return false; - - const long long t = pCP->GetTime(m_pSegment); - - if (t <= time_ns) - i = k + 1; - else - j = k; - - if (i > j) - return false; - } - - if (i != j || i > jj || i <= ii) - return false; - - pCP = *--i; - - if (pCP == NULL || pCP->GetTime(m_pSegment) > time_ns) - return false; - - // TODO: here and elsewhere, it's probably not correct to search - // for the cue point with this time, and then search for a matching - // track. In principle, the matching track could be on some earlier - // cue point, and with our current algorithm, we'd miss it. To make - // this bullet-proof, we'd need to create a secondary structure, - // with a list of cue points that apply to a track, and then search - // that track-based structure for a matching cue point. 
- - pTP = pCP->Find(pTrack); - return (pTP != NULL); -} - -const CuePoint* Cues::GetFirst() const { - if (m_cue_points == NULL || m_count == 0) - return NULL; - - CuePoint* const* const pp = m_cue_points; - if (pp == NULL) - return NULL; - - CuePoint* const pCP = pp[0]; - if (pCP == NULL || pCP->GetTimeCode() < 0) - return NULL; - - return pCP; -} - -const CuePoint* Cues::GetLast() const { - if (m_cue_points == NULL || m_count <= 0) - return NULL; - - const long index = m_count - 1; - - CuePoint* const* const pp = m_cue_points; - if (pp == NULL) - return NULL; - - CuePoint* const pCP = pp[index]; - if (pCP == NULL || pCP->GetTimeCode() < 0) - return NULL; - - return pCP; -} - -const CuePoint* Cues::GetNext(const CuePoint* pCurr) const { - if (pCurr == NULL || pCurr->GetTimeCode() < 0 || m_cue_points == NULL || - m_count < 1) { - return NULL; - } - - long index = pCurr->m_index; - if (index >= m_count) - return NULL; - - CuePoint* const* const pp = m_cue_points; - if (pp == NULL || pp[index] != pCurr) - return NULL; - - ++index; - - if (index >= m_count) - return NULL; - - CuePoint* const pNext = pp[index]; - - if (pNext == NULL || pNext->GetTimeCode() < 0) - return NULL; - - return pNext; -} - -const BlockEntry* Cues::GetBlock(const CuePoint* pCP, - const CuePoint::TrackPosition* pTP) const { - if (pCP == NULL || pTP == NULL) - return NULL; - - return m_pSegment->GetBlock(*pCP, *pTP); -} - -const BlockEntry* Segment::GetBlock(const CuePoint& cp, - const CuePoint::TrackPosition& tp) { - Cluster** const ii = m_clusters; - Cluster** i = ii; - - const long count = m_clusterCount + m_clusterPreloadCount; - - Cluster** const jj = ii + count; - Cluster** j = jj; - - while (i < j) { - // INVARIANT: - //[ii, i) < pTP->m_pos - //[i, j) ? 
- //[j, jj) > pTP->m_pos - - Cluster** const k = i + (j - i) / 2; - assert(k < jj); - - Cluster* const pCluster = *k; - assert(pCluster); - - // const long long pos_ = pCluster->m_pos; - // assert(pos_); - // const long long pos = pos_ * ((pos_ < 0) ? -1 : 1); - - const long long pos = pCluster->GetPosition(); - assert(pos >= 0); - - if (pos < tp.m_pos) - i = k + 1; - else if (pos > tp.m_pos) - j = k; - else - return pCluster->GetEntry(cp, tp); - } - - assert(i == j); - // assert(Cluster::HasBlockEntries(this, tp.m_pos)); - - Cluster* const pCluster = Cluster::Create(this, -1, tp.m_pos); //, -1); - if (pCluster == NULL) - return NULL; - - const ptrdiff_t idx = i - m_clusters; - - if (!PreloadCluster(pCluster, idx)) { - delete pCluster; - return NULL; - } - assert(m_clusters); - assert(m_clusterPreloadCount > 0); - assert(m_clusters[idx] == pCluster); - - return pCluster->GetEntry(cp, tp); -} - -const Cluster* Segment::FindOrPreloadCluster(long long requested_pos) { - if (requested_pos < 0) - return 0; - - Cluster** const ii = m_clusters; - Cluster** i = ii; - - const long count = m_clusterCount + m_clusterPreloadCount; - - Cluster** const jj = ii + count; - Cluster** j = jj; - - while (i < j) { - // INVARIANT: - //[ii, i) < pTP->m_pos - //[i, j) ? - //[j, jj) > pTP->m_pos - - Cluster** const k = i + (j - i) / 2; - assert(k < jj); - - Cluster* const pCluster = *k; - assert(pCluster); - - // const long long pos_ = pCluster->m_pos; - // assert(pos_); - // const long long pos = pos_ * ((pos_ < 0) ? 
-1 : 1); - - const long long pos = pCluster->GetPosition(); - assert(pos >= 0); - - if (pos < requested_pos) - i = k + 1; - else if (pos > requested_pos) - j = k; - else - return pCluster; - } - - assert(i == j); - // assert(Cluster::HasBlockEntries(this, tp.m_pos)); - - Cluster* const pCluster = Cluster::Create(this, -1, requested_pos); - if (pCluster == NULL) - return NULL; - - const ptrdiff_t idx = i - m_clusters; - - if (!PreloadCluster(pCluster, idx)) { - delete pCluster; - return NULL; - } - assert(m_clusters); - assert(m_clusterPreloadCount > 0); - assert(m_clusters[idx] == pCluster); - - return pCluster; -} - -CuePoint::CuePoint(long idx, long long pos) - : m_element_start(0), - m_element_size(0), - m_index(idx), - m_timecode(-1 * pos), - m_track_positions(NULL), - m_track_positions_count(0) { - assert(pos > 0); -} - -CuePoint::~CuePoint() { delete[] m_track_positions; } - -bool CuePoint::Load(IMkvReader* pReader) { - // odbgstream os; - // os << "CuePoint::Load(begin): timecode=" << m_timecode << endl; - - if (m_timecode >= 0) // already loaded - return true; - - assert(m_track_positions == NULL); - assert(m_track_positions_count == 0); - - long long pos_ = -m_timecode; - const long long element_start = pos_; - - long long stop; - - { - long len; - - const long long id = ReadID(pReader, pos_, len); - if (id != libwebm::kMkvCuePoint) - return false; - - pos_ += len; // consume ID - - const long long size = ReadUInt(pReader, pos_, len); - assert(size >= 0); - - pos_ += len; // consume Size field - // pos_ now points to start of payload - - stop = pos_ + size; - } - - const long long element_size = stop - element_start; - - long long pos = pos_; - - // First count number of track positions - - while (pos < stop) { - long len; - - const long long id = ReadID(pReader, pos, len); - if ((id < 0) || (pos + len > stop)) { - return false; - } - - pos += len; // consume ID - - const long long size = ReadUInt(pReader, pos, len); - if ((size < 0) || (pos + len > stop)) 
{ - return false; - } - - pos += len; // consume Size field - if ((pos + size) > stop) { - return false; - } - - if (id == libwebm::kMkvCueTime) - m_timecode = UnserializeUInt(pReader, pos, size); - - else if (id == libwebm::kMkvCueTrackPositions) - ++m_track_positions_count; - - pos += size; // consume payload - } - - if (m_timecode < 0 || m_track_positions_count <= 0) { - return false; - } - - // os << "CuePoint::Load(cont'd): idpos=" << idpos - // << " timecode=" << m_timecode - // << endl; - - m_track_positions = new (std::nothrow) TrackPosition[m_track_positions_count]; - if (m_track_positions == NULL) - return false; - - // Now parse track positions - - TrackPosition* p = m_track_positions; - pos = pos_; - - while (pos < stop) { - long len; - - const long long id = ReadID(pReader, pos, len); - if (id < 0 || (pos + len) > stop) - return false; - - pos += len; // consume ID - - const long long size = ReadUInt(pReader, pos, len); - assert(size >= 0); - assert((pos + len) <= stop); - - pos += len; // consume Size field - assert((pos + size) <= stop); - - if (id == libwebm::kMkvCueTrackPositions) { - TrackPosition& tp = *p++; - if (!tp.Parse(pReader, pos, size)) { - return false; - } - } - - pos += size; // consume payload - if (pos > stop) - return false; - } - - assert(size_t(p - m_track_positions) == m_track_positions_count); - - m_element_start = element_start; - m_element_size = element_size; - - return true; -} - -bool CuePoint::TrackPosition::Parse(IMkvReader* pReader, long long start_, - long long size_) { - const long long stop = start_ + size_; - long long pos = start_; - - m_track = -1; - m_pos = -1; - m_block = 1; // default - - while (pos < stop) { - long len; - - const long long id = ReadID(pReader, pos, len); - if ((id < 0) || ((pos + len) > stop)) { - return false; - } - - pos += len; // consume ID - - const long long size = ReadUInt(pReader, pos, len); - if ((size < 0) || ((pos + len) > stop)) { - return false; - } - - pos += len; // consume Size 
field - if ((pos + size) > stop) { - return false; - } - - if (id == libwebm::kMkvCueTrack) - m_track = UnserializeUInt(pReader, pos, size); - else if (id == libwebm::kMkvCueClusterPosition) - m_pos = UnserializeUInt(pReader, pos, size); - else if (id == libwebm::kMkvCueBlockNumber) - m_block = UnserializeUInt(pReader, pos, size); - - pos += size; // consume payload - } - - if ((m_pos < 0) || (m_track <= 0)) { - return false; - } - - return true; -} - -const CuePoint::TrackPosition* CuePoint::Find(const Track* pTrack) const { - if (pTrack == NULL) { - return NULL; - } - - const long long n = pTrack->GetNumber(); - - const TrackPosition* i = m_track_positions; - const TrackPosition* const j = i + m_track_positions_count; - - while (i != j) { - const TrackPosition& p = *i++; - - if (p.m_track == n) - return &p; - } - - return NULL; // no matching track number found -} - -long long CuePoint::GetTimeCode() const { return m_timecode; } - -long long CuePoint::GetTime(const Segment* pSegment) const { - assert(pSegment); - assert(m_timecode >= 0); - - const SegmentInfo* const pInfo = pSegment->GetInfo(); - assert(pInfo); - - const long long scale = pInfo->GetTimeCodeScale(); - assert(scale >= 1); - - const long long time = scale * m_timecode; - - return time; -} - -bool Segment::DoneParsing() const { - if (m_size < 0) { - long long total, avail; - - const int status = m_pReader->Length(&total, &avail); - - if (status < 0) // error - return true; // must assume done - - if (total < 0) - return false; // assume live stream - - return (m_pos >= total); - } - - const long long stop = m_start + m_size; - - return (m_pos >= stop); -} - -const Cluster* Segment::GetFirst() const { - if ((m_clusters == NULL) || (m_clusterCount <= 0)) - return &m_eos; - - Cluster* const pCluster = m_clusters[0]; - assert(pCluster); - - return pCluster; -} - -const Cluster* Segment::GetLast() const { - if ((m_clusters == NULL) || (m_clusterCount <= 0)) - return &m_eos; - - const long idx = 
m_clusterCount - 1; - - Cluster* const pCluster = m_clusters[idx]; - assert(pCluster); - - return pCluster; -} - -unsigned long Segment::GetCount() const { return m_clusterCount; } - -const Cluster* Segment::GetNext(const Cluster* pCurr) { - assert(pCurr); - assert(pCurr != &m_eos); - assert(m_clusters); - - long idx = pCurr->m_index; - - if (idx >= 0) { - assert(m_clusterCount > 0); - assert(idx < m_clusterCount); - assert(pCurr == m_clusters[idx]); - - ++idx; - - if (idx >= m_clusterCount) - return &m_eos; // caller will LoadCluster as desired - - Cluster* const pNext = m_clusters[idx]; - assert(pNext); - assert(pNext->m_index >= 0); - assert(pNext->m_index == idx); - - return pNext; - } - - assert(m_clusterPreloadCount > 0); - - long long pos = pCurr->m_element_start; - - assert(m_size >= 0); // TODO - const long long stop = m_start + m_size; // end of segment - - { - long len; - - long long result = GetUIntLength(m_pReader, pos, len); - assert(result == 0); - assert((pos + len) <= stop); // TODO - if (result != 0) - return NULL; - - const long long id = ReadID(m_pReader, pos, len); - if (id != libwebm::kMkvCluster) - return NULL; - - pos += len; // consume ID - - // Read Size - result = GetUIntLength(m_pReader, pos, len); - assert(result == 0); // TODO - assert((pos + len) <= stop); // TODO - - const long long size = ReadUInt(m_pReader, pos, len); - assert(size > 0); // TODO - // assert((pCurr->m_size <= 0) || (pCurr->m_size == size)); - - pos += len; // consume length of size of element - assert((pos + size) <= stop); // TODO - - // Pos now points to start of payload - - pos += size; // consume payload - } - - long long off_next = 0; - - while (pos < stop) { - long len; - - long long result = GetUIntLength(m_pReader, pos, len); - assert(result == 0); - assert((pos + len) <= stop); // TODO - if (result != 0) - return NULL; - - const long long idpos = pos; // pos of next (potential) cluster - - const long long id = ReadID(m_pReader, idpos, len); - if (id < 0) - 
return NULL; - - pos += len; // consume ID - - // Read Size - result = GetUIntLength(m_pReader, pos, len); - assert(result == 0); // TODO - assert((pos + len) <= stop); // TODO - - const long long size = ReadUInt(m_pReader, pos, len); - assert(size >= 0); // TODO - - pos += len; // consume length of size of element - assert((pos + size) <= stop); // TODO - - // Pos now points to start of payload - - if (size == 0) // weird - continue; - - if (id == libwebm::kMkvCluster) { - const long long off_next_ = idpos - m_start; - - long long pos_; - long len_; - - const long status = Cluster::HasBlockEntries(this, off_next_, pos_, len_); - - assert(status >= 0); - - if (status > 0) { - off_next = off_next_; - break; - } - } - - pos += size; // consume payload - } - - if (off_next <= 0) - return 0; - - Cluster** const ii = m_clusters + m_clusterCount; - Cluster** i = ii; - - Cluster** const jj = ii + m_clusterPreloadCount; - Cluster** j = jj; - - while (i < j) { - // INVARIANT: - //[0, i) < pos_next - //[i, j) ? - //[j, jj) > pos_next - - Cluster** const k = i + (j - i) / 2; - assert(k < jj); - - Cluster* const pNext = *k; - assert(pNext); - assert(pNext->m_index < 0); - - // const long long pos_ = pNext->m_pos; - // assert(pos_); - // pos = pos_ * ((pos_ < 0) ? 
-1 : 1); - - pos = pNext->GetPosition(); - - if (pos < off_next) - i = k + 1; - else if (pos > off_next) - j = k; - else - return pNext; - } - - assert(i == j); - - Cluster* const pNext = Cluster::Create(this, -1, off_next); - if (pNext == NULL) - return NULL; - - const ptrdiff_t idx_next = i - m_clusters; // insertion position - - if (!PreloadCluster(pNext, idx_next)) { - delete pNext; - return NULL; - } - assert(m_clusters); - assert(idx_next < m_clusterSize); - assert(m_clusters[idx_next] == pNext); - - return pNext; -} - -long Segment::ParseNext(const Cluster* pCurr, const Cluster*& pResult, - long long& pos, long& len) { - assert(pCurr); - assert(!pCurr->EOS()); - assert(m_clusters); - - pResult = 0; - - if (pCurr->m_index >= 0) { // loaded (not merely preloaded) - assert(m_clusters[pCurr->m_index] == pCurr); - - const long next_idx = pCurr->m_index + 1; - - if (next_idx < m_clusterCount) { - pResult = m_clusters[next_idx]; - return 0; // success - } - - // curr cluster is last among loaded - - const long result = LoadCluster(pos, len); - - if (result < 0) // error or underflow - return result; - - if (result > 0) // no more clusters - { - // pResult = &m_eos; - return 1; - } - - pResult = GetLast(); - return 0; // success - } - - assert(m_pos > 0); - - long long total, avail; - - long status = m_pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - assert((total < 0) || (avail <= total)); - - const long long segment_stop = (m_size < 0) ? 
-1 : m_start + m_size; - - // interrogate curr cluster - - pos = pCurr->m_element_start; - - if (pCurr->m_element_size >= 0) - pos += pCurr->m_element_size; - else { - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id = ReadUInt(m_pReader, pos, len); - - if (id != libwebm::kMkvCluster) - return -1; - - pos += len; // consume ID - - // Read Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(m_pReader, pos, len); - - if (size < 0) // error - return static_cast<long>(size); - - pos += len; // consume size field - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size == unknown_size) // TODO: should never happen - return E_FILE_FORMAT_INVALID; // TODO: resolve this - - // assert((pCurr->m_size <= 0) || (pCurr->m_size == size)); - - if ((segment_stop >= 0) && ((pos + size) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - // Pos now points to start of payload - - pos += size; // consume payload (that is, the current cluster) - if (segment_stop >= 0 && pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - // By consuming the payload, we are assuming that the curr - // cluster isn't interesting. 
That is, we don't bother checking - // whether the payload of the curr cluster is less than what - // happens to be available (obtained via IMkvReader::Length). - // Presumably the caller has already dispensed with the current - // cluster, and really does want the next cluster. - } - - // pos now points to just beyond the last fully-loaded cluster - - for (;;) { - const long status = DoParseNext(pResult, pos, len); - - if (status <= 1) - return status; - } -} - -long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) { - long long total, avail; - - long status = m_pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - assert((total < 0) || (avail <= total)); - - const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; - - // Parse next cluster. This is strictly a parsing activity. - // Creation of a new cluster object happens later, after the - // parsing is done. - - long long off_next = 0; - long long cluster_size = -1; - - for (;;) { - if ((total >= 0) && (pos >= total)) - return 1; // EOF - - if ((segment_stop >= 0) && (pos >= segment_stop)) - return 1; // EOF - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long idpos = pos; // absolute - const long long idoff = pos - m_start; // relative - - const long long id = ReadID(m_pReader, idpos, len); // absolute - - if (id < 0) // error - return static_cast<long>(id); - - if (id == 0) // weird - return -1; // generic error - - pos += len; // consume ID - - // Read Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(m_pReader, pos, len); - - 
if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(m_pReader, pos, len); - - if (size < 0) // error - return static_cast<long>(size); - - pos += len; // consume length of size of element - - // Pos now points to start of payload - - if (size == 0) // weird - continue; - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if ((segment_stop >= 0) && (size != unknown_size) && - ((pos + size) > segment_stop)) { - return E_FILE_FORMAT_INVALID; - } - - if (id == libwebm::kMkvCues) { - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - const long long element_stop = pos + size; - - if ((segment_stop >= 0) && (element_stop > segment_stop)) - return E_FILE_FORMAT_INVALID; - - const long long element_start = idpos; - const long long element_size = element_stop - element_start; - - if (m_pCues == NULL) { - m_pCues = new (std::nothrow) - Cues(this, pos, size, element_start, element_size); - if (m_pCues == NULL) - return false; - } - - pos += size; // consume payload - if (segment_stop >= 0 && pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - continue; - } - - if (id != libwebm::kMkvCluster) { // not a Cluster ID - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - pos += size; // consume payload - if (segment_stop >= 0 && pos > segment_stop) - return E_FILE_FORMAT_INVALID; - - continue; - } - - // We have a cluster. - off_next = idoff; - - if (size != unknown_size) - cluster_size = size; - - break; - } - - assert(off_next > 0); // have cluster - - // We have parsed the next cluster. - // We have not created a cluster object yet. 
What we need - // to do now is determine whether it has already be preloaded - //(in which case, an object for this cluster has already been - // created), and if not, create a new cluster object. - - Cluster** const ii = m_clusters + m_clusterCount; - Cluster** i = ii; - - Cluster** const jj = ii + m_clusterPreloadCount; - Cluster** j = jj; - - while (i < j) { - // INVARIANT: - //[0, i) < pos_next - //[i, j) ? - //[j, jj) > pos_next - - Cluster** const k = i + (j - i) / 2; - assert(k < jj); - - const Cluster* const pNext = *k; - assert(pNext); - assert(pNext->m_index < 0); - - pos = pNext->GetPosition(); - assert(pos >= 0); - - if (pos < off_next) - i = k + 1; - else if (pos > off_next) - j = k; - else { - pResult = pNext; - return 0; // success - } - } - - assert(i == j); - - long long pos_; - long len_; - - status = Cluster::HasBlockEntries(this, off_next, pos_, len_); - - if (status < 0) { // error or underflow - pos = pos_; - len = len_; - - return status; - } - - if (status > 0) { // means "found at least one block entry" - Cluster* const pNext = Cluster::Create(this, - -1, // preloaded - off_next); - if (pNext == NULL) - return -1; - - const ptrdiff_t idx_next = i - m_clusters; // insertion position - - if (!PreloadCluster(pNext, idx_next)) { - delete pNext; - return -1; - } - assert(m_clusters); - assert(idx_next < m_clusterSize); - assert(m_clusters[idx_next] == pNext); - - pResult = pNext; - return 0; // success - } - - // status == 0 means "no block entries found" - - if (cluster_size < 0) { // unknown size - const long long payload_pos = pos; // absolute pos of cluster payload - - for (;;) { // determine cluster size - if ((total >= 0) && (pos >= total)) - break; - - if ((segment_stop >= 0) && (pos >= segment_stop)) - break; // no more clusters - - // Read ID - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - 
- if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long idpos = pos; - const long long id = ReadID(m_pReader, idpos, len); - - if (id < 0) // error (or underflow) - return static_cast<long>(id); - - // This is the distinguished set of ID's we use to determine - // that we have exhausted the sub-element's inside the cluster - // whose ID we parsed earlier. - - if (id == libwebm::kMkvCluster || id == libwebm::kMkvCues) - break; - - pos += len; // consume ID (of sub-element) - - // Read Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(m_pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(m_pReader, pos, len); - - if (size < 0) // error - return static_cast<long>(size); - - pos += len; // consume size field of element - - // pos now points to start of sub-element's payload - - if (size == 0) // weird - continue; - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; // not allowed for sub-elements - - if ((segment_stop >= 0) && ((pos + size) > segment_stop)) // weird - return E_FILE_FORMAT_INVALID; - - pos += size; // consume payload of sub-element - if (segment_stop >= 0 && pos > segment_stop) - return E_FILE_FORMAT_INVALID; - } // determine cluster size - - cluster_size = pos - payload_pos; - assert(cluster_size >= 0); // TODO: handle cluster_size = 0 - - pos = payload_pos; // reset and re-parse original cluster - } - - pos += cluster_size; // consume payload - if (segment_stop >= 0 && pos > segment_stop) - 
return E_FILE_FORMAT_INVALID; - - return 2; // try to find a cluster that follows next -} - -const Cluster* Segment::FindCluster(long long time_ns) const { - if ((m_clusters == NULL) || (m_clusterCount <= 0)) - return &m_eos; - - { - Cluster* const pCluster = m_clusters[0]; - assert(pCluster); - assert(pCluster->m_index == 0); - - if (time_ns <= pCluster->GetTime()) - return pCluster; - } - - // Binary search of cluster array - - long i = 0; - long j = m_clusterCount; - - while (i < j) { - // INVARIANT: - //[0, i) <= time_ns - //[i, j) ? - //[j, m_clusterCount) > time_ns - - const long k = i + (j - i) / 2; - assert(k < m_clusterCount); - - Cluster* const pCluster = m_clusters[k]; - assert(pCluster); - assert(pCluster->m_index == k); - - const long long t = pCluster->GetTime(); - - if (t <= time_ns) - i = k + 1; - else - j = k; - - assert(i <= j); - } - - assert(i == j); - assert(i > 0); - assert(i <= m_clusterCount); - - const long k = i - 1; - - Cluster* const pCluster = m_clusters[k]; - assert(pCluster); - assert(pCluster->m_index == k); - assert(pCluster->GetTime() <= time_ns); - - return pCluster; -} - -const Tracks* Segment::GetTracks() const { return m_pTracks; } -const SegmentInfo* Segment::GetInfo() const { return m_pInfo; } -const Cues* Segment::GetCues() const { return m_pCues; } -const Chapters* Segment::GetChapters() const { return m_pChapters; } -const Tags* Segment::GetTags() const { return m_pTags; } -const SeekHead* Segment::GetSeekHead() const { return m_pSeekHead; } - -long long Segment::GetDuration() const { - assert(m_pInfo); - return m_pInfo->GetDuration(); -} - -Chapters::Chapters(Segment* pSegment, long long payload_start, - long long payload_size, long long element_start, - long long element_size) - : m_pSegment(pSegment), - m_start(payload_start), - m_size(payload_size), - m_element_start(element_start), - m_element_size(element_size), - m_editions(NULL), - m_editions_size(0), - m_editions_count(0) {} - -Chapters::~Chapters() { - while 
(m_editions_count > 0) { - Edition& e = m_editions[--m_editions_count]; - e.Clear(); - } - delete[] m_editions; -} - -long Chapters::Parse() { - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long pos = m_start; // payload start - const long long stop = pos + m_size; // payload stop - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (size == 0) // weird - continue; - - if (id == libwebm::kMkvEditionEntry) { - status = ParseEdition(pos, size); - - if (status < 0) // error - return status; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -int Chapters::GetEditionCount() const { return m_editions_count; } - -const Chapters::Edition* Chapters::GetEdition(int idx) const { - if (idx < 0) - return NULL; - - if (idx >= m_editions_count) - return NULL; - - return m_editions + idx; -} - -bool Chapters::ExpandEditionsArray() { - if (m_editions_size > m_editions_count) - return true; // nothing else to do - - const int size = (m_editions_size == 0) ? 
1 : 2 * m_editions_size; - - Edition* const editions = new (std::nothrow) Edition[size]; - - if (editions == NULL) - return false; - - for (int idx = 0; idx < m_editions_count; ++idx) { - m_editions[idx].ShallowCopy(editions[idx]); - } - - delete[] m_editions; - m_editions = editions; - - m_editions_size = size; - return true; -} - -long Chapters::ParseEdition(long long pos, long long size) { - if (!ExpandEditionsArray()) - return -1; - - Edition& e = m_editions[m_editions_count++]; - e.Init(); - - return e.Parse(m_pSegment->m_pReader, pos, size); -} - -Chapters::Edition::Edition() {} - -Chapters::Edition::~Edition() {} - -int Chapters::Edition::GetAtomCount() const { return m_atoms_count; } - -const Chapters::Atom* Chapters::Edition::GetAtom(int index) const { - if (index < 0) - return NULL; - - if (index >= m_atoms_count) - return NULL; - - return m_atoms + index; -} - -void Chapters::Edition::Init() { - m_atoms = NULL; - m_atoms_size = 0; - m_atoms_count = 0; -} - -void Chapters::Edition::ShallowCopy(Edition& rhs) const { - rhs.m_atoms = m_atoms; - rhs.m_atoms_size = m_atoms_size; - rhs.m_atoms_count = m_atoms_count; -} - -void Chapters::Edition::Clear() { - while (m_atoms_count > 0) { - Atom& a = m_atoms[--m_atoms_count]; - a.Clear(); - } - - delete[] m_atoms; - m_atoms = NULL; - - m_atoms_size = 0; -} - -long Chapters::Edition::Parse(IMkvReader* pReader, long long pos, - long long size) { - const long long stop = pos + size; - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (size == 0) - continue; - - if (id == libwebm::kMkvChapterAtom) { - status = ParseAtom(pReader, pos, size); - - if (status < 0) // error - return status; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -long Chapters::Edition::ParseAtom(IMkvReader* pReader, long long pos, - long 
long size) { - if (!ExpandAtomsArray()) - return -1; - - Atom& a = m_atoms[m_atoms_count++]; - a.Init(); - - return a.Parse(pReader, pos, size); -} - -bool Chapters::Edition::ExpandAtomsArray() { - if (m_atoms_size > m_atoms_count) - return true; // nothing else to do - - const int size = (m_atoms_size == 0) ? 1 : 2 * m_atoms_size; - - Atom* const atoms = new (std::nothrow) Atom[size]; - - if (atoms == NULL) - return false; - - for (int idx = 0; idx < m_atoms_count; ++idx) { - m_atoms[idx].ShallowCopy(atoms[idx]); - } - - delete[] m_atoms; - m_atoms = atoms; - - m_atoms_size = size; - return true; -} - -Chapters::Atom::Atom() {} - -Chapters::Atom::~Atom() {} - -unsigned long long Chapters::Atom::GetUID() const { return m_uid; } - -const char* Chapters::Atom::GetStringUID() const { return m_string_uid; } - -long long Chapters::Atom::GetStartTimecode() const { return m_start_timecode; } - -long long Chapters::Atom::GetStopTimecode() const { return m_stop_timecode; } - -long long Chapters::Atom::GetStartTime(const Chapters* pChapters) const { - return GetTime(pChapters, m_start_timecode); -} - -long long Chapters::Atom::GetStopTime(const Chapters* pChapters) const { - return GetTime(pChapters, m_stop_timecode); -} - -int Chapters::Atom::GetDisplayCount() const { return m_displays_count; } - -const Chapters::Display* Chapters::Atom::GetDisplay(int index) const { - if (index < 0) - return NULL; - - if (index >= m_displays_count) - return NULL; - - return m_displays + index; -} - -void Chapters::Atom::Init() { - m_string_uid = NULL; - m_uid = 0; - m_start_timecode = -1; - m_stop_timecode = -1; - - m_displays = NULL; - m_displays_size = 0; - m_displays_count = 0; -} - -void Chapters::Atom::ShallowCopy(Atom& rhs) const { - rhs.m_string_uid = m_string_uid; - rhs.m_uid = m_uid; - rhs.m_start_timecode = m_start_timecode; - rhs.m_stop_timecode = m_stop_timecode; - - rhs.m_displays = m_displays; - rhs.m_displays_size = m_displays_size; - rhs.m_displays_count = m_displays_count; 
-} - -void Chapters::Atom::Clear() { - delete[] m_string_uid; - m_string_uid = NULL; - - while (m_displays_count > 0) { - Display& d = m_displays[--m_displays_count]; - d.Clear(); - } - - delete[] m_displays; - m_displays = NULL; - - m_displays_size = 0; -} - -long Chapters::Atom::Parse(IMkvReader* pReader, long long pos, long long size) { - const long long stop = pos + size; - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (size == 0) // 0 length payload, skip. - continue; - - if (id == libwebm::kMkvChapterDisplay) { - status = ParseDisplay(pReader, pos, size); - - if (status < 0) // error - return status; - } else if (id == libwebm::kMkvChapterStringUID) { - status = UnserializeString(pReader, pos, size, m_string_uid); - - if (status < 0) // error - return status; - } else if (id == libwebm::kMkvChapterUID) { - long long val; - status = UnserializeInt(pReader, pos, size, val); - - if (status < 0) // error - return status; - - m_uid = static_cast<unsigned long long>(val); - } else if (id == libwebm::kMkvChapterTimeStart) { - const long long val = UnserializeUInt(pReader, pos, size); - - if (val < 0) // error - return static_cast<long>(val); - - m_start_timecode = val; - } else if (id == libwebm::kMkvChapterTimeEnd) { - const long long val = UnserializeUInt(pReader, pos, size); - - if (val < 0) // error - return static_cast<long>(val); - - m_stop_timecode = val; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -long long Chapters::Atom::GetTime(const Chapters* pChapters, - long long timecode) { - if (pChapters == NULL) - return -1; - - Segment* const pSegment = pChapters->m_pSegment; - - if (pSegment == NULL) // weird - return -1; - - const SegmentInfo* const pInfo = pSegment->GetInfo(); - - if (pInfo == NULL) - return -1; - - const long long 
timecode_scale = pInfo->GetTimeCodeScale(); - - if (timecode_scale < 1) // weird - return -1; - - if (timecode < 0) - return -1; - - const long long result = timecode_scale * timecode; - - return result; -} - -long Chapters::Atom::ParseDisplay(IMkvReader* pReader, long long pos, - long long size) { - if (!ExpandDisplaysArray()) - return -1; - - Display& d = m_displays[m_displays_count++]; - d.Init(); - - return d.Parse(pReader, pos, size); -} - -bool Chapters::Atom::ExpandDisplaysArray() { - if (m_displays_size > m_displays_count) - return true; // nothing else to do - - const int size = (m_displays_size == 0) ? 1 : 2 * m_displays_size; - - Display* const displays = new (std::nothrow) Display[size]; - - if (displays == NULL) - return false; - - for (int idx = 0; idx < m_displays_count; ++idx) { - m_displays[idx].ShallowCopy(displays[idx]); - } - - delete[] m_displays; - m_displays = displays; - - m_displays_size = size; - return true; -} - -Chapters::Display::Display() {} - -Chapters::Display::~Display() {} - -const char* Chapters::Display::GetString() const { return m_string; } - -const char* Chapters::Display::GetLanguage() const { return m_language; } - -const char* Chapters::Display::GetCountry() const { return m_country; } - -void Chapters::Display::Init() { - m_string = NULL; - m_language = NULL; - m_country = NULL; -} - -void Chapters::Display::ShallowCopy(Display& rhs) const { - rhs.m_string = m_string; - rhs.m_language = m_language; - rhs.m_country = m_country; -} - -void Chapters::Display::Clear() { - delete[] m_string; - m_string = NULL; - - delete[] m_language; - m_language = NULL; - - delete[] m_country; - m_country = NULL; -} - -long Chapters::Display::Parse(IMkvReader* pReader, long long pos, - long long size) { - const long long stop = pos + size; - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (size == 0) // No payload. 
- continue; - - if (id == libwebm::kMkvChapString) { - status = UnserializeString(pReader, pos, size, m_string); - - if (status) - return status; - } else if (id == libwebm::kMkvChapLanguage) { - status = UnserializeString(pReader, pos, size, m_language); - - if (status) - return status; - } else if (id == libwebm::kMkvChapCountry) { - status = UnserializeString(pReader, pos, size, m_country); - - if (status) - return status; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -Tags::Tags(Segment* pSegment, long long payload_start, long long payload_size, - long long element_start, long long element_size) - : m_pSegment(pSegment), - m_start(payload_start), - m_size(payload_size), - m_element_start(element_start), - m_element_size(element_size), - m_tags(NULL), - m_tags_size(0), - m_tags_count(0) {} - -Tags::~Tags() { - while (m_tags_count > 0) { - Tag& t = m_tags[--m_tags_count]; - t.Clear(); - } - delete[] m_tags; -} - -long Tags::Parse() { - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long pos = m_start; // payload start - const long long stop = pos + m_size; // payload stop - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) - return status; - - if (size == 0) // 0 length tag, read another - continue; - - if (id == libwebm::kMkvTag) { - status = ParseTag(pos, size); - - if (status < 0) - return status; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - return 0; -} - -int Tags::GetTagCount() const { return m_tags_count; } - -const Tags::Tag* Tags::GetTag(int idx) const { - if (idx < 0) - return NULL; - - if (idx >= m_tags_count) - return NULL; - - return m_tags + idx; -} - -bool Tags::ExpandTagsArray() { - if (m_tags_size > m_tags_count) - return true; // nothing else to do - - const int size = 
(m_tags_size == 0) ? 1 : 2 * m_tags_size; - - Tag* const tags = new (std::nothrow) Tag[size]; - - if (tags == NULL) - return false; - - for (int idx = 0; idx < m_tags_count; ++idx) { - m_tags[idx].ShallowCopy(tags[idx]); - } - - delete[] m_tags; - m_tags = tags; - - m_tags_size = size; - return true; -} - -long Tags::ParseTag(long long pos, long long size) { - if (!ExpandTagsArray()) - return -1; - - Tag& t = m_tags[m_tags_count++]; - t.Init(); - - return t.Parse(m_pSegment->m_pReader, pos, size); -} - -Tags::Tag::Tag() {} - -Tags::Tag::~Tag() {} - -int Tags::Tag::GetSimpleTagCount() const { return m_simple_tags_count; } - -const Tags::SimpleTag* Tags::Tag::GetSimpleTag(int index) const { - if (index < 0) - return NULL; - - if (index >= m_simple_tags_count) - return NULL; - - return m_simple_tags + index; -} - -void Tags::Tag::Init() { - m_simple_tags = NULL; - m_simple_tags_size = 0; - m_simple_tags_count = 0; -} - -void Tags::Tag::ShallowCopy(Tag& rhs) const { - rhs.m_simple_tags = m_simple_tags; - rhs.m_simple_tags_size = m_simple_tags_size; - rhs.m_simple_tags_count = m_simple_tags_count; -} - -void Tags::Tag::Clear() { - while (m_simple_tags_count > 0) { - SimpleTag& d = m_simple_tags[--m_simple_tags_count]; - d.Clear(); - } - - delete[] m_simple_tags; - m_simple_tags = NULL; - - m_simple_tags_size = 0; -} - -long Tags::Tag::Parse(IMkvReader* pReader, long long pos, long long size) { - const long long stop = pos + size; - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) - return status; - - if (size == 0) // 0 length tag, read another - continue; - - if (id == libwebm::kMkvSimpleTag) { - status = ParseSimpleTag(pReader, pos, size); - - if (status < 0) - return status; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -long Tags::Tag::ParseSimpleTag(IMkvReader* pReader, long long pos, 
- long long size) { - if (!ExpandSimpleTagsArray()) - return -1; - - SimpleTag& st = m_simple_tags[m_simple_tags_count++]; - st.Init(); - - return st.Parse(pReader, pos, size); -} - -bool Tags::Tag::ExpandSimpleTagsArray() { - if (m_simple_tags_size > m_simple_tags_count) - return true; // nothing else to do - - const int size = (m_simple_tags_size == 0) ? 1 : 2 * m_simple_tags_size; - - SimpleTag* const displays = new (std::nothrow) SimpleTag[size]; - - if (displays == NULL) - return false; - - for (int idx = 0; idx < m_simple_tags_count; ++idx) { - m_simple_tags[idx].ShallowCopy(displays[idx]); - } - - delete[] m_simple_tags; - m_simple_tags = displays; - - m_simple_tags_size = size; - return true; -} - -Tags::SimpleTag::SimpleTag() {} - -Tags::SimpleTag::~SimpleTag() {} - -const char* Tags::SimpleTag::GetTagName() const { return m_tag_name; } - -const char* Tags::SimpleTag::GetTagString() const { return m_tag_string; } - -void Tags::SimpleTag::Init() { - m_tag_name = NULL; - m_tag_string = NULL; -} - -void Tags::SimpleTag::ShallowCopy(SimpleTag& rhs) const { - rhs.m_tag_name = m_tag_name; - rhs.m_tag_string = m_tag_string; -} - -void Tags::SimpleTag::Clear() { - delete[] m_tag_name; - m_tag_name = NULL; - - delete[] m_tag_string; - m_tag_string = NULL; -} - -long Tags::SimpleTag::Parse(IMkvReader* pReader, long long pos, - long long size) { - const long long stop = pos + size; - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (size == 0) // weird - continue; - - if (id == libwebm::kMkvTagName) { - status = UnserializeString(pReader, pos, size, m_tag_name); - - if (status) - return status; - } else if (id == libwebm::kMkvTagString) { - status = UnserializeString(pReader, pos, size, m_tag_string); - - if (status) - return status; - } - - pos += size; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return 
E_FILE_FORMAT_INVALID; - return 0; -} - -SegmentInfo::SegmentInfo(Segment* pSegment, long long start, long long size_, - long long element_start, long long element_size) - : m_pSegment(pSegment), - m_start(start), - m_size(size_), - m_element_start(element_start), - m_element_size(element_size), - m_pMuxingAppAsUTF8(NULL), - m_pWritingAppAsUTF8(NULL), - m_pTitleAsUTF8(NULL) {} - -SegmentInfo::~SegmentInfo() { - delete[] m_pMuxingAppAsUTF8; - m_pMuxingAppAsUTF8 = NULL; - - delete[] m_pWritingAppAsUTF8; - m_pWritingAppAsUTF8 = NULL; - - delete[] m_pTitleAsUTF8; - m_pTitleAsUTF8 = NULL; -} - -long SegmentInfo::Parse() { - assert(m_pMuxingAppAsUTF8 == NULL); - assert(m_pWritingAppAsUTF8 == NULL); - assert(m_pTitleAsUTF8 == NULL); - - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long pos = m_start; - const long long stop = m_start + m_size; - - m_timecodeScale = 1000000; - m_duration = -1; - - while (pos < stop) { - long long id, size; - - const long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (id == libwebm::kMkvTimecodeScale) { - m_timecodeScale = UnserializeUInt(pReader, pos, size); - - if (m_timecodeScale <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvDuration) { - const long status = UnserializeFloat(pReader, pos, size, m_duration); - - if (status < 0) - return status; - - if (m_duration < 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvMuxingApp) { - const long status = - UnserializeString(pReader, pos, size, m_pMuxingAppAsUTF8); - - if (status) - return status; - } else if (id == libwebm::kMkvWritingApp) { - const long status = - UnserializeString(pReader, pos, size, m_pWritingAppAsUTF8); - - if (status) - return status; - } else if (id == libwebm::kMkvTitle) { - const long status = UnserializeString(pReader, pos, size, m_pTitleAsUTF8); - - if (status) - return status; - } - - pos += size; - - if (pos > stop) - return E_FILE_FORMAT_INVALID; 
- } - - const double rollover_check = m_duration * m_timecodeScale; - if (rollover_check > static_cast<double>(LLONG_MAX)) - return E_FILE_FORMAT_INVALID; - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - return 0; -} - -long long SegmentInfo::GetTimeCodeScale() const { return m_timecodeScale; } - -long long SegmentInfo::GetDuration() const { - if (m_duration < 0) - return -1; - - assert(m_timecodeScale >= 1); - - const double dd = double(m_duration) * double(m_timecodeScale); - const long long d = static_cast<long long>(dd); - - return d; -} - -const char* SegmentInfo::GetMuxingAppAsUTF8() const { - return m_pMuxingAppAsUTF8; -} - -const char* SegmentInfo::GetWritingAppAsUTF8() const { - return m_pWritingAppAsUTF8; -} - -const char* SegmentInfo::GetTitleAsUTF8() const { return m_pTitleAsUTF8; } - -/////////////////////////////////////////////////////////////// -// ContentEncoding element -ContentEncoding::ContentCompression::ContentCompression() - : algo(0), settings(NULL), settings_len(0) {} - -ContentEncoding::ContentCompression::~ContentCompression() { - delete[] settings; -} - -ContentEncoding::ContentEncryption::ContentEncryption() - : algo(0), - key_id(NULL), - key_id_len(0), - signature(NULL), - signature_len(0), - sig_key_id(NULL), - sig_key_id_len(0), - sig_algo(0), - sig_hash_algo(0) {} - -ContentEncoding::ContentEncryption::~ContentEncryption() { - delete[] key_id; - delete[] signature; - delete[] sig_key_id; -} - -ContentEncoding::ContentEncoding() - : compression_entries_(NULL), - compression_entries_end_(NULL), - encryption_entries_(NULL), - encryption_entries_end_(NULL), - encoding_order_(0), - encoding_scope_(1), - encoding_type_(0) {} - -ContentEncoding::~ContentEncoding() { - ContentCompression** comp_i = compression_entries_; - ContentCompression** const comp_j = compression_entries_end_; - - while (comp_i != comp_j) { - ContentCompression* const comp = *comp_i++; - delete comp; - } - - delete[] compression_entries_; - - 
ContentEncryption** enc_i = encryption_entries_; - ContentEncryption** const enc_j = encryption_entries_end_; - - while (enc_i != enc_j) { - ContentEncryption* const enc = *enc_i++; - delete enc; - } - - delete[] encryption_entries_; -} - -const ContentEncoding::ContentCompression* -ContentEncoding::GetCompressionByIndex(unsigned long idx) const { - const ptrdiff_t count = compression_entries_end_ - compression_entries_; - assert(count >= 0); - - if (idx >= static_cast<unsigned long>(count)) - return NULL; - - return compression_entries_[idx]; -} - -unsigned long ContentEncoding::GetCompressionCount() const { - const ptrdiff_t count = compression_entries_end_ - compression_entries_; - assert(count >= 0); - - return static_cast<unsigned long>(count); -} - -const ContentEncoding::ContentEncryption* ContentEncoding::GetEncryptionByIndex( - unsigned long idx) const { - const ptrdiff_t count = encryption_entries_end_ - encryption_entries_; - assert(count >= 0); - - if (idx >= static_cast<unsigned long>(count)) - return NULL; - - return encryption_entries_[idx]; -} - -unsigned long ContentEncoding::GetEncryptionCount() const { - const ptrdiff_t count = encryption_entries_end_ - encryption_entries_; - assert(count >= 0); - - return static_cast<unsigned long>(count); -} - -long ContentEncoding::ParseContentEncAESSettingsEntry( - long long start, long long size, IMkvReader* pReader, - ContentEncAESSettings* aes) { - assert(pReader); - assert(aes); - - long long pos = start; - const long long stop = start + size; - - while (pos < stop) { - long long id, size; - const long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - if (id == libwebm::kMkvAESSettingsCipherMode) { - aes->cipher_mode = UnserializeUInt(pReader, pos, size); - if (aes->cipher_mode != 1) - return E_FILE_FORMAT_INVALID; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - return 0; -} - -long 
ContentEncoding::ParseContentEncodingEntry(long long start, long long size, - IMkvReader* pReader) { - assert(pReader); - - long long pos = start; - const long long stop = start + size; - - // Count ContentCompression and ContentEncryption elements. - int compression_count = 0; - int encryption_count = 0; - - while (pos < stop) { - long long id, size; - const long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - if (id == libwebm::kMkvContentCompression) - ++compression_count; - - if (id == libwebm::kMkvContentEncryption) - ++encryption_count; - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (compression_count <= 0 && encryption_count <= 0) - return -1; - - if (compression_count > 0) { - compression_entries_ = - new (std::nothrow) ContentCompression*[compression_count]; - if (!compression_entries_) - return -1; - compression_entries_end_ = compression_entries_; - } - - if (encryption_count > 0) { - encryption_entries_ = - new (std::nothrow) ContentEncryption*[encryption_count]; - if (!encryption_entries_) { - delete[] compression_entries_; - return -1; - } - encryption_entries_end_ = encryption_entries_; - } - - pos = start; - while (pos < stop) { - long long id, size; - long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - if (id == libwebm::kMkvContentEncodingOrder) { - encoding_order_ = UnserializeUInt(pReader, pos, size); - } else if (id == libwebm::kMkvContentEncodingScope) { - encoding_scope_ = UnserializeUInt(pReader, pos, size); - if (encoding_scope_ < 1) - return -1; - } else if (id == libwebm::kMkvContentEncodingType) { - encoding_type_ = UnserializeUInt(pReader, pos, size); - } else if (id == libwebm::kMkvContentCompression) { - ContentCompression* const compression = - new (std::nothrow) ContentCompression(); - if (!compression) - return -1; - - status = ParseCompressionEntry(pos, size, 
pReader, compression); - if (status) { - delete compression; - return status; - } - *compression_entries_end_++ = compression; - } else if (id == libwebm::kMkvContentEncryption) { - ContentEncryption* const encryption = - new (std::nothrow) ContentEncryption(); - if (!encryption) - return -1; - - status = ParseEncryptionEntry(pos, size, pReader, encryption); - if (status) { - delete encryption; - return status; - } - *encryption_entries_end_++ = encryption; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - return 0; -} - -long ContentEncoding::ParseCompressionEntry(long long start, long long size, - IMkvReader* pReader, - ContentCompression* compression) { - assert(pReader); - assert(compression); - - long long pos = start; - const long long stop = start + size; - - bool valid = false; - - while (pos < stop) { - long long id, size; - const long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - if (id == libwebm::kMkvContentCompAlgo) { - long long algo = UnserializeUInt(pReader, pos, size); - if (algo < 0) - return E_FILE_FORMAT_INVALID; - compression->algo = algo; - valid = true; - } else if (id == libwebm::kMkvContentCompSettings) { - if (size <= 0) - return E_FILE_FORMAT_INVALID; - - const size_t buflen = static_cast<size_t>(size); - unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen); - if (buf == NULL) - return -1; - - const int read_status = - pReader->Read(pos, static_cast<long>(buflen), buf); - if (read_status) { - delete[] buf; - return status; - } - - compression->settings = buf; - compression->settings_len = buflen; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - // ContentCompAlgo is mandatory - if (!valid) - return E_FILE_FORMAT_INVALID; - - return 0; -} - -long ContentEncoding::ParseEncryptionEntry(long long start, long long size, - 
IMkvReader* pReader, - ContentEncryption* encryption) { - assert(pReader); - assert(encryption); - - long long pos = start; - const long long stop = start + size; - - while (pos < stop) { - long long id, size; - const long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - if (id == libwebm::kMkvContentEncAlgo) { - encryption->algo = UnserializeUInt(pReader, pos, size); - if (encryption->algo != 5) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvContentEncKeyID) { - delete[] encryption->key_id; - encryption->key_id = NULL; - encryption->key_id_len = 0; - - if (size <= 0) - return E_FILE_FORMAT_INVALID; - - const size_t buflen = static_cast<size_t>(size); - unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen); - if (buf == NULL) - return -1; - - const int read_status = - pReader->Read(pos, static_cast<long>(buflen), buf); - if (read_status) { - delete[] buf; - return status; - } - - encryption->key_id = buf; - encryption->key_id_len = buflen; - } else if (id == libwebm::kMkvContentSignature) { - delete[] encryption->signature; - encryption->signature = NULL; - encryption->signature_len = 0; - - if (size <= 0) - return E_FILE_FORMAT_INVALID; - - const size_t buflen = static_cast<size_t>(size); - unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen); - if (buf == NULL) - return -1; - - const int read_status = - pReader->Read(pos, static_cast<long>(buflen), buf); - if (read_status) { - delete[] buf; - return status; - } - - encryption->signature = buf; - encryption->signature_len = buflen; - } else if (id == libwebm::kMkvContentSigKeyID) { - delete[] encryption->sig_key_id; - encryption->sig_key_id = NULL; - encryption->sig_key_id_len = 0; - - if (size <= 0) - return E_FILE_FORMAT_INVALID; - - const size_t buflen = static_cast<size_t>(size); - unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen); - if (buf == NULL) - return -1; - - const int read_status = - 
pReader->Read(pos, static_cast<long>(buflen), buf); - if (read_status) { - delete[] buf; - return status; - } - - encryption->sig_key_id = buf; - encryption->sig_key_id_len = buflen; - } else if (id == libwebm::kMkvContentSigAlgo) { - encryption->sig_algo = UnserializeUInt(pReader, pos, size); - } else if (id == libwebm::kMkvContentSigHashAlgo) { - encryption->sig_hash_algo = UnserializeUInt(pReader, pos, size); - } else if (id == libwebm::kMkvContentEncAESSettings) { - const long status = ParseContentEncAESSettingsEntry( - pos, size, pReader, &encryption->aes_settings); - if (status) - return status; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - return 0; -} - -Track::Track(Segment* pSegment, long long element_start, long long element_size) - : m_pSegment(pSegment), - m_element_start(element_start), - m_element_size(element_size), - content_encoding_entries_(NULL), - content_encoding_entries_end_(NULL) {} - -Track::~Track() { - Info& info = const_cast<Info&>(m_info); - info.Clear(); - - ContentEncoding** i = content_encoding_entries_; - ContentEncoding** const j = content_encoding_entries_end_; - - while (i != j) { - ContentEncoding* const encoding = *i++; - delete encoding; - } - - delete[] content_encoding_entries_; -} - -long Track::Create(Segment* pSegment, const Info& info, long long element_start, - long long element_size, Track*& pResult) { - if (pResult) - return -1; - - Track* const pTrack = - new (std::nothrow) Track(pSegment, element_start, element_size); - - if (pTrack == NULL) - return -1; // generic error - - const int status = info.Copy(pTrack->m_info); - - if (status) { // error - delete pTrack; - return status; - } - - pResult = pTrack; - return 0; // success -} - -Track::Info::Info() - : uid(0), - defaultDuration(0), - codecDelay(0), - seekPreRoll(0), - nameAsUTF8(NULL), - language(NULL), - codecId(NULL), - codecNameAsUTF8(NULL), - codecPrivate(NULL), - codecPrivateSize(0), - lacing(false) {} - 
-Track::Info::~Info() { Clear(); } - -void Track::Info::Clear() { - delete[] nameAsUTF8; - nameAsUTF8 = NULL; - - delete[] language; - language = NULL; - - delete[] codecId; - codecId = NULL; - - delete[] codecPrivate; - codecPrivate = NULL; - codecPrivateSize = 0; - - delete[] codecNameAsUTF8; - codecNameAsUTF8 = NULL; -} - -int Track::Info::CopyStr(char* Info::*str, Info& dst_) const { - if (str == static_cast<char * Info::*>(NULL)) - return -1; - - char*& dst = dst_.*str; - - if (dst) // should be NULL already - return -1; - - const char* const src = this->*str; - - if (src == NULL) - return 0; - - const size_t len = strlen(src); - - dst = SafeArrayAlloc<char>(1, len + 1); - - if (dst == NULL) - return -1; - - strcpy(dst, src); - - return 0; -} - -int Track::Info::Copy(Info& dst) const { - if (&dst == this) - return 0; - - dst.type = type; - dst.number = number; - dst.defaultDuration = defaultDuration; - dst.codecDelay = codecDelay; - dst.seekPreRoll = seekPreRoll; - dst.uid = uid; - dst.lacing = lacing; - dst.settings = settings; - - // We now copy the string member variables from src to dst. - // This involves memory allocation so in principle the operation - // can fail (indeed, that's why we have Info::Copy), so we must - // report this to the caller. An error return from this function - // therefore implies that the copy was only partially successful. 
- - if (int status = CopyStr(&Info::nameAsUTF8, dst)) - return status; - - if (int status = CopyStr(&Info::language, dst)) - return status; - - if (int status = CopyStr(&Info::codecId, dst)) - return status; - - if (int status = CopyStr(&Info::codecNameAsUTF8, dst)) - return status; - - if (codecPrivateSize > 0) { - if (codecPrivate == NULL) - return -1; - - if (dst.codecPrivate) - return -1; - - if (dst.codecPrivateSize != 0) - return -1; - - dst.codecPrivate = SafeArrayAlloc<unsigned char>(1, codecPrivateSize); - - if (dst.codecPrivate == NULL) - return -1; - - memcpy(dst.codecPrivate, codecPrivate, codecPrivateSize); - dst.codecPrivateSize = codecPrivateSize; - } - - return 0; -} - -const BlockEntry* Track::GetEOS() const { return &m_eos; } - -long Track::GetType() const { return m_info.type; } - -long Track::GetNumber() const { return m_info.number; } - -unsigned long long Track::GetUid() const { return m_info.uid; } - -const char* Track::GetNameAsUTF8() const { return m_info.nameAsUTF8; } - -const char* Track::GetLanguage() const { return m_info.language; } - -const char* Track::GetCodecNameAsUTF8() const { return m_info.codecNameAsUTF8; } - -const char* Track::GetCodecId() const { return m_info.codecId; } - -const unsigned char* Track::GetCodecPrivate(size_t& size) const { - size = m_info.codecPrivateSize; - return m_info.codecPrivate; -} - -bool Track::GetLacing() const { return m_info.lacing; } - -unsigned long long Track::GetDefaultDuration() const { - return m_info.defaultDuration; -} - -unsigned long long Track::GetCodecDelay() const { return m_info.codecDelay; } - -unsigned long long Track::GetSeekPreRoll() const { return m_info.seekPreRoll; } - -long Track::GetFirst(const BlockEntry*& pBlockEntry) const { - const Cluster* pCluster = m_pSegment->GetFirst(); - - for (int i = 0;;) { - if (pCluster == NULL) { - pBlockEntry = GetEOS(); - return 1; - } - - if (pCluster->EOS()) { - if (m_pSegment->DoneParsing()) { - pBlockEntry = GetEOS(); - return 1; - } - - 
pBlockEntry = 0; - return E_BUFFER_NOT_FULL; - } - - long status = pCluster->GetFirst(pBlockEntry); - - if (status < 0) // error - return status; - - if (pBlockEntry == 0) { // empty cluster - pCluster = m_pSegment->GetNext(pCluster); - continue; - } - - for (;;) { - const Block* const pBlock = pBlockEntry->GetBlock(); - assert(pBlock); - - const long long tn = pBlock->GetTrackNumber(); - - if ((tn == m_info.number) && VetEntry(pBlockEntry)) - return 0; - - const BlockEntry* pNextEntry; - - status = pCluster->GetNext(pBlockEntry, pNextEntry); - - if (status < 0) // error - return status; - - if (pNextEntry == 0) - break; - - pBlockEntry = pNextEntry; - } - - ++i; - - if (i >= 100) - break; - - pCluster = m_pSegment->GetNext(pCluster); - } - - // NOTE: if we get here, it means that we didn't find a block with - // a matching track number. We interpret that as an error (which - // might be too conservative). - - pBlockEntry = GetEOS(); // so we can return a non-NULL value - return 1; -} - -long Track::GetNext(const BlockEntry* pCurrEntry, - const BlockEntry*& pNextEntry) const { - assert(pCurrEntry); - assert(!pCurrEntry->EOS()); //? 
- - const Block* const pCurrBlock = pCurrEntry->GetBlock(); - assert(pCurrBlock && pCurrBlock->GetTrackNumber() == m_info.number); - if (!pCurrBlock || pCurrBlock->GetTrackNumber() != m_info.number) - return -1; - - const Cluster* pCluster = pCurrEntry->GetCluster(); - assert(pCluster); - assert(!pCluster->EOS()); - - long status = pCluster->GetNext(pCurrEntry, pNextEntry); - - if (status < 0) // error - return status; - - for (int i = 0;;) { - while (pNextEntry) { - const Block* const pNextBlock = pNextEntry->GetBlock(); - assert(pNextBlock); - - if (pNextBlock->GetTrackNumber() == m_info.number) - return 0; - - pCurrEntry = pNextEntry; - - status = pCluster->GetNext(pCurrEntry, pNextEntry); - - if (status < 0) // error - return status; - } - - pCluster = m_pSegment->GetNext(pCluster); - - if (pCluster == NULL) { - pNextEntry = GetEOS(); - return 1; - } - - if (pCluster->EOS()) { - if (m_pSegment->DoneParsing()) { - pNextEntry = GetEOS(); - return 1; - } - - // TODO: there is a potential O(n^2) problem here: we tell the - // caller to (pre)load another cluster, which he does, but then he - // calls GetNext again, which repeats the same search. This is - // a pathological case, since the only way it can happen is if - // there exists a long sequence of clusters none of which contain a - // block from this track. One way around this problem is for the - // caller to be smarter when he loads another cluster: don't call - // us back until you have a cluster that contains a block from this - // track. (Of course, that's not cheap either, since our caller - // would have to scan the each cluster as it's loaded, so that - // would just push back the problem.) 
- - pNextEntry = NULL; - return E_BUFFER_NOT_FULL; - } - - status = pCluster->GetFirst(pNextEntry); - - if (status < 0) // error - return status; - - if (pNextEntry == NULL) // empty cluster - continue; - - ++i; - - if (i >= 100) - break; - } - - // NOTE: if we get here, it means that we didn't find a block with - // a matching track number after lots of searching, so we give - // up trying. - - pNextEntry = GetEOS(); // so we can return a non-NULL value - return 1; -} - -bool Track::VetEntry(const BlockEntry* pBlockEntry) const { - assert(pBlockEntry); - const Block* const pBlock = pBlockEntry->GetBlock(); - assert(pBlock); - assert(pBlock->GetTrackNumber() == m_info.number); - if (!pBlock || pBlock->GetTrackNumber() != m_info.number) - return false; - - // This function is used during a seek to determine whether the - // frame is a valid seek target. This default function simply - // returns true, which means all frames are valid seek targets. - // It gets overridden by the VideoTrack class, because only video - // keyframes can be used as seek target. - - return true; -} - -long Track::Seek(long long time_ns, const BlockEntry*& pResult) const { - const long status = GetFirst(pResult); - - if (status < 0) // buffer underflow, etc - return status; - - assert(pResult); - - if (pResult->EOS()) - return 0; - - const Cluster* pCluster = pResult->GetCluster(); - assert(pCluster); - assert(pCluster->GetIndex() >= 0); - - if (time_ns <= pResult->GetBlock()->GetTime(pCluster)) - return 0; - - Cluster** const clusters = m_pSegment->m_clusters; - assert(clusters); - - const long count = m_pSegment->GetCount(); // loaded only, not preloaded - assert(count > 0); - - Cluster** const i = clusters + pCluster->GetIndex(); - assert(i); - assert(*i == pCluster); - assert(pCluster->GetTime() <= time_ns); - - Cluster** const j = clusters + count; - - Cluster** lo = i; - Cluster** hi = j; - - while (lo < hi) { - // INVARIANT: - //[i, lo) <= time_ns - //[lo, hi) ? 
- //[hi, j) > time_ns - - Cluster** const mid = lo + (hi - lo) / 2; - assert(mid < hi); - - pCluster = *mid; - assert(pCluster); - assert(pCluster->GetIndex() >= 0); - assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters)); - - const long long t = pCluster->GetTime(); - - if (t <= time_ns) - lo = mid + 1; - else - hi = mid; - - assert(lo <= hi); - } - - assert(lo == hi); - assert(lo > i); - assert(lo <= j); - - while (lo > i) { - pCluster = *--lo; - assert(pCluster); - assert(pCluster->GetTime() <= time_ns); - - pResult = pCluster->GetEntry(this); - - if ((pResult != 0) && !pResult->EOS()) - return 0; - - // landed on empty cluster (no entries) - } - - pResult = GetEOS(); // weird - return 0; -} - -const ContentEncoding* Track::GetContentEncodingByIndex( - unsigned long idx) const { - const ptrdiff_t count = - content_encoding_entries_end_ - content_encoding_entries_; - assert(count >= 0); - - if (idx >= static_cast<unsigned long>(count)) - return NULL; - - return content_encoding_entries_[idx]; -} - -unsigned long Track::GetContentEncodingCount() const { - const ptrdiff_t count = - content_encoding_entries_end_ - content_encoding_entries_; - assert(count >= 0); - - return static_cast<unsigned long>(count); -} - -long Track::ParseContentEncodingsEntry(long long start, long long size) { - IMkvReader* const pReader = m_pSegment->m_pReader; - assert(pReader); - - long long pos = start; - const long long stop = start + size; - - // Count ContentEncoding elements. 
- int count = 0; - while (pos < stop) { - long long id, size; - const long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - // pos now designates start of element - if (id == libwebm::kMkvContentEncoding) - ++count; - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (count <= 0) - return -1; - - content_encoding_entries_ = new (std::nothrow) ContentEncoding*[count]; - if (!content_encoding_entries_) - return -1; - - content_encoding_entries_end_ = content_encoding_entries_; - - pos = start; - while (pos < stop) { - long long id, size; - long status = ParseElementHeader(pReader, pos, stop, id, size); - if (status < 0) // error - return status; - - // pos now designates start of element - if (id == libwebm::kMkvContentEncoding) { - ContentEncoding* const content_encoding = - new (std::nothrow) ContentEncoding(); - if (!content_encoding) - return -1; - - status = content_encoding->ParseContentEncodingEntry(pos, size, pReader); - if (status) { - delete content_encoding; - return status; - } - - *content_encoding_entries_end_++ = content_encoding; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - return 0; -} - -Track::EOSBlock::EOSBlock() : BlockEntry(NULL, LONG_MIN) {} - -BlockEntry::Kind Track::EOSBlock::GetKind() const { return kBlockEOS; } - -const Block* Track::EOSBlock::GetBlock() const { return NULL; } - -bool PrimaryChromaticity::Parse(IMkvReader* reader, long long read_pos, - long long value_size, bool is_x, - PrimaryChromaticity** chromaticity) { - if (!reader) - return false; - - if (!*chromaticity) - *chromaticity = new PrimaryChromaticity(); - - if (!*chromaticity) - return false; - - PrimaryChromaticity* pc = *chromaticity; - float* value = is_x ? 
&pc->x : &pc->y; - - double parser_value = 0; - const long long parse_status = - UnserializeFloat(reader, read_pos, value_size, parser_value); - - // Valid range is [0, 1]. Make sure the double is representable as a float - // before casting. - if (parse_status < 0 || parser_value < 0.0 || parser_value > 1.0 || - (parser_value > 0.0 && parser_value < FLT_MIN)) - return false; - - *value = static_cast<float>(parser_value); - - return true; -} - -bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start, - long long mm_size, MasteringMetadata** mm) { - if (!reader || *mm) - return false; - - std::unique_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata()); - if (!mm_ptr.get()) - return false; - - const long long mm_end = mm_start + mm_size; - long long read_pos = mm_start; - - while (read_pos < mm_end) { - long long child_id = 0; - long long child_size = 0; - - const long long status = - ParseElementHeader(reader, read_pos, mm_end, child_id, child_size); - if (status < 0) - return false; - - if (child_id == libwebm::kMkvLuminanceMax) { - double value = 0; - const long long value_parse_status = - UnserializeFloat(reader, read_pos, child_size, value); - if (value < -FLT_MAX || value > FLT_MAX || - (value > 0.0 && value < FLT_MIN)) { - return false; - } - mm_ptr->luminance_max = static_cast<float>(value); - if (value_parse_status < 0 || mm_ptr->luminance_max < 0.0 || - mm_ptr->luminance_max > 9999.99) { - return false; - } - } else if (child_id == libwebm::kMkvLuminanceMin) { - double value = 0; - const long long value_parse_status = - UnserializeFloat(reader, read_pos, child_size, value); - if (value < -FLT_MAX || value > FLT_MAX || - (value > 0.0 && value < FLT_MIN)) { - return false; - } - mm_ptr->luminance_min = static_cast<float>(value); - if (value_parse_status < 0 || mm_ptr->luminance_min < 0.0 || - mm_ptr->luminance_min > 999.9999) { - return false; - } - } else { - bool is_x = false; - PrimaryChromaticity** chromaticity; - switch (child_id) { - 
case libwebm::kMkvPrimaryRChromaticityX: - case libwebm::kMkvPrimaryRChromaticityY: - is_x = child_id == libwebm::kMkvPrimaryRChromaticityX; - chromaticity = &mm_ptr->r; - break; - case libwebm::kMkvPrimaryGChromaticityX: - case libwebm::kMkvPrimaryGChromaticityY: - is_x = child_id == libwebm::kMkvPrimaryGChromaticityX; - chromaticity = &mm_ptr->g; - break; - case libwebm::kMkvPrimaryBChromaticityX: - case libwebm::kMkvPrimaryBChromaticityY: - is_x = child_id == libwebm::kMkvPrimaryBChromaticityX; - chromaticity = &mm_ptr->b; - break; - case libwebm::kMkvWhitePointChromaticityX: - case libwebm::kMkvWhitePointChromaticityY: - is_x = child_id == libwebm::kMkvWhitePointChromaticityX; - chromaticity = &mm_ptr->white_point; - break; - default: - return false; - } - const bool value_parse_status = PrimaryChromaticity::Parse( - reader, read_pos, child_size, is_x, chromaticity); - if (!value_parse_status) - return false; - } - - read_pos += child_size; - if (read_pos > mm_end) - return false; - } - - *mm = mm_ptr.release(); - return true; -} - -bool Colour::Parse(IMkvReader* reader, long long colour_start, - long long colour_size, Colour** colour) { - if (!reader || *colour) - return false; - - std::unique_ptr<Colour> colour_ptr(new Colour()); - if (!colour_ptr.get()) - return false; - - const long long colour_end = colour_start + colour_size; - long long read_pos = colour_start; - - while (read_pos < colour_end) { - long long child_id = 0; - long long child_size = 0; - - const long status = - ParseElementHeader(reader, read_pos, colour_end, child_id, child_size); - if (status < 0) - return false; - - if (child_id == libwebm::kMkvMatrixCoefficients) { - colour_ptr->matrix_coefficients = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->matrix_coefficients < 0) - return false; - } else if (child_id == libwebm::kMkvBitsPerChannel) { - colour_ptr->bits_per_channel = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->bits_per_channel < 0) - 
return false; - } else if (child_id == libwebm::kMkvChromaSubsamplingHorz) { - colour_ptr->chroma_subsampling_horz = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->chroma_subsampling_horz < 0) - return false; - } else if (child_id == libwebm::kMkvChromaSubsamplingVert) { - colour_ptr->chroma_subsampling_vert = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->chroma_subsampling_vert < 0) - return false; - } else if (child_id == libwebm::kMkvCbSubsamplingHorz) { - colour_ptr->cb_subsampling_horz = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->cb_subsampling_horz < 0) - return false; - } else if (child_id == libwebm::kMkvCbSubsamplingVert) { - colour_ptr->cb_subsampling_vert = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->cb_subsampling_vert < 0) - return false; - } else if (child_id == libwebm::kMkvChromaSitingHorz) { - colour_ptr->chroma_siting_horz = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->chroma_siting_horz < 0) - return false; - } else if (child_id == libwebm::kMkvChromaSitingVert) { - colour_ptr->chroma_siting_vert = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->chroma_siting_vert < 0) - return false; - } else if (child_id == libwebm::kMkvRange) { - colour_ptr->range = UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->range < 0) - return false; - } else if (child_id == libwebm::kMkvTransferCharacteristics) { - colour_ptr->transfer_characteristics = - UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->transfer_characteristics < 0) - return false; - } else if (child_id == libwebm::kMkvPrimaries) { - colour_ptr->primaries = UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->primaries < 0) - return false; - } else if (child_id == libwebm::kMkvMaxCLL) { - colour_ptr->max_cll = UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->max_cll < 0) - return false; - } else if (child_id == 
libwebm::kMkvMaxFALL) { - colour_ptr->max_fall = UnserializeUInt(reader, read_pos, child_size); - if (colour_ptr->max_fall < 0) - return false; - } else if (child_id == libwebm::kMkvMasteringMetadata) { - if (!MasteringMetadata::Parse(reader, read_pos, child_size, - &colour_ptr->mastering_metadata)) - return false; - } else { - return false; - } - - read_pos += child_size; - if (read_pos > colour_end) - return false; - } - *colour = colour_ptr.release(); - return true; -} - -bool Projection::Parse(IMkvReader* reader, long long start, long long size, - Projection** projection) { - if (!reader || *projection) - return false; - - std::unique_ptr<Projection> projection_ptr(new Projection()); - if (!projection_ptr.get()) - return false; - - const long long end = start + size; - long long read_pos = start; - - while (read_pos < end) { - long long child_id = 0; - long long child_size = 0; - - const long long status = - ParseElementHeader(reader, read_pos, end, child_id, child_size); - if (status < 0) - return false; - - if (child_id == libwebm::kMkvProjectionType) { - long long projection_type = kTypeNotPresent; - projection_type = UnserializeUInt(reader, read_pos, child_size); - if (projection_type < 0) - return false; - - projection_ptr->type = static_cast<ProjectionType>(projection_type); - } else if (child_id == libwebm::kMkvProjectionPrivate) { - unsigned char* data = SafeArrayAlloc<unsigned char>(1, child_size); - - if (data == NULL) - return false; - - const int status = - reader->Read(read_pos, static_cast<long>(child_size), data); - - if (status) { - delete[] data; - return false; - } - - projection_ptr->private_data = data; - projection_ptr->private_data_length = static_cast<size_t>(child_size); - } else { - double value = 0; - const long long value_parse_status = - UnserializeFloat(reader, read_pos, child_size, value); - // Make sure value is representable as a float before casting. 
- if (value_parse_status < 0 || value < -FLT_MAX || value > FLT_MAX || - (value > 0.0 && value < FLT_MIN)) { - return false; - } - - switch (child_id) { - case libwebm::kMkvProjectionPoseYaw: - projection_ptr->pose_yaw = static_cast<float>(value); - break; - case libwebm::kMkvProjectionPosePitch: - projection_ptr->pose_pitch = static_cast<float>(value); - break; - case libwebm::kMkvProjectionPoseRoll: - projection_ptr->pose_roll = static_cast<float>(value); - break; - default: - return false; - } - } - - read_pos += child_size; - if (read_pos > end) - return false; - } - - *projection = projection_ptr.release(); - return true; -} - -VideoTrack::VideoTrack(Segment* pSegment, long long element_start, - long long element_size) - : Track(pSegment, element_start, element_size), - m_colour(NULL), - m_projection(NULL) {} - -VideoTrack::~VideoTrack() { - delete m_colour; - delete m_projection; -} - -long VideoTrack::Parse(Segment* pSegment, const Info& info, - long long element_start, long long element_size, - VideoTrack*& pResult) { - if (pResult) - return -1; - - if (info.type != Track::kVideo) - return -1; - - long long width = 0; - long long height = 0; - long long display_width = 0; - long long display_height = 0; - long long display_unit = 0; - long long stereo_mode = 0; - - double rate = 0.0; - - IMkvReader* const pReader = pSegment->m_pReader; - - const Settings& s = info.settings; - assert(s.start >= 0); - assert(s.size >= 0); - - long long pos = s.start; - assert(pos >= 0); - - const long long stop = pos + s.size; - - Colour* colour = NULL; - Projection* projection = NULL; - - while (pos < stop) { - long long id, size; - - const long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (id == libwebm::kMkvPixelWidth) { - width = UnserializeUInt(pReader, pos, size); - - if (width <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvPixelHeight) { - height = UnserializeUInt(pReader, pos, 
size); - - if (height <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvDisplayWidth) { - display_width = UnserializeUInt(pReader, pos, size); - - if (display_width <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvDisplayHeight) { - display_height = UnserializeUInt(pReader, pos, size); - - if (display_height <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvDisplayUnit) { - display_unit = UnserializeUInt(pReader, pos, size); - - if (display_unit < 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvStereoMode) { - stereo_mode = UnserializeUInt(pReader, pos, size); - - if (stereo_mode < 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvFrameRate) { - const long status = UnserializeFloat(pReader, pos, size, rate); - - if (status < 0) - return status; - - if (rate <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvColour) { - if (!Colour::Parse(pReader, pos, size, &colour)) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvProjection) { - if (!Projection::Parse(pReader, pos, size, &projection)) - return E_FILE_FORMAT_INVALID; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - VideoTrack* const pTrack = - new (std::nothrow) VideoTrack(pSegment, element_start, element_size); - - if (pTrack == NULL) - return -1; // generic error - - const int status = info.Copy(pTrack->m_info); - - if (status) { // error - delete pTrack; - return status; - } - - pTrack->m_width = width; - pTrack->m_height = height; - pTrack->m_display_width = display_width; - pTrack->m_display_height = display_height; - pTrack->m_display_unit = display_unit; - pTrack->m_stereo_mode = stereo_mode; - pTrack->m_rate = rate; - pTrack->m_colour = colour; - pTrack->m_projection = projection; - - pResult = pTrack; - return 0; // success -} - -bool VideoTrack::VetEntry(const BlockEntry* 
pBlockEntry) const { - return Track::VetEntry(pBlockEntry) && pBlockEntry->GetBlock()->IsKey(); -} - -long VideoTrack::Seek(long long time_ns, const BlockEntry*& pResult) const { - const long status = GetFirst(pResult); - - if (status < 0) // buffer underflow, etc - return status; - - assert(pResult); - - if (pResult->EOS()) - return 0; - - const Cluster* pCluster = pResult->GetCluster(); - assert(pCluster); - assert(pCluster->GetIndex() >= 0); - - if (time_ns <= pResult->GetBlock()->GetTime(pCluster)) - return 0; - - Cluster** const clusters = m_pSegment->m_clusters; - assert(clusters); - - const long count = m_pSegment->GetCount(); // loaded only, not pre-loaded - assert(count > 0); - - Cluster** const i = clusters + pCluster->GetIndex(); - assert(i); - assert(*i == pCluster); - assert(pCluster->GetTime() <= time_ns); - - Cluster** const j = clusters + count; - - Cluster** lo = i; - Cluster** hi = j; - - while (lo < hi) { - // INVARIANT: - //[i, lo) <= time_ns - //[lo, hi) ? - //[hi, j) > time_ns - - Cluster** const mid = lo + (hi - lo) / 2; - assert(mid < hi); - - pCluster = *mid; - assert(pCluster); - assert(pCluster->GetIndex() >= 0); - assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters)); - - const long long t = pCluster->GetTime(); - - if (t <= time_ns) - lo = mid + 1; - else - hi = mid; - - assert(lo <= hi); - } - - assert(lo == hi); - assert(lo > i); - assert(lo <= j); - - pCluster = *--lo; - assert(pCluster); - assert(pCluster->GetTime() <= time_ns); - - pResult = pCluster->GetEntry(this, time_ns); - - if ((pResult != 0) && !pResult->EOS()) // found a keyframe - return 0; - - while (lo != i) { - pCluster = *--lo; - assert(pCluster); - assert(pCluster->GetTime() <= time_ns); - - pResult = pCluster->GetEntry(this, time_ns); - - if ((pResult != 0) && !pResult->EOS()) - return 0; - } - - // weird: we're on the first cluster, but no keyframe found - // should never happen but we must return something anyway - - pResult = GetEOS(); - return 0; -} 
- -Colour* VideoTrack::GetColour() const { return m_colour; } - -Projection* VideoTrack::GetProjection() const { return m_projection; } - -long long VideoTrack::GetWidth() const { return m_width; } - -long long VideoTrack::GetHeight() const { return m_height; } - -long long VideoTrack::GetDisplayWidth() const { - return m_display_width > 0 ? m_display_width : GetWidth(); -} - -long long VideoTrack::GetDisplayHeight() const { - return m_display_height > 0 ? m_display_height : GetHeight(); -} - -long long VideoTrack::GetDisplayUnit() const { return m_display_unit; } - -long long VideoTrack::GetStereoMode() const { return m_stereo_mode; } - -double VideoTrack::GetFrameRate() const { return m_rate; } - -AudioTrack::AudioTrack(Segment* pSegment, long long element_start, - long long element_size) - : Track(pSegment, element_start, element_size) {} - -long AudioTrack::Parse(Segment* pSegment, const Info& info, - long long element_start, long long element_size, - AudioTrack*& pResult) { - if (pResult) - return -1; - - if (info.type != Track::kAudio) - return -1; - - IMkvReader* const pReader = pSegment->m_pReader; - - const Settings& s = info.settings; - assert(s.start >= 0); - assert(s.size >= 0); - - long long pos = s.start; - assert(pos >= 0); - - const long long stop = pos + s.size; - - double rate = 8000.0; // MKV default - long long channels = 1; - long long bit_depth = 0; - - while (pos < stop) { - long long id, size; - - long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (id == libwebm::kMkvSamplingFrequency) { - status = UnserializeFloat(pReader, pos, size, rate); - - if (status < 0) - return status; - - if (rate <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvChannels) { - channels = UnserializeUInt(pReader, pos, size); - - if (channels <= 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvBitDepth) { - bit_depth = UnserializeUInt(pReader, pos, size); - - if 
(bit_depth <= 0) - return E_FILE_FORMAT_INVALID; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - AudioTrack* const pTrack = - new (std::nothrow) AudioTrack(pSegment, element_start, element_size); - - if (pTrack == NULL) - return -1; // generic error - - const int status = info.Copy(pTrack->m_info); - - if (status) { - delete pTrack; - return status; - } - - pTrack->m_rate = rate; - pTrack->m_channels = channels; - pTrack->m_bitDepth = bit_depth; - - pResult = pTrack; - return 0; // success -} - -double AudioTrack::GetSamplingRate() const { return m_rate; } - -long long AudioTrack::GetChannels() const { return m_channels; } - -long long AudioTrack::GetBitDepth() const { return m_bitDepth; } - -Tracks::Tracks(Segment* pSegment, long long start, long long size_, - long long element_start, long long element_size) - : m_pSegment(pSegment), - m_start(start), - m_size(size_), - m_element_start(element_start), - m_element_size(element_size), - m_trackEntries(NULL), - m_trackEntriesEnd(NULL) {} - -long Tracks::Parse() { - assert(m_trackEntries == NULL); - assert(m_trackEntriesEnd == NULL); - - const long long stop = m_start + m_size; - IMkvReader* const pReader = m_pSegment->m_pReader; - - int count = 0; - long long pos = m_start; - - while (pos < stop) { - long long id, size; - - const long status = ParseElementHeader(pReader, pos, stop, id, size); - - if (status < 0) // error - return status; - - if (size == 0) // weird - continue; - - if (id == libwebm::kMkvTrackEntry) - ++count; - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - if (count <= 0) - return 0; // success - - m_trackEntries = new (std::nothrow) Track*[count]; - - if (m_trackEntries == NULL) - return -1; - - m_trackEntriesEnd = m_trackEntries; - - pos = m_start; - - while (pos < stop) { - const long long 
element_start = pos; - - long long id, payload_size; - - const long status = - ParseElementHeader(pReader, pos, stop, id, payload_size); - - if (status < 0) // error - return status; - - if (payload_size == 0) // weird - continue; - - const long long payload_stop = pos + payload_size; - assert(payload_stop <= stop); // checked in ParseElement - - const long long element_size = payload_stop - element_start; - - if (id == libwebm::kMkvTrackEntry) { - Track*& pTrack = *m_trackEntriesEnd; - pTrack = NULL; - - const long status = ParseTrackEntry(pos, payload_size, element_start, - element_size, pTrack); - if (status) - return status; - - if (pTrack) - ++m_trackEntriesEnd; - } - - pos = payload_stop; - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - return 0; // success -} - -unsigned long Tracks::GetTracksCount() const { - const ptrdiff_t result = m_trackEntriesEnd - m_trackEntries; - assert(result >= 0); - - return static_cast<unsigned long>(result); -} - -long Tracks::ParseTrackEntry(long long track_start, long long track_size, - long long element_start, long long element_size, - Track*& pResult) const { - if (pResult) - return -1; - - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long pos = track_start; - const long long track_stop = track_start + track_size; - - Track::Info info; - - info.type = 0; - info.number = 0; - info.uid = 0; - info.defaultDuration = 0; - - Track::Settings v; - v.start = -1; - v.size = -1; - - Track::Settings a; - a.start = -1; - a.size = -1; - - Track::Settings e; // content_encodings_settings; - e.start = -1; - e.size = -1; - - long long lacing = 1; // default is true - - while (pos < track_stop) { - long long id, size; - - const long status = ParseElementHeader(pReader, pos, track_stop, id, size); - - if (status < 0) // error - return status; - - if (size < 0) - return E_FILE_FORMAT_INVALID; - - const long long start = pos; - - if (id == libwebm::kMkvVideo) { - 
v.start = start; - v.size = size; - } else if (id == libwebm::kMkvAudio) { - a.start = start; - a.size = size; - } else if (id == libwebm::kMkvContentEncodings) { - e.start = start; - e.size = size; - } else if (id == libwebm::kMkvTrackUID) { - if (size > 8) - return E_FILE_FORMAT_INVALID; - - info.uid = 0; - - long long pos_ = start; - const long long pos_end = start + size; - - while (pos_ != pos_end) { - unsigned char b; - - const int status = pReader->Read(pos_, 1, &b); - - if (status) - return status; - - info.uid <<= 8; - info.uid |= b; - - ++pos_; - } - } else if (id == libwebm::kMkvTrackNumber) { - const long long num = UnserializeUInt(pReader, pos, size); - - if ((num <= 0) || (num > 127)) - return E_FILE_FORMAT_INVALID; - - info.number = static_cast<long>(num); - } else if (id == libwebm::kMkvTrackType) { - const long long type = UnserializeUInt(pReader, pos, size); - - if ((type <= 0) || (type > 254)) - return E_FILE_FORMAT_INVALID; - - info.type = static_cast<long>(type); - } else if (id == libwebm::kMkvName) { - const long status = - UnserializeString(pReader, pos, size, info.nameAsUTF8); - - if (status) - return status; - } else if (id == libwebm::kMkvLanguage) { - const long status = UnserializeString(pReader, pos, size, info.language); - - if (status) - return status; - } else if (id == libwebm::kMkvDefaultDuration) { - const long long duration = UnserializeUInt(pReader, pos, size); - - if (duration < 0) - return E_FILE_FORMAT_INVALID; - - info.defaultDuration = static_cast<unsigned long long>(duration); - } else if (id == libwebm::kMkvCodecID) { - const long status = UnserializeString(pReader, pos, size, info.codecId); - - if (status) - return status; - } else if (id == libwebm::kMkvFlagLacing) { - lacing = UnserializeUInt(pReader, pos, size); - - if ((lacing < 0) || (lacing > 1)) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvCodecPrivate) { - delete[] info.codecPrivate; - info.codecPrivate = NULL; - info.codecPrivateSize = 0; - - 
const size_t buflen = static_cast<size_t>(size); - - if (buflen) { - unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen); - - if (buf == NULL) - return -1; - - const int status = pReader->Read(pos, static_cast<long>(buflen), buf); - - if (status) { - delete[] buf; - return status; - } - - info.codecPrivate = buf; - info.codecPrivateSize = buflen; - } - } else if (id == libwebm::kMkvCodecName) { - const long status = - UnserializeString(pReader, pos, size, info.codecNameAsUTF8); - - if (status) - return status; - } else if (id == libwebm::kMkvCodecDelay) { - info.codecDelay = UnserializeUInt(pReader, pos, size); - } else if (id == libwebm::kMkvSeekPreRoll) { - info.seekPreRoll = UnserializeUInt(pReader, pos, size); - } - - pos += size; // consume payload - if (pos > track_stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != track_stop) - return E_FILE_FORMAT_INVALID; - - if (info.number <= 0) // not specified - return E_FILE_FORMAT_INVALID; - - if (GetTrackByNumber(info.number)) - return E_FILE_FORMAT_INVALID; - - if (info.type <= 0) // not specified - return E_FILE_FORMAT_INVALID; - - info.lacing = (lacing > 0) ? 
true : false; - - if (info.type == Track::kVideo) { - if (v.start < 0) - return E_FILE_FORMAT_INVALID; - - if (a.start >= 0) - return E_FILE_FORMAT_INVALID; - - info.settings = v; - - VideoTrack* pTrack = NULL; - - const long status = VideoTrack::Parse(m_pSegment, info, element_start, - element_size, pTrack); - - if (status) - return status; - - pResult = pTrack; - assert(pResult); - - if (e.start >= 0) - pResult->ParseContentEncodingsEntry(e.start, e.size); - } else if (info.type == Track::kAudio) { - if (a.start < 0) - return E_FILE_FORMAT_INVALID; - - if (v.start >= 0) - return E_FILE_FORMAT_INVALID; - - info.settings = a; - - AudioTrack* pTrack = NULL; - - const long status = AudioTrack::Parse(m_pSegment, info, element_start, - element_size, pTrack); - - if (status) - return status; - - pResult = pTrack; - assert(pResult); - - if (e.start >= 0) - pResult->ParseContentEncodingsEntry(e.start, e.size); - } else { - // neither video nor audio - probably metadata or subtitles - - if (a.start >= 0) - return E_FILE_FORMAT_INVALID; - - if (v.start >= 0) - return E_FILE_FORMAT_INVALID; - - if (info.type == Track::kMetadata && e.start >= 0) - return E_FILE_FORMAT_INVALID; - - info.settings.start = -1; - info.settings.size = 0; - - Track* pTrack = NULL; - - const long status = - Track::Create(m_pSegment, info, element_start, element_size, pTrack); - - if (status) - return status; - - pResult = pTrack; - assert(pResult); - } - - return 0; // success -} - -Tracks::~Tracks() { - Track** i = m_trackEntries; - Track** const j = m_trackEntriesEnd; - - while (i != j) { - Track* const pTrack = *i++; - delete pTrack; - } - - delete[] m_trackEntries; -} - -const Track* Tracks::GetTrackByNumber(long tn) const { - if (tn < 0) - return NULL; - - Track** i = m_trackEntries; - Track** const j = m_trackEntriesEnd; - - while (i != j) { - Track* const pTrack = *i++; - - if (pTrack == NULL) - continue; - - if (tn == pTrack->GetNumber()) - return pTrack; - } - - return NULL; // not found -} 
- -const Track* Tracks::GetTrackByIndex(unsigned long idx) const { - const ptrdiff_t count = m_trackEntriesEnd - m_trackEntries; - - if (idx >= static_cast<unsigned long>(count)) - return NULL; - - return m_trackEntries[idx]; -} - -long Cluster::Load(long long& pos, long& len) const { - if (m_pSegment == NULL) - return E_PARSE_FAILED; - - if (m_timecode >= 0) // at least partially loaded - return 0; - - if (m_pos != m_element_start || m_element_size >= 0) - return E_PARSE_FAILED; - - IMkvReader* const pReader = m_pSegment->m_pReader; - long long total, avail; - const int status = pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - if (total >= 0 && (avail > total || m_pos > total)) - return E_FILE_FORMAT_INVALID; - - pos = m_pos; - - long long cluster_size = -1; - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error or underflow - return static_cast<long>(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id_ = ReadID(pReader, pos, len); - - if (id_ < 0) // error - return static_cast<long>(id_); - - if (id_ != libwebm::kMkvCluster) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume id - - // read cluster size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return static_cast<long>(cluster_size); - - if (size == 0) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume length of size of element - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size != unknown_size) - cluster_size = size; - - // pos points 
to start of payload - long long timecode = -1; - long long new_pos = -1; - bool bBlock = false; - - long long cluster_stop = (cluster_size < 0) ? -1 : pos + cluster_size; - - for (;;) { - if ((cluster_stop >= 0) && (pos >= cluster_stop)) - break; - - // Parse ID - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id = ReadID(pReader, pos, len); - - if (id < 0) // error - return static_cast<long>(id); - - if (id == 0) - return E_FILE_FORMAT_INVALID; - - // This is the distinguished set of ID's we use to determine - // that we have exhausted the sub-element's inside the cluster - // whose ID we parsed earlier. - - if (id == libwebm::kMkvCluster) - break; - - if (id == libwebm::kMkvCues) - break; - - pos += len; // consume ID field - - // Parse Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return static_cast<long>(size); - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume size field - - if ((cluster_stop >= 0) && (pos > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - // pos now points to start of payload - - if (size == 0) - continue; - - if ((cluster_stop >= 0) && ((pos + size) > cluster_stop)) - 
return E_FILE_FORMAT_INVALID; - - if (id == libwebm::kMkvTimecode) { - len = static_cast<long>(size); - - if ((pos + size) > avail) - return E_BUFFER_NOT_FULL; - - timecode = UnserializeUInt(pReader, pos, size); - - if (timecode < 0) // error (or underflow) - return static_cast<long>(timecode); - - new_pos = pos + size; - - if (bBlock) - break; - } else if (id == libwebm::kMkvBlockGroup) { - bBlock = true; - break; - } else if (id == libwebm::kMkvSimpleBlock) { - bBlock = true; - break; - } - - pos += size; // consume payload - if (cluster_stop >= 0 && pos > cluster_stop) - return E_FILE_FORMAT_INVALID; - } - - if (cluster_stop >= 0 && pos > cluster_stop) - return E_FILE_FORMAT_INVALID; - - if (timecode < 0) // no timecode found - return E_FILE_FORMAT_INVALID; - - if (!bBlock) - return E_FILE_FORMAT_INVALID; - - m_pos = new_pos; // designates position just beyond timecode payload - m_timecode = timecode; // m_timecode >= 0 means we're partially loaded - - if (cluster_size >= 0) - m_element_size = cluster_stop - m_element_start; - - return 0; -} - -long Cluster::Parse(long long& pos, long& len) const { - long status = Load(pos, len); - - if (status < 0) - return status; - - if (m_pos < m_element_start || m_timecode < 0) - return E_PARSE_FAILED; - - const long long cluster_stop = - (m_element_size < 0) ? 
-1 : m_element_start + m_element_size; - - if ((cluster_stop >= 0) && (m_pos >= cluster_stop)) - return 1; // nothing else to do - - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long total, avail; - - status = pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - if (total >= 0 && avail > total) - return E_FILE_FORMAT_INVALID; - - pos = m_pos; - - for (;;) { - if ((cluster_stop >= 0) && (pos >= cluster_stop)) - break; - - if ((total >= 0) && (pos >= total)) { - if (m_element_size < 0) - m_element_size = pos - m_element_start; - - break; - } - - // Parse ID - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id = ReadID(pReader, pos, len); - - if (id < 0) - return E_FILE_FORMAT_INVALID; - - // This is the distinguished set of ID's we use to determine - // that we have exhausted the sub-element's inside the cluster - // whose ID we parsed earlier. 
- - if ((id == libwebm::kMkvCluster) || (id == libwebm::kMkvCues)) { - if (m_element_size < 0) - m_element_size = pos - m_element_start; - - break; - } - - pos += len; // consume ID field - - // Parse Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) - return E_BUFFER_NOT_FULL; - - if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return static_cast<long>(size); - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume size field - - if ((cluster_stop >= 0) && (pos > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - // pos now points to start of payload - - if (size == 0) - continue; - - // const long long block_start = pos; - const long long block_stop = pos + size; - - if (cluster_stop >= 0) { - if (block_stop > cluster_stop) { - if (id == libwebm::kMkvBlockGroup || id == libwebm::kMkvSimpleBlock) { - return E_FILE_FORMAT_INVALID; - } - - pos = cluster_stop; - break; - } - } else if ((total >= 0) && (block_stop > total)) { - m_element_size = total - m_element_start; - pos = total; - break; - } else if (block_stop > avail) { - len = static_cast<long>(size); - return E_BUFFER_NOT_FULL; - } - - Cluster* const this_ = const_cast<Cluster*>(this); - - if (id == libwebm::kMkvBlockGroup) - return this_->ParseBlockGroup(size, pos, len); - - if (id == libwebm::kMkvSimpleBlock) - return this_->ParseSimpleBlock(size, pos, len); - - pos += size; // consume payload - if (cluster_stop >= 0 && pos > cluster_stop) - return E_FILE_FORMAT_INVALID; - } - - if (m_element_size < 1) - return E_FILE_FORMAT_INVALID; - - m_pos = pos; - if (cluster_stop >= 0 && m_pos 
> cluster_stop) - return E_FILE_FORMAT_INVALID; - - if (m_entries_count > 0) { - const long idx = m_entries_count - 1; - - const BlockEntry* const pLast = m_entries[idx]; - if (pLast == NULL) - return E_PARSE_FAILED; - - const Block* const pBlock = pLast->GetBlock(); - if (pBlock == NULL) - return E_PARSE_FAILED; - - const long long start = pBlock->m_start; - - if ((total >= 0) && (start > total)) - return E_PARSE_FAILED; // defend against trucated stream - - const long long size = pBlock->m_size; - - const long long stop = start + size; - if (cluster_stop >= 0 && stop > cluster_stop) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && (stop > total)) - return E_PARSE_FAILED; // defend against trucated stream - } - - return 1; // no more entries -} - -long Cluster::ParseSimpleBlock(long long block_size, long long& pos, - long& len) { - const long long block_start = pos; - const long long block_stop = pos + block_size; - - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long total, avail; - - long status = pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - assert((total < 0) || (avail <= total)); - - // parse track number - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((pos + len) > block_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long track = ReadUInt(pReader, pos, len); - - if (track < 0) // error - return static_cast<long>(track); - - if (track == 0) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume track number - - if ((pos + 2) > block_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + 2) > avail) { - len = 2; - return E_BUFFER_NOT_FULL; - } - - pos += 2; // consume timecode - - if ((pos + 1) > block_stop) - return 
E_FILE_FORMAT_INVALID; - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - unsigned char flags; - - status = pReader->Read(pos, 1, &flags); - - if (status < 0) { // error or underflow - len = 1; - return status; - } - - ++pos; // consume flags byte - assert(pos <= avail); - - if (pos >= block_stop) - return E_FILE_FORMAT_INVALID; - - const int lacing = int(flags & 0x06) >> 1; - - if ((lacing != 0) && (block_stop > avail)) { - len = static_cast<long>(block_stop - pos); - return E_BUFFER_NOT_FULL; - } - - status = CreateBlock(libwebm::kMkvSimpleBlock, block_start, block_size, - 0); // DiscardPadding - - if (status != 0) - return status; - - m_pos = block_stop; - - return 0; // success -} - -long Cluster::ParseBlockGroup(long long payload_size, long long& pos, - long& len) { - const long long payload_start = pos; - const long long payload_stop = pos + payload_size; - - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long total, avail; - - long status = pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - assert((total < 0) || (avail <= total)); - - if ((total >= 0) && (payload_stop > total)) - return E_FILE_FORMAT_INVALID; - - if (payload_stop > avail) { - len = static_cast<long>(payload_size); - return E_BUFFER_NOT_FULL; - } - - long long discard_padding = 0; - - while (pos < payload_stop) { - // parse sub-block element ID - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((pos + len) > payload_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id = ReadID(pReader, pos, len); - - if (id < 0) // error - return static_cast<long>(id); - - if (id == 0) // not a valid ID - return E_FILE_FORMAT_INVALID; - - pos += len; // consume ID field - 
- // Parse Size - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((pos + len) > payload_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return static_cast<long>(size); - - pos += len; // consume size field - - // pos now points to start of sub-block group payload - - if (pos > payload_stop) - return E_FILE_FORMAT_INVALID; - - if (size == 0) // weird - continue; - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; - - if (id == libwebm::kMkvDiscardPadding) { - status = UnserializeInt(pReader, pos, size, discard_padding); - - if (status < 0) // error - return status; - } - - if (id != libwebm::kMkvBlock) { - pos += size; // consume sub-part of block group - - if (pos > payload_stop) - return E_FILE_FORMAT_INVALID; - - continue; - } - - const long long block_stop = pos + size; - - if (block_stop > payload_stop) - return E_FILE_FORMAT_INVALID; - - // parse track number - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((pos + len) > block_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long track = ReadUInt(pReader, pos, len); - - if (track < 0) // error - return static_cast<long>(track); - - if (track == 0) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume track number - - if ((pos + 2) > block_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + 2) > avail) { - len = 2; - return E_BUFFER_NOT_FULL; - } - - pos += 
2; // consume timecode - - if ((pos + 1) > block_stop) - return E_FILE_FORMAT_INVALID; - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - unsigned char flags; - - status = pReader->Read(pos, 1, &flags); - - if (status < 0) { // error or underflow - len = 1; - return status; - } - - ++pos; // consume flags byte - assert(pos <= avail); - - if (pos >= block_stop) - return E_FILE_FORMAT_INVALID; - - const int lacing = int(flags & 0x06) >> 1; - - if ((lacing != 0) && (block_stop > avail)) { - len = static_cast<long>(block_stop - pos); - return E_BUFFER_NOT_FULL; - } - - pos = block_stop; // consume block-part of block group - if (pos > payload_stop) - return E_FILE_FORMAT_INVALID; - } - - if (pos != payload_stop) - return E_FILE_FORMAT_INVALID; - - status = CreateBlock(libwebm::kMkvBlockGroup, payload_start, payload_size, - discard_padding); - if (status != 0) - return status; - - m_pos = payload_stop; - - return 0; // success -} - -long Cluster::GetEntry(long index, const mkvparser::BlockEntry*& pEntry) const { - assert(m_pos >= m_element_start); - - pEntry = NULL; - - if (index < 0) - return -1; // generic error - - if (m_entries_count < 0) - return E_BUFFER_NOT_FULL; - - assert(m_entries); - assert(m_entries_size > 0); - assert(m_entries_count <= m_entries_size); - - if (index < m_entries_count) { - pEntry = m_entries[index]; - assert(pEntry); - - return 1; // found entry - } - - if (m_element_size < 0) // we don't know cluster end yet - return E_BUFFER_NOT_FULL; // underflow - - const long long element_stop = m_element_start + m_element_size; - - if (m_pos >= element_stop) - return 0; // nothing left to parse - - return E_BUFFER_NOT_FULL; // underflow, since more remains to be parsed -} - -Cluster* Cluster::Create(Segment* pSegment, long idx, long long off) { - if (!pSegment || off < 0) - return NULL; - - const long long element_start = pSegment->m_start + off; - - Cluster* const pCluster = - new (std::nothrow) Cluster(pSegment, idx, 
element_start); - - return pCluster; -} - -Cluster::Cluster() - : m_pSegment(NULL), - m_element_start(0), - m_index(0), - m_pos(0), - m_element_size(0), - m_timecode(0), - m_entries(NULL), - m_entries_size(0), - m_entries_count(0) // means "no entries" -{} - -Cluster::Cluster(Segment* pSegment, long idx, long long element_start - /* long long element_size */) - : m_pSegment(pSegment), - m_element_start(element_start), - m_index(idx), - m_pos(element_start), - m_element_size(-1 /* element_size */), - m_timecode(-1), - m_entries(NULL), - m_entries_size(0), - m_entries_count(-1) // means "has not been parsed yet" -{} - -Cluster::~Cluster() { - if (m_entries_count <= 0) { - delete[] m_entries; - return; - } - - BlockEntry** i = m_entries; - BlockEntry** const j = m_entries + m_entries_count; - - while (i != j) { - BlockEntry* p = *i++; - assert(p); - - delete p; - } - - delete[] m_entries; -} - -bool Cluster::EOS() const { return (m_pSegment == NULL); } - -long Cluster::GetIndex() const { return m_index; } - -long long Cluster::GetPosition() const { - const long long pos = m_element_start - m_pSegment->m_start; - assert(pos >= 0); - - return pos; -} - -long long Cluster::GetElementSize() const { return m_element_size; } - -long Cluster::HasBlockEntries( - const Segment* pSegment, - long long off, // relative to start of segment payload - long long& pos, long& len) { - assert(pSegment); - assert(off >= 0); // relative to segment - - IMkvReader* const pReader = pSegment->m_pReader; - - long long total, avail; - - long status = pReader->Length(&total, &avail); - - if (status < 0) // error - return status; - - assert((total < 0) || (avail <= total)); - - pos = pSegment->m_start + off; // absolute - - if ((total >= 0) && (pos >= total)) - return 0; // we don't even have a complete cluster - - const long long segment_stop = - (pSegment->m_size < 0) ? 
-1 : pSegment->m_start + pSegment->m_size; - - long long cluster_stop = -1; // interpreted later to mean "unknown size" - - { - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // need more data - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && ((pos + len) > total)) - return 0; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id = ReadID(pReader, pos, len); - - if (id < 0) // error - return static_cast<long>(id); - - if (id != libwebm::kMkvCluster) - return E_PARSE_FAILED; - - pos += len; // consume Cluster ID field - - // read size field - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // weird - return E_BUFFER_NOT_FULL; - - if ((segment_stop >= 0) && ((pos + len) > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && ((pos + len) > total)) - return 0; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return static_cast<long>(size); - - if (size == 0) - return 0; // cluster does not have entries - - pos += len; // consume size field - - // pos now points to start of payload - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size != unknown_size) { - cluster_stop = pos + size; - assert(cluster_stop >= 0); - - if ((segment_stop >= 0) && (cluster_stop > segment_stop)) - return E_FILE_FORMAT_INVALID; - - if ((total >= 0) && (cluster_stop > total)) - // return E_FILE_FORMAT_INVALID; //too conservative - return 0; // cluster does not have any entries - } - } - - for (;;) { - if ((cluster_stop >= 0) && (pos >= 
cluster_stop)) - return 0; // no entries detected - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - long long result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // need more data - return E_BUFFER_NOT_FULL; - - if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long id = ReadID(pReader, pos, len); - - if (id < 0) // error - return static_cast<long>(id); - - // This is the distinguished set of ID's we use to determine - // that we have exhausted the sub-element's inside the cluster - // whose ID we parsed earlier. - - if (id == libwebm::kMkvCluster) - return 0; // no entries found - - if (id == libwebm::kMkvCues) - return 0; // no entries found - - pos += len; // consume id field - - if ((cluster_stop >= 0) && (pos >= cluster_stop)) - return E_FILE_FORMAT_INVALID; - - // read size field - - if ((pos + 1) > avail) { - len = 1; - return E_BUFFER_NOT_FULL; - } - - result = GetUIntLength(pReader, pos, len); - - if (result < 0) // error - return static_cast<long>(result); - - if (result > 0) // underflow - return E_BUFFER_NOT_FULL; - - if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > avail) - return E_BUFFER_NOT_FULL; - - const long long size = ReadUInt(pReader, pos, len); - - if (size < 0) // error - return static_cast<long>(size); - - pos += len; // consume size field - - // pos now points to start of payload - - if ((cluster_stop >= 0) && (pos > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if (size == 0) // weird - continue; - - const long long unknown_size = (1LL << (7 * len)) - 1; - - if (size == unknown_size) - return E_FILE_FORMAT_INVALID; // not supported inside cluster - - if ((cluster_stop >= 0) && ((pos + size) > cluster_stop)) - return E_FILE_FORMAT_INVALID; - - if (id == 
libwebm::kMkvBlockGroup) - return 1; // have at least one entry - - if (id == libwebm::kMkvSimpleBlock) - return 1; // have at least one entry - - pos += size; // consume payload - if (cluster_stop >= 0 && pos > cluster_stop) - return E_FILE_FORMAT_INVALID; - } -} - -long long Cluster::GetTimeCode() const { - long long pos; - long len; - - const long status = Load(pos, len); - - if (status < 0) // error - return status; - - return m_timecode; -} - -long long Cluster::GetTime() const { - const long long tc = GetTimeCode(); - - if (tc < 0) - return tc; - - const SegmentInfo* const pInfo = m_pSegment->GetInfo(); - assert(pInfo); - - const long long scale = pInfo->GetTimeCodeScale(); - assert(scale >= 1); - - const long long t = m_timecode * scale; - - return t; -} - -long long Cluster::GetFirstTime() const { - const BlockEntry* pEntry; - - const long status = GetFirst(pEntry); - - if (status < 0) // error - return status; - - if (pEntry == NULL) // empty cluster - return GetTime(); - - const Block* const pBlock = pEntry->GetBlock(); - assert(pBlock); - - return pBlock->GetTime(this); -} - -long long Cluster::GetLastTime() const { - const BlockEntry* pEntry; - - const long status = GetLast(pEntry); - - if (status < 0) // error - return status; - - if (pEntry == NULL) // empty cluster - return GetTime(); - - const Block* const pBlock = pEntry->GetBlock(); - assert(pBlock); - - return pBlock->GetTime(this); -} - -long Cluster::CreateBlock(long long id, - long long pos, // absolute pos of payload - long long size, long long discard_padding) { - if (id != libwebm::kMkvBlockGroup && id != libwebm::kMkvSimpleBlock) - return E_PARSE_FAILED; - - if (m_entries_count < 0) { // haven't parsed anything yet - assert(m_entries == NULL); - assert(m_entries_size == 0); - - m_entries_size = 1024; - m_entries = new (std::nothrow) BlockEntry*[m_entries_size]; - if (m_entries == NULL) - return -1; - - m_entries_count = 0; - } else { - assert(m_entries); - assert(m_entries_size > 0); - 
assert(m_entries_count <= m_entries_size); - - if (m_entries_count >= m_entries_size) { - const long entries_size = 2 * m_entries_size; - - BlockEntry** const entries = new (std::nothrow) BlockEntry*[entries_size]; - if (entries == NULL) - return -1; - - BlockEntry** src = m_entries; - BlockEntry** const src_end = src + m_entries_count; - - BlockEntry** dst = entries; - - while (src != src_end) - *dst++ = *src++; - - delete[] m_entries; - - m_entries = entries; - m_entries_size = entries_size; - } - } - - if (id == libwebm::kMkvBlockGroup) - return CreateBlockGroup(pos, size, discard_padding); - else - return CreateSimpleBlock(pos, size); -} - -long Cluster::CreateBlockGroup(long long start_offset, long long size, - long long discard_padding) { - assert(m_entries); - assert(m_entries_size > 0); - assert(m_entries_count >= 0); - assert(m_entries_count < m_entries_size); - - IMkvReader* const pReader = m_pSegment->m_pReader; - - long long pos = start_offset; - const long long stop = start_offset + size; - - // For WebM files, there is a bias towards previous reference times - //(in order to support alt-ref frames, which refer back to the previous - // keyframe). Normally a 0 value is not possible, but here we tenatively - // allow 0 as the value of a reference frame, with the interpretation - // that this is a "previous" reference time. 
- - long long prev = 1; // nonce - long long next = 0; // nonce - long long duration = -1; // really, this is unsigned - - long long bpos = -1; - long long bsize = -1; - - while (pos < stop) { - long len; - const long long id = ReadID(pReader, pos, len); - if (id < 0 || (pos + len) > stop) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume ID - - const long long size = ReadUInt(pReader, pos, len); - assert(size >= 0); // TODO - assert((pos + len) <= stop); - - pos += len; // consume size - - if (id == libwebm::kMkvBlock) { - if (bpos < 0) { // Block ID - bpos = pos; - bsize = size; - } - } else if (id == libwebm::kMkvBlockDuration) { - if (size > 8) - return E_FILE_FORMAT_INVALID; - - duration = UnserializeUInt(pReader, pos, size); - - if (duration < 0) - return E_FILE_FORMAT_INVALID; - } else if (id == libwebm::kMkvReferenceBlock) { - if (size > 8 || size <= 0) - return E_FILE_FORMAT_INVALID; - const long size_ = static_cast<long>(size); - - long long time; - - long status = UnserializeInt(pReader, pos, size_, time); - assert(status == 0); - if (status != 0) - return -1; - - if (time <= 0) // see note above - prev = time; - else - next = time; - } - - pos += size; // consume payload - if (pos > stop) - return E_FILE_FORMAT_INVALID; - } - if (bpos < 0) - return E_FILE_FORMAT_INVALID; - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - assert(bsize >= 0); - - const long idx = m_entries_count; - - BlockEntry** const ppEntry = m_entries + idx; - BlockEntry*& pEntry = *ppEntry; - - pEntry = new (std::nothrow) - BlockGroup(this, idx, bpos, bsize, prev, next, duration, discard_padding); - - if (pEntry == NULL) - return -1; // generic error - - BlockGroup* const p = static_cast<BlockGroup*>(pEntry); - - const long status = p->Parse(); - - if (status == 0) { // success - ++m_entries_count; - return 0; - } - - delete pEntry; - pEntry = 0; - - return status; -} - -long Cluster::CreateSimpleBlock(long long st, long long sz) { - assert(m_entries); - 
assert(m_entries_size > 0); - assert(m_entries_count >= 0); - assert(m_entries_count < m_entries_size); - - const long idx = m_entries_count; - - BlockEntry** const ppEntry = m_entries + idx; - BlockEntry*& pEntry = *ppEntry; - - pEntry = new (std::nothrow) SimpleBlock(this, idx, st, sz); - - if (pEntry == NULL) - return -1; // generic error - - SimpleBlock* const p = static_cast<SimpleBlock*>(pEntry); - - const long status = p->Parse(); - - if (status == 0) { - ++m_entries_count; - return 0; - } - - delete pEntry; - pEntry = 0; - - return status; -} - -long Cluster::GetFirst(const BlockEntry*& pFirst) const { - if (m_entries_count <= 0) { - long long pos; - long len; - - const long status = Parse(pos, len); - - if (status < 0) { // error - pFirst = NULL; - return status; - } - - if (m_entries_count <= 0) { // empty cluster - pFirst = NULL; - return 0; - } - } - - assert(m_entries); - - pFirst = m_entries[0]; - assert(pFirst); - - return 0; // success -} - -long Cluster::GetLast(const BlockEntry*& pLast) const { - for (;;) { - long long pos; - long len; - - const long status = Parse(pos, len); - - if (status < 0) { // error - pLast = NULL; - return status; - } - - if (status > 0) // no new block - break; - } - - if (m_entries_count <= 0) { - pLast = NULL; - return 0; - } - - assert(m_entries); - - const long idx = m_entries_count - 1; - - pLast = m_entries[idx]; - assert(pLast); - - return 0; -} - -long Cluster::GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const { - assert(pCurr); - assert(m_entries); - assert(m_entries_count > 0); - - size_t idx = pCurr->GetIndex(); - assert(idx < size_t(m_entries_count)); - assert(m_entries[idx] == pCurr); - - ++idx; - - if (idx >= size_t(m_entries_count)) { - long long pos; - long len; - - const long status = Parse(pos, len); - - if (status < 0) { // error - pNext = NULL; - return status; - } - - if (status > 0) { - pNext = NULL; - return 0; - } - - assert(m_entries); - assert(m_entries_count > 0); - assert(idx < 
size_t(m_entries_count)); - } - - pNext = m_entries[idx]; - assert(pNext); - - return 0; -} - -long Cluster::GetEntryCount() const { return m_entries_count; } - -const BlockEntry* Cluster::GetEntry(const Track* pTrack, - long long time_ns) const { - assert(pTrack); - - if (m_pSegment == NULL) // this is the special EOS cluster - return pTrack->GetEOS(); - - const BlockEntry* pResult = pTrack->GetEOS(); - - long index = 0; - - for (;;) { - if (index >= m_entries_count) { - long long pos; - long len; - - const long status = Parse(pos, len); - assert(status >= 0); - - if (status > 0) // completely parsed, and no more entries - return pResult; - - if (status < 0) // should never happen - return 0; - - assert(m_entries); - assert(index < m_entries_count); - } - - const BlockEntry* const pEntry = m_entries[index]; - assert(pEntry); - assert(!pEntry->EOS()); - - const Block* const pBlock = pEntry->GetBlock(); - assert(pBlock); - - if (pBlock->GetTrackNumber() != pTrack->GetNumber()) { - ++index; - continue; - } - - if (pTrack->VetEntry(pEntry)) { - if (time_ns < 0) // just want first candidate block - return pEntry; - - const long long ns = pBlock->GetTime(this); - - if (ns > time_ns) - return pResult; - - pResult = pEntry; // have a candidate - } else if (time_ns >= 0) { - const long long ns = pBlock->GetTime(this); - - if (ns > time_ns) - return pResult; - } - - ++index; - } -} - -const BlockEntry* Cluster::GetEntry(const CuePoint& cp, - const CuePoint::TrackPosition& tp) const { - assert(m_pSegment); - const long long tc = cp.GetTimeCode(); - - if (tp.m_block > 0) { - const long block = static_cast<long>(tp.m_block); - const long index = block - 1; - - while (index >= m_entries_count) { - long long pos; - long len; - - const long status = Parse(pos, len); - - if (status < 0) // TODO: can this happen? 
- return NULL; - - if (status > 0) // nothing remains to be parsed - return NULL; - } - - const BlockEntry* const pEntry = m_entries[index]; - assert(pEntry); - assert(!pEntry->EOS()); - - const Block* const pBlock = pEntry->GetBlock(); - assert(pBlock); - - if ((pBlock->GetTrackNumber() == tp.m_track) && - (pBlock->GetTimeCode(this) == tc)) { - return pEntry; - } - } - - long index = 0; - - for (;;) { - if (index >= m_entries_count) { - long long pos; - long len; - - const long status = Parse(pos, len); - - if (status < 0) // TODO: can this happen? - return NULL; - - if (status > 0) // nothing remains to be parsed - return NULL; - - assert(m_entries); - assert(index < m_entries_count); - } - - const BlockEntry* const pEntry = m_entries[index]; - assert(pEntry); - assert(!pEntry->EOS()); - - const Block* const pBlock = pEntry->GetBlock(); - assert(pBlock); - - if (pBlock->GetTrackNumber() != tp.m_track) { - ++index; - continue; - } - - const long long tc_ = pBlock->GetTimeCode(this); - - if (tc_ < tc) { - ++index; - continue; - } - - if (tc_ > tc) - return NULL; - - const Tracks* const pTracks = m_pSegment->GetTracks(); - assert(pTracks); - - const long tn = static_cast<long>(tp.m_track); - const Track* const pTrack = pTracks->GetTrackByNumber(tn); - - if (pTrack == NULL) - return NULL; - - const long long type = pTrack->GetType(); - - if (type == 2) // audio - return pEntry; - - if (type != 1) // not video - return NULL; - - if (!pBlock->IsKey()) - return NULL; - - return pEntry; - } -} - -BlockEntry::BlockEntry(Cluster* p, long idx) : m_pCluster(p), m_index(idx) {} -BlockEntry::~BlockEntry() {} -const Cluster* BlockEntry::GetCluster() const { return m_pCluster; } -long BlockEntry::GetIndex() const { return m_index; } - -SimpleBlock::SimpleBlock(Cluster* pCluster, long idx, long long start, - long long size) - : BlockEntry(pCluster, idx), m_block(start, size, 0) {} - -long SimpleBlock::Parse() { return m_block.Parse(m_pCluster); } -BlockEntry::Kind 
SimpleBlock::GetKind() const { return kBlockSimple; } -const Block* SimpleBlock::GetBlock() const { return &m_block; } - -BlockGroup::BlockGroup(Cluster* pCluster, long idx, long long block_start, - long long block_size, long long prev, long long next, - long long duration, long long discard_padding) - : BlockEntry(pCluster, idx), - m_block(block_start, block_size, discard_padding), - m_prev(prev), - m_next(next), - m_duration(duration) {} - -long BlockGroup::Parse() { - const long status = m_block.Parse(m_pCluster); - - if (status) - return status; - - m_block.SetKey((m_prev > 0) && (m_next <= 0)); - - return 0; -} - -BlockEntry::Kind BlockGroup::GetKind() const { return kBlockGroup; } -const Block* BlockGroup::GetBlock() const { return &m_block; } -long long BlockGroup::GetPrevTimeCode() const { return m_prev; } -long long BlockGroup::GetNextTimeCode() const { return m_next; } -long long BlockGroup::GetDurationTimeCode() const { return m_duration; } - -Block::Block(long long start, long long size_, long long discard_padding) - : m_start(start), - m_size(size_), - m_track(0), - m_timecode(-1), - m_flags(0), - m_frames(NULL), - m_frame_count(-1), - m_discard_padding(discard_padding) {} - -Block::~Block() { delete[] m_frames; } - -long Block::Parse(const Cluster* pCluster) { - if (pCluster == NULL) - return -1; - - if (pCluster->m_pSegment == NULL) - return -1; - - assert(m_start >= 0); - assert(m_size >= 0); - assert(m_track <= 0); - assert(m_frames == NULL); - assert(m_frame_count <= 0); - - long long pos = m_start; - const long long stop = m_start + m_size; - - long len; - - IMkvReader* const pReader = pCluster->m_pSegment->m_pReader; - - m_track = ReadUInt(pReader, pos, len); - - if (m_track <= 0) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > stop) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume track number - - if ((stop - pos) < 2) - return E_FILE_FORMAT_INVALID; - - long status; - long long value; - - status = UnserializeInt(pReader, pos, 
2, value); - - if (status) - return E_FILE_FORMAT_INVALID; - - if (value < SHRT_MIN) - return E_FILE_FORMAT_INVALID; - - if (value > SHRT_MAX) - return E_FILE_FORMAT_INVALID; - - m_timecode = static_cast<short>(value); - - pos += 2; - - if ((stop - pos) <= 0) - return E_FILE_FORMAT_INVALID; - - status = pReader->Read(pos, 1, &m_flags); - - if (status) - return E_FILE_FORMAT_INVALID; - - const int lacing = int(m_flags & 0x06) >> 1; - - ++pos; // consume flags byte - - if (lacing == 0) { // no lacing - if (pos > stop) - return E_FILE_FORMAT_INVALID; - - m_frame_count = 1; - m_frames = new (std::nothrow) Frame[m_frame_count]; - if (m_frames == NULL) - return -1; - - Frame& f = m_frames[0]; - f.pos = pos; - - const long long frame_size = stop - pos; - - if (frame_size > LONG_MAX || frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - f.len = static_cast<long>(frame_size); - - return 0; // success - } - - if (pos >= stop) - return E_FILE_FORMAT_INVALID; - - unsigned char biased_count; - - status = pReader->Read(pos, 1, &biased_count); - - if (status) - return E_FILE_FORMAT_INVALID; - - ++pos; // consume frame count - if (pos > stop) - return E_FILE_FORMAT_INVALID; - - m_frame_count = int(biased_count) + 1; - - m_frames = new (std::nothrow) Frame[m_frame_count]; - if (m_frames == NULL) - return -1; - - if (!m_frames) - return E_FILE_FORMAT_INVALID; - - if (lacing == 1) { // Xiph - Frame* pf = m_frames; - Frame* const pf_end = pf + m_frame_count; - - long long size = 0; - int frame_count = m_frame_count; - - while (frame_count > 1) { - long frame_size = 0; - - for (;;) { - unsigned char val; - - if (pos >= stop) - return E_FILE_FORMAT_INVALID; - - status = pReader->Read(pos, 1, &val); - - if (status) - return E_FILE_FORMAT_INVALID; - - ++pos; // consume xiph size byte - - frame_size += val; - - if (val < 255) - break; - } - - Frame& f = *pf++; - assert(pf < pf_end); - if (pf >= pf_end) - return E_FILE_FORMAT_INVALID; - - f.pos = 0; // patch later - - if (frame_size <= 0) 
- return E_FILE_FORMAT_INVALID; - - f.len = frame_size; - size += frame_size; // contribution of this frame - - --frame_count; - } - - if (pf >= pf_end || pos > stop) - return E_FILE_FORMAT_INVALID; - - { - Frame& f = *pf++; - - if (pf != pf_end) - return E_FILE_FORMAT_INVALID; - - f.pos = 0; // patch later - - const long long total_size = stop - pos; - - if (total_size < size) - return E_FILE_FORMAT_INVALID; - - const long long frame_size = total_size - size; - - if (frame_size > LONG_MAX || frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - f.len = static_cast<long>(frame_size); - } - - pf = m_frames; - while (pf != pf_end) { - Frame& f = *pf++; - assert((pos + f.len) <= stop); - - if ((pos + f.len) > stop) - return E_FILE_FORMAT_INVALID; - - f.pos = pos; - pos += f.len; - } - - assert(pos == stop); - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - } else if (lacing == 2) { // fixed-size lacing - if (pos >= stop) - return E_FILE_FORMAT_INVALID; - - const long long total_size = stop - pos; - - if ((total_size % m_frame_count) != 0) - return E_FILE_FORMAT_INVALID; - - const long long frame_size = total_size / m_frame_count; - - if (frame_size > LONG_MAX || frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - Frame* pf = m_frames; - Frame* const pf_end = pf + m_frame_count; - - while (pf != pf_end) { - assert((pos + frame_size) <= stop); - if ((pos + frame_size) > stop) - return E_FILE_FORMAT_INVALID; - - Frame& f = *pf++; - - f.pos = pos; - f.len = static_cast<long>(frame_size); - - pos += frame_size; - } - - assert(pos == stop); - if (pos != stop) - return E_FILE_FORMAT_INVALID; - - } else { - assert(lacing == 3); // EBML lacing - - if (pos >= stop) - return E_FILE_FORMAT_INVALID; - - long long size = 0; - int frame_count = m_frame_count; - - long long frame_size = ReadUInt(pReader, pos, len); - - if (frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - if (frame_size > LONG_MAX) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > stop) - return 
E_FILE_FORMAT_INVALID; - - pos += len; // consume length of size of first frame - - if ((pos + frame_size) > stop) - return E_FILE_FORMAT_INVALID; - - Frame* pf = m_frames; - Frame* const pf_end = pf + m_frame_count; - - { - Frame& curr = *pf; - - curr.pos = 0; // patch later - - curr.len = static_cast<long>(frame_size); - size += curr.len; // contribution of this frame - } - - --frame_count; - - while (frame_count > 1) { - if (pos >= stop) - return E_FILE_FORMAT_INVALID; - - assert(pf < pf_end); - if (pf >= pf_end) - return E_FILE_FORMAT_INVALID; - - const Frame& prev = *pf++; - assert(prev.len == frame_size); - if (prev.len != frame_size) - return E_FILE_FORMAT_INVALID; - - assert(pf < pf_end); - if (pf >= pf_end) - return E_FILE_FORMAT_INVALID; - - Frame& curr = *pf; - - curr.pos = 0; // patch later - - const long long delta_size_ = ReadUInt(pReader, pos, len); - - if (delta_size_ < 0) - return E_FILE_FORMAT_INVALID; - - if ((pos + len) > stop) - return E_FILE_FORMAT_INVALID; - - pos += len; // consume length of (delta) size - if (pos > stop) - return E_FILE_FORMAT_INVALID; - - const long exp = 7 * len - 1; - const long long bias = (1LL << exp) - 1LL; - const long long delta_size = delta_size_ - bias; - - frame_size += delta_size; - - if (frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - if (frame_size > LONG_MAX) - return E_FILE_FORMAT_INVALID; - - curr.len = static_cast<long>(frame_size); - // Check if size + curr.len could overflow. 
- if (size > LLONG_MAX - curr.len) { - return E_FILE_FORMAT_INVALID; - } - size += curr.len; // contribution of this frame - - --frame_count; - } - - // parse last frame - if (frame_count > 0) { - if (pos > stop || pf >= pf_end) - return E_FILE_FORMAT_INVALID; - - const Frame& prev = *pf++; - assert(prev.len == frame_size); - if (prev.len != frame_size) - return E_FILE_FORMAT_INVALID; - - if (pf >= pf_end) - return E_FILE_FORMAT_INVALID; - - Frame& curr = *pf++; - if (pf != pf_end) - return E_FILE_FORMAT_INVALID; - - curr.pos = 0; // patch later - - const long long total_size = stop - pos; - - if (total_size < size) - return E_FILE_FORMAT_INVALID; - - frame_size = total_size - size; - - if (frame_size > LONG_MAX || frame_size <= 0) - return E_FILE_FORMAT_INVALID; - - curr.len = static_cast<long>(frame_size); - } - - pf = m_frames; - while (pf != pf_end) { - Frame& f = *pf++; - if ((pos + f.len) > stop) - return E_FILE_FORMAT_INVALID; - - f.pos = pos; - pos += f.len; - } - - if (pos != stop) - return E_FILE_FORMAT_INVALID; - } - - return 0; // success -} - -long long Block::GetTimeCode(const Cluster* pCluster) const { - if (pCluster == 0) - return m_timecode; - - const long long tc0 = pCluster->GetTimeCode(); - assert(tc0 >= 0); - - // Check if tc0 + m_timecode would overflow. - if (tc0 < 0 || LLONG_MAX - tc0 < m_timecode) { - return -1; - } - - const long long tc = tc0 + m_timecode; - - return tc; // unscaled timecode units -} - -long long Block::GetTime(const Cluster* pCluster) const { - assert(pCluster); - - const long long tc = GetTimeCode(pCluster); - - const Segment* const pSegment = pCluster->m_pSegment; - const SegmentInfo* const pInfo = pSegment->GetInfo(); - assert(pInfo); - - const long long scale = pInfo->GetTimeCodeScale(); - assert(scale >= 1); - - // Check if tc * scale could overflow. 
- if (tc != 0 && scale > LLONG_MAX / tc) { - return -1; - } - const long long ns = tc * scale; - - return ns; -} - -long long Block::GetTrackNumber() const { return m_track; } - -bool Block::IsKey() const { - return ((m_flags & static_cast<unsigned char>(1 << 7)) != 0); -} - -void Block::SetKey(bool bKey) { - if (bKey) - m_flags |= static_cast<unsigned char>(1 << 7); - else - m_flags &= 0x7F; -} - -bool Block::IsInvisible() const { return bool(int(m_flags & 0x08) != 0); } - -Block::Lacing Block::GetLacing() const { - const int value = int(m_flags & 0x06) >> 1; - return static_cast<Lacing>(value); -} - -int Block::GetFrameCount() const { return m_frame_count; } - -const Block::Frame& Block::GetFrame(int idx) const { - assert(idx >= 0); - assert(idx < m_frame_count); - - const Frame& f = m_frames[idx]; - assert(f.pos > 0); - assert(f.len > 0); - - return f; -} - -long Block::Frame::Read(IMkvReader* pReader, unsigned char* buf) const { - assert(pReader); - assert(buf); - - const long status = pReader->Read(pos, len, buf); - return status; -} - -long long Block::GetDiscardPadding() const { return m_discard_padding; } - -} // namespace mkvparser diff --git a/thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.h b/thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.h deleted file mode 100644 index 6dce7e50ba..0000000000 --- a/thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.h +++ /dev/null @@ -1,1145 +0,0 @@ -// Copyright (c) 2012 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. 
-#ifndef MKVPARSER_MKVPARSER_H_ -#define MKVPARSER_MKVPARSER_H_ - -#include <cstddef> - -namespace mkvparser { - -const int E_PARSE_FAILED = -1; -const int E_FILE_FORMAT_INVALID = -2; -const int E_BUFFER_NOT_FULL = -3; - -class IMkvReader { - public: - virtual int Read(long long pos, long len, unsigned char* buf) = 0; - virtual int Length(long long* total, long long* available) = 0; - - public: - virtual ~IMkvReader(); -}; - -template <typename Type> -Type* SafeArrayAlloc(unsigned long long num_elements, - unsigned long long element_size); -long long GetUIntLength(IMkvReader*, long long, long&); -long long ReadUInt(IMkvReader*, long long, long&); -long long ReadID(IMkvReader* pReader, long long pos, long& len); -long long UnserializeUInt(IMkvReader*, long long pos, long long size); - -long UnserializeFloat(IMkvReader*, long long pos, long long size, double&); -long UnserializeInt(IMkvReader*, long long pos, long long size, - long long& result); - -long UnserializeString(IMkvReader*, long long pos, long long size, char*& str); - -long ParseElementHeader(IMkvReader* pReader, - long long& pos, // consume id and size fields - long long stop, // if you know size of element's parent - long long& id, long long& size); - -bool Match(IMkvReader*, long long&, unsigned long, long long&); -bool Match(IMkvReader*, long long&, unsigned long, unsigned char*&, size_t&); - -void GetVersion(int& major, int& minor, int& build, int& revision); - -struct EBMLHeader { - EBMLHeader(); - ~EBMLHeader(); - long long m_version; - long long m_readVersion; - long long m_maxIdLength; - long long m_maxSizeLength; - char* m_docType; - long long m_docTypeVersion; - long long m_docTypeReadVersion; - - long long Parse(IMkvReader*, long long&); - void Init(); -}; - -class Segment; -class Track; -class Cluster; - -class Block { - Block(const Block&); - Block& operator=(const Block&); - - public: - const long long m_start; - const long long m_size; - - Block(long long start, long long size, long long 
discard_padding); - ~Block(); - - long Parse(const Cluster*); - - long long GetTrackNumber() const; - long long GetTimeCode(const Cluster*) const; // absolute, but not scaled - long long GetTime(const Cluster*) const; // absolute, and scaled (ns) - bool IsKey() const; - void SetKey(bool); - bool IsInvisible() const; - - enum Lacing { kLacingNone, kLacingXiph, kLacingFixed, kLacingEbml }; - Lacing GetLacing() const; - - int GetFrameCount() const; // to index frames: [0, count) - - struct Frame { - long long pos; // absolute offset - long len; - - long Read(IMkvReader*, unsigned char*) const; - }; - - const Frame& GetFrame(int frame_index) const; - - long long GetDiscardPadding() const; - - private: - long long m_track; // Track::Number() - short m_timecode; // relative to cluster - unsigned char m_flags; - - Frame* m_frames; - int m_frame_count; - - protected: - const long long m_discard_padding; -}; - -class BlockEntry { - BlockEntry(const BlockEntry&); - BlockEntry& operator=(const BlockEntry&); - - protected: - BlockEntry(Cluster*, long index); - - public: - virtual ~BlockEntry(); - - bool EOS() const { return (GetKind() == kBlockEOS); } - const Cluster* GetCluster() const; - long GetIndex() const; - virtual const Block* GetBlock() const = 0; - - enum Kind { kBlockEOS, kBlockSimple, kBlockGroup }; - virtual Kind GetKind() const = 0; - - protected: - Cluster* const m_pCluster; - const long m_index; -}; - -class SimpleBlock : public BlockEntry { - SimpleBlock(const SimpleBlock&); - SimpleBlock& operator=(const SimpleBlock&); - - public: - SimpleBlock(Cluster*, long index, long long start, long long size); - long Parse(); - - Kind GetKind() const; - const Block* GetBlock() const; - - protected: - Block m_block; -}; - -class BlockGroup : public BlockEntry { - BlockGroup(const BlockGroup&); - BlockGroup& operator=(const BlockGroup&); - - public: - BlockGroup(Cluster*, long index, - long long block_start, // absolute pos of block's payload - long long block_size, // 
size of block's payload - long long prev, long long next, long long duration, - long long discard_padding); - - long Parse(); - - Kind GetKind() const; - const Block* GetBlock() const; - - long long GetPrevTimeCode() const; // relative to block's time - long long GetNextTimeCode() const; // as above - long long GetDurationTimeCode() const; - - private: - Block m_block; - const long long m_prev; - const long long m_next; - const long long m_duration; -}; - -/////////////////////////////////////////////////////////////// -// ContentEncoding element -// Elements used to describe if the track data has been encrypted or -// compressed with zlib or header stripping. -class ContentEncoding { - public: - enum { kCTR = 1 }; - - ContentEncoding(); - ~ContentEncoding(); - - // ContentCompression element names - struct ContentCompression { - ContentCompression(); - ~ContentCompression(); - - unsigned long long algo; - unsigned char* settings; - long long settings_len; - }; - - // ContentEncAESSettings element names - struct ContentEncAESSettings { - ContentEncAESSettings() : cipher_mode(kCTR) {} - ~ContentEncAESSettings() {} - - unsigned long long cipher_mode; - }; - - // ContentEncryption element names - struct ContentEncryption { - ContentEncryption(); - ~ContentEncryption(); - - unsigned long long algo; - unsigned char* key_id; - long long key_id_len; - unsigned char* signature; - long long signature_len; - unsigned char* sig_key_id; - long long sig_key_id_len; - unsigned long long sig_algo; - unsigned long long sig_hash_algo; - - ContentEncAESSettings aes_settings; - }; - - // Returns ContentCompression represented by |idx|. Returns NULL if |idx| - // is out of bounds. - const ContentCompression* GetCompressionByIndex(unsigned long idx) const; - - // Returns number of ContentCompression elements in this ContentEncoding - // element. - unsigned long GetCompressionCount() const; - - // Parses the ContentCompression element from |pReader|. 
|start| is the - // starting offset of the ContentCompression payload. |size| is the size in - // bytes of the ContentCompression payload. |compression| is where the parsed - // values will be stored. - long ParseCompressionEntry(long long start, long long size, - IMkvReader* pReader, - ContentCompression* compression); - - // Returns ContentEncryption represented by |idx|. Returns NULL if |idx| - // is out of bounds. - const ContentEncryption* GetEncryptionByIndex(unsigned long idx) const; - - // Returns number of ContentEncryption elements in this ContentEncoding - // element. - unsigned long GetEncryptionCount() const; - - // Parses the ContentEncAESSettings element from |pReader|. |start| is the - // starting offset of the ContentEncAESSettings payload. |size| is the - // size in bytes of the ContentEncAESSettings payload. |encryption| is - // where the parsed values will be stored. - long ParseContentEncAESSettingsEntry(long long start, long long size, - IMkvReader* pReader, - ContentEncAESSettings* aes); - - // Parses the ContentEncoding element from |pReader|. |start| is the - // starting offset of the ContentEncoding payload. |size| is the size in - // bytes of the ContentEncoding payload. Returns true on success. - long ParseContentEncodingEntry(long long start, long long size, - IMkvReader* pReader); - - // Parses the ContentEncryption element from |pReader|. |start| is the - // starting offset of the ContentEncryption payload. |size| is the size in - // bytes of the ContentEncryption payload. |encryption| is where the parsed - // values will be stored. - long ParseEncryptionEntry(long long start, long long size, - IMkvReader* pReader, ContentEncryption* encryption); - - unsigned long long encoding_order() const { return encoding_order_; } - unsigned long long encoding_scope() const { return encoding_scope_; } - unsigned long long encoding_type() const { return encoding_type_; } - - private: - // Member variables for list of ContentCompression elements. 
- ContentCompression** compression_entries_; - ContentCompression** compression_entries_end_; - - // Member variables for list of ContentEncryption elements. - ContentEncryption** encryption_entries_; - ContentEncryption** encryption_entries_end_; - - // ContentEncoding element names - unsigned long long encoding_order_; - unsigned long long encoding_scope_; - unsigned long long encoding_type_; - - // LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding); - ContentEncoding(const ContentEncoding&); - ContentEncoding& operator=(const ContentEncoding&); -}; - -class Track { - Track(const Track&); - Track& operator=(const Track&); - - public: - class Info; - static long Create(Segment*, const Info&, long long element_start, - long long element_size, Track*&); - - enum Type { kVideo = 1, kAudio = 2, kSubtitle = 0x11, kMetadata = 0x21 }; - - Segment* const m_pSegment; - const long long m_element_start; - const long long m_element_size; - virtual ~Track(); - - long GetType() const; - long GetNumber() const; - unsigned long long GetUid() const; - const char* GetNameAsUTF8() const; - const char* GetLanguage() const; - const char* GetCodecNameAsUTF8() const; - const char* GetCodecId() const; - const unsigned char* GetCodecPrivate(size_t&) const; - bool GetLacing() const; - unsigned long long GetDefaultDuration() const; - unsigned long long GetCodecDelay() const; - unsigned long long GetSeekPreRoll() const; - - const BlockEntry* GetEOS() const; - - struct Settings { - long long start; - long long size; - }; - - class Info { - public: - Info(); - ~Info(); - int Copy(Info&) const; - void Clear(); - long type; - long number; - unsigned long long uid; - unsigned long long defaultDuration; - unsigned long long codecDelay; - unsigned long long seekPreRoll; - char* nameAsUTF8; - char* language; - char* codecId; - char* codecNameAsUTF8; - unsigned char* codecPrivate; - size_t codecPrivateSize; - bool lacing; - Settings settings; - - private: - Info(const Info&); - Info& operator=(const 
Info&); - int CopyStr(char* Info::*str, Info&) const; - }; - - long GetFirst(const BlockEntry*&) const; - long GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const; - virtual bool VetEntry(const BlockEntry*) const; - virtual long Seek(long long time_ns, const BlockEntry*&) const; - - const ContentEncoding* GetContentEncodingByIndex(unsigned long idx) const; - unsigned long GetContentEncodingCount() const; - - long ParseContentEncodingsEntry(long long start, long long size); - - protected: - Track(Segment*, long long element_start, long long element_size); - - Info m_info; - - class EOSBlock : public BlockEntry { - public: - EOSBlock(); - - Kind GetKind() const; - const Block* GetBlock() const; - }; - - EOSBlock m_eos; - - private: - ContentEncoding** content_encoding_entries_; - ContentEncoding** content_encoding_entries_end_; -}; - -struct PrimaryChromaticity { - PrimaryChromaticity() : x(0), y(0) {} - ~PrimaryChromaticity() {} - static bool Parse(IMkvReader* reader, long long read_pos, - long long value_size, bool is_x, - PrimaryChromaticity** chromaticity); - float x; - float y; -}; - -struct MasteringMetadata { - static const float kValueNotPresent; - - MasteringMetadata() - : r(NULL), - g(NULL), - b(NULL), - white_point(NULL), - luminance_max(kValueNotPresent), - luminance_min(kValueNotPresent) {} - ~MasteringMetadata() { - delete r; - delete g; - delete b; - delete white_point; - } - - static bool Parse(IMkvReader* reader, long long element_start, - long long element_size, - MasteringMetadata** mastering_metadata); - - PrimaryChromaticity* r; - PrimaryChromaticity* g; - PrimaryChromaticity* b; - PrimaryChromaticity* white_point; - float luminance_max; - float luminance_min; -}; - -struct Colour { - static const long long kValueNotPresent; - - // Unless otherwise noted all values assigned upon construction are the - // equivalent of unspecified/default. 
- Colour() - : matrix_coefficients(kValueNotPresent), - bits_per_channel(kValueNotPresent), - chroma_subsampling_horz(kValueNotPresent), - chroma_subsampling_vert(kValueNotPresent), - cb_subsampling_horz(kValueNotPresent), - cb_subsampling_vert(kValueNotPresent), - chroma_siting_horz(kValueNotPresent), - chroma_siting_vert(kValueNotPresent), - range(kValueNotPresent), - transfer_characteristics(kValueNotPresent), - primaries(kValueNotPresent), - max_cll(kValueNotPresent), - max_fall(kValueNotPresent), - mastering_metadata(NULL) {} - ~Colour() { - delete mastering_metadata; - mastering_metadata = NULL; - } - - static bool Parse(IMkvReader* reader, long long element_start, - long long element_size, Colour** colour); - - long long matrix_coefficients; - long long bits_per_channel; - long long chroma_subsampling_horz; - long long chroma_subsampling_vert; - long long cb_subsampling_horz; - long long cb_subsampling_vert; - long long chroma_siting_horz; - long long chroma_siting_vert; - long long range; - long long transfer_characteristics; - long long primaries; - long long max_cll; - long long max_fall; - - MasteringMetadata* mastering_metadata; -}; - -struct Projection { - enum ProjectionType { - kTypeNotPresent = -1, - kRectangular = 0, - kEquirectangular = 1, - kCubeMap = 2, - kMesh = 3, - }; - static const float kValueNotPresent; - Projection() - : type(kTypeNotPresent), - private_data(NULL), - private_data_length(0), - pose_yaw(kValueNotPresent), - pose_pitch(kValueNotPresent), - pose_roll(kValueNotPresent) {} - ~Projection() { delete[] private_data; } - static bool Parse(IMkvReader* reader, long long element_start, - long long element_size, Projection** projection); - - ProjectionType type; - unsigned char* private_data; - size_t private_data_length; - float pose_yaw; - float pose_pitch; - float pose_roll; -}; - -class VideoTrack : public Track { - VideoTrack(const VideoTrack&); - VideoTrack& operator=(const VideoTrack&); - - VideoTrack(Segment*, long long 
element_start, long long element_size); - - public: - virtual ~VideoTrack(); - static long Parse(Segment*, const Info&, long long element_start, - long long element_size, VideoTrack*&); - - long long GetWidth() const; - long long GetHeight() const; - long long GetDisplayWidth() const; - long long GetDisplayHeight() const; - long long GetDisplayUnit() const; - long long GetStereoMode() const; - double GetFrameRate() const; - - bool VetEntry(const BlockEntry*) const; - long Seek(long long time_ns, const BlockEntry*&) const; - - Colour* GetColour() const; - - Projection* GetProjection() const; - - private: - long long m_width; - long long m_height; - long long m_display_width; - long long m_display_height; - long long m_display_unit; - long long m_stereo_mode; - - double m_rate; - - Colour* m_colour; - Projection* m_projection; -}; - -class AudioTrack : public Track { - AudioTrack(const AudioTrack&); - AudioTrack& operator=(const AudioTrack&); - - AudioTrack(Segment*, long long element_start, long long element_size); - - public: - static long Parse(Segment*, const Info&, long long element_start, - long long element_size, AudioTrack*&); - - double GetSamplingRate() const; - long long GetChannels() const; - long long GetBitDepth() const; - - private: - double m_rate; - long long m_channels; - long long m_bitDepth; -}; - -class Tracks { - Tracks(const Tracks&); - Tracks& operator=(const Tracks&); - - public: - Segment* const m_pSegment; - const long long m_start; - const long long m_size; - const long long m_element_start; - const long long m_element_size; - - Tracks(Segment*, long long start, long long size, long long element_start, - long long element_size); - - ~Tracks(); - - long Parse(); - - unsigned long GetTracksCount() const; - - const Track* GetTrackByNumber(long tn) const; - const Track* GetTrackByIndex(unsigned long idx) const; - - private: - Track** m_trackEntries; - Track** m_trackEntriesEnd; - - long ParseTrackEntry(long long payload_start, long long 
payload_size, - long long element_start, long long element_size, - Track*&) const; -}; - -class Chapters { - Chapters(const Chapters&); - Chapters& operator=(const Chapters&); - - public: - Segment* const m_pSegment; - const long long m_start; - const long long m_size; - const long long m_element_start; - const long long m_element_size; - - Chapters(Segment*, long long payload_start, long long payload_size, - long long element_start, long long element_size); - - ~Chapters(); - - long Parse(); - - class Atom; - class Edition; - - class Display { - friend class Atom; - Display(); - Display(const Display&); - ~Display(); - Display& operator=(const Display&); - - public: - const char* GetString() const; - const char* GetLanguage() const; - const char* GetCountry() const; - - private: - void Init(); - void ShallowCopy(Display&) const; - void Clear(); - long Parse(IMkvReader*, long long pos, long long size); - - char* m_string; - char* m_language; - char* m_country; - }; - - class Atom { - friend class Edition; - Atom(); - Atom(const Atom&); - ~Atom(); - Atom& operator=(const Atom&); - - public: - unsigned long long GetUID() const; - const char* GetStringUID() const; - - long long GetStartTimecode() const; - long long GetStopTimecode() const; - - long long GetStartTime(const Chapters*) const; - long long GetStopTime(const Chapters*) const; - - int GetDisplayCount() const; - const Display* GetDisplay(int index) const; - - private: - void Init(); - void ShallowCopy(Atom&) const; - void Clear(); - long Parse(IMkvReader*, long long pos, long long size); - static long long GetTime(const Chapters*, long long timecode); - - long ParseDisplay(IMkvReader*, long long pos, long long size); - bool ExpandDisplaysArray(); - - char* m_string_uid; - unsigned long long m_uid; - long long m_start_timecode; - long long m_stop_timecode; - - Display* m_displays; - int m_displays_size; - int m_displays_count; - }; - - class Edition { - friend class Chapters; - Edition(); - Edition(const 
Edition&); - ~Edition(); - Edition& operator=(const Edition&); - - public: - int GetAtomCount() const; - const Atom* GetAtom(int index) const; - - private: - void Init(); - void ShallowCopy(Edition&) const; - void Clear(); - long Parse(IMkvReader*, long long pos, long long size); - - long ParseAtom(IMkvReader*, long long pos, long long size); - bool ExpandAtomsArray(); - - Atom* m_atoms; - int m_atoms_size; - int m_atoms_count; - }; - - int GetEditionCount() const; - const Edition* GetEdition(int index) const; - - private: - long ParseEdition(long long pos, long long size); - bool ExpandEditionsArray(); - - Edition* m_editions; - int m_editions_size; - int m_editions_count; -}; - -class Tags { - Tags(const Tags&); - Tags& operator=(const Tags&); - - public: - Segment* const m_pSegment; - const long long m_start; - const long long m_size; - const long long m_element_start; - const long long m_element_size; - - Tags(Segment*, long long payload_start, long long payload_size, - long long element_start, long long element_size); - - ~Tags(); - - long Parse(); - - class Tag; - class SimpleTag; - - class SimpleTag { - friend class Tag; - SimpleTag(); - SimpleTag(const SimpleTag&); - ~SimpleTag(); - SimpleTag& operator=(const SimpleTag&); - - public: - const char* GetTagName() const; - const char* GetTagString() const; - - private: - void Init(); - void ShallowCopy(SimpleTag&) const; - void Clear(); - long Parse(IMkvReader*, long long pos, long long size); - - char* m_tag_name; - char* m_tag_string; - }; - - class Tag { - friend class Tags; - Tag(); - Tag(const Tag&); - ~Tag(); - Tag& operator=(const Tag&); - - public: - int GetSimpleTagCount() const; - const SimpleTag* GetSimpleTag(int index) const; - - private: - void Init(); - void ShallowCopy(Tag&) const; - void Clear(); - long Parse(IMkvReader*, long long pos, long long size); - - long ParseSimpleTag(IMkvReader*, long long pos, long long size); - bool ExpandSimpleTagsArray(); - - SimpleTag* m_simple_tags; - int 
m_simple_tags_size; - int m_simple_tags_count; - }; - - int GetTagCount() const; - const Tag* GetTag(int index) const; - - private: - long ParseTag(long long pos, long long size); - bool ExpandTagsArray(); - - Tag* m_tags; - int m_tags_size; - int m_tags_count; -}; - -class SegmentInfo { - SegmentInfo(const SegmentInfo&); - SegmentInfo& operator=(const SegmentInfo&); - - public: - Segment* const m_pSegment; - const long long m_start; - const long long m_size; - const long long m_element_start; - const long long m_element_size; - - SegmentInfo(Segment*, long long start, long long size, - long long element_start, long long element_size); - - ~SegmentInfo(); - - long Parse(); - - long long GetTimeCodeScale() const; - long long GetDuration() const; // scaled - const char* GetMuxingAppAsUTF8() const; - const char* GetWritingAppAsUTF8() const; - const char* GetTitleAsUTF8() const; - - private: - long long m_timecodeScale; - double m_duration; - char* m_pMuxingAppAsUTF8; - char* m_pWritingAppAsUTF8; - char* m_pTitleAsUTF8; -}; - -class SeekHead { - SeekHead(const SeekHead&); - SeekHead& operator=(const SeekHead&); - - public: - Segment* const m_pSegment; - const long long m_start; - const long long m_size; - const long long m_element_start; - const long long m_element_size; - - SeekHead(Segment*, long long start, long long size, long long element_start, - long long element_size); - - ~SeekHead(); - - long Parse(); - - struct Entry { - Entry(); - - // the SeekHead entry payload - long long id; - long long pos; - - // absolute pos of SeekEntry ID - long long element_start; - - // SeekEntry ID size + size size + payload - long long element_size; - }; - - int GetCount() const; - const Entry* GetEntry(int idx) const; - - struct VoidElement { - // absolute pos of Void ID - long long element_start; - - // ID size + size size + payload size - long long element_size; - }; - - int GetVoidElementCount() const; - const VoidElement* GetVoidElement(int idx) const; - - private: - Entry* 
m_entries; - int m_entry_count; - - VoidElement* m_void_elements; - int m_void_element_count; - - static bool ParseEntry(IMkvReader*, - long long pos, // payload - long long size, Entry*); -}; - -class Cues; -class CuePoint { - friend class Cues; - - CuePoint(long, long long); - ~CuePoint(); - - CuePoint(const CuePoint&); - CuePoint& operator=(const CuePoint&); - - public: - long long m_element_start; - long long m_element_size; - - bool Load(IMkvReader*); - - long long GetTimeCode() const; // absolute but unscaled - long long GetTime(const Segment*) const; // absolute and scaled (ns units) - - struct TrackPosition { - long long m_track; - long long m_pos; // of cluster - long long m_block; - // codec_state //defaults to 0 - // reference = clusters containing req'd referenced blocks - // reftime = timecode of the referenced block - - bool Parse(IMkvReader*, long long, long long); - }; - - const TrackPosition* Find(const Track*) const; - - private: - const long m_index; - long long m_timecode; - TrackPosition* m_track_positions; - size_t m_track_positions_count; -}; - -class Cues { - friend class Segment; - - Cues(Segment*, long long start, long long size, long long element_start, - long long element_size); - ~Cues(); - - Cues(const Cues&); - Cues& operator=(const Cues&); - - public: - Segment* const m_pSegment; - const long long m_start; - const long long m_size; - const long long m_element_start; - const long long m_element_size; - - bool Find( // lower bound of time_ns - long long time_ns, const Track*, const CuePoint*&, - const CuePoint::TrackPosition*&) const; - - const CuePoint* GetFirst() const; - const CuePoint* GetLast() const; - const CuePoint* GetNext(const CuePoint*) const; - - const BlockEntry* GetBlock(const CuePoint*, - const CuePoint::TrackPosition*) const; - - bool LoadCuePoint() const; - long GetCount() const; // loaded only - // long GetTotal() const; //loaded + preloaded - bool DoneParsing() const; - - private: - bool Init() const; - bool 
PreloadCuePoint(long&, long long) const; - - mutable CuePoint** m_cue_points; - mutable long m_count; - mutable long m_preload_count; - mutable long long m_pos; -}; - -class Cluster { - friend class Segment; - - Cluster(const Cluster&); - Cluster& operator=(const Cluster&); - - public: - Segment* const m_pSegment; - - public: - static Cluster* Create(Segment*, - long index, // index in segment - long long off); // offset relative to segment - // long long element_size); - - Cluster(); // EndOfStream - ~Cluster(); - - bool EOS() const; - - long long GetTimeCode() const; // absolute, but not scaled - long long GetTime() const; // absolute, and scaled (nanosecond units) - long long GetFirstTime() const; // time (ns) of first (earliest) block - long long GetLastTime() const; // time (ns) of last (latest) block - - long GetFirst(const BlockEntry*&) const; - long GetLast(const BlockEntry*&) const; - long GetNext(const BlockEntry* curr, const BlockEntry*& next) const; - - const BlockEntry* GetEntry(const Track*, long long ns = -1) const; - const BlockEntry* GetEntry(const CuePoint&, - const CuePoint::TrackPosition&) const; - // const BlockEntry* GetMaxKey(const VideoTrack*) const; - - // static bool HasBlockEntries(const Segment*, long long); - - static long HasBlockEntries(const Segment*, long long idoff, long long& pos, - long& size); - - long GetEntryCount() const; - - long Load(long long& pos, long& size) const; - - long Parse(long long& pos, long& size) const; - long GetEntry(long index, const mkvparser::BlockEntry*&) const; - - protected: - Cluster(Segment*, long index, long long element_start); - // long long element_size); - - public: - const long long m_element_start; - long long GetPosition() const; // offset relative to segment - - long GetIndex() const; - long long GetElementSize() const; - // long long GetPayloadSize() const; - - // long long Unparsed() const; - - private: - long m_index; - mutable long long m_pos; - // mutable long long m_size; - mutable 
long long m_element_size; - mutable long long m_timecode; - mutable BlockEntry** m_entries; - mutable long m_entries_size; - mutable long m_entries_count; - - long ParseSimpleBlock(long long, long long&, long&); - long ParseBlockGroup(long long, long long&, long&); - - long CreateBlock(long long id, long long pos, long long size, - long long discard_padding); - long CreateBlockGroup(long long start_offset, long long size, - long long discard_padding); - long CreateSimpleBlock(long long, long long); -}; - -class Segment { - friend class Cues; - friend class Track; - friend class VideoTrack; - - Segment(const Segment&); - Segment& operator=(const Segment&); - - private: - Segment(IMkvReader*, long long elem_start, - // long long elem_size, - long long pos, long long size); - - public: - IMkvReader* const m_pReader; - const long long m_element_start; - // const long long m_element_size; - const long long m_start; // posn of segment payload - const long long m_size; // size of segment payload - Cluster m_eos; // TODO: make private? 
- - static long long CreateInstance(IMkvReader*, long long, Segment*&); - ~Segment(); - - long Load(); // loads headers and all clusters - - // for incremental loading - // long long Unparsed() const; - bool DoneParsing() const; - long long ParseHeaders(); // stops when first cluster is found - // long FindNextCluster(long long& pos, long& size) const; - long LoadCluster(long long& pos, long& size); // load one cluster - long LoadCluster(); - - long ParseNext(const Cluster* pCurr, const Cluster*& pNext, long long& pos, - long& size); - - const SeekHead* GetSeekHead() const; - const Tracks* GetTracks() const; - const SegmentInfo* GetInfo() const; - const Cues* GetCues() const; - const Chapters* GetChapters() const; - const Tags* GetTags() const; - - long long GetDuration() const; - - unsigned long GetCount() const; - const Cluster* GetFirst() const; - const Cluster* GetLast() const; - const Cluster* GetNext(const Cluster*); - - const Cluster* FindCluster(long long time_nanoseconds) const; - // const BlockEntry* Seek(long long time_nanoseconds, const Track*) const; - - const Cluster* FindOrPreloadCluster(long long pos); - - long ParseCues(long long cues_off, // offset relative to start of segment - long long& parse_pos, long& parse_len); - - private: - long long m_pos; // absolute file posn; what has been consumed so far - Cluster* m_pUnknownSize; - - SeekHead* m_pSeekHead; - SegmentInfo* m_pInfo; - Tracks* m_pTracks; - Cues* m_pCues; - Chapters* m_pChapters; - Tags* m_pTags; - Cluster** m_clusters; - long m_clusterCount; // number of entries for which m_index >= 0 - long m_clusterPreloadCount; // number of entries for which m_index < 0 - long m_clusterSize; // array size - - long DoLoadCluster(long long&, long&); - long DoLoadClusterUnknownSize(long long&, long&); - long DoParseNext(const Cluster*&, long long&, long&); - - bool AppendCluster(Cluster*); - bool PreloadCluster(Cluster*, ptrdiff_t); - - // void ParseSeekHead(long long pos, long long size); - // void 
ParseSeekEntry(long long pos, long long size); - // void ParseCues(long long); - - const BlockEntry* GetBlock(const CuePoint&, const CuePoint::TrackPosition&); -}; - -} // namespace mkvparser - -inline long mkvparser::Segment::LoadCluster() { - long long pos; - long size; - - return LoadCluster(pos, size); -} - -#endif // MKVPARSER_MKVPARSER_H_ diff --git a/thirdparty/libvpx/AUTHORS b/thirdparty/libvpx/AUTHORS deleted file mode 100644 index fcd5c534a8..0000000000 --- a/thirdparty/libvpx/AUTHORS +++ /dev/null @@ -1,142 +0,0 @@ -# This file is automatically generated from the git commit history -# by tools/gen_authors.sh. - -Aaron Watry <awatry@gmail.com> -Abo Talib Mahfoodh <ab.mahfoodh@gmail.com> -Adam Xu <adam@xuyaowu.com> -Adrian Grange <agrange@google.com> -Aℓex Converse <aconverse@google.com> -Ahmad Sharif <asharif@google.com> -Alexander Voronov <avoronov@graphics.cs.msu.ru> -Alexis Ballier <aballier@gentoo.org> -Alok Ahuja <waveletcoeff@gmail.com> -Alpha Lam <hclam@google.com> -A.Mahfoodh <ab.mahfoodh@gmail.com> -Ami Fischman <fischman@chromium.org> -Andoni Morales Alastruey <ylatuya@gmail.com> -Andres Mejia <mcitadel@gmail.com> -Andrew Russell <anrussell@google.com> -Angie Chiang <angiebird@google.com> -Aron Rosenberg <arosenberg@logitech.com> -Attila Nagy <attilanagy@google.com> -Brion Vibber <bvibber@wikimedia.org> -changjun.yang <changjun.yang@intel.com> -Charles 'Buck' Krasic <ckrasic@google.com> -chm <chm@rock-chips.com> -Christian Duvivier <cduvivier@google.com> -Daniele Castagna <dcastagna@chromium.org> -Daniel Kang <ddkang@google.com> -Deb Mukherjee <debargha@google.com> -Dim Temp <dimtemp0@gmail.com> -Dmitry Kovalev <dkovalev@google.com> -Dragan Mrdjan <dmrdjan@mips.com> -Ed Baker <edward.baker@intel.com> -Ehsan Akhgari <ehsan.akhgari@gmail.com> -Erik Niemeyer <erik.a.niemeyer@intel.com> -Fabio Pedretti <fabio.ped@libero.it> -Frank Galligan <fgalligan@google.com> -Fredrik Söderquist <fs@opera.com> -Fritz Koenig <frkoenig@google.com> -Gaute Strokkenes 
<gaute.strokkenes@broadcom.com> -Geza Lore <gezalore@gmail.com> -Ghislain MARY <ghislainmary2@gmail.com> -Giuseppe Scrivano <gscrivano@gnu.org> -Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com> -Guillaume Martres <gmartres@google.com> -Guillermo Ballester Valor <gbvalor@gmail.com> -Hangyu Kuang <hkuang@google.com> -Hanno Böck <hanno@hboeck.de> -Henrik Lundin <hlundin@google.com> -Hui Su <huisu@google.com> -Ivan Maltz <ivanmaltz@google.com> -Jacek Caban <cjacek@gmail.com> -Jacky Chen <jackychen@google.com> -James Berry <jamesberry@google.com> -James Yu <james.yu@linaro.org> -James Zern <jzern@google.com> -Jan Gerber <j@mailb.org> -Jan Kratochvil <jan.kratochvil@redhat.com> -Janne Salonen <jsalonen@google.com> -Jean-Yves Avenard <jyavenard@mozilla.com> -Jeff Faust <jfaust@google.com> -Jeff Muizelaar <jmuizelaar@mozilla.com> -Jeff Petkau <jpet@chromium.org> -Jia Jia <jia.jia@linaro.org> -Jian Zhou <zhoujian@google.com> -Jim Bankoski <jimbankoski@google.com> -Jingning Han <jingning@google.com> -Joey Parrish <joeyparrish@google.com> -Johann Koenig <johannkoenig@google.com> -John Koleszar <jkoleszar@google.com> -Johnny Klonaris <google@jawknee.com> -John Stark <jhnstrk@gmail.com> -Joshua Bleecher Snyder <josh@treelinelabs.com> -Joshua Litt <joshualitt@google.com> -Julia Robson <juliamrobson@gmail.com> -Justin Clift <justin@salasaga.org> -Justin Lebar <justin.lebar@gmail.com> -KO Myung-Hun <komh@chollian.net> -Lawrence Velázquez <larryv@macports.org> -Linfeng Zhang <linfengz@google.com> -Lou Quillio <louquillio@google.com> -Luca Barbato <lu_zero@gentoo.org> -Makoto Kato <makoto.kt@gmail.com> -Mans Rullgard <mans@mansr.com> -Marco Paniconi <marpan@google.com> -Mark Mentovai <mark@chromium.org> -Martin Ettl <ettl.martin78@googlemail.com> -Martin Storsjo <martin@martin.st> -Matthew Heaney <matthewjheaney@chromium.org> -Michael Kohler <michaelkohler@live.com> -Mike Frysinger <vapier@chromium.org> -Mike Hommey <mhommey@mozilla.com> -Mikhal Shemer <mikhal@google.com> 
-Minghai Shang <minghai@google.com> -Morton Jonuschat <yabawock@gmail.com> -Nico Weber <thakis@chromium.org> -Parag Salasakar <img.mips1@gmail.com> -Pascal Massimino <pascal.massimino@gmail.com> -Patrik Westin <patrik.westin@gmail.com> -Paul Wilkins <paulwilkins@google.com> -Pavol Rusnak <stick@gk2.sk> -Paweł Hajdan <phajdan@google.com> -Pengchong Jin <pengchong@google.com> -Peter de Rivaz <peter.derivaz@gmail.com> -Philip Jägenstedt <philipj@opera.com> -Priit Laes <plaes@plaes.org> -Rafael Ávila de Espíndola <rafael.espindola@gmail.com> -Rafaël Carré <funman@videolan.org> -Ralph Giles <giles@xiph.org> -Rob Bradford <rob@linux.intel.com> -Ronald S. Bultje <rsbultje@gmail.com> -Rui Ueyama <ruiu@google.com> -Sami Pietilä <samipietila@google.com> -Sasi Inguva <isasi@google.com> -Scott Graham <scottmg@chromium.org> -Scott LaVarnway <slavarnway@google.com> -Sean McGovern <gseanmcg@gmail.com> -Sergey Kolomenkin <kolomenkin@gmail.com> -Sergey Ulanov <sergeyu@chromium.org> -Shimon Doodkin <helpmepro1@gmail.com> -Shunyao Li <shunyaoli@google.com> -Stefan Holmer <holmer@google.com> -Suman Sunkara <sunkaras@google.com> -Taekhyun Kim <takim@nvidia.com> -Takanori MATSUURA <t.matsuu@gmail.com> -Tamar Levy <tamar.levy@intel.com> -Tao Bai <michaelbai@chromium.org> -Tero Rintaluoma <teror@google.com> -Thijs Vermeir <thijsvermeir@gmail.com> -Tim Kopp <tkopp@google.com> -Timothy B. Terriberry <tterribe@xiph.org> -Tom Finegan <tomfinegan@google.com> -Vignesh Venkatasubramanian <vigneshv@google.com> -Yaowu Xu <yaowu@google.com> -Yi Luo <luoyi@google.com> -Yongzhe Wang <yongzhe@google.com> -Yunqing Wang <yunqingwang@google.com> -Yury Gitman <yuryg@google.com> -Zoe Liu <zoeliu@google.com> -Google Inc. 
-The Mozilla Foundation -The Xiph.Org Foundation diff --git a/thirdparty/libvpx/CHANGELOG b/thirdparty/libvpx/CHANGELOG deleted file mode 100644 index 795d395f96..0000000000 --- a/thirdparty/libvpx/CHANGELOG +++ /dev/null @@ -1,654 +0,0 @@ -2016-07-20 v1.6.0 "Khaki Campbell Duck" - This release improves upon the VP9 encoder and speeds up the encoding and - decoding processes. - - - Upgrading: - This release is ABI incompatible with 1.5.0 due to a new 'color_range' enum - in vpx_image and some minor changes to the VP8_COMP structure. - - The default key frame interval for VP9 has changed from 128 to 9999. - - - Enhancement: - A core focus has been performance for low end Intel processors. SSSE3 - instructions such as 'pshufb' have been avoided and instructions have been - reordered to better accommodate the more constrained pipelines. - - As a result, devices based on Celeron processors have seen substantial - decoding improvements. From Indian Runner Duck to Javan Whistling Duck, - decoding speed improved between 10 and 30%. Between Javan Whistling Duck - and Khaki Campbell Duck, it improved another 10 to 15%. - - While Celeron benefited most, Core-i5 also improved 5% and 10% between the - respective releases. - - Realtime performance for WebRTC for both speed and quality has received a - lot of attention. - - - Bug Fixes: - A number of fuzzing issues, found variously by Mozilla, Chromium and others, - have been fixed and we strongly recommend updating. - -2015-11-09 v1.5.0 "Javan Whistling Duck" - This release improves upon the VP9 encoder and speeds up the encoding and - decoding processes. - - - Upgrading: - This release is ABI incompatible with 1.4.0. It drops deprecated VP8 - controls and adds a variety of VP9 controls for testing. - - The vpxenc utility now prefers VP9 by default. 
- - - Enhancements: - Faster VP9 encoding and decoding - Smaller library size by combining functions used by VP8 and VP9 - - - Bug Fixes: - A variety of fuzzing issues - -2015-04-03 v1.4.0 "Indian Runner Duck" - This release includes significant improvements to the VP9 codec. - - - Upgrading: - This release is ABI incompatible with 1.3.0. It drops the compatibility - layer, requiring VPX_IMG_FMT_* instead of IMG_FMT_*, and adds several codec - controls for VP9. - - - Enhancements: - Faster VP9 encoding and decoding - Multithreaded VP9 decoding (tile and frame-based) - Multithreaded VP9 encoding - on by default - YUV 4:2:2 and 4:4:4 support in VP9 - 10 and 12bit support in VP9 - 64bit ARM support by replacing ARM assembly with intrinsics - - - Bug Fixes: - Fixes a VP9 bitstream issue in Profile 1. This only affected non-YUV 4:2:0 - files. - - - Known Issues: - Frame Parallel decoding fails for segmented and non-420 files. - -2013-11-15 v1.3.0 "Forest" - This release introduces the VP9 codec in a backward-compatible way. - All existing users of VP8 can continue to use the library without - modification. However, some VP8 options do not map to VP9 in the same manner. - - The VP9 encoder in this release is not feature complete. Users interested in - the encoder are advised to use the git master branch and discuss issues on - libvpx mailing lists. - - - Upgrading: - This release is ABI and API compatible with Duclair (v1.0.0). Users - of older releases should refer to the Upgrading notes in this document - for that release. 
- - - Enhancements: - Get rid of bashisms in the main build scripts - Added usage info on command line options - Add lossless compression mode - Dll build of libvpx - Add additional Mac OS X targets: 10.7, 10.8 and 10.9 (darwin11-13) - Add option to disable documentation - configure: add --enable-external-build support - make: support V=1 as short form of verbose=yes - configure: support mingw-w64 - configure: support hardfloat armv7 CHOSTS - configure: add support for android x86 - Add estimated completion time to vpxenc - Don't exit on decode errors in vpxenc - vpxenc: support scaling prior to encoding - vpxdec: support scaling output - vpxenc: improve progress indicators with --skip - msvs: Don't link to winmm.lib - Add a new script for producing vcxproj files - Produce Visual Studio 10 and 11 project files - Produce Windows Phone project files - msvs-build: use msbuild for vs >= 2005 - configure: default configure log to config.log - Add encoding option --static-thresh - - - Speed: - Miscellaneous speed optimizations for VP8 and VP9. - - - Quality: - In general, quality is consistent with the Eider release. - - - Bug Fixes: - This release represents approximately a year of engineering effort, - and contains multiple bug fixes. Please refer to git history for details. - - -2012-12-21 v1.2.0 - This release acts as a checkpoint for a large amount of internal refactoring - and testing. It also contains a number of small bugfixes, so all users are - encouraged to upgrade. - - - Upgrading: - This release is ABI and API compatible with Duclair (v1.0.0). Users - of older releases should refer to the Upgrading notes in this - document for that release. - - - Enhancements: - VP8 optimizations for MIPS dspr2 - vpxenc: add -quiet option - - - Speed: - Encoder and decoder speed is consistent with the Eider release. - - - Quality: - In general, quality is consistent with the Eider release. 
- - Minor tweaks to ARNR filtering - Minor improvements to real time encoding with multiple temporal layers - - - Bug Fixes: - Fixes multithreaded encoder race condition in loopfilter - Fixes multi-resolution threaded encoding - Fix potential encoder dead-lock after picture resize - - -2012-05-09 v1.1.0 "Eider" - This introduces a number of enhancements, mostly focused on real-time - encoding. In addition, it fixes a decoder bug (first introduced in - Duclair) so all users of that release are encouraged to upgrade. - - - Upgrading: - This release is ABI and API compatible with Duclair (v1.0.0). Users - of older releases should refer to the Upgrading notes in this - document for that release. - - This release introduces a new temporal denoiser, controlled by the - VP8E_SET_NOISE_SENSITIVITY control. The temporal denoiser does not - currently take a strength parameter, so the control is effectively - a boolean - zero (off) or non-zero (on). For compatibility with - existing applications, the values accepted are the same as those - for the spatial denoiser (0-6). The temporal denoiser is enabled - by default, and the older spatial denoiser may be restored by - configuring with --disable-temporal-denoising. The temporal denoiser - is more computationally intensive than the spatial one. - - This release removes support for a legacy, decode only API that was - supported, but deprecated, at the initial release of libvpx - (v0.9.0). This is not expected to have any impact. If you are - impacted, you can apply a reversion to commit 2bf8fb58 locally. - Please update to the latest libvpx API if you are affected. - - - Enhancements: - Adds a motion compensated temporal denoiser to the encoder, which - gives higher quality than the older spatial denoiser. (See above - for notes on upgrading). 
- - In addition, support for new compilers and platforms were added, - including: - improved support for XCode - Android x86 NDK build - OS/2 support - SunCC support - - Changing resolution with vpx_codec_enc_config_set() is now - supported. Previously, reinitializing the codec was required to - change the input resolution. - - The vpxenc application has initial support for producing multiple - encodes from the same input in one call. Resizing is not yet - supported, but varying other codec parameters is. Use -- to - delineate output streams. Options persist from one stream to the - next. - - Also, the vpxenc application will now use a keyframe interval of - 5 seconds by default. Use the --kf-max-dist option to override. - - - Speed: - Decoder performance improved 2.5% versus Duclair. Encoder speed is - consistent with Duclair for most material. Two pass encoding of - slideshow-like material will see significant improvements. - - Large realtime encoding speed gains at a small quality expense are - possible by configuring the on-the-fly bitpacking experiment with - --enable-onthefly-bitpacking. Realtime encoder can be up to 13% - faster (ARM) depending on the number of threads and bitrate - settings. This technique sees constant gain over the 5-16 speed - range. For VC style input the loss seen is up to 0.2dB. See commit - 52cf4dca for further details. - - - Quality: - On the whole, quality is consistent with the Duclair release. Some - tweaks: - - Reduced blockiness in easy sections by applying a penalty to - intra modes. - - Improved quality of static sections (like slideshows) with - two pass encoding. - - Improved keyframe sizing with multiple temporal layers - - - Bug Fixes: - Corrected alt-ref contribution to frame rate for visible updates - to the alt-ref buffer. This affected applications making manual - usage of the frame reference flags, or temporal layers. 
- - Additional constraints were added to disable multi-frame quality - enhancement (MFQE) in sections of the frame where there is motion. - (#392) - - Fixed corruption issues when vpx_codec_enc_config_set() was called - with spatial resampling enabled. - - Fixed a decoder error introduced in Duclair where the segmentation - map was not being reinitialized on keyframes (#378) - - -2012-01-27 v1.0.0 "Duclair" - Our fourth named release, focused on performance and features related to - real-time encoding. It also fixes a decoder crash bug introduced in - v0.9.7, so all users of that release are encouraged to upgrade. - - - Upgrading: - This release is ABI incompatible with prior releases of libvpx, so the - "major" version number has been bumped to 1. You must recompile your - applications against the latest version of the libvpx headers. The - API remains compatible, and this should not require code changes in most - applications. - - - Enhancements: - This release introduces several substantial new features to the encoder, - of particular interest to real time streaming applications. - - Temporal scalability allows the encoder to produce a stream that can - be decimated to different frame rates, with independent rate targetting - for each substream. - - Multiframe quality enhancement postprocessing can make visual quality - more consistent in the presence of frames that are substantially - different quality than the surrounding frames, as in the temporal - scalability case and in some forced keyframe scenarios. - - Multiple-resolution encoding support allows the encoding of the - same content at different resolutions faster than encoding them - separately. - - - Speed: - Optimization targets for this release included the decoder and the real- - time modes of the encoder. Decoder speed on x86 has improved 10.5% with - this release. 
Encoder improvements followed a curve where speeds 1-3 - improved 4.0%-1.5%, speeds 4-8 improved <1%, and speeds 9-16 improved - 1.5% to 10.5%, respectively. "Best" mode speed is consistent with the - Cayuga release. - - - Quality: - Encoder quality in the single stream case is consistent with the Cayuga - release. - - - Bug Fixes: - This release fixes an OOB read decoder crash bug present in v0.9.7 - related to the clamping of motion vectors in SPLITMV blocks. This - behavior could be triggered by corrupt input or by starting - decoding from a P-frame. - - -2011-08-15 v0.9.7-p1 "Cayuga" patch 1 - This is an incremental bugfix release against Cayuga. All users of that - release are strongly encouraged to upgrade. - - - Fix potential OOB reads (cdae03a) - - An unbounded out of bounds read was discovered when the - decoder was requested to perform error concealment (new in - Cayuga) given a frame with corrupt partition sizes. - - A bounded out of bounds read was discovered affecting all - versions of libvpx. Given an multipartition input frame that - is truncated between the mode/mv partition and the first - residiual paritition (in the block of partition offsets), up - to 3 extra bytes could have been read from the source buffer. - The code will not take any action regardless of the contents - of these undefined bytes, as the truncated buffer is detected - immediately following the read based on the calculated - starting position of the coefficient partition. - - - Fix potential error concealment crash when the very first frame - is missing or corrupt (a609be5) - - - Fix significant artifacts in error concealment (a4c2211, 99d870a) - - - Revert 1-pass CBR rate control changes (e961317) - Further testing showed this change produced undesirable visual - artifacts, rolling back for now. - - -2011-08-02 v0.9.7 "Cayuga" - Our third named release, focused on a faster, higher quality, encoder. 
- - - Upgrading: - This release is backwards compatible with Aylesbury (v0.9.5) and - Bali (v0.9.6). Users of older releases should refer to the Upgrading - notes in this document for that release. - - - Enhancements: - Stereo 3D format support for vpxenc - Runtime detection of available processor cores. - Allow specifying --end-usage by enum name - vpxdec: test for frame corruption - vpxenc: add quantizer histogram display - vpxenc: add rate histogram display - Set VPX_FRAME_IS_DROPPABLE - update configure for ios sdk 4.3 - Avoid text relocations in ARM vp8 decoder - Generate a vpx.pc file for pkg-config. - New ways of passing encoded data between encoder and decoder. - - - Speed: - This release includes across-the-board speed improvements to the - encoder. On x86, these measure at approximately 11.5% in Best mode, - 21.5% in Good mode (speed 0), and 22.5% in Realtime mode (speed 6). - On ARM Cortex A9 with Neon extensions, real-time encoding of video - telephony content is 35% faster than Bali on single core and 48% - faster on multi-core. On the NVidia Tegra2 platform, real time - encoding is 40% faster than Bali. - - Decoder speed was not a priority for this release, but improved - approximately 8.4% on x86. - - Reduce motion vector search on alt-ref frame. - Encoder loopfilter running in its own thread - Reworked loopfilter to precalculate more parameters - SSE2/SSSE3 optimizations for build_predictors_mbuv{,_s}(). - Make hor UV predict ~2x faster (73 vs 132 cycles) using SSSE3. - Removed redundant checks - Reduced structure sizes - utilize preload in ARMv6 MC/LPF/Copy routines - ARM optimized quantization, dfct, variance, subtract - Increase chrow row alignment to 16 bytes. 
- disable trellis optimization for first pass - Write SSSE3 sub-pixel filter function - Improve SSE2 half-pixel filter funtions - Add vp8_sub_pixel_variance16x8_ssse3 function - Reduce unnecessary distortion computation - Use diamond search to replace full search - Preload reference area in sub-pixel motion search (real-time mode) - - - Quality: - This release focused primarily on one-pass use cases, including - video conferencing. Low latency data rate control was significantly - improved, improving streamability over bandwidth constrained links. - Added support for error concealment, allowing frames to maintain - visual quality in the presence of substantial packet loss. - - Add rc_max_intra_bitrate_pct control - Limit size of initial keyframe in one-pass. - Improve framerate adaptation - Improved 1-pass CBR rate control - Improved KF insertion after fades to still. - Improved key frame detection. - Improved activity masking (lower PSNR impact for same SSIM boost) - Improved interaction between GF and ARFs - Adding error-concealment to the decoder. - Adding support for independent partitions - Adjusted rate-distortion constants - - - - Bug Fixes: - Removed firstpass motion map - Fix parallel make install - Fix multithreaded encoding for 1 MB wide frame - Fixed iwalsh_neon build problems with RVDS4.1 - Fix semaphore emulation, spin-wait intrinsics on Windows - Fix build with xcode4 and simplify GLOBAL. - Mark ARM asm objects as allowing a non-executable stack. - Fix vpxenc encoding incorrect webm file header on big endian - - -2011-03-07 v0.9.6 "Bali" - Our second named release, focused on a faster, higher quality, encoder. - - - Upgrading: - This release is backwards compatible with Aylesbury (v0.9.5). Users - of older releases should refer to the Upgrading notes in this - document for that release. 
- - - Enhancements: - vpxenc --psnr shows a summary when encode completes - --tune=ssim option to enable activity masking - improved postproc visualizations for development - updated support for Apple iOS to SDK 4.2 - query decoder to determine which reference frames were updated - implemented error tracking in the decoder - fix pipe support on windows - - - Speed: - Primary focus was on good quality mode, speed 0. Average improvement - on x86 about 40%, up to 100% on user-generated content at that speed. - Best quality mode speed improved 35%, and realtime speed 10-20%. This - release also saw significant improvement in realtime encoding speed - on ARM platforms. - - Improved encoder threading - Dont pick encoder filter level when loopfilter is disabled. - Avoid double copying of key frames into alt and golden buffer - FDCT optimizations. - x86 sse2 temporal filter - SSSE3 version of fast quantizer - vp8_rd_pick_best_mbsegmentation code restructure - Adjusted breakout RD for SPLITMV - Changed segmentation check order - Improved rd_pick_intra4x4block - Adds armv6 optimized variance calculation - ARMv6 optimized sad16x16 - ARMv6 optimized half pixel variance calculations - Full search SAD function optimization in SSE4.1 - Improve MV prediction accuracy to achieve performance gain - Improve MV prediction in vp8_pick_inter_mode() for speed>3 - - - Quality: - Best quality mode improved PSNR 6.3%, and SSIM 6.1%. This release - also includes support for "activity masking," which greatly improves - SSIM at the expense of PSNR. For now, this feature is available with - the --tune=ssim option. Further experimentation in this area - is ongoing. This release also introduces a new rate control mode - called "CQ," which changes the allocation of bits within a clip to - the sections where they will have the most visual impact. - - Tuning for the more exact quantizer. - Relax rate control for last few frames - CQ Mode - Limit key frame quantizer for forced key frames. 
- KF/GF Pulsing - Add simple version of activity masking. - make rdmult adaptive for intra in quantizer RDO - cap the best quantizer for 2nd order DC - change the threshold of DC check for encode breakout - - - Bug Fixes: - Fix crash on Sparc Solaris. - Fix counter of fixed keyframe distance - ARNR filter pointer update bug fix - Fixed use of motion percentage in KF/GF group calc - Changed condition for using RD in Intra Mode - Fix encoder real-time only configuration. - Fix ARM encoder crash with multiple token partitions - Fixed bug first cluster timecode of webm file is wrong. - Fixed various encoder bugs with odd-sized images - vp8e_get_preview fixed when spatial resampling enabled - quantizer: fix assertion in fast quantizer path - Allocate source buffers to be multiples of 16 - Fix for manual Golden frame frequency - Fix drastic undershoot in long form content - - -2010-10-28 v0.9.5 "Aylesbury" - Our first named release, focused on a faster decoder, and a better encoder. - - - Upgrading: - This release incorporates backwards-incompatible changes to the - ivfenc and ivfdec tools. These tools are now called vpxenc and vpxdec. - - vpxdec - * the -q (quiet) option has been removed, and replaced with - -v (verbose). the output is quiet by default. Use -v to see - the version number of the binary. - - * The default behavior is now to write output to a single file - instead of individual frames. The -y option has been removed. - Y4M output is the default. - - * For raw I420/YV12 output instead of Y4M, the --i420 or --yv12 - options must be specified. - - $ ivfdec -o OUTPUT INPUT - $ vpxdec --i420 -o OUTPUT INPUT - - * If an output file is not specified, the default is to write - Y4M to stdout. This makes piping more natural. - - $ ivfdec -y -o - INPUT | ... - $ vpxdec INPUT | ... - - * The output file has additional flexibility for formatting the - filename. It supports escape characters for constructing a - filename from the width, height, and sequence number. 
This - replaces the -p option. To get the equivalent: - - $ ivfdec -p frame INPUT - $ vpxdec --i420 -o frame-%wx%h-%4.i420 INPUT - - vpxenc - * The output file must be specified with -o, rather than as the - last argument. - - $ ivfenc <options> INPUT OUTPUT - $ vpxenc <options> -o OUTPUT INPUT - - * The output defaults to webm. To get IVF output, use the --ivf - option. - - $ ivfenc <options> INPUT OUTPUT.ivf - $ vpxenc <options> -o OUTPUT.ivf --ivf INPUT - - - - Enhancements: - ivfenc and ivfdec have been renamed to vpxenc, vpxdec. - vpxdec supports .webm input - vpxdec writes .y4m by default - vpxenc writes .webm output by default - vpxenc --psnr now shows the average/overall PSNR at the end - ARM platforms now support runtime cpu detection - vpxdec visualizations added for motion vectors, block modes, references - vpxdec now silent by default - vpxdec --progress shows frame-by-frame timing information - vpxenc supports the distinction between --fps and --timebase - NASM is now a supported assembler - configure: enable PIC for shared libs by default - configure: add --enable-small - configure: support for ppc32-linux-gcc - configure: support for sparc-solaris-gcc - - - Bugs: - Improve handling of invalid frames - Fix valgrind errors in the NEON loop filters. - Fix loopfilter delta zero transitions - Fix valgrind errors in vp8_sixtap_predict8x4_armv6(). - Build fixes for darwin-icc - - - Speed: - 20-40% (average 28%) improvement in libvpx decoder speed, - including: - Rewrite vp8_short_walsh4x4_sse2() - Optimizations on the loopfilters. - Miscellaneous improvements for Atom - Add 4-tap version of 2nd-pass ARMv6 MC filter. - Improved multithread utilization - Better instruction choices on x86 - reorder data to use wider instructions - Update NEON wide idcts - Make block access to frame buffer sequential - Improved subset block search - Bilinear subpixel optimizations for ssse3. 
- Decrease memory footprint - - Encoder speed improvements (percentage gain not measured): - Skip unnecessary search of identical frames - Add SSE2 subtract functions - Improve bounds checking in vp8_diamond_search_sadx4() - Added vp8_fast_quantize_b_sse2 - - - Quality: - Over 7% overall PSNR improvement (6.3% SSIM) in "best" quality - encoding mode, and up to 60% improvement on very noisy, still - or slow moving source video - - Motion compensated temporal filter for Alt-Ref Noise Reduction - Improved use of trellis quantization on 2nd order Y blocks - Tune effect of motion on KF/GF boost in two pass - Allow coefficient optimization for good quality speed 0. - Improved control of active min quantizer for two pass. - Enable ARFs for non-lagged compress - -2010-09-02 v0.9.2 - - Enhancements: - Disable frame dropping by default - Improved multithreaded performance - Improved Force Key Frame Behaviour - Increased rate control buffer level precision - Fix bug in 1st pass motion compensation - ivfenc: correct fixed kf interval, --disable-kf - - Speed: - Changed above and left context data layout - Rework idct calling structure. - Removed unnecessary MB_MODE_INFO copies - x86: SSSE3 sixtap prediction - Reworked IDCT to include reconstruction (add) step - Swap alt/gold/new/last frame buffer ptrs instead of copying. - Improve SSE2 loopfilter functions - Change bitreader to use a larger window. - Avoid loopfilter reinitialization when possible - - Quality: - Normalize quantizer's zero bin and rounding factors - Add trellis quantization. - Make the quantizer exact. - Updates to ARNR filtering algorithm - Fix breakout thresh computation for golden & AltRef frames - Redo the forward 4x4 dct - Improve the accuracy of forward walsh-hadamard transform - Further adjustment of RD behaviour with Q and Zbin. - - Build System: - Allow linking of libs built with MinGW to MSVC - Fix target auto-detection on mingw32 - Allow --cpu= to work for x86. 
- configure: pass original arguments through to make dist - Fix builds without runtime CPU detection - msvs: fix install of codec sources - msvs: Change devenv.com command line for better msys support - msvs: Add vs9 targets. - Add x86_64-linux-icc target - - Bugs: - Potential crashes on older MinGW builds - Fix two-pass framrate for Y4M input. - Fixed simple loop filter, other crashes on ARM v6 - arm: fix missing dependency with --enable-shared - configure: support directories containing .o - Replace pinsrw (SSE) with MMX instructions - apple: include proper mach primatives - Fixed rate control bug with long key frame interval. - Fix DSO link errors on x86-64 when not using a version script - Fixed buffer selection for UV in AltRef filtering - - -2010-06-17 v0.9.1 - - Enhancements: - * ivfenc/ivfdec now support YUV4MPEG2 input and pipe I/O - * Speed optimizations - - Bugfixes: - * Rate control - * Prevent out-of-bounds accesses on invalid data - - Build system updates: - * Detect toolchain to be used automatically for native builds - * Support building shared libraries - * Better autotools emulation (--prefix, --libdir, DESTDIR) - - Updated LICENSE - * http://webmproject.blogspot.com/2010/06/changes-to-webm-open-source-license.html - - -2010-05-18 v0.9.0 - - Initial open source release. Welcome to WebM and VP8! - diff --git a/thirdparty/libvpx/LICENSE b/thirdparty/libvpx/LICENSE deleted file mode 100644 index 1ce44343c4..0000000000 --- a/thirdparty/libvpx/LICENSE +++ /dev/null @@ -1,31 +0,0 @@ -Copyright (c) 2010, The WebM Project authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - * Neither the name of Google, nor the WebM Project, nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written - permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/thirdparty/libvpx/PATENTS b/thirdparty/libvpx/PATENTS deleted file mode 100644 index caedf607e9..0000000000 --- a/thirdparty/libvpx/PATENTS +++ /dev/null @@ -1,23 +0,0 @@ -Additional IP Rights Grant (Patents) ------------------------------------- - -"These implementations" means the copyrightable works that implement the WebM -codecs distributed by Google as part of the WebM Project. 
- -Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge, -royalty-free, irrevocable (except as stated in this section) patent license to -make, have made, use, offer to sell, sell, import, transfer, and otherwise -run, modify and propagate the contents of these implementations of WebM, where -such license applies only to those patent claims, both currently owned by -Google and acquired in the future, licensable by Google that are necessarily -infringed by these implementations of WebM. This grant does not include claims -that would be infringed only as a consequence of further modification of these -implementations. If you or your agent or exclusive licensee institute or order -or agree to the institution of patent litigation or any other patent -enforcement activity against any entity (including a cross-claim or -counterclaim in a lawsuit) alleging that any of these implementations of WebM -or any code incorporated within any of these implementations of WebM -constitute direct or contributory patent infringement, or inducement of -patent infringement, then any patent rights granted to you under this License -for these implementations of WebM shall terminate as of the date such -litigation is filed. 
diff --git a/thirdparty/libvpx/rtcd/vp8_rtcd_arm.h b/thirdparty/libvpx/rtcd/vp8_rtcd_arm.h deleted file mode 100644 index 5c9b7aa392..0000000000 --- a/thirdparty/libvpx/rtcd/vp8_rtcd_arm.h +++ /dev/null @@ -1,240 +0,0 @@ -#ifndef VP8_RTCD_H_ -#define VP8_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -/* - * VP8 - */ - -struct blockd; -struct loop_filter_info; - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict16x16_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_bilinear_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_c - -void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict8x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_bilinear_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_bilinear_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_clear_system_state_c(); -#define vp8_clear_system_state vp8_clear_system_state_c - -void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, 
int dst_pitch); -void vp8_copy_mem16x16_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_copy_mem16x16)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); - -void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -void vp8_copy_mem8x4_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_copy_mem8x4)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); - -void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -void vp8_copy_mem8x8_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_copy_mem8x8)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); - -void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); -void vp8_dc_only_idct_add_neon(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); -RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); - -void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); -void vp8_dequant_idct_add_neon(short *input, short *dq, unsigned char *output, int stride); -RTCD_EXTERN void (*vp8_dequant_idct_add)(short *input, short *dq, unsigned char *output, int stride); - -void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); -void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); -RTCD_EXTERN void (*vp8_dequant_idct_add_uv_block)(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); - -void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs); 
-void vp8_dequant_idct_add_y_block_neon(short *q, short *dq, unsigned char *dst, int stride, char *eobs); -RTCD_EXTERN void (*vp8_dequant_idct_add_y_block)(short *q, short *dq, unsigned char *dst, int stride, char *eobs); - -void vp8_dequantize_b_c(struct blockd*, short *dqc); -void vp8_dequantize_b_neon(struct blockd*, short *dqc); -RTCD_EXTERN void (*vp8_dequantize_b)(struct blockd*, short *dqc); - -void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_bh_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -RTCD_EXTERN void (*vp8_loop_filter_bh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); - -void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_bv_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -RTCD_EXTERN void (*vp8_loop_filter_bv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); - -void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_mbh_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -RTCD_EXTERN void (*vp8_loop_filter_mbh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); - -void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_mbv_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info 
*lfi); -RTCD_EXTERN void (*vp8_loop_filter_mbv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); - -void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_bhs_neon(unsigned char *y, int ystride, const unsigned char *blimit); -RTCD_EXTERN void (*vp8_loop_filter_simple_bh)(unsigned char *y, int ystride, const unsigned char *blimit); - -void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_bvs_neon(unsigned char *y, int ystride, const unsigned char *blimit); -RTCD_EXTERN void (*vp8_loop_filter_simple_bv)(unsigned char *y, int ystride, const unsigned char *blimit); - -void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_mbhs_neon(unsigned char *y, int ystride, const unsigned char *blimit); -RTCD_EXTERN void (*vp8_loop_filter_simple_mbh)(unsigned char *y, int ystride, const unsigned char *blimit); - -void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_mbvs_neon(unsigned char *y, int ystride, const unsigned char *blimit); -RTCD_EXTERN void (*vp8_loop_filter_simple_mbv)(unsigned char *y, int ystride, const unsigned char *blimit); - -void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); -void vp8_short_idct4x4llm_neon(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); -RTCD_EXTERN void (*vp8_short_idct4x4llm)(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); - -void vp8_short_inv_walsh4x4_c(short *input, short *output); -void vp8_short_inv_walsh4x4_neon(short *input, short *output); -RTCD_EXTERN void (*vp8_short_inv_walsh4x4)(short *input, short *output); - -void vp8_short_inv_walsh4x4_1_c(short *input, short *output); -#define 
vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c - -void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict16x16_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_sixtap_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_sixtap_predict4x4 vp8_sixtap_predict4x4_c - -void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict8x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_sixtap_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_rtcd(void); - -#ifdef RTCD_C -#include "vpx_ports/arm.h" -static void setup_rtcd_internal(void) -{ - int flags = arm_cpu_caps(); - - vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_neon; -#endif - vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_neon; -#endif - vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_bilinear_predict8x8 = 
vp8_bilinear_predict8x8_neon; -#endif - vp8_copy_mem16x16 = vp8_copy_mem16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_copy_mem16x16 = vp8_copy_mem16x16_neon; -#endif - vp8_copy_mem8x4 = vp8_copy_mem8x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_copy_mem8x4 = vp8_copy_mem8x4_neon; -#endif - vp8_copy_mem8x8 = vp8_copy_mem8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_copy_mem8x8 = vp8_copy_mem8x8_neon; -#endif - vp8_dc_only_idct_add = vp8_dc_only_idct_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_dc_only_idct_add = vp8_dc_only_idct_add_neon; -#endif - vp8_dequant_idct_add = vp8_dequant_idct_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_dequant_idct_add = vp8_dequant_idct_add_neon; -#endif - vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon; -#endif - vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_neon; -#endif - vp8_dequantize_b = vp8_dequantize_b_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_dequantize_b = vp8_dequantize_b_neon; -#endif - vp8_loop_filter_bh = vp8_loop_filter_bh_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_loop_filter_bh = vp8_loop_filter_bh_neon; -#endif - vp8_loop_filter_bv = vp8_loop_filter_bv_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_loop_filter_bv = vp8_loop_filter_bv_neon; -#endif - vp8_loop_filter_mbh = vp8_loop_filter_mbh_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_loop_filter_mbh = vp8_loop_filter_mbh_neon; -#endif - vp8_loop_filter_mbv = vp8_loop_filter_mbv_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_loop_filter_mbv = vp8_loop_filter_mbv_neon; -#endif - vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_neon; -#endif - vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c; -#if HAVE_NEON - if (flags 
& HAS_NEON) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_neon; -#endif - vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_loop_filter_simple_mbh = vp8_loop_filter_mbhs_neon; -#endif - vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_loop_filter_simple_mbv = vp8_loop_filter_mbvs_neon; -#endif - vp8_short_idct4x4llm = vp8_short_idct4x4llm_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_short_idct4x4llm = vp8_short_idct4x4llm_neon; -#endif - vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_neon; -#endif - vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_neon; -#endif - vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_neon; -#endif - vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_neon; -#endif -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/rtcd/vp8_rtcd_c.h b/thirdparty/libvpx/rtcd/vp8_rtcd_c.h deleted file mode 100644 index d1657ac09d..0000000000 --- a/thirdparty/libvpx/rtcd/vp8_rtcd_c.h +++ /dev/null @@ -1,117 +0,0 @@ -#ifndef VP8_RTCD_H_ -#define VP8_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -/* - * VP8 - */ - -struct blockd; -struct loop_filter_info; - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_bilinear_predict16x16 vp8_bilinear_predict16x16_c - -void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, 
int dst_pitch); -#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_c - -void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_bilinear_predict8x4 vp8_bilinear_predict8x4_c - -void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_bilinear_predict8x8 vp8_bilinear_predict8x8_c - -void vp8_clear_system_state_c(); -#define vp8_clear_system_state vp8_clear_system_state_c - -void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -#define vp8_copy_mem16x16 vp8_copy_mem16x16_c - -void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -#define vp8_copy_mem8x4 vp8_copy_mem8x4_c - -void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -#define vp8_copy_mem8x8 vp8_copy_mem8x8_c - -void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); -#define vp8_dc_only_idct_add vp8_dc_only_idct_add_c - -void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); -#define vp8_dequant_idct_add vp8_dequant_idct_add_c - -void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); -#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_c - -void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs); -#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_c - -void vp8_dequantize_b_c(struct blockd*, short *dqc); -#define vp8_dequantize_b vp8_dequantize_b_c - -void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -#define vp8_loop_filter_bh vp8_loop_filter_bh_c - -void vp8_loop_filter_bv_c(unsigned char *y, 
unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -#define vp8_loop_filter_bv vp8_loop_filter_bv_c - -void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -#define vp8_loop_filter_mbh vp8_loop_filter_mbh_c - -void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -#define vp8_loop_filter_mbv vp8_loop_filter_mbv_c - -void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit); -#define vp8_loop_filter_simple_bh vp8_loop_filter_bhs_c - -void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit); -#define vp8_loop_filter_simple_bv vp8_loop_filter_bvs_c - -void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); -#define vp8_loop_filter_simple_mbh vp8_loop_filter_simple_horizontal_edge_c - -void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); -#define vp8_loop_filter_simple_mbv vp8_loop_filter_simple_vertical_edge_c - -void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); -#define vp8_short_idct4x4llm vp8_short_idct4x4llm_c - -void vp8_short_inv_walsh4x4_c(short *input, short *output); -#define vp8_short_inv_walsh4x4 vp8_short_inv_walsh4x4_c - -void vp8_short_inv_walsh4x4_1_c(short *input, short *output); -#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c - -void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_sixtap_predict16x16 vp8_sixtap_predict16x16_c - -void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_sixtap_predict4x4 vp8_sixtap_predict4x4_c - -void 
vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_sixtap_predict8x4 vp8_sixtap_predict8x4_c - -void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -#define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_c - -void vp8_rtcd(void); - -#ifdef RTCD_C -static void setup_rtcd_internal(void) -{ -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/rtcd/vp8_rtcd_x86.h b/thirdparty/libvpx/rtcd/vp8_rtcd_x86.h deleted file mode 100644 index cbe1f47b2a..0000000000 --- a/thirdparty/libvpx/rtcd/vp8_rtcd_x86.h +++ /dev/null @@ -1,247 +0,0 @@ -#ifndef VP8_RTCD_H_ -#define VP8_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -/* - * VP8 - */ - -struct blockd; -struct loop_filter_info; - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict16x16_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict16x16_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict16x16_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_bilinear_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict4x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_bilinear_predict4x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int 
dst_pitch); - -void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict8x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_bilinear_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict8x8_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_bilinear_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_bilinear_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_clear_system_state_c(); -void vpx_reset_mmx_state(); -RTCD_EXTERN void (*vp8_clear_system_state)(); - -void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -void vp8_copy_mem16x16_sse2(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_copy_mem16x16)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); - -void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -void vp8_copy_mem8x4_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_copy_mem8x4)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); - -void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -void 
vp8_copy_mem8x8_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_copy_mem8x8)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch); - -void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); -void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); -RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride); - -void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride); -void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, int stride); -RTCD_EXTERN void (*vp8_dequant_idct_add)(short *input, short *dq, unsigned char *output, int stride); - -void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); -void vp8_dequant_idct_add_uv_block_mmx(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); -void vp8_dequant_idct_add_uv_block_sse2(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); -RTCD_EXTERN void (*vp8_dequant_idct_add_uv_block)(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs); - -void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs); -void vp8_dequant_idct_add_y_block_mmx(short *q, short *dq, unsigned char *dst, int stride, char *eobs); -void vp8_dequant_idct_add_y_block_sse2(short *q, short *dq, unsigned char *dst, int stride, char *eobs); -RTCD_EXTERN void (*vp8_dequant_idct_add_y_block)(short *q, short *dq, unsigned char *dst, int stride, char *eobs); - -void vp8_dequantize_b_c(struct blockd*, short *dqc); -void vp8_dequantize_b_mmx(struct blockd*, short *dqc); -RTCD_EXTERN void (*vp8_dequantize_b)(struct blockd*, short *dqc); - 
-void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_bh_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_bh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -RTCD_EXTERN void (*vp8_loop_filter_bh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); - -void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_bv_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_bv_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -RTCD_EXTERN void (*vp8_loop_filter_bv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); - -void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_mbh_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_mbh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -RTCD_EXTERN void (*vp8_loop_filter_mbh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); - -void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -void vp8_loop_filter_mbv_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, 
struct loop_filter_info *lfi); -void vp8_loop_filter_mbv_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); -RTCD_EXTERN void (*vp8_loop_filter_mbv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi); - -void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_bhs_mmx(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_bhs_sse2(unsigned char *y, int ystride, const unsigned char *blimit); -RTCD_EXTERN void (*vp8_loop_filter_simple_bh)(unsigned char *y, int ystride, const unsigned char *blimit); - -void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_bvs_mmx(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_bvs_sse2(unsigned char *y, int ystride, const unsigned char *blimit); -RTCD_EXTERN void (*vp8_loop_filter_simple_bv)(unsigned char *y, int ystride, const unsigned char *blimit); - -void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_simple_horizontal_edge_mmx(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_simple_horizontal_edge_sse2(unsigned char *y, int ystride, const unsigned char *blimit); -RTCD_EXTERN void (*vp8_loop_filter_simple_mbh)(unsigned char *y, int ystride, const unsigned char *blimit); - -void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_simple_vertical_edge_mmx(unsigned char *y, int ystride, const unsigned char *blimit); -void vp8_loop_filter_simple_vertical_edge_sse2(unsigned char *y, int ystride, const unsigned char *blimit); -RTCD_EXTERN void (*vp8_loop_filter_simple_mbv)(unsigned char *y, int ystride, const unsigned char *blimit); - -void 
vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); -void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); -RTCD_EXTERN void (*vp8_short_idct4x4llm)(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride); - -void vp8_short_inv_walsh4x4_c(short *input, short *output); -void vp8_short_inv_walsh4x4_mmx(short *input, short *output); -void vp8_short_inv_walsh4x4_sse2(short *input, short *output); -RTCD_EXTERN void (*vp8_short_inv_walsh4x4)(short *input, short *output); - -void vp8_short_inv_walsh4x4_1_c(short *input, short *output); -#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c - -void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict16x16_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict16x16_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict16x16_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_sixtap_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict4x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict4x4_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_sixtap_predict4x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char 
*dst, int dst_pitch); -void vp8_sixtap_predict8x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict8x4_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict8x4_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_sixtap_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict8x8_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); -RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -void vp8_rtcd(void); - -#ifdef RTCD_C -#include "vpx_ports/x86.h" -static void setup_rtcd_internal(void) -{ - int flags = x86_simd_caps(); - - vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_c; - if (flags & HAS_MMX) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_mmx; - if (flags & HAS_SSE2) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2; - if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3; - vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_c; - if (flags & HAS_MMX) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_mmx; - vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c; - if (flags & HAS_MMX) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_mmx; - vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c; - if (flags & HAS_MMX) vp8_bilinear_predict8x8 = 
vp8_bilinear_predict8x8_mmx; - if (flags & HAS_SSE2) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2; - if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3; - vp8_clear_system_state = vp8_clear_system_state_c; - if (flags & HAS_MMX) vp8_clear_system_state = vpx_reset_mmx_state; - vp8_copy_mem16x16 = vp8_copy_mem16x16_c; - if (flags & HAS_MMX) vp8_copy_mem16x16 = vp8_copy_mem16x16_mmx; - if (flags & HAS_SSE2) vp8_copy_mem16x16 = vp8_copy_mem16x16_sse2; - vp8_copy_mem8x4 = vp8_copy_mem8x4_c; - if (flags & HAS_MMX) vp8_copy_mem8x4 = vp8_copy_mem8x4_mmx; - vp8_copy_mem8x8 = vp8_copy_mem8x8_c; - if (flags & HAS_MMX) vp8_copy_mem8x8 = vp8_copy_mem8x8_mmx; - vp8_dc_only_idct_add = vp8_dc_only_idct_add_c; - if (flags & HAS_MMX) vp8_dc_only_idct_add = vp8_dc_only_idct_add_mmx; - vp8_dequant_idct_add = vp8_dequant_idct_add_c; - if (flags & HAS_MMX) vp8_dequant_idct_add = vp8_dequant_idct_add_mmx; - vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c; - if (flags & HAS_MMX) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx; - if (flags & HAS_SSE2) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2; - vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c; - if (flags & HAS_MMX) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_mmx; - if (flags & HAS_SSE2) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_sse2; - vp8_dequantize_b = vp8_dequantize_b_c; - if (flags & HAS_MMX) vp8_dequantize_b = vp8_dequantize_b_mmx; - vp8_loop_filter_bh = vp8_loop_filter_bh_c; - if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx; - if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2; - vp8_loop_filter_bv = vp8_loop_filter_bv_c; - if (flags & HAS_MMX) vp8_loop_filter_bv = vp8_loop_filter_bv_mmx; - if (flags & HAS_SSE2) vp8_loop_filter_bv = vp8_loop_filter_bv_sse2; - vp8_loop_filter_mbh = vp8_loop_filter_mbh_c; - if (flags & HAS_MMX) vp8_loop_filter_mbh = 
vp8_loop_filter_mbh_mmx; - if (flags & HAS_SSE2) vp8_loop_filter_mbh = vp8_loop_filter_mbh_sse2; - vp8_loop_filter_mbv = vp8_loop_filter_mbv_c; - if (flags & HAS_MMX) vp8_loop_filter_mbv = vp8_loop_filter_mbv_mmx; - if (flags & HAS_SSE2) vp8_loop_filter_mbv = vp8_loop_filter_mbv_sse2; - vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c; - if (flags & HAS_MMX) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_mmx; - if (flags & HAS_SSE2) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_sse2; - vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c; - if (flags & HAS_MMX) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_mmx; - if (flags & HAS_SSE2) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_sse2; - vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c; - if (flags & HAS_MMX) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_mmx; - if (flags & HAS_SSE2) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_sse2; - vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c; - if (flags & HAS_MMX) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_mmx; - if (flags & HAS_SSE2) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_sse2; - vp8_short_idct4x4llm = vp8_short_idct4x4llm_c; - if (flags & HAS_MMX) vp8_short_idct4x4llm = vp8_short_idct4x4llm_mmx; - vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c; - if (flags & HAS_MMX) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_mmx; - if (flags & HAS_SSE2) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_sse2; - vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c; - if (flags & HAS_MMX) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_mmx; - if (flags & HAS_SSE2) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; - if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; - vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_c; - if (flags & HAS_MMX) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; - if (flags & 
HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3; - vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c; - if (flags & HAS_MMX) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_mmx; - if (flags & HAS_SSE2) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2; - if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3; - vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c; - if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx; - if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; - if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/rtcd/vp9_rtcd_arm.h b/thirdparty/libvpx/rtcd/vp9_rtcd_arm.h deleted file mode 100644 index afdc7e179e..0000000000 --- a/thirdparty/libvpx/rtcd/vp9_rtcd_arm.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef VP9_RTCD_H_ -#define VP9_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -/* - * VP9 - */ - -#include "vp9/common/vp9_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); -#define vp9_iht16x16_256_add vp9_iht16x16_256_add_c - -void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -RTCD_EXTERN void (*vp9_iht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); - -void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -RTCD_EXTERN void (*vp9_iht8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); - -void vp9_rtcd(void); - -#ifdef RTCD_C -#include "vpx_ports/arm.h" -static void 
setup_rtcd_internal(void) -{ - int flags = arm_cpu_caps(); - - vp9_iht4x4_16_add = vp9_iht4x4_16_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp9_iht4x4_16_add = vp9_iht4x4_16_add_neon; -#endif - vp9_iht8x8_64_add = vp9_iht8x8_64_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vp9_iht8x8_64_add = vp9_iht8x8_64_add_neon; -#endif -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/rtcd/vp9_rtcd_c.h b/thirdparty/libvpx/rtcd/vp9_rtcd_c.h deleted file mode 100644 index 329cb9d04c..0000000000 --- a/thirdparty/libvpx/rtcd/vp9_rtcd_c.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef VP9_RTCD_H_ -#define VP9_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -/* - * VP9 - */ - -#include "vp9/common/vp9_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); -#define vp9_iht16x16_256_add vp9_iht16x16_256_add_c - -void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -#define vp9_iht4x4_16_add vp9_iht4x4_16_add_c - -void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -#define vp9_iht8x8_64_add vp9_iht8x8_64_add_c - -void vp9_rtcd(void); - -#ifdef RTCD_C -static void setup_rtcd_internal(void) -{ -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/rtcd/vp9_rtcd_x86.h b/thirdparty/libvpx/rtcd/vp9_rtcd_x86.h deleted file mode 100644 index 8ce8067674..0000000000 --- a/thirdparty/libvpx/rtcd/vp9_rtcd_x86.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef VP9_RTCD_H_ -#define VP9_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -/* - * VP9 - */ - -#include "vp9/common/vp9_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); -void 
vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); -RTCD_EXTERN void (*vp9_iht16x16_256_add)(const tran_low_t *input, uint8_t *output, int pitch, int tx_type); - -void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -RTCD_EXTERN void (*vp9_iht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); - -void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); -RTCD_EXTERN void (*vp9_iht8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type); - -void vp9_rtcd(void); - -#ifdef RTCD_C -#include "vpx_ports/x86.h" -static void setup_rtcd_internal(void) -{ - int flags = x86_simd_caps(); - - vp9_iht16x16_256_add = vp9_iht16x16_256_add_c; - if (flags & HAS_SSE2) vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2; - - vp9_iht4x4_16_add = vp9_iht4x4_16_add_c; - if (flags & HAS_SSE2) vp9_iht4x4_16_add = vp9_iht4x4_16_add_sse2; - - vp9_iht8x8_64_add = vp9_iht8x8_64_add_c; - if (flags & HAS_SSE2) vp9_iht8x8_64_add = vp9_iht8x8_64_add_sse2; -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_arm.h b/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_arm.h deleted file mode 100644 index df42f3d24a..0000000000 --- a/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_arm.h +++ /dev/null @@ -1,678 +0,0 @@ -#ifndef VPX_DSP_RTCD_H_ -#define VPX_DSP_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -/* - * DSP - */ - -#include "vpx/vpx_integer.h" -#include "vpx_dsp/vpx_dsp_common.h" - - -#ifdef __cplusplus -extern "C" { -#endif - -void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t 
*filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_avg_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_avg_vert_neon(const uint8_t *src, 
ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve_avg_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int 
w, int h); -RTCD_EXTERN void (*vpx_convolve_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve_copy_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c - -void vpx_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c - -void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c - -void vpx_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c - -void vpx_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c - -void vpx_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c - -void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void 
vpx_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d135_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c - -void vpx_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d153_predictor_16x16 vpx_d153_predictor_16x16_c - -void vpx_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d153_predictor_32x32 vpx_d153_predictor_32x32_c - -void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d153_predictor_4x4 vpx_d153_predictor_4x4_c - -void vpx_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d153_predictor_8x8 vpx_d153_predictor_8x8_c - -void vpx_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207_predictor_16x16 vpx_d207_predictor_16x16_c - -void vpx_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207_predictor_32x32 vpx_d207_predictor_32x32_c - -void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207_predictor_4x4 vpx_d207_predictor_4x4_c - -void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c - -void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c - -void vpx_d207e_predictor_32x32_c(uint8_t *dst, 
ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c - -void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c - -void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c - -void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d45_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45_predictor_32x32 vpx_d45_predictor_32x32_c - -void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d45_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d45_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c - -void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_32x32 
vpx_d45e_predictor_32x32_c - -void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c - -void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c - -void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c - -void vpx_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63_predictor_32x32 vpx_d63_predictor_32x32_c - -void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63_predictor_4x4 vpx_d63_predictor_4x4_c - -void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c - -void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c - -void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c - -void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c - -void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c - -void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c - -void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t 
*left); -void vpx_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t 
*above, const uint8_t *left); - -void vpx_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN 
void (*vpx_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void 
vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c - -void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct16x16_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void 
vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct32x32_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct32x32_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct32x32_34_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct4x4_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct4x4_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct8x8_12_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest, 
int dest_stride); -RTCD_EXTERN void (*vpx_idct8x8_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_iwht4x4_16_add vpx_iwht4x4_16_add_c - -void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c - -void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_horizontal_4_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*vpx_lpf_horizontal_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); - -void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_horizontal_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_horizontal_8)(uint8_t *s, int pitch, const uint8_t *blimit, const 
uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*vpx_lpf_horizontal_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); - -void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_vertical_16_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_vertical_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void 
vpx_lpf_vertical_16_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_vertical_16_dual)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_vertical_4_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*vpx_lpf_vertical_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); - -void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_vertical_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, 
const uint8_t *thresh1); -RTCD_EXTERN void (*vpx_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); - -void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_2d vpx_scaled_2d_c - -void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_avg_2d vpx_scaled_avg_2d_c - -void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c - -void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_avg_vert vpx_scaled_avg_vert_c - -void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_horiz vpx_scaled_horiz_c - -void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_vert vpx_scaled_vert_c - -void vpx_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_tm_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_tm_predictor_16x16)(uint8_t *dst, 
ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_tm_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_tm_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_tm_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_tm_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_tm_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_tm_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_v_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); 
-RTCD_EXTERN void (*vpx_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c - -void vpx_dsp_rtcd(void); - -#ifdef RTCD_C -#include "vpx_ports/arm.h" -static void setup_rtcd_internal(void) -{ - int flags = arm_cpu_caps(); - - vpx_convolve8 = vpx_convolve8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_convolve8 = vpx_convolve8_neon; -#endif - vpx_convolve8_avg = vpx_convolve8_avg_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_convolve8_avg = vpx_convolve8_avg_neon; -#endif - vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_neon; -#endif - vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_neon; -#endif - vpx_convolve8_horiz = vpx_convolve8_horiz_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_convolve8_horiz = vpx_convolve8_horiz_neon; -#endif - vpx_convolve8_vert = vpx_convolve8_vert_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_convolve8_vert = vpx_convolve8_vert_neon; -#endif - vpx_convolve_avg = vpx_convolve_avg_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_convolve_avg = vpx_convolve_avg_neon; -#endif - vpx_convolve_copy = vpx_convolve_copy_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_convolve_copy = vpx_convolve_copy_neon; -#endif - vpx_d135_predictor_4x4 = vpx_d135_predictor_4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_d135_predictor_4x4 = 
vpx_d135_predictor_4x4_neon; -#endif - vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_neon; -#endif - vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_neon; -#endif - vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_neon; -#endif - vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_neon; -#endif - vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_neon; -#endif - vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_neon; -#endif - vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_neon; -#endif - vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_neon; -#endif - vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_neon; -#endif - vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_neon; -#endif - vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_neon; -#endif - vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_neon; -#endif - 
vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_neon; -#endif - vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_neon; -#endif - vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_neon; -#endif - vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_neon; -#endif - vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_neon; -#endif - vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_neon; -#endif - vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_neon; -#endif - vpx_h_predictor_16x16 = vpx_h_predictor_16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_h_predictor_16x16 = vpx_h_predictor_16x16_neon; -#endif - vpx_h_predictor_32x32 = vpx_h_predictor_32x32_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_h_predictor_32x32 = vpx_h_predictor_32x32_neon; -#endif - vpx_h_predictor_4x4 = vpx_h_predictor_4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_h_predictor_4x4 = vpx_h_predictor_4x4_neon; -#endif - vpx_h_predictor_8x8 = vpx_h_predictor_8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_h_predictor_8x8 = vpx_h_predictor_8x8_neon; -#endif - vpx_idct16x16_10_add = vpx_idct16x16_10_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct16x16_10_add = vpx_idct16x16_10_add_neon; -#endif - vpx_idct16x16_1_add = vpx_idct16x16_1_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct16x16_1_add = vpx_idct16x16_1_add_neon; -#endif - 
vpx_idct16x16_256_add = vpx_idct16x16_256_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct16x16_256_add = vpx_idct16x16_256_add_neon; -#endif - vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_neon; -#endif - vpx_idct32x32_135_add = vpx_idct32x32_135_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct32x32_135_add = vpx_idct32x32_1024_add_neon; -#endif - vpx_idct32x32_1_add = vpx_idct32x32_1_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct32x32_1_add = vpx_idct32x32_1_add_neon; -#endif - vpx_idct32x32_34_add = vpx_idct32x32_34_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct32x32_34_add = vpx_idct32x32_1024_add_neon; -#endif - vpx_idct4x4_16_add = vpx_idct4x4_16_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct4x4_16_add = vpx_idct4x4_16_add_neon; -#endif - vpx_idct4x4_1_add = vpx_idct4x4_1_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct4x4_1_add = vpx_idct4x4_1_add_neon; -#endif - vpx_idct8x8_12_add = vpx_idct8x8_12_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct8x8_12_add = vpx_idct8x8_12_add_neon; -#endif - vpx_idct8x8_1_add = vpx_idct8x8_1_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct8x8_1_add = vpx_idct8x8_1_add_neon; -#endif - vpx_idct8x8_64_add = vpx_idct8x8_64_add_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_idct8x8_64_add = vpx_idct8x8_64_add_neon; -#endif - vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_neon; -#endif - vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_neon; -#endif - vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_neon; -#endif - vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_c; -#if HAVE_NEON - if (flags & HAS_NEON) 
vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_neon; -#endif - vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_neon; -#endif - vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_neon; -#endif - vpx_lpf_vertical_16 = vpx_lpf_vertical_16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_vertical_16 = vpx_lpf_vertical_16_neon; -#endif - vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_neon; -#endif - vpx_lpf_vertical_4 = vpx_lpf_vertical_4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_vertical_4 = vpx_lpf_vertical_4_neon; -#endif - vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_neon; -#endif - vpx_lpf_vertical_8 = vpx_lpf_vertical_8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_vertical_8 = vpx_lpf_vertical_8_neon; -#endif - vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_neon; -#endif - vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_neon; -#endif - vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_neon; -#endif - vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_neon; -#endif - vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_neon; -#endif - vpx_v_predictor_16x16 = vpx_v_predictor_16x16_c; -#if HAVE_NEON - if (flags & HAS_NEON) 
vpx_v_predictor_16x16 = vpx_v_predictor_16x16_neon; -#endif - vpx_v_predictor_32x32 = vpx_v_predictor_32x32_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_v_predictor_32x32 = vpx_v_predictor_32x32_neon; -#endif - vpx_v_predictor_4x4 = vpx_v_predictor_4x4_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_v_predictor_4x4 = vpx_v_predictor_4x4_neon; -#endif - vpx_v_predictor_8x8 = vpx_v_predictor_8x8_c; -#if HAVE_NEON - if (flags & HAS_NEON) vpx_v_predictor_8x8 = vpx_v_predictor_8x8_neon; -#endif -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_c.h b/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_c.h deleted file mode 100644 index 9fcc2f0066..0000000000 --- a/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_c.h +++ /dev/null @@ -1,355 +0,0 @@ -#ifndef VPX_DSP_RTCD_H_ -#define VPX_DSP_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -/* - * DSP - */ - -#include "vpx/vpx_integer.h" -#include "vpx_dsp/vpx_dsp_common.h" - - -#ifdef __cplusplus -extern "C" { -#endif - -void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_convolve8 vpx_convolve8_c - -void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_convolve8_avg vpx_convolve8_avg_c - -void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_convolve8_avg_horiz vpx_convolve8_avg_horiz_c - -void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int 
w, int h); -#define vpx_convolve8_avg_vert vpx_convolve8_avg_vert_c - -void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_convolve8_horiz vpx_convolve8_horiz_c - -void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_convolve8_vert vpx_convolve8_vert_c - -void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_convolve_avg vpx_convolve_avg_c - -void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_convolve_copy vpx_convolve_copy_c - -void vpx_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c - -void vpx_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c - -void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c - -void vpx_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c - -void vpx_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c - -void vpx_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const 
uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c - -void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_4x4 vpx_d135_predictor_4x4_c - -void vpx_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c - -void vpx_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d153_predictor_16x16 vpx_d153_predictor_16x16_c - -void vpx_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d153_predictor_32x32 vpx_d153_predictor_32x32_c - -void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d153_predictor_4x4 vpx_d153_predictor_4x4_c - -void vpx_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d153_predictor_8x8 vpx_d153_predictor_8x8_c - -void vpx_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207_predictor_16x16 vpx_d207_predictor_16x16_c - -void vpx_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207_predictor_32x32 vpx_d207_predictor_32x32_c - -void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207_predictor_4x4 vpx_d207_predictor_4x4_c - -void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c - -void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c - -void 
vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c - -void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c - -void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c - -void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_c - -void vpx_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45_predictor_32x32 vpx_d45_predictor_32x32_c - -void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45_predictor_4x4 vpx_d45_predictor_4x4_c - -void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_c - -void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c - -void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c - -void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c - -void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c - -void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define 
vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c - -void vpx_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63_predictor_32x32 vpx_d63_predictor_32x32_c - -void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63_predictor_4x4 vpx_d63_predictor_4x4_c - -void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c - -void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c - -void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c - -void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c - -void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c - -void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c - -void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_c - -void vpx_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_128_predictor_32x32 vpx_dc_128_predictor_32x32_c - -void vpx_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_128_predictor_4x4 vpx_dc_128_predictor_4x4_c - -void vpx_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t 
y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_128_predictor_8x8 vpx_dc_128_predictor_8x8_c - -void vpx_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_left_predictor_16x16 vpx_dc_left_predictor_16x16_c - -void vpx_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_left_predictor_32x32 vpx_dc_left_predictor_32x32_c - -void vpx_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_left_predictor_4x4 vpx_dc_left_predictor_4x4_c - -void vpx_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_left_predictor_8x8 vpx_dc_left_predictor_8x8_c - -void vpx_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_predictor_16x16 vpx_dc_predictor_16x16_c - -void vpx_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_predictor_32x32 vpx_dc_predictor_32x32_c - -void vpx_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_predictor_4x4 vpx_dc_predictor_4x4_c - -void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_predictor_8x8 vpx_dc_predictor_8x8_c - -void vpx_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_top_predictor_16x16 vpx_dc_top_predictor_16x16_c - -void vpx_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_top_predictor_32x32 vpx_dc_top_predictor_32x32_c - -void vpx_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define 
vpx_dc_top_predictor_4x4 vpx_dc_top_predictor_4x4_c - -void vpx_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_dc_top_predictor_8x8 vpx_dc_top_predictor_8x8_c - -void vpx_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_h_predictor_16x16 vpx_h_predictor_16x16_c - -void vpx_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_h_predictor_32x32 vpx_h_predictor_32x32_c - -void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_h_predictor_4x4 vpx_h_predictor_4x4_c - -void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_h_predictor_8x8 vpx_h_predictor_8x8_c - -void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c - -void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct16x16_10_add vpx_idct16x16_10_add_c - -void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct16x16_1_add vpx_idct16x16_1_add_c - -void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct16x16_256_add vpx_idct16x16_256_add_c - -void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct32x32_1024_add vpx_idct32x32_1024_add_c - -void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct32x32_135_add vpx_idct32x32_135_add_c - -void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct32x32_1_add vpx_idct32x32_1_add_c - -void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define 
vpx_idct32x32_34_add vpx_idct32x32_34_add_c - -void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct4x4_16_add vpx_idct4x4_16_add_c - -void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct4x4_1_add vpx_idct4x4_1_add_c - -void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct8x8_12_add vpx_idct8x8_12_add_c - -void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct8x8_1_add vpx_idct8x8_1_add_c - -void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_idct8x8_64_add vpx_idct8x8_64_add_c - -void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_iwht4x4_16_add vpx_iwht4x4_16_add_c - -void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c - -void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define vpx_lpf_horizontal_4 vpx_lpf_horizontal_4_c - -void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define vpx_lpf_horizontal_4_dual vpx_lpf_horizontal_4_dual_c - -void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define vpx_lpf_horizontal_8 vpx_lpf_horizontal_8_c - -void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define vpx_lpf_horizontal_8_dual vpx_lpf_horizontal_8_dual_c - -void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); 
-#define vpx_lpf_horizontal_edge_16 vpx_lpf_horizontal_edge_16_c - -void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define vpx_lpf_horizontal_edge_8 vpx_lpf_horizontal_edge_8_c - -void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define vpx_lpf_vertical_16 vpx_lpf_vertical_16_c - -void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define vpx_lpf_vertical_16_dual vpx_lpf_vertical_16_dual_c - -void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define vpx_lpf_vertical_4 vpx_lpf_vertical_4_c - -void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define vpx_lpf_vertical_4_dual vpx_lpf_vertical_4_dual_c - -void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -#define vpx_lpf_vertical_8 vpx_lpf_vertical_8_c - -void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -#define vpx_lpf_vertical_8_dual vpx_lpf_vertical_8_dual_c - -void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_2d vpx_scaled_2d_c - -void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_avg_2d vpx_scaled_avg_2d_c - -void vpx_scaled_avg_horiz_c(const 
uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c - -void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_avg_vert vpx_scaled_avg_vert_c - -void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_horiz vpx_scaled_horiz_c - -void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_vert vpx_scaled_vert_c - -void vpx_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_tm_predictor_16x16 vpx_tm_predictor_16x16_c - -void vpx_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_tm_predictor_32x32 vpx_tm_predictor_32x32_c - -void vpx_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_tm_predictor_4x4 vpx_tm_predictor_4x4_c - -void vpx_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_tm_predictor_8x8 vpx_tm_predictor_8x8_c - -void vpx_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_v_predictor_16x16 vpx_v_predictor_16x16_c - -void vpx_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_v_predictor_32x32 vpx_v_predictor_32x32_c - -void vpx_v_predictor_4x4_c(uint8_t *dst, 
ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_v_predictor_4x4 vpx_v_predictor_4x4_c - -void vpx_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_v_predictor_8x8 vpx_v_predictor_8x8_c - -void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c - -void vpx_dsp_rtcd(void); - -#ifdef RTCD_C -static void setup_rtcd_internal(void) -{ -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_x86.h b/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_x86.h deleted file mode 100644 index c2a68330ac..0000000000 --- a/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_x86.h +++ /dev/null @@ -1,604 +0,0 @@ -#ifndef VPX_DSP_RTCD_H_ -#define VPX_DSP_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -/* - * DSP - */ - -#include "vpx/vpx_integer.h" -#include "vpx_dsp/vpx_dsp_common.h" - - -#ifdef __cplusplus -extern "C" { -#endif - -void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, 
const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_avg_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void 
vpx_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, 
int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_convolve_copy_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c - -void vpx_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c - -void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c - -void vpx_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c - -void 
vpx_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c - -void vpx_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c - -void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_4x4 vpx_d135_predictor_4x4_c - -void vpx_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c - -void vpx_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d153_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d153_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void 
(*vpx_d153_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d207_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d207_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d207_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d207_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d207_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d207_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d207_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d207_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c - -void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c - -void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_4x4 
vpx_d207e_predictor_4x4_c - -void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c - -void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d45_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d45_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d45_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d45_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d45_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d45_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d45_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d45_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c - -void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c - -void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const 
uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c - -void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c - -void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d63_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d63_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d63_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d63_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d63_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d63_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_d63_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_d63_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c - -void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c - 
-void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c - -void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c - -void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c - -void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_128_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_128_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const 
uint8_t *above, const uint8_t *left); -void vpx_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_left_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_left_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, 
const uint8_t *above, const uint8_t *left); - -void vpx_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_top_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const 
uint8_t *left); -RTCD_EXTERN void (*vpx_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_h_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_h_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_h_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_h_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c - -void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t 
*dest, int dest_stride); -void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct32x32_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct32x32_34_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void 
vpx_idct4x4_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct4x4_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct8x8_12_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct8x8_12_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct8x8_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_idct8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -void vpx_iwht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride); -RTCD_EXTERN void (*vpx_iwht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride); - -void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride); -#define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c - -void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_horizontal_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void 
vpx_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*vpx_lpf_horizontal_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); - -void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_horizontal_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_horizontal_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*vpx_lpf_horizontal_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); - -void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_horizontal_edge_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_horizontal_edge_8_sse2(uint8_t *s, int pitch, 
const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_vertical_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_vertical_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_vertical_16_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_vertical_16_dual)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_vertical_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*vpx_lpf_vertical_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); - -void 
vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -void vpx_lpf_vertical_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); -RTCD_EXTERN void (*vpx_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); - -void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); -RTCD_EXTERN void (*vpx_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); - -void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -void vpx_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -RTCD_EXTERN void (*vpx_scaled_2d)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); - -void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_avg_2d vpx_scaled_avg_2d_c - -void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t 
*filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c - -void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_avg_vert vpx_scaled_avg_vert_c - -void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_horiz vpx_scaled_horiz_c - -void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h); -#define vpx_scaled_vert vpx_scaled_vert_c - -void vpx_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_tm_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_tm_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_tm_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_tm_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_tm_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_tm_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void 
vpx_tm_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_tm_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_v_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_v_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_v_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_v_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -void vpx_v_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -RTCD_EXTERN void (*vpx_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); - -void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); -#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c - -void vpx_dsp_rtcd(void); - -#ifdef RTCD_C -#include "vpx_ports/x86.h" -static void setup_rtcd_internal(void) -{ - int flags = x86_simd_caps(); - - vpx_convolve8 = vpx_convolve8_c; - if (flags & HAS_SSE2) vpx_convolve8 = vpx_convolve8_sse2; - if (flags & 
HAS_SSSE3) vpx_convolve8 = vpx_convolve8_ssse3; - vpx_convolve8_avg = vpx_convolve8_avg_c; - if (flags & HAS_SSE2) vpx_convolve8_avg = vpx_convolve8_avg_sse2; - if (flags & HAS_SSSE3) vpx_convolve8_avg = vpx_convolve8_avg_ssse3; - vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_c; - if (flags & HAS_SSE2) vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_sse2; - if (flags & HAS_SSSE3) vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_ssse3; - vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_c; - if (flags & HAS_SSE2) vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_sse2; - if (flags & HAS_SSSE3) vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_ssse3; - vpx_convolve8_horiz = vpx_convolve8_horiz_c; - if (flags & HAS_SSE2) vpx_convolve8_horiz = vpx_convolve8_horiz_sse2; - if (flags & HAS_SSSE3) vpx_convolve8_horiz = vpx_convolve8_horiz_ssse3; - vpx_convolve8_vert = vpx_convolve8_vert_c; - if (flags & HAS_SSE2) vpx_convolve8_vert = vpx_convolve8_vert_sse2; - if (flags & HAS_SSSE3) vpx_convolve8_vert = vpx_convolve8_vert_ssse3; - vpx_convolve_avg = vpx_convolve_avg_c; - if (flags & HAS_SSE2) vpx_convolve_avg = vpx_convolve_avg_sse2; - vpx_convolve_copy = vpx_convolve_copy_c; - if (flags & HAS_SSE2) vpx_convolve_copy = vpx_convolve_copy_sse2; - vpx_d153_predictor_16x16 = vpx_d153_predictor_16x16_c; - if (flags & HAS_SSSE3) vpx_d153_predictor_16x16 = vpx_d153_predictor_16x16_ssse3; - vpx_d153_predictor_32x32 = vpx_d153_predictor_32x32_c; - if (flags & HAS_SSSE3) vpx_d153_predictor_32x32 = vpx_d153_predictor_32x32_ssse3; - vpx_d153_predictor_4x4 = vpx_d153_predictor_4x4_c; - if (flags & HAS_SSSE3) vpx_d153_predictor_4x4 = vpx_d153_predictor_4x4_ssse3; - vpx_d153_predictor_8x8 = vpx_d153_predictor_8x8_c; - if (flags & HAS_SSSE3) vpx_d153_predictor_8x8 = vpx_d153_predictor_8x8_ssse3; - vpx_d207_predictor_16x16 = vpx_d207_predictor_16x16_c; - if (flags & HAS_SSSE3) vpx_d207_predictor_16x16 = vpx_d207_predictor_16x16_ssse3; - vpx_d207_predictor_32x32 = 
vpx_d207_predictor_32x32_c; - if (flags & HAS_SSSE3) vpx_d207_predictor_32x32 = vpx_d207_predictor_32x32_ssse3; - vpx_d207_predictor_4x4 = vpx_d207_predictor_4x4_c; - if (flags & HAS_SSE2) vpx_d207_predictor_4x4 = vpx_d207_predictor_4x4_sse2; - vpx_d207_predictor_8x8 = vpx_d207_predictor_8x8_c; - if (flags & HAS_SSSE3) vpx_d207_predictor_8x8 = vpx_d207_predictor_8x8_ssse3; - vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_c; - if (flags & HAS_SSSE3) vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_ssse3; - vpx_d45_predictor_32x32 = vpx_d45_predictor_32x32_c; - if (flags & HAS_SSSE3) vpx_d45_predictor_32x32 = vpx_d45_predictor_32x32_ssse3; - vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_c; - if (flags & HAS_SSE2) vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_sse2; - vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_c; - if (flags & HAS_SSE2) vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_sse2; - vpx_d63_predictor_16x16 = vpx_d63_predictor_16x16_c; - if (flags & HAS_SSSE3) vpx_d63_predictor_16x16 = vpx_d63_predictor_16x16_ssse3; - vpx_d63_predictor_32x32 = vpx_d63_predictor_32x32_c; - if (flags & HAS_SSSE3) vpx_d63_predictor_32x32 = vpx_d63_predictor_32x32_ssse3; - vpx_d63_predictor_4x4 = vpx_d63_predictor_4x4_c; - if (flags & HAS_SSSE3) vpx_d63_predictor_4x4 = vpx_d63_predictor_4x4_ssse3; - vpx_d63_predictor_8x8 = vpx_d63_predictor_8x8_c; - if (flags & HAS_SSSE3) vpx_d63_predictor_8x8 = vpx_d63_predictor_8x8_ssse3; - vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_c; - if (flags & HAS_SSE2) vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_sse2; - vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_c; - if (flags & HAS_SSE2) vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_sse2; - vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_c; - if (flags & HAS_SSE2) vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_sse2; - vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_c; - if (flags & HAS_SSE2) vpx_dc_128_predictor_8x8 = 
vpx_dc_128_predictor_8x8_sse2; - vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_c; - if (flags & HAS_SSE2) vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_sse2; - vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_c; - if (flags & HAS_SSE2) vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_sse2; - vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_c; - if (flags & HAS_SSE2) vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_sse2; - vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_c; - if (flags & HAS_SSE2) vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_sse2; - vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_c; - if (flags & HAS_SSE2) vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_sse2; - vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_c; - if (flags & HAS_SSE2) vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_sse2; - vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_c; - if (flags & HAS_SSE2) vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_sse2; - vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_c; - if (flags & HAS_SSE2) vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_sse2; - vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_c; - if (flags & HAS_SSE2) vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_sse2; - vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_c; - if (flags & HAS_SSE2) vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_sse2; - vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_c; - if (flags & HAS_SSE2) vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_sse2; - vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_c; - if (flags & HAS_SSE2) vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_sse2; - vpx_h_predictor_16x16 = vpx_h_predictor_16x16_c; - if (flags & HAS_SSE2) vpx_h_predictor_16x16 = vpx_h_predictor_16x16_sse2; - vpx_h_predictor_32x32 = vpx_h_predictor_32x32_c; - if (flags & HAS_SSE2) vpx_h_predictor_32x32 = vpx_h_predictor_32x32_sse2; - vpx_h_predictor_4x4 = vpx_h_predictor_4x4_c; 
- if (flags & HAS_SSE2) vpx_h_predictor_4x4 = vpx_h_predictor_4x4_sse2; - vpx_h_predictor_8x8 = vpx_h_predictor_8x8_c; - if (flags & HAS_SSE2) vpx_h_predictor_8x8 = vpx_h_predictor_8x8_sse2; - vpx_idct16x16_10_add = vpx_idct16x16_10_add_c; - if (flags & HAS_SSE2) vpx_idct16x16_10_add = vpx_idct16x16_10_add_sse2; - vpx_idct16x16_1_add = vpx_idct16x16_1_add_c; - if (flags & HAS_SSE2) vpx_idct16x16_1_add = vpx_idct16x16_1_add_sse2; - vpx_idct16x16_256_add = vpx_idct16x16_256_add_c; - if (flags & HAS_SSE2) vpx_idct16x16_256_add = vpx_idct16x16_256_add_sse2; - vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_c; - if (flags & HAS_SSE2) vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_sse2; - vpx_idct32x32_135_add = vpx_idct32x32_135_add_c; - if (flags & HAS_SSE2) vpx_idct32x32_135_add = vpx_idct32x32_1024_add_sse2; - vpx_idct32x32_1_add = vpx_idct32x32_1_add_c; - if (flags & HAS_SSE2) vpx_idct32x32_1_add = vpx_idct32x32_1_add_sse2; - vpx_idct32x32_34_add = vpx_idct32x32_34_add_c; - if (flags & HAS_SSE2) vpx_idct32x32_34_add = vpx_idct32x32_34_add_sse2; - vpx_idct4x4_16_add = vpx_idct4x4_16_add_c; - if (flags & HAS_SSE2) vpx_idct4x4_16_add = vpx_idct4x4_16_add_sse2; - vpx_idct4x4_1_add = vpx_idct4x4_1_add_c; - if (flags & HAS_SSE2) vpx_idct4x4_1_add = vpx_idct4x4_1_add_sse2; - vpx_idct8x8_12_add = vpx_idct8x8_12_add_c; - if (flags & HAS_SSE2) vpx_idct8x8_12_add = vpx_idct8x8_12_add_sse2; - vpx_idct8x8_1_add = vpx_idct8x8_1_add_c; - if (flags & HAS_SSE2) vpx_idct8x8_1_add = vpx_idct8x8_1_add_sse2; - vpx_idct8x8_64_add = vpx_idct8x8_64_add_c; - if (flags & HAS_SSE2) vpx_idct8x8_64_add = vpx_idct8x8_64_add_sse2; - vpx_iwht4x4_16_add = vpx_iwht4x4_16_add_c; - if (flags & HAS_SSE2) vpx_iwht4x4_16_add = vpx_iwht4x4_16_add_sse2; - vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_c; - if (flags & HAS_SSE2) vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_sse2; - vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_c; - if (flags & HAS_SSE2) vpx_lpf_horizontal_4_dual = 
vpx_lpf_horizontal_4_dual_sse2; - vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_c; - if (flags & HAS_SSE2) vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_sse2; - vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_c; - if (flags & HAS_SSE2) vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_sse2; - vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_c; - if (flags & HAS_SSE2) vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_sse2; - vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_c; - if (flags & HAS_SSE2) vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_sse2; - vpx_lpf_vertical_16 = vpx_lpf_vertical_16_c; - if (flags & HAS_SSE2) vpx_lpf_vertical_16 = vpx_lpf_vertical_16_sse2; - vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_c; - if (flags & HAS_SSE2) vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_sse2; - vpx_lpf_vertical_4 = vpx_lpf_vertical_4_c; - if (flags & HAS_SSE2) vpx_lpf_vertical_4 = vpx_lpf_vertical_4_sse2; - vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_c; - if (flags & HAS_SSE2) vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_sse2; - vpx_lpf_vertical_8 = vpx_lpf_vertical_8_c; - if (flags & HAS_SSE2) vpx_lpf_vertical_8 = vpx_lpf_vertical_8_sse2; - vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_c; - if (flags & HAS_SSE2) vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_sse2; - vpx_scaled_2d = vpx_scaled_2d_c; - if (flags & HAS_SSSE3) vpx_scaled_2d = vpx_scaled_2d_ssse3; - vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_c; - if (flags & HAS_SSE2) vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_sse2; - vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_c; - if (flags & HAS_SSE2) vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_sse2; - vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_c; - if (flags & HAS_SSE2) vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_sse2; - vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_c; - if (flags & HAS_SSE2) vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_sse2; - vpx_v_predictor_16x16 = 
vpx_v_predictor_16x16_c; - if (flags & HAS_SSE2) vpx_v_predictor_16x16 = vpx_v_predictor_16x16_sse2; - vpx_v_predictor_32x32 = vpx_v_predictor_32x32_c; - if (flags & HAS_SSE2) vpx_v_predictor_32x32 = vpx_v_predictor_32x32_sse2; - vpx_v_predictor_4x4 = vpx_v_predictor_4x4_c; - if (flags & HAS_SSE2) vpx_v_predictor_4x4 = vpx_v_predictor_4x4_sse2; - vpx_v_predictor_8x8 = vpx_v_predictor_8x8_c; - if (flags & HAS_SSE2) vpx_v_predictor_8x8 = vpx_v_predictor_8x8_sse2; -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/third_party/android/cpu-features.c b/thirdparty/libvpx/third_party/android/cpu-features.c deleted file mode 100644 index e2bd749b01..0000000000 --- a/thirdparty/libvpx/third_party/android/cpu-features.c +++ /dev/null @@ -1,1313 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* ChangeLog for this library: - * - * NDK r10e?: Add MIPS MSA feature. - * - * NDK r10: Support for 64-bit CPUs (Intel, ARM & MIPS). - * - * NDK r8d: Add android_setCpu(). - * - * NDK r8c: Add new ARM CPU features: VFPv2, VFP_D32, VFP_FP16, - * VFP_FMA, NEON_FMA, IDIV_ARM, IDIV_THUMB2 and iWMMXt. - * - * Rewrite the code to parse /proc/self/auxv instead of - * the "Features" field in /proc/cpuinfo. - * - * Dynamically allocate the buffer that hold the content - * of /proc/cpuinfo to deal with newer hardware. - * - * NDK r7c: Fix CPU count computation. The old method only reported the - * number of _active_ CPUs when the library was initialized, - * which could be less than the real total. - * - * NDK r5: Handle buggy kernels which report a CPU Architecture number of 7 - * for an ARMv6 CPU (see below). - * - * Handle kernels that only report 'neon', and not 'vfpv3' - * (VFPv3 is mandated by the ARM architecture is Neon is implemented) - * - * Handle kernels that only report 'vfpv3d16', and not 'vfpv3' - * - * Fix x86 compilation. Report ANDROID_CPU_FAMILY_X86 in - * android_getCpuFamily(). 
- * - * NDK r4: Initial release - */ - -#include "cpu-features.h" - -#include <dlfcn.h> -#include <errno.h> -#include <fcntl.h> -#include <pthread.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/system_properties.h> -#include <unistd.h> - -static pthread_once_t g_once; -static int g_inited; -static AndroidCpuFamily g_cpuFamily; -static uint64_t g_cpuFeatures; -static int g_cpuCount; - -#ifdef __arm__ -static uint32_t g_cpuIdArm; -#endif - -static const int android_cpufeatures_debug = 0; - -#define D(...) \ - do { \ - if (android_cpufeatures_debug) { \ - printf(__VA_ARGS__); fflush(stdout); \ - } \ - } while (0) - -#ifdef __i386__ -static __inline__ void x86_cpuid(int func, int values[4]) -{ - int a, b, c, d; - /* We need to preserve ebx since we're compiling PIC code */ - /* this means we can't use "=b" for the second output register */ - __asm__ __volatile__ ( \ - "push %%ebx\n" - "cpuid\n" \ - "mov %%ebx, %1\n" - "pop %%ebx\n" - : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ - : "a" (func) \ - ); - values[0] = a; - values[1] = b; - values[2] = c; - values[3] = d; -} -#elif defined(__x86_64__) -static __inline__ void x86_cpuid(int func, int values[4]) -{ - int64_t a, b, c, d; - /* We need to preserve ebx since we're compiling PIC code */ - /* this means we can't use "=b" for the second output register */ - __asm__ __volatile__ ( \ - "push %%rbx\n" - "cpuid\n" \ - "mov %%rbx, %1\n" - "pop %%rbx\n" - : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ - : "a" (func) \ - ); - values[0] = a; - values[1] = b; - values[2] = c; - values[3] = d; -} -#endif - -/* Get the size of a file by reading it until the end. This is needed - * because files under /proc do not always return a valid size when - * using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed. 
- */ -static int -get_file_size(const char* pathname) -{ - - int fd, result = 0; - char buffer[256]; - - fd = open(pathname, O_RDONLY); - if (fd < 0) { - D("Can't open %s: %s\n", pathname, strerror(errno)); - return -1; - } - - for (;;) { - int ret = read(fd, buffer, sizeof buffer); - if (ret < 0) { - if (errno == EINTR) - continue; - D("Error while reading %s: %s\n", pathname, strerror(errno)); - break; - } - if (ret == 0) - break; - - result += ret; - } - close(fd); - return result; -} - -/* Read the content of /proc/cpuinfo into a user-provided buffer. - * Return the length of the data, or -1 on error. Does *not* - * zero-terminate the content. Will not read more - * than 'buffsize' bytes. - */ -static int -read_file(const char* pathname, char* buffer, size_t buffsize) -{ - int fd, count; - - fd = open(pathname, O_RDONLY); - if (fd < 0) { - D("Could not open %s: %s\n", pathname, strerror(errno)); - return -1; - } - count = 0; - while (count < (int)buffsize) { - int ret = read(fd, buffer + count, buffsize - count); - if (ret < 0) { - if (errno == EINTR) - continue; - D("Error while reading from %s: %s\n", pathname, strerror(errno)); - if (count == 0) - count = -1; - break; - } - if (ret == 0) - break; - count += ret; - } - close(fd); - return count; -} - -#ifdef __arm__ -/* Extract the content of a the first occurence of a given field in - * the content of /proc/cpuinfo and return it as a heap-allocated - * string that must be freed by the caller. - * - * Return NULL if not found - */ -static char* -extract_cpuinfo_field(const char* buffer, int buflen, const char* field) -{ - int fieldlen = strlen(field); - const char* bufend = buffer + buflen; - char* result = NULL; - int len; - const char *p, *q; - - /* Look for first field occurence, and ensures it starts the line. 
*/ - p = buffer; - for (;;) { - p = memmem(p, bufend-p, field, fieldlen); - if (p == NULL) - goto EXIT; - - if (p == buffer || p[-1] == '\n') - break; - - p += fieldlen; - } - - /* Skip to the first column followed by a space */ - p += fieldlen; - p = memchr(p, ':', bufend-p); - if (p == NULL || p[1] != ' ') - goto EXIT; - - /* Find the end of the line */ - p += 2; - q = memchr(p, '\n', bufend-p); - if (q == NULL) - q = bufend; - - /* Copy the line into a heap-allocated buffer */ - len = q-p; - result = malloc(len+1); - if (result == NULL) - goto EXIT; - - memcpy(result, p, len); - result[len] = '\0'; - -EXIT: - return result; -} - -/* Checks that a space-separated list of items contains one given 'item'. - * Returns 1 if found, 0 otherwise. - */ -static int -has_list_item(const char* list, const char* item) -{ - const char* p = list; - int itemlen = strlen(item); - - if (list == NULL) - return 0; - - while (*p) { - const char* q; - - /* skip spaces */ - while (*p == ' ' || *p == '\t') - p++; - - /* find end of current list item */ - q = p; - while (*q && *q != ' ' && *q != '\t') - q++; - - if (itemlen == q-p && !memcmp(p, item, itemlen)) - return 1; - - /* skip to next item */ - p = q; - } - return 0; -} -#endif /* __arm__ */ - -/* Parse a number starting from 'input', but not going further - * than 'limit'. Return the value into '*result'. - * - * NOTE: Does not skip over leading spaces, or deal with sign characters. - * NOTE: Ignores overflows. - * - * The function returns NULL in case of error (bad format), or the new - * position after the decimal number in case of success (which will always - * be <= 'limit'). 
- */ -static const char* -parse_number(const char* input, const char* limit, int base, int* result) -{ - const char* p = input; - int val = 0; - while (p < limit) { - int d = (*p - '0'); - if ((unsigned)d >= 10U) { - d = (*p - 'a'); - if ((unsigned)d >= 6U) - d = (*p - 'A'); - if ((unsigned)d >= 6U) - break; - d += 10; - } - if (d >= base) - break; - val = val*base + d; - p++; - } - if (p == input) - return NULL; - - *result = val; - return p; -} - -static const char* -parse_decimal(const char* input, const char* limit, int* result) -{ - return parse_number(input, limit, 10, result); -} - -#ifdef __arm__ -static const char* -parse_hexadecimal(const char* input, const char* limit, int* result) -{ - return parse_number(input, limit, 16, result); -} -#endif /* __arm__ */ - -/* This small data type is used to represent a CPU list / mask, as read - * from sysfs on Linux. See http://www.kernel.org/doc/Documentation/cputopology.txt - * - * For now, we don't expect more than 32 cores on mobile devices, so keep - * everything simple. - */ -typedef struct { - uint32_t mask; -} CpuList; - -static __inline__ void -cpulist_init(CpuList* list) { - list->mask = 0; -} - -static __inline__ void -cpulist_and(CpuList* list1, CpuList* list2) { - list1->mask &= list2->mask; -} - -static __inline__ void -cpulist_set(CpuList* list, int index) { - if ((unsigned)index < 32) { - list->mask |= (uint32_t)(1U << index); - } -} - -static __inline__ int -cpulist_count(CpuList* list) { - return __builtin_popcount(list->mask); -} - -/* Parse a textual list of cpus and store the result inside a CpuList object. - * Input format is the following: - * - comma-separated list of items (no spaces) - * - each item is either a single decimal number (cpu index), or a range made - * of two numbers separated by a single dash (-). Ranges are inclusive. 
- * - * Examples: 0 - * 2,4-127,128-143 - * 0-1 - */ -static void -cpulist_parse(CpuList* list, const char* line, int line_len) -{ - const char* p = line; - const char* end = p + line_len; - const char* q; - - /* NOTE: the input line coming from sysfs typically contains a - * trailing newline, so take care of it in the code below - */ - while (p < end && *p != '\n') - { - int val, start_value, end_value; - - /* Find the end of current item, and put it into 'q' */ - q = memchr(p, ',', end-p); - if (q == NULL) { - q = end; - } - - /* Get first value */ - p = parse_decimal(p, q, &start_value); - if (p == NULL) - goto BAD_FORMAT; - - end_value = start_value; - - /* If we're not at the end of the item, expect a dash and - * and integer; extract end value. - */ - if (p < q && *p == '-') { - p = parse_decimal(p+1, q, &end_value); - if (p == NULL) - goto BAD_FORMAT; - } - - /* Set bits CPU list bits */ - for (val = start_value; val <= end_value; val++) { - cpulist_set(list, val); - } - - /* Jump to next item */ - p = q; - if (p < end) - p++; - } - -BAD_FORMAT: - ; -} - -/* Read a CPU list from one sysfs file */ -static void -cpulist_read_from(CpuList* list, const char* filename) -{ - char file[64]; - int filelen; - - cpulist_init(list); - - filelen = read_file(filename, file, sizeof file); - if (filelen < 0) { - D("Could not read %s: %s\n", filename, strerror(errno)); - return; - } - - cpulist_parse(list, file, filelen); -} -#if defined(__aarch64__) -// see <uapi/asm/hwcap.h> kernel header -#define HWCAP_FP (1 << 0) -#define HWCAP_ASIMD (1 << 1) -#define HWCAP_AES (1 << 3) -#define HWCAP_PMULL (1 << 4) -#define HWCAP_SHA1 (1 << 5) -#define HWCAP_SHA2 (1 << 6) -#define HWCAP_CRC32 (1 << 7) -#endif - -#if defined(__arm__) - -// See <asm/hwcap.h> kernel header. 
-#define HWCAP_VFP (1 << 6) -#define HWCAP_IWMMXT (1 << 9) -#define HWCAP_NEON (1 << 12) -#define HWCAP_VFPv3 (1 << 13) -#define HWCAP_VFPv3D16 (1 << 14) -#define HWCAP_VFPv4 (1 << 16) -#define HWCAP_IDIVA (1 << 17) -#define HWCAP_IDIVT (1 << 18) - -// see <uapi/asm/hwcap.h> kernel header -#define HWCAP2_AES (1 << 0) -#define HWCAP2_PMULL (1 << 1) -#define HWCAP2_SHA1 (1 << 2) -#define HWCAP2_SHA2 (1 << 3) -#define HWCAP2_CRC32 (1 << 4) - -// This is the list of 32-bit ARMv7 optional features that are _always_ -// supported by ARMv8 CPUs, as mandated by the ARM Architecture Reference -// Manual. -#define HWCAP_SET_FOR_ARMV8 \ - ( HWCAP_VFP | \ - HWCAP_NEON | \ - HWCAP_VFPv3 | \ - HWCAP_VFPv4 | \ - HWCAP_IDIVA | \ - HWCAP_IDIVT ) -#endif - -#if defined(__mips__) -// see <uapi/asm/hwcap.h> kernel header -#define HWCAP_MIPS_R6 (1 << 0) -#define HWCAP_MIPS_MSA (1 << 1) -#endif - -#if defined(__arm__) || defined(__aarch64__) || defined(__mips__) - -#define AT_HWCAP 16 -#define AT_HWCAP2 26 - -// Probe the system's C library for a 'getauxval' function and call it if -// it exits, or return 0 for failure. This function is available since API -// level 20. -// -// This code does *NOT* check for '__ANDROID_API__ >= 20' to support the -// edge case where some NDK developers use headers for a platform that is -// newer than the one really targetted by their application. -// This is typically done to use newer native APIs only when running on more -// recent Android versions, and requires careful symbol management. -// -// Note that getauxval() can't really be re-implemented here, because -// its implementation does not parse /proc/self/auxv. Instead it depends -// on values that are passed by the kernel at process-init time to the -// C runtime initialization layer. 
-static uint32_t -get_elf_hwcap_from_getauxval(int hwcap_type) { - typedef unsigned long getauxval_func_t(unsigned long); - - dlerror(); - void* libc_handle = dlopen("libc.so", RTLD_NOW); - if (!libc_handle) { - D("Could not dlopen() C library: %s\n", dlerror()); - return 0; - } - - uint32_t ret = 0; - getauxval_func_t* func = (getauxval_func_t*) - dlsym(libc_handle, "getauxval"); - if (!func) { - D("Could not find getauxval() in C library\n"); - } else { - // Note: getauxval() returns 0 on failure. Doesn't touch errno. - ret = (uint32_t)(*func)(hwcap_type); - } - dlclose(libc_handle); - return ret; -} -#endif - -#if defined(__arm__) -// Parse /proc/self/auxv to extract the ELF HW capabilities bitmap for the -// current CPU. Note that this file is not accessible from regular -// application processes on some Android platform releases. -// On success, return new ELF hwcaps, or 0 on failure. -static uint32_t -get_elf_hwcap_from_proc_self_auxv(void) { - const char filepath[] = "/proc/self/auxv"; - int fd = TEMP_FAILURE_RETRY(open(filepath, O_RDONLY)); - if (fd < 0) { - D("Could not open %s: %s\n", filepath, strerror(errno)); - return 0; - } - - struct { uint32_t tag; uint32_t value; } entry; - - uint32_t result = 0; - for (;;) { - int ret = TEMP_FAILURE_RETRY(read(fd, (char*)&entry, sizeof entry)); - if (ret < 0) { - D("Error while reading %s: %s\n", filepath, strerror(errno)); - break; - } - // Detect end of list. - if (ret == 0 || (entry.tag == 0 && entry.value == 0)) - break; - if (entry.tag == AT_HWCAP) { - result = entry.value; - break; - } - } - close(fd); - return result; -} - -/* Compute the ELF HWCAP flags from the content of /proc/cpuinfo. - * This works by parsing the 'Features' line, which lists which optional - * features the device's CPU supports, on top of its reference - * architecture. 
- */ -static uint32_t -get_elf_hwcap_from_proc_cpuinfo(const char* cpuinfo, int cpuinfo_len) { - uint32_t hwcaps = 0; - long architecture = 0; - char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture"); - if (cpuArch) { - architecture = strtol(cpuArch, NULL, 10); - free(cpuArch); - - if (architecture >= 8L) { - // This is a 32-bit ARM binary running on a 64-bit ARM64 kernel. - // The 'Features' line only lists the optional features that the - // device's CPU supports, compared to its reference architecture - // which are of no use for this process. - D("Faking 32-bit ARM HWCaps on ARMv%ld CPU\n", architecture); - return HWCAP_SET_FOR_ARMV8; - } - } - - char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features"); - if (cpuFeatures != NULL) { - D("Found cpuFeatures = '%s'\n", cpuFeatures); - - if (has_list_item(cpuFeatures, "vfp")) - hwcaps |= HWCAP_VFP; - if (has_list_item(cpuFeatures, "vfpv3")) - hwcaps |= HWCAP_VFPv3; - if (has_list_item(cpuFeatures, "vfpv3d16")) - hwcaps |= HWCAP_VFPv3D16; - if (has_list_item(cpuFeatures, "vfpv4")) - hwcaps |= HWCAP_VFPv4; - if (has_list_item(cpuFeatures, "neon")) - hwcaps |= HWCAP_NEON; - if (has_list_item(cpuFeatures, "idiva")) - hwcaps |= HWCAP_IDIVA; - if (has_list_item(cpuFeatures, "idivt")) - hwcaps |= HWCAP_IDIVT; - if (has_list_item(cpuFeatures, "idiv")) - hwcaps |= HWCAP_IDIVA | HWCAP_IDIVT; - if (has_list_item(cpuFeatures, "iwmmxt")) - hwcaps |= HWCAP_IWMMXT; - - free(cpuFeatures); - } - return hwcaps; -} -#endif /* __arm__ */ - -/* Return the number of cpus present on a given device. - * - * To handle all weird kernel configurations, we need to compute the - * intersection of the 'present' and 'possible' CPU lists and count - * the result. 
- */ -static int -get_cpu_count(void) -{ - CpuList cpus_present[1]; - CpuList cpus_possible[1]; - - cpulist_read_from(cpus_present, "/sys/devices/system/cpu/present"); - cpulist_read_from(cpus_possible, "/sys/devices/system/cpu/possible"); - - /* Compute the intersection of both sets to get the actual number of - * CPU cores that can be used on this device by the kernel. - */ - cpulist_and(cpus_present, cpus_possible); - - return cpulist_count(cpus_present); -} - -static void -android_cpuInitFamily(void) -{ -#if defined(__arm__) - g_cpuFamily = ANDROID_CPU_FAMILY_ARM; -#elif defined(__i386__) - g_cpuFamily = ANDROID_CPU_FAMILY_X86; -#elif defined(__mips64) -/* Needs to be before __mips__ since the compiler defines both */ - g_cpuFamily = ANDROID_CPU_FAMILY_MIPS64; -#elif defined(__mips__) - g_cpuFamily = ANDROID_CPU_FAMILY_MIPS; -#elif defined(__aarch64__) - g_cpuFamily = ANDROID_CPU_FAMILY_ARM64; -#elif defined(__x86_64__) - g_cpuFamily = ANDROID_CPU_FAMILY_X86_64; -#else - g_cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN; -#endif -} - -static void -android_cpuInit(void) -{ - char* cpuinfo = NULL; - int cpuinfo_len; - - android_cpuInitFamily(); - - g_cpuFeatures = 0; - g_cpuCount = 1; - g_inited = 1; - - cpuinfo_len = get_file_size("/proc/cpuinfo"); - if (cpuinfo_len < 0) { - D("cpuinfo_len cannot be computed!"); - return; - } - cpuinfo = malloc(cpuinfo_len); - if (cpuinfo == NULL) { - D("cpuinfo buffer could not be allocated"); - return; - } - cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len); - D("cpuinfo_len is (%d):\n%.*s\n", cpuinfo_len, - cpuinfo_len >= 0 ? cpuinfo_len : 0, cpuinfo); - - if (cpuinfo_len < 0) /* should not happen */ { - free(cpuinfo); - return; - } - - /* Count the CPU cores, the value may be 0 for single-core CPUs */ - g_cpuCount = get_cpu_count(); - if (g_cpuCount == 0) { - g_cpuCount = 1; - } - - D("found cpuCount = %d\n", g_cpuCount); - -#ifdef __arm__ - { - /* Extract architecture from the "CPU Architecture" field. 
- * The list is well-known, unlike the the output of - * the 'Processor' field which can vary greatly. - * - * See the definition of the 'proc_arch' array in - * $KERNEL/arch/arm/kernel/setup.c and the 'c_show' function in - * same file. - */ - char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture"); - - if (cpuArch != NULL) { - char* end; - long archNumber; - int hasARMv7 = 0; - - D("found cpuArch = '%s'\n", cpuArch); - - /* read the initial decimal number, ignore the rest */ - archNumber = strtol(cpuArch, &end, 10); - - /* Note that ARMv8 is upwards compatible with ARMv7. */ - if (end > cpuArch && archNumber >= 7) { - hasARMv7 = 1; - } - - /* Unfortunately, it seems that certain ARMv6-based CPUs - * report an incorrect architecture number of 7! - * - * See http://code.google.com/p/android/issues/detail?id=10812 - * - * We try to correct this by looking at the 'elf_format' - * field reported by the 'Processor' field, which is of the - * form of "(v7l)" for an ARMv7-based CPU, and "(v6l)" for - * an ARMv6-one. 
- */ - if (hasARMv7) { - char* cpuProc = extract_cpuinfo_field(cpuinfo, cpuinfo_len, - "Processor"); - if (cpuProc != NULL) { - D("found cpuProc = '%s'\n", cpuProc); - if (has_list_item(cpuProc, "(v6l)")) { - D("CPU processor and architecture mismatch!!\n"); - hasARMv7 = 0; - } - free(cpuProc); - } - } - - if (hasARMv7) { - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_ARMv7; - } - - /* The LDREX / STREX instructions are available from ARMv6 */ - if (archNumber >= 6) { - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_LDREX_STREX; - } - - free(cpuArch); - } - - /* Extract the list of CPU features from ELF hwcaps */ - uint32_t hwcaps = 0; - hwcaps = get_elf_hwcap_from_getauxval(AT_HWCAP); - if (!hwcaps) { - D("Parsing /proc/self/auxv to extract ELF hwcaps!\n"); - hwcaps = get_elf_hwcap_from_proc_self_auxv(); - } - if (!hwcaps) { - // Parsing /proc/self/auxv will fail from regular application - // processes on some Android platform versions, when this happens - // parse proc/cpuinfo instead. - D("Parsing /proc/cpuinfo to extract ELF hwcaps!\n"); - hwcaps = get_elf_hwcap_from_proc_cpuinfo(cpuinfo, cpuinfo_len); - } - - if (hwcaps != 0) { - int has_vfp = (hwcaps & HWCAP_VFP); - int has_vfpv3 = (hwcaps & HWCAP_VFPv3); - int has_vfpv3d16 = (hwcaps & HWCAP_VFPv3D16); - int has_vfpv4 = (hwcaps & HWCAP_VFPv4); - int has_neon = (hwcaps & HWCAP_NEON); - int has_idiva = (hwcaps & HWCAP_IDIVA); - int has_idivt = (hwcaps & HWCAP_IDIVT); - int has_iwmmxt = (hwcaps & HWCAP_IWMMXT); - - // The kernel does a poor job at ensuring consistency when - // describing CPU features. So lots of guessing is needed. - - // 'vfpv4' implies VFPv3|VFP_FMA|FP16 - if (has_vfpv4) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 | - ANDROID_CPU_ARM_FEATURE_VFP_FP16 | - ANDROID_CPU_ARM_FEATURE_VFP_FMA; - - // 'vfpv3' or 'vfpv3d16' imply VFPv3. Note that unlike GCC, - // a value of 'vfpv3' doesn't necessarily mean that the D32 - // feature is present, so be conservative. 
All CPUs in the - // field that support D32 also support NEON, so this should - // not be a problem in practice. - if (has_vfpv3 || has_vfpv3d16) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3; - - // 'vfp' is super ambiguous. Depending on the kernel, it can - // either mean VFPv2 or VFPv3. Make it depend on ARMv7. - if (has_vfp) { - if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3; - else - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2; - } - - // Neon implies VFPv3|D32, and if vfpv4 is detected, NEON_FMA - if (has_neon) { - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 | - ANDROID_CPU_ARM_FEATURE_NEON | - ANDROID_CPU_ARM_FEATURE_VFP_D32; - if (has_vfpv4) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_NEON_FMA; - } - - // VFPv3 implies VFPv2 and ARMv7 - if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2 | - ANDROID_CPU_ARM_FEATURE_ARMv7; - - if (has_idiva) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM; - if (has_idivt) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2; - - if (has_iwmmxt) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_iWMMXt; - } - - /* Extract the list of CPU features from ELF hwcaps2 */ - uint32_t hwcaps2 = 0; - hwcaps2 = get_elf_hwcap_from_getauxval(AT_HWCAP2); - if (hwcaps2 != 0) { - int has_aes = (hwcaps2 & HWCAP2_AES); - int has_pmull = (hwcaps2 & HWCAP2_PMULL); - int has_sha1 = (hwcaps2 & HWCAP2_SHA1); - int has_sha2 = (hwcaps2 & HWCAP2_SHA2); - int has_crc32 = (hwcaps2 & HWCAP2_CRC32); - - if (has_aes) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_AES; - if (has_pmull) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_PMULL; - if (has_sha1) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_SHA1; - if (has_sha2) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_SHA2; - if (has_crc32) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_CRC32; - } - /* Extract the cpuid value from various fields */ - // The CPUID value is broken up in several entries in /proc/cpuinfo. 
- // This table is used to rebuild it from the entries. - static const struct CpuIdEntry { - const char* field; - char format; - char bit_lshift; - char bit_length; - } cpu_id_entries[] = { - { "CPU implementer", 'x', 24, 8 }, - { "CPU variant", 'x', 20, 4 }, - { "CPU part", 'x', 4, 12 }, - { "CPU revision", 'd', 0, 4 }, - }; - size_t i; - D("Parsing /proc/cpuinfo to recover CPUID\n"); - for (i = 0; - i < sizeof(cpu_id_entries)/sizeof(cpu_id_entries[0]); - ++i) { - const struct CpuIdEntry* entry = &cpu_id_entries[i]; - char* value = extract_cpuinfo_field(cpuinfo, - cpuinfo_len, - entry->field); - if (value == NULL) - continue; - - D("field=%s value='%s'\n", entry->field, value); - char* value_end = value + strlen(value); - int val = 0; - const char* start = value; - const char* p; - if (value[0] == '0' && (value[1] == 'x' || value[1] == 'X')) { - start += 2; - p = parse_hexadecimal(start, value_end, &val); - } else if (entry->format == 'x') - p = parse_hexadecimal(value, value_end, &val); - else - p = parse_decimal(value, value_end, &val); - - if (p > (const char*)start) { - val &= ((1 << entry->bit_length)-1); - val <<= entry->bit_lshift; - g_cpuIdArm |= (uint32_t) val; - } - - free(value); - } - - // Handle kernel configuration bugs that prevent the correct - // reporting of CPU features. - static const struct CpuFix { - uint32_t cpuid; - uint64_t or_flags; - } cpu_fixes[] = { - /* The Nexus 4 (Qualcomm Krait) kernel configuration - * forgets to report IDIV support. 
*/ - { 0x510006f2, ANDROID_CPU_ARM_FEATURE_IDIV_ARM | - ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 }, - { 0x510006f3, ANDROID_CPU_ARM_FEATURE_IDIV_ARM | - ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 }, - }; - size_t n; - for (n = 0; n < sizeof(cpu_fixes)/sizeof(cpu_fixes[0]); ++n) { - const struct CpuFix* entry = &cpu_fixes[n]; - - if (g_cpuIdArm == entry->cpuid) - g_cpuFeatures |= entry->or_flags; - } - - // Special case: The emulator-specific Android 4.2 kernel fails - // to report support for the 32-bit ARM IDIV instruction. - // Technically, this is a feature of the virtual CPU implemented - // by the emulator. Note that it could also support Thumb IDIV - // in the future, and this will have to be slightly updated. - char* hardware = extract_cpuinfo_field(cpuinfo, - cpuinfo_len, - "Hardware"); - if (hardware) { - if (!strcmp(hardware, "Goldfish") && - g_cpuIdArm == 0x4100c080 && - (g_cpuFamily & ANDROID_CPU_ARM_FEATURE_ARMv7) != 0) { - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM; - } - free(hardware); - } - } -#endif /* __arm__ */ -#ifdef __aarch64__ - { - /* Extract the list of CPU features from ELF hwcaps */ - uint32_t hwcaps = 0; - hwcaps = get_elf_hwcap_from_getauxval(AT_HWCAP); - if (hwcaps != 0) { - int has_fp = (hwcaps & HWCAP_FP); - int has_asimd = (hwcaps & HWCAP_ASIMD); - int has_aes = (hwcaps & HWCAP_AES); - int has_pmull = (hwcaps & HWCAP_PMULL); - int has_sha1 = (hwcaps & HWCAP_SHA1); - int has_sha2 = (hwcaps & HWCAP_SHA2); - int has_crc32 = (hwcaps & HWCAP_CRC32); - - if(has_fp == 0) { - D("ERROR: Floating-point unit missing, but is required by Android on AArch64 CPUs\n"); - } - if(has_asimd == 0) { - D("ERROR: ASIMD unit missing, but is required by Android on AArch64 CPUs\n"); - } - - if (has_fp) - g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_FP; - if (has_asimd) - g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_ASIMD; - if (has_aes) - g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_AES; - if (has_pmull) - g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_PMULL; - if 
(has_sha1) - g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_SHA1; - if (has_sha2) - g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_SHA2; - if (has_crc32) - g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_CRC32; - } - } -#endif /* __aarch64__ */ - -#if defined(__i386__) || defined(__x86_64__) - int regs[4]; - -/* According to http://en.wikipedia.org/wiki/CPUID */ -#define VENDOR_INTEL_b 0x756e6547 -#define VENDOR_INTEL_c 0x6c65746e -#define VENDOR_INTEL_d 0x49656e69 - - x86_cpuid(0, regs); - int vendorIsIntel = (regs[1] == VENDOR_INTEL_b && - regs[2] == VENDOR_INTEL_c && - regs[3] == VENDOR_INTEL_d); - - x86_cpuid(1, regs); - if ((regs[2] & (1 << 9)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSSE3; - } - if ((regs[2] & (1 << 23)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_POPCNT; - } - if ((regs[2] & (1 << 19)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSE4_1; - } - if ((regs[2] & (1 << 20)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSE4_2; - } - if (vendorIsIntel && (regs[2] & (1 << 22)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_MOVBE; - } - if ((regs[2] & (1 << 25)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AES_NI; - } - if ((regs[2] & (1 << 28)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AVX; - } - if ((regs[2] & (1 << 30)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_RDRAND; - } - - x86_cpuid(7, regs); - if ((regs[1] & (1 << 5)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AVX2; - } - if ((regs[1] & (1 << 29)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SHA_NI; - } - - -#endif -#if defined( __mips__) - { /* MIPS and MIPS64 */ - /* Extract the list of CPU features from ELF hwcaps */ - uint32_t hwcaps = 0; - hwcaps = get_elf_hwcap_from_getauxval(AT_HWCAP); - if (hwcaps != 0) { - int has_r6 = (hwcaps & HWCAP_MIPS_R6); - int has_msa = (hwcaps & HWCAP_MIPS_MSA); - if (has_r6) - g_cpuFeatures |= ANDROID_CPU_MIPS_FEATURE_R6; - if (has_msa) - g_cpuFeatures |= ANDROID_CPU_MIPS_FEATURE_MSA; - } - } -#endif 
/* __mips__ */ - - free(cpuinfo); -} - - -AndroidCpuFamily -android_getCpuFamily(void) -{ - pthread_once(&g_once, android_cpuInit); - return g_cpuFamily; -} - - -uint64_t -android_getCpuFeatures(void) -{ - pthread_once(&g_once, android_cpuInit); - return g_cpuFeatures; -} - - -int -android_getCpuCount(void) -{ - pthread_once(&g_once, android_cpuInit); - return g_cpuCount; -} - -static void -android_cpuInitDummy(void) -{ - g_inited = 1; -} - -int -android_setCpu(int cpu_count, uint64_t cpu_features) -{ - /* Fail if the library was already initialized. */ - if (g_inited) - return 0; - - android_cpuInitFamily(); - g_cpuCount = (cpu_count <= 0 ? 1 : cpu_count); - g_cpuFeatures = cpu_features; - pthread_once(&g_once, android_cpuInitDummy); - - return 1; -} - -#ifdef __arm__ -uint32_t -android_getCpuIdArm(void) -{ - pthread_once(&g_once, android_cpuInit); - return g_cpuIdArm; -} - -int -android_setCpuArm(int cpu_count, uint64_t cpu_features, uint32_t cpu_id) -{ - if (!android_setCpu(cpu_count, cpu_features)) - return 0; - - g_cpuIdArm = cpu_id; - return 1; -} -#endif /* __arm__ */ - -/* - * Technical note: Making sense of ARM's FPU architecture versions. - * - * FPA was ARM's first attempt at an FPU architecture. There is no Android - * device that actually uses it since this technology was already obsolete - * when the project started. If you see references to FPA instructions - * somewhere, you can be sure that this doesn't apply to Android at all. - * - * FPA was followed by "VFP", soon renamed "VFPv1" due to the emergence of - * new versions / additions to it. ARM considers this obsolete right now, - * and no known Android device implements it either. - * - * VFPv2 added a few instructions to VFPv1, and is an *optional* extension - * supported by some ARMv5TE, ARMv6 and ARMv6T2 CPUs. Note that a device - * supporting the 'armeabi' ABI doesn't necessarily support these. 
- * - * VFPv3-D16 adds a few instructions on top of VFPv2 and is typically used - * on ARMv7-A CPUs which implement a FPU. Note that it is also mandated - * by the Android 'armeabi-v7a' ABI. The -D16 suffix in its name means - * that it provides 16 double-precision FPU registers (d0-d15) and 32 - * single-precision ones (s0-s31) which happen to be mapped to the same - * register banks. - * - * VFPv3-D32 is the name of an extension to VFPv3-D16 that provides 16 - * additional double precision registers (d16-d31). Note that there are - * still only 32 single precision registers. - * - * VFPv3xD is a *subset* of VFPv3-D16 that only provides single-precision - * registers. It is only used on ARMv7-M (i.e. on micro-controllers) which - * are not supported by Android. Note that it is not compatible with VFPv2. - * - * NOTE: The term 'VFPv3' usually designate either VFPv3-D16 or VFPv3-D32 - * depending on context. For example GCC uses it for VFPv3-D32, but - * the Linux kernel code uses it for VFPv3-D16 (especially in - * /proc/cpuinfo). Always try to use the full designation when - * possible. - * - * NEON, a.k.a. "ARM Advanced SIMD" is an extension that provides - * instructions to perform parallel computations on vectors of 8, 16, - * 32, 64 and 128 bit quantities. NEON requires VFPv32-D32 since all - * NEON registers are also mapped to the same register banks. - * - * VFPv4-D16, adds a few instructions on top of VFPv3-D16 in order to - * perform fused multiply-accumulate on VFP registers, as well as - * half-precision (16-bit) conversion operations. - * - * VFPv4-D32 is VFPv4-D16 with 32, instead of 16, FPU double precision - * registers. - * - * VPFv4-NEON is VFPv4-D32 with NEON instructions. It also adds fused - * multiply-accumulate instructions that work on the NEON registers. - * - * NOTE: Similarly, "VFPv4" might either reference VFPv4-D16 or VFPv4-D32 - * depending on context. 
- * - * The following information was determined by scanning the binutils-2.22 - * sources: - * - * Basic VFP instruction subsets: - * - * #define FPU_VFP_EXT_V1xD 0x08000000 // Base VFP instruction set. - * #define FPU_VFP_EXT_V1 0x04000000 // Double-precision insns. - * #define FPU_VFP_EXT_V2 0x02000000 // ARM10E VFPr1. - * #define FPU_VFP_EXT_V3xD 0x01000000 // VFPv3 single-precision. - * #define FPU_VFP_EXT_V3 0x00800000 // VFPv3 double-precision. - * #define FPU_NEON_EXT_V1 0x00400000 // Neon (SIMD) insns. - * #define FPU_VFP_EXT_D32 0x00200000 // Registers D16-D31. - * #define FPU_VFP_EXT_FP16 0x00100000 // Half-precision extensions. - * #define FPU_NEON_EXT_FMA 0x00080000 // Neon fused multiply-add - * #define FPU_VFP_EXT_FMA 0x00040000 // VFP fused multiply-add - * - * FPU types (excluding NEON) - * - * FPU_VFP_V1xD (EXT_V1xD) - * | - * +--------------------------+ - * | | - * FPU_VFP_V1 (+EXT_V1) FPU_VFP_V3xD (+EXT_V2+EXT_V3xD) - * | | - * | | - * FPU_VFP_V2 (+EXT_V2) FPU_VFP_V4_SP_D16 (+EXT_FP16+EXT_FMA) - * | - * FPU_VFP_V3D16 (+EXT_Vx3D+EXT_V3) - * | - * +--------------------------+ - * | | - * FPU_VFP_V3 (+EXT_D32) FPU_VFP_V4D16 (+EXT_FP16+EXT_FMA) - * | | - * | FPU_VFP_V4 (+EXT_D32) - * | - * FPU_VFP_HARD (+EXT_FMA+NEON_EXT_FMA) - * - * VFP architectures: - * - * ARCH_VFP_V1xD (EXT_V1xD) - * | - * +------------------+ - * | | - * | ARCH_VFP_V3xD (+EXT_V2+EXT_V3xD) - * | | - * | ARCH_VFP_V3xD_FP16 (+EXT_FP16) - * | | - * | ARCH_VFP_V4_SP_D16 (+EXT_FMA) - * | - * ARCH_VFP_V1 (+EXT_V1) - * | - * ARCH_VFP_V2 (+EXT_V2) - * | - * ARCH_VFP_V3D16 (+EXT_V3xD+EXT_V3) - * | - * +-------------------+ - * | | - * | ARCH_VFP_V3D16_FP16 (+EXT_FP16) - * | - * +-------------------+ - * | | - * | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA) - * | | - * | ARCH_VFP_V4 (+EXT_D32) - * | | - * | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA) - * | - * ARCH_VFP_V3 (+EXT_D32) - * | - * +-------------------+ - * | | - * | ARCH_VFP_V3_FP16 (+EXT_FP16) - * | - * ARCH_VFP_V3_PLUS_NEON_V1 
(+EXT_NEON) - * | - * ARCH_NEON_FP16 (+EXT_FP16) - * - * -fpu=<name> values and their correspondance with FPU architectures above: - * - * {"vfp", FPU_ARCH_VFP_V2}, - * {"vfp9", FPU_ARCH_VFP_V2}, - * {"vfp3", FPU_ARCH_VFP_V3}, // For backwards compatbility. - * {"vfp10", FPU_ARCH_VFP_V2}, - * {"vfp10-r0", FPU_ARCH_VFP_V1}, - * {"vfpxd", FPU_ARCH_VFP_V1xD}, - * {"vfpv2", FPU_ARCH_VFP_V2}, - * {"vfpv3", FPU_ARCH_VFP_V3}, - * {"vfpv3-fp16", FPU_ARCH_VFP_V3_FP16}, - * {"vfpv3-d16", FPU_ARCH_VFP_V3D16}, - * {"vfpv3-d16-fp16", FPU_ARCH_VFP_V3D16_FP16}, - * {"vfpv3xd", FPU_ARCH_VFP_V3xD}, - * {"vfpv3xd-fp16", FPU_ARCH_VFP_V3xD_FP16}, - * {"neon", FPU_ARCH_VFP_V3_PLUS_NEON_V1}, - * {"neon-fp16", FPU_ARCH_NEON_FP16}, - * {"vfpv4", FPU_ARCH_VFP_V4}, - * {"vfpv4-d16", FPU_ARCH_VFP_V4D16}, - * {"fpv4-sp-d16", FPU_ARCH_VFP_V4_SP_D16}, - * {"neon-vfpv4", FPU_ARCH_NEON_VFP_V4}, - * - * - * Simplified diagram that only includes FPUs supported by Android: - * Only ARCH_VFP_V3D16 is actually mandated by the armeabi-v7a ABI, - * all others are optional and must be probed at runtime. - * - * ARCH_VFP_V3D16 (EXT_V1xD+EXT_V1+EXT_V2+EXT_V3xD+EXT_V3) - * | - * +-------------------+ - * | | - * | ARCH_VFP_V3D16_FP16 (+EXT_FP16) - * | - * +-------------------+ - * | | - * | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA) - * | | - * | ARCH_VFP_V4 (+EXT_D32) - * | | - * | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA) - * | - * ARCH_VFP_V3 (+EXT_D32) - * | - * +-------------------+ - * | | - * | ARCH_VFP_V3_FP16 (+EXT_FP16) - * | - * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON) - * | - * ARCH_NEON_FP16 (+EXT_FP16) - * - */ diff --git a/thirdparty/libvpx/third_party/android/cpu-features.h b/thirdparty/libvpx/third_party/android/cpu-features.h deleted file mode 100644 index 1e9724197a..0000000000 --- a/thirdparty/libvpx/third_party/android/cpu-features.h +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#ifndef CPU_FEATURES_H -#define CPU_FEATURES_H - -#include <sys/cdefs.h> -#include <stdint.h> - -__BEGIN_DECLS - -/* A list of valid values returned by android_getCpuFamily(). - * They describe the CPU Architecture of the current process. - */ -typedef enum { - ANDROID_CPU_FAMILY_UNKNOWN = 0, - ANDROID_CPU_FAMILY_ARM, - ANDROID_CPU_FAMILY_X86, - ANDROID_CPU_FAMILY_MIPS, - ANDROID_CPU_FAMILY_ARM64, - ANDROID_CPU_FAMILY_X86_64, - ANDROID_CPU_FAMILY_MIPS64, - - ANDROID_CPU_FAMILY_MAX /* do not remove */ - -} AndroidCpuFamily; - -/* Return the CPU family of the current process. - * - * Note that this matches the bitness of the current process. I.e. 
when - * running a 32-bit binary on a 64-bit capable CPU, this will return the - * 32-bit CPU family value. - */ -extern AndroidCpuFamily android_getCpuFamily(void); - -/* Return a bitmap describing a set of optional CPU features that are - * supported by the current device's CPU. The exact bit-flags returned - * depend on the value returned by android_getCpuFamily(). See the - * documentation for the ANDROID_CPU_*_FEATURE_* flags below for details. - */ -extern uint64_t android_getCpuFeatures(void); - -/* The list of feature flags for ANDROID_CPU_FAMILY_ARM that can be - * recognized by the library (see note below for 64-bit ARM). Value details - * are: - * - * VFPv2: - * CPU supports the VFPv2 instruction set. Many, but not all, ARMv6 CPUs - * support these instructions. VFPv2 is a subset of VFPv3 so this will - * be set whenever VFPv3 is set too. - * - * ARMv7: - * CPU supports the ARMv7-A basic instruction set. - * This feature is mandated by the 'armeabi-v7a' ABI. - * - * VFPv3: - * CPU supports the VFPv3-D16 instruction set, providing hardware FPU - * support for single and double precision floating point registers. - * Note that only 16 FPU registers are available by default, unless - * the D32 bit is set too. This feature is also mandated by the - * 'armeabi-v7a' ABI. - * - * VFP_D32: - * CPU VFP optional extension that provides 32 FPU registers, - * instead of 16. Note that ARM mandates this feature is the 'NEON' - * feature is implemented by the CPU. - * - * NEON: - * CPU FPU supports "ARM Advanced SIMD" instructions, also known as - * NEON. Note that this mandates the VFP_D32 feature as well, per the - * ARM Architecture specification. - * - * VFP_FP16: - * Half-width floating precision VFP extension. If set, the CPU - * supports instructions to perform floating-point operations on - * 16-bit registers. This is part of the VFPv4 specification, but - * not mandated by any Android ABI. 
- * - * VFP_FMA: - * Fused multiply-accumulate VFP instructions extension. Also part of - * the VFPv4 specification, but not mandated by any Android ABI. - * - * NEON_FMA: - * Fused multiply-accumulate NEON instructions extension. Optional - * extension from the VFPv4 specification, but not mandated by any - * Android ABI. - * - * IDIV_ARM: - * Integer division available in ARM mode. Only available - * on recent CPUs (e.g. Cortex-A15). - * - * IDIV_THUMB2: - * Integer division available in Thumb-2 mode. Only available - * on recent CPUs (e.g. Cortex-A15). - * - * iWMMXt: - * Optional extension that adds MMX registers and operations to an - * ARM CPU. This is only available on a few XScale-based CPU designs - * sold by Marvell. Pretty rare in practice. - * - * AES: - * CPU supports AES instructions. These instructions are only - * available for 32-bit applications running on ARMv8 CPU. - * - * CRC32: - * CPU supports CRC32 instructions. These instructions are only - * available for 32-bit applications running on ARMv8 CPU. - * - * SHA2: - * CPU supports SHA2 instructions. These instructions are only - * available for 32-bit applications running on ARMv8 CPU. - * - * SHA1: - * CPU supports SHA1 instructions. These instructions are only - * available for 32-bit applications running on ARMv8 CPU. - * - * PMULL: - * CPU supports 64-bit PMULL and PMULL2 instructions. These - * instructions are only available for 32-bit applications - * running on ARMv8 CPU. - * - * If you want to tell the compiler to generate code that targets one of - * the feature set above, you should probably use one of the following - * flags (for more details, see technical note at the end of this file): - * - * -mfpu=vfp - * -mfpu=vfpv2 - * These are equivalent and tell GCC to use VFPv2 instructions for - * floating-point operations. Use this if you want your code to - * run on *some* ARMv6 devices, and any ARMv7-A device supported - * by Android. - * - * Generated code requires VFPv2 feature. 
- * - * -mfpu=vfpv3-d16 - * Tell GCC to use VFPv3 instructions (using only 16 FPU registers). - * This should be generic code that runs on any CPU that supports the - * 'armeabi-v7a' Android ABI. Note that no ARMv6 CPU supports this. - * - * Generated code requires VFPv3 feature. - * - * -mfpu=vfpv3 - * Tell GCC to use VFPv3 instructions with 32 FPU registers. - * Generated code requires VFPv3|VFP_D32 features. - * - * -mfpu=neon - * Tell GCC to use VFPv3 instructions with 32 FPU registers, and - * also support NEON intrinsics (see <arm_neon.h>). - * Generated code requires VFPv3|VFP_D32|NEON features. - * - * -mfpu=vfpv4-d16 - * Generated code requires VFPv3|VFP_FP16|VFP_FMA features. - * - * -mfpu=vfpv4 - * Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32 features. - * - * -mfpu=neon-vfpv4 - * Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32|NEON|NEON_FMA - * features. - * - * -mcpu=cortex-a7 - * -mcpu=cortex-a15 - * Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32| - * NEON|NEON_FMA|IDIV_ARM|IDIV_THUMB2 - * This flag implies -mfpu=neon-vfpv4. - * - * -mcpu=iwmmxt - * Allows the use of iWMMXt instrinsics with GCC. - * - * IMPORTANT NOTE: These flags should only be tested when - * android_getCpuFamily() returns ANDROID_CPU_FAMILY_ARM, i.e. this is a - * 32-bit process. 
- * - * When running a 64-bit ARM process on an ARMv8 CPU, - * android_getCpuFeatures() will return a different set of bitflags - */ -enum { - ANDROID_CPU_ARM_FEATURE_ARMv7 = (1 << 0), - ANDROID_CPU_ARM_FEATURE_VFPv3 = (1 << 1), - ANDROID_CPU_ARM_FEATURE_NEON = (1 << 2), - ANDROID_CPU_ARM_FEATURE_LDREX_STREX = (1 << 3), - ANDROID_CPU_ARM_FEATURE_VFPv2 = (1 << 4), - ANDROID_CPU_ARM_FEATURE_VFP_D32 = (1 << 5), - ANDROID_CPU_ARM_FEATURE_VFP_FP16 = (1 << 6), - ANDROID_CPU_ARM_FEATURE_VFP_FMA = (1 << 7), - ANDROID_CPU_ARM_FEATURE_NEON_FMA = (1 << 8), - ANDROID_CPU_ARM_FEATURE_IDIV_ARM = (1 << 9), - ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 = (1 << 10), - ANDROID_CPU_ARM_FEATURE_iWMMXt = (1 << 11), - ANDROID_CPU_ARM_FEATURE_AES = (1 << 12), - ANDROID_CPU_ARM_FEATURE_PMULL = (1 << 13), - ANDROID_CPU_ARM_FEATURE_SHA1 = (1 << 14), - ANDROID_CPU_ARM_FEATURE_SHA2 = (1 << 15), - ANDROID_CPU_ARM_FEATURE_CRC32 = (1 << 16), -}; - -/* The bit flags corresponding to the output of android_getCpuFeatures() - * when android_getCpuFamily() returns ANDROID_CPU_FAMILY_ARM64. Value details - * are: - * - * FP: - * CPU has Floating-point unit. - * - * ASIMD: - * CPU has Advanced SIMD unit. - * - * AES: - * CPU supports AES instructions. - * - * CRC32: - * CPU supports CRC32 instructions. - * - * SHA2: - * CPU supports SHA2 instructions. - * - * SHA1: - * CPU supports SHA1 instructions. - * - * PMULL: - * CPU supports 64-bit PMULL and PMULL2 instructions. - */ -enum { - ANDROID_CPU_ARM64_FEATURE_FP = (1 << 0), - ANDROID_CPU_ARM64_FEATURE_ASIMD = (1 << 1), - ANDROID_CPU_ARM64_FEATURE_AES = (1 << 2), - ANDROID_CPU_ARM64_FEATURE_PMULL = (1 << 3), - ANDROID_CPU_ARM64_FEATURE_SHA1 = (1 << 4), - ANDROID_CPU_ARM64_FEATURE_SHA2 = (1 << 5), - ANDROID_CPU_ARM64_FEATURE_CRC32 = (1 << 6), -}; - -/* The bit flags corresponding to the output of android_getCpuFeatures() - * when android_getCpuFamily() returns ANDROID_CPU_FAMILY_X86 or - * ANDROID_CPU_FAMILY_X86_64. 
- */ -enum { - ANDROID_CPU_X86_FEATURE_SSSE3 = (1 << 0), - ANDROID_CPU_X86_FEATURE_POPCNT = (1 << 1), - ANDROID_CPU_X86_FEATURE_MOVBE = (1 << 2), - ANDROID_CPU_X86_FEATURE_SSE4_1 = (1 << 3), - ANDROID_CPU_X86_FEATURE_SSE4_2 = (1 << 4), - ANDROID_CPU_X86_FEATURE_AES_NI = (1 << 5), - ANDROID_CPU_X86_FEATURE_AVX = (1 << 6), - ANDROID_CPU_X86_FEATURE_RDRAND = (1 << 7), - ANDROID_CPU_X86_FEATURE_AVX2 = (1 << 8), - ANDROID_CPU_X86_FEATURE_SHA_NI = (1 << 9), -}; - -/* The bit flags corresponding to the output of android_getCpuFeatures() - * when android_getCpuFamily() returns ANDROID_CPU_FAMILY_MIPS - * or ANDROID_CPU_FAMILY_MIPS64. Values are: - * - * R6: - * CPU executes MIPS Release 6 instructions natively, and - * supports obsoleted R1..R5 instructions only via kernel traps. - * - * MSA: - * CPU supports Mips SIMD Architecture instructions. - */ -enum { - ANDROID_CPU_MIPS_FEATURE_R6 = (1 << 0), - ANDROID_CPU_MIPS_FEATURE_MSA = (1 << 1), -}; - - -/* Return the number of CPU cores detected on this device. */ -extern int android_getCpuCount(void); - -/* The following is used to force the CPU count and features - * mask in sandboxed processes. Under 4.1 and higher, these processes - * cannot access /proc, which is the only way to get information from - * the kernel about the current hardware (at least on ARM). - * - * It _must_ be called only once, and before any android_getCpuXXX - * function, any other case will fail. - * - * This function return 1 on success, and 0 on failure. - */ -extern int android_setCpu(int cpu_count, - uint64_t cpu_features); - -#ifdef __arm__ -/* Retrieve the ARM 32-bit CPUID value from the kernel. - * Note that this cannot work on sandboxed processes under 4.1 and - * higher, unless you called android_setCpuArm() before. - */ -extern uint32_t android_getCpuIdArm(void); - -/* An ARM-specific variant of android_setCpu() that also allows you - * to set the ARM CPUID field. 
- */ -extern int android_setCpuArm(int cpu_count, - uint64_t cpu_features, - uint32_t cpu_id); -#endif - -__END_DECLS - -#endif /* CPU_FEATURES_H */ diff --git a/thirdparty/libvpx/third_party/x86inc/LICENSE b/thirdparty/libvpx/third_party/x86inc/LICENSE deleted file mode 100644 index 7d07645a17..0000000000 --- a/thirdparty/libvpx/third_party/x86inc/LICENSE +++ /dev/null @@ -1,18 +0,0 @@ -Copyright (C) 2005-2012 x264 project - -Authors: Loren Merritt <lorenm@u.washington.edu> - Anton Mitrofanov <BugMaster@narod.ru> - Jason Garrett-Glaser <darkshikari@gmail.com> - Henrik Gramner <hengar-6@student.ltu.se> - -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/thirdparty/libvpx/third_party/x86inc/README.libvpx b/thirdparty/libvpx/third_party/x86inc/README.libvpx deleted file mode 100644 index 8d3cd966da..0000000000 --- a/thirdparty/libvpx/third_party/x86inc/README.libvpx +++ /dev/null @@ -1,20 +0,0 @@ -URL: https://git.videolan.org/git/x264.git -Version: d23d18655249944c1ca894b451e2c82c7a584c62 -License: ISC -License File: LICENSE - -Description: -x264/libav's framework for x86 assembly. Contains a variety of macros and -defines that help automatically allow assembly to work cross-platform. - -Local Modifications: -Get configuration from vpx_config.asm. -Prefix functions with vpx by default. 
-Manage name mangling (prefixing with '_') manually because 'PREFIX' does not - exist in libvpx. -Expand PIC default to macho64 and respect CONFIG_PIC from libvpx -Set 'private_extern' visibility for macho targets. -Copy PIC 'GLOBAL' macros from x86_abi_support.asm -Use .text instead of .rodata on macho to avoid broken tables in PIC mode. -Use .text with no alignment for aout -Only use 'hidden' visibility with Chromium diff --git a/thirdparty/libvpx/third_party/x86inc/x86inc.asm b/thirdparty/libvpx/third_party/x86inc/x86inc.asm deleted file mode 100644 index b647dff2f8..0000000000 --- a/thirdparty/libvpx/third_party/x86inc/x86inc.asm +++ /dev/null @@ -1,1649 +0,0 @@ -;***************************************************************************** -;* x86inc.asm: x264asm abstraction layer -;***************************************************************************** -;* Copyright (C) 2005-2016 x264 project -;* -;* Authors: Loren Merritt <lorenm@u.washington.edu> -;* Anton Mitrofanov <BugMaster@narod.ru> -;* Fiona Glaser <fiona@x264.com> -;* Henrik Gramner <henrik@gramner.com> -;* -;* Permission to use, copy, modify, and/or distribute this software for any -;* purpose with or without fee is hereby granted, provided that the above -;* copyright notice and this permission notice appear in all copies. -;* -;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
-;***************************************************************************** - -; This is a header file for the x264ASM assembly language, which uses -; NASM/YASM syntax combined with a large number of macros to provide easy -; abstraction between different calling conventions (x86_32, win64, linux64). -; It also has various other useful features to simplify writing the kind of -; DSP functions that are most often used in x264. - -; Unlike the rest of x264, this file is available under an ISC license, as it -; has significant usefulness outside of x264 and we want it to be available -; to the largest audience possible. Of course, if you modify it for your own -; purposes to add a new feature, we strongly encourage contributing a patch -; as this feature might be useful for others as well. Send patches or ideas -; to x264-devel@videolan.org . - -%include "vpx_config.asm" - -%ifndef private_prefix - %define private_prefix vpx -%endif - -%ifndef public_prefix - %define public_prefix private_prefix -%endif - -%ifndef STACK_ALIGNMENT - %if ARCH_X86_64 - %define STACK_ALIGNMENT 16 - %else - %define STACK_ALIGNMENT 4 - %endif -%endif - -%define WIN64 0 -%define UNIX64 0 -%if ARCH_X86_64 - %ifidn __OUTPUT_FORMAT__,win32 - %define WIN64 1 - %elifidn __OUTPUT_FORMAT__,win64 - %define WIN64 1 - %elifidn __OUTPUT_FORMAT__,x64 - %define WIN64 1 - %else - %define UNIX64 1 - %endif -%endif - -%define FORMAT_ELF 0 -%ifidn __OUTPUT_FORMAT__,elf - %define FORMAT_ELF 1 -%elifidn __OUTPUT_FORMAT__,elf32 - %define FORMAT_ELF 1 -%elifidn __OUTPUT_FORMAT__,elf64 - %define FORMAT_ELF 1 -%endif - -%define FORMAT_MACHO 0 -%ifidn __OUTPUT_FORMAT__,macho32 - %define FORMAT_MACHO 1 -%elifidn __OUTPUT_FORMAT__,macho64 - %define FORMAT_MACHO 1 -%endif - -; Set PREFIX for libvpx builds. 
-%if FORMAT_ELF - %undef PREFIX -%elif WIN64 - %undef PREFIX -%else - %define PREFIX -%endif - -%ifdef PREFIX - %define mangle(x) _ %+ x -%else - %define mangle(x) x -%endif - -; In some instances macho32 tables get misaligned when using .rodata. -; When looking at the disassembly it appears that the offset is either -; correct or consistently off by 90. Placing them in the .text section -; works around the issue. It appears to be specific to the way libvpx -; handles the tables. -%macro SECTION_RODATA 0-1 16 - %ifidn __OUTPUT_FORMAT__,macho32 - SECTION .text align=%1 - fakegot: - %elifidn __OUTPUT_FORMAT__,aout - SECTION .text - %else - SECTION .rodata align=%1 - %endif -%endmacro - -; PIC macros are copied from vpx_ports/x86_abi_support.asm. The "define PIC" -; from original code is added in for 64bit. -%ifidn __OUTPUT_FORMAT__,elf32 -%define ABI_IS_32BIT 1 -%elifidn __OUTPUT_FORMAT__,macho32 -%define ABI_IS_32BIT 1 -%elifidn __OUTPUT_FORMAT__,win32 -%define ABI_IS_32BIT 1 -%elifidn __OUTPUT_FORMAT__,aout -%define ABI_IS_32BIT 1 -%else -%define ABI_IS_32BIT 0 -%endif - -%if ABI_IS_32BIT - %if CONFIG_PIC=1 - %ifidn __OUTPUT_FORMAT__,elf32 - %define GET_GOT_DEFINED 1 - %define WRT_PLT wrt ..plt - %macro GET_GOT 1 - extern _GLOBAL_OFFSET_TABLE_ - push %1 - call %%get_got - %%sub_offset: - jmp %%exitGG - %%get_got: - mov %1, [esp] - add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc - ret - %%exitGG: - %undef GLOBAL - %define GLOBAL(x) x + %1 wrt ..gotoff - %undef RESTORE_GOT - %define RESTORE_GOT pop %1 - %endmacro - %elifidn __OUTPUT_FORMAT__,macho32 - %define GET_GOT_DEFINED 1 - %macro GET_GOT 1 - push %1 - call %%get_got - %%get_got: - pop %1 - %undef GLOBAL - %define GLOBAL(x) x + %1 - %%get_got - %undef RESTORE_GOT - %define RESTORE_GOT pop %1 - %endmacro - %else - %define GET_GOT_DEFINED 0 - %endif - %endif - - %if ARCH_X86_64 == 0 - %undef PIC - %endif - -%else - %macro GET_GOT 1 - %endmacro - %define GLOBAL(x) rel x - %define WRT_PLT wrt ..plt - - 
%if WIN64 - %define PIC - %elifidn __OUTPUT_FORMAT__,macho64 - %define PIC - %elif CONFIG_PIC - %define PIC - %endif -%endif - -%ifnmacro GET_GOT - %macro GET_GOT 1 - %endmacro - %define GLOBAL(x) x -%endif -%ifndef RESTORE_GOT - %define RESTORE_GOT -%endif -%ifndef WRT_PLT - %define WRT_PLT -%endif - -%ifdef PIC - default rel -%endif - -%ifndef GET_GOT_DEFINED - %define GET_GOT_DEFINED 0 -%endif -; Done with PIC macros - -%ifdef __NASM_VER__ - %use smartalign -%endif - -; Macros to eliminate most code duplication between x86_32 and x86_64: -; Currently this works only for leaf functions which load all their arguments -; into registers at the start, and make no other use of the stack. Luckily that -; covers most of x264's asm. - -; PROLOGUE: -; %1 = number of arguments. loads them from stack if needed. -; %2 = number of registers used. pushes callee-saved regs if needed. -; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed. -; %4 = (optional) stack size to be allocated. The stack will be aligned before -; allocating the specified stack size. If the required stack alignment is -; larger than the known stack alignment the stack will be manually aligned -; and an extra register will be allocated to hold the original stack -; pointer (to not invalidate r0m etc.). To prevent the use of an extra -; register as stack pointer, request a negative stack size. -; %4+/%5+ = list of names to define to registers -; PROLOGUE can also be invoked by adding the same options to cglobal - -; e.g. -; cglobal foo, 2,3,7,0x40, dst, src, tmp -; declares a function (foo) that automatically loads two arguments (dst and -; src) into registers, uses one additional register (tmp) plus 7 vector -; registers (m0-m6) and allocates 0x40 bytes of stack space. - -; TODO Some functions can use some args directly from the stack. If they're the -; last args then you can just not declare them, but if they're in the middle -; we need more flexible macro. 
- -; RET: -; Pops anything that was pushed by PROLOGUE, and returns. - -; REP_RET: -; Use this instead of RET if it's a branch target. - -; registers: -; rN and rNq are the native-size register holding function argument N -; rNd, rNw, rNb are dword, word, and byte size -; rNh is the high 8 bits of the word size -; rNm is the original location of arg N (a register or on the stack), dword -; rNmp is native size - -%macro DECLARE_REG 2-3 - %define r%1q %2 - %define r%1d %2d - %define r%1w %2w - %define r%1b %2b - %define r%1h %2h - %define %2q %2 - %if %0 == 2 - %define r%1m %2d - %define r%1mp %2 - %elif ARCH_X86_64 ; memory - %define r%1m [rstk + stack_offset + %3] - %define r%1mp qword r %+ %1 %+ m - %else - %define r%1m [rstk + stack_offset + %3] - %define r%1mp dword r %+ %1 %+ m - %endif - %define r%1 %2 -%endmacro - -%macro DECLARE_REG_SIZE 3 - %define r%1q r%1 - %define e%1q r%1 - %define r%1d e%1 - %define e%1d e%1 - %define r%1w %1 - %define e%1w %1 - %define r%1h %3 - %define e%1h %3 - %define r%1b %2 - %define e%1b %2 - %if ARCH_X86_64 == 0 - %define r%1 e%1 - %endif -%endmacro - -DECLARE_REG_SIZE ax, al, ah -DECLARE_REG_SIZE bx, bl, bh -DECLARE_REG_SIZE cx, cl, ch -DECLARE_REG_SIZE dx, dl, dh -DECLARE_REG_SIZE si, sil, null -DECLARE_REG_SIZE di, dil, null -DECLARE_REG_SIZE bp, bpl, null - -; t# defines for when per-arch register allocation is more complex than just function arguments - -%macro DECLARE_REG_TMP 1-* - %assign %%i 0 - %rep %0 - CAT_XDEFINE t, %%i, r%1 - %assign %%i %%i+1 - %rotate 1 - %endrep -%endmacro - -%macro DECLARE_REG_TMP_SIZE 0-* - %rep %0 - %define t%1q t%1 %+ q - %define t%1d t%1 %+ d - %define t%1w t%1 %+ w - %define t%1h t%1 %+ h - %define t%1b t%1 %+ b - %rotate 1 - %endrep -%endmacro - -DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 - -%if ARCH_X86_64 - %define gprsize 8 -%else - %define gprsize 4 -%endif - -%macro PUSH 1 - push %1 - %ifidn rstk, rsp - %assign stack_offset stack_offset+gprsize - %endif -%endmacro - 
-%macro POP 1 - pop %1 - %ifidn rstk, rsp - %assign stack_offset stack_offset-gprsize - %endif -%endmacro - -%macro PUSH_IF_USED 1-* - %rep %0 - %if %1 < regs_used - PUSH r%1 - %endif - %rotate 1 - %endrep -%endmacro - -%macro POP_IF_USED 1-* - %rep %0 - %if %1 < regs_used - pop r%1 - %endif - %rotate 1 - %endrep -%endmacro - -%macro LOAD_IF_USED 1-* - %rep %0 - %if %1 < num_args - mov r%1, r %+ %1 %+ mp - %endif - %rotate 1 - %endrep -%endmacro - -%macro SUB 2 - sub %1, %2 - %ifidn %1, rstk - %assign stack_offset stack_offset+(%2) - %endif -%endmacro - -%macro ADD 2 - add %1, %2 - %ifidn %1, rstk - %assign stack_offset stack_offset-(%2) - %endif -%endmacro - -%macro movifnidn 2 - %ifnidn %1, %2 - mov %1, %2 - %endif -%endmacro - -%macro movsxdifnidn 2 - %ifnidn %1, %2 - movsxd %1, %2 - %endif -%endmacro - -%macro ASSERT 1 - %if (%1) == 0 - %error assertion ``%1'' failed - %endif -%endmacro - -%macro DEFINE_ARGS 0-* - %ifdef n_arg_names - %assign %%i 0 - %rep n_arg_names - CAT_UNDEF arg_name %+ %%i, q - CAT_UNDEF arg_name %+ %%i, d - CAT_UNDEF arg_name %+ %%i, w - CAT_UNDEF arg_name %+ %%i, h - CAT_UNDEF arg_name %+ %%i, b - CAT_UNDEF arg_name %+ %%i, m - CAT_UNDEF arg_name %+ %%i, mp - CAT_UNDEF arg_name, %%i - %assign %%i %%i+1 - %endrep - %endif - - %xdefine %%stack_offset stack_offset - %undef stack_offset ; so that the current value of stack_offset doesn't get baked in by xdefine - %assign %%i 0 - %rep %0 - %xdefine %1q r %+ %%i %+ q - %xdefine %1d r %+ %%i %+ d - %xdefine %1w r %+ %%i %+ w - %xdefine %1h r %+ %%i %+ h - %xdefine %1b r %+ %%i %+ b - %xdefine %1m r %+ %%i %+ m - %xdefine %1mp r %+ %%i %+ mp - CAT_XDEFINE arg_name, %%i, %1 - %assign %%i %%i+1 - %rotate 1 - %endrep - %xdefine stack_offset %%stack_offset - %assign n_arg_names %0 -%endmacro - -%define required_stack_alignment ((mmsize + 15) & ~15) - -%macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only) - %ifnum %1 - %if %1 != 0 - %assign %%pad 0 - %assign stack_size %1 - %if 
stack_size < 0 - %assign stack_size -stack_size - %endif - %if WIN64 - %assign %%pad %%pad + 32 ; shadow space - %if mmsize != 8 - %assign xmm_regs_used %2 - %if xmm_regs_used > 8 - %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers - %endif - %endif - %endif - %if required_stack_alignment <= STACK_ALIGNMENT - ; maintain the current stack alignment - %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1)) - SUB rsp, stack_size_padded - %else - %assign %%reg_num (regs_used - 1) - %xdefine rstk r %+ %%reg_num - ; align stack, and save original stack location directly above - ; it, i.e. in [rsp+stack_size_padded], so we can restore the - ; stack in a single instruction (i.e. mov rsp, rstk or mov - ; rsp, [rsp+stack_size_padded]) - %if %1 < 0 ; need to store rsp on stack - %xdefine rstkm [rsp + stack_size + %%pad] - %assign %%pad %%pad + gprsize - %else ; can keep rsp in rstk during whole function - %xdefine rstkm rstk - %endif - %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1)) - mov rstk, rsp - and rsp, ~(required_stack_alignment-1) - sub rsp, stack_size_padded - movifnidn rstkm, rstk - %endif - WIN64_PUSH_XMM - %endif - %endif -%endmacro - -%macro SETUP_STACK_POINTER 1 - %ifnum %1 - %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT - %if %1 > 0 - %assign regs_used (regs_used + 1) - %endif - %if ARCH_X86_64 && regs_used < 5 + UNIX64 * 3 - ; Ensure that we don't clobber any registers containing arguments - %assign regs_used 5 + UNIX64 * 3 - %endif - %endif - %endif -%endmacro - -%macro DEFINE_ARGS_INTERNAL 3+ - %ifnum %2 - DEFINE_ARGS %3 - %elif %1 == 4 - DEFINE_ARGS %2 - %elif %1 > 4 - DEFINE_ARGS %2, %3 - %endif -%endmacro - -%if WIN64 ; Windows x64 ;================================================= - -DECLARE_REG 0, rcx -DECLARE_REG 1, rdx -DECLARE_REG 2, R8 -DECLARE_REG 3, R9 -DECLARE_REG 4, R10, 40 -DECLARE_REG 5, R11, 48 
-DECLARE_REG 6, rax, 56 -DECLARE_REG 7, rdi, 64 -DECLARE_REG 8, rsi, 72 -DECLARE_REG 9, rbx, 80 -DECLARE_REG 10, rbp, 88 -DECLARE_REG 11, R12, 96 -DECLARE_REG 12, R13, 104 -DECLARE_REG 13, R14, 112 -DECLARE_REG 14, R15, 120 - -%macro PROLOGUE 2-5+ 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names... - %assign num_args %1 - %assign regs_used %2 - ASSERT regs_used >= num_args - SETUP_STACK_POINTER %4 - ASSERT regs_used <= 15 - PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14 - ALLOC_STACK %4, %3 - %if mmsize != 8 && stack_size == 0 - WIN64_SPILL_XMM %3 - %endif - LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 - DEFINE_ARGS_INTERNAL %0, %4, %5 -%endmacro - -%macro WIN64_PUSH_XMM 0 - ; Use the shadow space to store XMM6 and XMM7, the rest needs stack space allocated. - %if xmm_regs_used > 6 - movaps [rstk + stack_offset + 8], xmm6 - %endif - %if xmm_regs_used > 7 - movaps [rstk + stack_offset + 24], xmm7 - %endif - %if xmm_regs_used > 8 - %assign %%i 8 - %rep xmm_regs_used-8 - movaps [rsp + (%%i-8)*16 + stack_size + 32], xmm %+ %%i - %assign %%i %%i+1 - %endrep - %endif -%endmacro - -%macro WIN64_SPILL_XMM 1 - %assign xmm_regs_used %1 - ASSERT xmm_regs_used <= 16 - %if xmm_regs_used > 8 - ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack. 
- %assign %%pad (xmm_regs_used-8)*16 + 32 - %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1)) - SUB rsp, stack_size_padded - %endif - WIN64_PUSH_XMM -%endmacro - -%macro WIN64_RESTORE_XMM_INTERNAL 1 - %assign %%pad_size 0 - %if xmm_regs_used > 8 - %assign %%i xmm_regs_used - %rep xmm_regs_used-8 - %assign %%i %%i-1 - movaps xmm %+ %%i, [%1 + (%%i-8)*16 + stack_size + 32] - %endrep - %endif - %if stack_size_padded > 0 - %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT - mov rsp, rstkm - %else - add %1, stack_size_padded - %assign %%pad_size stack_size_padded - %endif - %endif - %if xmm_regs_used > 7 - movaps xmm7, [%1 + stack_offset - %%pad_size + 24] - %endif - %if xmm_regs_used > 6 - movaps xmm6, [%1 + stack_offset - %%pad_size + 8] - %endif -%endmacro - -%macro WIN64_RESTORE_XMM 1 - WIN64_RESTORE_XMM_INTERNAL %1 - %assign stack_offset (stack_offset-stack_size_padded) - %assign xmm_regs_used 0 -%endmacro - -%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32 || stack_size > 0 - -%macro RET 0 - WIN64_RESTORE_XMM_INTERNAL rsp - POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7 - %if mmsize == 32 - vzeroupper - %endif - AUTO_REP_RET -%endmacro - -%elif ARCH_X86_64 ; *nix x64 ;============================================= - -DECLARE_REG 0, rdi -DECLARE_REG 1, rsi -DECLARE_REG 2, rdx -DECLARE_REG 3, rcx -DECLARE_REG 4, R8 -DECLARE_REG 5, R9 -DECLARE_REG 6, rax, 8 -DECLARE_REG 7, R10, 16 -DECLARE_REG 8, R11, 24 -DECLARE_REG 9, rbx, 32 -DECLARE_REG 10, rbp, 40 -DECLARE_REG 11, R12, 48 -DECLARE_REG 12, R13, 56 -DECLARE_REG 13, R14, 64 -DECLARE_REG 14, R15, 72 - -%macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names... 
- %assign num_args %1 - %assign regs_used %2 - ASSERT regs_used >= num_args - SETUP_STACK_POINTER %4 - ASSERT regs_used <= 15 - PUSH_IF_USED 9, 10, 11, 12, 13, 14 - ALLOC_STACK %4 - LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14 - DEFINE_ARGS_INTERNAL %0, %4, %5 -%endmacro - -%define has_epilogue regs_used > 9 || mmsize == 32 || stack_size > 0 - -%macro RET 0 - %if stack_size_padded > 0 - %if required_stack_alignment > STACK_ALIGNMENT - mov rsp, rstkm - %else - add rsp, stack_size_padded - %endif - %endif - POP_IF_USED 14, 13, 12, 11, 10, 9 - %if mmsize == 32 - vzeroupper - %endif - AUTO_REP_RET -%endmacro - -%else ; X86_32 ;============================================================== - -DECLARE_REG 0, eax, 4 -DECLARE_REG 1, ecx, 8 -DECLARE_REG 2, edx, 12 -DECLARE_REG 3, ebx, 16 -DECLARE_REG 4, esi, 20 -DECLARE_REG 5, edi, 24 -DECLARE_REG 6, ebp, 28 -%define rsp esp - -%macro DECLARE_ARG 1-* - %rep %0 - %define r%1m [rstk + stack_offset + 4*%1 + 4] - %define r%1mp dword r%1m - %rotate 1 - %endrep -%endmacro - -DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 - -%macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names... 
- %assign num_args %1 - %assign regs_used %2 - ASSERT regs_used >= num_args - %if num_args > 7 - %assign num_args 7 - %endif - %if regs_used > 7 - %assign regs_used 7 - %endif - SETUP_STACK_POINTER %4 - ASSERT regs_used <= 7 - PUSH_IF_USED 3, 4, 5, 6 - ALLOC_STACK %4 - LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6 - DEFINE_ARGS_INTERNAL %0, %4, %5 -%endmacro - -%define has_epilogue regs_used > 3 || mmsize == 32 || stack_size > 0 - -%macro RET 0 - %if stack_size_padded > 0 - %if required_stack_alignment > STACK_ALIGNMENT - mov rsp, rstkm - %else - add rsp, stack_size_padded - %endif - %endif - POP_IF_USED 6, 5, 4, 3 - %if mmsize == 32 - vzeroupper - %endif - AUTO_REP_RET -%endmacro - -%endif ;====================================================================== - -%if WIN64 == 0 - %macro WIN64_SPILL_XMM 1 - %endmacro - %macro WIN64_RESTORE_XMM 1 - %endmacro - %macro WIN64_PUSH_XMM 0 - %endmacro -%endif - -; On AMD cpus <=K10, an ordinary ret is slow if it immediately follows either -; a branch or a branch target. So switch to a 2-byte form of ret in that case. -; We can automatically detect "follows a branch", but not a branch target. -; (SSSE3 is a sufficient condition to know that your cpu doesn't have this problem.) -%macro REP_RET 0 - %if has_epilogue - RET - %else - rep ret - %endif - annotate_function_size -%endmacro - -%define last_branch_adr $$ -%macro AUTO_REP_RET 0 - %if notcpuflag(ssse3) - times ((last_branch_adr-$)>>31)+1 rep ; times 1 iff $ == last_branch_adr. 
- %endif - ret - annotate_function_size -%endmacro - -%macro BRANCH_INSTR 0-* - %rep %0 - %macro %1 1-2 %1 - %2 %1 - %if notcpuflag(ssse3) - %%branch_instr equ $ - %xdefine last_branch_adr %%branch_instr - %endif - %endmacro - %rotate 1 - %endrep -%endmacro - -BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, jna, jnae, jb, jbe, jnb, jnbe, jc, jnc, js, jns, jo, jno, jp, jnp - -%macro TAIL_CALL 2 ; callee, is_nonadjacent - %if has_epilogue - call %1 - RET - %elif %2 - jmp %1 - %endif - annotate_function_size -%endmacro - -;============================================================================= -; arch-independent part -;============================================================================= - -%assign function_align 16 - -; Begin a function. -; Applies any symbol mangling needed for C linkage, and sets up a define such that -; subsequent uses of the function name automatically refer to the mangled version. -; Appends cpuflags to the function name if cpuflags has been specified. -; The "" empty default parameter is a workaround for nasm, which fails if SUFFIX -; is empty and we call cglobal_internal with just %1 %+ SUFFIX (without %2). -%macro cglobal 1-2+ "" ; name, [PROLOGUE args] - cglobal_internal 1, %1 %+ SUFFIX, %2 -%endmacro -%macro cvisible 1-2+ "" ; name, [PROLOGUE args] - cglobal_internal 0, %1 %+ SUFFIX, %2 -%endmacro -%macro cglobal_internal 2-3+ - annotate_function_size - %if %1 - %xdefine %%FUNCTION_PREFIX private_prefix - ; libvpx explicitly sets visibility in shared object builds. Avoid - ; setting visibility to hidden as it may break builds that split - ; sources on e.g., directory boundaries. 
- %ifdef CHROMIUM - %xdefine %%VISIBILITY hidden - %else - %xdefine %%VISIBILITY - %endif - %else - %xdefine %%FUNCTION_PREFIX public_prefix - %xdefine %%VISIBILITY - %endif - %ifndef cglobaled_%2 - %xdefine %2 mangle(%%FUNCTION_PREFIX %+ _ %+ %2) - %xdefine %2.skip_prologue %2 %+ .skip_prologue - CAT_XDEFINE cglobaled_, %2, 1 - %endif - %xdefine current_function %2 - %xdefine current_function_section __SECT__ - %if FORMAT_ELF - global %2:function %%VISIBILITY - %elif FORMAT_MACHO - %ifdef __NASM_VER__ - global %2 - %else - global %2:private_extern - %endif - %else - global %2 - %endif - align function_align - %2: - RESET_MM_PERMUTATION ; needed for x86-64, also makes disassembly somewhat nicer - %xdefine rstk rsp ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required - %assign stack_offset 0 ; stack pointer offset relative to the return address - %assign stack_size 0 ; amount of stack space that can be freely used inside a function - %assign stack_size_padded 0 ; total amount of allocated stack space, including space for callee-saved xmm registers on WIN64 and alignment padding - %assign xmm_regs_used 0 ; number of XMM registers requested, used for dealing with callee-saved registers on WIN64 - %ifnidn %3, "" - PROLOGUE %3 - %endif -%endmacro - -%macro cextern 1 - %xdefine %1 mangle(private_prefix %+ _ %+ %1) - CAT_XDEFINE cglobaled_, %1, 1 - extern %1 -%endmacro - -; like cextern, but without the prefix -%macro cextern_naked 1 - %ifdef PREFIX - %xdefine %1 mangle(%1) - %endif - CAT_XDEFINE cglobaled_, %1, 1 - extern %1 -%endmacro - -%macro const 1-2+ - %xdefine %1 mangle(private_prefix %+ _ %+ %1) - %if FORMAT_ELF - global %1:data hidden - %else - global %1 - %endif - %1: %2 -%endmacro - -; This is needed for ELF, otherwise the GNU linker assumes the stack is executable by default. 
-%if FORMAT_ELF - [SECTION .note.GNU-stack noalloc noexec nowrite progbits] -%endif - -; Tell debuggers how large the function was. -; This may be invoked multiple times per function; we rely on later instances overriding earlier ones. -; This is invoked by RET and similar macros, and also cglobal does it for the previous function, -; but if the last function in a source file doesn't use any of the standard macros for its epilogue, -; then its size might be unspecified. -%macro annotate_function_size 0 - %ifdef __YASM_VER__ - %ifdef current_function - %if FORMAT_ELF - current_function_section - %%ecf equ $ - size current_function %%ecf - current_function - __SECT__ - %endif - %endif - %endif -%endmacro - -; cpuflags - -%assign cpuflags_mmx (1<<0) -%assign cpuflags_mmx2 (1<<1) | cpuflags_mmx -%assign cpuflags_3dnow (1<<2) | cpuflags_mmx -%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow -%assign cpuflags_sse (1<<4) | cpuflags_mmx2 -%assign cpuflags_sse2 (1<<5) | cpuflags_sse -%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2 -%assign cpuflags_sse3 (1<<7) | cpuflags_sse2 -%assign cpuflags_ssse3 (1<<8) | cpuflags_sse3 -%assign cpuflags_sse4 (1<<9) | cpuflags_ssse3 -%assign cpuflags_sse42 (1<<10)| cpuflags_sse4 -%assign cpuflags_avx (1<<11)| cpuflags_sse42 -%assign cpuflags_xop (1<<12)| cpuflags_avx -%assign cpuflags_fma4 (1<<13)| cpuflags_avx -%assign cpuflags_fma3 (1<<14)| cpuflags_avx -%assign cpuflags_avx2 (1<<15)| cpuflags_fma3 - -%assign cpuflags_cache32 (1<<16) -%assign cpuflags_cache64 (1<<17) -%assign cpuflags_slowctz (1<<18) -%assign cpuflags_lzcnt (1<<19) -%assign cpuflags_aligned (1<<20) ; not a cpu feature, but a function variant -%assign cpuflags_atom (1<<21) -%assign cpuflags_bmi1 (1<<22)|cpuflags_lzcnt -%assign cpuflags_bmi2 (1<<23)|cpuflags_bmi1 - -; Returns a boolean value expressing whether or not the specified cpuflag is enabled. 
-%define cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1) -%define notcpuflag(x) (cpuflag(x) ^ 1) - -; Takes an arbitrary number of cpuflags from the above list. -; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu. -; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co. -%macro INIT_CPUFLAGS 0-* - %xdefine SUFFIX - %undef cpuname - %assign cpuflags 0 - - %if %0 >= 1 - %rep %0 - %ifdef cpuname - %xdefine cpuname cpuname %+ _%1 - %else - %xdefine cpuname %1 - %endif - %assign cpuflags cpuflags | cpuflags_%1 - %rotate 1 - %endrep - %xdefine SUFFIX _ %+ cpuname - - %if cpuflag(avx) - %assign avx_enabled 1 - %endif - %if (mmsize == 16 && notcpuflag(sse2)) || (mmsize == 32 && notcpuflag(avx2)) - %define mova movaps - %define movu movups - %define movnta movntps - %endif - %if cpuflag(aligned) - %define movu mova - %elif cpuflag(sse3) && notcpuflag(ssse3) - %define movu lddqu - %endif - %endif - - %if ARCH_X86_64 || cpuflag(sse2) - %ifdef __NASM_VER__ - ALIGNMODE k8 - %else - CPU amdnop - %endif - %else - %ifdef __NASM_VER__ - ALIGNMODE nop - %else - CPU basicnop - %endif - %endif -%endmacro - -; Merge mmx and sse* -; m# is a simd register of the currently selected size -; xm# is the corresponding xmm register if mmsize >= 16, otherwise the same as m# -; ym# is the corresponding ymm register if mmsize >= 32, otherwise the same as m# -; (All 3 remain in sync through SWAP.) 
- -%macro CAT_XDEFINE 3 - %xdefine %1%2 %3 -%endmacro - -%macro CAT_UNDEF 2 - %undef %1%2 -%endmacro - -%macro INIT_MMX 0-1+ - %assign avx_enabled 0 - %define RESET_MM_PERMUTATION INIT_MMX %1 - %define mmsize 8 - %define num_mmregs 8 - %define mova movq - %define movu movq - %define movh movd - %define movnta movntq - %assign %%i 0 - %rep 8 - CAT_XDEFINE m, %%i, mm %+ %%i - CAT_XDEFINE nnmm, %%i, %%i - %assign %%i %%i+1 - %endrep - %rep 8 - CAT_UNDEF m, %%i - CAT_UNDEF nnmm, %%i - %assign %%i %%i+1 - %endrep - INIT_CPUFLAGS %1 -%endmacro - -%macro INIT_XMM 0-1+ - %assign avx_enabled 0 - %define RESET_MM_PERMUTATION INIT_XMM %1 - %define mmsize 16 - %define num_mmregs 8 - %if ARCH_X86_64 - %define num_mmregs 16 - %endif - %define mova movdqa - %define movu movdqu - %define movh movq - %define movnta movntdq - %assign %%i 0 - %rep num_mmregs - CAT_XDEFINE m, %%i, xmm %+ %%i - CAT_XDEFINE nnxmm, %%i, %%i - %assign %%i %%i+1 - %endrep - INIT_CPUFLAGS %1 -%endmacro - -%macro INIT_YMM 0-1+ - %assign avx_enabled 1 - %define RESET_MM_PERMUTATION INIT_YMM %1 - %define mmsize 32 - %define num_mmregs 8 - %if ARCH_X86_64 - %define num_mmregs 16 - %endif - %define mova movdqa - %define movu movdqu - %undef movh - %define movnta movntdq - %assign %%i 0 - %rep num_mmregs - CAT_XDEFINE m, %%i, ymm %+ %%i - CAT_XDEFINE nnymm, %%i, %%i - %assign %%i %%i+1 - %endrep - INIT_CPUFLAGS %1 -%endmacro - -INIT_XMM - -%macro DECLARE_MMCAST 1 - %define mmmm%1 mm%1 - %define mmxmm%1 mm%1 - %define mmymm%1 mm%1 - %define xmmmm%1 mm%1 - %define xmmxmm%1 xmm%1 - %define xmmymm%1 xmm%1 - %define ymmmm%1 mm%1 - %define ymmxmm%1 xmm%1 - %define ymmymm%1 ymm%1 - %define xm%1 xmm %+ m%1 - %define ym%1 ymm %+ m%1 -%endmacro - -%assign i 0 -%rep 16 - DECLARE_MMCAST i - %assign i i+1 -%endrep - -; I often want to use macros that permute their arguments. e.g. there's no -; efficient way to implement butterfly or transpose or dct without swapping some -; arguments. 
-; -; I would like to not have to manually keep track of the permutations: -; If I insert a permutation in the middle of a function, it should automatically -; change everything that follows. For more complex macros I may also have multiple -; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations. -; -; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that -; permutes its arguments. It's equivalent to exchanging the contents of the -; registers, except that this way you exchange the register names instead, so it -; doesn't cost any cycles. - -%macro PERMUTE 2-* ; takes a list of pairs to swap - %rep %0/2 - %xdefine %%tmp%2 m%2 - %rotate 2 - %endrep - %rep %0/2 - %xdefine m%1 %%tmp%2 - CAT_XDEFINE nn, m%1, %1 - %rotate 2 - %endrep -%endmacro - -%macro SWAP 2+ ; swaps a single chain (sometimes more concise than pairs) - %ifnum %1 ; SWAP 0, 1, ... - SWAP_INTERNAL_NUM %1, %2 - %else ; SWAP m0, m1, ... - SWAP_INTERNAL_NAME %1, %2 - %endif -%endmacro - -%macro SWAP_INTERNAL_NUM 2-* - %rep %0-1 - %xdefine %%tmp m%1 - %xdefine m%1 m%2 - %xdefine m%2 %%tmp - CAT_XDEFINE nn, m%1, %1 - CAT_XDEFINE nn, m%2, %2 - %rotate 1 - %endrep -%endmacro - -%macro SWAP_INTERNAL_NAME 2-* - %xdefine %%args nn %+ %1 - %rep %0-1 - %xdefine %%args %%args, nn %+ %2 - %rotate 1 - %endrep - SWAP_INTERNAL_NUM %%args -%endmacro - -; If SAVE_MM_PERMUTATION is placed at the end of a function, then any later -; calls to that function will automatically load the permutation, so values can -; be returned in mmregs. 
-%macro SAVE_MM_PERMUTATION 0-1 - %if %0 - %xdefine %%f %1_m - %else - %xdefine %%f current_function %+ _m - %endif - %assign %%i 0 - %rep num_mmregs - CAT_XDEFINE %%f, %%i, m %+ %%i - %assign %%i %%i+1 - %endrep -%endmacro - -%macro LOAD_MM_PERMUTATION 1 ; name to load from - %ifdef %1_m0 - %assign %%i 0 - %rep num_mmregs - CAT_XDEFINE m, %%i, %1_m %+ %%i - CAT_XDEFINE nn, m %+ %%i, %%i - %assign %%i %%i+1 - %endrep - %endif -%endmacro - -; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't -%macro call 1 - call_internal %1 %+ SUFFIX, %1 -%endmacro -%macro call_internal 2 - %xdefine %%i %2 - %ifndef cglobaled_%2 - %ifdef cglobaled_%1 - %xdefine %%i %1 - %endif - %endif - call %%i - LOAD_MM_PERMUTATION %%i -%endmacro - -; Substitutions that reduce instruction size but are functionally equivalent -%macro add 2 - %ifnum %2 - %if %2==128 - sub %1, -128 - %else - add %1, %2 - %endif - %else - add %1, %2 - %endif -%endmacro - -%macro sub 2 - %ifnum %2 - %if %2==128 - add %1, -128 - %else - sub %1, %2 - %endif - %else - sub %1, %2 - %endif -%endmacro - -;============================================================================= -; AVX abstraction layer -;============================================================================= - -%assign i 0 -%rep 16 - %if i < 8 - CAT_XDEFINE sizeofmm, i, 8 - %endif - CAT_XDEFINE sizeofxmm, i, 16 - CAT_XDEFINE sizeofymm, i, 32 - %assign i i+1 -%endrep -%undef i - -%macro CHECK_AVX_INSTR_EMU 3-* - %xdefine %%opcode %1 - %xdefine %%dst %2 - %rep %0-2 - %ifidn %%dst, %3 - %error non-avx emulation of ``%%opcode'' is not supported - %endif - %rotate 1 - %endrep -%endmacro - -;%1 == instruction -;%2 == minimal instruction set -;%3 == 1 if float, 0 if int -;%4 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise -;%5 == 1 if commutative (i.e. 
doesn't matter which src arg is which), 0 if not -;%6+: operands -%macro RUN_AVX_INSTR 6-9+ - %ifnum sizeof%7 - %assign __sizeofreg sizeof%7 - %elifnum sizeof%6 - %assign __sizeofreg sizeof%6 - %else - %assign __sizeofreg mmsize - %endif - %assign __emulate_avx 0 - %if avx_enabled && __sizeofreg >= 16 - %xdefine __instr v%1 - %else - %xdefine __instr %1 - %if %0 >= 8+%4 - %assign __emulate_avx 1 - %endif - %endif - %ifnidn %2, fnord - %ifdef cpuname - %if notcpuflag(%2) - %error use of ``%1'' %2 instruction in cpuname function: current_function - %elif cpuflags_%2 < cpuflags_sse && notcpuflag(sse2) && __sizeofreg > 8 - %error use of ``%1'' sse2 instruction in cpuname function: current_function - %endif - %endif - %endif - - %if __emulate_avx - %xdefine __src1 %7 - %xdefine __src2 %8 - %ifnidn %6, %7 - %if %0 >= 9 - CHECK_AVX_INSTR_EMU {%1 %6, %7, %8, %9}, %6, %8, %9 - %else - CHECK_AVX_INSTR_EMU {%1 %6, %7, %8}, %6, %8 - %endif - %if %5 && %4 == 0 - %ifnid %8 - ; 3-operand AVX instructions with a memory arg can only have it in src2, - ; whereas SSE emulation prefers to have it in src1 (i.e. the mov). - ; So, if the instruction is commutative with a memory arg, swap them. - %xdefine __src1 %8 - %xdefine __src2 %7 - %endif - %endif - %if __sizeofreg == 8 - MOVQ %6, __src1 - %elif %3 - MOVAPS %6, __src1 - %else - MOVDQA %6, __src1 - %endif - %endif - %if %0 >= 9 - %1 %6, __src2, %9 - %else - %1 %6, __src2 - %endif - %elif %0 >= 9 - __instr %6, %7, %8, %9 - %elif %0 == 8 - __instr %6, %7, %8 - %elif %0 == 7 - __instr %6, %7 - %else - __instr %6 - %endif -%endmacro - -;%1 == instruction -;%2 == minimal instruction set -;%3 == 1 if float, 0 if int -;%4 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise -;%5 == 1 if commutative (i.e. 
doesn't matter which src arg is which), 0 if not -%macro AVX_INSTR 1-5 fnord, 0, 1, 0 - %macro %1 1-10 fnord, fnord, fnord, fnord, %1, %2, %3, %4, %5 - %ifidn %2, fnord - RUN_AVX_INSTR %6, %7, %8, %9, %10, %1 - %elifidn %3, fnord - RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2 - %elifidn %4, fnord - RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3 - %elifidn %5, fnord - RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3, %4 - %else - RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3, %4, %5 - %endif - %endmacro -%endmacro - -; Instructions with both VEX and non-VEX encodings -; Non-destructive instructions are written without parameters -AVX_INSTR addpd, sse2, 1, 0, 1 -AVX_INSTR addps, sse, 1, 0, 1 -AVX_INSTR addsd, sse2, 1, 0, 1 -AVX_INSTR addss, sse, 1, 0, 1 -AVX_INSTR addsubpd, sse3, 1, 0, 0 -AVX_INSTR addsubps, sse3, 1, 0, 0 -AVX_INSTR aesdec, fnord, 0, 0, 0 -AVX_INSTR aesdeclast, fnord, 0, 0, 0 -AVX_INSTR aesenc, fnord, 0, 0, 0 -AVX_INSTR aesenclast, fnord, 0, 0, 0 -AVX_INSTR aesimc -AVX_INSTR aeskeygenassist -AVX_INSTR andnpd, sse2, 1, 0, 0 -AVX_INSTR andnps, sse, 1, 0, 0 -AVX_INSTR andpd, sse2, 1, 0, 1 -AVX_INSTR andps, sse, 1, 0, 1 -AVX_INSTR blendpd, sse4, 1, 0, 0 -AVX_INSTR blendps, sse4, 1, 0, 0 -AVX_INSTR blendvpd, sse4, 1, 0, 0 -AVX_INSTR blendvps, sse4, 1, 0, 0 -AVX_INSTR cmppd, sse2, 1, 1, 0 -AVX_INSTR cmpps, sse, 1, 1, 0 -AVX_INSTR cmpsd, sse2, 1, 1, 0 -AVX_INSTR cmpss, sse, 1, 1, 0 -AVX_INSTR comisd, sse2 -AVX_INSTR comiss, sse -AVX_INSTR cvtdq2pd, sse2 -AVX_INSTR cvtdq2ps, sse2 -AVX_INSTR cvtpd2dq, sse2 -AVX_INSTR cvtpd2ps, sse2 -AVX_INSTR cvtps2dq, sse2 -AVX_INSTR cvtps2pd, sse2 -AVX_INSTR cvtsd2si, sse2 -AVX_INSTR cvtsd2ss, sse2 -AVX_INSTR cvtsi2sd, sse2 -AVX_INSTR cvtsi2ss, sse -AVX_INSTR cvtss2sd, sse2 -AVX_INSTR cvtss2si, sse -AVX_INSTR cvttpd2dq, sse2 -AVX_INSTR cvttps2dq, sse2 -AVX_INSTR cvttsd2si, sse2 -AVX_INSTR cvttss2si, sse -AVX_INSTR divpd, sse2, 1, 0, 0 -AVX_INSTR divps, sse, 1, 0, 0 -AVX_INSTR divsd, sse2, 1, 0, 0 -AVX_INSTR divss, sse, 1, 0, 0 
-AVX_INSTR dppd, sse4, 1, 1, 0 -AVX_INSTR dpps, sse4, 1, 1, 0 -AVX_INSTR extractps, sse4 -AVX_INSTR haddpd, sse3, 1, 0, 0 -AVX_INSTR haddps, sse3, 1, 0, 0 -AVX_INSTR hsubpd, sse3, 1, 0, 0 -AVX_INSTR hsubps, sse3, 1, 0, 0 -AVX_INSTR insertps, sse4, 1, 1, 0 -AVX_INSTR lddqu, sse3 -AVX_INSTR ldmxcsr, sse -AVX_INSTR maskmovdqu, sse2 -AVX_INSTR maxpd, sse2, 1, 0, 1 -AVX_INSTR maxps, sse, 1, 0, 1 -AVX_INSTR maxsd, sse2, 1, 0, 1 -AVX_INSTR maxss, sse, 1, 0, 1 -AVX_INSTR minpd, sse2, 1, 0, 1 -AVX_INSTR minps, sse, 1, 0, 1 -AVX_INSTR minsd, sse2, 1, 0, 1 -AVX_INSTR minss, sse, 1, 0, 1 -AVX_INSTR movapd, sse2 -AVX_INSTR movaps, sse -AVX_INSTR movd, mmx -AVX_INSTR movddup, sse3 -AVX_INSTR movdqa, sse2 -AVX_INSTR movdqu, sse2 -AVX_INSTR movhlps, sse, 1, 0, 0 -AVX_INSTR movhpd, sse2, 1, 0, 0 -AVX_INSTR movhps, sse, 1, 0, 0 -AVX_INSTR movlhps, sse, 1, 0, 0 -AVX_INSTR movlpd, sse2, 1, 0, 0 -AVX_INSTR movlps, sse, 1, 0, 0 -AVX_INSTR movmskpd, sse2 -AVX_INSTR movmskps, sse -AVX_INSTR movntdq, sse2 -AVX_INSTR movntdqa, sse4 -AVX_INSTR movntpd, sse2 -AVX_INSTR movntps, sse -AVX_INSTR movq, mmx -AVX_INSTR movsd, sse2, 1, 0, 0 -AVX_INSTR movshdup, sse3 -AVX_INSTR movsldup, sse3 -AVX_INSTR movss, sse, 1, 0, 0 -AVX_INSTR movupd, sse2 -AVX_INSTR movups, sse -AVX_INSTR mpsadbw, sse4 -AVX_INSTR mulpd, sse2, 1, 0, 1 -AVX_INSTR mulps, sse, 1, 0, 1 -AVX_INSTR mulsd, sse2, 1, 0, 1 -AVX_INSTR mulss, sse, 1, 0, 1 -AVX_INSTR orpd, sse2, 1, 0, 1 -AVX_INSTR orps, sse, 1, 0, 1 -AVX_INSTR pabsb, ssse3 -AVX_INSTR pabsd, ssse3 -AVX_INSTR pabsw, ssse3 -AVX_INSTR packsswb, mmx, 0, 0, 0 -AVX_INSTR packssdw, mmx, 0, 0, 0 -AVX_INSTR packuswb, mmx, 0, 0, 0 -AVX_INSTR packusdw, sse4, 0, 0, 0 -AVX_INSTR paddb, mmx, 0, 0, 1 -AVX_INSTR paddw, mmx, 0, 0, 1 -AVX_INSTR paddd, mmx, 0, 0, 1 -AVX_INSTR paddq, sse2, 0, 0, 1 -AVX_INSTR paddsb, mmx, 0, 0, 1 -AVX_INSTR paddsw, mmx, 0, 0, 1 -AVX_INSTR paddusb, mmx, 0, 0, 1 -AVX_INSTR paddusw, mmx, 0, 0, 1 -AVX_INSTR palignr, ssse3 -AVX_INSTR pand, mmx, 0, 0, 1 -AVX_INSTR 
pandn, mmx, 0, 0, 0 -AVX_INSTR pavgb, mmx2, 0, 0, 1 -AVX_INSTR pavgw, mmx2, 0, 0, 1 -AVX_INSTR pblendvb, sse4, 0, 0, 0 -AVX_INSTR pblendw, sse4 -AVX_INSTR pclmulqdq -AVX_INSTR pcmpestri, sse42 -AVX_INSTR pcmpestrm, sse42 -AVX_INSTR pcmpistri, sse42 -AVX_INSTR pcmpistrm, sse42 -AVX_INSTR pcmpeqb, mmx, 0, 0, 1 -AVX_INSTR pcmpeqw, mmx, 0, 0, 1 -AVX_INSTR pcmpeqd, mmx, 0, 0, 1 -AVX_INSTR pcmpeqq, sse4, 0, 0, 1 -AVX_INSTR pcmpgtb, mmx, 0, 0, 0 -AVX_INSTR pcmpgtw, mmx, 0, 0, 0 -AVX_INSTR pcmpgtd, mmx, 0, 0, 0 -AVX_INSTR pcmpgtq, sse42, 0, 0, 0 -AVX_INSTR pextrb, sse4 -AVX_INSTR pextrd, sse4 -AVX_INSTR pextrq, sse4 -AVX_INSTR pextrw, mmx2 -AVX_INSTR phaddw, ssse3, 0, 0, 0 -AVX_INSTR phaddd, ssse3, 0, 0, 0 -AVX_INSTR phaddsw, ssse3, 0, 0, 0 -AVX_INSTR phminposuw, sse4 -AVX_INSTR phsubw, ssse3, 0, 0, 0 -AVX_INSTR phsubd, ssse3, 0, 0, 0 -AVX_INSTR phsubsw, ssse3, 0, 0, 0 -AVX_INSTR pinsrb, sse4 -AVX_INSTR pinsrd, sse4 -AVX_INSTR pinsrq, sse4 -AVX_INSTR pinsrw, mmx2 -AVX_INSTR pmaddwd, mmx, 0, 0, 1 -AVX_INSTR pmaddubsw, ssse3, 0, 0, 0 -AVX_INSTR pmaxsb, sse4, 0, 0, 1 -AVX_INSTR pmaxsw, mmx2, 0, 0, 1 -AVX_INSTR pmaxsd, sse4, 0, 0, 1 -AVX_INSTR pmaxub, mmx2, 0, 0, 1 -AVX_INSTR pmaxuw, sse4, 0, 0, 1 -AVX_INSTR pmaxud, sse4, 0, 0, 1 -AVX_INSTR pminsb, sse4, 0, 0, 1 -AVX_INSTR pminsw, mmx2, 0, 0, 1 -AVX_INSTR pminsd, sse4, 0, 0, 1 -AVX_INSTR pminub, mmx2, 0, 0, 1 -AVX_INSTR pminuw, sse4, 0, 0, 1 -AVX_INSTR pminud, sse4, 0, 0, 1 -AVX_INSTR pmovmskb, mmx2 -AVX_INSTR pmovsxbw, sse4 -AVX_INSTR pmovsxbd, sse4 -AVX_INSTR pmovsxbq, sse4 -AVX_INSTR pmovsxwd, sse4 -AVX_INSTR pmovsxwq, sse4 -AVX_INSTR pmovsxdq, sse4 -AVX_INSTR pmovzxbw, sse4 -AVX_INSTR pmovzxbd, sse4 -AVX_INSTR pmovzxbq, sse4 -AVX_INSTR pmovzxwd, sse4 -AVX_INSTR pmovzxwq, sse4 -AVX_INSTR pmovzxdq, sse4 -AVX_INSTR pmuldq, sse4, 0, 0, 1 -AVX_INSTR pmulhrsw, ssse3, 0, 0, 1 -AVX_INSTR pmulhuw, mmx2, 0, 0, 1 -AVX_INSTR pmulhw, mmx, 0, 0, 1 -AVX_INSTR pmullw, mmx, 0, 0, 1 -AVX_INSTR pmulld, sse4, 0, 0, 1 -AVX_INSTR pmuludq, sse2, 
0, 0, 1 -AVX_INSTR por, mmx, 0, 0, 1 -AVX_INSTR psadbw, mmx2, 0, 0, 1 -AVX_INSTR pshufb, ssse3, 0, 0, 0 -AVX_INSTR pshufd, sse2 -AVX_INSTR pshufhw, sse2 -AVX_INSTR pshuflw, sse2 -AVX_INSTR psignb, ssse3, 0, 0, 0 -AVX_INSTR psignw, ssse3, 0, 0, 0 -AVX_INSTR psignd, ssse3, 0, 0, 0 -AVX_INSTR psllw, mmx, 0, 0, 0 -AVX_INSTR pslld, mmx, 0, 0, 0 -AVX_INSTR psllq, mmx, 0, 0, 0 -AVX_INSTR pslldq, sse2, 0, 0, 0 -AVX_INSTR psraw, mmx, 0, 0, 0 -AVX_INSTR psrad, mmx, 0, 0, 0 -AVX_INSTR psrlw, mmx, 0, 0, 0 -AVX_INSTR psrld, mmx, 0, 0, 0 -AVX_INSTR psrlq, mmx, 0, 0, 0 -AVX_INSTR psrldq, sse2, 0, 0, 0 -AVX_INSTR psubb, mmx, 0, 0, 0 -AVX_INSTR psubw, mmx, 0, 0, 0 -AVX_INSTR psubd, mmx, 0, 0, 0 -AVX_INSTR psubq, sse2, 0, 0, 0 -AVX_INSTR psubsb, mmx, 0, 0, 0 -AVX_INSTR psubsw, mmx, 0, 0, 0 -AVX_INSTR psubusb, mmx, 0, 0, 0 -AVX_INSTR psubusw, mmx, 0, 0, 0 -AVX_INSTR ptest, sse4 -AVX_INSTR punpckhbw, mmx, 0, 0, 0 -AVX_INSTR punpckhwd, mmx, 0, 0, 0 -AVX_INSTR punpckhdq, mmx, 0, 0, 0 -AVX_INSTR punpckhqdq, sse2, 0, 0, 0 -AVX_INSTR punpcklbw, mmx, 0, 0, 0 -AVX_INSTR punpcklwd, mmx, 0, 0, 0 -AVX_INSTR punpckldq, mmx, 0, 0, 0 -AVX_INSTR punpcklqdq, sse2, 0, 0, 0 -AVX_INSTR pxor, mmx, 0, 0, 1 -AVX_INSTR rcpps, sse, 1, 0, 0 -AVX_INSTR rcpss, sse, 1, 0, 0 -AVX_INSTR roundpd, sse4 -AVX_INSTR roundps, sse4 -AVX_INSTR roundsd, sse4 -AVX_INSTR roundss, sse4 -AVX_INSTR rsqrtps, sse, 1, 0, 0 -AVX_INSTR rsqrtss, sse, 1, 0, 0 -AVX_INSTR shufpd, sse2, 1, 1, 0 -AVX_INSTR shufps, sse, 1, 1, 0 -AVX_INSTR sqrtpd, sse2, 1, 0, 0 -AVX_INSTR sqrtps, sse, 1, 0, 0 -AVX_INSTR sqrtsd, sse2, 1, 0, 0 -AVX_INSTR sqrtss, sse, 1, 0, 0 -AVX_INSTR stmxcsr, sse -AVX_INSTR subpd, sse2, 1, 0, 0 -AVX_INSTR subps, sse, 1, 0, 0 -AVX_INSTR subsd, sse2, 1, 0, 0 -AVX_INSTR subss, sse, 1, 0, 0 -AVX_INSTR ucomisd, sse2 -AVX_INSTR ucomiss, sse -AVX_INSTR unpckhpd, sse2, 1, 0, 0 -AVX_INSTR unpckhps, sse, 1, 0, 0 -AVX_INSTR unpcklpd, sse2, 1, 0, 0 -AVX_INSTR unpcklps, sse, 1, 0, 0 -AVX_INSTR xorpd, sse2, 1, 0, 1 -AVX_INSTR xorps, 
sse, 1, 0, 1 - -; 3DNow instructions, for sharing code between AVX, SSE and 3DN -AVX_INSTR pfadd, 3dnow, 1, 0, 1 -AVX_INSTR pfsub, 3dnow, 1, 0, 0 -AVX_INSTR pfmul, 3dnow, 1, 0, 1 - -; base-4 constants for shuffles -%assign i 0 -%rep 256 - %assign j ((i>>6)&3)*1000 + ((i>>4)&3)*100 + ((i>>2)&3)*10 + (i&3) - %if j < 10 - CAT_XDEFINE q000, j, i - %elif j < 100 - CAT_XDEFINE q00, j, i - %elif j < 1000 - CAT_XDEFINE q0, j, i - %else - CAT_XDEFINE q, j, i - %endif - %assign i i+1 -%endrep -%undef i -%undef j - -%macro FMA_INSTR 3 - %macro %1 4-7 %1, %2, %3 - %if cpuflag(xop) - v%5 %1, %2, %3, %4 - %elifnidn %1, %4 - %6 %1, %2, %3 - %7 %1, %4 - %else - %error non-xop emulation of ``%5 %1, %2, %3, %4'' is not supported - %endif - %endmacro -%endmacro - -FMA_INSTR pmacsww, pmullw, paddw -FMA_INSTR pmacsdd, pmulld, paddd ; sse4 emulation -FMA_INSTR pmacsdql, pmuldq, paddq ; sse4 emulation -FMA_INSTR pmadcswd, pmaddwd, paddd - -; Macros for consolidating FMA3 and FMA4 using 4-operand (dst, src1, src2, src3) syntax. -; FMA3 is only possible if dst is the same as one of the src registers. -; Either src2 or src3 can be a memory operand. -%macro FMA4_INSTR 2-* - %push fma4_instr - %xdefine %$prefix %1 - %rep %0 - 1 - %macro %$prefix%2 4-6 %$prefix, %2 - %if notcpuflag(fma3) && notcpuflag(fma4) - %error use of ``%5%6'' fma instruction in cpuname function: current_function - %elif cpuflag(fma4) - v%5%6 %1, %2, %3, %4 - %elifidn %1, %2 - ; If %3 or %4 is a memory operand it needs to be encoded as the last operand. 
- %ifid %3 - v%{5}213%6 %2, %3, %4 - %else - v%{5}132%6 %2, %4, %3 - %endif - %elifidn %1, %3 - v%{5}213%6 %3, %2, %4 - %elifidn %1, %4 - v%{5}231%6 %4, %2, %3 - %else - %error fma3 emulation of ``%5%6 %1, %2, %3, %4'' is not supported - %endif - %endmacro - %rotate 1 - %endrep - %pop -%endmacro - -FMA4_INSTR fmadd, pd, ps, sd, ss -FMA4_INSTR fmaddsub, pd, ps -FMA4_INSTR fmsub, pd, ps, sd, ss -FMA4_INSTR fmsubadd, pd, ps -FMA4_INSTR fnmadd, pd, ps, sd, ss -FMA4_INSTR fnmsub, pd, ps, sd, ss - -; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0) -%ifdef __YASM_VER__ - %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0 - %macro vpbroadcastq 2 - %if sizeof%1 == 16 - movddup %1, %2 - %else - vbroadcastsd %1, %2 - %endif - %endmacro - %endif -%endif diff --git a/thirdparty/libvpx/vp8/common/alloccommon.c b/thirdparty/libvpx/vp8/common/alloccommon.c deleted file mode 100644 index 8dfd4ce203..0000000000 --- a/thirdparty/libvpx/vp8/common/alloccommon.c +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vpx_config.h" -#include "alloccommon.h" -#include "blockd.h" -#include "vpx_mem/vpx_mem.h" -#include "onyxc_int.h" -#include "findnearmv.h" -#include "entropymode.h" -#include "systemdependent.h" - -void vp8_de_alloc_frame_buffers(VP8_COMMON *oci) -{ - int i; - for (i = 0; i < NUM_YV12_BUFFERS; i++) - vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]); - - vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame); -#if CONFIG_POSTPROC - vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer); - if (oci->post_proc_buffer_int_used) - vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int); - - vpx_free(oci->pp_limits_buffer); - oci->pp_limits_buffer = NULL; -#endif - - vpx_free(oci->above_context); - vpx_free(oci->mip); -#if CONFIG_ERROR_CONCEALMENT - vpx_free(oci->prev_mip); - oci->prev_mip = NULL; -#endif - - oci->above_context = NULL; - oci->mip = NULL; -} - -int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) -{ - int i; - - vp8_de_alloc_frame_buffers(oci); - - /* our internal buffers are always multiples of 16 */ - if ((width & 0xf) != 0) - width += 16 - (width & 0xf); - - if ((height & 0xf) != 0) - height += 16 - (height & 0xf); - - - for (i = 0; i < NUM_YV12_BUFFERS; i++) - { - oci->fb_idx_ref_cnt[i] = 0; - oci->yv12_fb[i].flags = 0; - if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0) - goto allocation_fail; - } - - oci->new_fb_idx = 0; - oci->lst_fb_idx = 1; - oci->gld_fb_idx = 2; - oci->alt_fb_idx = 3; - - oci->fb_idx_ref_cnt[0] = 1; - oci->fb_idx_ref_cnt[1] = 1; - oci->fb_idx_ref_cnt[2] = 1; - oci->fb_idx_ref_cnt[3] = 1; - - if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0) - goto allocation_fail; - - oci->mb_rows = height >> 4; - oci->mb_cols = width >> 4; - oci->MBs = oci->mb_rows * oci->mb_cols; - oci->mode_info_stride = oci->mb_cols + 1; - oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO)); - - if (!oci->mip) - 
goto allocation_fail; - - oci->mi = oci->mip + oci->mode_info_stride + 1; - - /* Allocation of previous mode info will be done in vp8_decode_frame() - * as it is a decoder only data */ - - oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1); - - if (!oci->above_context) - goto allocation_fail; - -#if CONFIG_POSTPROC - if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0) - goto allocation_fail; - - oci->post_proc_buffer_int_used = 0; - memset(&oci->postproc_state, 0, sizeof(oci->postproc_state)); - memset(oci->post_proc_buffer.buffer_alloc, 128, - oci->post_proc_buffer.frame_size); - - /* Allocate buffer to store post-processing filter coefficients. - * - * Note: Round up mb_cols to support SIMD reads - */ - oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1)); - if (!oci->pp_limits_buffer) - goto allocation_fail; -#endif - - return 0; - -allocation_fail: - vp8_de_alloc_frame_buffers(oci); - return 1; -} - -void vp8_setup_version(VP8_COMMON *cm) -{ - switch (cm->version) - { - case 0: - cm->no_lpf = 0; - cm->filter_type = NORMAL_LOOPFILTER; - cm->use_bilinear_mc_filter = 0; - cm->full_pixel = 0; - break; - case 1: - cm->no_lpf = 0; - cm->filter_type = SIMPLE_LOOPFILTER; - cm->use_bilinear_mc_filter = 1; - cm->full_pixel = 0; - break; - case 2: - cm->no_lpf = 1; - cm->filter_type = NORMAL_LOOPFILTER; - cm->use_bilinear_mc_filter = 1; - cm->full_pixel = 0; - break; - case 3: - cm->no_lpf = 1; - cm->filter_type = SIMPLE_LOOPFILTER; - cm->use_bilinear_mc_filter = 1; - cm->full_pixel = 1; - break; - default: - /*4,5,6,7 are reserved for future use*/ - cm->no_lpf = 0; - cm->filter_type = NORMAL_LOOPFILTER; - cm->use_bilinear_mc_filter = 0; - cm->full_pixel = 0; - break; - } -} -void vp8_create_common(VP8_COMMON *oci) -{ - vp8_machine_specific_config(oci); - - vp8_init_mbmode_probs(oci); - vp8_default_bmode_probs(oci->fc.bmode_prob); - - oci->mb_no_coeff_skip = 1; - oci->no_lpf = 0; 
- oci->filter_type = NORMAL_LOOPFILTER; - oci->use_bilinear_mc_filter = 0; - oci->full_pixel = 0; - oci->multi_token_partition = ONE_PARTITION; - oci->clamp_type = RECON_CLAMP_REQUIRED; - - /* Initialize reference frame sign bias structure to defaults */ - memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias)); - - /* Default disable buffer to buffer copying */ - oci->copy_buffer_to_gf = 0; - oci->copy_buffer_to_arf = 0; -} - -void vp8_remove_common(VP8_COMMON *oci) -{ - vp8_de_alloc_frame_buffers(oci); -} diff --git a/thirdparty/libvpx/vp8/common/alloccommon.h b/thirdparty/libvpx/vp8/common/alloccommon.h deleted file mode 100644 index 93e99d76b1..0000000000 --- a/thirdparty/libvpx/vp8/common/alloccommon.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_ALLOCCOMMON_H_ -#define VP8_COMMON_ALLOCCOMMON_H_ - -#include "onyxc_int.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_create_common(VP8_COMMON *oci); -void vp8_remove_common(VP8_COMMON *oci); -void vp8_de_alloc_frame_buffers(VP8_COMMON *oci); -int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height); -void vp8_setup_version(VP8_COMMON *oci); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_ALLOCCOMMON_H_ diff --git a/thirdparty/libvpx/vp8/common/arm/loopfilter_arm.c b/thirdparty/libvpx/vp8/common/arm/loopfilter_arm.c deleted file mode 100644 index 5840c2bbaa..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/loopfilter_arm.c +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "vp8/common/loopfilter.h" -#include "vp8/common/onyxc_int.h" - -#define prototype_loopfilter(sym) \ - void sym(unsigned char *src, int pitch, const unsigned char *blimit,\ - const unsigned char *limit, const unsigned char *thresh, int count) - -#if HAVE_MEDIA -extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6); -extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6); -extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6); -extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6); -#endif - -#if HAVE_NEON -typedef void loopfilter_y_neon(unsigned char *src, int pitch, - unsigned char blimit, unsigned char limit, unsigned char thresh); -typedef void loopfilter_uv_neon(unsigned char *u, int pitch, - unsigned char blimit, unsigned char limit, unsigned char thresh, - unsigned char *v); - -extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon; -extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon; -extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon; -extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon; - -extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon; -extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon; -extern loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon; -extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon; -#endif - -#if HAVE_MEDIA -/* ARMV6/MEDIA loopfilter functions*/ -/* Horizontal MB filtering */ -void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int 
uv_stride, loop_filter_info *lfi) -{ - vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); -} - -/* Vertical MB Filtering */ -void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); -} - -/* Horizontal B Filtering */ -void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); -} - -void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, int y_stride, - const unsigned char *blimit) -{ - vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, blimit); - vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, blimit); - vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * 
y_stride, y_stride, blimit); -} - -/* Vertical B Filtering */ -void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); -} - -void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, int y_stride, - const unsigned char *blimit) -{ - vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit); - vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit); - vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit); -} -#endif - -#if HAVE_NEON -/* NEON loopfilter functions */ -/* Horizontal MB filtering */ -void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - unsigned char mblim = *lfi->mblim; - unsigned char lim = *lfi->lim; - unsigned char hev_thr = *lfi->hev_thr; - vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr); - - if (u_ptr) - vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr); -} - -/* Vertical MB Filtering */ -void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - unsigned char mblim = *lfi->mblim; - unsigned char lim = *lfi->lim; - unsigned char hev_thr = *lfi->hev_thr; - - vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr); - 
- if (u_ptr) - vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr); -} - -/* Horizontal B Filtering */ -void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - unsigned char blim = *lfi->blim; - unsigned char lim = *lfi->lim; - unsigned char hev_thr = *lfi->hev_thr; - - vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim, lim, hev_thr); - vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim, lim, hev_thr); - vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim, lim, hev_thr); - - if (u_ptr) - vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, blim, lim, hev_thr, v_ptr + 4 * uv_stride); -} - -/* Vertical B Filtering */ -void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - unsigned char blim = *lfi->blim; - unsigned char lim = *lfi->lim; - unsigned char hev_thr = *lfi->hev_thr; - - vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr); - vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr); - vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim, hev_thr); - - if (u_ptr) - vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim, hev_thr, v_ptr + 4); -} -#endif diff --git a/thirdparty/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c deleted file mode 100644 index bb6ea76ba4..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c +++ /dev/null @@ -1,591 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> - -static const uint8_t bifilter4_coeff[8][2] = { - {128, 0}, - {112, 16}, - { 96, 32}, - { 80, 48}, - { 64, 64}, - { 48, 80}, - { 32, 96}, - { 16, 112} -}; - -void vp8_bilinear_predict8x4_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8; - uint8x8_t d7u8, d9u8, d11u8, d22u8, d23u8, d24u8, d25u8, d26u8; - uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8; - uint16x8_t q1u16, q2u16, q3u16, q4u16; - uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16; - - if (xoffset == 0) { // skip_1stpass_filter - d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d26u8 = vld1_u8(src_ptr); - } else { - q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q5u8 = vld1q_u8(src_ptr); - - d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); - - q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); - q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8); - q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); - - d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1); - d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1); - d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - d11u8 = vext_u8(vget_low_u8(q5u8), 
vget_high_u8(q5u8), 1); - - q6u16 = vmlal_u8(q6u16, d3u8, d1u8); - q7u16 = vmlal_u8(q7u16, d5u8, d1u8); - q8u16 = vmlal_u8(q8u16, d7u8, d1u8); - q9u16 = vmlal_u8(q9u16, d9u8, d1u8); - q10u16 = vmlal_u8(q10u16, d11u8, d1u8); - - d22u8 = vqrshrn_n_u16(q6u16, 7); - d23u8 = vqrshrn_n_u16(q7u16, 7); - d24u8 = vqrshrn_n_u16(q8u16, 7); - d25u8 = vqrshrn_n_u16(q9u16, 7); - d26u8 = vqrshrn_n_u16(q10u16, 7); - } - - // secondpass_filter - if (yoffset == 0) { // skip_2ndpass_filter - vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d25u8); - } else { - d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); - - q1u16 = vmull_u8(d22u8, d0u8); - q2u16 = vmull_u8(d23u8, d0u8); - q3u16 = vmull_u8(d24u8, d0u8); - q4u16 = vmull_u8(d25u8, d0u8); - - q1u16 = vmlal_u8(q1u16, d23u8, d1u8); - q2u16 = vmlal_u8(q2u16, d24u8, d1u8); - q3u16 = vmlal_u8(q3u16, d25u8, d1u8); - q4u16 = vmlal_u8(q4u16, d26u8, d1u8); - - d2u8 = vqrshrn_n_u16(q1u16, 7); - d3u8 = vqrshrn_n_u16(q2u16, 7); - d4u8 = vqrshrn_n_u16(q3u16, 7); - d5u8 = vqrshrn_n_u16(q4u16, 7); - - vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d5u8); - } - return; -} - -void vp8_bilinear_predict8x8_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8, d11u8; - uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8; - uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8; - uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16; - uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16; - - if (xoffset == 0) { // skip_1stpass_filter - d22u8 = 
vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d26u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d27u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d28u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d29u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; - d30u8 = vld1_u8(src_ptr); - } else { - q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - - d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); - - q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); - q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8); - q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - - d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1); - d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1); - d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - - q6u16 = vmlal_u8(q6u16, d3u8, d1u8); - q7u16 = vmlal_u8(q7u16, d5u8, d1u8); - q8u16 = vmlal_u8(q8u16, d7u8, d1u8); - q9u16 = vmlal_u8(q9u16, d9u8, d1u8); - - d22u8 = vqrshrn_n_u16(q6u16, 7); - d23u8 = vqrshrn_n_u16(q7u16, 7); - d24u8 = vqrshrn_n_u16(q8u16, 7); - d25u8 = vqrshrn_n_u16(q9u16, 7); - - // first_pass filtering on the rest 5-line data - q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q5u8 = vld1q_u8(src_ptr); - - q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); - q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8); - q8u16 = 
vmull_u8(vget_low_u8(q3u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); - - d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1); - d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1); - d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); - - q6u16 = vmlal_u8(q6u16, d3u8, d1u8); - q7u16 = vmlal_u8(q7u16, d5u8, d1u8); - q8u16 = vmlal_u8(q8u16, d7u8, d1u8); - q9u16 = vmlal_u8(q9u16, d9u8, d1u8); - q10u16 = vmlal_u8(q10u16, d11u8, d1u8); - - d26u8 = vqrshrn_n_u16(q6u16, 7); - d27u8 = vqrshrn_n_u16(q7u16, 7); - d28u8 = vqrshrn_n_u16(q8u16, 7); - d29u8 = vqrshrn_n_u16(q9u16, 7); - d30u8 = vqrshrn_n_u16(q10u16, 7); - } - - // secondpass_filter - if (yoffset == 0) { // skip_2ndpass_filter - vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d25u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d26u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d27u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d28u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d29u8); - } else { - d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); - - q1u16 = vmull_u8(d22u8, d0u8); - q2u16 = vmull_u8(d23u8, d0u8); - q3u16 = vmull_u8(d24u8, d0u8); - q4u16 = vmull_u8(d25u8, d0u8); - q5u16 = vmull_u8(d26u8, d0u8); - q6u16 = vmull_u8(d27u8, d0u8); - q7u16 = vmull_u8(d28u8, d0u8); - q8u16 = vmull_u8(d29u8, d0u8); - - q1u16 = vmlal_u8(q1u16, d23u8, d1u8); - q2u16 = vmlal_u8(q2u16, d24u8, d1u8); - q3u16 = vmlal_u8(q3u16, d25u8, d1u8); - q4u16 = vmlal_u8(q4u16, d26u8, d1u8); - q5u16 = vmlal_u8(q5u16, d27u8, d1u8); - q6u16 = vmlal_u8(q6u16, d28u8, d1u8); - q7u16 = vmlal_u8(q7u16, d29u8, d1u8); - 
q8u16 = vmlal_u8(q8u16, d30u8, d1u8); - - d2u8 = vqrshrn_n_u16(q1u16, 7); - d3u8 = vqrshrn_n_u16(q2u16, 7); - d4u8 = vqrshrn_n_u16(q3u16, 7); - d5u8 = vqrshrn_n_u16(q4u16, 7); - d6u8 = vqrshrn_n_u16(q5u16, 7); - d7u8 = vqrshrn_n_u16(q6u16, 7); - d8u8 = vqrshrn_n_u16(q7u16, 7); - d9u8 = vqrshrn_n_u16(q8u16, 7); - - vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d5u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d6u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d7u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d8u8); dst_ptr += dst_pitch; - vst1_u8((uint8_t *)dst_ptr, d9u8); - } - return; -} - -void vp8_bilinear_predict16x16_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - int i; - unsigned char tmp[272]; - unsigned char *tmpp; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; - uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d16u8, d17u8, d18u8; - uint8x8_t d19u8, d20u8, d21u8; - uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8; - uint8x16_t q11u8, q12u8, q13u8, q14u8, q15u8; - uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16, q6u16, q7u16, q8u16; - uint16x8_t q9u16, q10u16, q11u16, q12u16, q13u16, q14u16; - - if (xoffset == 0) { // secondpass_bfilter16x16_only - d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); - - q11u8 = vld1q_u8(src_ptr); - src_ptr += src_pixels_per_line; - for (i = 4; i > 0; i--) { - q12u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q13u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q14u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - q15u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; - - q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8); - q2u16 = 
vmull_u8(vget_high_u8(q11u8), d0u8); - q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8); - q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8); - q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8); - q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8); - q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8); - q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8); - - q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8); - q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8); - q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8); - q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8); - q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8); - q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8); - q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8); - q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8); - - d2u8 = vqrshrn_n_u16(q1u16, 7); - d3u8 = vqrshrn_n_u16(q2u16, 7); - d4u8 = vqrshrn_n_u16(q3u16, 7); - d5u8 = vqrshrn_n_u16(q4u16, 7); - d6u8 = vqrshrn_n_u16(q5u16, 7); - d7u8 = vqrshrn_n_u16(q6u16, 7); - d8u8 = vqrshrn_n_u16(q7u16, 7); - d9u8 = vqrshrn_n_u16(q8u16, 7); - - q1u8 = vcombine_u8(d2u8, d3u8); - q2u8 = vcombine_u8(d4u8, d5u8); - q3u8 = vcombine_u8(d6u8, d7u8); - q4u8 = vcombine_u8(d8u8, d9u8); - - q11u8 = q15u8; - - vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch; - } - return; - } - - if (yoffset == 0) { // firstpass_bfilter16x16_only - d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); - - for (i = 4; i > 0 ; i--) { - d2u8 = vld1_u8(src_ptr); - d3u8 = vld1_u8(src_ptr + 8); - d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d5u8 = vld1_u8(src_ptr); - d6u8 = vld1_u8(src_ptr + 8); - d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d8u8 = vld1_u8(src_ptr); - d9u8 = vld1_u8(src_ptr + 8); - d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d11u8 = 
vld1_u8(src_ptr); - d12u8 = vld1_u8(src_ptr + 8); - d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - - q7u16 = vmull_u8(d2u8, d0u8); - q8u16 = vmull_u8(d3u8, d0u8); - q9u16 = vmull_u8(d5u8, d0u8); - q10u16 = vmull_u8(d6u8, d0u8); - q11u16 = vmull_u8(d8u8, d0u8); - q12u16 = vmull_u8(d9u8, d0u8); - q13u16 = vmull_u8(d11u8, d0u8); - q14u16 = vmull_u8(d12u8, d0u8); - - d2u8 = vext_u8(d2u8, d3u8, 1); - d5u8 = vext_u8(d5u8, d6u8, 1); - d8u8 = vext_u8(d8u8, d9u8, 1); - d11u8 = vext_u8(d11u8, d12u8, 1); - - q7u16 = vmlal_u8(q7u16, d2u8, d1u8); - q9u16 = vmlal_u8(q9u16, d5u8, d1u8); - q11u16 = vmlal_u8(q11u16, d8u8, d1u8); - q13u16 = vmlal_u8(q13u16, d11u8, d1u8); - - d3u8 = vext_u8(d3u8, d4u8, 1); - d6u8 = vext_u8(d6u8, d7u8, 1); - d9u8 = vext_u8(d9u8, d10u8, 1); - d12u8 = vext_u8(d12u8, d13u8, 1); - - q8u16 = vmlal_u8(q8u16, d3u8, d1u8); - q10u16 = vmlal_u8(q10u16, d6u8, d1u8); - q12u16 = vmlal_u8(q12u16, d9u8, d1u8); - q14u16 = vmlal_u8(q14u16, d12u8, d1u8); - - d14u8 = vqrshrn_n_u16(q7u16, 7); - d15u8 = vqrshrn_n_u16(q8u16, 7); - d16u8 = vqrshrn_n_u16(q9u16, 7); - d17u8 = vqrshrn_n_u16(q10u16, 7); - d18u8 = vqrshrn_n_u16(q11u16, 7); - d19u8 = vqrshrn_n_u16(q12u16, 7); - d20u8 = vqrshrn_n_u16(q13u16, 7); - d21u8 = vqrshrn_n_u16(q14u16, 7); - - q7u8 = vcombine_u8(d14u8, d15u8); - q8u8 = vcombine_u8(d16u8, d17u8); - q9u8 = vcombine_u8(d18u8, d19u8); - q10u8 =vcombine_u8(d20u8, d21u8); - - vst1q_u8((uint8_t *)dst_ptr, q7u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q8u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q9u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q10u8); dst_ptr += dst_pitch; - } - return; - } - - d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); - - d2u8 = vld1_u8(src_ptr); - d3u8 = vld1_u8(src_ptr + 8); - d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d5u8 = vld1_u8(src_ptr); - d6u8 = vld1_u8(src_ptr + 8); - d7u8 = vld1_u8(src_ptr + 16); src_ptr 
+= src_pixels_per_line; - d8u8 = vld1_u8(src_ptr); - d9u8 = vld1_u8(src_ptr + 8); - d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d11u8 = vld1_u8(src_ptr); - d12u8 = vld1_u8(src_ptr + 8); - d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - - // First Pass: output_height lines x output_width columns (17x16) - tmpp = tmp; - for (i = 3; i > 0; i--) { - q7u16 = vmull_u8(d2u8, d0u8); - q8u16 = vmull_u8(d3u8, d0u8); - q9u16 = vmull_u8(d5u8, d0u8); - q10u16 = vmull_u8(d6u8, d0u8); - q11u16 = vmull_u8(d8u8, d0u8); - q12u16 = vmull_u8(d9u8, d0u8); - q13u16 = vmull_u8(d11u8, d0u8); - q14u16 = vmull_u8(d12u8, d0u8); - - d2u8 = vext_u8(d2u8, d3u8, 1); - d5u8 = vext_u8(d5u8, d6u8, 1); - d8u8 = vext_u8(d8u8, d9u8, 1); - d11u8 = vext_u8(d11u8, d12u8, 1); - - q7u16 = vmlal_u8(q7u16, d2u8, d1u8); - q9u16 = vmlal_u8(q9u16, d5u8, d1u8); - q11u16 = vmlal_u8(q11u16, d8u8, d1u8); - q13u16 = vmlal_u8(q13u16, d11u8, d1u8); - - d3u8 = vext_u8(d3u8, d4u8, 1); - d6u8 = vext_u8(d6u8, d7u8, 1); - d9u8 = vext_u8(d9u8, d10u8, 1); - d12u8 = vext_u8(d12u8, d13u8, 1); - - q8u16 = vmlal_u8(q8u16, d3u8, d1u8); - q10u16 = vmlal_u8(q10u16, d6u8, d1u8); - q12u16 = vmlal_u8(q12u16, d9u8, d1u8); - q14u16 = vmlal_u8(q14u16, d12u8, d1u8); - - d14u8 = vqrshrn_n_u16(q7u16, 7); - d15u8 = vqrshrn_n_u16(q8u16, 7); - d16u8 = vqrshrn_n_u16(q9u16, 7); - d17u8 = vqrshrn_n_u16(q10u16, 7); - d18u8 = vqrshrn_n_u16(q11u16, 7); - d19u8 = vqrshrn_n_u16(q12u16, 7); - d20u8 = vqrshrn_n_u16(q13u16, 7); - d21u8 = vqrshrn_n_u16(q14u16, 7); - - d2u8 = vld1_u8(src_ptr); - d3u8 = vld1_u8(src_ptr + 8); - d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d5u8 = vld1_u8(src_ptr); - d6u8 = vld1_u8(src_ptr + 8); - d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d8u8 = vld1_u8(src_ptr); - d9u8 = vld1_u8(src_ptr + 8); - d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - d11u8 = vld1_u8(src_ptr); - d12u8 = vld1_u8(src_ptr + 8); - d13u8 = vld1_u8(src_ptr + 16); 
src_ptr += src_pixels_per_line; - - q7u8 = vcombine_u8(d14u8, d15u8); - q8u8 = vcombine_u8(d16u8, d17u8); - q9u8 = vcombine_u8(d18u8, d19u8); - q10u8 = vcombine_u8(d20u8, d21u8); - - vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q9u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q10u8); tmpp += 16; - } - - // First-pass filtering for rest 5 lines - d14u8 = vld1_u8(src_ptr); - d15u8 = vld1_u8(src_ptr + 8); - d16u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; - - q9u16 = vmull_u8(d2u8, d0u8); - q10u16 = vmull_u8(d3u8, d0u8); - q11u16 = vmull_u8(d5u8, d0u8); - q12u16 = vmull_u8(d6u8, d0u8); - q13u16 = vmull_u8(d8u8, d0u8); - q14u16 = vmull_u8(d9u8, d0u8); - - d2u8 = vext_u8(d2u8, d3u8, 1); - d5u8 = vext_u8(d5u8, d6u8, 1); - d8u8 = vext_u8(d8u8, d9u8, 1); - - q9u16 = vmlal_u8(q9u16, d2u8, d1u8); - q11u16 = vmlal_u8(q11u16, d5u8, d1u8); - q13u16 = vmlal_u8(q13u16, d8u8, d1u8); - - d3u8 = vext_u8(d3u8, d4u8, 1); - d6u8 = vext_u8(d6u8, d7u8, 1); - d9u8 = vext_u8(d9u8, d10u8, 1); - - q10u16 = vmlal_u8(q10u16, d3u8, d1u8); - q12u16 = vmlal_u8(q12u16, d6u8, d1u8); - q14u16 = vmlal_u8(q14u16, d9u8, d1u8); - - q1u16 = vmull_u8(d11u8, d0u8); - q2u16 = vmull_u8(d12u8, d0u8); - q3u16 = vmull_u8(d14u8, d0u8); - q4u16 = vmull_u8(d15u8, d0u8); - - d11u8 = vext_u8(d11u8, d12u8, 1); - d14u8 = vext_u8(d14u8, d15u8, 1); - - q1u16 = vmlal_u8(q1u16, d11u8, d1u8); - q3u16 = vmlal_u8(q3u16, d14u8, d1u8); - - d12u8 = vext_u8(d12u8, d13u8, 1); - d15u8 = vext_u8(d15u8, d16u8, 1); - - q2u16 = vmlal_u8(q2u16, d12u8, d1u8); - q4u16 = vmlal_u8(q4u16, d15u8, d1u8); - - d10u8 = vqrshrn_n_u16(q9u16, 7); - d11u8 = vqrshrn_n_u16(q10u16, 7); - d12u8 = vqrshrn_n_u16(q11u16, 7); - d13u8 = vqrshrn_n_u16(q12u16, 7); - d14u8 = vqrshrn_n_u16(q13u16, 7); - d15u8 = vqrshrn_n_u16(q14u16, 7); - d16u8 = vqrshrn_n_u16(q1u16, 7); - d17u8 = vqrshrn_n_u16(q2u16, 7); - d18u8 = vqrshrn_n_u16(q3u16, 7); - d19u8 = vqrshrn_n_u16(q4u16, 7); - 
- q5u8 = vcombine_u8(d10u8, d11u8); - q6u8 = vcombine_u8(d12u8, d13u8); - q7u8 = vcombine_u8(d14u8, d15u8); - q8u8 = vcombine_u8(d16u8, d17u8); - q9u8 = vcombine_u8(d18u8, d19u8); - - vst1q_u8((uint8_t *)tmpp, q5u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q6u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16; - vst1q_u8((uint8_t *)tmpp, q9u8); - - // secondpass_filter - d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); - d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); - - tmpp = tmp; - q11u8 = vld1q_u8(tmpp); - tmpp += 16; - for (i = 4; i > 0; i--) { - q12u8 = vld1q_u8(tmpp); tmpp += 16; - q13u8 = vld1q_u8(tmpp); tmpp += 16; - q14u8 = vld1q_u8(tmpp); tmpp += 16; - q15u8 = vld1q_u8(tmpp); tmpp += 16; - - q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8); - q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8); - q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8); - q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8); - q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8); - q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8); - q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8); - q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8); - - q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8); - q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8); - q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8); - q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8); - q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8); - q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8); - q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8); - q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8); - - d2u8 = vqrshrn_n_u16(q1u16, 7); - d3u8 = vqrshrn_n_u16(q2u16, 7); - d4u8 = vqrshrn_n_u16(q3u16, 7); - d5u8 = vqrshrn_n_u16(q4u16, 7); - d6u8 = vqrshrn_n_u16(q5u16, 7); - d7u8 = vqrshrn_n_u16(q6u16, 7); - d8u8 = vqrshrn_n_u16(q7u16, 7); - d9u8 = vqrshrn_n_u16(q8u16, 7); - - q1u8 = vcombine_u8(d2u8, d3u8); - q2u8 = vcombine_u8(d4u8, d5u8); - q3u8 = vcombine_u8(d6u8, d7u8); - q4u8 = vcombine_u8(d8u8, d9u8); - - q11u8 = q15u8; 
- - vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch; - vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch; - } - return; -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/copymem_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/copymem_neon.c deleted file mode 100644 index deced115c1..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/copymem_neon.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> - -void vp8_copy_mem8x4_neon( - unsigned char *src, - int src_stride, - unsigned char *dst, - int dst_stride) { - uint8x8_t vtmp; - int r; - - for (r = 0; r < 4; r++) { - vtmp = vld1_u8(src); - vst1_u8(dst, vtmp); - src += src_stride; - dst += dst_stride; - } -} - -void vp8_copy_mem8x8_neon( - unsigned char *src, - int src_stride, - unsigned char *dst, - int dst_stride) { - uint8x8_t vtmp; - int r; - - for (r = 0; r < 8; r++) { - vtmp = vld1_u8(src); - vst1_u8(dst, vtmp); - src += src_stride; - dst += dst_stride; - } -} - -void vp8_copy_mem16x16_neon( - unsigned char *src, - int src_stride, - unsigned char *dst, - int dst_stride) { - int r; - uint8x16_t qtmp; - - for (r = 0; r < 16; r++) { - qtmp = vld1q_u8(src); - vst1q_u8(dst, qtmp); - src += src_stride; - dst += dst_stride; - } -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c deleted file mode 100644 index ad5f41d7de..0000000000 --- 
a/thirdparty/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> - -void vp8_dc_only_idct_add_neon( - int16_t input_dc, - unsigned char *pred_ptr, - int pred_stride, - unsigned char *dst_ptr, - int dst_stride) { - int i; - uint16_t a1 = ((input_dc + 4) >> 3); - uint32x2_t d2u32 = vdup_n_u32(0); - uint8x8_t d2u8; - uint16x8_t q1u16; - uint16x8_t qAdd; - - qAdd = vdupq_n_u16(a1); - - for (i = 0; i < 2; i++) { - d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 0); - pred_ptr += pred_stride; - d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 1); - pred_ptr += pred_stride; - - q1u16 = vaddw_u8(qAdd, vreinterpret_u8_u32(d2u32)); - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); - - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0); - dst_ptr += dst_stride; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1); - dst_ptr += dst_stride; - } -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/dequant_idct_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/dequant_idct_neon.c deleted file mode 100644 index 58e11922c7..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/dequant_idct_neon.c +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> - -static const int16_t cospi8sqrt2minus1 = 20091; -static const int16_t sinpi8sqrt2 = 35468; - -void vp8_dequant_idct_add_neon( - int16_t *input, - int16_t *dq, - unsigned char *dst, - int stride) { - unsigned char *dst0; - int32x2_t d14, d15; - int16x4_t d2, d3, d4, d5, d10, d11, d12, d13; - int16x8_t q1, q2, q3, q4, q5, q6; - int16x8_t qEmpty = vdupq_n_s16(0); - int32x2x2_t d2tmp0, d2tmp1; - int16x4x2_t d2tmp2, d2tmp3; - - d14 = d15 = vdup_n_s32(0); - - // load input - q3 = vld1q_s16(input); - vst1q_s16(input, qEmpty); - input += 8; - q4 = vld1q_s16(input); - vst1q_s16(input, qEmpty); - - // load dq - q5 = vld1q_s16(dq); - dq += 8; - q6 = vld1q_s16(dq); - - // load src from dst - dst0 = dst; - d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0); - dst0 += stride; - d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1); - dst0 += stride; - d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0); - dst0 += stride; - d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1); - - q1 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q3), - vreinterpretq_u16_s16(q5))); - q2 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q4), - vreinterpretq_u16_s16(q6))); - - d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2)); - d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2)); - - q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2)); - - q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2); - q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1); - - q3 = vshrq_n_s16(q3, 1); - q4 = vshrq_n_s16(q4, 1); - - q3 = vqaddq_s16(q3, q2); - q4 = vqaddq_s16(q4, q2); - - d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4)); - d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4)); - - d2 = vqadd_s16(d12, d11); - d3 = vqadd_s16(d13, d10); - d4 = vqsub_s16(d13, d10); - d5 = vqsub_s16(d12, d11); - - d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); - d2tmp1 = 
vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); - d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]), - vreinterpret_s16_s32(d2tmp1.val[0])); - d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]), - vreinterpret_s16_s32(d2tmp1.val[1])); - - // loop 2 - q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]); - - q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2); - q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1); - - d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]); - d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]); - - q3 = vshrq_n_s16(q3, 1); - q4 = vshrq_n_s16(q4, 1); - - q3 = vqaddq_s16(q3, q2); - q4 = vqaddq_s16(q4, q2); - - d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4)); - d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4)); - - d2 = vqadd_s16(d12, d11); - d3 = vqadd_s16(d13, d10); - d4 = vqsub_s16(d13, d10); - d5 = vqsub_s16(d12, d11); - - d2 = vrshr_n_s16(d2, 3); - d3 = vrshr_n_s16(d3, 3); - d4 = vrshr_n_s16(d4, 3); - d5 = vrshr_n_s16(d5, 3); - - d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); - d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); - d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]), - vreinterpret_s16_s32(d2tmp1.val[0])); - d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]), - vreinterpret_s16_s32(d2tmp1.val[1])); - - q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]); - q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]); - - q1 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q1), - vreinterpret_u8_s32(d14))); - q2 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2), - vreinterpret_u8_s32(d15))); - - d14 = vreinterpret_s32_u8(vqmovun_s16(q1)); - d15 = vreinterpret_s32_u8(vqmovun_s16(q2)); - - dst0 = dst; - vst1_lane_s32((int32_t *)dst0, d14, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d14, 1); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d15, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d15, 1); - return; -} diff --git 
a/thirdparty/libvpx/vp8/common/arm/neon/dequantizeb_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/dequantizeb_neon.c deleted file mode 100644 index 54e709dd3c..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/dequantizeb_neon.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> - -#include "vp8/common/blockd.h" - -void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) { - int16x8x2_t qQ, qDQC, qDQ; - - qQ = vld2q_s16(d->qcoeff); - qDQC = vld2q_s16(DQC); - - qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]); - qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]); - - vst2q_s16(d->dqcoeff, qDQ); -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/idct_blk_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/idct_blk_neon.c deleted file mode 100644 index fb327a7260..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/idct_blk_neon.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "vpx_config.h" -#include "vp8_rtcd.h" - -/* place these declarations here because we don't want to maintain them - * outside of this scope - */ -void idct_dequant_full_2x_neon(short *q, short *dq, - unsigned char *dst, int stride); -void idct_dequant_0_2x_neon(short *q, short dq, - unsigned char *dst, int stride); - - -void vp8_dequant_idct_add_y_block_neon(short *q, short *dq, - unsigned char *dst, - int stride, char *eobs) -{ - int i; - - for (i = 0; i < 4; i++) - { - if (((short *)(eobs))[0]) - { - if (((short *)eobs)[0] & 0xfefe) - idct_dequant_full_2x_neon (q, dq, dst, stride); - else - idct_dequant_0_2x_neon (q, dq[0], dst, stride); - } - - if (((short *)(eobs))[1]) - { - if (((short *)eobs)[1] & 0xfefe) - idct_dequant_full_2x_neon (q+32, dq, dst+8, stride); - else - idct_dequant_0_2x_neon (q+32, dq[0], dst+8, stride); - } - q += 64; - dst += 4*stride; - eobs += 4; - } -} - -void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq, - unsigned char *dstu, - unsigned char *dstv, - int stride, char *eobs) -{ - if (((short *)(eobs))[0]) - { - if (((short *)eobs)[0] & 0xfefe) - idct_dequant_full_2x_neon (q, dq, dstu, stride); - else - idct_dequant_0_2x_neon (q, dq[0], dstu, stride); - } - - q += 32; - dstu += 4*stride; - - if (((short *)(eobs))[1]) - { - if (((short *)eobs)[1] & 0xfefe) - idct_dequant_full_2x_neon (q, dq, dstu, stride); - else - idct_dequant_0_2x_neon (q, dq[0], dstu, stride); - } - - q += 32; - - if (((short *)(eobs))[2]) - { - if (((short *)eobs)[2] & 0xfefe) - idct_dequant_full_2x_neon (q, dq, dstv, stride); - else - idct_dequant_0_2x_neon (q, dq[0], dstv, stride); - } - - q += 32; - dstv += 4*stride; - - if (((short *)(eobs))[3]) - { - if (((short *)eobs)[3] & 0xfefe) - idct_dequant_full_2x_neon (q, dq, dstv, stride); - else - idct_dequant_0_2x_neon (q, dq[0], dstv, stride); - } -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c 
deleted file mode 100644 index e6f862fa89..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> - -void idct_dequant_0_2x_neon( - int16_t *q, - int16_t dq, - unsigned char *dst, - int stride) { - unsigned char *dst0; - int i, a0, a1; - int16x8x2_t q2Add; - int32x2_t d2s32 = vdup_n_s32(0), - d4s32 = vdup_n_s32(0); - uint8x8_t d2u8, d4u8; - uint16x8_t q1u16, q2u16; - - a0 = ((q[0] * dq) + 4) >> 3; - a1 = ((q[16] * dq) + 4) >> 3; - q[0] = q[16] = 0; - q2Add.val[0] = vdupq_n_s16((int16_t)a0); - q2Add.val[1] = vdupq_n_s16((int16_t)a1); - - for (i = 0; i < 2; i++, dst += 4) { - dst0 = dst; - d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0); - dst0 += stride; - d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1); - dst0 += stride; - d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0); - dst0 += stride; - d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1); - - q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]), - vreinterpret_u8_s32(d2s32)); - q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]), - vreinterpret_u8_s32(d4s32)); - - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); - d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16)); - - d2s32 = vreinterpret_s32_u8(d2u8); - d4s32 = vreinterpret_s32_u8(d4u8); - - dst0 = dst; - vst1_lane_s32((int32_t *)dst0, d2s32, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d2s32, 1); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d4s32, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst0, d4s32, 1); - } - return; -} diff 
--git a/thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c deleted file mode 100644 index a60ed46b76..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> - -static const int16_t cospi8sqrt2minus1 = 20091; -static const int16_t sinpi8sqrt2 = 17734; -// because the lowest bit in 0x8a8c is 0, we can pre-shift this - -void idct_dequant_full_2x_neon( - int16_t *q, - int16_t *dq, - unsigned char *dst, - int stride) { - unsigned char *dst0, *dst1; - int32x2_t d28, d29, d30, d31; - int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11; - int16x8_t qEmpty = vdupq_n_s16(0); - int32x4x2_t q2tmp0, q2tmp1; - int16x8x2_t q2tmp2, q2tmp3; - int16x4_t dLow0, dLow1, dHigh0, dHigh1; - - d28 = d29 = d30 = d31 = vdup_n_s32(0); - - // load dq - q0 = vld1q_s16(dq); - dq += 8; - q1 = vld1q_s16(dq); - - // load q - q2 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - q += 8; - q3 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - q += 8; - q4 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - q += 8; - q5 = vld1q_s16(q); - vst1q_s16(q, qEmpty); - - // load src from dst - dst0 = dst; - dst1 = dst + 4; - d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0); - dst0 += stride; - d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1); - dst1 += stride; - d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0); - dst0 += stride; - d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1); - dst1 += stride; - - d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0); 
- dst0 += stride; - d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1); - dst1 += stride; - d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0); - d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1); - - q2 = vmulq_s16(q2, q0); - q3 = vmulq_s16(q3, q1); - q4 = vmulq_s16(q4, q0); - q5 = vmulq_s16(q5, q1); - - // vswp - dLow0 = vget_low_s16(q2); - dHigh0 = vget_high_s16(q2); - dLow1 = vget_low_s16(q4); - dHigh1 = vget_high_s16(q4); - q2 = vcombine_s16(dLow0, dLow1); - q4 = vcombine_s16(dHigh0, dHigh1); - - dLow0 = vget_low_s16(q3); - dHigh0 = vget_high_s16(q3); - dLow1 = vget_low_s16(q5); - dHigh1 = vget_high_s16(q5); - q3 = vcombine_s16(dLow0, dLow1); - q5 = vcombine_s16(dHigh0, dHigh1); - - q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2); - q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2); - q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1); - q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1); - - q10 = vqaddq_s16(q2, q3); - q11 = vqsubq_s16(q2, q3); - - q8 = vshrq_n_s16(q8, 1); - q9 = vshrq_n_s16(q9, 1); - - q4 = vqaddq_s16(q4, q8); - q5 = vqaddq_s16(q5, q9); - - q2 = vqsubq_s16(q6, q5); - q3 = vqaddq_s16(q7, q4); - - q4 = vqaddq_s16(q10, q3); - q5 = vqaddq_s16(q11, q2); - q6 = vqsubq_s16(q11, q2); - q7 = vqsubq_s16(q10, q3); - - q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6)); - q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7)); - q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]), - vreinterpretq_s16_s32(q2tmp1.val[0])); - q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]), - vreinterpretq_s16_s32(q2tmp1.val[1])); - - // loop 2 - q8 = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2); - q9 = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2); - q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1); - q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1); - - q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]); - q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]); - - q10 = vshrq_n_s16(q10, 1); - q11 = vshrq_n_s16(q11, 1); - - q10 = vqaddq_s16(q2tmp2.val[1], q10); - 
q11 = vqaddq_s16(q2tmp3.val[1], q11); - - q8 = vqsubq_s16(q8, q11); - q9 = vqaddq_s16(q9, q10); - - q4 = vqaddq_s16(q2, q9); - q5 = vqaddq_s16(q3, q8); - q6 = vqsubq_s16(q3, q8); - q7 = vqsubq_s16(q2, q9); - - q4 = vrshrq_n_s16(q4, 3); - q5 = vrshrq_n_s16(q5, 3); - q6 = vrshrq_n_s16(q6, 3); - q7 = vrshrq_n_s16(q7, 3); - - q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6)); - q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7)); - q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]), - vreinterpretq_s16_s32(q2tmp1.val[0])); - q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]), - vreinterpretq_s16_s32(q2tmp1.val[1])); - - q4 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]), - vreinterpret_u8_s32(d28))); - q5 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]), - vreinterpret_u8_s32(d29))); - q6 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]), - vreinterpret_u8_s32(d30))); - q7 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]), - vreinterpret_u8_s32(d31))); - - d28 = vreinterpret_s32_u8(vqmovun_s16(q4)); - d29 = vreinterpret_s32_u8(vqmovun_s16(q5)); - d30 = vreinterpret_s32_u8(vqmovun_s16(q6)); - d31 = vreinterpret_s32_u8(vqmovun_s16(q7)); - - dst0 = dst; - dst1 = dst + 4; - vst1_lane_s32((int32_t *)dst0, d28, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst1, d28, 1); - dst1 += stride; - vst1_lane_s32((int32_t *)dst0, d29, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst1, d29, 1); - dst1 += stride; - - vst1_lane_s32((int32_t *)dst0, d30, 0); - dst0 += stride; - vst1_lane_s32((int32_t *)dst1, d30, 1); - dst1 += stride; - vst1_lane_s32((int32_t *)dst0, d31, 0); - vst1_lane_s32((int32_t *)dst1, d31, 1); - return; -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/iwalsh_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/iwalsh_neon.c deleted file mode 100644 index 6ea9dd712a..0000000000 --- 
a/thirdparty/libvpx/vp8/common/arm/neon/iwalsh_neon.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> - -void vp8_short_inv_walsh4x4_neon( - int16_t *input, - int16_t *mb_dqcoeff) { - int16x8_t q0s16, q1s16, q2s16, q3s16; - int16x4_t d4s16, d5s16, d6s16, d7s16; - int16x4x2_t v2tmp0, v2tmp1; - int32x2x2_t v2tmp2, v2tmp3; - int16x8_t qAdd3; - - q0s16 = vld1q_s16(input); - q1s16 = vld1q_s16(input + 8); - - // 1st for loop - d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16)); - d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16)); - d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16)); - d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16)); - - q2s16 = vcombine_s16(d4s16, d5s16); - q3s16 = vcombine_s16(d6s16, d7s16); - - q0s16 = vaddq_s16(q2s16, q3s16); - q1s16 = vsubq_s16(q2s16, q3s16); - - v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)), - vreinterpret_s32_s16(vget_low_s16(q1s16))); - v2tmp3 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(q0s16)), - vreinterpret_s32_s16(vget_high_s16(q1s16))); - v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]), - vreinterpret_s16_s32(v2tmp3.val[0])); - v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]), - vreinterpret_s16_s32(v2tmp3.val[1])); - - // 2nd for loop - d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]); - d6s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]); - d5s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]); - d7s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]); - q2s16 = vcombine_s16(d4s16, d5s16); - q3s16 = vcombine_s16(d6s16, d7s16); - - qAdd3 = vdupq_n_s16(3); - - q0s16 = 
vaddq_s16(q2s16, q3s16); - q1s16 = vsubq_s16(q2s16, q3s16); - - q0s16 = vaddq_s16(q0s16, qAdd3); - q1s16 = vaddq_s16(q1s16, qAdd3); - - q0s16 = vshrq_n_s16(q0s16, 3); - q1s16 = vshrq_n_s16(q1s16, 3); - - // store - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 0); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 0); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 0); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 0); - mb_dqcoeff += 16; - - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 1); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 1); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 1); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 1); - mb_dqcoeff += 16; - - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 2); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 2); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 2); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 2); - mb_dqcoeff += 16; - - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 3); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 3); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 3); - mb_dqcoeff += 16; - vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 3); - mb_dqcoeff += 16; - return; -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c deleted file mode 100644 index b25686ffb8..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> -#include "./vpx_config.h" - -static INLINE void vp8_loop_filter_simple_horizontal_edge_neon( - unsigned char *s, - int p, - const unsigned char *blimit) { - uint8_t *sp; - uint8x16_t qblimit, q0u8; - uint8x16_t q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, q14u8, q15u8; - int16x8_t q2s16, q3s16, q13s16; - int8x8_t d8s8, d9s8; - int8x16_t q2s8, q3s8, q4s8, q10s8, q11s8, q14s8; - - qblimit = vdupq_n_u8(*blimit); - - sp = s - (p << 1); - q5u8 = vld1q_u8(sp); - sp += p; - q6u8 = vld1q_u8(sp); - sp += p; - q7u8 = vld1q_u8(sp); - sp += p; - q8u8 = vld1q_u8(sp); - - q15u8 = vabdq_u8(q6u8, q7u8); - q14u8 = vabdq_u8(q5u8, q8u8); - - q15u8 = vqaddq_u8(q15u8, q15u8); - q14u8 = vshrq_n_u8(q14u8, 1); - q0u8 = vdupq_n_u8(0x80); - q13s16 = vdupq_n_s16(3); - q15u8 = vqaddq_u8(q15u8, q14u8); - - q5u8 = veorq_u8(q5u8, q0u8); - q6u8 = veorq_u8(q6u8, q0u8); - q7u8 = veorq_u8(q7u8, q0u8); - q8u8 = veorq_u8(q8u8, q0u8); - - q15u8 = vcgeq_u8(qblimit, q15u8); - - q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7u8)), - vget_low_s8(vreinterpretq_s8_u8(q6u8))); - q3s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7u8)), - vget_high_s8(vreinterpretq_s8_u8(q6u8))); - - q4s8 = vqsubq_s8(vreinterpretq_s8_u8(q5u8), - vreinterpretq_s8_u8(q8u8)); - - q2s16 = vmulq_s16(q2s16, q13s16); - q3s16 = vmulq_s16(q3s16, q13s16); - - q10u8 = vdupq_n_u8(3); - q9u8 = vdupq_n_u8(4); - - q2s16 = vaddw_s8(q2s16, vget_low_s8(q4s8)); - q3s16 = vaddw_s8(q3s16, vget_high_s8(q4s8)); - - d8s8 = vqmovn_s16(q2s16); - d9s8 = vqmovn_s16(q3s16); - q4s8 = vcombine_s8(d8s8, d9s8); - - q14s8 = vandq_s8(q4s8, vreinterpretq_s8_u8(q15u8)); - - q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q10u8)); - q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q9u8)); - q2s8 = vshrq_n_s8(q2s8, 3); - q3s8 = vshrq_n_s8(q3s8, 3); - - q11s8 = 
vqaddq_s8(vreinterpretq_s8_u8(q6u8), q2s8); - q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7u8), q3s8); - - q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8); - q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8); - - vst1q_u8(s, q7u8); - s -= p; - vst1q_u8(s, q6u8); - return; -} - -void vp8_loop_filter_bhs_neon( - unsigned char *y_ptr, - int y_stride, - const unsigned char *blimit) { - y_ptr += y_stride * 4; - vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); - y_ptr += y_stride * 4; - vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); - y_ptr += y_stride * 4; - vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); - return; -} - -void vp8_loop_filter_mbhs_neon( - unsigned char *y_ptr, - int y_stride, - const unsigned char *blimit) { - vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); - return; -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c deleted file mode 100644 index 921bcad698..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <arm_neon.h> -#include "./vpx_config.h" -#include "vpx_ports/arm.h" - -#ifdef VPX_INCOMPATIBLE_GCC -static INLINE void write_2x4(unsigned char *dst, int pitch, - const uint8x8x2_t result) { - /* - * uint8x8x2_t result - 00 01 02 03 | 04 05 06 07 - 10 11 12 13 | 14 15 16 17 - --- - * after vtrn_u8 - 00 10 02 12 | 04 14 06 16 - 01 11 03 13 | 05 15 07 17 - */ - const uint8x8x2_t r01_u8 = vtrn_u8(result.val[0], - result.val[1]); - const uint16x4_t x_0_4 = vreinterpret_u16_u8(r01_u8.val[0]); - const uint16x4_t x_1_5 = vreinterpret_u16_u8(r01_u8.val[1]); - vst1_lane_u16((uint16_t *)dst, x_0_4, 0); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_1_5, 0); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_0_4, 1); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_1_5, 1); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_0_4, 2); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_1_5, 2); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_0_4, 3); - dst += pitch; - vst1_lane_u16((uint16_t *)dst, x_1_5, 3); -} - -static INLINE void write_2x8(unsigned char *dst, int pitch, - const uint8x8x2_t result, - const uint8x8x2_t result2) { - write_2x4(dst, pitch, result); - dst += pitch * 8; - write_2x4(dst, pitch, result2); -} -#else -static INLINE void write_2x8(unsigned char *dst, int pitch, - const uint8x8x2_t result, - const uint8x8x2_t result2) { - vst2_lane_u8(dst, result, 0); - dst += pitch; - vst2_lane_u8(dst, result, 1); - dst += pitch; - vst2_lane_u8(dst, result, 2); - dst += pitch; - vst2_lane_u8(dst, result, 3); - dst += pitch; - vst2_lane_u8(dst, result, 4); - dst += pitch; - vst2_lane_u8(dst, result, 5); - dst += pitch; - vst2_lane_u8(dst, result, 6); - dst += pitch; - vst2_lane_u8(dst, result, 7); - dst += pitch; - - vst2_lane_u8(dst, result2, 0); - dst += pitch; - vst2_lane_u8(dst, result2, 1); - dst += pitch; - vst2_lane_u8(dst, result2, 2); - dst += pitch; - vst2_lane_u8(dst, result2, 3); - dst += pitch; - vst2_lane_u8(dst, result2, 4); - dst 
+= pitch; - vst2_lane_u8(dst, result2, 5); - dst += pitch; - vst2_lane_u8(dst, result2, 6); - dst += pitch; - vst2_lane_u8(dst, result2, 7); -} -#endif // VPX_INCOMPATIBLE_GCC - - -#ifdef VPX_INCOMPATIBLE_GCC -static INLINE -uint8x8x4_t read_4x8(unsigned char *src, int pitch) { - uint8x8x4_t x; - const uint8x8_t a = vld1_u8(src); - const uint8x8_t b = vld1_u8(src + pitch * 1); - const uint8x8_t c = vld1_u8(src + pitch * 2); - const uint8x8_t d = vld1_u8(src + pitch * 3); - const uint8x8_t e = vld1_u8(src + pitch * 4); - const uint8x8_t f = vld1_u8(src + pitch * 5); - const uint8x8_t g = vld1_u8(src + pitch * 6); - const uint8x8_t h = vld1_u8(src + pitch * 7); - const uint32x2x2_t r04_u32 = vtrn_u32(vreinterpret_u32_u8(a), - vreinterpret_u32_u8(e)); - const uint32x2x2_t r15_u32 = vtrn_u32(vreinterpret_u32_u8(b), - vreinterpret_u32_u8(f)); - const uint32x2x2_t r26_u32 = vtrn_u32(vreinterpret_u32_u8(c), - vreinterpret_u32_u8(g)); - const uint32x2x2_t r37_u32 = vtrn_u32(vreinterpret_u32_u8(d), - vreinterpret_u32_u8(h)); - const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u32(r04_u32.val[0]), - vreinterpret_u16_u32(r26_u32.val[0])); - const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u32(r15_u32.val[0]), - vreinterpret_u16_u32(r37_u32.val[0])); - const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]), - vreinterpret_u8_u16(r13_u16.val[0])); - const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]), - vreinterpret_u8_u16(r13_u16.val[1])); - /* - * after vtrn_u32 - 00 01 02 03 | 40 41 42 43 - 10 11 12 13 | 50 51 52 53 - 20 21 22 23 | 60 61 62 63 - 30 31 32 33 | 70 71 72 73 - --- - * after vtrn_u16 - 00 01 20 21 | 40 41 60 61 - 02 03 22 23 | 42 43 62 63 - 10 11 30 31 | 50 51 70 71 - 12 13 32 33 | 52 52 72 73 - - 00 01 20 21 | 40 41 60 61 - 10 11 30 31 | 50 51 70 71 - 02 03 22 23 | 42 43 62 63 - 12 13 32 33 | 52 52 72 73 - --- - * after vtrn_u8 - 00 10 20 30 | 40 50 60 70 - 01 11 21 31 | 41 51 61 71 - 02 12 22 32 | 42 52 62 72 - 03 
13 23 33 | 43 53 63 73 - */ - x.val[0] = r01_u8.val[0]; - x.val[1] = r01_u8.val[1]; - x.val[2] = r23_u8.val[0]; - x.val[3] = r23_u8.val[1]; - - return x; -} -#else -static INLINE -uint8x8x4_t read_4x8(unsigned char *src, int pitch) { - uint8x8x4_t x; - x.val[0] = x.val[1] = x.val[2] = x.val[3] = vdup_n_u8(0); - x = vld4_lane_u8(src, x, 0); - src += pitch; - x = vld4_lane_u8(src, x, 1); - src += pitch; - x = vld4_lane_u8(src, x, 2); - src += pitch; - x = vld4_lane_u8(src, x, 3); - src += pitch; - x = vld4_lane_u8(src, x, 4); - src += pitch; - x = vld4_lane_u8(src, x, 5); - src += pitch; - x = vld4_lane_u8(src, x, 6); - src += pitch; - x = vld4_lane_u8(src, x, 7); - return x; -} -#endif // VPX_INCOMPATIBLE_GCC - -static INLINE void vp8_loop_filter_simple_vertical_edge_neon( - unsigned char *s, - int p, - const unsigned char *blimit) { - unsigned char *src1; - uint8x16_t qblimit, q0u8; - uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8; - int16x8_t q2s16, q13s16, q11s16; - int8x8_t d28s8, d29s8; - int8x16_t q2s8, q3s8, q10s8, q11s8, q14s8; - uint8x8x4_t d0u8x4; // d6, d7, d8, d9 - uint8x8x4_t d1u8x4; // d10, d11, d12, d13 - uint8x8x2_t d2u8x2; // d12, d13 - uint8x8x2_t d3u8x2; // d14, d15 - - qblimit = vdupq_n_u8(*blimit); - - src1 = s - 2; - d0u8x4 = read_4x8(src1, p); - src1 += p * 8; - d1u8x4 = read_4x8(src1, p); - - q3u8 = vcombine_u8(d0u8x4.val[0], d1u8x4.val[0]); // d6 d10 - q4u8 = vcombine_u8(d0u8x4.val[2], d1u8x4.val[2]); // d8 d12 - q5u8 = vcombine_u8(d0u8x4.val[1], d1u8x4.val[1]); // d7 d11 - q6u8 = vcombine_u8(d0u8x4.val[3], d1u8x4.val[3]); // d9 d13 - - q15u8 = vabdq_u8(q5u8, q4u8); - q14u8 = vabdq_u8(q3u8, q6u8); - - q15u8 = vqaddq_u8(q15u8, q15u8); - q14u8 = vshrq_n_u8(q14u8, 1); - q0u8 = vdupq_n_u8(0x80); - q11s16 = vdupq_n_s16(3); - q15u8 = vqaddq_u8(q15u8, q14u8); - - q3u8 = veorq_u8(q3u8, q0u8); - q4u8 = veorq_u8(q4u8, q0u8); - q5u8 = veorq_u8(q5u8, q0u8); - q6u8 = veorq_u8(q6u8, q0u8); - - q15u8 = vcgeq_u8(qblimit, q15u8); - - 
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q4u8)), - vget_low_s8(vreinterpretq_s8_u8(q5u8))); - q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q4u8)), - vget_high_s8(vreinterpretq_s8_u8(q5u8))); - - q14s8 = vqsubq_s8(vreinterpretq_s8_u8(q3u8), - vreinterpretq_s8_u8(q6u8)); - - q2s16 = vmulq_s16(q2s16, q11s16); - q13s16 = vmulq_s16(q13s16, q11s16); - - q11u8 = vdupq_n_u8(3); - q12u8 = vdupq_n_u8(4); - - q2s16 = vaddw_s8(q2s16, vget_low_s8(q14s8)); - q13s16 = vaddw_s8(q13s16, vget_high_s8(q14s8)); - - d28s8 = vqmovn_s16(q2s16); - d29s8 = vqmovn_s16(q13s16); - q14s8 = vcombine_s8(d28s8, d29s8); - - q14s8 = vandq_s8(q14s8, vreinterpretq_s8_u8(q15u8)); - - q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q11u8)); - q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q12u8)); - q2s8 = vshrq_n_s8(q2s8, 3); - q14s8 = vshrq_n_s8(q3s8, 3); - - q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q5u8), q2s8); - q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q4u8), q14s8); - - q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8); - q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8); - - d2u8x2.val[0] = vget_low_u8(q6u8); // d12 - d2u8x2.val[1] = vget_low_u8(q7u8); // d14 - d3u8x2.val[0] = vget_high_u8(q6u8); // d13 - d3u8x2.val[1] = vget_high_u8(q7u8); // d15 - - src1 = s - 1; - write_2x8(src1, p, d2u8x2, d3u8x2); -} - -void vp8_loop_filter_bvs_neon( - unsigned char *y_ptr, - int y_stride, - const unsigned char *blimit) { - y_ptr += 4; - vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); - y_ptr += 4; - vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); - y_ptr += 4; - vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); - return; -} - -void vp8_loop_filter_mbvs_neon( - unsigned char *y_ptr, - int y_stride, - const unsigned char *blimit) { - vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); - return; -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c deleted 
file mode 100644 index 5351f4be66..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c +++ /dev/null @@ -1,625 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> -#include "./vpx_config.h" - -static INLINE void vp8_mbloop_filter_neon( - uint8x16_t qblimit, // mblimit - uint8x16_t qlimit, // limit - uint8x16_t qthresh, // thresh - uint8x16_t q3, // p2 - uint8x16_t q4, // p2 - uint8x16_t q5, // p1 - uint8x16_t q6, // p0 - uint8x16_t q7, // q0 - uint8x16_t q8, // q1 - uint8x16_t q9, // q2 - uint8x16_t q10, // q3 - uint8x16_t *q4r, // p1 - uint8x16_t *q5r, // p1 - uint8x16_t *q6r, // p0 - uint8x16_t *q7r, // q0 - uint8x16_t *q8r, // q1 - uint8x16_t *q9r) { // q1 - uint8x16_t q0u8, q1u8, q11u8, q12u8, q13u8, q14u8, q15u8; - int16x8_t q0s16, q2s16, q11s16, q12s16, q13s16, q14s16, q15s16; - int8x16_t q1s8, q6s8, q7s8, q2s8, q11s8, q13s8; - uint16x8_t q0u16, q11u16, q12u16, q13u16, q14u16, q15u16; - int8x16_t q0s8, q12s8, q14s8, q15s8; - int8x8_t d0, d1, d2, d3, d4, d5, d24, d25, d28, d29; - - q11u8 = vabdq_u8(q3, q4); - q12u8 = vabdq_u8(q4, q5); - q13u8 = vabdq_u8(q5, q6); - q14u8 = vabdq_u8(q8, q7); - q1u8 = vabdq_u8(q9, q8); - q0u8 = vabdq_u8(q10, q9); - - q11u8 = vmaxq_u8(q11u8, q12u8); - q12u8 = vmaxq_u8(q13u8, q14u8); - q1u8 = vmaxq_u8(q1u8, q0u8); - q15u8 = vmaxq_u8(q11u8, q12u8); - - q12u8 = vabdq_u8(q6, q7); - - // vp8_hevmask - q13u8 = vcgtq_u8(q13u8, qthresh); - q14u8 = vcgtq_u8(q14u8, qthresh); - q15u8 = vmaxq_u8(q15u8, q1u8); - - q15u8 = vcgeq_u8(qlimit, q15u8); - - q1u8 = vabdq_u8(q5, q8); - q12u8 = vqaddq_u8(q12u8, q12u8); - - // vp8_filter() 
function - // convert to signed - q0u8 = vdupq_n_u8(0x80); - q9 = veorq_u8(q9, q0u8); - q8 = veorq_u8(q8, q0u8); - q7 = veorq_u8(q7, q0u8); - q6 = veorq_u8(q6, q0u8); - q5 = veorq_u8(q5, q0u8); - q4 = veorq_u8(q4, q0u8); - - q1u8 = vshrq_n_u8(q1u8, 1); - q12u8 = vqaddq_u8(q12u8, q1u8); - - q14u8 = vorrq_u8(q13u8, q14u8); - q12u8 = vcgeq_u8(qblimit, q12u8); - - q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)), - vget_low_s8(vreinterpretq_s8_u8(q6))); - q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)), - vget_high_s8(vreinterpretq_s8_u8(q6))); - - q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5), - vreinterpretq_s8_u8(q8)); - - q11s16 = vdupq_n_s16(3); - q2s16 = vmulq_s16(q2s16, q11s16); - q13s16 = vmulq_s16(q13s16, q11s16); - - q15u8 = vandq_u8(q15u8, q12u8); - - q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8)); - q13s16 = vaddw_s8(q13s16, vget_high_s8(q1s8)); - - q12u8 = vdupq_n_u8(3); - q11u8 = vdupq_n_u8(4); - // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0)) - d2 = vqmovn_s16(q2s16); - d3 = vqmovn_s16(q13s16); - q1s8 = vcombine_s8(d2, d3); - q1s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q15u8)); - q13s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q14u8)); - - q2s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q11u8)); - q13s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q12u8)); - q2s8 = vshrq_n_s8(q2s8, 3); - q13s8 = vshrq_n_s8(q13s8, 3); - - q7s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q2s8); - q6s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q13s8); - - q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8)); - - q0u16 = q11u16 = q12u16 = q13u16 = q14u16 = q15u16 = vdupq_n_u16(63); - d5 = vdup_n_s8(9); - d4 = vdup_n_s8(18); - - q0s16 = vmlal_s8(vreinterpretq_s16_u16(q0u16), vget_low_s8(q1s8), d5); - q11s16 = vmlal_s8(vreinterpretq_s16_u16(q11u16), vget_high_s8(q1s8), d5); - d5 = vdup_n_s8(27); - q12s16 = vmlal_s8(vreinterpretq_s16_u16(q12u16), vget_low_s8(q1s8), d4); - q13s16 = vmlal_s8(vreinterpretq_s16_u16(q13u16), vget_high_s8(q1s8), d4); - q14s16 = vmlal_s8(vreinterpretq_s16_u16(q14u16), 
vget_low_s8(q1s8), d5); - q15s16 = vmlal_s8(vreinterpretq_s16_u16(q15u16), vget_high_s8(q1s8), d5); - - d0 = vqshrn_n_s16(q0s16 , 7); - d1 = vqshrn_n_s16(q11s16, 7); - d24 = vqshrn_n_s16(q12s16, 7); - d25 = vqshrn_n_s16(q13s16, 7); - d28 = vqshrn_n_s16(q14s16, 7); - d29 = vqshrn_n_s16(q15s16, 7); - - q0s8 = vcombine_s8(d0, d1); - q12s8 = vcombine_s8(d24, d25); - q14s8 = vcombine_s8(d28, d29); - - q11s8 = vqsubq_s8(vreinterpretq_s8_u8(q9), q0s8); - q0s8 = vqaddq_s8(vreinterpretq_s8_u8(q4), q0s8); - q13s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q12s8); - q12s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q12s8); - q15s8 = vqsubq_s8((q7s8), q14s8); - q14s8 = vqaddq_s8((q6s8), q14s8); - - q1u8 = vdupq_n_u8(0x80); - *q9r = veorq_u8(vreinterpretq_u8_s8(q11s8), q1u8); - *q8r = veorq_u8(vreinterpretq_u8_s8(q13s8), q1u8); - *q7r = veorq_u8(vreinterpretq_u8_s8(q15s8), q1u8); - *q6r = veorq_u8(vreinterpretq_u8_s8(q14s8), q1u8); - *q5r = veorq_u8(vreinterpretq_u8_s8(q12s8), q1u8); - *q4r = veorq_u8(vreinterpretq_u8_s8(q0s8), q1u8); - return; -} - -void vp8_mbloop_filter_horizontal_edge_y_neon( - unsigned char *src, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh) { - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - src -= (pitch << 2); - - q3 = vld1q_u8(src); - src += pitch; - q4 = vld1q_u8(src); - src += pitch; - q5 = vld1q_u8(src); - src += pitch; - q6 = vld1q_u8(src); - src += pitch; - q7 = vld1q_u8(src); - src += pitch; - q8 = vld1q_u8(src); - src += pitch; - q9 = vld1q_u8(src); - src += pitch; - q10 = vld1q_u8(src); - - vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q4, &q5, &q6, &q7, &q8, &q9); - - src -= (pitch * 6); - vst1q_u8(src, q4); - src += pitch; - vst1q_u8(src, q5); - src += pitch; - vst1q_u8(src, q6); - src += pitch; - vst1q_u8(src, q7); - src += pitch; - 
vst1q_u8(src, q8); - src += pitch; - vst1q_u8(src, q9); - return; -} - -void vp8_mbloop_filter_horizontal_edge_uv_neon( - unsigned char *u, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh, - unsigned char *v) { - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; - uint8x8_t d15, d16, d17, d18, d19, d20, d21; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - u -= (pitch << 2); - v -= (pitch << 2); - - d6 = vld1_u8(u); - u += pitch; - d7 = vld1_u8(v); - v += pitch; - d8 = vld1_u8(u); - u += pitch; - d9 = vld1_u8(v); - v += pitch; - d10 = vld1_u8(u); - u += pitch; - d11 = vld1_u8(v); - v += pitch; - d12 = vld1_u8(u); - u += pitch; - d13 = vld1_u8(v); - v += pitch; - d14 = vld1_u8(u); - u += pitch; - d15 = vld1_u8(v); - v += pitch; - d16 = vld1_u8(u); - u += pitch; - d17 = vld1_u8(v); - v += pitch; - d18 = vld1_u8(u); - u += pitch; - d19 = vld1_u8(v); - v += pitch; - d20 = vld1_u8(u); - d21 = vld1_u8(v); - - q3 = vcombine_u8(d6, d7); - q4 = vcombine_u8(d8, d9); - q5 = vcombine_u8(d10, d11); - q6 = vcombine_u8(d12, d13); - q7 = vcombine_u8(d14, d15); - q8 = vcombine_u8(d16, d17); - q9 = vcombine_u8(d18, d19); - q10 = vcombine_u8(d20, d21); - - vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q4, &q5, &q6, &q7, &q8, &q9); - - u -= (pitch * 6); - v -= (pitch * 6); - vst1_u8(u, vget_low_u8(q4)); - u += pitch; - vst1_u8(v, vget_high_u8(q4)); - v += pitch; - vst1_u8(u, vget_low_u8(q5)); - u += pitch; - vst1_u8(v, vget_high_u8(q5)); - v += pitch; - vst1_u8(u, vget_low_u8(q6)); - u += pitch; - vst1_u8(v, vget_high_u8(q6)); - v += pitch; - vst1_u8(u, vget_low_u8(q7)); - u += pitch; - vst1_u8(v, vget_high_u8(q7)); - v += pitch; - vst1_u8(u, vget_low_u8(q8)); - u += pitch; - vst1_u8(v, vget_high_u8(q8)); - v += pitch; - vst1_u8(u, vget_low_u8(q9)); - vst1_u8(v, 
vget_high_u8(q9)); - return; -} - -void vp8_mbloop_filter_vertical_edge_y_neon( - unsigned char *src, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh) { - unsigned char *s1, *s2; - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; - uint8x8_t d15, d16, d17, d18, d19, d20, d21; - uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; - uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; - uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - s1 = src - 4; - s2 = s1 + 8 * pitch; - d6 = vld1_u8(s1); - s1 += pitch; - d7 = vld1_u8(s2); - s2 += pitch; - d8 = vld1_u8(s1); - s1 += pitch; - d9 = vld1_u8(s2); - s2 += pitch; - d10 = vld1_u8(s1); - s1 += pitch; - d11 = vld1_u8(s2); - s2 += pitch; - d12 = vld1_u8(s1); - s1 += pitch; - d13 = vld1_u8(s2); - s2 += pitch; - d14 = vld1_u8(s1); - s1 += pitch; - d15 = vld1_u8(s2); - s2 += pitch; - d16 = vld1_u8(s1); - s1 += pitch; - d17 = vld1_u8(s2); - s2 += pitch; - d18 = vld1_u8(s1); - s1 += pitch; - d19 = vld1_u8(s2); - s2 += pitch; - d20 = vld1_u8(s1); - d21 = vld1_u8(s2); - - q3 = vcombine_u8(d6, d7); - q4 = vcombine_u8(d8, d9); - q5 = vcombine_u8(d10, d11); - q6 = vcombine_u8(d12, d13); - q7 = vcombine_u8(d14, d15); - q8 = vcombine_u8(d16, d17); - q9 = vcombine_u8(d18, d19); - q10 = vcombine_u8(d20, d21); - - q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); - q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); - q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); - q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); - - q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), - vreinterpretq_u16_u32(q2tmp2.val[0])); - q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), - vreinterpretq_u16_u32(q2tmp3.val[0])); - q2tmp6 = 
vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), - vreinterpretq_u16_u32(q2tmp2.val[1])); - q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), - vreinterpretq_u16_u32(q2tmp3.val[1])); - - q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), - vreinterpretq_u8_u16(q2tmp5.val[0])); - q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), - vreinterpretq_u8_u16(q2tmp5.val[1])); - q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), - vreinterpretq_u8_u16(q2tmp7.val[0])); - q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), - vreinterpretq_u8_u16(q2tmp7.val[1])); - - q3 = q2tmp8.val[0]; - q4 = q2tmp8.val[1]; - q5 = q2tmp9.val[0]; - q6 = q2tmp9.val[1]; - q7 = q2tmp10.val[0]; - q8 = q2tmp10.val[1]; - q9 = q2tmp11.val[0]; - q10 = q2tmp11.val[1]; - - vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q4, &q5, &q6, &q7, &q8, &q9); - - q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); - q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); - q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); - q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); - - q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), - vreinterpretq_u16_u32(q2tmp2.val[0])); - q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), - vreinterpretq_u16_u32(q2tmp3.val[0])); - q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), - vreinterpretq_u16_u32(q2tmp2.val[1])); - q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), - vreinterpretq_u16_u32(q2tmp3.val[1])); - - q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), - vreinterpretq_u8_u16(q2tmp5.val[0])); - q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), - vreinterpretq_u8_u16(q2tmp5.val[1])); - q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), - vreinterpretq_u8_u16(q2tmp7.val[0])); - q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), - vreinterpretq_u8_u16(q2tmp7.val[1])); - - q3 = 
q2tmp8.val[0]; - q4 = q2tmp8.val[1]; - q5 = q2tmp9.val[0]; - q6 = q2tmp9.val[1]; - q7 = q2tmp10.val[0]; - q8 = q2tmp10.val[1]; - q9 = q2tmp11.val[0]; - q10 = q2tmp11.val[1]; - - s1 -= 7 * pitch; - s2 -= 7 * pitch; - - vst1_u8(s1, vget_low_u8(q3)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q3)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q4)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q4)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q5)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q5)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q6)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q6)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q7)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q7)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q8)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q8)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q9)); - s1 += pitch; - vst1_u8(s2, vget_high_u8(q9)); - s2 += pitch; - vst1_u8(s1, vget_low_u8(q10)); - vst1_u8(s2, vget_high_u8(q10)); - return; -} - -void vp8_mbloop_filter_vertical_edge_uv_neon( - unsigned char *u, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh, - unsigned char *v) { - unsigned char *us, *ud; - unsigned char *vs, *vd; - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; - uint8x8_t d15, d16, d17, d18, d19, d20, d21; - uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; - uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; - uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - us = u - 4; - vs = v - 4; - d6 = vld1_u8(us); - us += pitch; - d7 = vld1_u8(vs); - vs += pitch; - d8 = vld1_u8(us); - us += pitch; - d9 = vld1_u8(vs); - vs += pitch; - d10 = vld1_u8(us); - us += pitch; - d11 = vld1_u8(vs); - vs += pitch; - d12 = vld1_u8(us); - us += pitch; - d13 = vld1_u8(vs); - vs += pitch; - d14 = vld1_u8(us); - us += pitch; - d15 = vld1_u8(vs); - vs += pitch; - 
d16 = vld1_u8(us); - us += pitch; - d17 = vld1_u8(vs); - vs += pitch; - d18 = vld1_u8(us); - us += pitch; - d19 = vld1_u8(vs); - vs += pitch; - d20 = vld1_u8(us); - d21 = vld1_u8(vs); - - q3 = vcombine_u8(d6, d7); - q4 = vcombine_u8(d8, d9); - q5 = vcombine_u8(d10, d11); - q6 = vcombine_u8(d12, d13); - q7 = vcombine_u8(d14, d15); - q8 = vcombine_u8(d16, d17); - q9 = vcombine_u8(d18, d19); - q10 = vcombine_u8(d20, d21); - - q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); - q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); - q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); - q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); - - q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), - vreinterpretq_u16_u32(q2tmp2.val[0])); - q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), - vreinterpretq_u16_u32(q2tmp3.val[0])); - q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), - vreinterpretq_u16_u32(q2tmp2.val[1])); - q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), - vreinterpretq_u16_u32(q2tmp3.val[1])); - - q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), - vreinterpretq_u8_u16(q2tmp5.val[0])); - q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), - vreinterpretq_u8_u16(q2tmp5.val[1])); - q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), - vreinterpretq_u8_u16(q2tmp7.val[0])); - q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), - vreinterpretq_u8_u16(q2tmp7.val[1])); - - q3 = q2tmp8.val[0]; - q4 = q2tmp8.val[1]; - q5 = q2tmp9.val[0]; - q6 = q2tmp9.val[1]; - q7 = q2tmp10.val[0]; - q8 = q2tmp10.val[1]; - q9 = q2tmp11.val[0]; - q10 = q2tmp11.val[1]; - - vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q4, &q5, &q6, &q7, &q8, &q9); - - q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); - q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); - q2tmp2 = 
vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); - q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); - - q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), - vreinterpretq_u16_u32(q2tmp2.val[0])); - q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), - vreinterpretq_u16_u32(q2tmp3.val[0])); - q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), - vreinterpretq_u16_u32(q2tmp2.val[1])); - q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), - vreinterpretq_u16_u32(q2tmp3.val[1])); - - q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), - vreinterpretq_u8_u16(q2tmp5.val[0])); - q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), - vreinterpretq_u8_u16(q2tmp5.val[1])); - q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), - vreinterpretq_u8_u16(q2tmp7.val[0])); - q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), - vreinterpretq_u8_u16(q2tmp7.val[1])); - - q3 = q2tmp8.val[0]; - q4 = q2tmp8.val[1]; - q5 = q2tmp9.val[0]; - q6 = q2tmp9.val[1]; - q7 = q2tmp10.val[0]; - q8 = q2tmp10.val[1]; - q9 = q2tmp11.val[0]; - q10 = q2tmp11.val[1]; - - ud = u - 4; - vst1_u8(ud, vget_low_u8(q3)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q4)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q5)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q6)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q7)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q8)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q9)); - ud += pitch; - vst1_u8(ud, vget_low_u8(q10)); - - vd = v - 4; - vst1_u8(vd, vget_high_u8(q3)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q4)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q5)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q6)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q7)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q8)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q9)); - vd += pitch; - vst1_u8(vd, vget_high_u8(q10)); - return; -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c 
b/thirdparty/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c deleted file mode 100644 index 373afa6ed3..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> - -static const int16_t cospi8sqrt2minus1 = 20091; -static const int16_t sinpi8sqrt2 = 35468; - -void vp8_short_idct4x4llm_neon( - int16_t *input, - unsigned char *pred_ptr, - int pred_stride, - unsigned char *dst_ptr, - int dst_stride) { - int i; - uint32x2_t d6u32 = vdup_n_u32(0); - uint8x8_t d1u8; - int16x4_t d2, d3, d4, d5, d10, d11, d12, d13; - uint16x8_t q1u16; - int16x8_t q1s16, q2s16, q3s16, q4s16; - int32x2x2_t v2tmp0, v2tmp1; - int16x4x2_t v2tmp2, v2tmp3; - - d2 = vld1_s16(input); - d3 = vld1_s16(input + 4); - d4 = vld1_s16(input + 8); - d5 = vld1_s16(input + 12); - - // 1st for loop - q1s16 = vcombine_s16(d2, d4); // Swap d3 d4 here - q2s16 = vcombine_s16(d3, d5); - - q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2); - q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1); - - d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1 - d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1 - - q3s16 = vshrq_n_s16(q3s16, 1); - q4s16 = vshrq_n_s16(q4s16, 1); - - q3s16 = vqaddq_s16(q3s16, q2s16); - q4s16 = vqaddq_s16(q4s16, q2s16); - - d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1 - d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1 - - d2 = vqadd_s16(d12, d11); - d3 = vqadd_s16(d13, d10); - d4 = vqsub_s16(d13, d10); - d5 = vqsub_s16(d12, d11); - - v2tmp0 = 
vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); - v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); - v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]), - vreinterpret_s16_s32(v2tmp1.val[0])); - v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]), - vreinterpret_s16_s32(v2tmp1.val[1])); - - // 2nd for loop - q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp3.val[0]); - q2s16 = vcombine_s16(v2tmp2.val[1], v2tmp3.val[1]); - - q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2); - q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1); - - d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1 - d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1 - - q3s16 = vshrq_n_s16(q3s16, 1); - q4s16 = vshrq_n_s16(q4s16, 1); - - q3s16 = vqaddq_s16(q3s16, q2s16); - q4s16 = vqaddq_s16(q4s16, q2s16); - - d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1 - d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1 - - d2 = vqadd_s16(d12, d11); - d3 = vqadd_s16(d13, d10); - d4 = vqsub_s16(d13, d10); - d5 = vqsub_s16(d12, d11); - - d2 = vrshr_n_s16(d2, 3); - d3 = vrshr_n_s16(d3, 3); - d4 = vrshr_n_s16(d4, 3); - d5 = vrshr_n_s16(d5, 3); - - v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); - v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); - v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]), - vreinterpret_s16_s32(v2tmp1.val[0])); - v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]), - vreinterpret_s16_s32(v2tmp1.val[1])); - - q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp2.val[1]); - q2s16 = vcombine_s16(v2tmp3.val[0], v2tmp3.val[1]); - - // dc_only_idct_add - for (i = 0; i < 2; i++, q1s16 = q2s16) { - d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 0); - pred_ptr += pred_stride; - d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 1); - pred_ptr += pred_stride; - - q1u16 = vaddw_u8(vreinterpretq_u16_s16(q1s16), - vreinterpret_u8_u32(d6u32)); - d1u8 = 
vqmovun_s16(vreinterpretq_s16_u16(q1u16)); - - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 0); - dst_ptr += dst_stride; - vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 1); - dst_ptr += dst_stride; - } - return; -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/sixtappredict_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/sixtappredict_neon.c deleted file mode 100644 index 49d8d221fc..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/sixtappredict_neon.c +++ /dev/null @@ -1,1377 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> -#include "vpx_ports/mem.h" - -static const int8_t vp8_sub_pel_filters[8][8] = { - {0, 0, 128, 0, 0, 0, 0, 0}, /* note that 1/8 pel positionyys are */ - {0, -6, 123, 12, -1, 0, 0, 0}, /* just as per alpha -0.5 bicubic */ - {2, -11, 108, 36, -8, 1, 0, 0}, /* New 1/4 pel 6 tap filter */ - {0, -9, 93, 50, -6, 0, 0, 0}, - {3, -16, 77, 77, -16, 3, 0, 0}, /* New 1/2 pel 6 tap filter */ - {0, -6, 50, 93, -9, 0, 0, 0}, - {1, -8, 36, 108, -11, 2, 0, 0}, /* New 1/4 pel 6 tap filter */ - {0, -1, 12, 123, -6, 0, 0, 0}, -}; - -void vp8_sixtap_predict8x4_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - unsigned char *src; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; - uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8; - uint8x8_t d27u8, d28u8, d29u8, d30u8, d31u8; - int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; - uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16; - uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16; - 
int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16; - uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8; - - if (xoffset == 0) { // secondpass_filter8x4_only - // load second_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // load src data - src = src_ptr - src_pixels_per_line * 2; - d22u8 = vld1_u8(src); - src += src_pixels_per_line; - d23u8 = vld1_u8(src); - src += src_pixels_per_line; - d24u8 = vld1_u8(src); - src += src_pixels_per_line; - d25u8 = vld1_u8(src); - src += src_pixels_per_line; - d26u8 = vld1_u8(src); - src += src_pixels_per_line; - d27u8 = vld1_u8(src); - src += src_pixels_per_line; - d28u8 = vld1_u8(src); - src += src_pixels_per_line; - d29u8 = vld1_u8(src); - src += src_pixels_per_line; - d30u8 = vld1_u8(src); - - q3u16 = vmull_u8(d22u8, d0u8); - q4u16 = vmull_u8(d23u8, d0u8); - q5u16 = vmull_u8(d24u8, d0u8); - q6u16 = vmull_u8(d25u8, d0u8); - - q3u16 = vmlsl_u8(q3u16, d23u8, d1u8); - q4u16 = vmlsl_u8(q4u16, d24u8, d1u8); - q5u16 = vmlsl_u8(q5u16, d25u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d26u8, d1u8); - - q3u16 = vmlsl_u8(q3u16, d26u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d27u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d28u8, d4u8); - q6u16 = vmlsl_u8(q6u16, d29u8, d4u8); - - q3u16 = vmlal_u8(q3u16, d24u8, d2u8); - q4u16 = vmlal_u8(q4u16, d25u8, d2u8); - q5u16 = vmlal_u8(q5u16, d26u8, d2u8); - q6u16 = vmlal_u8(q6u16, d27u8, d2u8); - - q3u16 = vmlal_u8(q3u16, d27u8, d5u8); - q4u16 = vmlal_u8(q4u16, d28u8, d5u8); - q5u16 = vmlal_u8(q5u16, d29u8, d5u8); 
- q6u16 = vmlal_u8(q6u16, d30u8, d5u8); - - q7u16 = vmull_u8(d25u8, d3u8); - q8u16 = vmull_u8(d26u8, d3u8); - q9u16 = vmull_u8(d27u8, d3u8); - q10u16 = vmull_u8(d28u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d6u8 = vqrshrun_n_s16(q7s16, 7); - d7u8 = vqrshrun_n_s16(q8s16, 7); - d8u8 = vqrshrun_n_s16(q9s16, 7); - d9u8 = vqrshrun_n_s16(q10s16, 7); - - vst1_u8(dst_ptr, d6u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d7u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d8u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d9u8); - return; - } - - // load first_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // First pass: output_height lines x output_width columns (9x4) - if (yoffset == 0) // firstpass_filter4x4_only - src = src_ptr - 2; - else - src = src_ptr - 2 - (src_pixels_per_line * 2); - q3u8 = vld1q_u8(src); - src += src_pixels_per_line; - q4u8 = vld1q_u8(src); - src += src_pixels_per_line; - q5u8 = vld1q_u8(src); - src += src_pixels_per_line; - q6u8 = vld1q_u8(src); - - q7u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q8u16 = vmull_u8(vget_low_u8(q4u8), 
d0u8); - q9u16 = vmull_u8(vget_low_u8(q5u8), d0u8); - q10u16 = vmull_u8(vget_low_u8(q6u8), d0u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); - - q7u16 = vmlsl_u8(q7u16, d28u8, d1u8); - q8u16 = vmlsl_u8(q8u16, d29u8, d1u8); - q9u16 = vmlsl_u8(q9u16, d30u8, d1u8); - q10u16 = vmlsl_u8(q10u16, d31u8, d1u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); - - q7u16 = vmlsl_u8(q7u16, d28u8, d4u8); - q8u16 = vmlsl_u8(q8u16, d29u8, d4u8); - q9u16 = vmlsl_u8(q9u16, d30u8, d4u8); - q10u16 = vmlsl_u8(q10u16, d31u8, d4u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); - - q7u16 = vmlal_u8(q7u16, d28u8, d2u8); - q8u16 = vmlal_u8(q8u16, d29u8, d2u8); - q9u16 = vmlal_u8(q9u16, d30u8, d2u8); - q10u16 = vmlal_u8(q10u16, d31u8, d2u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); - - q7u16 = vmlal_u8(q7u16, d28u8, d5u8); - q8u16 = vmlal_u8(q8u16, d29u8, d5u8); - q9u16 = vmlal_u8(q9u16, d30u8, d5u8); - q10u16 = vmlal_u8(q10u16, d31u8, d5u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); - - q3u16 = 
vmull_u8(d28u8, d3u8); - q4u16 = vmull_u8(d29u8, d3u8); - q5u16 = vmull_u8(d30u8, d3u8); - q6u16 = vmull_u8(d31u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d22u8 = vqrshrun_n_s16(q7s16, 7); - d23u8 = vqrshrun_n_s16(q8s16, 7); - d24u8 = vqrshrun_n_s16(q9s16, 7); - d25u8 = vqrshrun_n_s16(q10s16, 7); - - if (yoffset == 0) { // firstpass_filter8x4_only - vst1_u8(dst_ptr, d22u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d23u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d24u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d25u8); - return; - } - - // First Pass on rest 5-line data - src += src_pixels_per_line; - q3u8 = vld1q_u8(src); - src += src_pixels_per_line; - q4u8 = vld1q_u8(src); - src += src_pixels_per_line; - q5u8 = vld1q_u8(src); - src += src_pixels_per_line; - q6u8 = vld1q_u8(src); - src += src_pixels_per_line; - q7u8 = vld1q_u8(src); - - q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); - q11u16 = vmull_u8(vget_low_u8(q6u8), d0u8); - q12u16 = vmull_u8(vget_low_u8(q7u8), d0u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 1); - - q8u16 = vmlsl_u8(q8u16, d27u8, d1u8); - q9u16 = vmlsl_u8(q9u16, d28u8, d1u8); - q10u16 = vmlsl_u8(q10u16, d29u8, d1u8); - q11u16 = vmlsl_u8(q11u16, d30u8, d1u8); - 
q12u16 = vmlsl_u8(q12u16, d31u8, d1u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 4); - - q8u16 = vmlsl_u8(q8u16, d27u8, d4u8); - q9u16 = vmlsl_u8(q9u16, d28u8, d4u8); - q10u16 = vmlsl_u8(q10u16, d29u8, d4u8); - q11u16 = vmlsl_u8(q11u16, d30u8, d4u8); - q12u16 = vmlsl_u8(q12u16, d31u8, d4u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 2); - - q8u16 = vmlal_u8(q8u16, d27u8, d2u8); - q9u16 = vmlal_u8(q9u16, d28u8, d2u8); - q10u16 = vmlal_u8(q10u16, d29u8, d2u8); - q11u16 = vmlal_u8(q11u16, d30u8, d2u8); - q12u16 = vmlal_u8(q12u16, d31u8, d2u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 5); - - q8u16 = vmlal_u8(q8u16, d27u8, d5u8); - q9u16 = vmlal_u8(q9u16, d28u8, d5u8); - q10u16 = vmlal_u8(q10u16, d29u8, d5u8); - q11u16 = vmlal_u8(q11u16, d30u8, d5u8); - q12u16 = vmlal_u8(q12u16, d31u8, d5u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 3); - - q3u16 = vmull_u8(d27u8, d3u8); - q4u16 = vmull_u8(d28u8, d3u8); - q5u16 = vmull_u8(d29u8, d3u8); - 
q6u16 = vmull_u8(d30u8, d3u8); - q7u16 = vmull_u8(d31u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - q11s16 = vreinterpretq_s16_u16(q11u16); - q12s16 = vreinterpretq_s16_u16(q12u16); - - q8s16 = vqaddq_s16(q8s16, q3s16); - q9s16 = vqaddq_s16(q9s16, q4s16); - q10s16 = vqaddq_s16(q10s16, q5s16); - q11s16 = vqaddq_s16(q11s16, q6s16); - q12s16 = vqaddq_s16(q12s16, q7s16); - - d26u8 = vqrshrun_n_s16(q8s16, 7); - d27u8 = vqrshrun_n_s16(q9s16, 7); - d28u8 = vqrshrun_n_s16(q10s16, 7); - d29u8 = vqrshrun_n_s16(q11s16, 7); - d30u8 = vqrshrun_n_s16(q12s16, 7); - - // Second pass: 8x4 - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - q3u16 = vmull_u8(d22u8, d0u8); - q4u16 = vmull_u8(d23u8, d0u8); - q5u16 = vmull_u8(d24u8, d0u8); - q6u16 = vmull_u8(d25u8, d0u8); - - q3u16 = vmlsl_u8(q3u16, d23u8, d1u8); - q4u16 = vmlsl_u8(q4u16, d24u8, d1u8); - q5u16 = vmlsl_u8(q5u16, d25u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d26u8, d1u8); - - q3u16 = vmlsl_u8(q3u16, d26u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d27u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d28u8, d4u8); - q6u16 = vmlsl_u8(q6u16, d29u8, d4u8); - - q3u16 = vmlal_u8(q3u16, d24u8, d2u8); - q4u16 = vmlal_u8(q4u16, d25u8, d2u8); - q5u16 = vmlal_u8(q5u16, d26u8, d2u8); - q6u16 = vmlal_u8(q6u16, d27u8, 
d2u8); - - q3u16 = vmlal_u8(q3u16, d27u8, d5u8); - q4u16 = vmlal_u8(q4u16, d28u8, d5u8); - q5u16 = vmlal_u8(q5u16, d29u8, d5u8); - q6u16 = vmlal_u8(q6u16, d30u8, d5u8); - - q7u16 = vmull_u8(d25u8, d3u8); - q8u16 = vmull_u8(d26u8, d3u8); - q9u16 = vmull_u8(d27u8, d3u8); - q10u16 = vmull_u8(d28u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d6u8 = vqrshrun_n_s16(q7s16, 7); - d7u8 = vqrshrun_n_s16(q8s16, 7); - d8u8 = vqrshrun_n_s16(q9s16, 7); - d9u8 = vqrshrun_n_s16(q10s16, 7); - - vst1_u8(dst_ptr, d6u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d7u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d8u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d9u8); - return; -} - -void vp8_sixtap_predict8x8_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - unsigned char *src, *tmpp; - unsigned char tmp[64]; - int i; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; - uint8x8_t d18u8, d19u8, d20u8, d21u8, d22u8, d23u8, d24u8, d25u8; - uint8x8_t d26u8, d27u8, d28u8, d29u8, d30u8, d31u8; - int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; - uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16; - uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16; - int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16; - uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q9u8, q10u8, q11u8, q12u8; - - if (xoffset == 0) { // secondpass_filter8x8_only - // load second_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = 
vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // load src data - src = src_ptr - src_pixels_per_line * 2; - d18u8 = vld1_u8(src); - src += src_pixels_per_line; - d19u8 = vld1_u8(src); - src += src_pixels_per_line; - d20u8 = vld1_u8(src); - src += src_pixels_per_line; - d21u8 = vld1_u8(src); - src += src_pixels_per_line; - d22u8 = vld1_u8(src); - src += src_pixels_per_line; - d23u8 = vld1_u8(src); - src += src_pixels_per_line; - d24u8 = vld1_u8(src); - src += src_pixels_per_line; - d25u8 = vld1_u8(src); - src += src_pixels_per_line; - d26u8 = vld1_u8(src); - src += src_pixels_per_line; - d27u8 = vld1_u8(src); - src += src_pixels_per_line; - d28u8 = vld1_u8(src); - src += src_pixels_per_line; - d29u8 = vld1_u8(src); - src += src_pixels_per_line; - d30u8 = vld1_u8(src); - - for (i = 2; i > 0; i--) { - q3u16 = vmull_u8(d18u8, d0u8); - q4u16 = vmull_u8(d19u8, d0u8); - q5u16 = vmull_u8(d20u8, d0u8); - q6u16 = vmull_u8(d21u8, d0u8); - - q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); - q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); - q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); - - q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); - q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); - - q3u16 = vmlal_u8(q3u16, d20u8, d2u8); - q4u16 = vmlal_u8(q4u16, d21u8, d2u8); - q5u16 = vmlal_u8(q5u16, d22u8, d2u8); - q6u16 = vmlal_u8(q6u16, d23u8, d2u8); - - q3u16 = vmlal_u8(q3u16, d23u8, d5u8); - q4u16 = vmlal_u8(q4u16, d24u8, d5u8); - q5u16 = vmlal_u8(q5u16, d25u8, d5u8); - q6u16 = vmlal_u8(q6u16, d26u8, 
d5u8); - - q7u16 = vmull_u8(d21u8, d3u8); - q8u16 = vmull_u8(d22u8, d3u8); - q9u16 = vmull_u8(d23u8, d3u8); - q10u16 = vmull_u8(d24u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d6u8 = vqrshrun_n_s16(q7s16, 7); - d7u8 = vqrshrun_n_s16(q8s16, 7); - d8u8 = vqrshrun_n_s16(q9s16, 7); - d9u8 = vqrshrun_n_s16(q10s16, 7); - - d18u8 = d22u8; - d19u8 = d23u8; - d20u8 = d24u8; - d21u8 = d25u8; - d22u8 = d26u8; - d23u8 = d27u8; - d24u8 = d28u8; - d25u8 = d29u8; - d26u8 = d30u8; - - vst1_u8(dst_ptr, d6u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d7u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d8u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d9u8); - dst_ptr += dst_pitch; - } - return; - } - - // load first_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // First pass: output_height lines x output_width columns (9x4) - if (yoffset == 0) // firstpass_filter4x4_only - src = src_ptr - 2; - else - src = src_ptr - 2 - (src_pixels_per_line * 2); - - tmpp = tmp; - for (i = 2; i > 0; i--) { - q3u8 = vld1q_u8(src); - src += src_pixels_per_line; - q4u8 = vld1q_u8(src); - 
src += src_pixels_per_line; - q5u8 = vld1q_u8(src); - src += src_pixels_per_line; - q6u8 = vld1q_u8(src); - src += src_pixels_per_line; - - __builtin_prefetch(src); - __builtin_prefetch(src + src_pixels_per_line); - __builtin_prefetch(src + src_pixels_per_line * 2); - - q7u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q8u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q5u8), d0u8); - q10u16 = vmull_u8(vget_low_u8(q6u8), d0u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); - - q7u16 = vmlsl_u8(q7u16, d28u8, d1u8); - q8u16 = vmlsl_u8(q8u16, d29u8, d1u8); - q9u16 = vmlsl_u8(q9u16, d30u8, d1u8); - q10u16 = vmlsl_u8(q10u16, d31u8, d1u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); - - q7u16 = vmlsl_u8(q7u16, d28u8, d4u8); - q8u16 = vmlsl_u8(q8u16, d29u8, d4u8); - q9u16 = vmlsl_u8(q9u16, d30u8, d4u8); - q10u16 = vmlsl_u8(q10u16, d31u8, d4u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); - - q7u16 = vmlal_u8(q7u16, d28u8, d2u8); - q8u16 = vmlal_u8(q8u16, d29u8, d2u8); - q9u16 = vmlal_u8(q9u16, d30u8, d2u8); - q10u16 = vmlal_u8(q10u16, d31u8, d2u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); - - q7u16 = vmlal_u8(q7u16, d28u8, d5u8); - q8u16 = vmlal_u8(q8u16, 
d29u8, d5u8); - q9u16 = vmlal_u8(q9u16, d30u8, d5u8); - q10u16 = vmlal_u8(q10u16, d31u8, d5u8); - - d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); - d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); - d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); - d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); - - q3u16 = vmull_u8(d28u8, d3u8); - q4u16 = vmull_u8(d29u8, d3u8); - q5u16 = vmull_u8(d30u8, d3u8); - q6u16 = vmull_u8(d31u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d22u8 = vqrshrun_n_s16(q7s16, 7); - d23u8 = vqrshrun_n_s16(q8s16, 7); - d24u8 = vqrshrun_n_s16(q9s16, 7); - d25u8 = vqrshrun_n_s16(q10s16, 7); - - if (yoffset == 0) { // firstpass_filter8x4_only - vst1_u8(dst_ptr, d22u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d23u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d24u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d25u8); - dst_ptr += dst_pitch; - } else { - vst1_u8(tmpp, d22u8); - tmpp += 8; - vst1_u8(tmpp, d23u8); - tmpp += 8; - vst1_u8(tmpp, d24u8); - tmpp += 8; - vst1_u8(tmpp, d25u8); - tmpp += 8; - } - } - if (yoffset == 0) - return; - - // First Pass on rest 5-line data - q3u8 = vld1q_u8(src); - src += src_pixels_per_line; - q4u8 = vld1q_u8(src); - src += src_pixels_per_line; - q5u8 = vld1q_u8(src); - src += src_pixels_per_line; - q6u8 = vld1q_u8(src); - src += src_pixels_per_line; - q7u8 = vld1q_u8(src); - - q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); - q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); - q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); - q11u16 = 
vmull_u8(vget_low_u8(q6u8), d0u8); - q12u16 = vmull_u8(vget_low_u8(q7u8), d0u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 1); - - q8u16 = vmlsl_u8(q8u16, d27u8, d1u8); - q9u16 = vmlsl_u8(q9u16, d28u8, d1u8); - q10u16 = vmlsl_u8(q10u16, d29u8, d1u8); - q11u16 = vmlsl_u8(q11u16, d30u8, d1u8); - q12u16 = vmlsl_u8(q12u16, d31u8, d1u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 4); - - q8u16 = vmlsl_u8(q8u16, d27u8, d4u8); - q9u16 = vmlsl_u8(q9u16, d28u8, d4u8); - q10u16 = vmlsl_u8(q10u16, d29u8, d4u8); - q11u16 = vmlsl_u8(q11u16, d30u8, d4u8); - q12u16 = vmlsl_u8(q12u16, d31u8, d4u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 2); - - q8u16 = vmlal_u8(q8u16, d27u8, d2u8); - q9u16 = vmlal_u8(q9u16, d28u8, d2u8); - q10u16 = vmlal_u8(q10u16, d29u8, d2u8); - q11u16 = vmlal_u8(q11u16, d30u8, d2u8); - q12u16 = vmlal_u8(q12u16, d31u8, d2u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 5); - - q8u16 = vmlal_u8(q8u16, d27u8, d5u8); - q9u16 = 
vmlal_u8(q9u16, d28u8, d5u8); - q10u16 = vmlal_u8(q10u16, d29u8, d5u8); - q11u16 = vmlal_u8(q11u16, d30u8, d5u8); - q12u16 = vmlal_u8(q12u16, d31u8, d5u8); - - d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); - d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); - d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); - d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); - d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 3); - - q3u16 = vmull_u8(d27u8, d3u8); - q4u16 = vmull_u8(d28u8, d3u8); - q5u16 = vmull_u8(d29u8, d3u8); - q6u16 = vmull_u8(d30u8, d3u8); - q7u16 = vmull_u8(d31u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - q11s16 = vreinterpretq_s16_u16(q11u16); - q12s16 = vreinterpretq_s16_u16(q12u16); - - q8s16 = vqaddq_s16(q8s16, q3s16); - q9s16 = vqaddq_s16(q9s16, q4s16); - q10s16 = vqaddq_s16(q10s16, q5s16); - q11s16 = vqaddq_s16(q11s16, q6s16); - q12s16 = vqaddq_s16(q12s16, q7s16); - - d26u8 = vqrshrun_n_s16(q8s16, 7); - d27u8 = vqrshrun_n_s16(q9s16, 7); - d28u8 = vqrshrun_n_s16(q10s16, 7); - d29u8 = vqrshrun_n_s16(q11s16, 7); - d30u8 = vqrshrun_n_s16(q12s16, 7); - - // Second pass: 8x8 - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - tmpp = tmp; - q9u8 = vld1q_u8(tmpp); - 
tmpp += 16; - q10u8 = vld1q_u8(tmpp); - tmpp += 16; - q11u8 = vld1q_u8(tmpp); - tmpp += 16; - q12u8 = vld1q_u8(tmpp); - - d18u8 = vget_low_u8(q9u8); - d19u8 = vget_high_u8(q9u8); - d20u8 = vget_low_u8(q10u8); - d21u8 = vget_high_u8(q10u8); - d22u8 = vget_low_u8(q11u8); - d23u8 = vget_high_u8(q11u8); - d24u8 = vget_low_u8(q12u8); - d25u8 = vget_high_u8(q12u8); - - for (i = 2; i > 0; i--) { - q3u16 = vmull_u8(d18u8, d0u8); - q4u16 = vmull_u8(d19u8, d0u8); - q5u16 = vmull_u8(d20u8, d0u8); - q6u16 = vmull_u8(d21u8, d0u8); - - q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); - q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); - q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); - - q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); - q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); - - q3u16 = vmlal_u8(q3u16, d20u8, d2u8); - q4u16 = vmlal_u8(q4u16, d21u8, d2u8); - q5u16 = vmlal_u8(q5u16, d22u8, d2u8); - q6u16 = vmlal_u8(q6u16, d23u8, d2u8); - - q3u16 = vmlal_u8(q3u16, d23u8, d5u8); - q4u16 = vmlal_u8(q4u16, d24u8, d5u8); - q5u16 = vmlal_u8(q5u16, d25u8, d5u8); - q6u16 = vmlal_u8(q6u16, d26u8, d5u8); - - q7u16 = vmull_u8(d21u8, d3u8); - q8u16 = vmull_u8(d22u8, d3u8); - q9u16 = vmull_u8(d23u8, d3u8); - q10u16 = vmull_u8(d24u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d6u8 = vqrshrun_n_s16(q7s16, 7); - d7u8 = vqrshrun_n_s16(q8s16, 7); - d8u8 = vqrshrun_n_s16(q9s16, 7); - d9u8 = vqrshrun_n_s16(q10s16, 7); - - d18u8 = d22u8; - d19u8 = d23u8; - d20u8 = d24u8; - d21u8 = d25u8; - d22u8 
= d26u8; - d23u8 = d27u8; - d24u8 = d28u8; - d25u8 = d29u8; - d26u8 = d30u8; - - vst1_u8(dst_ptr, d6u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d7u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d8u8); - dst_ptr += dst_pitch; - vst1_u8(dst_ptr, d9u8); - dst_ptr += dst_pitch; - } - return; -} - -void vp8_sixtap_predict16x16_neon( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch) { - unsigned char *src, *src_tmp, *dst, *tmpp; - unsigned char tmp[336]; - int i, j; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; - uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d18u8, d19u8; - uint8x8_t d20u8, d21u8, d22u8, d23u8, d24u8, d25u8, d26u8, d27u8; - uint8x8_t d28u8, d29u8, d30u8, d31u8; - int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; - uint8x16_t q3u8, q4u8; - uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16, q8u16, q9u16, q10u16; - uint16x8_t q11u16, q12u16, q13u16, q15u16; - int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16, q8s16, q9s16, q10s16; - int16x8_t q11s16, q12s16, q13s16, q15s16; - - if (xoffset == 0) { // secondpass_filter8x8_only - // load second_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // load src data - src_tmp = src_ptr - src_pixels_per_line * 2; - for (i = 0; i < 2; i++) { - src = src_tmp + i * 8; - dst = dst_ptr + i * 8; - d18u8 = vld1_u8(src); - src += src_pixels_per_line; - d19u8 = vld1_u8(src); - src += src_pixels_per_line; - d20u8 = vld1_u8(src); - src += 
src_pixels_per_line; - d21u8 = vld1_u8(src); - src += src_pixels_per_line; - d22u8 = vld1_u8(src); - src += src_pixels_per_line; - for (j = 0; j < 4; j++) { - d23u8 = vld1_u8(src); - src += src_pixels_per_line; - d24u8 = vld1_u8(src); - src += src_pixels_per_line; - d25u8 = vld1_u8(src); - src += src_pixels_per_line; - d26u8 = vld1_u8(src); - src += src_pixels_per_line; - - q3u16 = vmull_u8(d18u8, d0u8); - q4u16 = vmull_u8(d19u8, d0u8); - q5u16 = vmull_u8(d20u8, d0u8); - q6u16 = vmull_u8(d21u8, d0u8); - - q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); - q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); - q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); - - q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); - q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); - - q3u16 = vmlal_u8(q3u16, d20u8, d2u8); - q4u16 = vmlal_u8(q4u16, d21u8, d2u8); - q5u16 = vmlal_u8(q5u16, d22u8, d2u8); - q6u16 = vmlal_u8(q6u16, d23u8, d2u8); - - q3u16 = vmlal_u8(q3u16, d23u8, d5u8); - q4u16 = vmlal_u8(q4u16, d24u8, d5u8); - q5u16 = vmlal_u8(q5u16, d25u8, d5u8); - q6u16 = vmlal_u8(q6u16, d26u8, d5u8); - - q7u16 = vmull_u8(d21u8, d3u8); - q8u16 = vmull_u8(d22u8, d3u8); - q9u16 = vmull_u8(d23u8, d3u8); - q10u16 = vmull_u8(d24u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d6u8 = vqrshrun_n_s16(q7s16, 7); - d7u8 = vqrshrun_n_s16(q8s16, 7); - d8u8 = vqrshrun_n_s16(q9s16, 7); - d9u8 = vqrshrun_n_s16(q10s16, 7); - - d18u8 = d22u8; - d19u8 = d23u8; - d20u8 = d24u8; - d21u8 = d25u8; - d22u8 = d26u8; - - 
vst1_u8(dst, d6u8); - dst += dst_pitch; - vst1_u8(dst, d7u8); - dst += dst_pitch; - vst1_u8(dst, d8u8); - dst += dst_pitch; - vst1_u8(dst, d9u8); - dst += dst_pitch; - } - } - return; - } - - // load first_pass filter - dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - // First pass: output_height lines x output_width columns (9x4) - if (yoffset == 0) { // firstpass_filter4x4_only - src = src_ptr - 2; - dst = dst_ptr; - for (i = 0; i < 8; i++) { - d6u8 = vld1_u8(src); - d7u8 = vld1_u8(src + 8); - d8u8 = vld1_u8(src + 16); - src += src_pixels_per_line; - d9u8 = vld1_u8(src); - d10u8 = vld1_u8(src + 8); - d11u8 = vld1_u8(src + 16); - src += src_pixels_per_line; - - __builtin_prefetch(src); - __builtin_prefetch(src + src_pixels_per_line); - - q6u16 = vmull_u8(d6u8, d0u8); - q7u16 = vmull_u8(d7u8, d0u8); - q8u16 = vmull_u8(d9u8, d0u8); - q9u16 = vmull_u8(d10u8, d0u8); - - d20u8 = vext_u8(d6u8, d7u8, 1); - d21u8 = vext_u8(d9u8, d10u8, 1); - d22u8 = vext_u8(d7u8, d8u8, 1); - d23u8 = vext_u8(d10u8, d11u8, 1); - d24u8 = vext_u8(d6u8, d7u8, 4); - d25u8 = vext_u8(d9u8, d10u8, 4); - d26u8 = vext_u8(d7u8, d8u8, 4); - d27u8 = vext_u8(d10u8, d11u8, 4); - d28u8 = vext_u8(d6u8, d7u8, 5); - d29u8 = vext_u8(d9u8, d10u8, 5); - - q6u16 = vmlsl_u8(q6u16, d20u8, d1u8); - q8u16 = vmlsl_u8(q8u16, d21u8, d1u8); - q7u16 = vmlsl_u8(q7u16, d22u8, d1u8); - q9u16 = vmlsl_u8(q9u16, d23u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d24u8, d4u8); - q8u16 = vmlsl_u8(q8u16, d25u8, d4u8); - q7u16 = vmlsl_u8(q7u16, d26u8, d4u8); - q9u16 = 
vmlsl_u8(q9u16, d27u8, d4u8); - q6u16 = vmlal_u8(q6u16, d28u8, d5u8); - q8u16 = vmlal_u8(q8u16, d29u8, d5u8); - - d20u8 = vext_u8(d7u8, d8u8, 5); - d21u8 = vext_u8(d10u8, d11u8, 5); - d22u8 = vext_u8(d6u8, d7u8, 2); - d23u8 = vext_u8(d9u8, d10u8, 2); - d24u8 = vext_u8(d7u8, d8u8, 2); - d25u8 = vext_u8(d10u8, d11u8, 2); - d26u8 = vext_u8(d6u8, d7u8, 3); - d27u8 = vext_u8(d9u8, d10u8, 3); - d28u8 = vext_u8(d7u8, d8u8, 3); - d29u8 = vext_u8(d10u8, d11u8, 3); - - q7u16 = vmlal_u8(q7u16, d20u8, d5u8); - q9u16 = vmlal_u8(q9u16, d21u8, d5u8); - q6u16 = vmlal_u8(q6u16, d22u8, d2u8); - q8u16 = vmlal_u8(q8u16, d23u8, d2u8); - q7u16 = vmlal_u8(q7u16, d24u8, d2u8); - q9u16 = vmlal_u8(q9u16, d25u8, d2u8); - - q10u16 = vmull_u8(d26u8, d3u8); - q11u16 = vmull_u8(d27u8, d3u8); - q12u16 = vmull_u8(d28u8, d3u8); - q15u16 = vmull_u8(d29u8, d3u8); - - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - q11s16 = vreinterpretq_s16_u16(q11u16); - q12s16 = vreinterpretq_s16_u16(q12u16); - q15s16 = vreinterpretq_s16_u16(q15u16); - - q6s16 = vqaddq_s16(q6s16, q10s16); - q8s16 = vqaddq_s16(q8s16, q11s16); - q7s16 = vqaddq_s16(q7s16, q12s16); - q9s16 = vqaddq_s16(q9s16, q15s16); - - d6u8 = vqrshrun_n_s16(q6s16, 7); - d7u8 = vqrshrun_n_s16(q7s16, 7); - d8u8 = vqrshrun_n_s16(q8s16, 7); - d9u8 = vqrshrun_n_s16(q9s16, 7); - - q3u8 = vcombine_u8(d6u8, d7u8); - q4u8 = vcombine_u8(d8u8, d9u8); - vst1q_u8(dst, q3u8); - dst += dst_pitch; - vst1q_u8(dst, q4u8); - dst += dst_pitch; - } - return; - } - - src = src_ptr - 2 - src_pixels_per_line * 2; - tmpp = tmp; - for (i = 0; i < 7; i++) { - d6u8 = vld1_u8(src); - d7u8 = vld1_u8(src + 8); - d8u8 = vld1_u8(src + 16); - src += src_pixels_per_line; - d9u8 = vld1_u8(src); - d10u8 = vld1_u8(src + 8); - d11u8 = vld1_u8(src + 16); - src += src_pixels_per_line; - d12u8 = vld1_u8(src); - d13u8 = vld1_u8(src + 8); - 
d14u8 = vld1_u8(src + 16); - src += src_pixels_per_line; - - __builtin_prefetch(src); - __builtin_prefetch(src + src_pixels_per_line); - __builtin_prefetch(src + src_pixels_per_line * 2); - - q8u16 = vmull_u8(d6u8, d0u8); - q9u16 = vmull_u8(d7u8, d0u8); - q10u16 = vmull_u8(d9u8, d0u8); - q11u16 = vmull_u8(d10u8, d0u8); - q12u16 = vmull_u8(d12u8, d0u8); - q13u16 = vmull_u8(d13u8, d0u8); - - d28u8 = vext_u8(d6u8, d7u8, 1); - d29u8 = vext_u8(d9u8, d10u8, 1); - d30u8 = vext_u8(d12u8, d13u8, 1); - q8u16 = vmlsl_u8(q8u16, d28u8, d1u8); - q10u16 = vmlsl_u8(q10u16, d29u8, d1u8); - q12u16 = vmlsl_u8(q12u16, d30u8, d1u8); - d28u8 = vext_u8(d7u8, d8u8, 1); - d29u8 = vext_u8(d10u8, d11u8, 1); - d30u8 = vext_u8(d13u8, d14u8, 1); - q9u16 = vmlsl_u8(q9u16, d28u8, d1u8); - q11u16 = vmlsl_u8(q11u16, d29u8, d1u8); - q13u16 = vmlsl_u8(q13u16, d30u8, d1u8); - - d28u8 = vext_u8(d6u8, d7u8, 4); - d29u8 = vext_u8(d9u8, d10u8, 4); - d30u8 = vext_u8(d12u8, d13u8, 4); - q8u16 = vmlsl_u8(q8u16, d28u8, d4u8); - q10u16 = vmlsl_u8(q10u16, d29u8, d4u8); - q12u16 = vmlsl_u8(q12u16, d30u8, d4u8); - d28u8 = vext_u8(d7u8, d8u8, 4); - d29u8 = vext_u8(d10u8, d11u8, 4); - d30u8 = vext_u8(d13u8, d14u8, 4); - q9u16 = vmlsl_u8(q9u16, d28u8, d4u8); - q11u16 = vmlsl_u8(q11u16, d29u8, d4u8); - q13u16 = vmlsl_u8(q13u16, d30u8, d4u8); - - d28u8 = vext_u8(d6u8, d7u8, 5); - d29u8 = vext_u8(d9u8, d10u8, 5); - d30u8 = vext_u8(d12u8, d13u8, 5); - q8u16 = vmlal_u8(q8u16, d28u8, d5u8); - q10u16 = vmlal_u8(q10u16, d29u8, d5u8); - q12u16 = vmlal_u8(q12u16, d30u8, d5u8); - d28u8 = vext_u8(d7u8, d8u8, 5); - d29u8 = vext_u8(d10u8, d11u8, 5); - d30u8 = vext_u8(d13u8, d14u8, 5); - q9u16 = vmlal_u8(q9u16, d28u8, d5u8); - q11u16 = vmlal_u8(q11u16, d29u8, d5u8); - q13u16 = vmlal_u8(q13u16, d30u8, d5u8); - - d28u8 = vext_u8(d6u8, d7u8, 2); - d29u8 = vext_u8(d9u8, d10u8, 2); - d30u8 = vext_u8(d12u8, d13u8, 2); - q8u16 = vmlal_u8(q8u16, d28u8, d2u8); - q10u16 = vmlal_u8(q10u16, d29u8, d2u8); - q12u16 = vmlal_u8(q12u16, d30u8, 
d2u8); - d28u8 = vext_u8(d7u8, d8u8, 2); - d29u8 = vext_u8(d10u8, d11u8, 2); - d30u8 = vext_u8(d13u8, d14u8, 2); - q9u16 = vmlal_u8(q9u16, d28u8, d2u8); - q11u16 = vmlal_u8(q11u16, d29u8, d2u8); - q13u16 = vmlal_u8(q13u16, d30u8, d2u8); - - d28u8 = vext_u8(d6u8, d7u8, 3); - d29u8 = vext_u8(d9u8, d10u8, 3); - d30u8 = vext_u8(d12u8, d13u8, 3); - d15u8 = vext_u8(d7u8, d8u8, 3); - d31u8 = vext_u8(d10u8, d11u8, 3); - d6u8 = vext_u8(d13u8, d14u8, 3); - q4u16 = vmull_u8(d28u8, d3u8); - q5u16 = vmull_u8(d29u8, d3u8); - q6u16 = vmull_u8(d30u8, d3u8); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - q12s16 = vreinterpretq_s16_u16(q12u16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q10s16 = vqaddq_s16(q10s16, q5s16); - q12s16 = vqaddq_s16(q12s16, q6s16); - - q6u16 = vmull_u8(d15u8, d3u8); - q7u16 = vmull_u8(d31u8, d3u8); - q3u16 = vmull_u8(d6u8, d3u8); - q3s16 = vreinterpretq_s16_u16(q3u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q9s16 = vreinterpretq_s16_u16(q9u16); - q11s16 = vreinterpretq_s16_u16(q11u16); - q13s16 = vreinterpretq_s16_u16(q13u16); - q9s16 = vqaddq_s16(q9s16, q6s16); - q11s16 = vqaddq_s16(q11s16, q7s16); - q13s16 = vqaddq_s16(q13s16, q3s16); - - d6u8 = vqrshrun_n_s16(q8s16, 7); - d7u8 = vqrshrun_n_s16(q9s16, 7); - d8u8 = vqrshrun_n_s16(q10s16, 7); - d9u8 = vqrshrun_n_s16(q11s16, 7); - d10u8 = vqrshrun_n_s16(q12s16, 7); - d11u8 = vqrshrun_n_s16(q13s16, 7); - - vst1_u8(tmpp, d6u8); - tmpp += 8; - vst1_u8(tmpp, d7u8); - tmpp += 8; - vst1_u8(tmpp, d8u8); - tmpp += 8; - vst1_u8(tmpp, d9u8); - tmpp += 8; - vst1_u8(tmpp, d10u8); - tmpp += 8; - vst1_u8(tmpp, d11u8); - tmpp += 8; - } - - // Second pass: 16x16 - dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); - d0s8 = vdup_lane_s8(dtmps8, 0); - d1s8 = vdup_lane_s8(dtmps8, 1); - d2s8 = vdup_lane_s8(dtmps8, 2); - d3s8 = 
vdup_lane_s8(dtmps8, 3); - d4s8 = vdup_lane_s8(dtmps8, 4); - d5s8 = vdup_lane_s8(dtmps8, 5); - d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); - d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); - d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); - d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); - d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); - d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); - - for (i = 0; i < 2; i++) { - dst = dst_ptr + 8 * i; - tmpp = tmp + 8 * i; - d18u8 = vld1_u8(tmpp); - tmpp += 16; - d19u8 = vld1_u8(tmpp); - tmpp += 16; - d20u8 = vld1_u8(tmpp); - tmpp += 16; - d21u8 = vld1_u8(tmpp); - tmpp += 16; - d22u8 = vld1_u8(tmpp); - tmpp += 16; - for (j = 0; j < 4; j++) { - d23u8 = vld1_u8(tmpp); - tmpp += 16; - d24u8 = vld1_u8(tmpp); - tmpp += 16; - d25u8 = vld1_u8(tmpp); - tmpp += 16; - d26u8 = vld1_u8(tmpp); - tmpp += 16; - - q3u16 = vmull_u8(d18u8, d0u8); - q4u16 = vmull_u8(d19u8, d0u8); - q5u16 = vmull_u8(d20u8, d0u8); - q6u16 = vmull_u8(d21u8, d0u8); - - q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); - q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); - q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); - q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); - - q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); - q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); - q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); - q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); - - q3u16 = vmlal_u8(q3u16, d20u8, d2u8); - q4u16 = vmlal_u8(q4u16, d21u8, d2u8); - q5u16 = vmlal_u8(q5u16, d22u8, d2u8); - q6u16 = vmlal_u8(q6u16, d23u8, d2u8); - - q3u16 = vmlal_u8(q3u16, d23u8, d5u8); - q4u16 = vmlal_u8(q4u16, d24u8, d5u8); - q5u16 = vmlal_u8(q5u16, d25u8, d5u8); - q6u16 = vmlal_u8(q6u16, d26u8, d5u8); - - q7u16 = vmull_u8(d21u8, d3u8); - q8u16 = vmull_u8(d22u8, d3u8); - q9u16 = vmull_u8(d23u8, d3u8); - q10u16 = vmull_u8(d24u8, d3u8); - - q3s16 = vreinterpretq_s16_u16(q3u16); - q4s16 = vreinterpretq_s16_u16(q4u16); - q5s16 = vreinterpretq_s16_u16(q5u16); - q6s16 = vreinterpretq_s16_u16(q6u16); - q7s16 = vreinterpretq_s16_u16(q7u16); - q8s16 = vreinterpretq_s16_u16(q8u16); - q9s16 = 
vreinterpretq_s16_u16(q9u16); - q10s16 = vreinterpretq_s16_u16(q10u16); - - q7s16 = vqaddq_s16(q7s16, q3s16); - q8s16 = vqaddq_s16(q8s16, q4s16); - q9s16 = vqaddq_s16(q9s16, q5s16); - q10s16 = vqaddq_s16(q10s16, q6s16); - - d6u8 = vqrshrun_n_s16(q7s16, 7); - d7u8 = vqrshrun_n_s16(q8s16, 7); - d8u8 = vqrshrun_n_s16(q9s16, 7); - d9u8 = vqrshrun_n_s16(q10s16, 7); - - d18u8 = d22u8; - d19u8 = d23u8; - d20u8 = d24u8; - d21u8 = d25u8; - d22u8 = d26u8; - - vst1_u8(dst, d6u8); - dst += dst_pitch; - vst1_u8(dst, d7u8); - dst += dst_pitch; - vst1_u8(dst, d8u8); - dst += dst_pitch; - vst1_u8(dst, d9u8); - dst += dst_pitch; - } - } - return; -} diff --git a/thirdparty/libvpx/vp8/common/arm/neon/vp8_loopfilter_neon.c b/thirdparty/libvpx/vp8/common/arm/neon/vp8_loopfilter_neon.c deleted file mode 100644 index 9d6807af71..0000000000 --- a/thirdparty/libvpx/vp8/common/arm/neon/vp8_loopfilter_neon.c +++ /dev/null @@ -1,550 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <arm_neon.h> -#include "./vpx_config.h" -#include "vpx_ports/arm.h" - -static INLINE void vp8_loop_filter_neon( - uint8x16_t qblimit, // flimit - uint8x16_t qlimit, // limit - uint8x16_t qthresh, // thresh - uint8x16_t q3, // p3 - uint8x16_t q4, // p2 - uint8x16_t q5, // p1 - uint8x16_t q6, // p0 - uint8x16_t q7, // q0 - uint8x16_t q8, // q1 - uint8x16_t q9, // q2 - uint8x16_t q10, // q3 - uint8x16_t *q5r, // p1 - uint8x16_t *q6r, // p0 - uint8x16_t *q7r, // q0 - uint8x16_t *q8r) { // q1 - uint8x16_t q0u8, q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8; - int16x8_t q2s16, q11s16; - uint16x8_t q4u16; - int8x16_t q1s8, q2s8, q10s8, q11s8, q12s8, q13s8; - int8x8_t d2s8, d3s8; - - q11u8 = vabdq_u8(q3, q4); - q12u8 = vabdq_u8(q4, q5); - q13u8 = vabdq_u8(q5, q6); - q14u8 = vabdq_u8(q8, q7); - q3 = vabdq_u8(q9, q8); - q4 = vabdq_u8(q10, q9); - - q11u8 = vmaxq_u8(q11u8, q12u8); - q12u8 = vmaxq_u8(q13u8, q14u8); - q3 = vmaxq_u8(q3, q4); - q15u8 = vmaxq_u8(q11u8, q12u8); - - q9 = vabdq_u8(q6, q7); - - // vp8_hevmask - q13u8 = vcgtq_u8(q13u8, qthresh); - q14u8 = vcgtq_u8(q14u8, qthresh); - q15u8 = vmaxq_u8(q15u8, q3); - - q2u8 = vabdq_u8(q5, q8); - q9 = vqaddq_u8(q9, q9); - - q15u8 = vcgeq_u8(qlimit, q15u8); - - // vp8_filter() function - // convert to signed - q10 = vdupq_n_u8(0x80); - q8 = veorq_u8(q8, q10); - q7 = veorq_u8(q7, q10); - q6 = veorq_u8(q6, q10); - q5 = veorq_u8(q5, q10); - - q2u8 = vshrq_n_u8(q2u8, 1); - q9 = vqaddq_u8(q9, q2u8); - - q10 = vdupq_n_u8(3); - - q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)), - vget_low_s8(vreinterpretq_s8_u8(q6))); - q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)), - vget_high_s8(vreinterpretq_s8_u8(q6))); - - q9 = vcgeq_u8(qblimit, q9); - - q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5), - vreinterpretq_s8_u8(q8)); - - q14u8 = vorrq_u8(q13u8, q14u8); - - q4u16 = vmovl_u8(vget_low_u8(q10)); - q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16)); - q11s16 = vmulq_s16(q11s16, 
vreinterpretq_s16_u16(q4u16)); - - q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8); - q15u8 = vandq_u8(q15u8, q9); - - q1s8 = vreinterpretq_s8_u8(q1u8); - q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8)); - q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8)); - - q9 = vdupq_n_u8(4); - // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0)) - d2s8 = vqmovn_s16(q2s16); - d3s8 = vqmovn_s16(q11s16); - q1s8 = vcombine_s8(d2s8, d3s8); - q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8); - q1s8 = vreinterpretq_s8_u8(q1u8); - - q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q10)); - q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9)); - q2s8 = vshrq_n_s8(q2s8, 3); - q1s8 = vshrq_n_s8(q1s8, 3); - - q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8); - q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8); - - q1s8 = vrshrq_n_s8(q1s8, 1); - q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8)); - - q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8); - q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8); - - q0u8 = vdupq_n_u8(0x80); - *q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q0u8); - *q7r = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8); - *q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8); - *q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q0u8); - return; -} - -void vp8_loop_filter_horizontal_edge_y_neon( - unsigned char *src, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh) { - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - src -= (pitch << 2); - - q3 = vld1q_u8(src); - src += pitch; - q4 = vld1q_u8(src); - src += pitch; - q5 = vld1q_u8(src); - src += pitch; - q6 = vld1q_u8(src); - src += pitch; - q7 = vld1q_u8(src); - src += pitch; - q8 = vld1q_u8(src); - src += pitch; - q9 = vld1q_u8(src); - src += pitch; - q10 = vld1q_u8(src); - - vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q5, &q6, &q7, &q8); - - src -= 
(pitch * 5); - vst1q_u8(src, q5); - src += pitch; - vst1q_u8(src, q6); - src += pitch; - vst1q_u8(src, q7); - src += pitch; - vst1q_u8(src, q8); - return; -} - -void vp8_loop_filter_horizontal_edge_uv_neon( - unsigned char *u, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh, - unsigned char *v) { - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; - uint8x8_t d15, d16, d17, d18, d19, d20, d21; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - u -= (pitch << 2); - v -= (pitch << 2); - - d6 = vld1_u8(u); - u += pitch; - d7 = vld1_u8(v); - v += pitch; - d8 = vld1_u8(u); - u += pitch; - d9 = vld1_u8(v); - v += pitch; - d10 = vld1_u8(u); - u += pitch; - d11 = vld1_u8(v); - v += pitch; - d12 = vld1_u8(u); - u += pitch; - d13 = vld1_u8(v); - v += pitch; - d14 = vld1_u8(u); - u += pitch; - d15 = vld1_u8(v); - v += pitch; - d16 = vld1_u8(u); - u += pitch; - d17 = vld1_u8(v); - v += pitch; - d18 = vld1_u8(u); - u += pitch; - d19 = vld1_u8(v); - v += pitch; - d20 = vld1_u8(u); - d21 = vld1_u8(v); - - q3 = vcombine_u8(d6, d7); - q4 = vcombine_u8(d8, d9); - q5 = vcombine_u8(d10, d11); - q6 = vcombine_u8(d12, d13); - q7 = vcombine_u8(d14, d15); - q8 = vcombine_u8(d16, d17); - q9 = vcombine_u8(d18, d19); - q10 = vcombine_u8(d20, d21); - - vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q5, &q6, &q7, &q8); - - u -= (pitch * 5); - vst1_u8(u, vget_low_u8(q5)); - u += pitch; - vst1_u8(u, vget_low_u8(q6)); - u += pitch; - vst1_u8(u, vget_low_u8(q7)); - u += pitch; - vst1_u8(u, vget_low_u8(q8)); - - v -= (pitch * 5); - vst1_u8(v, vget_high_u8(q5)); - v += pitch; - vst1_u8(v, vget_high_u8(q6)); - v += pitch; - vst1_u8(v, vget_high_u8(q7)); - v += pitch; - vst1_u8(v, vget_high_u8(q8)); - return; -} - -static INLINE void write_4x8(unsigned char *dst, int pitch, - const uint8x8x4_t 
result) { -#ifdef VPX_INCOMPATIBLE_GCC - /* - * uint8x8x4_t result - 00 01 02 03 | 04 05 06 07 - 10 11 12 13 | 14 15 16 17 - 20 21 22 23 | 24 25 26 27 - 30 31 32 33 | 34 35 36 37 - --- - * after vtrn_u16 - 00 01 20 21 | 04 05 24 25 - 02 03 22 23 | 06 07 26 27 - 10 11 30 31 | 14 15 34 35 - 12 13 32 33 | 16 17 36 37 - --- - * after vtrn_u8 - 00 10 20 30 | 04 14 24 34 - 01 11 21 31 | 05 15 25 35 - 02 12 22 32 | 06 16 26 36 - 03 13 23 33 | 07 17 27 37 - */ - const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]), - vreinterpret_u16_u8(result.val[2])); - const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]), - vreinterpret_u16_u8(result.val[3])); - const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]), - vreinterpret_u8_u16(r13_u16.val[0])); - const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]), - vreinterpret_u8_u16(r13_u16.val[1])); - const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]); - const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]); - const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]); - const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]); - vst1_lane_u32((uint32_t *)dst, x_0_4, 0); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_1_5, 0); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_2_6, 0); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_3_7, 0); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_0_4, 1); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_1_5, 1); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_2_6, 1); - dst += pitch; - vst1_lane_u32((uint32_t *)dst, x_3_7, 1); -#else - vst4_lane_u8(dst, result, 0); - dst += pitch; - vst4_lane_u8(dst, result, 1); - dst += pitch; - vst4_lane_u8(dst, result, 2); - dst += pitch; - vst4_lane_u8(dst, result, 3); - dst += pitch; - vst4_lane_u8(dst, result, 4); - dst += pitch; - vst4_lane_u8(dst, result, 5); - dst += pitch; - vst4_lane_u8(dst, result, 6); - dst += pitch; - vst4_lane_u8(dst, 
result, 7); -#endif // VPX_INCOMPATIBLE_GCC -} - -void vp8_loop_filter_vertical_edge_y_neon( - unsigned char *src, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh) { - unsigned char *s, *d; - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; - uint8x8_t d15, d16, d17, d18, d19, d20, d21; - uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; - uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; - uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; - uint8x8x4_t q4ResultH, q4ResultL; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - qthresh = vdupq_n_u8(thresh); - - s = src - 4; - d6 = vld1_u8(s); - s += pitch; - d8 = vld1_u8(s); - s += pitch; - d10 = vld1_u8(s); - s += pitch; - d12 = vld1_u8(s); - s += pitch; - d14 = vld1_u8(s); - s += pitch; - d16 = vld1_u8(s); - s += pitch; - d18 = vld1_u8(s); - s += pitch; - d20 = vld1_u8(s); - s += pitch; - d7 = vld1_u8(s); - s += pitch; - d9 = vld1_u8(s); - s += pitch; - d11 = vld1_u8(s); - s += pitch; - d13 = vld1_u8(s); - s += pitch; - d15 = vld1_u8(s); - s += pitch; - d17 = vld1_u8(s); - s += pitch; - d19 = vld1_u8(s); - s += pitch; - d21 = vld1_u8(s); - - q3 = vcombine_u8(d6, d7); - q4 = vcombine_u8(d8, d9); - q5 = vcombine_u8(d10, d11); - q6 = vcombine_u8(d12, d13); - q7 = vcombine_u8(d14, d15); - q8 = vcombine_u8(d16, d17); - q9 = vcombine_u8(d18, d19); - q10 = vcombine_u8(d20, d21); - - q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); - q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); - q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); - q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); - - q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), - vreinterpretq_u16_u32(q2tmp2.val[0])); - q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), - vreinterpretq_u16_u32(q2tmp3.val[0])); - q2tmp6 = 
vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), - vreinterpretq_u16_u32(q2tmp2.val[1])); - q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), - vreinterpretq_u16_u32(q2tmp3.val[1])); - - q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), - vreinterpretq_u8_u16(q2tmp5.val[0])); - q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), - vreinterpretq_u8_u16(q2tmp5.val[1])); - q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), - vreinterpretq_u8_u16(q2tmp7.val[0])); - q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), - vreinterpretq_u8_u16(q2tmp7.val[1])); - - q3 = q2tmp8.val[0]; - q4 = q2tmp8.val[1]; - q5 = q2tmp9.val[0]; - q6 = q2tmp9.val[1]; - q7 = q2tmp10.val[0]; - q8 = q2tmp10.val[1]; - q9 = q2tmp11.val[0]; - q10 = q2tmp11.val[1]; - - vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q5, &q6, &q7, &q8); - - q4ResultL.val[0] = vget_low_u8(q5); // d10 - q4ResultL.val[1] = vget_low_u8(q6); // d12 - q4ResultL.val[2] = vget_low_u8(q7); // d14 - q4ResultL.val[3] = vget_low_u8(q8); // d16 - q4ResultH.val[0] = vget_high_u8(q5); // d11 - q4ResultH.val[1] = vget_high_u8(q6); // d13 - q4ResultH.val[2] = vget_high_u8(q7); // d15 - q4ResultH.val[3] = vget_high_u8(q8); // d17 - - d = src - 2; - write_4x8(d, pitch, q4ResultL); - d += pitch * 8; - write_4x8(d, pitch, q4ResultH); -} - -void vp8_loop_filter_vertical_edge_uv_neon( - unsigned char *u, - int pitch, - unsigned char blimit, - unsigned char limit, - unsigned char thresh, - unsigned char *v) { - unsigned char *us, *ud; - unsigned char *vs, *vd; - uint8x16_t qblimit, qlimit, qthresh, q3, q4; - uint8x16_t q5, q6, q7, q8, q9, q10; - uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; - uint8x8_t d15, d16, d17, d18, d19, d20, d21; - uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; - uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; - uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; - uint8x8x4_t q4ResultH, q4ResultL; - - qblimit = vdupq_n_u8(blimit); - qlimit = vdupq_n_u8(limit); - 
qthresh = vdupq_n_u8(thresh); - - us = u - 4; - d6 = vld1_u8(us); - us += pitch; - d8 = vld1_u8(us); - us += pitch; - d10 = vld1_u8(us); - us += pitch; - d12 = vld1_u8(us); - us += pitch; - d14 = vld1_u8(us); - us += pitch; - d16 = vld1_u8(us); - us += pitch; - d18 = vld1_u8(us); - us += pitch; - d20 = vld1_u8(us); - - vs = v - 4; - d7 = vld1_u8(vs); - vs += pitch; - d9 = vld1_u8(vs); - vs += pitch; - d11 = vld1_u8(vs); - vs += pitch; - d13 = vld1_u8(vs); - vs += pitch; - d15 = vld1_u8(vs); - vs += pitch; - d17 = vld1_u8(vs); - vs += pitch; - d19 = vld1_u8(vs); - vs += pitch; - d21 = vld1_u8(vs); - - q3 = vcombine_u8(d6, d7); - q4 = vcombine_u8(d8, d9); - q5 = vcombine_u8(d10, d11); - q6 = vcombine_u8(d12, d13); - q7 = vcombine_u8(d14, d15); - q8 = vcombine_u8(d16, d17); - q9 = vcombine_u8(d18, d19); - q10 = vcombine_u8(d20, d21); - - q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); - q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); - q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); - q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); - - q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), - vreinterpretq_u16_u32(q2tmp2.val[0])); - q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), - vreinterpretq_u16_u32(q2tmp3.val[0])); - q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), - vreinterpretq_u16_u32(q2tmp2.val[1])); - q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), - vreinterpretq_u16_u32(q2tmp3.val[1])); - - q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), - vreinterpretq_u8_u16(q2tmp5.val[0])); - q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), - vreinterpretq_u8_u16(q2tmp5.val[1])); - q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), - vreinterpretq_u8_u16(q2tmp7.val[0])); - q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), - vreinterpretq_u8_u16(q2tmp7.val[1])); - - q3 = q2tmp8.val[0]; - q4 = q2tmp8.val[1]; - q5 = 
q2tmp9.val[0]; - q6 = q2tmp9.val[1]; - q7 = q2tmp10.val[0]; - q8 = q2tmp10.val[1]; - q9 = q2tmp11.val[0]; - q10 = q2tmp11.val[1]; - - vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, - q5, q6, q7, q8, q9, q10, - &q5, &q6, &q7, &q8); - - q4ResultL.val[0] = vget_low_u8(q5); // d10 - q4ResultL.val[1] = vget_low_u8(q6); // d12 - q4ResultL.val[2] = vget_low_u8(q7); // d14 - q4ResultL.val[3] = vget_low_u8(q8); // d16 - ud = u - 2; - write_4x8(ud, pitch, q4ResultL); - - q4ResultH.val[0] = vget_high_u8(q5); // d11 - q4ResultH.val[1] = vget_high_u8(q6); // d13 - q4ResultH.val[2] = vget_high_u8(q7); // d15 - q4ResultH.val[3] = vget_high_u8(q8); // d17 - vd = v - 2; - write_4x8(vd, pitch, q4ResultH); -} diff --git a/thirdparty/libvpx/vp8/common/blockd.c b/thirdparty/libvpx/vp8/common/blockd.c deleted file mode 100644 index 1fc3cd0ca7..0000000000 --- a/thirdparty/libvpx/vp8/common/blockd.c +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "blockd.h" -#include "vpx_mem/vpx_mem.h" - -const unsigned char vp8_block2left[25] = -{ - 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 -}; -const unsigned char vp8_block2above[25] = -{ - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8 -}; diff --git a/thirdparty/libvpx/vp8/common/blockd.h b/thirdparty/libvpx/vp8/common/blockd.h deleted file mode 100644 index 192108a06d..0000000000 --- a/thirdparty/libvpx/vp8/common/blockd.h +++ /dev/null @@ -1,312 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_BLOCKD_H_ -#define VP8_COMMON_BLOCKD_H_ - -void vpx_log(const char *format, ...); - -#include "vpx_config.h" -#include "vpx_scale/yv12config.h" -#include "mv.h" -#include "treecoder.h" -#include "vpx_ports/mem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/*#define DCPRED 1*/ -#define DCPREDSIMTHRESH 0 -#define DCPREDCNTTHRESH 3 - -#define MB_FEATURE_TREE_PROBS 3 -#define MAX_MB_SEGMENTS 4 - -#define MAX_REF_LF_DELTAS 4 -#define MAX_MODE_LF_DELTAS 4 - -/* Segment Feature Masks */ -#define SEGMENT_DELTADATA 0 -#define SEGMENT_ABSDATA 1 - -typedef struct -{ - int r, c; -} POS; - -#define PLANE_TYPE_Y_NO_DC 0 -#define PLANE_TYPE_Y2 1 -#define PLANE_TYPE_UV 2 -#define PLANE_TYPE_Y_WITH_DC 3 - - -typedef char ENTROPY_CONTEXT; -typedef struct -{ - ENTROPY_CONTEXT y1[4]; - ENTROPY_CONTEXT u[2]; - ENTROPY_CONTEXT v[2]; - ENTROPY_CONTEXT y2; -} ENTROPY_CONTEXT_PLANES; - -extern const unsigned char vp8_block2left[25]; -extern const unsigned char vp8_block2above[25]; - -#define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \ - Dest = (A)+(B); - - -typedef enum -{ - KEY_FRAME = 0, - INTER_FRAME = 1 -} FRAME_TYPE; - -typedef enum -{ - DC_PRED, /* average of above and left pixels */ - V_PRED, /* vertical prediction */ - H_PRED, /* horizontal prediction */ - TM_PRED, /* Truemotion prediction */ - B_PRED, /* block based prediction, each block has its own prediction mode */ - - NEARESTMV, - NEARMV, - ZEROMV, - NEWMV, - SPLITMV, - - MB_MODE_COUNT -} MB_PREDICTION_MODE; - -/* Macroblock level features */ -typedef enum -{ - MB_LVL_ALT_Q = 0, /* Use alternate Quantizer .... 
*/ - MB_LVL_ALT_LF = 1, /* Use alternate loop filter value... */ - MB_LVL_MAX = 2 /* Number of MB level features supported */ - -} MB_LVL_FEATURES; - -/* Segment Feature Masks */ -#define SEGMENT_ALTQ 0x01 -#define SEGMENT_ALT_LF 0x02 - -#define VP8_YMODES (B_PRED + 1) -#define VP8_UV_MODES (TM_PRED + 1) - -#define VP8_MVREFS (1 + SPLITMV - NEARESTMV) - -typedef enum -{ - B_DC_PRED, /* average of above and left pixels */ - B_TM_PRED, - - B_VE_PRED, /* vertical prediction */ - B_HE_PRED, /* horizontal prediction */ - - B_LD_PRED, - B_RD_PRED, - - B_VR_PRED, - B_VL_PRED, - B_HD_PRED, - B_HU_PRED, - - LEFT4X4, - ABOVE4X4, - ZERO4X4, - NEW4X4, - - B_MODE_COUNT -} B_PREDICTION_MODE; - -#define VP8_BINTRAMODES (B_HU_PRED + 1) /* 10 */ -#define VP8_SUBMVREFS (1 + NEW4X4 - LEFT4X4) - -/* For keyframes, intra block modes are predicted by the (already decoded) - modes for the Y blocks to the left and above us; for interframes, there - is a single probability table. */ - -union b_mode_info -{ - B_PREDICTION_MODE as_mode; - int_mv mv; -}; - -typedef enum -{ - INTRA_FRAME = 0, - LAST_FRAME = 1, - GOLDEN_FRAME = 2, - ALTREF_FRAME = 3, - MAX_REF_FRAMES = 4 -} MV_REFERENCE_FRAME; - -typedef struct -{ - uint8_t mode, uv_mode; - uint8_t ref_frame; - uint8_t is_4x4; - int_mv mv; - - uint8_t partitioning; - uint8_t mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */ - uint8_t need_to_clamp_mvs; - uint8_t segment_id; /* Which set of segmentation parameters should be used for this MB */ -} MB_MODE_INFO; - -typedef struct modeinfo -{ - MB_MODE_INFO mbmi; - union b_mode_info bmi[16]; -} MODE_INFO; - -#if CONFIG_MULTI_RES_ENCODING -/* The mb-level information needed to be stored for higher-resolution encoder */ -typedef struct -{ - MB_PREDICTION_MODE mode; - MV_REFERENCE_FRAME ref_frame; - int_mv mv; - int dissim; /* dissimilarity level of the macroblock */ -} LOWER_RES_MB_INFO; - -/* The frame-level information needed to be stored for 
higher-resolution - * encoder */ -typedef struct -{ - FRAME_TYPE frame_type; - int is_frame_dropped; - // The frame rate for the lowest resolution. - double low_res_framerate; - /* The frame number of each reference frames */ - unsigned int low_res_ref_frames[MAX_REF_FRAMES]; - // The video frame counter value for the key frame, for lowest resolution. - unsigned int key_frame_counter_value; - LOWER_RES_MB_INFO *mb_info; -} LOWER_RES_FRAME_INFO; -#endif - -typedef struct blockd -{ - short *qcoeff; - short *dqcoeff; - unsigned char *predictor; - short *dequant; - - int offset; - char *eob; - - union b_mode_info bmi; -} BLOCKD; - -typedef void (*vp8_subpix_fn_t)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch); - -typedef struct macroblockd -{ - DECLARE_ALIGNED(16, unsigned char, predictor[384]); - DECLARE_ALIGNED(16, short, qcoeff[400]); - DECLARE_ALIGNED(16, short, dqcoeff[400]); - DECLARE_ALIGNED(16, char, eobs[25]); - - DECLARE_ALIGNED(16, short, dequant_y1[16]); - DECLARE_ALIGNED(16, short, dequant_y1_dc[16]); - DECLARE_ALIGNED(16, short, dequant_y2[16]); - DECLARE_ALIGNED(16, short, dequant_uv[16]); - - /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */ - BLOCKD block[25]; - int fullpixel_mask; - - YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */ - YV12_BUFFER_CONFIG dst; - - MODE_INFO *mode_info_context; - int mode_info_stride; - - FRAME_TYPE frame_type; - - int up_available; - int left_available; - - unsigned char *recon_above[3]; - unsigned char *recon_left[3]; - int recon_left_stride[2]; - - /* Y,U,V,Y2 */ - ENTROPY_CONTEXT_PLANES *above_context; - ENTROPY_CONTEXT_PLANES *left_context; - - /* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */ - unsigned char segmentation_enabled; - - /* 0 (do not update) 1 (update) the macroblock segmentation map. 
*/ - unsigned char update_mb_segmentation_map; - - /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */ - unsigned char update_mb_segmentation_data; - - /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */ - unsigned char mb_segement_abs_delta; - - /* Per frame flags that define which MB level features (such as quantizer or loop filter level) */ - /* are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO */ - vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; /* Probability Tree used to code Segment number */ - - signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; /* Segment parameters */ - - /* mode_based Loop filter adjustment */ - unsigned char mode_ref_lf_delta_enabled; - unsigned char mode_ref_lf_delta_update; - - /* Delta values have the range +/- MAX_LOOP_FILTER */ - signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */ - signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */ - signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */ - signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */ - - /* Distance of MB away from frame edges */ - int mb_to_left_edge; - int mb_to_right_edge; - int mb_to_top_edge; - int mb_to_bottom_edge; - - - - vp8_subpix_fn_t subpixel_predict; - vp8_subpix_fn_t subpixel_predict8x4; - vp8_subpix_fn_t subpixel_predict8x8; - vp8_subpix_fn_t subpixel_predict16x16; - - void *current_bc; - - int corrupted; - -#if ARCH_X86 || ARCH_X86_64 - /* This is an intermediate buffer currently used in sub-pixel motion search - * to keep a copy of the reference area. This buffer can be used for other - * purpose. 
- */ - DECLARE_ALIGNED(32, unsigned char, y_buf[22*32]); -#endif -} MACROBLOCKD; - - -extern void vp8_build_block_doffsets(MACROBLOCKD *x); -extern void vp8_setup_block_dptrs(MACROBLOCKD *x); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_BLOCKD_H_ diff --git a/thirdparty/libvpx/vp8/common/coefupdateprobs.h b/thirdparty/libvpx/vp8/common/coefupdateprobs.h deleted file mode 100644 index d96a19e747..0000000000 --- a/thirdparty/libvpx/vp8/common/coefupdateprobs.h +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP8_COMMON_COEFUPDATEPROBS_H_ -#define VP8_COMMON_COEFUPDATEPROBS_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/* Update probabilities for the nodes in the token entropy tree. 
- Generated file included by entropy.c */ - -const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] = -{ - { - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255, }, - {249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255, }, - {234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255, }, - {250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - }, - { - { - {217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255, }, - {234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255, }, - }, - { - {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {223, 254, 254, 255, 255, 255, 255, 255, 
255, 255, 255, }, - {238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - }, - { - { - {186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255, }, - {234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255, }, - {251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255, }, - }, - { - {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 
255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - }, - { - { - {248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255, }, - {248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255, }, - {248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, - {252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 
255, }, - {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - { - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, - }, - }, -}; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_COEFUPDATEPROBS_H_ diff --git a/thirdparty/libvpx/vp8/common/common.h b/thirdparty/libvpx/vp8/common/common.h deleted file mode 100644 index e58a9cc23b..0000000000 --- a/thirdparty/libvpx/vp8/common/common.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_COMMON_H_ -#define VP8_COMMON_COMMON_H_ - -#include <assert.h> - -/* Interface header for common constant data structures and lookup tables */ - -#include "vpx_mem/vpx_mem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* Only need this for fixed-size arrays, for structs just assign. */ - -#define vp8_copy( Dest, Src) { \ - assert( sizeof( Dest) == sizeof( Src)); \ - memcpy( Dest, Src, sizeof( Src)); \ - } - -/* Use this for variably-sized arrays. 
*/ - -#define vp8_copy_array( Dest, Src, N) { \ - assert( sizeof( *Dest) == sizeof( *Src)); \ - memcpy( Dest, Src, N * sizeof( *Src)); \ - } - -#define vp8_zero( Dest) memset( &Dest, 0, sizeof( Dest)); - -#define vp8_zero_array( Dest, N) memset( Dest, 0, N * sizeof( *Dest)); - - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_COMMON_H_ diff --git a/thirdparty/libvpx/vp8/common/copy_c.c b/thirdparty/libvpx/vp8/common/copy_c.c deleted file mode 100644 index e3392913f6..0000000000 --- a/thirdparty/libvpx/vp8/common/copy_c.c +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include <string.h> - -#include "./vp8_rtcd.h" -#include "vpx/vpx_integer.h" - -/* Copy 2 macroblocks to a buffer */ -void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride, - unsigned char *dst_ptr, int dst_stride, - int height) -{ - int r; - - for (r = 0; r < height; r++) - { - memcpy(dst_ptr, src_ptr, 32); - - src_ptr += src_stride; - dst_ptr += dst_stride; - - } -} diff --git a/thirdparty/libvpx/vp8/common/debugmodes.c b/thirdparty/libvpx/vp8/common/debugmodes.c deleted file mode 100644 index 159fddc6a7..0000000000 --- a/thirdparty/libvpx/vp8/common/debugmodes.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include <stdio.h> -#include "blockd.h" - - -void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int frame) -{ - - int mb_row; - int mb_col; - int mb_index = 0; - FILE *mvs = fopen("mvs.stt", "a"); - - /* print out the macroblock Y modes */ - mb_index = 0; - fprintf(mvs, "Mb Modes for Frame %d\n", frame); - - for (mb_row = 0; mb_row < rows; mb_row++) - { - for (mb_col = 0; mb_col < cols; mb_col++) - { - - fprintf(mvs, "%2d ", mi[mb_index].mbmi.mode); - - mb_index++; - } - - fprintf(mvs, "\n"); - mb_index++; - } - - fprintf(mvs, "\n"); - - mb_index = 0; - fprintf(mvs, "Mb mv ref for Frame %d\n", frame); - - for (mb_row = 0; mb_row < rows; mb_row++) - { - for (mb_col = 0; mb_col < cols; mb_col++) - { - - fprintf(mvs, "%2d ", mi[mb_index].mbmi.ref_frame); - - mb_index++; - } - - fprintf(mvs, "\n"); - mb_index++; - } - - fprintf(mvs, "\n"); - - /* print out the macroblock UV modes */ - mb_index = 0; - fprintf(mvs, "UV Modes for Frame %d\n", frame); - - for (mb_row = 0; mb_row < rows; mb_row++) - { - for (mb_col = 0; mb_col < cols; mb_col++) - { - - fprintf(mvs, "%2d ", mi[mb_index].mbmi.uv_mode); - - mb_index++; - } - - mb_index++; - fprintf(mvs, "\n"); - } - - fprintf(mvs, "\n"); - - /* print out the block modes */ - fprintf(mvs, "Mbs for Frame %d\n", frame); - { - int b_row; - - for (b_row = 0; b_row < 4 * rows; b_row++) - { - int b_col; - int bindex; - - for (b_col = 0; b_col < 4 * cols; b_col++) - { - mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2); - bindex = (b_row & 3) * 4 + (b_col & 3); - - if (mi[mb_index].mbmi.mode == B_PRED) - fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode); - else - fprintf(mvs, "xx "); - - } - - fprintf(mvs, "\n"); - } - } - fprintf(mvs, "\n"); - - /* print out the macroblock mvs */ - mb_index = 0; - fprintf(mvs, "MVs for Frame %d\n", frame); - - for (mb_row = 0; mb_row < rows; mb_row++) - { - 
for (mb_col = 0; mb_col < cols; mb_col++) - { - fprintf(mvs, "%5d:%-5d", mi[mb_index].mbmi.mv.as_mv.row / 2, mi[mb_index].mbmi.mv.as_mv.col / 2); - - mb_index++; - } - - mb_index++; - fprintf(mvs, "\n"); - } - - fprintf(mvs, "\n"); - - - /* print out the block modes */ - fprintf(mvs, "MVs for Frame %d\n", frame); - { - int b_row; - - for (b_row = 0; b_row < 4 * rows; b_row++) - { - int b_col; - int bindex; - - for (b_col = 0; b_col < 4 * cols; b_col++) - { - mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2); - bindex = (b_row & 3) * 4 + (b_col & 3); - fprintf(mvs, "%3d:%-3d ", mi[mb_index].bmi[bindex].mv.as_mv.row, mi[mb_index].bmi[bindex].mv.as_mv.col); - - } - - fprintf(mvs, "\n"); - } - } - fprintf(mvs, "\n"); - - - fclose(mvs); -} diff --git a/thirdparty/libvpx/vp8/common/default_coef_probs.h b/thirdparty/libvpx/vp8/common/default_coef_probs.h deleted file mode 100644 index 4d69e4be66..0000000000 --- a/thirdparty/libvpx/vp8/common/default_coef_probs.h +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
-*/ - -#ifndef VP8_COMMON_DEFAULT_COEF_PROBS_H_ -#define VP8_COMMON_DEFAULT_COEF_PROBS_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/*Generated file, included by entropy.c*/ - - -static const vp8_prob default_coef_probs [BLOCK_TYPES] - [COEF_BANDS] - [PREV_COEF_CONTEXTS] - [ENTROPY_NODES] = -{ - { /* Block Type ( 0 ) */ - { /* Coeff Band ( 0 )*/ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - }, - { /* Coeff Band ( 1 )*/ - { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 }, - { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 }, - { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 } - }, - { /* Coeff Band ( 2 )*/ - { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 }, - { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 }, - { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 } - }, - { /* Coeff Band ( 3 )*/ - { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 }, - { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 }, - { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 } - }, - { /* Coeff Band ( 4 )*/ - { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 }, - { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 }, - { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 } - }, - { /* Coeff Band ( 5 )*/ - { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 }, - { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 }, - { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 } - }, - { /* Coeff Band ( 6 )*/ - { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 }, - { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 }, - { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 } - }, - { /* Coeff Band ( 7 )*/ - { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 
128 } - } - }, - { /* Block Type ( 1 ) */ - { /* Coeff Band ( 0 )*/ - { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 }, - { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 }, - { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 } - }, - { /* Coeff Band ( 1 )*/ - { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 }, - { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 }, - { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 } - }, - { /* Coeff Band ( 2 )*/ - { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 }, - { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 }, - { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 } - }, - { /* Coeff Band ( 3 )*/ - { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 }, - { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 }, - { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 } - }, - { /* Coeff Band ( 4 )*/ - { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 }, - { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 }, - { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 } - }, - { /* Coeff Band ( 5 )*/ - { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 }, - { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 }, - { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 } - }, - { /* Coeff Band ( 6 )*/ - { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 }, - { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 }, - { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 } - }, - { /* Coeff Band ( 7 )*/ - { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 }, - { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 } - } - }, - { /* Block Type ( 2 ) */ - { /* Coeff Band ( 0 )*/ - { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 }, - { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 }, - { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 } - }, - { /* Coeff Band ( 1 )*/ - { 1, 95, 247, 253, 212, 
183, 255, 255, 128, 128, 128 }, - { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 }, - { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 } - }, - { /* Coeff Band ( 2 )*/ - { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 }, - { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 }, - { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 } - }, - { /* Coeff Band ( 3 )*/ - { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 }, - { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 } - }, - { /* Coeff Band ( 4 )*/ - { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 }, - { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 } - }, - { /* Coeff Band ( 5 )*/ - { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 } - }, - { /* Coeff Band ( 6 )*/ - { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 }, - { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 } - }, - { /* Coeff Band ( 7 )*/ - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } - } - }, - { /* Block Type ( 3 ) */ - { /* Coeff Band ( 0 )*/ - { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 }, - { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 }, - { 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 } - }, - { /* Coeff Band ( 1 )*/ - { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 }, - { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 }, - { 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 } - }, - { /* Coeff Band ( 2 )*/ - { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 }, - { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 
128 }, - { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 } - }, - { /* Coeff Band ( 3 )*/ - { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 }, - { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 }, - { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 } - }, - { /* Coeff Band ( 4 )*/ - { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 }, - { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 }, - { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 } - }, - { /* Coeff Band ( 5 )*/ - { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 }, - { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 }, - { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 } - }, - { /* Coeff Band ( 6 )*/ - { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 }, - { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 }, - { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 } - }, - { /* Coeff Band ( 7 )*/ - { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, - { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 } - } - } -}; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_DEFAULT_COEF_PROBS_H_ diff --git a/thirdparty/libvpx/vp8/common/dequantize.c b/thirdparty/libvpx/vp8/common/dequantize.c deleted file mode 100644 index f8b04fa4ee..0000000000 --- a/thirdparty/libvpx/vp8/common/dequantize.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "vp8/common/blockd.h" -#include "vpx_mem/vpx_mem.h" - -void vp8_dequantize_b_c(BLOCKD *d, short *DQC) -{ - int i; - short *DQ = d->dqcoeff; - short *Q = d->qcoeff; - - for (i = 0; i < 16; i++) - { - DQ[i] = Q[i] * DQC[i]; - } -} - -void vp8_dequant_idct_add_c(short *input, short *dq, - unsigned char *dest, int stride) -{ - int i; - - for (i = 0; i < 16; i++) - { - input[i] = dq[i] * input[i]; - } - - vp8_short_idct4x4llm_c(input, dest, stride, dest, stride); - - memset(input, 0, 32); - -} diff --git a/thirdparty/libvpx/vp8/common/entropy.c b/thirdparty/libvpx/vp8/common/entropy.c deleted file mode 100644 index c00e565f06..0000000000 --- a/thirdparty/libvpx/vp8/common/entropy.c +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "entropy.h" -#include "blockd.h" -#include "onyxc_int.h" -#include "vpx_mem/vpx_mem.h" - -#include "coefupdateprobs.h" - -DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) = -{ - 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]) = -{ 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7}; - -DECLARE_ALIGNED(16, const unsigned char, - vp8_prev_token_class[MAX_ENTROPY_TOKENS]) = -{ 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0}; - -DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) = -{ - 0, 1, 4, 8, - 5, 2, 3, 6, - 9, 12, 13, 10, - 7, 11, 14, 15, -}; - -DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) = -{ - 1, 2, 6, 7, - 3, 5, 8, 13, - 4, 9, 12, 14, - 10, 11, 15, 16 -}; - -/* vp8_default_zig_zag_mask generated with: - - void vp8_init_scan_order_mask() - { - int i; - - for (i = 0; i < 16; i++) - { - vp8_default_zig_zag_mask[vp8_default_zig_zag1d[i]] = 1 << i; - } - - } -*/ -DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]) = -{ - 1, 2, 32, 64, - 4, 16, 128, 4096, - 8, 256, 2048, 8192, - 512, 1024, 16384, -32768 -}; - -const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6}; - -/* Array indices are identical to previously-existing 
CONTEXT_NODE indices */ - -const vp8_tree_index vp8_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */ -{ - -DCT_EOB_TOKEN, 2, /* 0 = EOB */ - -ZERO_TOKEN, 4, /* 1 = ZERO */ - -ONE_TOKEN, 6, /* 2 = ONE */ - 8, 12, /* 3 = LOW_VAL */ - -TWO_TOKEN, 10, /* 4 = TWO */ - -THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */ - 14, 16, /* 6 = HIGH_LOW */ - -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */ - 18, 20, /* 8 = CAT_THREEFOUR */ - -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */ - -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */ -}; - -/* vp8_coef_encodings generated with: - vp8_tokens_from_tree(vp8_coef_encodings, vp8_coef_tree); -*/ -vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] = -{ - {2, 2}, - {6, 3}, - {28, 5}, - {58, 6}, - {59, 6}, - {60, 6}, - {61, 6}, - {124, 7}, - {125, 7}, - {126, 7}, - {127, 7}, - {0, 1} -}; - -/* Trees for extra bits. Probabilities are constant and - do not depend on previously encoded bits */ - -static const vp8_prob Pcat1[] = { 159}; -static const vp8_prob Pcat2[] = { 165, 145}; -static const vp8_prob Pcat3[] = { 173, 148, 140}; -static const vp8_prob Pcat4[] = { 176, 155, 140, 135}; -static const vp8_prob Pcat5[] = { 180, 157, 141, 134, 130}; -static const vp8_prob Pcat6[] = -{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129}; - - -/* tree index tables generated with: - - void init_bit_tree(vp8_tree_index *p, int n) - { - int i = 0; - - while (++i < n) - { - p[0] = p[1] = i << 1; - p += 2; - } - - p[0] = p[1] = 0; - } - - void init_bit_trees() - { - init_bit_tree(cat1, 1); - init_bit_tree(cat2, 2); - init_bit_tree(cat3, 3); - init_bit_tree(cat4, 4); - init_bit_tree(cat5, 5); - init_bit_tree(cat6, 11); - } -*/ - -static const vp8_tree_index cat1[2] = { 0, 0 }; -static const vp8_tree_index cat2[4] = { 2, 2, 0, 0 }; -static const vp8_tree_index cat3[6] = { 2, 2, 4, 4, 0, 0 }; -static const vp8_tree_index cat4[8] = { 2, 2, 4, 4, 6, 6, 0, 0 }; -static const vp8_tree_index cat5[10] = { 2, 2, 4, 4, 6, 6, 8, 8, 
0, 0 }; -static const vp8_tree_index cat6[22] = { 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, - 14, 14, 16, 16, 18, 18, 20, 20, 0, 0 }; - -const vp8_extra_bit_struct vp8_extra_bits[12] = -{ - { 0, 0, 0, 0}, - { 0, 0, 0, 1}, - { 0, 0, 0, 2}, - { 0, 0, 0, 3}, - { 0, 0, 0, 4}, - { cat1, Pcat1, 1, 5}, - { cat2, Pcat2, 2, 7}, - { cat3, Pcat3, 3, 11}, - { cat4, Pcat4, 4, 19}, - { cat5, Pcat5, 5, 35}, - { cat6, Pcat6, 11, 67}, - { 0, 0, 0, 0} -}; - -#include "default_coef_probs.h" - -void vp8_default_coef_probs(VP8_COMMON *pc) -{ - memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(default_coef_probs)); -} - diff --git a/thirdparty/libvpx/vp8/common/entropy.h b/thirdparty/libvpx/vp8/common/entropy.h deleted file mode 100644 index a90bab4bac..0000000000 --- a/thirdparty/libvpx/vp8/common/entropy.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VP8_COMMON_ENTROPY_H_ -#define VP8_COMMON_ENTROPY_H_ - -#include "treecoder.h" -#include "blockd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* Coefficient token alphabet */ - -#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */ -#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */ -#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */ -#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */ -#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */ -#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */ -#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */ -#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */ -#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */ -#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */ -#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */ -#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */ - -#define MAX_ENTROPY_TOKENS 12 -#define ENTROPY_NODES 11 - -extern const vp8_tree_index vp8_coef_tree[]; - -extern const struct vp8_token_struct vp8_coef_encodings[MAX_ENTROPY_TOKENS]; - -typedef struct -{ - vp8_tree_p tree; - const vp8_prob *prob; - int Len; - int base_val; -} vp8_extra_bit_struct; - -extern const vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */ - -#define PROB_UPDATE_BASELINE_COST 7 - -#define MAX_PROB 255 -#define DCT_MAX_VALUE 2048 - - -/* Coefficients are predicted via a 3-dimensional probability table. */ - -/* Outside dimension. 0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */ - -#define BLOCK_TYPES 4 - -/* Middle dimension is a coarsening of the coefficient's - position within the 4x4 DCT. */ - -#define COEF_BANDS 8 -extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]); - -/* Inside dimension is 3-valued measure of nearby complexity, that is, - the extent to which nearby coefficients are nonzero. For the first - coefficient (DC, unless block type is 0), we look at the (already encoded) - blocks above and to the left of the current block. 
The context index is - then the number (0,1,or 2) of these blocks having nonzero coefficients. - After decoding a coefficient, the measure is roughly the size of the - most recently decoded coefficient (0 for 0, 1 for 1, 2 for >1). - Note that the intuitive meaning of this measure changes as coefficients - are decoded, e.g., prior to the first token, a zero means that my neighbors - are empty while, after the first token, because of the use of end-of-block, - a zero means we just decoded a zero and hence guarantees that a non-zero - coefficient will appear later in this block. However, this shift - in meaning is perfectly OK because our context depends also on the - coefficient band (and since zigzag positions 0, 1, and 2 are in - distinct bands). */ - -/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */ -# define PREV_COEF_CONTEXTS 3 - -extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]); - -extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - - -struct VP8Common; -void vp8_default_coef_probs(struct VP8Common *); - -extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]); -extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]); -extern DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]); -extern const int vp8_mb_feature_data_bits[MB_LVL_MAX]; - -void vp8_coef_tree_initialize(void); -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_ENTROPY_H_ diff --git a/thirdparty/libvpx/vp8/common/entropymode.c b/thirdparty/libvpx/vp8/common/entropymode.c deleted file mode 100644 index 8981a8d3c2..0000000000 --- a/thirdparty/libvpx/vp8/common/entropymode.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#define USE_PREBUILT_TABLES - -#include "entropymode.h" -#include "entropy.h" -#include "vpx_mem/vpx_mem.h" - -#include "vp8_entropymodedata.h" - -int vp8_mv_cont(const int_mv *l, const int_mv *a) -{ - int lez = (l->as_int == 0); - int aez = (a->as_int == 0); - int lea = (l->as_int == a->as_int); - - if (lea && lez) - return SUBMVREF_LEFT_ABOVE_ZED; - - if (lea) - return SUBMVREF_LEFT_ABOVE_SAME; - - if (aez) - return SUBMVREF_ABOVE_ZED; - - if (lez) - return SUBMVREF_LEFT_ZED; - - return SUBMVREF_NORMAL; -} - -static const vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1] = { 180, 162, 25}; - -const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1] = -{ - { 147, 136, 18 }, - { 106, 145, 1 }, - { 179, 121, 1 }, - { 223, 1 , 34 }, - { 208, 1 , 1 } -}; - - - -const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS] = -{ - { - 0, 0, 0, 0, - 0, 0, 0, 0, - 1, 1, 1, 1, - 1, 1, 1, 1, - }, - { - 0, 0, 1, 1, - 0, 0, 1, 1, - 0, 0, 1, 1, - 0, 0, 1, 1, - }, - { - 0, 0, 1, 1, - 0, 0, 1, 1, - 2, 2, 3, 3, - 2, 2, 3, 3, - }, - { - 0, 1, 2, 3, - 4, 5, 6, 7, - 8, 9, 10, 11, - 12, 13, 14, 15, - } -}; - -const int vp8_mbsplit_count [VP8_NUMMBSPLITS] = { 2, 2, 4, 16}; - -const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1] = { 110, 111, 150}; - - -/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. 
*/ - -const vp8_tree_index vp8_bmode_tree[18] = /* INTRAMODECONTEXTNODE value */ -{ - -B_DC_PRED, 2, /* 0 = DC_NODE */ - -B_TM_PRED, 4, /* 1 = TM_NODE */ - -B_VE_PRED, 6, /* 2 = VE_NODE */ - 8, 12, /* 3 = COM_NODE */ - -B_HE_PRED, 10, /* 4 = HE_NODE */ - -B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */ - -B_LD_PRED, 14, /* 6 = LD_NODE */ - -B_VL_PRED, 16, /* 7 = VL_NODE */ - -B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */ -}; - -/* Again, these trees use the same probability indices as their - explicitly-programmed predecessors. */ - -const vp8_tree_index vp8_ymode_tree[8] = -{ - -DC_PRED, 2, - 4, 6, - -V_PRED, -H_PRED, - -TM_PRED, -B_PRED -}; - -const vp8_tree_index vp8_kf_ymode_tree[8] = -{ - -B_PRED, 2, - 4, 6, - -DC_PRED, -V_PRED, - -H_PRED, -TM_PRED -}; - -const vp8_tree_index vp8_uv_mode_tree[6] = -{ - -DC_PRED, 2, - -V_PRED, 4, - -H_PRED, -TM_PRED -}; - -const vp8_tree_index vp8_mbsplit_tree[6] = -{ - -3, 2, - -2, 4, - -0, -1 -}; - -const vp8_tree_index vp8_mv_ref_tree[8] = -{ - -ZEROMV, 2, - -NEARESTMV, 4, - -NEARMV, 6, - -NEWMV, -SPLITMV -}; - -const vp8_tree_index vp8_sub_mv_ref_tree[6] = -{ - -LEFT4X4, 2, - -ABOVE4X4, 4, - -ZERO4X4, -NEW4X4 -}; - -const vp8_tree_index vp8_small_mvtree [14] = -{ - 2, 8, - 4, 6, - -0, -1, - -2, -3, - 10, 12, - -4, -5, - -6, -7 -}; - -void vp8_init_mbmode_probs(VP8_COMMON *x) -{ - memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob)); - memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob)); - memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob)); -} - -void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1]) -{ - memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob)); -} - diff --git a/thirdparty/libvpx/vp8/common/entropymode.h b/thirdparty/libvpx/vp8/common/entropymode.h deleted file mode 100644 index 81bdfc4b8b..0000000000 --- a/thirdparty/libvpx/vp8/common/entropymode.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_ENTROPYMODE_H_ -#define VP8_COMMON_ENTROPYMODE_H_ - -#include "onyxc_int.h" -#include "treecoder.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum -{ - SUBMVREF_NORMAL, - SUBMVREF_LEFT_ZED, - SUBMVREF_ABOVE_ZED, - SUBMVREF_LEFT_ABOVE_SAME, - SUBMVREF_LEFT_ABOVE_ZED -} sumvfref_t; - -typedef int vp8_mbsplit[16]; - -#define VP8_NUMMBSPLITS 4 - -extern const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS]; - -extern const int vp8_mbsplit_count [VP8_NUMMBSPLITS]; /* # of subsets */ - -extern const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1]; - -extern int vp8_mv_cont(const int_mv *l, const int_mv *a); -#define SUBMVREF_COUNT 5 -extern const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1]; - - -extern const unsigned int vp8_kf_default_bmode_counts [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES]; - - -extern const vp8_tree_index vp8_bmode_tree[]; - -extern const vp8_tree_index vp8_ymode_tree[]; -extern const vp8_tree_index vp8_kf_ymode_tree[]; -extern const vp8_tree_index vp8_uv_mode_tree[]; - -extern const vp8_tree_index vp8_mbsplit_tree[]; -extern const vp8_tree_index vp8_mv_ref_tree[]; -extern const vp8_tree_index vp8_sub_mv_ref_tree[]; - -extern const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES]; -extern const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES]; -extern const struct vp8_token_struct vp8_kf_ymode_encodings[VP8_YMODES]; -extern const struct vp8_token_struct vp8_uv_mode_encodings[VP8_UV_MODES]; -extern const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS]; - -/* Inter mode values do not start at zero */ - -extern const struct 
vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS]; -extern const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS]; - -extern const vp8_tree_index vp8_small_mvtree[]; - -extern const struct vp8_token_struct vp8_small_mvencodings[8]; - -/* Key frame default mode probs */ -extern const vp8_prob vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES] -[VP8_BINTRAMODES-1]; -extern const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1]; -extern const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1]; - -void vp8_init_mbmode_probs(VP8_COMMON *x); -void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]); -void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_ENTROPYMODE_H_ diff --git a/thirdparty/libvpx/vp8/common/entropymv.c b/thirdparty/libvpx/vp8/common/entropymv.c deleted file mode 100644 index e5df1f0955..0000000000 --- a/thirdparty/libvpx/vp8/common/entropymv.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "entropymv.h" - -const MV_CONTEXT vp8_mv_update_probs[2] = -{ - {{ - 237, - 246, - 253, 253, 254, 254, 254, 254, 254, - 254, 254, 254, 254, 254, 250, 250, 252, 254, 254 - }}, - {{ - 231, - 243, - 245, 253, 254, 254, 254, 254, 254, - 254, 254, 254, 254, 254, 251, 251, 254, 254, 254 - }} -}; -const MV_CONTEXT vp8_default_mv_context[2] = -{ - {{ - /* row */ - 162, /* is short */ - 128, /* sign */ - 225, 146, 172, 147, 214, 39, 156, /* short tree */ - 128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */ - }}, - - - - {{ - /* same for column */ - 164, /* is short */ - 128, - 204, 170, 119, 235, 140, 230, 228, - 128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */ - - }} -}; diff --git a/thirdparty/libvpx/vp8/common/entropymv.h b/thirdparty/libvpx/vp8/common/entropymv.h deleted file mode 100644 index 42840d58ad..0000000000 --- a/thirdparty/libvpx/vp8/common/entropymv.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VP8_COMMON_ENTROPYMV_H_ -#define VP8_COMMON_ENTROPYMV_H_ - -#include "treecoder.h" - -#ifdef __cplusplus -extern "C" { -#endif - -enum -{ - mv_max = 1023, /* max absolute value of a MV component */ - MVvals = (2 * mv_max) + 1, /* # possible values "" */ - mvfp_max = 255, /* max absolute value of a full pixel MV component */ - MVfpvals = (2 * mvfp_max) +1, /* # possible full pixel MV values */ - - mvlong_width = 10, /* Large MVs have 9 bit magnitudes */ - mvnum_short = 8, /* magnitudes 0 through 7 */ - - /* probability offsets for coding each MV component */ - - mvpis_short = 0, /* short (<= 7) vs long (>= 8) */ - MVPsign, /* sign for non-zero */ - MVPshort, /* 8 short values = 7-position tree */ - - MVPbits = MVPshort + mvnum_short - 1, /* mvlong_width long value bits */ - MVPcount = MVPbits + mvlong_width /* (with independent probabilities) */ -}; - -typedef struct mv_context -{ - vp8_prob prob[MVPcount]; /* often come in row, col pairs */ -} MV_CONTEXT; - -extern const MV_CONTEXT vp8_mv_update_probs[2], vp8_default_mv_context[2]; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_ENTROPYMV_H_ diff --git a/thirdparty/libvpx/vp8/common/extend.c b/thirdparty/libvpx/vp8/common/extend.c deleted file mode 100644 index 2d938ad782..0000000000 --- a/thirdparty/libvpx/vp8/common/extend.c +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "extend.h" -#include "vpx_mem/vpx_mem.h" - - -static void copy_and_extend_plane -( - unsigned char *s, /* source */ - int sp, /* source pitch */ - unsigned char *d, /* destination */ - int dp, /* destination pitch */ - int h, /* height */ - int w, /* width */ - int et, /* extend top border */ - int el, /* extend left border */ - int eb, /* extend bottom border */ - int er /* extend right border */ -) -{ - int i; - unsigned char *src_ptr1, *src_ptr2; - unsigned char *dest_ptr1, *dest_ptr2; - int linesize; - - /* copy the left and right most columns out */ - src_ptr1 = s; - src_ptr2 = s + w - 1; - dest_ptr1 = d - el; - dest_ptr2 = d + w; - - for (i = 0; i < h; i++) - { - memset(dest_ptr1, src_ptr1[0], el); - memcpy(dest_ptr1 + el, src_ptr1, w); - memset(dest_ptr2, src_ptr2[0], er); - src_ptr1 += sp; - src_ptr2 += sp; - dest_ptr1 += dp; - dest_ptr2 += dp; - } - - /* Now copy the top and bottom lines into each line of the respective - * borders - */ - src_ptr1 = d - el; - src_ptr2 = d + dp * (h - 1) - el; - dest_ptr1 = d + dp * (-et) - el; - dest_ptr2 = d + dp * (h) - el; - linesize = el + er + w; - - for (i = 0; i < et; i++) - { - memcpy(dest_ptr1, src_ptr1, linesize); - dest_ptr1 += dp; - } - - for (i = 0; i < eb; i++) - { - memcpy(dest_ptr2, src_ptr2, linesize); - dest_ptr2 += dp; - } -} - - -void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst) -{ - int et = dst->border; - int el = dst->border; - int eb = dst->border + dst->y_height - src->y_height; - int er = dst->border + dst->y_width - src->y_width; - - copy_and_extend_plane(src->y_buffer, src->y_stride, - dst->y_buffer, dst->y_stride, - src->y_height, src->y_width, - et, el, eb, er); - - et = dst->border >> 1; - el = dst->border >> 1; - eb = (dst->border >> 1) + dst->uv_height - src->uv_height; - er = (dst->border >> 1) + dst->uv_width - src->uv_width; - - copy_and_extend_plane(src->u_buffer, src->uv_stride, - dst->u_buffer, dst->uv_stride, - src->uv_height, 
src->uv_width, - et, el, eb, er); - - copy_and_extend_plane(src->v_buffer, src->uv_stride, - dst->v_buffer, dst->uv_stride, - src->uv_height, src->uv_width, - et, el, eb, er); -} - - -void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, - int srcy, int srcx, - int srch, int srcw) -{ - int et = dst->border; - int el = dst->border; - int eb = dst->border + dst->y_height - src->y_height; - int er = dst->border + dst->y_width - src->y_width; - int src_y_offset = srcy * src->y_stride + srcx; - int dst_y_offset = srcy * dst->y_stride + srcx; - int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1); - int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1); - - /* If the side is not touching the bounder then don't extend. */ - if (srcy) - et = 0; - if (srcx) - el = 0; - if (srcy + srch != src->y_height) - eb = 0; - if (srcx + srcw != src->y_width) - er = 0; - - copy_and_extend_plane(src->y_buffer + src_y_offset, - src->y_stride, - dst->y_buffer + dst_y_offset, - dst->y_stride, - srch, srcw, - et, el, eb, er); - - et = (et + 1) >> 1; - el = (el + 1) >> 1; - eb = (eb + 1) >> 1; - er = (er + 1) >> 1; - srch = (srch + 1) >> 1; - srcw = (srcw + 1) >> 1; - - copy_and_extend_plane(src->u_buffer + src_uv_offset, - src->uv_stride, - dst->u_buffer + dst_uv_offset, - dst->uv_stride, - srch, srcw, - et, el, eb, er); - - copy_and_extend_plane(src->v_buffer + src_uv_offset, - src->uv_stride, - dst->v_buffer + dst_uv_offset, - dst->uv_stride, - srch, srcw, - et, el, eb, er); -} - - -/* note the extension is only for the last row, for intra prediction purpose */ -void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, - unsigned char *YPtr, - unsigned char *UPtr, - unsigned char *VPtr) -{ - int i; - - YPtr += ybf->y_stride * 14; - UPtr += ybf->uv_stride * 6; - VPtr += ybf->uv_stride * 6; - - for (i = 0; i < 4; i++) - { - YPtr[i] = YPtr[-1]; - UPtr[i] = UPtr[-1]; - VPtr[i] = VPtr[-1]; - } - - YPtr += ybf->y_stride; - UPtr += 
ybf->uv_stride; - VPtr += ybf->uv_stride; - - for (i = 0; i < 4; i++) - { - YPtr[i] = YPtr[-1]; - UPtr[i] = UPtr[-1]; - VPtr[i] = VPtr[-1]; - } -} diff --git a/thirdparty/libvpx/vp8/common/extend.h b/thirdparty/libvpx/vp8/common/extend.h deleted file mode 100644 index 068f4ac523..0000000000 --- a/thirdparty/libvpx/vp8/common/extend.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_EXTEND_H_ -#define VP8_COMMON_EXTEND_H_ - -#include "vpx_scale/yv12config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr); -void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst); -void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, - int srcy, int srcx, - int srch, int srcw); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_EXTEND_H_ diff --git a/thirdparty/libvpx/vp8/common/filter.c b/thirdparty/libvpx/vp8/common/filter.c deleted file mode 100644 index 84c608effa..0000000000 --- a/thirdparty/libvpx/vp8/common/filter.c +++ /dev/null @@ -1,493 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "filter.h" -#include "./vp8_rtcd.h" - -DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = -{ - { 128, 0 }, - { 112, 16 }, - { 96, 32 }, - { 80, 48 }, - { 64, 64 }, - { 48, 80 }, - { 32, 96 }, - { 16, 112 } -}; - -DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = -{ - - { 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */ - { 0, -6, 123, 12, -1, 0 }, - { 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */ - { 0, -9, 93, 50, -6, 0 }, - { 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */ - { 0, -6, 50, 93, -9, 0 }, - { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */ - { 0, -1, 12, 123, -6, 0 }, -}; - -static void filter_block2d_first_pass -( - unsigned char *src_ptr, - int *output_ptr, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -) -{ - unsigned int i, j; - int Temp; - - for (i = 0; i < output_height; i++) - { - for (j = 0; j < output_width; j++) - { - Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + - ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + - ((int)src_ptr[0] * vp8_filter[2]) + - ((int)src_ptr[pixel_step] * vp8_filter[3]) + - ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + - ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + - (VP8_FILTER_WEIGHT >> 1); /* Rounding */ - - /* Normalize back to 0-255 */ - Temp = Temp >> VP8_FILTER_SHIFT; - - if (Temp < 0) - Temp = 0; - else if (Temp > 255) - Temp = 255; - - output_ptr[j] = Temp; - src_ptr++; - } - - /* Next row... 
*/ - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_width; - } -} - -static void filter_block2d_second_pass -( - int *src_ptr, - unsigned char *output_ptr, - int output_pitch, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -) -{ - unsigned int i, j; - int Temp; - - for (i = 0; i < output_height; i++) - { - for (j = 0; j < output_width; j++) - { - /* Apply filter */ - Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + - ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + - ((int)src_ptr[0] * vp8_filter[2]) + - ((int)src_ptr[pixel_step] * vp8_filter[3]) + - ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + - ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + - (VP8_FILTER_WEIGHT >> 1); /* Rounding */ - - /* Normalize back to 0-255 */ - Temp = Temp >> VP8_FILTER_SHIFT; - - if (Temp < 0) - Temp = 0; - else if (Temp > 255) - Temp = 255; - - output_ptr[j] = (unsigned char)Temp; - src_ptr++; - } - - /* Start next row */ - src_ptr += src_pixels_per_line - output_width; - output_ptr += output_pitch; - } -} - - -static void filter_block2d -( - unsigned char *src_ptr, - unsigned char *output_ptr, - unsigned int src_pixels_per_line, - int output_pitch, - const short *HFilter, - const short *VFilter -) -{ - int FData[9*4]; /* Temp data buffer used in filtering */ - - /* First filter 1-D horizontally... */ - filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter); - - /* then filter verticaly... 
*/ - filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter); -} - - -void vp8_sixtap_predict4x4_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - - HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ - - filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter); -} -void vp8_sixtap_predict8x8_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - int FData[13*16]; /* Temp data buffer used in filtering */ - - HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ - - /* First filter 1-D horizontally... */ - filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter); - - - /* then filter verticaly... */ - filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); - -} - -void vp8_sixtap_predict8x4_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - int FData[13*16]; /* Temp data buffer used in filtering */ - - HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ - - /* First filter 1-D horizontally... */ - filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter); - - - /* then filter verticaly... 
*/ - filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter); - -} - -void vp8_sixtap_predict16x16_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - int FData[21*24]; /* Temp data buffer used in filtering */ - - - HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ - VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ - - /* First filter 1-D horizontally... */ - filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter); - - /* then filter verticaly... */ - filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter); - -} - - -/**************************************************************************** - * - * ROUTINE : filter_block2d_bil_first_pass - * - * INPUTS : UINT8 *src_ptr : Pointer to source block. - * UINT32 src_stride : Stride of source block. - * UINT32 height : Block height. - * UINT32 width : Block width. - * INT32 *vp8_filter : Array of 2 bi-linear filter taps. - * - * OUTPUTS : INT32 *dst_ptr : Pointer to filtered block. - * - * RETURNS : void - * - * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block - * in the horizontal direction to produce the filtered output - * block. Used to implement first-pass of 2-D separable filter. - * - * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. - * Two filter taps should sum to VP8_FILTER_WEIGHT. 
- * - ****************************************************************************/ -static void filter_block2d_bil_first_pass -( - unsigned char *src_ptr, - unsigned short *dst_ptr, - unsigned int src_stride, - unsigned int height, - unsigned int width, - const short *vp8_filter -) -{ - unsigned int i, j; - - for (i = 0; i < height; i++) - { - for (j = 0; j < width; j++) - { - /* Apply bilinear filter */ - dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + - ((int)src_ptr[1] * vp8_filter[1]) + - (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; - src_ptr++; - } - - /* Next row... */ - src_ptr += src_stride - width; - dst_ptr += width; - } -} - -/**************************************************************************** - * - * ROUTINE : filter_block2d_bil_second_pass - * - * INPUTS : INT32 *src_ptr : Pointer to source block. - * UINT32 dst_pitch : Destination block pitch. - * UINT32 height : Block height. - * UINT32 width : Block width. - * INT32 *vp8_filter : Array of 2 bi-linear filter taps. - * - * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. - * - * RETURNS : void - * - * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block - * in the vertical direction to produce the filtered output - * block. Used to implement second-pass of 2-D separable filter. - * - * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass. - * Two filter taps should sum to VP8_FILTER_WEIGHT. 
- * - ****************************************************************************/ -static void filter_block2d_bil_second_pass -( - unsigned short *src_ptr, - unsigned char *dst_ptr, - int dst_pitch, - unsigned int height, - unsigned int width, - const short *vp8_filter -) -{ - unsigned int i, j; - int Temp; - - for (i = 0; i < height; i++) - { - for (j = 0; j < width; j++) - { - /* Apply filter */ - Temp = ((int)src_ptr[0] * vp8_filter[0]) + - ((int)src_ptr[width] * vp8_filter[1]) + - (VP8_FILTER_WEIGHT / 2); - dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); - src_ptr++; - } - - /* Next row... */ - dst_ptr += dst_pitch; - } -} - - -/**************************************************************************** - * - * ROUTINE : filter_block2d_bil - * - * INPUTS : UINT8 *src_ptr : Pointer to source block. - * UINT32 src_pitch : Stride of source block. - * UINT32 dst_pitch : Stride of destination block. - * INT32 *HFilter : Array of 2 horizontal filter taps. - * INT32 *VFilter : Array of 2 vertical filter taps. - * INT32 Width : Block width - * INT32 Height : Block height - * - * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. - * - * RETURNS : void - * - * FUNCTION : 2-D filters an input block by applying a 2-tap - * bi-linear filter horizontally followed by a 2-tap - * bi-linear filter vertically on the result. - * - * SPECIAL NOTES : The largest block size can be handled here is 16x16 - * - ****************************************************************************/ -static void filter_block2d_bil -( - unsigned char *src_ptr, - unsigned char *dst_ptr, - unsigned int src_pitch, - unsigned int dst_pitch, - const short *HFilter, - const short *VFilter, - int Width, - int Height -) -{ - - unsigned short FData[17*16]; /* Temp data buffer used in filtering */ - - /* First filter 1-D horizontally... */ - filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter); - - /* then 1-D vertically... 
*/ - filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter); -} - - -void vp8_bilinear_predict4x4_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; -#if 0 - { - int i; - unsigned char temp1[16]; - unsigned char temp2[16]; - - bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); - filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); - - for (i = 0; i < 16; i++) - { - if (temp1[i] != temp2[i]) - { - bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); - filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); - } - } - } -#endif - filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4); - -} - -void vp8_bilinear_predict8x8_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; - - filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8); - -} - -void vp8_bilinear_predict8x4_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short *HFilter; - const short *VFilter; - - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; - - filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4); - -} - -void vp8_bilinear_predict16x16_c -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - const short 
*HFilter; - const short *VFilter; - - HFilter = vp8_bilinear_filters[xoffset]; - VFilter = vp8_bilinear_filters[yoffset]; - - filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16); -} diff --git a/thirdparty/libvpx/vp8/common/filter.h b/thirdparty/libvpx/vp8/common/filter.h deleted file mode 100644 index cfba775fce..0000000000 --- a/thirdparty/libvpx/vp8/common/filter.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_FILTER_H_ -#define VP8_COMMON_FILTER_H_ - -#include "vpx_ports/mem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define BLOCK_HEIGHT_WIDTH 4 -#define VP8_FILTER_WEIGHT 128 -#define VP8_FILTER_SHIFT 7 - -extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]); -extern DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_FILTER_H_ diff --git a/thirdparty/libvpx/vp8/common/findnearmv.c b/thirdparty/libvpx/vp8/common/findnearmv.c deleted file mode 100644 index e8ee40f56c..0000000000 --- a/thirdparty/libvpx/vp8/common/findnearmv.c +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "findnearmv.h" - -const unsigned char vp8_mbsplit_offset[4][16] = { - { 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} -}; - -/* Predict motion vectors using those from already-decoded nearby blocks. - Note that we only consider one 4x4 subblock from each candidate 16x16 - macroblock. */ -void vp8_find_near_mvs -( - MACROBLOCKD *xd, - const MODE_INFO *here, - int_mv *nearest, - int_mv *nearby, - int_mv *best_mv, - int cnt[4], - int refframe, - int *ref_frame_sign_bias -) -{ - const MODE_INFO *above = here - xd->mode_info_stride; - const MODE_INFO *left = here - 1; - const MODE_INFO *aboveleft = above - 1; - int_mv near_mvs[4]; - int_mv *mv = near_mvs; - int *cntx = cnt; - enum {CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV}; - - /* Zero accumulators */ - mv[0].as_int = mv[1].as_int = mv[2].as_int = 0; - cnt[0] = cnt[1] = cnt[2] = cnt[3] = 0; - - /* Process above */ - if (above->mbmi.ref_frame != INTRA_FRAME) - { - if (above->mbmi.mv.as_int) - { - (++mv)->as_int = above->mbmi.mv.as_int; - mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias); - ++cntx; - } - - *cntx += 2; - } - - /* Process left */ - if (left->mbmi.ref_frame != INTRA_FRAME) - { - if (left->mbmi.mv.as_int) - { - int_mv this_mv; - - this_mv.as_int = left->mbmi.mv.as_int; - mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias); - - if (this_mv.as_int != mv->as_int) - { - (++mv)->as_int = this_mv.as_int; - ++cntx; - } - - *cntx += 2; - } - else - cnt[CNT_INTRA] += 2; - } - - /* Process above left */ - if (aboveleft->mbmi.ref_frame != INTRA_FRAME) - { - if (aboveleft->mbmi.mv.as_int) - { - int_mv this_mv; - - this_mv.as_int = aboveleft->mbmi.mv.as_int; - mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias); - 
- if (this_mv.as_int != mv->as_int) - { - (++mv)->as_int = this_mv.as_int; - ++cntx; - } - - *cntx += 1; - } - else - cnt[CNT_INTRA] += 1; - } - - /* If we have three distinct MV's ... */ - if (cnt[CNT_SPLITMV]) - { - /* See if above-left MV can be merged with NEAREST */ - if (mv->as_int == near_mvs[CNT_NEAREST].as_int) - cnt[CNT_NEAREST] += 1; - } - - cnt[CNT_SPLITMV] = ((above->mbmi.mode == SPLITMV) - + (left->mbmi.mode == SPLITMV)) * 2 - + (aboveleft->mbmi.mode == SPLITMV); - - /* Swap near and nearest if necessary */ - if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) - { - int tmp; - tmp = cnt[CNT_NEAREST]; - cnt[CNT_NEAREST] = cnt[CNT_NEAR]; - cnt[CNT_NEAR] = tmp; - tmp = near_mvs[CNT_NEAREST].as_int; - near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int; - near_mvs[CNT_NEAR].as_int = tmp; - } - - /* Use near_mvs[0] to store the "best" MV */ - if (cnt[CNT_NEAREST] >= cnt[CNT_INTRA]) - near_mvs[CNT_INTRA] = near_mvs[CNT_NEAREST]; - - /* Set up return values */ - best_mv->as_int = near_mvs[0].as_int; - nearest->as_int = near_mvs[CNT_NEAREST].as_int; - nearby->as_int = near_mvs[CNT_NEAR].as_int; -} - - -static void invert_and_clamp_mvs(int_mv *inv, int_mv *src, MACROBLOCKD *xd) -{ - inv->as_mv.row = src->as_mv.row * -1; - inv->as_mv.col = src->as_mv.col * -1; - vp8_clamp_mv2(inv, xd); - vp8_clamp_mv2(src, xd); -} - - -int vp8_find_near_mvs_bias -( - MACROBLOCKD *xd, - const MODE_INFO *here, - int_mv mode_mv_sb[2][MB_MODE_COUNT], - int_mv best_mv_sb[2], - int cnt[4], - int refframe, - int *ref_frame_sign_bias -) -{ - int sign_bias = ref_frame_sign_bias[refframe]; - - vp8_find_near_mvs(xd, - here, - &mode_mv_sb[sign_bias][NEARESTMV], - &mode_mv_sb[sign_bias][NEARMV], - &best_mv_sb[sign_bias], - cnt, - refframe, - ref_frame_sign_bias); - - invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARESTMV], - &mode_mv_sb[sign_bias][NEARESTMV], xd); - invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARMV], - &mode_mv_sb[sign_bias][NEARMV], xd); - 
invert_and_clamp_mvs(&best_mv_sb[!sign_bias], - &best_mv_sb[sign_bias], xd); - - return sign_bias; -} - - -vp8_prob *vp8_mv_ref_probs( - vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4] -) -{ - p[0] = vp8_mode_contexts [near_mv_ref_ct[0]] [0]; - p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1]; - p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2]; - p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3]; - /*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/ - return p; -} - diff --git a/thirdparty/libvpx/vp8/common/findnearmv.h b/thirdparty/libvpx/vp8/common/findnearmv.h deleted file mode 100644 index 472a7b5d8d..0000000000 --- a/thirdparty/libvpx/vp8/common/findnearmv.h +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VP8_COMMON_FINDNEARMV_H_ -#define VP8_COMMON_FINDNEARMV_H_ - -#include "./vpx_config.h" -#include "mv.h" -#include "blockd.h" -#include "modecont.h" -#include "treecoder.h" - -#ifdef __cplusplus -extern "C" { -#endif - - -static INLINE void mv_bias(int refmb_ref_frame_sign_bias, int refframe, - int_mv *mvp, const int *ref_frame_sign_bias) -{ - if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe]) - { - mvp->as_mv.row *= -1; - mvp->as_mv.col *= -1; - } -} - -#define LEFT_TOP_MARGIN (16 << 3) -#define RIGHT_BOTTOM_MARGIN (16 << 3) -static INLINE void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) -{ - if (mv->as_mv.col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN)) - mv->as_mv.col = xd->mb_to_left_edge - LEFT_TOP_MARGIN; - else if (mv->as_mv.col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN) - mv->as_mv.col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN; - - if (mv->as_mv.row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN)) - mv->as_mv.row = xd->mb_to_top_edge - LEFT_TOP_MARGIN; - else if (mv->as_mv.row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN) - mv->as_mv.row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN; -} - -static INLINE void vp8_clamp_mv(int_mv *mv, int mb_to_left_edge, - int mb_to_right_edge, int mb_to_top_edge, - int mb_to_bottom_edge) -{ - mv->as_mv.col = (mv->as_mv.col < mb_to_left_edge) ? - mb_to_left_edge : mv->as_mv.col; - mv->as_mv.col = (mv->as_mv.col > mb_to_right_edge) ? - mb_to_right_edge : mv->as_mv.col; - mv->as_mv.row = (mv->as_mv.row < mb_to_top_edge) ? - mb_to_top_edge : mv->as_mv.row; - mv->as_mv.row = (mv->as_mv.row > mb_to_bottom_edge) ? 
- mb_to_bottom_edge : mv->as_mv.row; -} -static INLINE unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge, - int mb_to_right_edge, - int mb_to_top_edge, - int mb_to_bottom_edge) -{ - unsigned int need_to_clamp; - need_to_clamp = (mv->as_mv.col < mb_to_left_edge); - need_to_clamp |= (mv->as_mv.col > mb_to_right_edge); - need_to_clamp |= (mv->as_mv.row < mb_to_top_edge); - need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge); - return need_to_clamp; -} - -void vp8_find_near_mvs -( - MACROBLOCKD *xd, - const MODE_INFO *here, - int_mv *nearest, int_mv *nearby, int_mv *best, - int near_mv_ref_cts[4], - int refframe, - int *ref_frame_sign_bias -); - - -int vp8_find_near_mvs_bias -( - MACROBLOCKD *xd, - const MODE_INFO *here, - int_mv mode_mv_sb[2][MB_MODE_COUNT], - int_mv best_mv_sb[2], - int cnt[4], - int refframe, - int *ref_frame_sign_bias -); - - -vp8_prob *vp8_mv_ref_probs( - vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4] -); - -extern const unsigned char vp8_mbsplit_offset[4][16]; - - -static INLINE uint32_t left_block_mv(const MODE_INFO *cur_mb, int b) -{ - if (!(b & 3)) - { - /* On L edge, get from MB to left of us */ - --cur_mb; - - if(cur_mb->mbmi.mode != SPLITMV) - return cur_mb->mbmi.mv.as_int; - b += 4; - } - - return (cur_mb->bmi + b - 1)->mv.as_int; -} - -static INLINE uint32_t above_block_mv(const MODE_INFO *cur_mb, int b, - int mi_stride) -{ - if (!(b >> 2)) - { - /* On top edge, get from MB above us */ - cur_mb -= mi_stride; - - if(cur_mb->mbmi.mode != SPLITMV) - return cur_mb->mbmi.mv.as_int; - b += 16; - } - - return (cur_mb->bmi + (b - 4))->mv.as_int; -} -static INLINE B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) -{ - if (!(b & 3)) - { - /* On L edge, get from MB to left of us */ - --cur_mb; - switch (cur_mb->mbmi.mode) - { - case B_PRED: - return (cur_mb->bmi + b + 3)->as_mode; - case DC_PRED: - return B_DC_PRED; - case V_PRED: - return B_VE_PRED; - case H_PRED: - return B_HE_PRED; - case TM_PRED: - return 
B_TM_PRED; - default: - return B_DC_PRED; - } - } - - return (cur_mb->bmi + b - 1)->as_mode; -} - -static INLINE B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, - int mi_stride) -{ - if (!(b >> 2)) - { - /* On top edge, get from MB above us */ - cur_mb -= mi_stride; - - switch (cur_mb->mbmi.mode) - { - case B_PRED: - return (cur_mb->bmi + b + 12)->as_mode; - case DC_PRED: - return B_DC_PRED; - case V_PRED: - return B_VE_PRED; - case H_PRED: - return B_HE_PRED; - case TM_PRED: - return B_TM_PRED; - default: - return B_DC_PRED; - } - } - - return (cur_mb->bmi + b - 4)->as_mode; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_FINDNEARMV_H_ diff --git a/thirdparty/libvpx/vp8/common/generic/systemdependent.c b/thirdparty/libvpx/vp8/common/generic/systemdependent.c deleted file mode 100644 index 6d5f302d7a..0000000000 --- a/thirdparty/libvpx/vp8/common/generic/systemdependent.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#if ARCH_ARM -#include "vpx_ports/arm.h" -#elif ARCH_X86 || ARCH_X86_64 -#include "vpx_ports/x86.h" -#endif -#include "vp8/common/onyxc_int.h" -#include "vp8/common/systemdependent.h" - -#if CONFIG_MULTITHREAD -#if HAVE_UNISTD_H && !defined(__OS2__) -#include <unistd.h> -#elif defined(_WIN32) -#include <windows.h> -typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO); -#elif defined(__OS2__) -#define INCL_DOS -#define INCL_DOSSPINLOCK -#include <os2.h> -#endif -#endif - -#if CONFIG_MULTITHREAD -static int get_cpu_count() -{ - int core_count = 16; - -#if HAVE_UNISTD_H && !defined(__OS2__) -#if defined(_SC_NPROCESSORS_ONLN) - core_count = sysconf(_SC_NPROCESSORS_ONLN); -#elif defined(_SC_NPROC_ONLN) - core_count = sysconf(_SC_NPROC_ONLN); -#endif -#elif defined(_WIN32) - { -#if _WIN32_WINNT >= 0x0501 - SYSTEM_INFO sysinfo; - GetNativeSystemInfo(&sysinfo); -#else - PGNSI pGNSI; - SYSTEM_INFO sysinfo; - - /* Call GetNativeSystemInfo if supported or - * GetSystemInfo otherwise. */ - - pGNSI = (PGNSI) GetProcAddress( - GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo"); - if (pGNSI != NULL) - pGNSI(&sysinfo); - else - GetSystemInfo(&sysinfo); -#endif - - core_count = sysinfo.dwNumberOfProcessors; - } -#elif defined(__OS2__) - { - ULONG proc_id; - ULONG status; - - core_count = 0; - for (proc_id = 1; ; proc_id++) - { - if (DosGetProcessorStatus(proc_id, &status)) - break; - - if (status == PROC_ONLINE) - core_count++; - } - } -#else - /* other platforms */ -#endif - - return core_count > 0 ? 
core_count : 1; -} -#endif - -void vp8_clear_system_state_c() {}; - -void vp8_machine_specific_config(VP8_COMMON *ctx) -{ -#if CONFIG_MULTITHREAD - ctx->processor_core_count = get_cpu_count(); -#else - (void)ctx; -#endif /* CONFIG_MULTITHREAD */ - -#if ARCH_ARM - ctx->cpu_caps = arm_cpu_caps(); -#elif ARCH_X86 || ARCH_X86_64 - ctx->cpu_caps = x86_simd_caps(); -#endif -} diff --git a/thirdparty/libvpx/vp8/common/header.h b/thirdparty/libvpx/vp8/common/header.h deleted file mode 100644 index e27bca16bd..0000000000 --- a/thirdparty/libvpx/vp8/common/header.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_HEADER_H_ -#define VP8_COMMON_HEADER_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/* 24 bits total */ -typedef struct -{ - unsigned int type: 1; - unsigned int version: 3; - unsigned int show_frame: 1; - - /* Allow 2^20 bytes = 8 megabits for first partition */ - - unsigned int first_partition_length_in_bytes: 19; - -#ifdef PACKET_TESTING - unsigned int frame_number; - unsigned int update_gold: 1; - unsigned int uses_gold: 1; - unsigned int update_last: 1; - unsigned int uses_last: 1; -#endif - -} VP8_HEADER; - -#ifdef PACKET_TESTING -#define VP8_HEADER_SIZE 8 -#else -#define VP8_HEADER_SIZE 3 -#endif - - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_HEADER_H_ diff --git a/thirdparty/libvpx/vp8/common/idct_blk.c b/thirdparty/libvpx/vp8/common/idct_blk.c deleted file mode 100644 index 8aa7d9bf0f..0000000000 --- a/thirdparty/libvpx/vp8/common/idct_blk.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project 
authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "vpx_mem/vpx_mem.h" - -void vp8_dequant_idct_add_c(short *input, short *dq, - unsigned char *dest, int stride); -void vp8_dc_only_idct_add_c(short input_dc, unsigned char * pred, - int pred_stride, unsigned char *dst_ptr, - int dst_stride); - -void vp8_dequant_idct_add_y_block_c - (short *q, short *dq, - unsigned char *dst, int stride, char *eobs) -{ - int i, j; - - for (i = 0; i < 4; i++) - { - for (j = 0; j < 4; j++) - { - if (*eobs++ > 1) - vp8_dequant_idct_add_c (q, dq, dst, stride); - else - { - vp8_dc_only_idct_add_c (q[0]*dq[0], dst, stride, dst, stride); - memset(q, 0, 2 * sizeof(q[0])); - } - - q += 16; - dst += 4; - } - - dst += 4*stride - 16; - } -} - -void vp8_dequant_idct_add_uv_block_c - (short *q, short *dq, - unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) -{ - int i, j; - - for (i = 0; i < 2; i++) - { - for (j = 0; j < 2; j++) - { - if (*eobs++ > 1) - vp8_dequant_idct_add_c (q, dq, dstu, stride); - else - { - vp8_dc_only_idct_add_c (q[0]*dq[0], dstu, stride, dstu, stride); - memset(q, 0, 2 * sizeof(q[0])); - } - - q += 16; - dstu += 4; - } - - dstu += 4*stride - 8; - } - - for (i = 0; i < 2; i++) - { - for (j = 0; j < 2; j++) - { - if (*eobs++ > 1) - vp8_dequant_idct_add_c (q, dq, dstv, stride); - else - { - vp8_dc_only_idct_add_c (q[0]*dq[0], dstv, stride, dstv, stride); - memset(q, 0, 2 * sizeof(q[0])); - } - - q += 16; - dstv += 4; - } - - dstv += 4*stride - 8; - } -} diff --git a/thirdparty/libvpx/vp8/common/idctllm.c b/thirdparty/libvpx/vp8/common/idctllm.c deleted file mode 100644 index 
f5403c5aaf..0000000000 --- a/thirdparty/libvpx/vp8/common/idctllm.c +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vp8_rtcd.h" - -/**************************************************************************** - * Notes: - * - * This implementation makes use of 16 bit fixed point verio of two multiply - * constants: - * 1. sqrt(2) * cos (pi/8) - * 2. sqrt(2) * sin (pi/8) - * Becuase the first constant is bigger than 1, to maintain the same 16 bit - * fixed point precision as the second one, we use a trick of - * x * a = x + x*(a-1) - * so - * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1). 
- **************************************************************************/ -static const int cospi8sqrt2minus1 = 20091; -static const int sinpi8sqrt2 = 35468; - -void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr, - int pred_stride, unsigned char *dst_ptr, - int dst_stride) -{ - int i; - int r, c; - int a1, b1, c1, d1; - short output[16]; - short *ip = input; - short *op = output; - int temp1, temp2; - int shortpitch = 4; - - for (i = 0; i < 4; i++) - { - a1 = ip[0] + ip[8]; - b1 = ip[0] - ip[8]; - - temp1 = (ip[4] * sinpi8sqrt2) >> 16; - temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16); - c1 = temp1 - temp2; - - temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16); - temp2 = (ip[12] * sinpi8sqrt2) >> 16; - d1 = temp1 + temp2; - - op[shortpitch*0] = a1 + d1; - op[shortpitch*3] = a1 - d1; - - op[shortpitch*1] = b1 + c1; - op[shortpitch*2] = b1 - c1; - - ip++; - op++; - } - - ip = output; - op = output; - - for (i = 0; i < 4; i++) - { - a1 = ip[0] + ip[2]; - b1 = ip[0] - ip[2]; - - temp1 = (ip[1] * sinpi8sqrt2) >> 16; - temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16); - c1 = temp1 - temp2; - - temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16); - temp2 = (ip[3] * sinpi8sqrt2) >> 16; - d1 = temp1 + temp2; - - - op[0] = (a1 + d1 + 4) >> 3; - op[3] = (a1 - d1 + 4) >> 3; - - op[1] = (b1 + c1 + 4) >> 3; - op[2] = (b1 - c1 + 4) >> 3; - - ip += shortpitch; - op += shortpitch; - } - - ip = output; - for (r = 0; r < 4; r++) - { - for (c = 0; c < 4; c++) - { - int a = ip[c] + pred_ptr[c] ; - - if (a < 0) - a = 0; - - if (a > 255) - a = 255; - - dst_ptr[c] = (unsigned char) a ; - } - ip += 4; - dst_ptr += dst_stride; - pred_ptr += pred_stride; - } -} - -void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, - int pred_stride, unsigned char *dst_ptr, - int dst_stride) -{ - int a1 = ((input_dc + 4) >> 3); - int r, c; - - for (r = 0; r < 4; r++) - { - for (c = 0; c < 4; c++) - { - int a = a1 + pred_ptr[c] ; - - if (a < 0) - a = 0; - - if (a 
> 255) - a = 255; - - dst_ptr[c] = (unsigned char) a ; - } - - dst_ptr += dst_stride; - pred_ptr += pred_stride; - } - -} - -void vp8_short_inv_walsh4x4_c(short *input, short *mb_dqcoeff) -{ - short output[16]; - int i; - int a1, b1, c1, d1; - int a2, b2, c2, d2; - short *ip = input; - short *op = output; - - for (i = 0; i < 4; i++) - { - a1 = ip[0] + ip[12]; - b1 = ip[4] + ip[8]; - c1 = ip[4] - ip[8]; - d1 = ip[0] - ip[12]; - - op[0] = a1 + b1; - op[4] = c1 + d1; - op[8] = a1 - b1; - op[12] = d1 - c1; - ip++; - op++; - } - - ip = output; - op = output; - - for (i = 0; i < 4; i++) - { - a1 = ip[0] + ip[3]; - b1 = ip[1] + ip[2]; - c1 = ip[1] - ip[2]; - d1 = ip[0] - ip[3]; - - a2 = a1 + b1; - b2 = c1 + d1; - c2 = a1 - b1; - d2 = d1 - c1; - - op[0] = (a2 + 3) >> 3; - op[1] = (b2 + 3) >> 3; - op[2] = (c2 + 3) >> 3; - op[3] = (d2 + 3) >> 3; - - ip += 4; - op += 4; - } - - for(i = 0; i < 16; i++) - { - mb_dqcoeff[i * 16] = output[i]; - } -} - -void vp8_short_inv_walsh4x4_1_c(short *input, short *mb_dqcoeff) -{ - int i; - int a1; - - a1 = ((input[0] + 3) >> 3); - for(i = 0; i < 16; i++) - { - mb_dqcoeff[i * 16] = a1; - } -} diff --git a/thirdparty/libvpx/vp8/common/invtrans.h b/thirdparty/libvpx/vp8/common/invtrans.h deleted file mode 100644 index 9cfea8d513..0000000000 --- a/thirdparty/libvpx/vp8/common/invtrans.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VP8_COMMON_INVTRANS_H_ -#define VP8_COMMON_INVTRANS_H_ - -#include "./vpx_config.h" -#include "vp8_rtcd.h" -#include "blockd.h" -#include "onyxc_int.h" - -#if CONFIG_MULTITHREAD -#include "vpx_mem/vpx_mem.h" -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -static void eob_adjust(char *eobs, short *diff) -{ - /* eob adjust.... the idct can only skip if both the dc and eob are zero */ - int js; - for(js = 0; js < 16; js++) - { - if((eobs[js] == 0) && (diff[0] != 0)) - eobs[js]++; - diff+=16; - } -} - -static INLINE void vp8_inverse_transform_mby(MACROBLOCKD *xd) -{ - short *DQC = xd->dequant_y1; - - if (xd->mode_info_context->mbmi.mode != SPLITMV) - { - /* do 2nd order transform on the dc block */ - if (xd->eobs[24] > 1) - { - vp8_short_inv_walsh4x4 - (&xd->block[24].dqcoeff[0], xd->qcoeff); - } - else - { - vp8_short_inv_walsh4x4_1 - (&xd->block[24].dqcoeff[0], xd->qcoeff); - } - eob_adjust(xd->eobs, xd->qcoeff); - - DQC = xd->dequant_y1_dc; - } - vp8_dequant_idct_add_y_block - (xd->qcoeff, DQC, - xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs); -} -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_INVTRANS_H_ diff --git a/thirdparty/libvpx/vp8/common/loopfilter.h b/thirdparty/libvpx/vp8/common/loopfilter.h deleted file mode 100644 index 20a6bd375b..0000000000 --- a/thirdparty/libvpx/vp8/common/loopfilter.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VP8_COMMON_LOOPFILTER_H_ -#define VP8_COMMON_LOOPFILTER_H_ - -#include "vpx_ports/mem.h" -#include "vpx_config.h" -#include "vp8_rtcd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_LOOP_FILTER 63 -/* fraction of total macroblock rows to be used in fast filter level picking */ -/* has to be > 2 */ -#define PARTIAL_FRAME_FRACTION 8 - -typedef enum -{ - NORMAL_LOOPFILTER = 0, - SIMPLE_LOOPFILTER = 1 -} LOOPFILTERTYPE; - -#if ARCH_ARM -#define SIMD_WIDTH 1 -#else -#define SIMD_WIDTH 16 -#endif - -/* Need to align this structure so when it is declared and - * passed it can be loaded into vector registers. - */ -typedef struct -{ - DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]); - DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]); - DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]); - DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]); - unsigned char lvl[4][4][4]; - unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1]; - unsigned char mode_lf_lut[10]; -} loop_filter_info_n; - -typedef struct loop_filter_info -{ - const unsigned char * mblim; - const unsigned char * blim; - const unsigned char * lim; - const unsigned char * hev_thr; -} loop_filter_info; - - -typedef void loop_filter_uvfunction -( - unsigned char *u, /* source pointer */ - int p, /* pitch */ - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, - unsigned char *v -); - -/* assorted loopfilter functions which get used elsewhere */ -struct VP8Common; -struct macroblockd; -struct modeinfo; - -void vp8_loop_filter_init(struct VP8Common *cm); - -void vp8_loop_filter_frame_init(struct VP8Common *cm, - struct macroblockd *mbd, - int default_filt_lvl); - -void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd, - int frame_type); - -void vp8_loop_filter_partial_frame(struct VP8Common *cm, - struct macroblockd *mbd, - 
int default_filt_lvl); - -void vp8_loop_filter_frame_yonly(struct VP8Common *cm, - struct macroblockd *mbd, - int default_filt_lvl); - -void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi, - int sharpness_lvl); - -void vp8_loop_filter_row_normal(struct VP8Common *cm, - struct modeinfo *mode_info_context, - int mb_row, int post_ystride, int post_uvstride, - unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr); - -void vp8_loop_filter_row_simple(struct VP8Common *cm, - struct modeinfo *mode_info_context, - int mb_row, int post_ystride, int post_uvstride, - unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr); -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_LOOPFILTER_H_ diff --git a/thirdparty/libvpx/vp8/common/loopfilter_filters.c b/thirdparty/libvpx/vp8/common/loopfilter_filters.c deleted file mode 100644 index 1d51696ff7..0000000000 --- a/thirdparty/libvpx/vp8/common/loopfilter_filters.c +++ /dev/null @@ -1,430 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include <stdlib.h> -#include "loopfilter.h" -#include "onyxc_int.h" - -typedef unsigned char uc; - -static signed char vp8_signed_char_clamp(int t) -{ - t = (t < -128 ? -128 : t); - t = (t > 127 ? 
127 : t); - return (signed char) t; -} - - -/* should we apply any filter at all ( 11111111 yes, 00000000 no) */ -static signed char vp8_filter_mask(uc limit, uc blimit, - uc p3, uc p2, uc p1, uc p0, - uc q0, uc q1, uc q2, uc q3) -{ - signed char mask = 0; - mask |= (abs(p3 - p2) > limit); - mask |= (abs(p2 - p1) > limit); - mask |= (abs(p1 - p0) > limit); - mask |= (abs(q1 - q0) > limit); - mask |= (abs(q2 - q1) > limit); - mask |= (abs(q3 - q2) > limit); - mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit); - return mask - 1; -} - -/* is there high variance internal edge ( 11111111 yes, 00000000 no) */ -static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1) -{ - signed char hev = 0; - hev |= (abs(p1 - p0) > thresh) * -1; - hev |= (abs(q1 - q0) > thresh) * -1; - return hev; -} - -static void vp8_filter(signed char mask, uc hev, uc *op1, - uc *op0, uc *oq0, uc *oq1) - -{ - signed char ps0, qs0; - signed char ps1, qs1; - signed char filter_value, Filter1, Filter2; - signed char u; - - ps1 = (signed char) * op1 ^ 0x80; - ps0 = (signed char) * op0 ^ 0x80; - qs0 = (signed char) * oq0 ^ 0x80; - qs1 = (signed char) * oq1 ^ 0x80; - - /* add outer taps if we have high edge variance */ - filter_value = vp8_signed_char_clamp(ps1 - qs1); - filter_value &= hev; - - /* inner taps */ - filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0)); - filter_value &= mask; - - /* save bottom 3 bits so that we round one side +4 and the other +3 - * if it equals 4 we'll set to adjust by -1 to account for the fact - * we'd round 3 the other way - */ - Filter1 = vp8_signed_char_clamp(filter_value + 4); - Filter2 = vp8_signed_char_clamp(filter_value + 3); - Filter1 >>= 3; - Filter2 >>= 3; - u = vp8_signed_char_clamp(qs0 - Filter1); - *oq0 = u ^ 0x80; - u = vp8_signed_char_clamp(ps0 + Filter2); - *op0 = u ^ 0x80; - filter_value = Filter1; - - /* outer tap adjustments */ - filter_value += 1; - filter_value >>= 1; - filter_value &= ~hev; - - u = 
vp8_signed_char_clamp(qs1 - filter_value); - *oq1 = u ^ 0x80; - u = vp8_signed_char_clamp(ps1 + filter_value); - *op1 = u ^ 0x80; - -} -void vp8_loop_filter_horizontal_edge_c -( - unsigned char *s, - int p, /* pitch */ - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, - int count -) -{ - int hev = 0; /* high edge variance */ - signed char mask = 0; - int i = 0; - - /* loop filter designed to work using chars so that we can make maximum use - * of 8 bit simd instructions. - */ - do - { - mask = vp8_filter_mask(limit[0], blimit[0], - s[-4*p], s[-3*p], s[-2*p], s[-1*p], - s[0*p], s[1*p], s[2*p], s[3*p]); - - hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]); - - vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p); - - ++s; - } - while (++i < count * 8); -} - -void vp8_loop_filter_vertical_edge_c -( - unsigned char *s, - int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, - int count -) -{ - int hev = 0; /* high edge variance */ - signed char mask = 0; - int i = 0; - - /* loop filter designed to work using chars so that we can make maximum use - * of 8 bit simd instructions. 
- */ - do - { - mask = vp8_filter_mask(limit[0], blimit[0], - s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]); - - hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]); - - vp8_filter(mask, hev, s - 2, s - 1, s, s + 1); - - s += p; - } - while (++i < count * 8); -} - -static void vp8_mbfilter(signed char mask, uc hev, - uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2) -{ - signed char s, u; - signed char filter_value, Filter1, Filter2; - signed char ps2 = (signed char) * op2 ^ 0x80; - signed char ps1 = (signed char) * op1 ^ 0x80; - signed char ps0 = (signed char) * op0 ^ 0x80; - signed char qs0 = (signed char) * oq0 ^ 0x80; - signed char qs1 = (signed char) * oq1 ^ 0x80; - signed char qs2 = (signed char) * oq2 ^ 0x80; - - /* add outer taps if we have high edge variance */ - filter_value = vp8_signed_char_clamp(ps1 - qs1); - filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0)); - filter_value &= mask; - - Filter2 = filter_value; - Filter2 &= hev; - - /* save bottom 3 bits so that we round one side +4 and the other +3 */ - Filter1 = vp8_signed_char_clamp(Filter2 + 4); - Filter2 = vp8_signed_char_clamp(Filter2 + 3); - Filter1 >>= 3; - Filter2 >>= 3; - qs0 = vp8_signed_char_clamp(qs0 - Filter1); - ps0 = vp8_signed_char_clamp(ps0 + Filter2); - - - /* only apply wider filter if not high edge variance */ - filter_value &= ~hev; - Filter2 = filter_value; - - /* roughly 3/7th difference across boundary */ - u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7); - s = vp8_signed_char_clamp(qs0 - u); - *oq0 = s ^ 0x80; - s = vp8_signed_char_clamp(ps0 + u); - *op0 = s ^ 0x80; - - /* roughly 2/7th difference across boundary */ - u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7); - s = vp8_signed_char_clamp(qs1 - u); - *oq1 = s ^ 0x80; - s = vp8_signed_char_clamp(ps1 + u); - *op1 = s ^ 0x80; - - /* roughly 1/7th difference across boundary */ - u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7); - s = vp8_signed_char_clamp(qs2 - u); - *oq2 = s ^ 0x80; - 
s = vp8_signed_char_clamp(ps2 + u); - *op2 = s ^ 0x80; -} - -void vp8_mbloop_filter_horizontal_edge_c -( - unsigned char *s, - int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, - int count -) -{ - signed char hev = 0; /* high edge variance */ - signed char mask = 0; - int i = 0; - - /* loop filter designed to work using chars so that we can make maximum use - * of 8 bit simd instructions. - */ - do - { - - mask = vp8_filter_mask(limit[0], blimit[0], - s[-4*p], s[-3*p], s[-2*p], s[-1*p], - s[0*p], s[1*p], s[2*p], s[3*p]); - - hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]); - - vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p); - - ++s; - } - while (++i < count * 8); - -} - - -void vp8_mbloop_filter_vertical_edge_c -( - unsigned char *s, - int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, - int count -) -{ - signed char hev = 0; /* high edge variance */ - signed char mask = 0; - int i = 0; - - do - { - - mask = vp8_filter_mask(limit[0], blimit[0], - s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]); - - hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]); - - vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2); - - s += p; - } - while (++i < count * 8); - -} - -/* should we apply any filter at all ( 11111111 yes, 00000000 no) */ -static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1) -{ -/* Why does this cause problems for win32? 
- * error C2143: syntax error : missing ';' before 'type' - * (void) limit; - */ - signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1; - return mask; -} - -static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1) -{ - signed char filter_value, Filter1, Filter2; - signed char p1 = (signed char) * op1 ^ 0x80; - signed char p0 = (signed char) * op0 ^ 0x80; - signed char q0 = (signed char) * oq0 ^ 0x80; - signed char q1 = (signed char) * oq1 ^ 0x80; - signed char u; - - filter_value = vp8_signed_char_clamp(p1 - q1); - filter_value = vp8_signed_char_clamp(filter_value + 3 * (q0 - p0)); - filter_value &= mask; - - /* save bottom 3 bits so that we round one side +4 and the other +3 */ - Filter1 = vp8_signed_char_clamp(filter_value + 4); - Filter1 >>= 3; - u = vp8_signed_char_clamp(q0 - Filter1); - *oq0 = u ^ 0x80; - - Filter2 = vp8_signed_char_clamp(filter_value + 3); - Filter2 >>= 3; - u = vp8_signed_char_clamp(p0 + Filter2); - *op0 = u ^ 0x80; -} - -void vp8_loop_filter_simple_horizontal_edge_c -( - unsigned char *s, - int p, - const unsigned char *blimit -) -{ - signed char mask = 0; - int i = 0; - - do - { - mask = vp8_simple_filter_mask(blimit[0], s[-2*p], s[-1*p], s[0*p], s[1*p]); - vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p); - ++s; - } - while (++i < 16); -} - -void vp8_loop_filter_simple_vertical_edge_c -( - unsigned char *s, - int p, - const unsigned char *blimit -) -{ - signed char mask = 0; - int i = 0; - - do - { - mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]); - vp8_simple_filter(mask, s - 2, s - 1, s, s + 1); - s += p; - } - while (++i < 16); - -} - -/* Horizontal MB filtering */ -void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr, int y_stride, int uv_stride, - loop_filter_info *lfi) -{ - vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - 
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); -} - -/* Vertical MB Filtering */ -void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr, int y_stride, int uv_stride, - loop_filter_info *lfi) -{ - vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); -} - -/* Horizontal B Filtering */ -void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr, int y_stride, int uv_stride, - loop_filter_info *lfi) -{ - vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); -} - -void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride, - const unsigned char *blimit) -{ - vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, blimit); - vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, blimit); - vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, blimit); -} - -/* Vertical B Filtering */ -void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr, int y_stride, int uv_stride, - loop_filter_info *lfi) 
-{ - vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); -} - -void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride, - const unsigned char *blimit) -{ - vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit); - vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit); - vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit); -} diff --git a/thirdparty/libvpx/vp8/common/mbpitch.c b/thirdparty/libvpx/vp8/common/mbpitch.c deleted file mode 100644 index 32e1b66409..0000000000 --- a/thirdparty/libvpx/vp8/common/mbpitch.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "blockd.h" - -void vp8_setup_block_dptrs(MACROBLOCKD *x) -{ - int r, c; - - for (r = 0; r < 4; r++) - { - for (c = 0; c < 4; c++) - { - x->block[r*4+c].predictor = x->predictor + r * 4 * 16 + c * 4; - } - } - - for (r = 0; r < 2; r++) - { - for (c = 0; c < 2; c++) - { - x->block[16+r*2+c].predictor = x->predictor + 256 + r * 4 * 8 + c * 4; - - } - } - - for (r = 0; r < 2; r++) - { - for (c = 0; c < 2; c++) - { - x->block[20+r*2+c].predictor = x->predictor + 320 + r * 4 * 8 + c * 4; - - } - } - - for (r = 0; r < 25; r++) - { - x->block[r].qcoeff = x->qcoeff + r * 16; - x->block[r].dqcoeff = x->dqcoeff + r * 16; - x->block[r].eob = x->eobs + r; - } -} - -void vp8_build_block_doffsets(MACROBLOCKD *x) -{ - int block; - - for (block = 0; block < 16; block++) /* y blocks */ - { - x->block[block].offset = - (block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4; - } - - for (block = 16; block < 20; block++) /* U and V blocks */ - { - x->block[block+4].offset = - x->block[block].offset = - ((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4; - } -} diff --git a/thirdparty/libvpx/vp8/common/modecont.c b/thirdparty/libvpx/vp8/common/modecont.c deleted file mode 100644 index 86a74bc0ff..0000000000 --- a/thirdparty/libvpx/vp8/common/modecont.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "entropy.h" - -const int vp8_mode_contexts[6][4] = -{ - { - /* 0 */ - 7, 1, 1, 143, - }, - { - /* 1 */ - 14, 18, 14, 107, - }, - { - /* 2 */ - 135, 64, 57, 68, - }, - { - /* 3 */ - 60, 56, 128, 65, - }, - { - /* 4 */ - 159, 134, 128, 34, - }, - { - /* 5 */ - 234, 188, 128, 28, - }, -}; diff --git a/thirdparty/libvpx/vp8/common/modecont.h b/thirdparty/libvpx/vp8/common/modecont.h deleted file mode 100644 index ff34c33c55..0000000000 --- a/thirdparty/libvpx/vp8/common/modecont.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_MODECONT_H_ -#define VP8_COMMON_MODECONT_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -extern const int vp8_mode_contexts[6][4]; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_MODECONT_H_ diff --git a/thirdparty/libvpx/vp8/common/mv.h b/thirdparty/libvpx/vp8/common/mv.h deleted file mode 100644 index 111ccd63c7..0000000000 --- a/thirdparty/libvpx/vp8/common/mv.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VP8_COMMON_MV_H_ -#define VP8_COMMON_MV_H_ -#include "vpx/vpx_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct -{ - short row; - short col; -} MV; - -typedef union int_mv -{ - uint32_t as_int; - MV as_mv; -} int_mv; /* facilitates faster equality tests and copies */ - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_MV_H_ diff --git a/thirdparty/libvpx/vp8/common/onyxc_int.h b/thirdparty/libvpx/vp8/common/onyxc_int.h deleted file mode 100644 index 6d89865c60..0000000000 --- a/thirdparty/libvpx/vp8/common/onyxc_int.h +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VP8_COMMON_ONYXC_INT_H_ -#define VP8_COMMON_ONYXC_INT_H_ - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "vpx/internal/vpx_codec_internal.h" -#include "loopfilter.h" -#include "entropymv.h" -#include "entropy.h" -#if CONFIG_POSTPROC -#include "postproc.h" -#endif - -/*#ifdef PACKET_TESTING*/ -#include "header.h" -/*#endif*/ - -#ifdef __cplusplus -extern "C" { -#endif - -#define MINQ 0 -#define MAXQ 127 -#define QINDEX_RANGE (MAXQ + 1) - -#define NUM_YV12_BUFFERS 4 - -#define MAX_PARTITIONS 9 - -typedef struct frame_contexts -{ - vp8_prob bmode_prob [VP8_BINTRAMODES-1]; - vp8_prob ymode_prob [VP8_YMODES-1]; /* interframe intra mode probs */ - vp8_prob uv_mode_prob [VP8_UV_MODES-1]; - vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1]; - vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - MV_CONTEXT mvc[2]; -} FRAME_CONTEXT; - -typedef enum -{ - ONE_PARTITION = 0, - TWO_PARTITION = 1, - FOUR_PARTITION = 2, - EIGHT_PARTITION = 3 -} TOKEN_PARTITION; - -typedef enum -{ - RECON_CLAMP_REQUIRED = 0, - RECON_CLAMP_NOTREQUIRED = 1 -} CLAMP_TYPE; - -typedef struct VP8Common - -{ - struct vpx_internal_error_info error; - - DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]); - DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]); - DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]); - - int Width; - int Height; - int horiz_scale; - int vert_scale; - - CLAMP_TYPE clamp_type; - - YV12_BUFFER_CONFIG *frame_to_show; - - YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS]; - int fb_idx_ref_cnt[NUM_YV12_BUFFERS]; - int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx; - - YV12_BUFFER_CONFIG temp_scale_frame; - -#if CONFIG_POSTPROC - YV12_BUFFER_CONFIG post_proc_buffer; - YV12_BUFFER_CONFIG post_proc_buffer_int; - int post_proc_buffer_int_used; - unsigned char *pp_limits_buffer; /* post-processing filter coefficients */ -#endif - - FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. 
*/ - FRAME_TYPE frame_type; - - int show_frame; - - int frame_flags; - int MBs; - int mb_rows; - int mb_cols; - int mode_info_stride; - - /* profile settings */ - int mb_no_coeff_skip; - int no_lpf; - int use_bilinear_mc_filter; - int full_pixel; - - int base_qindex; - - int y1dc_delta_q; - int y2dc_delta_q; - int y2ac_delta_q; - int uvdc_delta_q; - int uvac_delta_q; - - /* We allocate a MODE_INFO struct for each macroblock, together with - an extra row on top and column on the left to simplify prediction. */ - - MODE_INFO *mip; /* Base of allocated array */ - MODE_INFO *mi; /* Corresponds to upper left visible macroblock */ -#if CONFIG_ERROR_CONCEALMENT - MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */ - MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */ -#endif - MODE_INFO *show_frame_mi; /* MODE_INFO for the last decoded frame - to show */ - LOOPFILTERTYPE filter_type; - - loop_filter_info_n lf_info; - - int filter_level; - int last_sharpness_level; - int sharpness_level; - - int refresh_last_frame; /* Two state 0 = NO, 1 = YES */ - int refresh_golden_frame; /* Two state 0 = NO, 1 = YES */ - int refresh_alt_ref_frame; /* Two state 0 = NO, 1 = YES */ - - int copy_buffer_to_gf; /* 0 none, 1 Last to GF, 2 ARF to GF */ - int copy_buffer_to_arf; /* 0 none, 1 Last to ARF, 2 GF to ARF */ - - int refresh_entropy_probs; /* Two state 0 = NO, 1 = YES */ - - int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */ - - /* Y,U,V,Y2 */ - ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */ - ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */ - - FRAME_CONTEXT lfc; /* last frame entropy */ - FRAME_CONTEXT fc; /* this frame entropy */ - - unsigned int current_video_frame; - - int version; - - TOKEN_PARTITION multi_token_partition; - -#ifdef PACKET_TESTING - VP8_HEADER oh; -#endif -#if CONFIG_POSTPROC_VISUALIZER - double bitrate; - double framerate; -#endif - -#if CONFIG_MULTITHREAD - int 
processor_core_count; -#endif -#if CONFIG_POSTPROC - struct postproc_state postproc_state; -#endif - int cpu_caps; -} VP8_COMMON; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_ONYXC_INT_H_ diff --git a/thirdparty/libvpx/vp8/common/onyxd.h b/thirdparty/libvpx/vp8/common/onyxd.h deleted file mode 100644 index e37b29f32c..0000000000 --- a/thirdparty/libvpx/vp8/common/onyxd.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_ONYXD_H_ -#define VP8_COMMON_ONYXD_H_ - - -/* Create/destroy static data structures. */ -#ifdef __cplusplus -extern "C" -{ -#endif -#include "vpx_scale/yv12config.h" -#include "ppflags.h" -#include "vpx_ports/mem.h" -#include "vpx/vpx_codec.h" -#include "vpx/vp8.h" - - struct VP8D_COMP; - - typedef struct - { - int Width; - int Height; - int Version; - int postprocess; - int max_threads; - int error_concealment; - } VP8D_CONFIG; - - typedef enum - { - VP8D_OK = 0 - } VP8D_SETTING; - - void vp8dx_initialize(void); - - void vp8dx_set_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst, int x); - - int vp8dx_get_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst); - - int vp8dx_receive_compressed_data(struct VP8D_COMP* comp, - size_t size, const uint8_t *dest, - int64_t time_stamp); - int vp8dx_get_raw_frame(struct VP8D_COMP* comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags); - - vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); - vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, enum 
vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); - -#ifdef __cplusplus -} -#endif - - -#endif // VP8_COMMON_ONYXD_H_ diff --git a/thirdparty/libvpx/vp8/common/ppflags.h b/thirdparty/libvpx/vp8/common/ppflags.h deleted file mode 100644 index 768224aad5..0000000000 --- a/thirdparty/libvpx/vp8/common/ppflags.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_PPFLAGS_H_ -#define VP8_COMMON_PPFLAGS_H_ - -#ifdef __cplusplus -extern "C" { -#endif -enum -{ - VP8D_NOFILTERING = 0, - VP8D_DEBLOCK = 1<<0, - VP8D_DEMACROBLOCK = 1<<1, - VP8D_ADDNOISE = 1<<2, - VP8D_DEBUG_TXT_FRAME_INFO = 1<<3, - VP8D_DEBUG_TXT_MBLK_MODES = 1<<4, - VP8D_DEBUG_TXT_DC_DIFF = 1<<5, - VP8D_DEBUG_TXT_RATE_INFO = 1<<6, - VP8D_DEBUG_DRAW_MV = 1<<7, - VP8D_DEBUG_CLR_BLK_MODES = 1<<8, - VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9, - VP8D_MFQE = 1<<10 -}; - -typedef struct -{ - int post_proc_flag; - int deblocking_level; - int noise_level; - int display_ref_frame_flag; - int display_mb_modes_flag; - int display_b_modes_flag; - int display_mv_flag; -} vp8_ppflags_t; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_PPFLAGS_H_ diff --git a/thirdparty/libvpx/vp8/common/quant_common.c b/thirdparty/libvpx/vp8/common/quant_common.c deleted file mode 100644 index 05f9210702..0000000000 --- a/thirdparty/libvpx/vp8/common/quant_common.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "quant_common.h" - -static const int dc_qlookup[QINDEX_RANGE] = -{ - 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17, - 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28, - 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, - 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, - 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118, - 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157, -}; - -static const int ac_qlookup[QINDEX_RANGE] = -{ - 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, - 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, - 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, - 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152, - 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209, - 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284, -}; - - -int vp8_dc_quant(int QIndex, int Delta) -{ - int retval; - - QIndex = QIndex + Delta; - - if (QIndex > 127) - QIndex = 127; - else if (QIndex < 0) - QIndex = 0; - - retval = dc_qlookup[ QIndex ]; - return retval; -} - -int vp8_dc2quant(int QIndex, int Delta) -{ - int retval; - - QIndex = QIndex + Delta; - - if (QIndex > 127) - QIndex = 127; - else if (QIndex < 0) - QIndex = 0; - - retval = 
dc_qlookup[ QIndex ] * 2; - return retval; - -} -int vp8_dc_uv_quant(int QIndex, int Delta) -{ - int retval; - - QIndex = QIndex + Delta; - - if (QIndex > 127) - QIndex = 127; - else if (QIndex < 0) - QIndex = 0; - - retval = dc_qlookup[ QIndex ]; - - if (retval > 132) - retval = 132; - - return retval; -} - -int vp8_ac_yquant(int QIndex) -{ - int retval; - - if (QIndex > 127) - QIndex = 127; - else if (QIndex < 0) - QIndex = 0; - - retval = ac_qlookup[ QIndex ]; - return retval; -} - -int vp8_ac2quant(int QIndex, int Delta) -{ - int retval; - - QIndex = QIndex + Delta; - - if (QIndex > 127) - QIndex = 127; - else if (QIndex < 0) - QIndex = 0; - - /* For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16. - * The smallest precision for that is '(x*6349) >> 12' but 16 is a good - * word size. */ - retval = (ac_qlookup[ QIndex ] * 101581) >> 16; - - if (retval < 8) - retval = 8; - - return retval; -} -int vp8_ac_uv_quant(int QIndex, int Delta) -{ - int retval; - - QIndex = QIndex + Delta; - - if (QIndex > 127) - QIndex = 127; - else if (QIndex < 0) - QIndex = 0; - - retval = ac_qlookup[ QIndex ]; - return retval; -} diff --git a/thirdparty/libvpx/vp8/common/quant_common.h b/thirdparty/libvpx/vp8/common/quant_common.h deleted file mode 100644 index 700b5e6d72..0000000000 --- a/thirdparty/libvpx/vp8/common/quant_common.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP8_COMMON_QUANT_COMMON_H_ -#define VP8_COMMON_QUANT_COMMON_H_ - - -#include "string.h" -#include "blockd.h" -#include "onyxc_int.h" - -#ifdef __cplusplus -extern "C" { -#endif - -extern int vp8_ac_yquant(int QIndex); -extern int vp8_dc_quant(int QIndex, int Delta); -extern int vp8_dc2quant(int QIndex, int Delta); -extern int vp8_ac2quant(int QIndex, int Delta); -extern int vp8_dc_uv_quant(int QIndex, int Delta); -extern int vp8_ac_uv_quant(int QIndex, int Delta); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_QUANT_COMMON_H_ diff --git a/thirdparty/libvpx/vp8/common/reconinter.c b/thirdparty/libvpx/vp8/common/reconinter.c deleted file mode 100644 index e302595587..0000000000 --- a/thirdparty/libvpx/vp8/common/reconinter.c +++ /dev/null @@ -1,544 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include <limits.h> -#include <string.h> - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "vpx/vpx_integer.h" -#include "blockd.h" -#include "reconinter.h" -#if CONFIG_RUNTIME_CPU_DETECT -#include "onyxc_int.h" -#endif - -void vp8_copy_mem16x16_c( - unsigned char *src, - int src_stride, - unsigned char *dst, - int dst_stride) -{ - - int r; - - for (r = 0; r < 16; r++) - { - memcpy(dst, src, 16); - - src += src_stride; - dst += dst_stride; - - } - -} - -void vp8_copy_mem8x8_c( - unsigned char *src, - int src_stride, - unsigned char *dst, - int dst_stride) -{ - int r; - - for (r = 0; r < 8; r++) - { - memcpy(dst, src, 8); - - src += src_stride; - dst += dst_stride; - - } - -} - -void vp8_copy_mem8x4_c( - unsigned char *src, - int src_stride, - unsigned char *dst, - int dst_stride) -{ - int r; - - for (r = 0; r < 4; r++) - { - memcpy(dst, src, 8); - - src += src_stride; - dst += dst_stride; - - } - -} - - -void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf) -{ - int r; - unsigned char *pred_ptr = d->predictor; - unsigned char *ptr; - ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3); - - if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) - { - sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch); - } - else - { - for (r = 0; r < 4; r++) - { - pred_ptr[0] = ptr[0]; - pred_ptr[1] = ptr[1]; - pred_ptr[2] = ptr[2]; - pred_ptr[3] = ptr[3]; - pred_ptr += pitch; - ptr += pre_stride; - } - } -} - -static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride) -{ - unsigned char *ptr; - ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3); - - if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) - { - x->subpixel_predict8x8(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, 
d->bmi.mv.as_mv.row & 7, dst, dst_stride); - } - else - { - vp8_copy_mem8x8(ptr, pre_stride, dst, dst_stride); - } -} - -static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride) -{ - unsigned char *ptr; - ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3); - - if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) - { - x->subpixel_predict8x4(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride); - } - else - { - vp8_copy_mem8x4(ptr, pre_stride, dst, dst_stride); - } -} - -static void build_inter_predictors_b(BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf) -{ - int r; - unsigned char *ptr; - ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3); - - if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) - { - sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride); - } - else - { - for (r = 0; r < 4; r++) - { - dst[0] = ptr[0]; - dst[1] = ptr[1]; - dst[2] = ptr[2]; - dst[3] = ptr[3]; - dst += dst_stride; - ptr += pre_stride; - } - } -} - - -/*encoder only*/ -void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x) -{ - unsigned char *uptr, *vptr; - unsigned char *upred_ptr = &x->predictor[256]; - unsigned char *vpred_ptr = &x->predictor[320]; - - int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; - int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; - int offset; - int pre_stride = x->pre.uv_stride; - - /* calc uv motion vectors */ - mv_row += 1 | (mv_row >> (sizeof(int) * CHAR_BIT - 1)); - mv_col += 1 | (mv_col >> (sizeof(int) * CHAR_BIT - 1)); - mv_row /= 2; - mv_col /= 2; - mv_row &= x->fullpixel_mask; - mv_col &= x->fullpixel_mask; - - offset = (mv_row >> 3) * pre_stride + (mv_col >> 3); - uptr = x->pre.u_buffer + offset; - vptr = x->pre.v_buffer + offset; - 
- if ((mv_row | mv_col) & 7) - { - x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8); - x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8); - } - else - { - vp8_copy_mem8x8(uptr, pre_stride, upred_ptr, 8); - vp8_copy_mem8x8(vptr, pre_stride, vpred_ptr, 8); - } -} - -/*encoder only*/ -void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x) -{ - int i, j; - int pre_stride = x->pre.uv_stride; - unsigned char *base_pre; - - /* build uv mvs */ - for (i = 0; i < 2; i++) - { - for (j = 0; j < 2; j++) - { - int yoffset = i * 8 + j * 2; - int uoffset = 16 + i * 2 + j; - int voffset = 20 + i * 2 + j; - - int temp; - - temp = x->block[yoffset ].bmi.mv.as_mv.row - + x->block[yoffset+1].bmi.mv.as_mv.row - + x->block[yoffset+4].bmi.mv.as_mv.row - + x->block[yoffset+5].bmi.mv.as_mv.row; - - temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); - - x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask; - - temp = x->block[yoffset ].bmi.mv.as_mv.col - + x->block[yoffset+1].bmi.mv.as_mv.col - + x->block[yoffset+4].bmi.mv.as_mv.col - + x->block[yoffset+5].bmi.mv.as_mv.col; - - temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); - - x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask; - - x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int; - } - } - - base_pre = x->pre.u_buffer; - for (i = 16; i < 20; i += 2) - { - BLOCKD *d0 = &x->block[i]; - BLOCKD *d1 = &x->block[i+1]; - - if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) - build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride); - else - { - vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict); - vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict); - } - } - - base_pre = x->pre.v_buffer; - for (i = 20; i < 24; i += 2) - { - BLOCKD *d0 = &x->block[i]; - BLOCKD *d1 = &x->block[i+1]; - - if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) - build_inter_predictors2b(x, 
d0, d0->predictor, 8, base_pre, pre_stride); - else - { - vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict); - vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict); - } - } -} - - -/*encoder only*/ -void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x, - unsigned char *dst_y, - int dst_ystride) -{ - unsigned char *ptr_base; - unsigned char *ptr; - int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; - int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; - int pre_stride = x->pre.y_stride; - - ptr_base = x->pre.y_buffer; - ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3); - - if ((mv_row | mv_col) & 7) - { - x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, - dst_y, dst_ystride); - } - else - { - vp8_copy_mem16x16(ptr, pre_stride, dst_y, - dst_ystride); - } -} - -static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd) -{ - /* If the MV points so far into the UMV border that no visible pixels - * are used for reconstruction, the subpel part of the MV can be - * discarded and the MV limited to 16 pixels with equivalent results. - * - * This limit kicks in at 19 pixels for the top and left edges, for - * the 16 pixels plus 3 taps right of the central pixel when subpel - * filtering. The bottom and right edges use 16 pixels plus 2 pixels - * left of the central pixel when filtering. - */ - if (mv->col < (xd->mb_to_left_edge - (19 << 3))) - mv->col = xd->mb_to_left_edge - (16 << 3); - else if (mv->col > xd->mb_to_right_edge + (18 << 3)) - mv->col = xd->mb_to_right_edge + (16 << 3); - - if (mv->row < (xd->mb_to_top_edge - (19 << 3))) - mv->row = xd->mb_to_top_edge - (16 << 3); - else if (mv->row > xd->mb_to_bottom_edge + (18 << 3)) - mv->row = xd->mb_to_bottom_edge + (16 << 3); -} - -/* A version of the above function for chroma block MVs.*/ -static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) -{ - mv->col = (2*mv->col < (xd->mb_to_left_edge - (19 << 3))) ? 
- (xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col; - mv->col = (2*mv->col > xd->mb_to_right_edge + (18 << 3)) ? - (xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col; - - mv->row = (2*mv->row < (xd->mb_to_top_edge - (19 << 3))) ? - (xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row; - mv->row = (2*mv->row > xd->mb_to_bottom_edge + (18 << 3)) ? - (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row; -} - -void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, - unsigned char *dst_y, - unsigned char *dst_u, - unsigned char *dst_v, - int dst_ystride, - int dst_uvstride) -{ - int offset; - unsigned char *ptr; - unsigned char *uptr, *vptr; - - int_mv _16x16mv; - - unsigned char *ptr_base = x->pre.y_buffer; - int pre_stride = x->pre.y_stride; - - _16x16mv.as_int = x->mode_info_context->mbmi.mv.as_int; - - if (x->mode_info_context->mbmi.need_to_clamp_mvs) - { - clamp_mv_to_umv_border(&_16x16mv.as_mv, x); - } - - ptr = ptr_base + ( _16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3); - - if ( _16x16mv.as_int & 0x00070007) - { - x->subpixel_predict16x16(ptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_y, dst_ystride); - } - else - { - vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride); - } - - /* calc uv motion vectors */ - _16x16mv.as_mv.row += 1 | (_16x16mv.as_mv.row >> (sizeof(int) * CHAR_BIT - 1)); - _16x16mv.as_mv.col += 1 | (_16x16mv.as_mv.col >> (sizeof(int) * CHAR_BIT - 1)); - _16x16mv.as_mv.row /= 2; - _16x16mv.as_mv.col /= 2; - _16x16mv.as_mv.row &= x->fullpixel_mask; - _16x16mv.as_mv.col &= x->fullpixel_mask; - - pre_stride >>= 1; - offset = ( _16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3); - uptr = x->pre.u_buffer + offset; - vptr = x->pre.v_buffer + offset; - - if ( _16x16mv.as_int & 0x00070007) - { - x->subpixel_predict8x8(uptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_u, dst_uvstride); - x->subpixel_predict8x8(vptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_v, 
dst_uvstride); - } - else - { - vp8_copy_mem8x8(uptr, pre_stride, dst_u, dst_uvstride); - vp8_copy_mem8x8(vptr, pre_stride, dst_v, dst_uvstride); - } -} - -static void build_inter4x4_predictors_mb(MACROBLOCKD *x) -{ - int i; - unsigned char *base_dst = x->dst.y_buffer; - unsigned char *base_pre = x->pre.y_buffer; - - if (x->mode_info_context->mbmi.partitioning < 3) - { - BLOCKD *b; - int dst_stride = x->dst.y_stride; - - x->block[ 0].bmi = x->mode_info_context->bmi[ 0]; - x->block[ 2].bmi = x->mode_info_context->bmi[ 2]; - x->block[ 8].bmi = x->mode_info_context->bmi[ 8]; - x->block[10].bmi = x->mode_info_context->bmi[10]; - if (x->mode_info_context->mbmi.need_to_clamp_mvs) - { - clamp_mv_to_umv_border(&x->block[ 0].bmi.mv.as_mv, x); - clamp_mv_to_umv_border(&x->block[ 2].bmi.mv.as_mv, x); - clamp_mv_to_umv_border(&x->block[ 8].bmi.mv.as_mv, x); - clamp_mv_to_umv_border(&x->block[10].bmi.mv.as_mv, x); - } - - b = &x->block[ 0]; - build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride); - b = &x->block[ 2]; - build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride); - b = &x->block[ 8]; - build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride); - b = &x->block[10]; - build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride); - } - else - { - for (i = 0; i < 16; i += 2) - { - BLOCKD *d0 = &x->block[i]; - BLOCKD *d1 = &x->block[i+1]; - int dst_stride = x->dst.y_stride; - - x->block[i+0].bmi = x->mode_info_context->bmi[i+0]; - x->block[i+1].bmi = x->mode_info_context->bmi[i+1]; - if (x->mode_info_context->mbmi.need_to_clamp_mvs) - { - clamp_mv_to_umv_border(&x->block[i+0].bmi.mv.as_mv, x); - clamp_mv_to_umv_border(&x->block[i+1].bmi.mv.as_mv, x); - } - - if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) - build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride); - else - { - build_inter_predictors_b(d0, base_dst + 
d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); - build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); - } - - } - - } - base_dst = x->dst.u_buffer; - base_pre = x->pre.u_buffer; - for (i = 16; i < 20; i += 2) - { - BLOCKD *d0 = &x->block[i]; - BLOCKD *d1 = &x->block[i+1]; - int dst_stride = x->dst.uv_stride; - - /* Note: uv mvs already clamped in build_4x4uvmvs() */ - - if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) - build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride); - else - { - build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); - build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); - } - } - - base_dst = x->dst.v_buffer; - base_pre = x->pre.v_buffer; - for (i = 20; i < 24; i += 2) - { - BLOCKD *d0 = &x->block[i]; - BLOCKD *d1 = &x->block[i+1]; - int dst_stride = x->dst.uv_stride; - - /* Note: uv mvs already clamped in build_4x4uvmvs() */ - - if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) - build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride); - else - { - build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); - build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); - } - } -} - -static -void build_4x4uvmvs(MACROBLOCKD *x) -{ - int i, j; - - for (i = 0; i < 2; i++) - { - for (j = 0; j < 2; j++) - { - int yoffset = i * 8 + j * 2; - int uoffset = 16 + i * 2 + j; - int voffset = 20 + i * 2 + j; - - int temp; - - temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.row - + x->mode_info_context->bmi[yoffset + 1].mv.as_mv.row - + x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row - + x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row; - - temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); - - 
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask; - - temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.col - + x->mode_info_context->bmi[yoffset + 1].mv.as_mv.col - + x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col - + x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col; - - temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); - - x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask; - - if (x->mode_info_context->mbmi.need_to_clamp_mvs) - clamp_uvmv_to_umv_border(&x->block[uoffset].bmi.mv.as_mv, x); - - x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int; - } - } -} - -void vp8_build_inter_predictors_mb(MACROBLOCKD *xd) -{ - if (xd->mode_info_context->mbmi.mode != SPLITMV) - { - vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.y_stride, xd->dst.uv_stride); - } - else - { - build_4x4uvmvs(xd); - build_inter4x4_predictors_mb(xd); - } -} diff --git a/thirdparty/libvpx/vp8/common/reconinter.h b/thirdparty/libvpx/vp8/common/reconinter.h deleted file mode 100644 index ba979b9664..0000000000 --- a/thirdparty/libvpx/vp8/common/reconinter.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VP8_COMMON_RECONINTER_H_ -#define VP8_COMMON_RECONINTER_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x); -extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, - unsigned char *dst_y, - unsigned char *dst_u, - unsigned char *dst_v, - int dst_ystride, - int dst_uvstride); - - -extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x, - unsigned char *dst_y, - int dst_ystride); -extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, - unsigned char *base_pre, - int pre_stride, - vp8_subpix_fn_t sppf); - -extern void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x); -extern void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_RECONINTER_H_ diff --git a/thirdparty/libvpx/vp8/common/reconintra.c b/thirdparty/libvpx/vp8/common/reconintra.c deleted file mode 100644 index 356655dac7..0000000000 --- a/thirdparty/libvpx/vp8/common/reconintra.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "./vp8_rtcd.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/vpx_once.h" -#include "blockd.h" -#include "vp8/common/reconintra.h" -#include "vp8/common/reconintra4x4.h" - -enum { - SIZE_16, - SIZE_8, - NUM_SIZES, -}; - -typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left); - -static intra_pred_fn pred[4][NUM_SIZES]; -static intra_pred_fn dc_pred[2][2][NUM_SIZES]; - -static void vp8_init_intra_predictors_internal(void) -{ -#define INIT_SIZE(sz) \ - pred[V_PRED][SIZE_##sz] = vpx_v_predictor_##sz##x##sz; \ - pred[H_PRED][SIZE_##sz] = vpx_h_predictor_##sz##x##sz; \ - pred[TM_PRED][SIZE_##sz] = vpx_tm_predictor_##sz##x##sz; \ - \ - dc_pred[0][0][SIZE_##sz] = vpx_dc_128_predictor_##sz##x##sz; \ - dc_pred[0][1][SIZE_##sz] = vpx_dc_top_predictor_##sz##x##sz; \ - dc_pred[1][0][SIZE_##sz] = vpx_dc_left_predictor_##sz##x##sz; \ - dc_pred[1][1][SIZE_##sz] = vpx_dc_predictor_##sz##x##sz - - INIT_SIZE(16); - INIT_SIZE(8); - vp8_init_intra4x4_predictors_internal(); -} - -void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x, - unsigned char * yabove_row, - unsigned char * yleft, - int left_stride, - unsigned char * ypred_ptr, - int y_stride) -{ - MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode; - DECLARE_ALIGNED(16, uint8_t, yleft_col[16]); - int i; - intra_pred_fn fn; - - for (i = 0; i < 16; i++) - { - yleft_col[i] = yleft[i* left_stride]; - } - - if (mode == DC_PRED) - { - fn = dc_pred[x->left_available][x->up_available][SIZE_16]; - } - else - { - fn = pred[mode][SIZE_16]; - } - - fn(ypred_ptr, y_stride, yabove_row, yleft_col); -} - -void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x, - unsigned char * uabove_row, - unsigned char * vabove_row, - unsigned char * uleft, - unsigned char * vleft, - int left_stride, - unsigned char * upred_ptr, - unsigned char * vpred_ptr, - int pred_stride) -{ - MB_PREDICTION_MODE uvmode = 
x->mode_info_context->mbmi.uv_mode; - unsigned char uleft_col[8]; - unsigned char vleft_col[8]; - int i; - intra_pred_fn fn; - - for (i = 0; i < 8; i++) - { - uleft_col[i] = uleft[i * left_stride]; - vleft_col[i] = vleft[i * left_stride]; - } - - if (uvmode == DC_PRED) - { - fn = dc_pred[x->left_available][x->up_available][SIZE_8]; - } - else - { - fn = pred[uvmode][SIZE_8]; - } - - fn(upred_ptr, pred_stride, uabove_row, uleft_col); - fn(vpred_ptr, pred_stride, vabove_row, vleft_col); -} - -void vp8_init_intra_predictors(void) -{ - once(vp8_init_intra_predictors_internal); -} diff --git a/thirdparty/libvpx/vp8/common/reconintra.h b/thirdparty/libvpx/vp8/common/reconintra.h deleted file mode 100644 index b6225a6637..0000000000 --- a/thirdparty/libvpx/vp8/common/reconintra.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VP8_COMMON_RECONINTRA_H_ -#define VP8_COMMON_RECONINTRA_H_ - -#include "vp8/common/blockd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x, - unsigned char *yabove_row, - unsigned char *yleft, - int left_stride, - unsigned char *ypred_ptr, - int y_stride); - -void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x, - unsigned char * uabove_row, - unsigned char * vabove_row, - unsigned char * uleft, - unsigned char * vleft, - int left_stride, - unsigned char * upred_ptr, - unsigned char * vpred_ptr, - int pred_stride); - -void vp8_init_intra_predictors(void); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_RECONINTRA_H_ diff --git a/thirdparty/libvpx/vp8/common/reconintra4x4.c b/thirdparty/libvpx/vp8/common/reconintra4x4.c deleted file mode 100644 index 35ad891eff..0000000000 --- a/thirdparty/libvpx/vp8/common/reconintra4x4.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <string.h> - -#include "vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "vp8_rtcd.h" -#include "blockd.h" - -typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left); - -static intra_pred_fn pred[10]; - -void vp8_init_intra4x4_predictors_internal(void) -{ - pred[B_DC_PRED] = vpx_dc_predictor_4x4; - pred[B_TM_PRED] = vpx_tm_predictor_4x4; - pred[B_VE_PRED] = vpx_ve_predictor_4x4; - pred[B_HE_PRED] = vpx_he_predictor_4x4; - pred[B_LD_PRED] = vpx_d45e_predictor_4x4; - pred[B_RD_PRED] = vpx_d135_predictor_4x4; - pred[B_VR_PRED] = vpx_d117_predictor_4x4; - pred[B_VL_PRED] = vpx_d63f_predictor_4x4; - pred[B_HD_PRED] = vpx_d153_predictor_4x4; - pred[B_HU_PRED] = vpx_d207_predictor_4x4; -} - -void vp8_intra4x4_predict(unsigned char *above, - unsigned char *yleft, int left_stride, - B_PREDICTION_MODE b_mode, - unsigned char *dst, int dst_stride, - unsigned char top_left) -{ - unsigned char Left[4]; - unsigned char Aboveb[12], *Above = Aboveb + 4; - - Left[0] = yleft[0]; - Left[1] = yleft[left_stride]; - Left[2] = yleft[2 * left_stride]; - Left[3] = yleft[3 * left_stride]; - memcpy(Above, above, 8); - Above[-1] = top_left; - - pred[b_mode](dst, dst_stride, Above, Left); -} diff --git a/thirdparty/libvpx/vp8/common/reconintra4x4.h b/thirdparty/libvpx/vp8/common/reconintra4x4.h deleted file mode 100644 index 5dc5d13a5c..0000000000 --- a/thirdparty/libvpx/vp8/common/reconintra4x4.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VP8_COMMON_RECONINTRA4X4_H_ -#define VP8_COMMON_RECONINTRA4X4_H_ -#include "vp8/common/blockd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -static INLINE void intra_prediction_down_copy(MACROBLOCKD *xd, - unsigned char *above_right_src) -{ - int dst_stride = xd->dst.y_stride; - unsigned char *above_right_dst = xd->dst.y_buffer - dst_stride + 16; - - unsigned int *src_ptr = (unsigned int *)above_right_src; - unsigned int *dst_ptr0 = (unsigned int *)(above_right_dst + 4 * dst_stride); - unsigned int *dst_ptr1 = (unsigned int *)(above_right_dst + 8 * dst_stride); - unsigned int *dst_ptr2 = (unsigned int *)(above_right_dst + 12 * dst_stride); - - *dst_ptr0 = *src_ptr; - *dst_ptr1 = *src_ptr; - *dst_ptr2 = *src_ptr; -} - -void vp8_intra4x4_predict(unsigned char *Above, - unsigned char *yleft, int left_stride, - B_PREDICTION_MODE b_mode, - unsigned char *dst, int dst_stride, - unsigned char top_left); - -void vp8_init_intra4x4_predictors_internal(void); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_RECONINTRA4X4_H_ diff --git a/thirdparty/libvpx/vp8/common/rtcd.c b/thirdparty/libvpx/vp8/common/rtcd.c deleted file mode 100644 index ab0e9b47fe..0000000000 --- a/thirdparty/libvpx/vp8/common/rtcd.c +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ -#include "./vpx_config.h" -#define RTCD_C -#include "./vp8_rtcd.h" -#include "vpx_ports/vpx_once.h" - - -void vp8_rtcd() -{ - once(setup_rtcd_internal); -} diff --git a/thirdparty/libvpx/vp8/common/setupintrarecon.c b/thirdparty/libvpx/vp8/common/setupintrarecon.c deleted file mode 100644 index 669564db42..0000000000 --- a/thirdparty/libvpx/vp8/common/setupintrarecon.c +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "setupintrarecon.h" -#include "vpx_mem/vpx_mem.h" - -void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf) -{ - int i; - - /* set up frame new frame for intra coded blocks */ - memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5); - for (i = 0; i < ybf->y_height; i++) - ybf->y_buffer[ybf->y_stride *i - 1] = (unsigned char) 129; - - memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); - for (i = 0; i < ybf->uv_height; i++) - ybf->u_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129; - - memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); - for (i = 0; i < ybf->uv_height; i++) - ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129; - -} - -void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf) -{ - memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5); - memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); - memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); -} diff --git a/thirdparty/libvpx/vp8/common/setupintrarecon.h b/thirdparty/libvpx/vp8/common/setupintrarecon.h deleted file mode 100644 index 1857c4e26a..0000000000 --- 
a/thirdparty/libvpx/vp8/common/setupintrarecon.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP8_COMMON_SETUPINTRARECON_H_ -#define VP8_COMMON_SETUPINTRARECON_H_ - -#include "./vpx_config.h" -#include "vpx_scale/yv12config.h" - -#ifdef __cplusplus -extern "C" { -#endif -extern void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf); -extern void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf); - -static INLINE void setup_intra_recon_left(unsigned char *y_buffer, - unsigned char *u_buffer, - unsigned char *v_buffer, - int y_stride, - int uv_stride) -{ - int i; - - for (i = 0; i < 16; i++) - y_buffer[y_stride *i] = (unsigned char) 129; - - for (i = 0; i < 8; i++) - u_buffer[uv_stride *i] = (unsigned char) 129; - - for (i = 0; i < 8; i++) - v_buffer[uv_stride *i] = (unsigned char) 129; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_SETUPINTRARECON_H_ diff --git a/thirdparty/libvpx/vp8/common/swapyv12buffer.c b/thirdparty/libvpx/vp8/common/swapyv12buffer.c deleted file mode 100644 index 73656b3d72..0000000000 --- a/thirdparty/libvpx/vp8/common/swapyv12buffer.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "swapyv12buffer.h" - -void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, YV12_BUFFER_CONFIG *last_frame) -{ - unsigned char *temp; - - temp = last_frame->buffer_alloc; - last_frame->buffer_alloc = new_frame->buffer_alloc; - new_frame->buffer_alloc = temp; - - temp = last_frame->y_buffer; - last_frame->y_buffer = new_frame->y_buffer; - new_frame->y_buffer = temp; - - temp = last_frame->u_buffer; - last_frame->u_buffer = new_frame->u_buffer; - new_frame->u_buffer = temp; - - temp = last_frame->v_buffer; - last_frame->v_buffer = new_frame->v_buffer; - new_frame->v_buffer = temp; - -} diff --git a/thirdparty/libvpx/vp8/common/swapyv12buffer.h b/thirdparty/libvpx/vp8/common/swapyv12buffer.h deleted file mode 100644 index 1d66cd3d62..0000000000 --- a/thirdparty/libvpx/vp8/common/swapyv12buffer.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_SWAPYV12BUFFER_H_ -#define VP8_COMMON_SWAPYV12BUFFER_H_ - -#include "vpx_scale/yv12config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, YV12_BUFFER_CONFIG *last_frame); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_SWAPYV12BUFFER_H_ diff --git a/thirdparty/libvpx/vp8/common/systemdependent.h b/thirdparty/libvpx/vp8/common/systemdependent.h deleted file mode 100644 index 3d44e37cf2..0000000000 --- a/thirdparty/libvpx/vp8/common/systemdependent.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP8_COMMON_SYSTEMDEPENDENT_H_ -#define VP8_COMMON_SYSTEMDEPENDENT_H_ - -#include "vpx_config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct VP8Common; -void vp8_machine_specific_config(struct VP8Common *); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_SYSTEMDEPENDENT_H_ diff --git a/thirdparty/libvpx/vp8/common/threading.h b/thirdparty/libvpx/vp8/common/threading.h deleted file mode 100644 index 183b49b8ff..0000000000 --- a/thirdparty/libvpx/vp8/common/threading.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VP8_COMMON_THREADING_H_ -#define VP8_COMMON_THREADING_H_ - -#include "./vpx_config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD - -/* Thread management macros */ -#if defined(_WIN32) && !HAVE_PTHREAD_H -/* Win32 */ -#include <process.h> -#include <windows.h> -#define THREAD_FUNCTION unsigned int __stdcall -#define THREAD_FUNCTION_RETURN DWORD -#define THREAD_SPECIFIC_INDEX DWORD -#define pthread_t HANDLE -#define pthread_attr_t DWORD -#define pthread_detach(thread) if(thread!=NULL)CloseHandle(thread) -#define thread_sleep(nms) Sleep(nms) -#define pthread_cancel(thread) terminate_thread(thread,0) -#define ts_key_create(ts_key, destructor) {ts_key = TlsAlloc();}; -#define pthread_getspecific(ts_key) TlsGetValue(ts_key) -#define pthread_setspecific(ts_key, value) TlsSetValue(ts_key, (void *)value) -#define pthread_self() GetCurrentThreadId() - -#elif defined(__OS2__) -/* OS/2 */ -#define INCL_DOS -#include <os2.h> - -#include <stdlib.h> -#define THREAD_FUNCTION void * -#define THREAD_FUNCTION_RETURN void * -#define THREAD_SPECIFIC_INDEX PULONG -#define pthread_t TID -#define pthread_attr_t ULONG -#define pthread_detach(thread) 0 -#define thread_sleep(nms) DosSleep(nms) -#define pthread_cancel(thread) DosKillThread(thread) -#define ts_key_create(ts_key, destructor) \ - DosAllocThreadLocalMemory(1, &(ts_key)); -#define pthread_getspecific(ts_key) ((void *)(*(ts_key))) -#define pthread_setspecific(ts_key, value) (*(ts_key)=(ULONG)(value)) -#define pthread_self() _gettid() -#else -#ifdef __APPLE__ -#include <mach/mach_init.h> -#include <mach/semaphore.h> -#include <mach/task.h> -#include <time.h> -#include <unistd.h> - -#else -#include <semaphore.h> -#endif - -#include <pthread.h> -/* pthreads */ -/* Nearly everything is already defined */ -#define THREAD_FUNCTION void * -#define THREAD_FUNCTION_RETURN void * -#define THREAD_SPECIFIC_INDEX pthread_key_t -#define ts_key_create(ts_key, destructor) 
pthread_key_create (&(ts_key), destructor); -#endif - -/* Synchronization macros: Win32 and Pthreads */ -#if defined(_WIN32) && !HAVE_PTHREAD_H -#define sem_t HANDLE -#define pause(voidpara) __asm PAUSE -#define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateSemaphore(NULL,0,32768,NULL))==NULL) -#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,INFINITE)) -#define sem_post(sem) ReleaseSemaphore(*sem,1,NULL) -#define sem_destroy(sem) if(*sem)((int)(CloseHandle(*sem))==TRUE) -#define thread_sleep(nms) Sleep(nms) - -#elif defined(__OS2__) -typedef struct -{ - HEV event; - HMTX wait_mutex; - HMTX count_mutex; - int count; -} sem_t; - -static inline int sem_init(sem_t *sem, int pshared, unsigned int value) -{ - DosCreateEventSem(NULL, &sem->event, pshared ? DC_SEM_SHARED : 0, - value > 0 ? TRUE : FALSE); - DosCreateMutexSem(NULL, &sem->wait_mutex, 0, FALSE); - DosCreateMutexSem(NULL, &sem->count_mutex, 0, FALSE); - - sem->count = value; - - return 0; -} - -static inline int sem_wait(sem_t * sem) -{ - DosRequestMutexSem(sem->wait_mutex, -1); - - DosWaitEventSem(sem->event, -1); - - DosRequestMutexSem(sem->count_mutex, -1); - - sem->count--; - if (sem->count == 0) - { - ULONG post_count; - - DosResetEventSem(sem->event, &post_count); - } - - DosReleaseMutexSem(sem->count_mutex); - - DosReleaseMutexSem(sem->wait_mutex); - - return 0; -} - -static inline int sem_post(sem_t * sem) -{ - DosRequestMutexSem(sem->count_mutex, -1); - - if (sem->count < 32768) - { - sem->count++; - DosPostEventSem(sem->event); - } - - DosReleaseMutexSem(sem->count_mutex); - - return 0; -} - -static inline int sem_destroy(sem_t * sem) -{ - DosCloseEventSem(sem->event); - DosCloseMutexSem(sem->wait_mutex); - DosCloseMutexSem(sem->count_mutex); - - return 0; -} - -#define thread_sleep(nms) DosSleep(nms) - -#else - -#ifdef __APPLE__ -#define sem_t semaphore_t -#define sem_init(X,Y,Z) semaphore_create(mach_task_self(), X, SYNC_POLICY_FIFO, Z) -#define sem_wait(sem) 
(semaphore_wait(*sem) ) -#define sem_post(sem) semaphore_signal(*sem) -#define sem_destroy(sem) semaphore_destroy(mach_task_self(),*sem) -#define thread_sleep(nms) /* { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */ -#else -#include <unistd.h> -#include <sched.h> -#define thread_sleep(nms) sched_yield();/* {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */ -#endif -/* Not Windows. Assume pthreads */ - -#endif - -#if ARCH_X86 || ARCH_X86_64 -#include "vpx_ports/x86.h" -#else -#define x86_pause_hint() -#endif - -#include "vpx_util/vpx_thread.h" - -static INLINE void mutex_lock(pthread_mutex_t *const mutex) { - const int kMaxTryLocks = 4000; - int locked = 0; - int i; - - for (i = 0; i < kMaxTryLocks; ++i) { - if (!pthread_mutex_trylock(mutex)) { - locked = 1; - break; - } - } - - if (!locked) - pthread_mutex_lock(mutex); -} - -static INLINE int protected_read(pthread_mutex_t *const mutex, const int *p) { - int ret; - mutex_lock(mutex); - ret = *p; - pthread_mutex_unlock(mutex); - return ret; -} - -static INLINE void sync_read(pthread_mutex_t *const mutex, int mb_col, - const int *last_row_current_mb_col, - const int nsync) { - while (mb_col > (protected_read(mutex, last_row_current_mb_col) - nsync)) { - x86_pause_hint(); - thread_sleep(0); - } -} - -static INLINE void protected_write(pthread_mutex_t *mutex, int *p, int v) { - mutex_lock(mutex); - *p = v; - pthread_mutex_unlock(mutex); -} - -#endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */ - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_THREADING_H_ diff --git a/thirdparty/libvpx/vp8/common/treecoder.c b/thirdparty/libvpx/vp8/common/treecoder.c deleted file mode 100644 index d80c64bdfa..0000000000 --- a/thirdparty/libvpx/vp8/common/treecoder.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#if CONFIG_DEBUG -#include <assert.h> -#endif -#include <stdio.h> - -#include "treecoder.h" - -static void tree2tok( - struct vp8_token_struct *const p, - vp8_tree t, - int i, - int v, - int L -) -{ - v += v; - ++L; - - do - { - const vp8_tree_index j = t[i++]; - - if (j <= 0) - { - p[-j].value = v; - p[-j].Len = L; - } - else - tree2tok(p, t, j, v, L); - } - while (++v & 1); -} - -void vp8_tokens_from_tree(struct vp8_token_struct *p, vp8_tree t) -{ - tree2tok(p, t, 0, 0, 0); -} - -void vp8_tokens_from_tree_offset(struct vp8_token_struct *p, vp8_tree t, - int offset) -{ - tree2tok(p - offset, t, 0, 0, 0); -} - -static void branch_counts( - int n, /* n = size of alphabet */ - vp8_token tok [ /* n */ ], - vp8_tree tree, - unsigned int branch_ct [ /* n-1 */ ] [2], - const unsigned int num_events[ /* n */ ] -) -{ - const int tree_len = n - 1; - int t = 0; - -#if CONFIG_DEBUG - assert(tree_len); -#endif - - do - { - branch_ct[t][0] = branch_ct[t][1] = 0; - } - while (++t < tree_len); - - t = 0; - - do - { - int L = tok[t].Len; - const int enc = tok[t].value; - const unsigned int ct = num_events[t]; - - vp8_tree_index i = 0; - - do - { - const int b = (enc >> --L) & 1; - const int j = i >> 1; -#if CONFIG_DEBUG - assert(j < tree_len && 0 <= L); -#endif - - branch_ct [j] [b] += ct; - i = tree[ i + b]; - } - while (i > 0); - -#if CONFIG_DEBUG - assert(!L); -#endif - } - while (++t < n); - -} - - -void vp8_tree_probs_from_distribution( - int n, /* n = size of alphabet */ - vp8_token tok [ /* n */ ], - vp8_tree tree, - vp8_prob probs [ /* n-1 */ ], - unsigned int branch_ct [ /* n-1 */ ] [2], - const unsigned int num_events[ /* n */ ], - 
unsigned int Pfac, - int rd -) -{ - const int tree_len = n - 1; - int t = 0; - - branch_counts(n, tok, tree, branch_ct, num_events); - - do - { - const unsigned int *const c = branch_ct[t]; - const unsigned int tot = c[0] + c[1]; - -#if CONFIG_DEBUG - assert(tot < (1 << 24)); /* no overflow below */ -#endif - - if (tot) - { - const unsigned int p = ((c[0] * Pfac) + (rd ? tot >> 1 : 0)) / tot; - probs[t] = p < 256 ? (p ? p : 1) : 255; /* agree w/old version for now */ - } - else - probs[t] = vp8_prob_half; - } - while (++t < tree_len); -} diff --git a/thirdparty/libvpx/vp8/common/treecoder.h b/thirdparty/libvpx/vp8/common/treecoder.h deleted file mode 100644 index d22b7c570c..0000000000 --- a/thirdparty/libvpx/vp8/common/treecoder.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_COMMON_TREECODER_H_ -#define VP8_COMMON_TREECODER_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -typedef unsigned char vp8bc_index_t; /* probability index */ - - -typedef unsigned char vp8_prob; - -#define vp8_prob_half ( (vp8_prob) 128) - -typedef signed char vp8_tree_index; -struct bool_coder_spec; - -typedef struct bool_coder_spec bool_coder_spec; -typedef struct bool_writer bool_writer; -typedef struct bool_reader bool_reader; - -typedef const bool_coder_spec c_bool_coder_spec; -typedef const bool_writer c_bool_writer; -typedef const bool_reader c_bool_reader; - - - -# define vp8_complement( x) (255 - x) - - -/* We build coding trees compactly in arrays. - Each node of the tree is a pair of vp8_tree_indices. - Array index often references a corresponding probability table. 
- Index <= 0 means done encoding/decoding and value = -Index, - Index > 0 means need another bit, specification at index. - Nonnegative indices are always even; processing begins at node 0. */ - -typedef const vp8_tree_index vp8_tree[], *vp8_tree_p; - - -typedef const struct vp8_token_struct -{ - int value; - int Len; -} vp8_token; - -/* Construct encoding array from tree. */ - -void vp8_tokens_from_tree(struct vp8_token_struct *, vp8_tree); -void vp8_tokens_from_tree_offset(struct vp8_token_struct *, vp8_tree, - int offset); - - -/* Convert array of token occurrence counts into a table of probabilities - for the associated binary encoding tree. Also writes count of branches - taken for each node on the tree; this facilitiates decisions as to - probability updates. */ - -void vp8_tree_probs_from_distribution( - int n, /* n = size of alphabet */ - vp8_token tok [ /* n */ ], - vp8_tree tree, - vp8_prob probs [ /* n-1 */ ], - unsigned int branch_ct [ /* n-1 */ ] [2], - const unsigned int num_events[ /* n */ ], - unsigned int Pfactor, - int Round -); - -/* Variant of above using coder spec rather than hardwired 8-bit probs. */ - -void vp8bc_tree_probs_from_distribution( - int n, /* n = size of alphabet */ - vp8_token tok [ /* n */ ], - vp8_tree tree, - vp8_prob probs [ /* n-1 */ ], - unsigned int branch_ct [ /* n-1 */ ] [2], - const unsigned int num_events[ /* n */ ], - c_bool_coder_spec *s -); - - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_TREECODER_H_ diff --git a/thirdparty/libvpx/vp8/common/vp8_entropymodedata.h b/thirdparty/libvpx/vp8/common/vp8_entropymodedata.h deleted file mode 100644 index c4aed49897..0000000000 --- a/thirdparty/libvpx/vp8/common/vp8_entropymodedata.h +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. -*/ - -#ifndef VP8_COMMON_VP8_ENTROPYMODEDATA_H_ -#define VP8_COMMON_VP8_ENTROPYMODEDATA_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/*Generated file, included by entropymode.c*/ - - -const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES] = -{ - { 0, 1 }, - { 2, 2 }, - { 6, 3 }, - { 28, 5 }, - { 30, 5 }, - { 58, 6 }, - { 59, 6 }, - { 62, 6 }, - { 126, 7 }, - { 127, 7 } -}; - -const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES] = -{ - { 0, 1 }, - { 4, 3 }, - { 5, 3 }, - { 6, 3 }, - { 7, 3 } -}; - -const struct vp8_token_struct vp8_kf_ymode_encodings[VP8_YMODES] = -{ - { 4, 3 }, - { 5, 3 }, - { 6, 3 }, - { 7, 3 }, - { 0, 1 } -}; - -const struct vp8_token_struct vp8_uv_mode_encodings[VP8_UV_MODES] = -{ - { 0, 1 }, - { 2, 2 }, - { 6, 3 }, - { 7, 3 } -}; - -const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS] = -{ - { 6, 3 }, - { 7, 3 }, - { 2, 2 }, - { 0, 1 } -}; - -const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS] = -{ - { 2, 2 }, - { 6, 3 }, - { 0, 1 }, - { 14, 4 }, - { 15, 4 } -}; - -const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS] = -{ - { 0, 1 }, - { 2, 2 }, - { 6, 3 }, - { 7, 3 } -}; - -const struct vp8_token_struct vp8_small_mvencodings[8] = -{ - { 0, 3 }, - { 1, 3 }, - { 2, 3 }, - { 3, 3 }, - { 4, 3 }, - { 5, 3 }, - { 6, 3 }, - { 7, 3 } -}; - -const vp8_prob vp8_ymode_prob[VP8_YMODES-1] = -{ - 112, 86, 140, 37 -}; - -const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1] = -{ - 145, 156, 163, 128 -}; - -const vp8_prob vp8_uv_mode_prob[VP8_UV_MODES-1] = -{ - 162, 101, 204 -}; - -const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1] = -{ - 142, 114, 183 -}; - -const vp8_prob vp8_bmode_prob[VP8_BINTRAMODES-1] = -{ - 120, 90, 79, 133, 87, 85, 80, 111, 151 -}; - - - -const vp8_prob vp8_kf_bmode_prob 
-[VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1] = -{ - { - { 231, 120, 48, 89, 115, 113, 120, 152, 112 }, - { 152, 179, 64, 126, 170, 118, 46, 70, 95 }, - { 175, 69, 143, 80, 85, 82, 72, 155, 103 }, - { 56, 58, 10, 171, 218, 189, 17, 13, 152 }, - { 144, 71, 10, 38, 171, 213, 144, 34, 26 }, - { 114, 26, 17, 163, 44, 195, 21, 10, 173 }, - { 121, 24, 80, 195, 26, 62, 44, 64, 85 }, - { 170, 46, 55, 19, 136, 160, 33, 206, 71 }, - { 63, 20, 8, 114, 114, 208, 12, 9, 226 }, - { 81, 40, 11, 96, 182, 84, 29, 16, 36 } - }, - { - { 134, 183, 89, 137, 98, 101, 106, 165, 148 }, - { 72, 187, 100, 130, 157, 111, 32, 75, 80 }, - { 66, 102, 167, 99, 74, 62, 40, 234, 128 }, - { 41, 53, 9, 178, 241, 141, 26, 8, 107 }, - { 104, 79, 12, 27, 217, 255, 87, 17, 7 }, - { 74, 43, 26, 146, 73, 166, 49, 23, 157 }, - { 65, 38, 105, 160, 51, 52, 31, 115, 128 }, - { 87, 68, 71, 44, 114, 51, 15, 186, 23 }, - { 47, 41, 14, 110, 182, 183, 21, 17, 194 }, - { 66, 45, 25, 102, 197, 189, 23, 18, 22 } - }, - { - { 88, 88, 147, 150, 42, 46, 45, 196, 205 }, - { 43, 97, 183, 117, 85, 38, 35, 179, 61 }, - { 39, 53, 200, 87, 26, 21, 43, 232, 171 }, - { 56, 34, 51, 104, 114, 102, 29, 93, 77 }, - { 107, 54, 32, 26, 51, 1, 81, 43, 31 }, - { 39, 28, 85, 171, 58, 165, 90, 98, 64 }, - { 34, 22, 116, 206, 23, 34, 43, 166, 73 }, - { 68, 25, 106, 22, 64, 171, 36, 225, 114 }, - { 34, 19, 21, 102, 132, 188, 16, 76, 124 }, - { 62, 18, 78, 95, 85, 57, 50, 48, 51 } - }, - { - { 193, 101, 35, 159, 215, 111, 89, 46, 111 }, - { 60, 148, 31, 172, 219, 228, 21, 18, 111 }, - { 112, 113, 77, 85, 179, 255, 38, 120, 114 }, - { 40, 42, 1, 196, 245, 209, 10, 25, 109 }, - { 100, 80, 8, 43, 154, 1, 51, 26, 71 }, - { 88, 43, 29, 140, 166, 213, 37, 43, 154 }, - { 61, 63, 30, 155, 67, 45, 68, 1, 209 }, - { 142, 78, 78, 16, 255, 128, 34, 197, 171 }, - { 41, 40, 5, 102, 211, 183, 4, 1, 221 }, - { 51, 50, 17, 168, 209, 192, 23, 25, 82 } - }, - { - { 125, 98, 42, 88, 104, 85, 117, 175, 82 }, - { 95, 84, 53, 89, 128, 100, 113, 101, 45 
}, - { 75, 79, 123, 47, 51, 128, 81, 171, 1 }, - { 57, 17, 5, 71, 102, 57, 53, 41, 49 }, - { 115, 21, 2, 10, 102, 255, 166, 23, 6 }, - { 38, 33, 13, 121, 57, 73, 26, 1, 85 }, - { 41, 10, 67, 138, 77, 110, 90, 47, 114 }, - { 101, 29, 16, 10, 85, 128, 101, 196, 26 }, - { 57, 18, 10, 102, 102, 213, 34, 20, 43 }, - { 117, 20, 15, 36, 163, 128, 68, 1, 26 } - }, - { - { 138, 31, 36, 171, 27, 166, 38, 44, 229 }, - { 67, 87, 58, 169, 82, 115, 26, 59, 179 }, - { 63, 59, 90, 180, 59, 166, 93, 73, 154 }, - { 40, 40, 21, 116, 143, 209, 34, 39, 175 }, - { 57, 46, 22, 24, 128, 1, 54, 17, 37 }, - { 47, 15, 16, 183, 34, 223, 49, 45, 183 }, - { 46, 17, 33, 183, 6, 98, 15, 32, 183 }, - { 65, 32, 73, 115, 28, 128, 23, 128, 205 }, - { 40, 3, 9, 115, 51, 192, 18, 6, 223 }, - { 87, 37, 9, 115, 59, 77, 64, 21, 47 } - }, - { - { 104, 55, 44, 218, 9, 54, 53, 130, 226 }, - { 64, 90, 70, 205, 40, 41, 23, 26, 57 }, - { 54, 57, 112, 184, 5, 41, 38, 166, 213 }, - { 30, 34, 26, 133, 152, 116, 10, 32, 134 }, - { 75, 32, 12, 51, 192, 255, 160, 43, 51 }, - { 39, 19, 53, 221, 26, 114, 32, 73, 255 }, - { 31, 9, 65, 234, 2, 15, 1, 118, 73 }, - { 88, 31, 35, 67, 102, 85, 55, 186, 85 }, - { 56, 21, 23, 111, 59, 205, 45, 37, 192 }, - { 55, 38, 70, 124, 73, 102, 1, 34, 98 } - }, - { - { 102, 61, 71, 37, 34, 53, 31, 243, 192 }, - { 69, 60, 71, 38, 73, 119, 28, 222, 37 }, - { 68, 45, 128, 34, 1, 47, 11, 245, 171 }, - { 62, 17, 19, 70, 146, 85, 55, 62, 70 }, - { 75, 15, 9, 9, 64, 255, 184, 119, 16 }, - { 37, 43, 37, 154, 100, 163, 85, 160, 1 }, - { 63, 9, 92, 136, 28, 64, 32, 201, 85 }, - { 86, 6, 28, 5, 64, 255, 25, 248, 1 }, - { 56, 8, 17, 132, 137, 255, 55, 116, 128 }, - { 58, 15, 20, 82, 135, 57, 26, 121, 40 } - }, - { - { 164, 50, 31, 137, 154, 133, 25, 35, 218 }, - { 51, 103, 44, 131, 131, 123, 31, 6, 158 }, - { 86, 40, 64, 135, 148, 224, 45, 183, 128 }, - { 22, 26, 17, 131, 240, 154, 14, 1, 209 }, - { 83, 12, 13, 54, 192, 255, 68, 47, 28 }, - { 45, 16, 21, 91, 64, 222, 7, 1, 197 }, - { 56, 21, 39, 
155, 60, 138, 23, 102, 213 }, - { 85, 26, 85, 85, 128, 128, 32, 146, 171 }, - { 18, 11, 7, 63, 144, 171, 4, 4, 246 }, - { 35, 27, 10, 146, 174, 171, 12, 26, 128 } - }, - { - { 190, 80, 35, 99, 180, 80, 126, 54, 45 }, - { 85, 126, 47, 87, 176, 51, 41, 20, 32 }, - { 101, 75, 128, 139, 118, 146, 116, 128, 85 }, - { 56, 41, 15, 176, 236, 85, 37, 9, 62 }, - { 146, 36, 19, 30, 171, 255, 97, 27, 20 }, - { 71, 30, 17, 119, 118, 255, 17, 18, 138 }, - { 101, 38, 60, 138, 55, 70, 43, 26, 142 }, - { 138, 45, 61, 62, 219, 1, 81, 188, 64 }, - { 32, 41, 20, 117, 151, 142, 20, 21, 163 }, - { 112, 19, 12, 61, 195, 128, 48, 4, 24 } - } -}; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_VP8_ENTROPYMODEDATA_H_ diff --git a/thirdparty/libvpx/vp8/common/vp8_loopfilter.c b/thirdparty/libvpx/vp8/common/vp8_loopfilter.c deleted file mode 100644 index 756ad488f9..0000000000 --- a/thirdparty/libvpx/vp8/common/vp8_loopfilter.c +++ /dev/null @@ -1,661 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "loopfilter.h" -#include "onyxc_int.h" -#include "vpx_mem/vpx_mem.h" - - -static void lf_init_lut(loop_filter_info_n *lfi) -{ - int filt_lvl; - - for (filt_lvl = 0; filt_lvl <= MAX_LOOP_FILTER; filt_lvl++) - { - if (filt_lvl >= 40) - { - lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 2; - lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 3; - } - else if (filt_lvl >= 20) - { - lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1; - lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 2; - } - else if (filt_lvl >= 15) - { - lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1; - lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 1; - } - else - { - lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 0; - lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 0; - } - } - - lfi->mode_lf_lut[DC_PRED] = 1; - lfi->mode_lf_lut[V_PRED] = 1; - lfi->mode_lf_lut[H_PRED] = 1; - lfi->mode_lf_lut[TM_PRED] = 1; - lfi->mode_lf_lut[B_PRED] = 0; - - lfi->mode_lf_lut[ZEROMV] = 1; - lfi->mode_lf_lut[NEARESTMV] = 2; - lfi->mode_lf_lut[NEARMV] = 2; - lfi->mode_lf_lut[NEWMV] = 2; - lfi->mode_lf_lut[SPLITMV] = 3; - -} - -void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi, - int sharpness_lvl) -{ - int i; - - /* For each possible value for the loop filter fill out limits */ - for (i = 0; i <= MAX_LOOP_FILTER; i++) - { - int filt_lvl = i; - int block_inside_limit = 0; - - /* Set loop filter paramaeters that control sharpness. 
*/ - block_inside_limit = filt_lvl >> (sharpness_lvl > 0); - block_inside_limit = block_inside_limit >> (sharpness_lvl > 4); - - if (sharpness_lvl > 0) - { - if (block_inside_limit > (9 - sharpness_lvl)) - block_inside_limit = (9 - sharpness_lvl); - } - - if (block_inside_limit < 1) - block_inside_limit = 1; - - memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH); - memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit), SIMD_WIDTH); - memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit), - SIMD_WIDTH); - } -} - -void vp8_loop_filter_init(VP8_COMMON *cm) -{ - loop_filter_info_n *lfi = &cm->lf_info; - int i; - - /* init limits for given sharpness*/ - vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level); - cm->last_sharpness_level = cm->sharpness_level; - - /* init LUT for lvl and hev thr picking */ - lf_init_lut(lfi); - - /* init hev threshold const vectors */ - for(i = 0; i < 4 ; i++) - { - memset(lfi->hev_thr[i], i, SIMD_WIDTH); - } -} - -void vp8_loop_filter_frame_init(VP8_COMMON *cm, - MACROBLOCKD *mbd, - int default_filt_lvl) -{ - int seg, /* segment number */ - ref, /* index in ref_lf_deltas */ - mode; /* index in mode_lf_deltas */ - - loop_filter_info_n *lfi = &cm->lf_info; - - /* update limits if sharpness has changed */ - if(cm->last_sharpness_level != cm->sharpness_level) - { - vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level); - cm->last_sharpness_level = cm->sharpness_level; - } - - for(seg = 0; seg < MAX_MB_SEGMENTS; seg++) - { - int lvl_seg = default_filt_lvl; - int lvl_ref, lvl_mode; - - /* Note the baseline filter values for each segment */ - if (mbd->segmentation_enabled) - { - /* Abs value */ - if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA) - { - lvl_seg = mbd->segment_feature_data[MB_LVL_ALT_LF][seg]; - } - else /* Delta Value */ - { - lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg]; - } - lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 
63: lvl_seg) : 0; - } - - if (!mbd->mode_ref_lf_delta_enabled) - { - /* we could get rid of this if we assume that deltas are set to - * zero when not in use; encoder always uses deltas - */ - memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 ); - continue; - } - - /* INTRA_FRAME */ - ref = INTRA_FRAME; - - /* Apply delta for reference frame */ - lvl_ref = lvl_seg + mbd->ref_lf_deltas[ref]; - - /* Apply delta for Intra modes */ - mode = 0; /* B_PRED */ - /* Only the split mode BPRED has a further special case */ - lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode]; - /* clamp */ - lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; - - lfi->lvl[seg][ref][mode] = lvl_mode; - - mode = 1; /* all the rest of Intra modes */ - /* clamp */ - lvl_mode = (lvl_ref > 0) ? (lvl_ref > 63 ? 63 : lvl_ref) : 0; - lfi->lvl[seg][ref][mode] = lvl_mode; - - /* LAST, GOLDEN, ALT */ - for(ref = 1; ref < MAX_REF_FRAMES; ref++) - { - /* Apply delta for reference frame */ - lvl_ref = lvl_seg + mbd->ref_lf_deltas[ref]; - - /* Apply delta for Inter modes */ - for (mode = 1; mode < 4; mode++) - { - lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode]; - /* clamp */ - lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 
63 : lvl_mode) : 0; - - lfi->lvl[seg][ref][mode] = lvl_mode; - } - } - } -} - - -void vp8_loop_filter_row_normal(VP8_COMMON *cm, MODE_INFO *mode_info_context, - int mb_row, int post_ystride, int post_uvstride, - unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr) -{ - int mb_col; - int filter_level; - loop_filter_info_n *lfi_n = &cm->lf_info; - loop_filter_info lfi; - FRAME_TYPE frame_type = cm->frame_type; - - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - int skip_lf = (mode_info_context->mbmi.mode != B_PRED && - mode_info_context->mbmi.mode != SPLITMV && - mode_info_context->mbmi.mb_skip_coeff); - - const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; - const int seg = mode_info_context->mbmi.segment_id; - const int ref_frame = mode_info_context->mbmi.ref_frame; - - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - - if (filter_level) - { - const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; - lfi.mblim = lfi_n->mblim[filter_level]; - lfi.blim = lfi_n->blim[filter_level]; - lfi.lim = lfi_n->lim[filter_level]; - lfi.hev_thr = lfi_n->hev_thr[hev_index]; - - if (mb_col > 0) - vp8_loop_filter_mbv - (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); - - if (!skip_lf) - vp8_loop_filter_bv - (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); - - /* don't apply across umv border */ - if (mb_row > 0) - vp8_loop_filter_mbh - (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); - - if (!skip_lf) - vp8_loop_filter_bh - (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); - } - - y_ptr += 16; - u_ptr += 8; - v_ptr += 8; - - mode_info_context++; /* step to next MB */ - } - -} - -void vp8_loop_filter_row_simple(VP8_COMMON *cm, MODE_INFO *mode_info_context, - int mb_row, int post_ystride, int post_uvstride, - unsigned char *y_ptr, unsigned char *u_ptr, - unsigned char *v_ptr) -{ - int mb_col; - int filter_level; - loop_filter_info_n *lfi_n = &cm->lf_info; - (void)post_uvstride; - - for 
(mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - int skip_lf = (mode_info_context->mbmi.mode != B_PRED && - mode_info_context->mbmi.mode != SPLITMV && - mode_info_context->mbmi.mb_skip_coeff); - - const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; - const int seg = mode_info_context->mbmi.segment_id; - const int ref_frame = mode_info_context->mbmi.ref_frame; - - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - - if (filter_level) - { - if (mb_col > 0) - vp8_loop_filter_simple_mbv - (y_ptr, post_ystride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - vp8_loop_filter_simple_bv - (y_ptr, post_ystride, lfi_n->blim[filter_level]); - - /* don't apply across umv border */ - if (mb_row > 0) - vp8_loop_filter_simple_mbh - (y_ptr, post_ystride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - vp8_loop_filter_simple_bh - (y_ptr, post_ystride, lfi_n->blim[filter_level]); - } - - y_ptr += 16; - u_ptr += 8; - v_ptr += 8; - - mode_info_context++; /* step to next MB */ - } - -} -void vp8_loop_filter_frame(VP8_COMMON *cm, - MACROBLOCKD *mbd, - int frame_type) -{ - YV12_BUFFER_CONFIG *post = cm->frame_to_show; - loop_filter_info_n *lfi_n = &cm->lf_info; - loop_filter_info lfi; - - int mb_row; - int mb_col; - int mb_rows = cm->mb_rows; - int mb_cols = cm->mb_cols; - - int filter_level; - - unsigned char *y_ptr, *u_ptr, *v_ptr; - - /* Point at base of Mb MODE_INFO list */ - const MODE_INFO *mode_info_context = cm->mi; - int post_y_stride = post->y_stride; - int post_uv_stride = post->uv_stride; - - /* Initialize the loop filter for this frame. 
*/ - vp8_loop_filter_frame_init(cm, mbd, cm->filter_level); - - /* Set up the buffer pointers */ - y_ptr = post->y_buffer; - u_ptr = post->u_buffer; - v_ptr = post->v_buffer; - - /* vp8_filter each macro block */ - if (cm->filter_type == NORMAL_LOOPFILTER) - { - for (mb_row = 0; mb_row < mb_rows; mb_row++) - { - for (mb_col = 0; mb_col < mb_cols; mb_col++) - { - int skip_lf = (mode_info_context->mbmi.mode != B_PRED && - mode_info_context->mbmi.mode != SPLITMV && - mode_info_context->mbmi.mb_skip_coeff); - - const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; - const int seg = mode_info_context->mbmi.segment_id; - const int ref_frame = mode_info_context->mbmi.ref_frame; - - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - - if (filter_level) - { - const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; - lfi.mblim = lfi_n->mblim[filter_level]; - lfi.blim = lfi_n->blim[filter_level]; - lfi.lim = lfi_n->lim[filter_level]; - lfi.hev_thr = lfi_n->hev_thr[hev_index]; - - if (mb_col > 0) - vp8_loop_filter_mbv - (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi); - - if (!skip_lf) - vp8_loop_filter_bv - (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi); - - /* don't apply across umv border */ - if (mb_row > 0) - vp8_loop_filter_mbh - (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi); - - if (!skip_lf) - vp8_loop_filter_bh - (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi); - } - - y_ptr += 16; - u_ptr += 8; - v_ptr += 8; - - mode_info_context++; /* step to next MB */ - } - y_ptr += post_y_stride * 16 - post->y_width; - u_ptr += post_uv_stride * 8 - post->uv_width; - v_ptr += post_uv_stride * 8 - post->uv_width; - - mode_info_context++; /* Skip border mb */ - - } - } - else /* SIMPLE_LOOPFILTER */ - { - for (mb_row = 0; mb_row < mb_rows; mb_row++) - { - for (mb_col = 0; mb_col < mb_cols; mb_col++) - { - int skip_lf = (mode_info_context->mbmi.mode != B_PRED && - mode_info_context->mbmi.mode != 
SPLITMV && - mode_info_context->mbmi.mb_skip_coeff); - - const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; - const int seg = mode_info_context->mbmi.segment_id; - const int ref_frame = mode_info_context->mbmi.ref_frame; - - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - if (filter_level) - { - const unsigned char * mblim = lfi_n->mblim[filter_level]; - const unsigned char * blim = lfi_n->blim[filter_level]; - - if (mb_col > 0) - vp8_loop_filter_simple_mbv - (y_ptr, post_y_stride, mblim); - - if (!skip_lf) - vp8_loop_filter_simple_bv - (y_ptr, post_y_stride, blim); - - /* don't apply across umv border */ - if (mb_row > 0) - vp8_loop_filter_simple_mbh - (y_ptr, post_y_stride, mblim); - - if (!skip_lf) - vp8_loop_filter_simple_bh - (y_ptr, post_y_stride, blim); - } - - y_ptr += 16; - u_ptr += 8; - v_ptr += 8; - - mode_info_context++; /* step to next MB */ - } - y_ptr += post_y_stride * 16 - post->y_width; - u_ptr += post_uv_stride * 8 - post->uv_width; - v_ptr += post_uv_stride * 8 - post->uv_width; - - mode_info_context++; /* Skip border mb */ - - } - } -} - -void vp8_loop_filter_frame_yonly -( - VP8_COMMON *cm, - MACROBLOCKD *mbd, - int default_filt_lvl -) -{ - YV12_BUFFER_CONFIG *post = cm->frame_to_show; - - unsigned char *y_ptr; - int mb_row; - int mb_col; - - loop_filter_info_n *lfi_n = &cm->lf_info; - loop_filter_info lfi; - - int filter_level; - FRAME_TYPE frame_type = cm->frame_type; - - /* Point at base of Mb MODE_INFO list */ - const MODE_INFO *mode_info_context = cm->mi; - -#if 0 - if(default_filt_lvl == 0) /* no filter applied */ - return; -#endif - - /* Initialize the loop filter for this frame. 
*/ - vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl); - - /* Set up the buffer pointers */ - y_ptr = post->y_buffer; - - /* vp8_filter each macro block */ - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) - { - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - int skip_lf = (mode_info_context->mbmi.mode != B_PRED && - mode_info_context->mbmi.mode != SPLITMV && - mode_info_context->mbmi.mb_skip_coeff); - - const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; - const int seg = mode_info_context->mbmi.segment_id; - const int ref_frame = mode_info_context->mbmi.ref_frame; - - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - - if (filter_level) - { - if (cm->filter_type == NORMAL_LOOPFILTER) - { - const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; - lfi.mblim = lfi_n->mblim[filter_level]; - lfi.blim = lfi_n->blim[filter_level]; - lfi.lim = lfi_n->lim[filter_level]; - lfi.hev_thr = lfi_n->hev_thr[hev_index]; - - if (mb_col > 0) - vp8_loop_filter_mbv - (y_ptr, 0, 0, post->y_stride, 0, &lfi); - - if (!skip_lf) - vp8_loop_filter_bv - (y_ptr, 0, 0, post->y_stride, 0, &lfi); - - /* don't apply across umv border */ - if (mb_row > 0) - vp8_loop_filter_mbh - (y_ptr, 0, 0, post->y_stride, 0, &lfi); - - if (!skip_lf) - vp8_loop_filter_bh - (y_ptr, 0, 0, post->y_stride, 0, &lfi); - } - else - { - if (mb_col > 0) - vp8_loop_filter_simple_mbv - (y_ptr, post->y_stride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - vp8_loop_filter_simple_bv - (y_ptr, post->y_stride, lfi_n->blim[filter_level]); - - /* don't apply across umv border */ - if (mb_row > 0) - vp8_loop_filter_simple_mbh - (y_ptr, post->y_stride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - vp8_loop_filter_simple_bh - (y_ptr, post->y_stride, lfi_n->blim[filter_level]); - } - } - - y_ptr += 16; - mode_info_context ++; /* step to next MB */ - - } - - y_ptr += post->y_stride * 16 - post->y_width; - mode_info_context ++; /* Skip border mb */ - } - -} - -void 
vp8_loop_filter_partial_frame -( - VP8_COMMON *cm, - MACROBLOCKD *mbd, - int default_filt_lvl -) -{ - YV12_BUFFER_CONFIG *post = cm->frame_to_show; - - unsigned char *y_ptr; - int mb_row; - int mb_col; - int mb_cols = post->y_width >> 4; - int mb_rows = post->y_height >> 4; - - int linestocopy; - - loop_filter_info_n *lfi_n = &cm->lf_info; - loop_filter_info lfi; - - int filter_level; - FRAME_TYPE frame_type = cm->frame_type; - - const MODE_INFO *mode_info_context; - -#if 0 - if(default_filt_lvl == 0) /* no filter applied */ - return; -#endif - - /* Initialize the loop filter for this frame. */ - vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl); - - /* number of MB rows to use in partial filtering */ - linestocopy = mb_rows / PARTIAL_FRAME_FRACTION; - linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */ - - /* Set up the buffer pointers; partial image starts at ~middle of frame */ - y_ptr = post->y_buffer + ((post->y_height >> 5) * 16) * post->y_stride; - mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1); - - /* vp8_filter each macro block */ - for (mb_row = 0; mb_row<(linestocopy >> 4); mb_row++) - { - for (mb_col = 0; mb_col < mb_cols; mb_col++) - { - int skip_lf = (mode_info_context->mbmi.mode != B_PRED && - mode_info_context->mbmi.mode != SPLITMV && - mode_info_context->mbmi.mb_skip_coeff); - - const int mode_index = - lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; - const int seg = mode_info_context->mbmi.segment_id; - const int ref_frame = mode_info_context->mbmi.ref_frame; - - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - - if (filter_level) - { - if (cm->filter_type == NORMAL_LOOPFILTER) - { - const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; - lfi.mblim = lfi_n->mblim[filter_level]; - lfi.blim = lfi_n->blim[filter_level]; - lfi.lim = lfi_n->lim[filter_level]; - lfi.hev_thr = lfi_n->hev_thr[hev_index]; - - if (mb_col > 0) - vp8_loop_filter_mbv - (y_ptr, 0, 0, post->y_stride, 0, 
&lfi); - - if (!skip_lf) - vp8_loop_filter_bv - (y_ptr, 0, 0, post->y_stride, 0, &lfi); - - vp8_loop_filter_mbh - (y_ptr, 0, 0, post->y_stride, 0, &lfi); - - if (!skip_lf) - vp8_loop_filter_bh - (y_ptr, 0, 0, post->y_stride, 0, &lfi); - } - else - { - if (mb_col > 0) - vp8_loop_filter_simple_mbv - (y_ptr, post->y_stride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - vp8_loop_filter_simple_bv - (y_ptr, post->y_stride, lfi_n->blim[filter_level]); - - vp8_loop_filter_simple_mbh - (y_ptr, post->y_stride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - vp8_loop_filter_simple_bh - (y_ptr, post->y_stride, lfi_n->blim[filter_level]); - } - } - - y_ptr += 16; - mode_info_context += 1; /* step to next MB */ - } - - y_ptr += post->y_stride * 16 - post->y_width; - mode_info_context += 1; /* Skip border mb */ - } -} diff --git a/thirdparty/libvpx/vp8/common/x86/copy_sse2.asm b/thirdparty/libvpx/vp8/common/x86/copy_sse2.asm deleted file mode 100644 index 86fae26956..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/copy_sse2.asm +++ /dev/null @@ -1,93 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - - -;void vp8_copy32xn_sse2( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *dst_ptr, -; int dst_stride, -; int height); -global sym(vp8_copy32xn_sse2) PRIVATE -sym(vp8_copy32xn_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;dst_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;dst_stride - movsxd rcx, dword ptr arg(4) ;height - -.block_copy_sse2_loopx4: - movdqu xmm0, XMMWORD PTR [rsi] - movdqu xmm1, XMMWORD PTR [rsi + 16] - movdqu xmm2, XMMWORD PTR [rsi + rax] - movdqu xmm3, XMMWORD PTR [rsi + rax + 16] - - lea rsi, [rsi+rax*2] - - movdqu xmm4, XMMWORD PTR [rsi] - movdqu xmm5, XMMWORD PTR [rsi + 16] - movdqu xmm6, XMMWORD PTR [rsi + rax] - movdqu xmm7, XMMWORD PTR [rsi + rax + 16] - - lea rsi, [rsi+rax*2] - - movdqa XMMWORD PTR [rdi], xmm0 - movdqa XMMWORD PTR [rdi + 16], xmm1 - movdqa XMMWORD PTR [rdi + rdx], xmm2 - movdqa XMMWORD PTR [rdi + rdx + 16], xmm3 - - lea rdi, [rdi+rdx*2] - - movdqa XMMWORD PTR [rdi], xmm4 - movdqa XMMWORD PTR [rdi + 16], xmm5 - movdqa XMMWORD PTR [rdi + rdx], xmm6 - movdqa XMMWORD PTR [rdi + rdx + 16], xmm7 - - lea rdi, [rdi+rdx*2] - - sub rcx, 4 - cmp rcx, 4 - jge .block_copy_sse2_loopx4 - - cmp rcx, 0 - je .copy_is_done - -.block_copy_sse2_loop: - movdqu xmm0, XMMWORD PTR [rsi] - movdqu xmm1, XMMWORD PTR [rsi + 16] - lea rsi, [rsi+rax] - - movdqa XMMWORD PTR [rdi], xmm0 - movdqa XMMWORD PTR [rdi + 16], xmm1 - lea rdi, [rdi+rdx] - - sub rcx, 1 - jne .block_copy_sse2_loop - -.copy_is_done: - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/thirdparty/libvpx/vp8/common/x86/copy_sse3.asm b/thirdparty/libvpx/vp8/common/x86/copy_sse3.asm deleted file mode 100644 index d789a40ccf..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/copy_sse3.asm +++ /dev/null @@ -1,146 +0,0 @@ -; -; Copyright 
(c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "vpx_ports/x86_abi_support.asm" - -%macro STACK_FRAME_CREATE_X3 0 -%if ABI_IS_32BIT - %define src_ptr rsi - %define src_stride rax - %define ref_ptr rdi - %define ref_stride rdx - %define end_ptr rcx - %define ret_var rbx - %define result_ptr arg(4) - %define max_sad arg(4) - %define height dword ptr arg(4) - push rbp - mov rbp, rsp - push rsi - push rdi - push rbx - - mov rsi, arg(0) ; src_ptr - mov rdi, arg(2) ; ref_ptr - - movsxd rax, dword ptr arg(1) ; src_stride - movsxd rdx, dword ptr arg(3) ; ref_stride -%else - %if LIBVPX_YASM_WIN64 - SAVE_XMM 7, u - %define src_ptr rcx - %define src_stride rdx - %define ref_ptr r8 - %define ref_stride r9 - %define end_ptr r10 - %define ret_var r11 - %define result_ptr [rsp+xmm_stack_space+8+4*8] - %define max_sad [rsp+xmm_stack_space+8+4*8] - %define height dword ptr [rsp+xmm_stack_space+8+4*8] - %else - %define src_ptr rdi - %define src_stride rsi - %define ref_ptr rdx - %define ref_stride rcx - %define end_ptr r9 - %define ret_var r10 - %define result_ptr r8 - %define max_sad r8 - %define height r8 - %endif -%endif - -%endmacro - -%macro STACK_FRAME_DESTROY_X3 0 - %define src_ptr - %define src_stride - %define ref_ptr - %define ref_stride - %define end_ptr - %define ret_var - %define result_ptr - %define max_sad - %define height - -%if ABI_IS_32BIT - pop rbx - pop rdi - pop rsi - pop rbp -%else - %if LIBVPX_YASM_WIN64 - RESTORE_XMM - %endif -%endif - ret -%endmacro - - -;void vp8_copy32xn_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *dst_ptr, -; int dst_stride, -; int height); -global 
sym(vp8_copy32xn_sse3) PRIVATE -sym(vp8_copy32xn_sse3): - - STACK_FRAME_CREATE_X3 - -.block_copy_sse3_loopx4: - lea end_ptr, [src_ptr+src_stride*2] - - movdqu xmm0, XMMWORD PTR [src_ptr] - movdqu xmm1, XMMWORD PTR [src_ptr + 16] - movdqu xmm2, XMMWORD PTR [src_ptr + src_stride] - movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16] - movdqu xmm4, XMMWORD PTR [end_ptr] - movdqu xmm5, XMMWORD PTR [end_ptr + 16] - movdqu xmm6, XMMWORD PTR [end_ptr + src_stride] - movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16] - - lea src_ptr, [src_ptr+src_stride*4] - - lea end_ptr, [ref_ptr+ref_stride*2] - - movdqa XMMWORD PTR [ref_ptr], xmm0 - movdqa XMMWORD PTR [ref_ptr + 16], xmm1 - movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2 - movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3 - movdqa XMMWORD PTR [end_ptr], xmm4 - movdqa XMMWORD PTR [end_ptr + 16], xmm5 - movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6 - movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7 - - lea ref_ptr, [ref_ptr+ref_stride*4] - - sub height, 4 - cmp height, 4 - jge .block_copy_sse3_loopx4 - - ;Check to see if there is more rows need to be copied. - cmp height, 0 - je .copy_is_done - -.block_copy_sse3_loop: - movdqu xmm0, XMMWORD PTR [src_ptr] - movdqu xmm1, XMMWORD PTR [src_ptr + 16] - lea src_ptr, [src_ptr+src_stride] - - movdqa XMMWORD PTR [ref_ptr], xmm0 - movdqa XMMWORD PTR [ref_ptr + 16], xmm1 - lea ref_ptr, [ref_ptr+ref_stride] - - sub height, 1 - jne .block_copy_sse3_loop - -.copy_is_done: - STACK_FRAME_DESTROY_X3 diff --git a/thirdparty/libvpx/vp8/common/x86/dequantize_mmx.asm b/thirdparty/libvpx/vp8/common/x86/dequantize_mmx.asm deleted file mode 100644 index 4e551f00aa..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/dequantize_mmx.asm +++ /dev/null @@ -1,258 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. 
An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - - -;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q) -global sym(vp8_dequantize_b_impl_mmx) PRIVATE -sym(vp8_dequantize_b_impl_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;sq - mov rdi, arg(1) ;dq - mov rax, arg(2) ;q - - movq mm1, [rsi] - pmullw mm1, [rax+0] ; mm4 *= kernel 0 modifiers. - movq [rdi], mm1 - - movq mm1, [rsi+8] - pmullw mm1, [rax+8] ; mm4 *= kernel 0 modifiers. - movq [rdi+8], mm1 - - movq mm1, [rsi+16] - pmullw mm1, [rax+16] ; mm4 *= kernel 0 modifiers. - movq [rdi+16], mm1 - - movq mm1, [rsi+24] - pmullw mm1, [rax+24] ; mm4 *= kernel 0 modifiers. - movq [rdi+24], mm1 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void dequant_idct_add_mmx( -;short *input, 0 -;short *dq, 1 -;unsigned char *dest, 2 -;int stride) 3 -global sym(vp8_dequant_idct_add_mmx) PRIVATE -sym(vp8_dequant_idct_add_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - GET_GOT rbx - push rdi - ; end prolog - - mov rax, arg(0) ;input - mov rdx, arg(1) ;dq - - - movq mm0, [rax ] - pmullw mm0, [rdx] - - movq mm1, [rax +8] - pmullw mm1, [rdx +8] - - movq mm2, [rax+16] - pmullw mm2, [rdx+16] - - movq mm3, [rax+24] - pmullw mm3, [rdx+24] - - mov rdx, arg(2) ;dest - - pxor mm7, mm7 - - - movq [rax], mm7 - movq [rax+8], mm7 - - movq [rax+16],mm7 - movq [rax+24],mm7 - - - movsxd rdi, dword ptr arg(3) ;stride - - psubw mm0, mm2 ; b1= 0-2 - paddw mm2, mm2 ; - - movq mm5, mm1 - paddw mm2, mm0 ; a1 =0+2 - - pmulhw mm5, [GLOBAL(x_s1sqr2)]; - paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) - - movq mm7, mm3 ; - pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; - - paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw mm7, mm5 ; c1 - - movq mm5, mm1 - movq mm4, mm3 - 
- pmulhw mm5, [GLOBAL(x_c1sqr2less1)] - paddw mm5, mm1 - - pmulhw mm3, [GLOBAL(x_s1sqr2)] - paddw mm3, mm4 - - paddw mm3, mm5 ; d1 - movq mm6, mm2 ; a1 - - movq mm4, mm0 ; b1 - paddw mm2, mm3 ;0 - - paddw mm4, mm7 ;1 - psubw mm0, mm7 ;2 - - psubw mm6, mm3 ;3 - - movq mm1, mm2 ; 03 02 01 00 - movq mm3, mm4 ; 23 22 21 20 - - punpcklwd mm1, mm0 ; 11 01 10 00 - punpckhwd mm2, mm0 ; 13 03 12 02 - - punpcklwd mm3, mm6 ; 31 21 30 20 - punpckhwd mm4, mm6 ; 33 23 32 22 - - movq mm0, mm1 ; 11 01 10 00 - movq mm5, mm2 ; 13 03 12 02 - - punpckldq mm0, mm3 ; 30 20 10 00 - punpckhdq mm1, mm3 ; 31 21 11 01 - - punpckldq mm2, mm4 ; 32 22 12 02 - punpckhdq mm5, mm4 ; 33 23 13 03 - - movq mm3, mm5 ; 33 23 13 03 - - psubw mm0, mm2 ; b1= 0-2 - paddw mm2, mm2 ; - - movq mm5, mm1 - paddw mm2, mm0 ; a1 =0+2 - - pmulhw mm5, [GLOBAL(x_s1sqr2)]; - paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) - - movq mm7, mm3 ; - pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; - - paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw mm7, mm5 ; c1 - - movq mm5, mm1 - movq mm4, mm3 - - pmulhw mm5, [GLOBAL(x_c1sqr2less1)] - paddw mm5, mm1 - - pmulhw mm3, [GLOBAL(x_s1sqr2)] - paddw mm3, mm4 - - paddw mm3, mm5 ; d1 - paddw mm0, [GLOBAL(fours)] - - paddw mm2, [GLOBAL(fours)] - movq mm6, mm2 ; a1 - - movq mm4, mm0 ; b1 - paddw mm2, mm3 ;0 - - paddw mm4, mm7 ;1 - psubw mm0, mm7 ;2 - - psubw mm6, mm3 ;3 - psraw mm2, 3 - - psraw mm0, 3 - psraw mm4, 3 - - psraw mm6, 3 - - movq mm1, mm2 ; 03 02 01 00 - movq mm3, mm4 ; 23 22 21 20 - - punpcklwd mm1, mm0 ; 11 01 10 00 - punpckhwd mm2, mm0 ; 13 03 12 02 - - punpcklwd mm3, mm6 ; 31 21 30 20 - punpckhwd mm4, mm6 ; 33 23 32 22 - - movq mm0, mm1 ; 11 01 10 00 - movq mm5, mm2 ; 13 03 12 02 - - punpckldq mm0, mm3 ; 30 20 10 00 - punpckhdq mm1, mm3 ; 31 21 11 01 - - punpckldq mm2, mm4 ; 32 22 12 02 - punpckhdq mm5, mm4 ; 33 23 13 03 - - pxor mm7, mm7 - - movd mm4, [rdx] - punpcklbw mm4, mm7 - paddsw mm0, mm4 - packuswb mm0, mm7 - movd [rdx], mm0 - - movd mm4, [rdx+rdi] - punpcklbw mm4, mm7 - 
paddsw mm1, mm4 - packuswb mm1, mm7 - movd [rdx+rdi], mm1 - - movd mm4, [rdx+2*rdi] - punpcklbw mm4, mm7 - paddsw mm2, mm4 - packuswb mm2, mm7 - movd [rdx+rdi*2], mm2 - - add rdx, rdi - - movd mm4, [rdx+2*rdi] - punpcklbw mm4, mm7 - paddsw mm5, mm4 - packuswb mm5, mm7 - movd [rdx+rdi*2], mm5 - - ; begin epilog - pop rdi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -x_s1sqr2: - times 4 dw 0x8A8C -align 16 -x_c1sqr2less1: - times 4 dw 0x4E7B -align 16 -fours: - times 4 dw 0x0004 diff --git a/thirdparty/libvpx/vp8/common/x86/filter_x86.c b/thirdparty/libvpx/vp8/common/x86/filter_x86.c deleted file mode 100644 index 7f496ed7db..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/filter_x86.c +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "vp8/common/x86/filter_x86.h" - -DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]) = -{ - { 128, 128, 128, 128, 0, 0, 0, 0 }, - { 112, 112, 112, 112, 16, 16, 16, 16 }, - { 96, 96, 96, 96, 32, 32, 32, 32 }, - { 80, 80, 80, 80, 48, 48, 48, 48 }, - { 64, 64, 64, 64, 64, 64, 64, 64 }, - { 48, 48, 48, 48, 80, 80, 80, 80 }, - { 32, 32, 32, 32, 96, 96, 96, 96 }, - { 16, 16, 16, 16, 112, 112, 112, 112 } -}; - -DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]) = -{ - { 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 }, - { 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 }, - { 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 }, - { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 }, - { 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 }, - { 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 }, - { 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 } -}; diff --git a/thirdparty/libvpx/vp8/common/x86/filter_x86.h b/thirdparty/libvpx/vp8/common/x86/filter_x86.h deleted file mode 100644 index d282841bee..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/filter_x86.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP8_COMMON_X86_FILTER_X86_H_ -#define VP8_COMMON_X86_FILTER_X86_H_ - -#include "vpx_ports/mem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* x86 assembly specific copy of vp8/common/filter.c:vp8_bilinear_filters with - * duplicated values */ - -/* duplicated 4x */ -extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]); - -/* duplicated 8x */ -extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_COMMON_X86_FILTER_X86_H_ diff --git a/thirdparty/libvpx/vp8/common/x86/idct_blk_mmx.c b/thirdparty/libvpx/vp8/common/x86/idct_blk_mmx.c deleted file mode 100644 index f2532b34da..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/idct_blk_mmx.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "vp8/common/blockd.h" -#include "vpx_mem/vpx_mem.h" - -extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q); - -void vp8_dequantize_b_mmx(BLOCKD *d, short *DQC) -{ - short *sq = (short *) d->qcoeff; - short *dq = (short *) d->dqcoeff; - - vp8_dequantize_b_impl_mmx(sq, dq, DQC); -} - -void vp8_dequant_idct_add_y_block_mmx - (short *q, short *dq, - unsigned char *dst, int stride, char *eobs) -{ - int i; - - for (i = 0; i < 4; i++) - { - if (eobs[0] > 1) - vp8_dequant_idct_add_mmx (q, dq, dst, stride); - else if (eobs[0] == 1) - { - vp8_dc_only_idct_add_mmx (q[0]*dq[0], dst, stride, dst, stride); - memset(q, 0, 2 * sizeof(q[0])); - } - - if (eobs[1] > 1) - vp8_dequant_idct_add_mmx (q+16, dq, dst+4, stride); - else if (eobs[1] == 1) - { - vp8_dc_only_idct_add_mmx (q[16]*dq[0], dst+4, stride, - dst+4, stride); - memset(q + 16, 0, 2 * sizeof(q[0])); - } - - if (eobs[2] > 1) - vp8_dequant_idct_add_mmx (q+32, dq, dst+8, stride); - else if (eobs[2] == 1) - { - vp8_dc_only_idct_add_mmx (q[32]*dq[0], dst+8, stride, - dst+8, stride); - memset(q + 32, 0, 2 * sizeof(q[0])); - } - - if (eobs[3] > 1) - vp8_dequant_idct_add_mmx (q+48, dq, dst+12, stride); - else if (eobs[3] == 1) - { - vp8_dc_only_idct_add_mmx (q[48]*dq[0], dst+12, stride, - dst+12, stride); - memset(q + 48, 0, 2 * sizeof(q[0])); - } - - q += 64; - dst += 4*stride; - eobs += 4; - } -} - -void vp8_dequant_idct_add_uv_block_mmx - (short *q, short *dq, - unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) -{ - int i; - - for (i = 0; i < 2; i++) - { - if (eobs[0] > 1) - vp8_dequant_idct_add_mmx (q, dq, dstu, stride); - else if (eobs[0] == 1) - { - vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstu, stride, dstu, stride); - memset(q, 0, 2 * sizeof(q[0])); - } - - if (eobs[1] > 1) - vp8_dequant_idct_add_mmx (q+16, dq, dstu+4, stride); - else if (eobs[1] == 1) - { - vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstu+4, stride, - dstu+4, 
stride); - memset(q + 16, 0, 2 * sizeof(q[0])); - } - - q += 32; - dstu += 4*stride; - eobs += 2; - } - - for (i = 0; i < 2; i++) - { - if (eobs[0] > 1) - vp8_dequant_idct_add_mmx (q, dq, dstv, stride); - else if (eobs[0] == 1) - { - vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstv, stride, dstv, stride); - memset(q, 0, 2 * sizeof(q[0])); - } - - if (eobs[1] > 1) - vp8_dequant_idct_add_mmx (q+16, dq, dstv+4, stride); - else if (eobs[1] == 1) - { - vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstv+4, stride, - dstv+4, stride); - memset(q + 16, 0, 2 * sizeof(q[0])); - } - - q += 32; - dstv += 4*stride; - eobs += 2; - } -} diff --git a/thirdparty/libvpx/vp8/common/x86/idct_blk_sse2.c b/thirdparty/libvpx/vp8/common/x86/idct_blk_sse2.c deleted file mode 100644 index ae96ec858c..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/idct_blk_sse2.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "vpx_config.h" -#include "vp8_rtcd.h" - -void vp8_idct_dequant_0_2x_sse2 - (short *q, short *dq , - unsigned char *dst, int dst_stride); -void vp8_idct_dequant_full_2x_sse2 - (short *q, short *dq , - unsigned char *dst, int dst_stride); - -void vp8_dequant_idct_add_y_block_sse2 - (short *q, short *dq, - unsigned char *dst, int stride, char *eobs) -{ - int i; - - for (i = 0; i < 4; i++) - { - if (((short *)(eobs))[0]) - { - if (((short *)(eobs))[0] & 0xfefe) - vp8_idct_dequant_full_2x_sse2 (q, dq, dst, stride); - else - vp8_idct_dequant_0_2x_sse2 (q, dq, dst, stride); - } - if (((short *)(eobs))[1]) - { - if (((short *)(eobs))[1] & 0xfefe) - vp8_idct_dequant_full_2x_sse2 (q+32, dq, dst+8, stride); - else - vp8_idct_dequant_0_2x_sse2 (q+32, dq, dst+8, stride); - } - q += 64; - dst += stride*4; - eobs += 4; - } -} - -void vp8_dequant_idct_add_uv_block_sse2 - (short *q, short *dq, - unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) -{ - if (((short *)(eobs))[0]) - { - if (((short *)(eobs))[0] & 0xfefe) - vp8_idct_dequant_full_2x_sse2 (q, dq, dstu, stride); - else - vp8_idct_dequant_0_2x_sse2 (q, dq, dstu, stride); - } - q += 32; - dstu += stride*4; - - if (((short *)(eobs))[1]) - { - if (((short *)(eobs))[1] & 0xfefe) - vp8_idct_dequant_full_2x_sse2 (q, dq, dstu, stride); - else - vp8_idct_dequant_0_2x_sse2 (q, dq, dstu, stride); - } - q += 32; - - if (((short *)(eobs))[2]) - { - if (((short *)(eobs))[2] & 0xfefe) - vp8_idct_dequant_full_2x_sse2 (q, dq, dstv, stride); - else - vp8_idct_dequant_0_2x_sse2 (q, dq, dstv, stride); - } - q += 32; - dstv += stride*4; - - if (((short *)(eobs))[3]) - { - if (((short *)(eobs))[3] & 0xfefe) - vp8_idct_dequant_full_2x_sse2 (q, dq, dstv, stride); - else - vp8_idct_dequant_0_2x_sse2 (q, dq, dstv, stride); - } -} diff --git a/thirdparty/libvpx/vp8/common/x86/idctllm_mmx.asm b/thirdparty/libvpx/vp8/common/x86/idctllm_mmx.asm deleted file mode 100644 index 96fa2c60d0..0000000000 --- 
a/thirdparty/libvpx/vp8/common/x86/idctllm_mmx.asm +++ /dev/null @@ -1,295 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -; /**************************************************************************** -; * Notes: -; * -; * This implementation makes use of 16 bit fixed point version of two multiply -; * constants: -; * 1. sqrt(2) * cos (pi/8) -; * 2. sqrt(2) * sin (pi/8) -; * Because the first constant is bigger than 1, to maintain the same 16 bit -; * fixed point precision as the second one, we use a trick of -; * x * a = x + x*(a-1) -; * so -; * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1). -; * -; * For the second constant, because of the 16bit version is 35468, which -; * is bigger than 32768, in signed 16 bit multiply, it becomes a negative -; * number. 
-; * (x * (unsigned)35468 >> 16) = x * (signed)35468 >> 16 + x -; * -; **************************************************************************/ - - -;void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred, -;int pitch, unsigned char *dest,int stride) -global sym(vp8_short_idct4x4llm_mmx) PRIVATE -sym(vp8_short_idct4x4llm_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rax, arg(0) ;input - mov rsi, arg(1) ;pred - - movq mm0, [rax ] - movq mm1, [rax+ 8] - movq mm2, [rax+16] - movq mm3, [rax+24] - -%if 0 - pxor mm7, mm7 - movq [rax], mm7 - movq [rax+8], mm7 - movq [rax+16],mm7 - movq [rax+24],mm7 -%endif - movsxd rax, dword ptr arg(2) ;pitch - mov rdx, arg(3) ;dest - movsxd rdi, dword ptr arg(4) ;stride - - - psubw mm0, mm2 ; b1= 0-2 - paddw mm2, mm2 ; - - movq mm5, mm1 - paddw mm2, mm0 ; a1 =0+2 - - pmulhw mm5, [GLOBAL(x_s1sqr2)]; - paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) - - movq mm7, mm3 ; - pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; - - paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw mm7, mm5 ; c1 - - movq mm5, mm1 - movq mm4, mm3 - - pmulhw mm5, [GLOBAL(x_c1sqr2less1)] - paddw mm5, mm1 - - pmulhw mm3, [GLOBAL(x_s1sqr2)] - paddw mm3, mm4 - - paddw mm3, mm5 ; d1 - movq mm6, mm2 ; a1 - - movq mm4, mm0 ; b1 - paddw mm2, mm3 ;0 - - paddw mm4, mm7 ;1 - psubw mm0, mm7 ;2 - - psubw mm6, mm3 ;3 - - movq mm1, mm2 ; 03 02 01 00 - movq mm3, mm4 ; 23 22 21 20 - - punpcklwd mm1, mm0 ; 11 01 10 00 - punpckhwd mm2, mm0 ; 13 03 12 02 - - punpcklwd mm3, mm6 ; 31 21 30 20 - punpckhwd mm4, mm6 ; 33 23 32 22 - - movq mm0, mm1 ; 11 01 10 00 - movq mm5, mm2 ; 13 03 12 02 - - punpckldq mm0, mm3 ; 30 20 10 00 - punpckhdq mm1, mm3 ; 31 21 11 01 - - punpckldq mm2, mm4 ; 32 22 12 02 - punpckhdq mm5, mm4 ; 33 23 13 03 - - movq mm3, mm5 ; 33 23 13 03 - - psubw mm0, mm2 ; b1= 0-2 - paddw mm2, mm2 ; - - movq mm5, mm1 - paddw mm2, mm0 ; a1 =0+2 - - pmulhw mm5, [GLOBAL(x_s1sqr2)]; - paddw mm5, mm1 ; ip1 * sin(pi/8) * 
sqrt(2) - - movq mm7, mm3 ; - pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; - - paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw mm7, mm5 ; c1 - - movq mm5, mm1 - movq mm4, mm3 - - pmulhw mm5, [GLOBAL(x_c1sqr2less1)] - paddw mm5, mm1 - - pmulhw mm3, [GLOBAL(x_s1sqr2)] - paddw mm3, mm4 - - paddw mm3, mm5 ; d1 - paddw mm0, [GLOBAL(fours)] - - paddw mm2, [GLOBAL(fours)] - movq mm6, mm2 ; a1 - - movq mm4, mm0 ; b1 - paddw mm2, mm3 ;0 - - paddw mm4, mm7 ;1 - psubw mm0, mm7 ;2 - - psubw mm6, mm3 ;3 - psraw mm2, 3 - - psraw mm0, 3 - psraw mm4, 3 - - psraw mm6, 3 - - movq mm1, mm2 ; 03 02 01 00 - movq mm3, mm4 ; 23 22 21 20 - - punpcklwd mm1, mm0 ; 11 01 10 00 - punpckhwd mm2, mm0 ; 13 03 12 02 - - punpcklwd mm3, mm6 ; 31 21 30 20 - punpckhwd mm4, mm6 ; 33 23 32 22 - - movq mm0, mm1 ; 11 01 10 00 - movq mm5, mm2 ; 13 03 12 02 - - punpckldq mm0, mm3 ; 30 20 10 00 - punpckhdq mm1, mm3 ; 31 21 11 01 - - punpckldq mm2, mm4 ; 32 22 12 02 - punpckhdq mm5, mm4 ; 33 23 13 03 - - pxor mm7, mm7 - - movd mm4, [rsi] - punpcklbw mm4, mm7 - paddsw mm0, mm4 - packuswb mm0, mm7 - movd [rdx], mm0 - - movd mm4, [rsi+rax] - punpcklbw mm4, mm7 - paddsw mm1, mm4 - packuswb mm1, mm7 - movd [rdx+rdi], mm1 - - movd mm4, [rsi+2*rax] - punpcklbw mm4, mm7 - paddsw mm2, mm4 - packuswb mm2, mm7 - movd [rdx+rdi*2], mm2 - - add rdx, rdi - add rsi, rax - - movd mm4, [rsi+2*rax] - punpcklbw mm4, mm7 - paddsw mm5, mm4 - packuswb mm5, mm7 - movd [rdx+rdi*2], mm5 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_dc_only_idct_add_mmx( -;short input_dc, -;unsigned char *pred_ptr, -;int pred_stride, -;unsigned char *dst_ptr, -;int stride) -global sym(vp8_dc_only_idct_add_mmx) PRIVATE -sym(vp8_dc_only_idct_add_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - GET_GOT rbx - ; end prolog - - movd mm5, arg(0) ;input_dc - mov rax, arg(1) ;pred_ptr - movsxd rdx, dword ptr arg(2) ;pred_stride - - pxor mm0, mm0 - - paddw mm5, [GLOBAL(fours)] - lea rcx, [rdx + rdx*2] - - 
psraw mm5, 3 - - punpcklwd mm5, mm5 - - punpckldq mm5, mm5 - - movd mm1, [rax] - movd mm2, [rax+rdx] - movd mm3, [rax+2*rdx] - movd mm4, [rax+rcx] - - mov rax, arg(3) ;d -- destination - movsxd rdx, dword ptr arg(4) ;dst_stride - - punpcklbw mm1, mm0 - paddsw mm1, mm5 - packuswb mm1, mm0 ; pack and unpack to saturate - lea rcx, [rdx + rdx*2] - - punpcklbw mm2, mm0 - paddsw mm2, mm5 - packuswb mm2, mm0 ; pack and unpack to saturate - - punpcklbw mm3, mm0 - paddsw mm3, mm5 - packuswb mm3, mm0 ; pack and unpack to saturate - - punpcklbw mm4, mm0 - paddsw mm4, mm5 - packuswb mm4, mm0 ; pack and unpack to saturate - - movd [rax], mm1 - movd [rax+rdx], mm2 - movd [rax+2*rdx], mm3 - movd [rax+rcx], mm4 - - ; begin epilog - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -x_s1sqr2: - times 4 dw 0x8A8C -align 16 -x_c1sqr2less1: - times 4 dw 0x4E7B -align 16 -fours: - times 4 dw 0x0004 diff --git a/thirdparty/libvpx/vp8/common/x86/idctllm_sse2.asm b/thirdparty/libvpx/vp8/common/x86/idctllm_sse2.asm deleted file mode 100644 index bf8e2c4021..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/idctllm_sse2.asm +++ /dev/null @@ -1,708 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -;void vp8_idct_dequant_0_2x_sse2 -; ( -; short *qcoeff - 0 -; short *dequant - 1 -; unsigned char *dst - 2 -; int dst_stride - 3 -; ) - -global sym(vp8_idct_dequant_0_2x_sse2) PRIVATE -sym(vp8_idct_dequant_0_2x_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - GET_GOT rbx - ; end prolog - - mov rdx, arg(1) ; dequant - mov rax, arg(0) ; qcoeff - - movd xmm4, [rax] - movd xmm5, [rdx] - - pinsrw xmm4, [rax+32], 4 - pinsrw xmm5, [rdx], 4 - - pmullw xmm4, xmm5 - - ; Zero out xmm5, for use unpacking - pxor xmm5, xmm5 - - ; clear coeffs - movd [rax], xmm5 - movd [rax+32], xmm5 -;pshufb - mov rax, arg(2) ; dst - movsxd rdx, dword ptr arg(3) ; dst_stride - - pshuflw xmm4, xmm4, 00000000b - pshufhw xmm4, xmm4, 00000000b - - lea rcx, [rdx + rdx*2] - paddw xmm4, [GLOBAL(fours)] - - psraw xmm4, 3 - - movq xmm0, [rax] - movq xmm1, [rax+rdx] - movq xmm2, [rax+2*rdx] - movq xmm3, [rax+rcx] - - punpcklbw xmm0, xmm5 - punpcklbw xmm1, xmm5 - punpcklbw xmm2, xmm5 - punpcklbw xmm3, xmm5 - - - ; Add to predict buffer - paddw xmm0, xmm4 - paddw xmm1, xmm4 - paddw xmm2, xmm4 - paddw xmm3, xmm4 - - ; pack up before storing - packuswb xmm0, xmm5 - packuswb xmm1, xmm5 - packuswb xmm2, xmm5 - packuswb xmm3, xmm5 - - ; store blocks back out - movq [rax], xmm0 - movq [rax + rdx], xmm1 - - lea rax, [rax + 2*rdx] - - movq [rax], xmm2 - movq [rax + rdx], xmm3 - - ; begin epilog - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_idct_dequant_full_2x_sse2 -; ( -; short *qcoeff - 0 -; short *dequant - 1 -; unsigned char *dst - 2 -; int dst_stride - 3 -; ) -global sym(vp8_idct_dequant_full_2x_sse2) PRIVATE -sym(vp8_idct_dequant_full_2x_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ; special case when 2 blocks have 0 or 1 coeffs - ; dc is set as first coeff, so no need to load qcoeff - mov rax, arg(0) ; qcoeff - mov rdx, arg(1) ; dequant - mov rdi, arg(2) 
; dst - - - ; Zero out xmm7, for use unpacking - pxor xmm7, xmm7 - - - ; note the transpose of xmm1 and xmm2, necessary for shuffle - ; to spit out sensicle data - movdqa xmm0, [rax] - movdqa xmm2, [rax+16] - movdqa xmm1, [rax+32] - movdqa xmm3, [rax+48] - - ; Clear out coeffs - movdqa [rax], xmm7 - movdqa [rax+16], xmm7 - movdqa [rax+32], xmm7 - movdqa [rax+48], xmm7 - - ; dequantize qcoeff buffer - pmullw xmm0, [rdx] - pmullw xmm2, [rdx+16] - pmullw xmm1, [rdx] - pmullw xmm3, [rdx+16] - movsxd rdx, dword ptr arg(3) ; dst_stride - - ; repack so block 0 row x and block 1 row x are together - movdqa xmm4, xmm0 - punpckldq xmm0, xmm1 - punpckhdq xmm4, xmm1 - - pshufd xmm0, xmm0, 11011000b - pshufd xmm1, xmm4, 11011000b - - movdqa xmm4, xmm2 - punpckldq xmm2, xmm3 - punpckhdq xmm4, xmm3 - - pshufd xmm2, xmm2, 11011000b - pshufd xmm3, xmm4, 11011000b - - ; first pass - psubw xmm0, xmm2 ; b1 = 0-2 - paddw xmm2, xmm2 ; - - movdqa xmm5, xmm1 - paddw xmm2, xmm0 ; a1 = 0+2 - - pmulhw xmm5, [GLOBAL(x_s1sqr2)] - lea rcx, [rdx + rdx*2] ;dst_stride * 3 - paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) - - movdqa xmm7, xmm3 - pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] - - paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw xmm7, xmm5 ; c1 - - movdqa xmm5, xmm1 - movdqa xmm4, xmm3 - - pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] - paddw xmm5, xmm1 - - pmulhw xmm3, [GLOBAL(x_s1sqr2)] - paddw xmm3, xmm4 - - paddw xmm3, xmm5 ; d1 - movdqa xmm6, xmm2 ; a1 - - movdqa xmm4, xmm0 ; b1 - paddw xmm2, xmm3 ;0 - - paddw xmm4, xmm7 ;1 - psubw xmm0, xmm7 ;2 - - psubw xmm6, xmm3 ;3 - - ; transpose for the second pass - movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 - punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 - punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 - - movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 - punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 - punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 - - - movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 - 
punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 - punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 - - movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 - punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 - punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 - - - movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 - punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 - punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 - - movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 - punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 - punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 - - pshufd xmm0, xmm2, 11011000b - pshufd xmm2, xmm1, 11011000b - - pshufd xmm1, xmm5, 11011000b - pshufd xmm3, xmm7, 11011000b - - ; second pass - psubw xmm0, xmm2 ; b1 = 0-2 - paddw xmm2, xmm2 - - movdqa xmm5, xmm1 - paddw xmm2, xmm0 ; a1 = 0+2 - - pmulhw xmm5, [GLOBAL(x_s1sqr2)] - paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) - - movdqa xmm7, xmm3 - pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] - - paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw xmm7, xmm5 ; c1 - - movdqa xmm5, xmm1 - movdqa xmm4, xmm3 - - pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] - paddw xmm5, xmm1 - - pmulhw xmm3, [GLOBAL(x_s1sqr2)] - paddw xmm3, xmm4 - - paddw xmm3, xmm5 ; d1 - paddw xmm0, [GLOBAL(fours)] - - paddw xmm2, [GLOBAL(fours)] - movdqa xmm6, xmm2 ; a1 - - movdqa xmm4, xmm0 ; b1 - paddw xmm2, xmm3 ;0 - - paddw xmm4, xmm7 ;1 - psubw xmm0, xmm7 ;2 - - psubw xmm6, xmm3 ;3 - psraw xmm2, 3 - - psraw xmm0, 3 - psraw xmm4, 3 - - psraw xmm6, 3 - - ; transpose to save - movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 - punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 - punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 - - movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 - punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 - punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 - - - movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 - punpckldq xmm2, xmm4 ; 013 009 
005 001 012 008 004 000 - punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 - - movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 - punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 - punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 - - - movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 - punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 - punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 - - movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 - punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 - punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 - - pshufd xmm0, xmm2, 11011000b - pshufd xmm2, xmm1, 11011000b - - pshufd xmm1, xmm5, 11011000b - pshufd xmm3, xmm7, 11011000b - - pxor xmm7, xmm7 - - ; Load up predict blocks - movq xmm4, [rdi] - movq xmm5, [rdi+rdx] - - punpcklbw xmm4, xmm7 - punpcklbw xmm5, xmm7 - - paddw xmm0, xmm4 - paddw xmm1, xmm5 - - movq xmm4, [rdi+2*rdx] - movq xmm5, [rdi+rcx] - - punpcklbw xmm4, xmm7 - punpcklbw xmm5, xmm7 - - paddw xmm2, xmm4 - paddw xmm3, xmm5 - -.finish: - - ; pack up before storing - packuswb xmm0, xmm7 - packuswb xmm1, xmm7 - packuswb xmm2, xmm7 - packuswb xmm3, xmm7 - - ; store blocks back out - movq [rdi], xmm0 - movq [rdi + rdx], xmm1 - movq [rdi + rdx*2], xmm2 - movq [rdi + rcx], xmm3 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_idct_dequant_dc_0_2x_sse2 -; ( -; short *qcoeff - 0 -; short *dequant - 1 -; unsigned char *dst - 2 -; int dst_stride - 3 -; short *dc - 4 -; ) -global sym(vp8_idct_dequant_dc_0_2x_sse2) PRIVATE -sym(vp8_idct_dequant_dc_0_2x_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - GET_GOT rbx - push rdi - ; end prolog - - ; special case when 2 blocks have 0 or 1 coeffs - ; dc is set as first coeff, so no need to load qcoeff - mov rax, arg(0) ; qcoeff - - mov rdi, arg(2) ; dst - mov rdx, arg(4) ; dc - - ; Zero out xmm5, for use unpacking - pxor xmm5, xmm5 - - ; load up 2 dc words here == 
2*16 = doubleword - movd xmm4, [rdx] - - movsxd rdx, dword ptr arg(3) ; dst_stride - lea rcx, [rdx + rdx*2] - ; Load up predict blocks - movq xmm0, [rdi] - movq xmm1, [rdi+rdx*1] - movq xmm2, [rdi+rdx*2] - movq xmm3, [rdi+rcx] - - ; Duplicate and expand dc across - punpcklwd xmm4, xmm4 - punpckldq xmm4, xmm4 - - ; Rounding to dequant and downshift - paddw xmm4, [GLOBAL(fours)] - psraw xmm4, 3 - - ; Predict buffer needs to be expanded from bytes to words - punpcklbw xmm0, xmm5 - punpcklbw xmm1, xmm5 - punpcklbw xmm2, xmm5 - punpcklbw xmm3, xmm5 - - ; Add to predict buffer - paddw xmm0, xmm4 - paddw xmm1, xmm4 - paddw xmm2, xmm4 - paddw xmm3, xmm4 - - ; pack up before storing - packuswb xmm0, xmm5 - packuswb xmm1, xmm5 - packuswb xmm2, xmm5 - packuswb xmm3, xmm5 - - ; store blocks back out - movq [rdi], xmm0 - movq [rdi + rdx], xmm1 - movq [rdi + rdx*2], xmm2 - movq [rdi + rcx], xmm3 - - ; begin epilog - pop rdi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret -;void vp8_idct_dequant_dc_full_2x_sse2 -; ( -; short *qcoeff - 0 -; short *dequant - 1 -; unsigned char *dst - 2 -; int dst_stride - 3 -; short *dc - 4 -; ) -global sym(vp8_idct_dequant_dc_full_2x_sse2) PRIVATE -sym(vp8_idct_dequant_dc_full_2x_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - GET_GOT rbx - push rdi - ; end prolog - - ; special case when 2 blocks have 0 or 1 coeffs - ; dc is set as first coeff, so no need to load qcoeff - mov rax, arg(0) ; qcoeff - mov rdx, arg(1) ; dequant - - mov rdi, arg(2) ; dst - - ; Zero out xmm7, for use unpacking - pxor xmm7, xmm7 - - - ; note the transpose of xmm1 and xmm2, necessary for shuffle - ; to spit out sensicle data - movdqa xmm0, [rax] - movdqa xmm2, [rax+16] - movdqa xmm1, [rax+32] - movdqa xmm3, [rax+48] - - ; Clear out coeffs - movdqa [rax], xmm7 - movdqa [rax+16], xmm7 - movdqa [rax+32], xmm7 - movdqa [rax+48], xmm7 - - ; dequantize qcoeff buffer - pmullw xmm0, [rdx] - pmullw xmm2, [rdx+16] - pmullw xmm1, [rdx] - pmullw xmm3, [rdx+16] 
- - ; DC component - mov rdx, arg(4) - - ; repack so block 0 row x and block 1 row x are together - movdqa xmm4, xmm0 - punpckldq xmm0, xmm1 - punpckhdq xmm4, xmm1 - - pshufd xmm0, xmm0, 11011000b - pshufd xmm1, xmm4, 11011000b - - movdqa xmm4, xmm2 - punpckldq xmm2, xmm3 - punpckhdq xmm4, xmm3 - - pshufd xmm2, xmm2, 11011000b - pshufd xmm3, xmm4, 11011000b - - ; insert DC component - pinsrw xmm0, [rdx], 0 - pinsrw xmm0, [rdx+2], 4 - - ; first pass - psubw xmm0, xmm2 ; b1 = 0-2 - paddw xmm2, xmm2 ; - - movdqa xmm5, xmm1 - paddw xmm2, xmm0 ; a1 = 0+2 - - pmulhw xmm5, [GLOBAL(x_s1sqr2)] - paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) - - movdqa xmm7, xmm3 - pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] - - paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw xmm7, xmm5 ; c1 - - movdqa xmm5, xmm1 - movdqa xmm4, xmm3 - - pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] - paddw xmm5, xmm1 - - pmulhw xmm3, [GLOBAL(x_s1sqr2)] - paddw xmm3, xmm4 - - paddw xmm3, xmm5 ; d1 - movdqa xmm6, xmm2 ; a1 - - movdqa xmm4, xmm0 ; b1 - paddw xmm2, xmm3 ;0 - - paddw xmm4, xmm7 ;1 - psubw xmm0, xmm7 ;2 - - psubw xmm6, xmm3 ;3 - - ; transpose for the second pass - movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 - punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 - punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 - - movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 - punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 - punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 - - - movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 - punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 - punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 - - movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 - punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 - punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 - - - movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 - punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 - punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 - - movdqa 
xmm7, xmm1 ; 015 011 007 003 014 010 006 002 - punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 - punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 - - pshufd xmm0, xmm2, 11011000b - pshufd xmm2, xmm1, 11011000b - - pshufd xmm1, xmm5, 11011000b - pshufd xmm3, xmm7, 11011000b - - ; second pass - psubw xmm0, xmm2 ; b1 = 0-2 - paddw xmm2, xmm2 - - movdqa xmm5, xmm1 - paddw xmm2, xmm0 ; a1 = 0+2 - - pmulhw xmm5, [GLOBAL(x_s1sqr2)] - paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) - - movdqa xmm7, xmm3 - pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] - - paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) - psubw xmm7, xmm5 ; c1 - - movdqa xmm5, xmm1 - movdqa xmm4, xmm3 - - pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] - paddw xmm5, xmm1 - - pmulhw xmm3, [GLOBAL(x_s1sqr2)] - paddw xmm3, xmm4 - - paddw xmm3, xmm5 ; d1 - paddw xmm0, [GLOBAL(fours)] - - paddw xmm2, [GLOBAL(fours)] - movdqa xmm6, xmm2 ; a1 - - movdqa xmm4, xmm0 ; b1 - paddw xmm2, xmm3 ;0 - - paddw xmm4, xmm7 ;1 - psubw xmm0, xmm7 ;2 - - psubw xmm6, xmm3 ;3 - psraw xmm2, 3 - - psraw xmm0, 3 - psraw xmm4, 3 - - psraw xmm6, 3 - - ; transpose to save - movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 - punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 - punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 - - movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 - punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 - punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 - - - movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 - punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 - punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 - - movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 - punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 - punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 - - - movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 - punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 - punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 - - movdqa xmm7, xmm1 ; 015 011 007 003 014 
010 006 002 - punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 - punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 - - pshufd xmm0, xmm2, 11011000b - pshufd xmm2, xmm1, 11011000b - - pshufd xmm1, xmm5, 11011000b - pshufd xmm3, xmm7, 11011000b - - pxor xmm7, xmm7 - - ; Load up predict blocks - movsxd rdx, dword ptr arg(3) ; dst_stride - movq xmm4, [rdi] - movq xmm5, [rdi+rdx] - lea rcx, [rdx + rdx*2] - - punpcklbw xmm4, xmm7 - punpcklbw xmm5, xmm7 - - paddw xmm0, xmm4 - paddw xmm1, xmm5 - - movq xmm4, [rdi+rdx*2] - movq xmm5, [rdi+rcx] - - punpcklbw xmm4, xmm7 - punpcklbw xmm5, xmm7 - - paddw xmm2, xmm4 - paddw xmm3, xmm5 - -.finish: - - ; pack up before storing - packuswb xmm0, xmm7 - packuswb xmm1, xmm7 - packuswb xmm2, xmm7 - packuswb xmm3, xmm7 - - ; Load destination stride before writing out, - ; doesn't need to persist - movsxd rdx, dword ptr arg(3) ; dst_stride - - ; store blocks back out - movq [rdi], xmm0 - movq [rdi + rdx], xmm1 - - lea rdi, [rdi + 2*rdx] - - movq [rdi], xmm2 - movq [rdi + rdx], xmm3 - - - ; begin epilog - pop rdi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -fours: - times 8 dw 0x0004 -align 16 -x_s1sqr2: - times 8 dw 0x8A8C -align 16 -x_c1sqr2less1: - times 8 dw 0x4E7B diff --git a/thirdparty/libvpx/vp8/common/x86/iwalsh_mmx.asm b/thirdparty/libvpx/vp8/common/x86/iwalsh_mmx.asm deleted file mode 100644 index 158c3b7458..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/iwalsh_mmx.asm +++ /dev/null @@ -1,140 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -;void vp8_short_inv_walsh4x4_mmx(short *input, short *output) -global sym(vp8_short_inv_walsh4x4_mmx) PRIVATE -sym(vp8_short_inv_walsh4x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - ; end prolog - - mov rdx, arg(0) - mov rax, 30003h - - movq mm0, [rdx + 0] ;ip[0] - movq mm1, [rdx + 8] ;ip[4] - movq mm7, rax - - movq mm2, [rdx + 16] ;ip[8] - movq mm3, [rdx + 24] ;ip[12] - punpcklwd mm7, mm7 ;0003000300030003h - mov rdx, arg(1) - - movq mm4, mm0 - movq mm5, mm1 - - paddw mm4, mm3 ;ip[0] + ip[12] aka al - paddw mm5, mm2 ;ip[4] + ip[8] aka bl - - movq mm6, mm4 ;temp al - paddw mm4, mm5 ;al + bl - psubw mm6, mm5 ;al - bl - - psubw mm0, mm3 ;ip[0] - ip[12] aka d1 - psubw mm1, mm2 ;ip[4] - ip[8] aka c1 - - movq mm5, mm0 ;temp dl - paddw mm0, mm1 ;dl + cl - psubw mm5, mm1 ;dl - cl - - ; 03 02 01 00 - ; 13 12 11 10 - ; 23 22 21 20 - ; 33 32 31 30 - - movq mm3, mm4 ; 03 02 01 00 - punpcklwd mm4, mm0 ; 11 01 10 00 - punpckhwd mm3, mm0 ; 13 03 12 02 - - movq mm1, mm6 ; 23 22 21 20 - punpcklwd mm6, mm5 ; 31 21 30 20 - punpckhwd mm1, mm5 ; 33 23 32 22 - - movq mm0, mm4 ; 11 01 10 00 - movq mm2, mm3 ; 13 03 12 02 - - punpckldq mm0, mm6 ; 30 20 10 00 aka ip[0] - punpckhdq mm4, mm6 ; 31 21 11 01 aka ip[4] - - punpckldq mm2, mm1 ; 32 22 12 02 aka ip[8] - punpckhdq mm3, mm1 ; 33 23 13 03 aka ip[12] -;~~~~~~~~~~~~~~~~~~~~~ - movq mm1, mm0 - movq mm5, mm4 - paddw mm1, mm3 ;ip[0] + ip[12] aka al - paddw mm5, mm2 ;ip[4] + ip[8] aka bl - - movq mm6, mm1 ;temp al - paddw mm1, mm5 ;al + bl - psubw mm6, mm5 ;al - bl - paddw mm1, mm7 - paddw mm6, mm7 - psraw mm1, 3 - psraw mm6, 3 - - psubw mm0, mm3 ;ip[0] - ip[12] aka d1 - psubw mm4, mm2 ;ip[4] - ip[8] aka c1 - - movq mm5, mm0 ;temp dl - paddw mm0, mm4 ;dl + cl - psubw mm5, mm4 ;dl - cl - paddw mm0, mm7 - paddw mm5, mm7 - psraw mm0, 3 - psraw mm5, 3 -;~~~~~~~~~~~~~~~~~~~~~ - - movd eax, mm1 - movd ecx, mm0 - psrlq mm0, 32 - psrlq mm1, 32 - mov word ptr[rdx+32*0], ax - mov word 
ptr[rdx+32*1], cx - shr eax, 16 - shr ecx, 16 - mov word ptr[rdx+32*4], ax - mov word ptr[rdx+32*5], cx - movd eax, mm1 - movd ecx, mm0 - mov word ptr[rdx+32*8], ax - mov word ptr[rdx+32*9], cx - shr eax, 16 - shr ecx, 16 - mov word ptr[rdx+32*12], ax - mov word ptr[rdx+32*13], cx - - movd eax, mm6 - movd ecx, mm5 - psrlq mm5, 32 - psrlq mm6, 32 - mov word ptr[rdx+32*2], ax - mov word ptr[rdx+32*3], cx - shr eax, 16 - shr ecx, 16 - mov word ptr[rdx+32*6], ax - mov word ptr[rdx+32*7], cx - movd eax, mm6 - movd ecx, mm5 - mov word ptr[rdx+32*10], ax - mov word ptr[rdx+32*11], cx - shr eax, 16 - shr ecx, 16 - mov word ptr[rdx+32*14], ax - mov word ptr[rdx+32*15], cx - - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - diff --git a/thirdparty/libvpx/vp8/common/x86/iwalsh_sse2.asm b/thirdparty/libvpx/vp8/common/x86/iwalsh_sse2.asm deleted file mode 100644 index 06e86a80b6..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/iwalsh_sse2.asm +++ /dev/null @@ -1,121 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -;void vp8_short_inv_walsh4x4_sse2(short *input, short *output) -global sym(vp8_short_inv_walsh4x4_sse2) PRIVATE -sym(vp8_short_inv_walsh4x4_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - ; end prolog - - mov rcx, arg(0) - mov rdx, arg(1) - mov rax, 30003h - - movdqa xmm0, [rcx + 0] ;ip[4] ip[0] - movdqa xmm1, [rcx + 16] ;ip[12] ip[8] - - - pshufd xmm2, xmm1, 4eh ;ip[8] ip[12] - movdqa xmm3, xmm0 ;ip[4] ip[0] - - paddw xmm0, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1 - psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1 - - movdqa xmm4, xmm0 - punpcklqdq xmm0, xmm3 ;d1 a1 - punpckhqdq xmm4, xmm3 ;c1 b1 - - movdqa xmm1, xmm4 ;c1 b1 - paddw xmm4, xmm0 ;dl+cl a1+b1 aka op[4] op[0] - psubw xmm0, xmm1 ;d1-c1 a1-b1 aka op[12] op[8] - - ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; 13 12 11 10 03 02 01 00 - ; - ; 33 32 31 30 23 22 21 20 - ; - movdqa xmm3, xmm4 ; 13 12 11 10 03 02 01 00 - punpcklwd xmm4, xmm0 ; 23 03 22 02 21 01 20 00 - punpckhwd xmm3, xmm0 ; 33 13 32 12 31 11 30 10 - movdqa xmm1, xmm4 ; 23 03 22 02 21 01 20 00 - punpcklwd xmm4, xmm3 ; 31 21 11 01 30 20 10 00 - punpckhwd xmm1, xmm3 ; 33 23 13 03 32 22 12 02 - ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - movd xmm0, eax - pshufd xmm2, xmm1, 4eh ;ip[8] ip[12] - movdqa xmm3, xmm4 ;ip[4] ip[0] - - pshufd xmm0, xmm0, 0 ;03 03 03 03 03 03 03 03 - - paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1 - psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1 - - movdqa xmm5, xmm4 - punpcklqdq xmm4, xmm3 ;d1 a1 - punpckhqdq xmm5, xmm3 ;c1 b1 - - movdqa xmm1, xmm5 ;c1 b1 - paddw xmm5, xmm4 ;dl+cl a1+b1 aka op[4] op[0] - psubw xmm4, xmm1 ;d1-c1 a1-b1 aka op[12] op[8] - - paddw xmm5, xmm0 - paddw xmm4, xmm0 - psraw xmm5, 3 - psraw xmm4, 3 - - movd eax, xmm5 - movd ecx, xmm4 - psrldq xmm5, 4 - psrldq xmm4, 4 - mov word ptr[rdx+32*0], ax - mov word ptr[rdx+32*2], cx - shr eax, 16 - shr ecx, 16 - mov word ptr[rdx+32*4], ax - mov word ptr[rdx+32*6], cx - movd eax, xmm5 - movd 
ecx, xmm4 - psrldq xmm5, 4 - psrldq xmm4, 4 - mov word ptr[rdx+32*8], ax - mov word ptr[rdx+32*10], cx - shr eax, 16 - shr ecx, 16 - mov word ptr[rdx+32*12], ax - mov word ptr[rdx+32*14], cx - - movd eax, xmm5 - movd ecx, xmm4 - psrldq xmm5, 4 - psrldq xmm4, 4 - mov word ptr[rdx+32*1], ax - mov word ptr[rdx+32*3], cx - shr eax, 16 - shr ecx, 16 - mov word ptr[rdx+32*5], ax - mov word ptr[rdx+32*7], cx - movd eax, xmm5 - movd ecx, xmm4 - mov word ptr[rdx+32*9], ax - mov word ptr[rdx+32*11], cx - shr eax, 16 - shr ecx, 16 - mov word ptr[rdx+32*13], ax - mov word ptr[rdx+32*15], cx - - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret diff --git a/thirdparty/libvpx/vp8/common/x86/loopfilter_block_sse2_x86_64.asm b/thirdparty/libvpx/vp8/common/x86/loopfilter_block_sse2_x86_64.asm deleted file mode 100644 index 6d5aaa19db..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/loopfilter_block_sse2_x86_64.asm +++ /dev/null @@ -1,815 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -%macro LF_ABS 2 - ; %1 value not preserved - ; %2 value preserved - ; output in %1 - movdqa scratch1, %2 ; v2 - - psubusb scratch1, %1 ; v2 - v1 - psubusb %1, %2 ; v1 - v2 - por %1, scratch1 ; abs(v2 - v1) -%endmacro - -%macro LF_FILTER_HEV_MASK 8-9 - - LF_ABS %1, %2 ; abs(p3 - p2) - LF_ABS %2, %3 ; abs(p2 - p1) - pmaxub %1, %2 ; accumulate mask -%if %0 == 8 - movdqa scratch2, %3 ; save p1 - LF_ABS scratch2, %4 ; abs(p1 - p0) -%endif - LF_ABS %4, %5 ; abs(p0 - q0) - LF_ABS %5, %6 ; abs(q0 - q1) -%if %0 == 8 - pmaxub %5, scratch2 ; accumulate hev -%else - pmaxub %5, %9 -%endif - pmaxub %1, %5 ; accumulate mask - - LF_ABS %3, %6 ; abs(p1 - q1) - LF_ABS %6, %7 ; abs(q1 - q2) - pmaxub %1, %6 ; accumulate mask - LF_ABS %7, %8 ; abs(q2 - q3) - pmaxub %1, %7 ; accumulate mask - - paddusb %4, %4 ; 2 * abs(p0 - q0) - pand %3, [GLOBAL(tfe)] - psrlw %3, 1 ; abs(p1 - q1) / 2 - paddusb %4, %3 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 - - psubusb %1, [limit] - psubusb %4, [blimit] - por %1, %4 - pcmpeqb %1, zero ; mask - - psubusb %5, [thresh] - pcmpeqb %5, zero ; ~hev -%endmacro - -%macro LF_FILTER 6 - ; %1-%4: p1-q1 - ; %5: mask - ; %6: hev - - movdqa scratch2, %6 ; save hev - - pxor %1, [GLOBAL(t80)] ; ps1 - pxor %4, [GLOBAL(t80)] ; qs1 - movdqa scratch1, %1 - psubsb scratch1, %4 ; signed_char_clamp(ps1 - qs1) - pandn scratch2, scratch1 ; vp8_filter &= hev - - pxor %2, [GLOBAL(t80)] ; ps0 - pxor %3, [GLOBAL(t80)] ; qs0 - movdqa scratch1, %3 - psubsb scratch1, %2 ; qs0 - ps0 - paddsb scratch2, scratch1 ; vp8_filter += (qs0 - ps0) - paddsb scratch2, scratch1 ; vp8_filter += (qs0 - ps0) - paddsb scratch2, scratch1 ; vp8_filter += (qs0 - ps0) - pand %5, scratch2 ; &= mask - - movdqa scratch2, %5 - paddsb %5, [GLOBAL(t4)] ; Filter1 - paddsb scratch2, [GLOBAL(t3)] ; Filter2 - - ; Filter1 >> 3 - movdqa scratch1, zero - pcmpgtb scratch1, %5 - psrlw %5, 3 - pand scratch1, [GLOBAL(te0)] - pand %5, [GLOBAL(t1f)] - por %5, scratch1 - - 
psubsb %3, %5 ; qs0 - Filter1 - pxor %3, [GLOBAL(t80)] - - ; Filter2 >> 3 - movdqa scratch1, zero - pcmpgtb scratch1, scratch2 - psrlw scratch2, 3 - pand scratch1, [GLOBAL(te0)] - pand scratch2, [GLOBAL(t1f)] - por scratch2, scratch1 - - paddsb %2, scratch2 ; ps0 + Filter2 - pxor %2, [GLOBAL(t80)] - - ; outer tap adjustments - paddsb %5, [GLOBAL(t1)] - movdqa scratch1, zero - pcmpgtb scratch1, %5 - psrlw %5, 1 - pand scratch1, [GLOBAL(t80)] - pand %5, [GLOBAL(t7f)] - por %5, scratch1 - pand %5, %6 ; vp8_filter &= ~hev - - psubsb %4, %5 ; qs1 - vp8_filter - pxor %4, [GLOBAL(t80)] - - paddsb %1, %5 ; ps1 + vp8_filter - pxor %1, [GLOBAL(t80)] -%endmacro - -;void vp8_loop_filter_bh_y_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh -;) -global sym(vp8_loop_filter_bh_y_sse2) PRIVATE -sym(vp8_loop_filter_bh_y_sse2): - -%if LIBVPX_YASM_WIN64 - %define src rcx ; src_ptr - %define stride rdx ; src_pixel_step - %define blimit r8 - %define limit r9 - %define thresh r10 - - %define spp rax - %define stride3 r11 - %define stride5 r12 - %define stride7 r13 - - push rbp - mov rbp, rsp - SAVE_XMM 11 - push r12 - push r13 - mov thresh, arg(4) -%else - %define src rdi ; src_ptr - %define stride rsi ; src_pixel_step - %define blimit rdx - %define limit rcx - %define thresh r8 - - %define spp rax - %define stride3 r9 - %define stride5 r10 - %define stride7 r11 -%endif - - %define scratch1 xmm5 - %define scratch2 xmm6 - %define zero xmm7 - - %define i0 [src] - %define i1 [spp] - %define i2 [src + 2 * stride] - %define i3 [spp + 2 * stride] - %define i4 [src + 4 * stride] - %define i5 [spp + 4 * stride] - %define i6 [src + 2 * stride3] - %define i7 [spp + 2 * stride3] - %define i8 [src + 8 * stride] - %define i9 [spp + 8 * stride] - %define i10 [src + 2 * stride5] - %define i11 [spp + 2 * stride5] - %define i12 [src + 4 * stride3] - %define i13 [spp + 4 * stride3] - %define i14 [src + 2 * stride7] - %define i15 
[spp + 2 * stride7] - - ; prep work - lea spp, [src + stride] - lea stride3, [stride + 2 * stride] - lea stride5, [stride3 + 2 * stride] - lea stride7, [stride3 + 4 * stride] - pxor zero, zero - - ; load the first set into registers - movdqa xmm0, i0 - movdqa xmm1, i1 - movdqa xmm2, i2 - movdqa xmm3, i3 - movdqa xmm4, i4 - movdqa xmm8, i5 - movdqa xmm9, i6 ; q2, will contain abs(p1-p0) - movdqa xmm10, i7 -LF_FILTER_HEV_MASK xmm0, xmm1, xmm2, xmm3, xmm4, xmm8, xmm9, xmm10 - - movdqa xmm1, i2 - movdqa xmm2, i3 - movdqa xmm3, i4 - movdqa xmm8, i5 -LF_FILTER xmm1, xmm2, xmm3, xmm8, xmm0, xmm4 - movdqa i2, xmm1 - movdqa i3, xmm2 - -; second set - movdqa i4, xmm3 - movdqa i5, xmm8 - - movdqa xmm0, i6 - movdqa xmm1, i7 - movdqa xmm2, i8 - movdqa xmm4, i9 - movdqa xmm10, i10 ; q2, will contain abs(p1-p0) - movdqa xmm11, i11 -LF_FILTER_HEV_MASK xmm3, xmm8, xmm0, xmm1, xmm2, xmm4, xmm10, xmm11, xmm9 - - movdqa xmm0, i6 - movdqa xmm1, i7 - movdqa xmm4, i8 - movdqa xmm8, i9 -LF_FILTER xmm0, xmm1, xmm4, xmm8, xmm3, xmm2 - movdqa i6, xmm0 - movdqa i7, xmm1 - -; last set - movdqa i8, xmm4 - movdqa i9, xmm8 - - movdqa xmm0, i10 - movdqa xmm1, i11 - movdqa xmm2, i12 - movdqa xmm3, i13 - movdqa xmm9, i14 ; q2, will contain abs(p1-p0) - movdqa xmm11, i15 -LF_FILTER_HEV_MASK xmm4, xmm8, xmm0, xmm1, xmm2, xmm3, xmm9, xmm11, xmm10 - - movdqa xmm0, i10 - movdqa xmm1, i11 - movdqa xmm3, i12 - movdqa xmm8, i13 -LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2 - movdqa i10, xmm0 - movdqa i11, xmm1 - movdqa i12, xmm3 - movdqa i13, xmm8 - -%if LIBVPX_YASM_WIN64 - pop r13 - pop r12 - RESTORE_XMM - pop rbp -%endif - - ret - - -;void vp8_loop_filter_bv_y_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh -;) - -global sym(vp8_loop_filter_bv_y_sse2) PRIVATE -sym(vp8_loop_filter_bv_y_sse2): - -%if LIBVPX_YASM_WIN64 - %define src rcx ; src_ptr - %define stride rdx ; src_pixel_step - %define blimit r8 - %define limit r9 - %define 
thresh r10 - - %define spp rax - %define stride3 r11 - %define stride5 r12 - %define stride7 r13 - - push rbp - mov rbp, rsp - SAVE_XMM 15 - push r12 - push r13 - mov thresh, arg(4) -%else - %define src rdi - %define stride rsi - %define blimit rdx - %define limit rcx - %define thresh r8 - - %define spp rax - %define stride3 r9 - %define stride5 r10 - %define stride7 r11 -%endif - - %define scratch1 xmm5 - %define scratch2 xmm6 - %define zero xmm7 - - %define s0 [src] - %define s1 [spp] - %define s2 [src + 2 * stride] - %define s3 [spp + 2 * stride] - %define s4 [src + 4 * stride] - %define s5 [spp + 4 * stride] - %define s6 [src + 2 * stride3] - %define s7 [spp + 2 * stride3] - %define s8 [src + 8 * stride] - %define s9 [spp + 8 * stride] - %define s10 [src + 2 * stride5] - %define s11 [spp + 2 * stride5] - %define s12 [src + 4 * stride3] - %define s13 [spp + 4 * stride3] - %define s14 [src + 2 * stride7] - %define s15 [spp + 2 * stride7] - - %define i0 [rsp] - %define i1 [rsp + 16] - %define i2 [rsp + 32] - %define i3 [rsp + 48] - %define i4 [rsp + 64] - %define i5 [rsp + 80] - %define i6 [rsp + 96] - %define i7 [rsp + 112] - %define i8 [rsp + 128] - %define i9 [rsp + 144] - %define i10 [rsp + 160] - %define i11 [rsp + 176] - %define i12 [rsp + 192] - %define i13 [rsp + 208] - %define i14 [rsp + 224] - %define i15 [rsp + 240] - - ALIGN_STACK 16, rax - - ; reserve stack space - %define temp_storage 0 ; size is 256 (16*16) - %define stack_size 256 - sub rsp, stack_size - - ; prep work - lea spp, [src + stride] - lea stride3, [stride + 2 * stride] - lea stride5, [stride3 + 2 * stride] - lea stride7, [stride3 + 4 * stride] - - ; 8-f - movdqa xmm0, s8 - movdqa xmm1, xmm0 - punpcklbw xmm0, s9 ; 80 90 - punpckhbw xmm1, s9 ; 88 98 - - movdqa xmm2, s10 - movdqa xmm3, xmm2 - punpcklbw xmm2, s11 ; a0 b0 - punpckhbw xmm3, s11 ; a8 b8 - - movdqa xmm4, xmm0 - punpcklwd xmm0, xmm2 ; 80 90 a0 b0 - punpckhwd xmm4, xmm2 ; 84 94 a4 b4 - - movdqa xmm2, xmm1 - punpcklwd xmm1, xmm3 ; 
88 98 a8 b8 - punpckhwd xmm2, xmm3 ; 8c 9c ac bc - - ; using xmm[0124] - ; work on next 4 rows - - movdqa xmm3, s12 - movdqa xmm5, xmm3 - punpcklbw xmm3, s13 ; c0 d0 - punpckhbw xmm5, s13 ; c8 d8 - - movdqa xmm6, s14 - movdqa xmm7, xmm6 - punpcklbw xmm6, s15 ; e0 f0 - punpckhbw xmm7, s15 ; e8 f8 - - movdqa xmm8, xmm3 - punpcklwd xmm3, xmm6 ; c0 d0 e0 f0 - punpckhwd xmm8, xmm6 ; c4 d4 e4 f4 - - movdqa xmm6, xmm5 - punpcklwd xmm5, xmm7 ; c8 d8 e8 f8 - punpckhwd xmm6, xmm7 ; cc dc ec fc - - ; pull the third and fourth sets together - - movdqa xmm7, xmm0 - punpckldq xmm0, xmm3 ; 80 90 a0 b0 c0 d0 e0 f0 - punpckhdq xmm7, xmm3 ; 82 92 a2 b2 c2 d2 e2 f2 - - movdqa xmm3, xmm4 - punpckldq xmm4, xmm8 ; 84 94 a4 b4 c4 d4 e4 f4 - punpckhdq xmm3, xmm8 ; 86 96 a6 b6 c6 d6 e6 f6 - - movdqa xmm8, xmm1 - punpckldq xmm1, xmm5 ; 88 88 a8 b8 c8 d8 e8 f8 - punpckhdq xmm8, xmm5 ; 8a 9a aa ba ca da ea fa - - movdqa xmm5, xmm2 - punpckldq xmm2, xmm6 ; 8c 9c ac bc cc dc ec fc - punpckhdq xmm5, xmm6 ; 8e 9e ae be ce de ee fe - - ; save the calculations. we only have 15 registers ... 
- movdqa i0, xmm0 - movdqa i1, xmm7 - movdqa i2, xmm4 - movdqa i3, xmm3 - movdqa i4, xmm1 - movdqa i5, xmm8 - movdqa i6, xmm2 - movdqa i7, xmm5 - - ; 0-7 - movdqa xmm0, s0 - movdqa xmm1, xmm0 - punpcklbw xmm0, s1 ; 00 10 - punpckhbw xmm1, s1 ; 08 18 - - movdqa xmm2, s2 - movdqa xmm3, xmm2 - punpcklbw xmm2, s3 ; 20 30 - punpckhbw xmm3, s3 ; 28 38 - - movdqa xmm4, xmm0 - punpcklwd xmm0, xmm2 ; 00 10 20 30 - punpckhwd xmm4, xmm2 ; 04 14 24 34 - - movdqa xmm2, xmm1 - punpcklwd xmm1, xmm3 ; 08 18 28 38 - punpckhwd xmm2, xmm3 ; 0c 1c 2c 3c - - ; using xmm[0124] - ; work on next 4 rows - - movdqa xmm3, s4 - movdqa xmm5, xmm3 - punpcklbw xmm3, s5 ; 40 50 - punpckhbw xmm5, s5 ; 48 58 - - movdqa xmm6, s6 - movdqa xmm7, xmm6 - punpcklbw xmm6, s7 ; 60 70 - punpckhbw xmm7, s7 ; 68 78 - - movdqa xmm8, xmm3 - punpcklwd xmm3, xmm6 ; 40 50 60 70 - punpckhwd xmm8, xmm6 ; 44 54 64 74 - - movdqa xmm6, xmm5 - punpcklwd xmm5, xmm7 ; 48 58 68 78 - punpckhwd xmm6, xmm7 ; 4c 5c 6c 7c - - ; pull the first two sets together - - movdqa xmm7, xmm0 - punpckldq xmm0, xmm3 ; 00 10 20 30 40 50 60 70 - punpckhdq xmm7, xmm3 ; 02 12 22 32 42 52 62 72 - - movdqa xmm3, xmm4 - punpckldq xmm4, xmm8 ; 04 14 24 34 44 54 64 74 - punpckhdq xmm3, xmm8 ; 06 16 26 36 46 56 66 76 - - movdqa xmm8, xmm1 - punpckldq xmm1, xmm5 ; 08 18 28 38 48 58 68 78 - punpckhdq xmm8, xmm5 ; 0a 1a 2a 3a 4a 5a 6a 7a - - movdqa xmm5, xmm2 - punpckldq xmm2, xmm6 ; 0c 1c 2c 3c 4c 5c 6c 7c - punpckhdq xmm5, xmm6 ; 0e 1e 2e 3e 4e 5e 6e 7e - ; final combination - - movdqa xmm6, xmm0 - punpcklqdq xmm0, i0 - punpckhqdq xmm6, i0 - - movdqa xmm9, xmm7 - punpcklqdq xmm7, i1 - punpckhqdq xmm9, i1 - - movdqa xmm10, xmm4 - punpcklqdq xmm4, i2 - punpckhqdq xmm10, i2 - - movdqa xmm11, xmm3 - punpcklqdq xmm3, i3 - punpckhqdq xmm11, i3 - - movdqa xmm12, xmm1 - punpcklqdq xmm1, i4 - punpckhqdq xmm12, i4 - - movdqa xmm13, xmm8 - punpcklqdq xmm8, i5 - punpckhqdq xmm13, i5 - - movdqa xmm14, xmm2 - punpcklqdq xmm2, i6 - punpckhqdq xmm14, i6 - - movdqa 
xmm15, xmm5 - punpcklqdq xmm5, i7 - punpckhqdq xmm15, i7 - - movdqa i0, xmm0 - movdqa i1, xmm6 - movdqa i2, xmm7 - movdqa i3, xmm9 - movdqa i4, xmm4 - movdqa i5, xmm10 - movdqa i6, xmm3 - movdqa i7, xmm11 - movdqa i8, xmm1 - movdqa i9, xmm12 - movdqa i10, xmm8 - movdqa i11, xmm13 - movdqa i12, xmm2 - movdqa i13, xmm14 - movdqa i14, xmm5 - movdqa i15, xmm15 - -; TRANSPOSED DATA AVAILABLE ON THE STACK - - movdqa xmm12, xmm6 - movdqa xmm13, xmm7 - - pxor zero, zero - -LF_FILTER_HEV_MASK xmm0, xmm12, xmm13, xmm9, xmm4, xmm10, xmm3, xmm11 - - movdqa xmm1, i2 - movdqa xmm2, i3 - movdqa xmm8, i4 - movdqa xmm9, i5 -LF_FILTER xmm1, xmm2, xmm8, xmm9, xmm0, xmm4 - movdqa i2, xmm1 - movdqa i3, xmm2 - -; second set - movdqa i4, xmm8 - movdqa i5, xmm9 - - movdqa xmm0, i6 - movdqa xmm1, i7 - movdqa xmm2, i8 - movdqa xmm4, i9 - movdqa xmm10, i10 ; q2, will contain abs(p1-p0) - movdqa xmm11, i11 -LF_FILTER_HEV_MASK xmm8, xmm9, xmm0, xmm1, xmm2, xmm4, xmm10, xmm11, xmm3 - - movdqa xmm0, i6 - movdqa xmm1, i7 - movdqa xmm3, i8 - movdqa xmm4, i9 -LF_FILTER xmm0, xmm1, xmm3, xmm4, xmm8, xmm2 - movdqa i6, xmm0 - movdqa i7, xmm1 - -; last set - movdqa i8, xmm3 - movdqa i9, xmm4 - - movdqa xmm0, i10 - movdqa xmm1, i11 - movdqa xmm2, i12 - movdqa xmm8, i13 - movdqa xmm9, i14 ; q2, will contain abs(p1-p0) - movdqa xmm11, i15 -LF_FILTER_HEV_MASK xmm3, xmm4, xmm0, xmm1, xmm2, xmm8, xmm9, xmm11, xmm10 - - movdqa xmm0, i10 - movdqa xmm1, i11 - movdqa xmm4, i12 - movdqa xmm8, i13 -LF_FILTER xmm0, xmm1, xmm4, xmm8, xmm3, xmm2 - movdqa i10, xmm0 - movdqa i11, xmm1 - movdqa i12, xmm4 - movdqa i13, xmm8 - - -; RESHUFFLE AND WRITE OUT - ; 8-f - movdqa xmm0, i8 - movdqa xmm1, xmm0 - punpcklbw xmm0, i9 ; 80 90 - punpckhbw xmm1, i9 ; 88 98 - - movdqa xmm2, i10 - movdqa xmm3, xmm2 - punpcklbw xmm2, i11 ; a0 b0 - punpckhbw xmm3, i11 ; a8 b8 - - movdqa xmm4, xmm0 - punpcklwd xmm0, xmm2 ; 80 90 a0 b0 - punpckhwd xmm4, xmm2 ; 84 94 a4 b4 - - movdqa xmm2, xmm1 - punpcklwd xmm1, xmm3 ; 88 98 a8 b8 - punpckhwd 
xmm2, xmm3 ; 8c 9c ac bc - - ; using xmm[0124] - ; work on next 4 rows - - movdqa xmm3, i12 - movdqa xmm5, xmm3 - punpcklbw xmm3, i13 ; c0 d0 - punpckhbw xmm5, i13 ; c8 d8 - - movdqa xmm6, i14 - movdqa xmm7, xmm6 - punpcklbw xmm6, i15 ; e0 f0 - punpckhbw xmm7, i15 ; e8 f8 - - movdqa xmm8, xmm3 - punpcklwd xmm3, xmm6 ; c0 d0 e0 f0 - punpckhwd xmm8, xmm6 ; c4 d4 e4 f4 - - movdqa xmm6, xmm5 - punpcklwd xmm5, xmm7 ; c8 d8 e8 f8 - punpckhwd xmm6, xmm7 ; cc dc ec fc - - ; pull the third and fourth sets together - - movdqa xmm7, xmm0 - punpckldq xmm0, xmm3 ; 80 90 a0 b0 c0 d0 e0 f0 - punpckhdq xmm7, xmm3 ; 82 92 a2 b2 c2 d2 e2 f2 - - movdqa xmm3, xmm4 - punpckldq xmm4, xmm8 ; 84 94 a4 b4 c4 d4 e4 f4 - punpckhdq xmm3, xmm8 ; 86 96 a6 b6 c6 d6 e6 f6 - - movdqa xmm8, xmm1 - punpckldq xmm1, xmm5 ; 88 88 a8 b8 c8 d8 e8 f8 - punpckhdq xmm8, xmm5 ; 8a 9a aa ba ca da ea fa - - movdqa xmm5, xmm2 - punpckldq xmm2, xmm6 ; 8c 9c ac bc cc dc ec fc - punpckhdq xmm5, xmm6 ; 8e 9e ae be ce de ee fe - - ; save the calculations. we only have 15 registers ... 
- movdqa i8, xmm0 - movdqa i9, xmm7 - movdqa i10, xmm4 - movdqa i11, xmm3 - movdqa i12, xmm1 - movdqa i13, xmm8 - movdqa i14, xmm2 - movdqa i15, xmm5 - - ; 0-7 - movdqa xmm0, i0 - movdqa xmm1, xmm0 - punpcklbw xmm0, i1 ; 00 10 - punpckhbw xmm1, i1 ; 08 18 - - movdqa xmm2, i2 - movdqa xmm3, xmm2 - punpcklbw xmm2, i3 ; 20 30 - punpckhbw xmm3, i3 ; 28 38 - - movdqa xmm4, xmm0 - punpcklwd xmm0, xmm2 ; 00 10 20 30 - punpckhwd xmm4, xmm2 ; 04 14 24 34 - - movdqa xmm2, xmm1 - punpcklwd xmm1, xmm3 ; 08 18 28 38 - punpckhwd xmm2, xmm3 ; 0c 1c 2c 3c - - ; using xmm[0124] - ; work on next 4 rows - - movdqa xmm3, i4 - movdqa xmm5, xmm3 - punpcklbw xmm3, i5 ; 40 50 - punpckhbw xmm5, i5 ; 48 58 - - movdqa xmm6, i6 - movdqa xmm7, xmm6 - punpcklbw xmm6, i7 ; 60 70 - punpckhbw xmm7, i7 ; 68 78 - - movdqa xmm8, xmm3 - punpcklwd xmm3, xmm6 ; 40 50 60 70 - punpckhwd xmm8, xmm6 ; 44 54 64 74 - - movdqa xmm6, xmm5 - punpcklwd xmm5, xmm7 ; 48 58 68 78 - punpckhwd xmm6, xmm7 ; 4c 5c 6c 7c - - ; pull the first two sets together - - movdqa xmm7, xmm0 - punpckldq xmm0, xmm3 ; 00 10 20 30 40 50 60 70 - punpckhdq xmm7, xmm3 ; 02 12 22 32 42 52 62 72 - - movdqa xmm3, xmm4 - punpckldq xmm4, xmm8 ; 04 14 24 34 44 54 64 74 - punpckhdq xmm3, xmm8 ; 06 16 26 36 46 56 66 76 - - movdqa xmm8, xmm1 - punpckldq xmm1, xmm5 ; 08 18 28 38 48 58 68 78 - punpckhdq xmm8, xmm5 ; 0a 1a 2a 3a 4a 5a 6a 7a - - movdqa xmm5, xmm2 - punpckldq xmm2, xmm6 ; 0c 1c 2c 3c 4c 5c 6c 7c - punpckhdq xmm5, xmm6 ; 0e 1e 2e 3e 4e 5e 6e 7e - ; final combination - - movdqa xmm6, xmm0 - punpcklqdq xmm0, i8 - punpckhqdq xmm6, i8 - - movdqa xmm9, xmm7 - punpcklqdq xmm7, i9 - punpckhqdq xmm9, i9 - - movdqa xmm10, xmm4 - punpcklqdq xmm4, i10 - punpckhqdq xmm10, i10 - - movdqa xmm11, xmm3 - punpcklqdq xmm3, i11 - punpckhqdq xmm11, i11 - - movdqa xmm12, xmm1 - punpcklqdq xmm1, i12 - punpckhqdq xmm12, i12 - - movdqa xmm13, xmm8 - punpcklqdq xmm8, i13 - punpckhqdq xmm13, i13 - - movdqa xmm14, xmm2 - punpcklqdq xmm2, i14 - punpckhqdq xmm14, 
i14 - - movdqa xmm15, xmm5 - punpcklqdq xmm5, i15 - punpckhqdq xmm15, i15 - - movdqa s0, xmm0 - movdqa s1, xmm6 - movdqa s2, xmm7 - movdqa s3, xmm9 - movdqa s4, xmm4 - movdqa s5, xmm10 - movdqa s6, xmm3 - movdqa s7, xmm11 - movdqa s8, xmm1 - movdqa s9, xmm12 - movdqa s10, xmm8 - movdqa s11, xmm13 - movdqa s12, xmm2 - movdqa s13, xmm14 - movdqa s14, xmm5 - movdqa s15, xmm15 - - ; free stack space - add rsp, stack_size - - ; un-ALIGN_STACK - pop rsp - -%if LIBVPX_YASM_WIN64 - pop r13 - pop r12 - RESTORE_XMM - pop rbp -%endif - - ret - -SECTION_RODATA -align 16 -te0: - times 16 db 0xe0 -align 16 -t7f: - times 16 db 0x7f -align 16 -tfe: - times 16 db 0xfe -align 16 -t1f: - times 16 db 0x1f -align 16 -t80: - times 16 db 0x80 -align 16 -t1: - times 16 db 0x01 -align 16 -t3: - times 16 db 0x03 -align 16 -t4: - times 16 db 0x04 diff --git a/thirdparty/libvpx/vp8/common/x86/loopfilter_sse2.asm b/thirdparty/libvpx/vp8/common/x86/loopfilter_sse2.asm deleted file mode 100644 index 1913abc69b..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/loopfilter_sse2.asm +++ /dev/null @@ -1,1640 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" -%define _t0 0 -%define _t1 _t0 + 16 -%define _p3 _t1 + 16 -%define _p2 _p3 + 16 -%define _p1 _p2 + 16 -%define _p0 _p1 + 16 -%define _q0 _p0 + 16 -%define _q1 _q0 + 16 -%define _q2 _q1 + 16 -%define _q3 _q2 + 16 -%define lf_var_size 160 - -; Use of pmaxub instead of psubusb to compute filter mask was seen -; in ffvp8 - -%macro LFH_FILTER_AND_HEV_MASK 1 -%if %1 - movdqa xmm2, [rdi+2*rax] ; q3 - movdqa xmm1, [rsi+2*rax] ; q2 - movdqa xmm4, [rsi+rax] ; q1 - movdqa xmm5, [rsi] ; q0 - neg rax ; negate pitch to deal with above border -%else - movlps xmm2, [rsi + rcx*2] ; q3 - movlps xmm1, [rsi + rcx] ; q2 - movlps xmm4, [rsi] ; q1 - movlps xmm5, [rsi + rax] ; q0 - - movhps xmm2, [rdi + rcx*2] - movhps xmm1, [rdi + rcx] - movhps xmm4, [rdi] - movhps xmm5, [rdi + rax] - - lea rsi, [rsi + rax*4] - lea rdi, [rdi + rax*4] - - movdqa [rsp+_q2], xmm1 ; store q2 - movdqa [rsp+_q1], xmm4 ; store q1 -%endif - movdqa xmm7, [rdx] ;limit - - movdqa xmm6, xmm1 ; q2 - movdqa xmm3, xmm4 ; q1 - - psubusb xmm1, xmm2 ; q2-=q3 - psubusb xmm2, xmm6 ; q3-=q2 - - psubusb xmm4, xmm6 ; q1-=q2 - psubusb xmm6, xmm3 ; q2-=q1 - - por xmm4, xmm6 ; abs(q2-q1) - por xmm1, xmm2 ; abs(q3-q2) - - movdqa xmm0, xmm5 ; q0 - pmaxub xmm1, xmm4 - - psubusb xmm5, xmm3 ; q0-=q1 - psubusb xmm3, xmm0 ; q1-=q0 - - por xmm5, xmm3 ; abs(q0-q1) - movdqa [rsp+_t0], xmm5 ; save to t0 - - pmaxub xmm1, xmm5 - -%if %1 - movdqa xmm2, [rsi+4*rax] ; p3 - movdqa xmm4, [rdi+4*rax] ; p2 - movdqa xmm6, [rsi+2*rax] ; p1 -%else - movlps xmm2, [rsi + rax] ; p3 - movlps xmm4, [rsi] ; p2 - movlps xmm6, [rsi + rcx] ; p1 - - movhps xmm2, [rdi + rax] - movhps xmm4, [rdi] - movhps xmm6, [rdi + rcx] - - movdqa [rsp+_p2], xmm4 ; store p2 - movdqa [rsp+_p1], xmm6 ; store p1 -%endif - - movdqa xmm5, xmm4 ; p2 - movdqa xmm3, xmm6 ; p1 - - psubusb xmm4, xmm2 ; p2-=p3 - psubusb xmm2, xmm5 ; p3-=p2 - - psubusb xmm3, xmm5 ; p1-=p2 - pmaxub xmm1, xmm4 ; abs(p3 - p2) - - psubusb xmm5, xmm6 ; p2-=p1 - 
pmaxub xmm1, xmm2 ; abs(p3 - p2) - - pmaxub xmm1, xmm5 ; abs(p2 - p1) - movdqa xmm2, xmm6 ; p1 - - pmaxub xmm1, xmm3 ; abs(p2 - p1) -%if %1 - movdqa xmm4, [rsi+rax] ; p0 - movdqa xmm3, [rdi] ; q1 -%else - movlps xmm4, [rsi + rcx*2] ; p0 - movhps xmm4, [rdi + rcx*2] - movdqa xmm3, [rsp+_q1] ; q1 -%endif - - movdqa xmm5, xmm4 ; p0 - psubusb xmm4, xmm6 ; p0-=p1 - - psubusb xmm6, xmm5 ; p1-=p0 - - por xmm6, xmm4 ; abs(p1 - p0) - mov rdx, arg(2) ; get blimit - - movdqa [rsp+_t1], xmm6 ; save to t1 - - movdqa xmm4, xmm3 ; q1 - pmaxub xmm1, xmm6 - - psubusb xmm3, xmm2 ; q1-=p1 - psubusb xmm2, xmm4 ; p1-=q1 - - psubusb xmm1, xmm7 - por xmm2, xmm3 ; abs(p1-q1) - - movdqa xmm7, [rdx] ; blimit - mov rdx, arg(4) ; hev get thresh - - movdqa xmm3, xmm0 ; q0 - pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero - - movdqa xmm6, xmm5 ; p0 - psrlw xmm2, 1 ; abs(p1-q1)/2 - - psubusb xmm5, xmm3 ; p0-=q0 - psubusb xmm3, xmm6 ; q0-=p0 - por xmm5, xmm3 ; abs(p0 - q0) - - paddusb xmm5, xmm5 ; abs(p0-q0)*2 - - movdqa xmm4, [rsp+_t0] ; hev get abs (q1 - q0) - movdqa xmm3, [rsp+_t1] ; get abs (p1 - p0) - - paddusb xmm5, xmm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - movdqa xmm2, [rdx] ; hev - - psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit - psubusb xmm4, xmm2 ; hev - - psubusb xmm3, xmm2 ; hev - por xmm1, xmm5 - - pxor xmm7, xmm7 - paddb xmm4, xmm3 ; hev abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - - pcmpeqb xmm4, xmm5 ; hev - pcmpeqb xmm3, xmm3 ; hev - - pcmpeqb xmm1, xmm7 ; mask xmm1 - pxor xmm4, xmm3 ; hev -%endmacro - -%macro B_FILTER 1 - movdqa xmm3, [GLOBAL(t80)] -%if %1 == 0 - movdqa xmm2, [rsp+_p1] ; p1 - movdqa xmm7, [rsp+_q1] ; q1 -%elif %1 == 1 - movdqa xmm2, [rsi+2*rax] ; p1 - movdqa xmm7, [rdi] ; q1 -%elif %1 == 2 - movdqa xmm2, [rsp+_p1] ; p1 - movdqa xmm6, [rsp+_p0] ; p0 - movdqa xmm0, [rsp+_q0] ; q0 - movdqa xmm7, [rsp+_q1] ; q1 -%endif - - pxor xmm2, xmm3 ; p1 offset to convert to signed values - pxor xmm7, xmm3 ; q1 offset to convert to signed values - 
- psubsb xmm2, xmm7 ; p1 - q1 - pxor xmm6, xmm3 ; offset to convert to signed values - - pand xmm2, xmm4 ; high var mask (hvm)(p1 - q1) - pxor xmm0, xmm3 ; offset to convert to signed values - - movdqa xmm3, xmm0 ; q0 - psubsb xmm0, xmm6 ; q0 - p0 - paddsb xmm2, xmm0 ; 1 * (q0 - p0) + hvm(p1 - q1) - paddsb xmm2, xmm0 ; 2 * (q0 - p0) + hvm(p1 - q1) - paddsb xmm2, xmm0 ; 3 * (q0 - p0) + hvm(p1 - q1) - pand xmm1, xmm2 ; mask filter values we don't care about - - movdqa xmm2, xmm1 - paddsb xmm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 - paddsb xmm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 - - punpckhbw xmm5, xmm2 ; axbxcxdx - punpcklbw xmm2, xmm2 ; exfxgxhx - - punpcklbw xmm0, xmm1 ; exfxgxhx - psraw xmm5, 11 ; sign extended shift right by 3 - - punpckhbw xmm1, xmm1 ; axbxcxdx - psraw xmm2, 11 ; sign extended shift right by 3 - - packsswb xmm2, xmm5 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3; - psraw xmm0, 11 ; sign extended shift right by 3 - - psraw xmm1, 11 ; sign extended shift right by 3 - movdqa xmm5, xmm0 ; save results - - packsswb xmm0, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 - - paddsb xmm6, xmm2 ; p0+= p0 add - - movdqa xmm2, [GLOBAL(ones)] - paddsw xmm5, xmm2 - paddsw xmm1, xmm2 - psraw xmm5, 1 ; partial shifted one more time for 2nd tap - psraw xmm1, 1 ; partial shifted one more time for 2nd tap - packsswb xmm5, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4 - movdqa xmm2, [GLOBAL(t80)] - -%if %1 == 0 - movdqa xmm1, [rsp+_p1] ; p1 - lea rsi, [rsi + rcx*2] - lea rdi, [rdi + rcx*2] -%elif %1 == 1 - movdqa xmm1, [rsi+2*rax] ; p1 -%elif %1 == 2 - movdqa xmm1, [rsp+_p1] ; p1 -%endif - - pandn xmm4, xmm5 ; high edge variance additive - pxor xmm6, xmm2 ; unoffset - - pxor xmm1, xmm2 ; reoffset - psubsb xmm3, xmm0 ; q0-= q0 add - - paddsb xmm1, xmm4 ; p1+= p1 add - pxor xmm3, xmm2 ; unoffset - - pxor xmm1, xmm2 ; unoffset - psubsb xmm7, xmm4 ; q1-= q1 add - - pxor xmm7, xmm2 ; unoffset -%if %1 == 0 - movq [rsi], xmm6 ; p0 - movhps [rdi], xmm6 - movq 
[rsi + rax], xmm1 ; p1 - movhps [rdi + rax], xmm1 - movq [rsi + rcx], xmm3 ; q0 - movhps [rdi + rcx], xmm3 - movq [rsi + rcx*2], xmm7 ; q1 - movhps [rdi + rcx*2], xmm7 -%elif %1 == 1 - movdqa [rsi+rax], xmm6 ; write back - movdqa [rsi+2*rax], xmm1 ; write back - movdqa [rsi], xmm3 ; write back - movdqa [rdi], xmm7 ; write back -%endif - -%endmacro - -%if ABI_IS_32BIT - -;void vp8_loop_filter_horizontal_edge_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -;) -global sym(vp8_loop_filter_horizontal_edge_sse2) PRIVATE -sym(vp8_loop_filter_horizontal_edge_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, lf_var_size - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step - - mov rdx, arg(3) ;limit - - lea rdi, [rsi+rax] ; rdi points to row +1 for indirect addressing - - ; calculate breakout conditions and high edge variance - LFH_FILTER_AND_HEV_MASK 1 - ; filter and write back the result - B_FILTER 1 - - add rsp, lf_var_size - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -%endif - -;void vp8_loop_filter_horizontal_edge_uv_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; int count -;) -global sym(vp8_loop_filter_horizontal_edge_uv_sse2) PRIVATE -sym(vp8_loop_filter_horizontal_edge_uv_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, lf_var_size - - mov rsi, arg(0) ; u - mov rdi, arg(5) ; v - movsxd rax, dword ptr arg(1) ; src_pixel_step - mov rcx, rax - neg rax ; negate pitch to deal with above border - - mov rdx, arg(3) ;limit - - lea rsi, [rsi + rcx] - lea rdi, [rdi + rcx] - - ; calculate breakout conditions and high 
edge variance - LFH_FILTER_AND_HEV_MASK 0 - ; filter and write back the result - B_FILTER 0 - - add rsp, lf_var_size - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -%macro MB_FILTER_AND_WRITEBACK 1 - movdqa xmm3, [GLOBAL(t80)] -%if %1 == 0 - movdqa xmm2, [rsp+_p1] ; p1 - movdqa xmm7, [rsp+_q1] ; q1 -%elif %1 == 1 - movdqa xmm2, [rsi+2*rax] ; p1 - movdqa xmm7, [rdi] ; q1 - - mov rcx, rax - neg rcx -%elif %1 == 2 - movdqa xmm2, [rsp+_p1] ; p1 - movdqa xmm6, [rsp+_p0] ; p0 - movdqa xmm0, [rsp+_q0] ; q0 - movdqa xmm7, [rsp+_q1] ; q1 -%endif - - pxor xmm2, xmm3 ; p1 offset to convert to signed values - pxor xmm7, xmm3 ; q1 offset to convert to signed values - pxor xmm6, xmm3 ; offset to convert to signed values - pxor xmm0, xmm3 ; offset to convert to signed values - - psubsb xmm2, xmm7 ; p1 - q1 - - movdqa xmm3, xmm0 ; q0 - psubsb xmm0, xmm6 ; q0 - p0 - paddsb xmm2, xmm0 ; 1 * (q0 - p0) + (p1 - q1) - paddsb xmm2, xmm0 ; 2 * (q0 - p0) - paddsb xmm2, xmm0 ; 3 * (q0 - p0) + (p1 - q1) - pand xmm1, xmm2 ; mask filter values we don't care about - - movdqa xmm2, xmm1 ; vp8_filter - - pand xmm2, xmm4 ; Filter2 = vp8_filter & hev - pxor xmm0, xmm0 - - pandn xmm4, xmm1 ; vp8_filter&=~hev - pxor xmm1, xmm1 - - punpcklbw xmm0, xmm4 ; Filter 2 (hi) - punpckhbw xmm1, xmm4 ; Filter 2 (lo) - - movdqa xmm5, xmm2 - - movdqa xmm4, [GLOBAL(s9)] - paddsb xmm5, [GLOBAL(t3)] ; vp8_signed_char_clamp(Filter2 + 3) - paddsb xmm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) - - pmulhw xmm1, xmm4 ; Filter 2 (lo) * 9 - pmulhw xmm0, xmm4 ; Filter 2 (hi) * 9 - - punpckhbw xmm7, xmm5 ; axbxcxdx - punpcklbw xmm5, xmm5 ; exfxgxhx - - psraw xmm7, 11 ; sign extended shift right by 3 - - psraw xmm5, 11 ; sign extended shift right by 3 - punpckhbw xmm4, xmm2 ; axbxcxdx - - punpcklbw xmm2, xmm2 ; exfxgxhx - psraw xmm4, 11 ; sign extended shift right by 3 - - packsswb xmm5, xmm7 ; Filter2 >>=3; - psraw xmm2, 11 ; sign extended shift right 
by 3 - - packsswb xmm2, xmm4 ; Filter1 >>=3; - - paddsb xmm6, xmm5 ; ps0 =ps0 + Fitler2 - - psubsb xmm3, xmm2 ; qs0 =qs0 - Filter1 - movdqa xmm7, xmm1 - - movdqa xmm4, [GLOBAL(s63)] - movdqa xmm5, xmm0 - movdqa xmm2, xmm5 - paddw xmm0, xmm4 ; Filter 2 (hi) * 9 + 63 - paddw xmm1, xmm4 ; Filter 2 (lo) * 9 + 63 - movdqa xmm4, xmm7 - - paddw xmm5, xmm5 ; Filter 2 (hi) * 18 - - paddw xmm7, xmm7 ; Filter 2 (lo) * 18 - paddw xmm5, xmm0 ; Filter 2 (hi) * 27 + 63 - - paddw xmm7, xmm1 ; Filter 2 (lo) * 27 + 63 - paddw xmm2, xmm0 ; Filter 2 (hi) * 18 + 63 - psraw xmm0, 7 ; (Filter 2 (hi) * 9 + 63) >> 7 - - paddw xmm4, xmm1 ; Filter 2 (lo) * 18 + 63 - psraw xmm1, 7 ; (Filter 2 (lo) * 9 + 63) >> 7 - psraw xmm2, 7 ; (Filter 2 (hi) * 18 + 63) >> 7 - - packsswb xmm0, xmm1 ; u1 = vp8_signed_char_clamp((63 + Filter2 * 9)>>7) - - psraw xmm4, 7 ; (Filter 2 (lo) * 18 + 63) >> 7 - psraw xmm5, 7 ; (Filter 2 (hi) * 27 + 63) >> 7 - psraw xmm7, 7 ; (Filter 2 (lo) * 27 + 63) >> 7 - - packsswb xmm5, xmm7 ; u3 = vp8_signed_char_clamp((63 + Filter2 * 27)>>7) - packsswb xmm2, xmm4 ; u2 = vp8_signed_char_clamp((63 + Filter2 * 18)>>7) - movdqa xmm7, [GLOBAL(t80)] - -%if %1 == 0 - movdqa xmm1, [rsp+_q1] ; q1 - movdqa xmm4, [rsp+_p1] ; p1 - lea rsi, [rsi+rcx*2] - lea rdi, [rdi+rcx*2] - -%elif %1 == 1 - movdqa xmm1, [rdi] ; q1 - movdqa xmm4, [rsi+rax*2] ; p1 -%elif %1 == 2 - movdqa xmm4, [rsp+_p1] ; p1 - movdqa xmm1, [rsp+_q1] ; q1 -%endif - - pxor xmm1, xmm7 - pxor xmm4, xmm7 - - psubsb xmm3, xmm5 ; sq = vp8_signed_char_clamp(qs0 - u3) - paddsb xmm6, xmm5 ; sp = vp8_signed_char_clamp(ps0 - u3) - psubsb xmm1, xmm2 ; sq = vp8_signed_char_clamp(qs1 - u2) - paddsb xmm4, xmm2 ; sp = vp8_signed_char_clamp(ps1 - u2) - -%if %1 == 1 - movdqa xmm2, [rdi+rax*4] ; p2 - movdqa xmm5, [rdi+rcx] ; q2 -%else - movdqa xmm2, [rsp+_p2] ; p2 - movdqa xmm5, [rsp+_q2] ; q2 -%endif - - pxor xmm1, xmm7 ; *oq1 = sq^0x80; - pxor xmm4, xmm7 ; *op1 = sp^0x80; - pxor xmm2, xmm7 - pxor xmm5, xmm7 - paddsb xmm2, xmm0 ; sp = 
vp8_signed_char_clamp(ps2 - u) - psubsb xmm5, xmm0 ; sq = vp8_signed_char_clamp(qs2 - u) - pxor xmm2, xmm7 ; *op2 = sp^0x80; - pxor xmm5, xmm7 ; *oq2 = sq^0x80; - pxor xmm3, xmm7 ; *oq0 = sq^0x80 - pxor xmm6, xmm7 ; *oq0 = sp^0x80 -%if %1 == 0 - movq [rsi], xmm6 ; p0 - movhps [rdi], xmm6 - movq [rsi + rcx], xmm3 ; q0 - movhps [rdi + rcx], xmm3 - lea rdx, [rcx + rcx*2] - movq [rsi+rcx*2], xmm1 ; q1 - movhps [rdi+rcx*2], xmm1 - - movq [rsi + rax], xmm4 ; p1 - movhps [rdi + rax], xmm4 - - movq [rsi+rax*2], xmm2 ; p2 - movhps [rdi+rax*2], xmm2 - - movq [rsi+rdx], xmm5 ; q2 - movhps [rdi+rdx], xmm5 -%elif %1 == 1 - movdqa [rdi+rcx], xmm5 ; q2 - movdqa [rdi], xmm1 ; q1 - movdqa [rsi], xmm3 ; q0 - movdqa [rsi+rax ], xmm6 ; p0 - movdqa [rsi+rax*2], xmm4 ; p1 - movdqa [rdi+rax*4], xmm2 ; p2 -%elif %1 == 2 - movdqa [rsp+_p1], xmm4 ; p1 - movdqa [rsp+_p0], xmm6 ; p0 - movdqa [rsp+_q0], xmm3 ; q0 - movdqa [rsp+_q1], xmm1 ; q1 -%endif - -%endmacro - - -;void vp8_mbloop_filter_horizontal_edge_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -;) -global sym(vp8_mbloop_filter_horizontal_edge_sse2) PRIVATE -sym(vp8_mbloop_filter_horizontal_edge_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, lf_var_size - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step - mov rdx, arg(3) ;limit - - lea rdi, [rsi+rax] ; rdi points to row +1 for indirect addressing - - ; calculate breakout conditions and high edge variance - LFH_FILTER_AND_HEV_MASK 1 - ; filter and write back the results - MB_FILTER_AND_WRITEBACK 1 - - add rsp, lf_var_size - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_mbloop_filter_horizontal_edge_uv_sse2 -;( -; unsigned char *u, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; 
const char *thresh, -; unsigned char *v -;) -global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2) PRIVATE -sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, lf_var_size - - mov rsi, arg(0) ; u - mov rdi, arg(5) ; v - movsxd rax, dword ptr arg(1) ; src_pixel_step - mov rcx, rax - neg rax ; negate pitch to deal with above border - mov rdx, arg(3) ;limit - - lea rsi, [rsi + rcx] - lea rdi, [rdi + rcx] - - ; calculate breakout conditions and high edge variance - LFH_FILTER_AND_HEV_MASK 0 - ; filter and write back the results - MB_FILTER_AND_WRITEBACK 0 - - add rsp, lf_var_size - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -%macro TRANSPOSE_16X8 2 - movq xmm4, [rsi] ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00 - movq xmm1, [rdi] ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10 - movq xmm0, [rsi+2*rax] ; xx xx xx xx xx xx xx xx 27 26 25 24 23 22 21 20 - movq xmm7, [rdi+2*rax] ; xx xx xx xx xx xx xx xx 37 36 35 34 33 32 31 30 - movq xmm5, [rsi+4*rax] ; xx xx xx xx xx xx xx xx 47 46 45 44 43 42 41 40 - movq xmm2, [rdi+4*rax] ; xx xx xx xx xx xx xx xx 57 56 55 54 53 52 51 50 - - punpcklbw xmm4, xmm1 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00 - - movq xmm1, [rdi+2*rcx] ; xx xx xx xx xx xx xx xx 77 76 75 74 73 72 71 70 - - movdqa xmm3, xmm4 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00 - punpcklbw xmm0, xmm7 ; 37 27 36 36 35 25 34 24 33 23 32 22 31 21 30 20 - - movq xmm7, [rsi+2*rcx] ; xx xx xx xx xx xx xx xx 67 66 65 64 63 62 61 60 - - punpcklbw xmm5, xmm2 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40 -%if %1 - lea rsi, [rsi+rax*8] - lea rdi, [rdi+rax*8] -%else - mov rsi, arg(5) ; v_ptr -%endif - - movdqa xmm6, xmm5 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40 - punpcklbw xmm7, xmm1 ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60 - 
punpcklwd xmm5, xmm7 ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 - punpckhwd xmm6, xmm7 ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44 - punpcklwd xmm3, xmm0 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 - -%if %1 == 0 - lea rdi, [rsi + rax - 4] ; rdi points to row +1 for indirect addressing - lea rsi, [rsi - 4] -%endif - - movdqa xmm2, xmm3 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 - punpckhwd xmm4, xmm0 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04 - - movdqa xmm7, xmm4 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04 - punpckhdq xmm3, xmm5 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 - - punpckhdq xmm7, xmm6 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06 - - punpckldq xmm4, xmm6 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04 - - punpckldq xmm2, xmm5 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00 - - movdqa [rsp+_t0], xmm2 ; save to free XMM2 - - movq xmm2, [rsi] ; xx xx xx xx xx xx xx xx 87 86 85 84 83 82 81 80 - movq xmm6, [rdi] ; xx xx xx xx xx xx xx xx 97 96 95 94 93 92 91 90 - movq xmm0, [rsi+2*rax] ; xx xx xx xx xx xx xx xx a7 a6 a5 a4 a3 a2 a1 a0 - movq xmm5, [rdi+2*rax] ; xx xx xx xx xx xx xx xx b7 b6 b5 b4 b3 b2 b1 b0 - movq xmm1, [rsi+4*rax] ; xx xx xx xx xx xx xx xx c7 c6 c5 c4 c3 c2 c1 c0 - - punpcklbw xmm2, xmm6 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80 - - movq xmm6, [rdi+4*rax] ; xx xx xx xx xx xx xx xx d7 d6 d5 d4 d3 d2 d1 d0 - - punpcklbw xmm0, xmm5 ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0 - - movq xmm5, [rsi+2*rcx] ; xx xx xx xx xx xx xx xx e7 e6 e5 e4 e3 e2 e1 e0 - - punpcklbw xmm1, xmm6 ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 e1 d0 c0 - - movq xmm6, [rdi+2*rcx] ; xx xx xx xx xx xx xx xx f7 f6 f5 f4 f3 f2 f1 f0 - - punpcklbw xmm5, xmm6 ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0 - - movdqa xmm6, xmm1 ; - punpckhwd xmm6, xmm5 ; f7 e7 d7 c7 f6 e6 d6 c6 f5 e5 d5 c5 f4 e4 d4 c4 - - punpcklwd xmm1, xmm5 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 - movdqa xmm5, xmm2 ; 97 87 96 86 
95 85 94 84 93 83 92 82 91 81 90 80 - - punpcklwd xmm5, xmm0 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80 - - punpckhwd xmm2, xmm0 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84 - - movdqa xmm0, xmm5 - punpckldq xmm0, xmm1 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80 - - punpckhdq xmm5, xmm1 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82 - movdqa xmm1, xmm2 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84 - - punpckldq xmm1, xmm6 ; f5 e5 d5 c5 b5 a5 95 85 f4 e4 d4 c4 b4 a4 94 84 - - punpckhdq xmm2, xmm6 ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86 - movdqa xmm6, xmm7 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06 - - punpcklqdq xmm6, xmm2 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06 - - punpckhqdq xmm7, xmm2 ; f7 e7 d7 c7 b7 a7 97 87 77 67 57 47 37 27 17 07 - -%if %2 == 0 - movdqa [rsp+_q3], xmm7 ; save 7 - movdqa [rsp+_q2], xmm6 ; save 6 -%endif - movdqa xmm2, xmm3 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 - punpckhqdq xmm3, xmm5 ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 - punpcklqdq xmm2, xmm5 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - movdqa [rsp+_p1], xmm2 ; save 2 - - movdqa xmm5, xmm4 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04 - punpcklqdq xmm4, xmm1 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 - movdqa [rsp+_p0], xmm3 ; save 3 - - punpckhqdq xmm5, xmm1 ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05 - - movdqa [rsp+_q0], xmm4 ; save 4 - movdqa [rsp+_q1], xmm5 ; save 5 - movdqa xmm1, [rsp+_t0] - - movdqa xmm2, xmm1 ; - punpckhqdq xmm1, xmm0 ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 - punpcklqdq xmm2, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 - -%if %2 == 0 - movdqa [rsp+_p2], xmm1 - movdqa [rsp+_p3], xmm2 -%endif - -%endmacro - -%macro LFV_FILTER_MASK_HEV_MASK 0 - movdqa xmm0, xmm6 ; q2 - psubusb xmm0, xmm7 ; q2-q3 - - psubusb xmm7, xmm6 ; q3-q2 - movdqa xmm4, xmm5 ; q1 - - por xmm7, xmm0 ; abs (q3-q2) - psubusb xmm4, xmm6 ; q1-q2 - - movdqa xmm0, xmm1 - psubusb xmm6, 
xmm5 ; q2-q1 - - por xmm6, xmm4 ; abs (q2-q1) - psubusb xmm0, xmm2 ; p2 - p3; - - psubusb xmm2, xmm1 ; p3 - p2; - por xmm0, xmm2 ; abs(p2-p3) - - movdqa xmm5, [rsp+_p1] ; p1 - pmaxub xmm0, xmm7 - - movdqa xmm2, xmm5 ; p1 - psubusb xmm5, xmm1 ; p1-p2 - psubusb xmm1, xmm2 ; p2-p1 - - movdqa xmm7, xmm3 ; p0 - psubusb xmm7, xmm2 ; p0-p1 - - por xmm1, xmm5 ; abs(p2-p1) - pmaxub xmm0, xmm6 - - pmaxub xmm0, xmm1 - movdqa xmm1, xmm2 ; p1 - - psubusb xmm2, xmm3 ; p1-p0 - - por xmm2, xmm7 ; abs(p1-p0) - - pmaxub xmm0, xmm2 - - movdqa xmm5, [rsp+_q0] ; q0 - movdqa xmm7, [rsp+_q1] ; q1 - - mov rdx, arg(3) ; limit - - movdqa xmm6, xmm5 ; q0 - movdqa xmm4, xmm7 ; q1 - - psubusb xmm5, xmm7 ; q0-q1 - psubusb xmm7, xmm6 ; q1-q0 - - por xmm7, xmm5 ; abs(q1-q0) - - pmaxub xmm0, xmm7 - - psubusb xmm0, [rdx] ; limit - - mov rdx, arg(2) ; blimit - movdqa xmm5, xmm4 ; q1 - - psubusb xmm5, xmm1 ; q1-=p1 - psubusb xmm1, xmm4 ; p1-=q1 - - por xmm5, xmm1 ; abs(p1-q1) - movdqa xmm1, xmm3 ; p0 - - pand xmm5, [GLOBAL(tfe)] ; set lsb of each byte to zero - psubusb xmm1, xmm6 ; p0-q0 - - movdqa xmm4, [rdx] ; blimit - mov rdx, arg(4) ; get thresh - - psrlw xmm5, 1 ; abs(p1-q1)/2 - psubusb xmm6, xmm3 ; q0-p0 - - por xmm1, xmm6 ; abs(q0-p0) - paddusb xmm1, xmm1 ; abs(q0-p0)*2 - movdqa xmm3, [rdx] - - paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - psubusb xmm2, xmm3 ; abs(q1 - q0) > thresh - - psubusb xmm7, xmm3 ; abs(p1 - p0)> thresh - - psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit - por xmm2, xmm7 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - - por xmm1, xmm0 ; mask - pcmpeqb xmm2, xmm0 - - pxor xmm0, xmm0 - pcmpeqb xmm4, xmm4 - - pcmpeqb xmm1, xmm0 - pxor xmm4, xmm2 -%endmacro - -%macro BV_TRANSPOSE 0 - ; xmm1 = f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - ; xmm6 = f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 - ; xmm3 = f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 - ; xmm7 = f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05 - movdqa xmm2, xmm1 ; f2 e2 d2 c2 
b2 a2 92 82 72 62 52 42 32 22 12 02 - punpcklbw xmm2, xmm6 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 - - movdqa xmm4, xmm3 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 - punpckhbw xmm1, xmm6 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 - - punpcklbw xmm4, xmm7 ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 - - punpckhbw xmm3, xmm7 ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84 - - movdqa xmm6, xmm2 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 - punpcklwd xmm2, xmm4 ; 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02 - - punpckhwd xmm6, xmm4 ; 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42 - movdqa xmm5, xmm1 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 - - punpcklwd xmm1, xmm3 ; b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82 - - punpckhwd xmm5, xmm3 ; f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2 - ; xmm2 = 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02 - ; xmm6 = 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42 - ; xmm1 = b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82 - ; xmm5 = f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2 -%endmacro - -%macro BV_WRITEBACK 2 - movd [rsi+2], %1 - movd [rsi+4*rax+2], %2 - psrldq %1, 4 - psrldq %2, 4 - movd [rdi+2], %1 - movd [rdi+4*rax+2], %2 - psrldq %1, 4 - psrldq %2, 4 - movd [rsi+2*rax+2], %1 - movd [rsi+2*rcx+2], %2 - psrldq %1, 4 - psrldq %2, 4 - movd [rdi+2*rax+2], %1 - movd [rdi+2*rcx+2], %2 -%endmacro - -%if ABI_IS_32BIT - -;void vp8_loop_filter_vertical_edge_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -;) -global sym(vp8_loop_filter_vertical_edge_sse2) PRIVATE -sym(vp8_loop_filter_vertical_edge_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, lf_var_size - - mov rsi, arg(0) ; src_ptr - movsxd rax, dword ptr arg(1) ; src_pixel_step - - lea rsi, [rsi - 4] - lea rdi, [rsi + rax] ; rdi points to row 
+1 for indirect addressing - lea rcx, [rax*2+rax] - - ;transpose 16x8 to 8x16, and store the 8-line result on stack. - TRANSPOSE_16X8 1, 1 - - ; calculate filter mask and high edge variance - LFV_FILTER_MASK_HEV_MASK - - ; start work on filters - B_FILTER 2 - - ; transpose and write back - only work on q1, q0, p0, p1 - BV_TRANSPOSE - ; store 16-line result - - lea rdx, [rax] - neg rdx - - BV_WRITEBACK xmm1, xmm5 - - lea rsi, [rsi+rdx*8] - lea rdi, [rdi+rdx*8] - BV_WRITEBACK xmm2, xmm6 - - add rsp, lf_var_size - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -%endif - -;void vp8_loop_filter_vertical_edge_uv_sse2 -;( -; unsigned char *u, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; unsigned char *v -;) -global sym(vp8_loop_filter_vertical_edge_uv_sse2) PRIVATE -sym(vp8_loop_filter_vertical_edge_uv_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, lf_var_size - - mov rsi, arg(0) ; u_ptr - movsxd rax, dword ptr arg(1) ; src_pixel_step - - lea rsi, [rsi - 4] - lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing - lea rcx, [rax+2*rax] - - ;transpose 16x8 to 8x16, and store the 8-line result on stack. 
- TRANSPOSE_16X8 0, 1 - - ; calculate filter mask and high edge variance - LFV_FILTER_MASK_HEV_MASK - - ; start work on filters - B_FILTER 2 - - ; transpose and write back - only work on q1, q0, p0, p1 - BV_TRANSPOSE - - lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing - - ; store 16-line result - BV_WRITEBACK xmm1, xmm5 - - mov rsi, arg(0) ; u_ptr - lea rsi, [rsi - 4] - lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing - BV_WRITEBACK xmm2, xmm6 - - add rsp, lf_var_size - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -%macro MBV_TRANSPOSE 0 - movdqa xmm0, [rsp+_p3] ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 - movdqa xmm1, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 - - punpcklbw xmm0, xmm2 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 - punpckhbw xmm1, xmm2 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 - - movdqa xmm7, [rsp+_p1] ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - movdqa xmm6, xmm7 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - - punpcklbw xmm7, [rsp+_p0] ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 - punpckhbw xmm6, [rsp+_p0] ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 - - movdqa xmm3, xmm0 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 - punpcklwd xmm0, xmm7 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 - - punpckhwd xmm3, xmm7 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 - movdqa xmm4, xmm1 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 - - punpcklwd xmm1, xmm6 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 - punpckhwd xmm4, xmm6 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 - - movdqa xmm7, [rsp+_q0] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 - punpcklbw xmm7, [rsp+_q1] ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 - - movdqa xmm6, xmm5 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06 - punpcklbw xmm6, [rsp+_q3] ; 77 76 67 66 57 56 47 46 37 36 27 26 17 16 07 06 - - movdqa 
xmm2, xmm7 ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 - punpcklwd xmm7, xmm6 ; 37 36 35 34 27 26 25 24 17 16 15 14 07 06 05 04 - - punpckhwd xmm2, xmm6 ; 77 76 75 74 67 66 65 64 57 56 55 54 47 46 45 44 - movdqa xmm6, xmm0 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 - - punpckldq xmm0, xmm7 ; 17 16 15 14 13 12 11 10 07 06 05 04 03 02 01 00 - punpckhdq xmm6, xmm7 ; 37 36 35 34 33 32 31 30 27 26 25 24 23 22 21 20 -%endmacro - -%macro MBV_WRITEBACK_1 0 - movq [rsi], xmm0 - movhps [rdi], xmm0 - - movq [rsi+2*rax], xmm6 - movhps [rdi+2*rax], xmm6 - - movdqa xmm0, xmm3 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 - punpckldq xmm0, xmm2 ; 57 56 55 54 53 52 51 50 47 46 45 44 43 42 41 40 - punpckhdq xmm3, xmm2 ; 77 76 75 74 73 72 71 70 67 66 65 64 63 62 61 60 - - movq [rsi+4*rax], xmm0 - movhps [rdi+4*rax], xmm0 - - movq [rsi+2*rcx], xmm3 - movhps [rdi+2*rcx], xmm3 - - movdqa xmm7, [rsp+_q0] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 - punpckhbw xmm7, [rsp+_q1] ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84 - punpckhbw xmm5, [rsp+_q3] ; f7 f6 e7 e6 d7 d6 c7 c6 b7 b6 a7 a6 97 96 87 86 - - movdqa xmm0, xmm7 - punpcklwd xmm0, xmm5 ; b7 b6 b4 b4 a7 a6 a5 a4 97 96 95 94 87 86 85 84 - punpckhwd xmm7, xmm5 ; f7 f6 f5 f4 e7 e6 e5 e4 d7 d6 d5 d4 c7 c6 c5 c4 - - movdqa xmm5, xmm1 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 - punpckldq xmm1, xmm0 ; 97 96 95 94 93 92 91 90 87 86 85 83 84 82 81 80 - punpckhdq xmm5, xmm0 ; b7 b6 b5 b4 b3 b2 b1 b0 a7 a6 a5 a4 a3 a2 a1 a0 -%endmacro - -%macro MBV_WRITEBACK_2 0 - movq [rsi], xmm1 - movhps [rdi], xmm1 - - movq [rsi+2*rax], xmm5 - movhps [rdi+2*rax], xmm5 - - movdqa xmm1, xmm4 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 - punpckldq xmm1, xmm7 ; d7 d6 d5 d4 d3 d2 d1 d0 c7 c6 c5 c4 c3 c2 c1 c0 - punpckhdq xmm4, xmm7 ; f7 f6 f4 f4 f3 f2 f1 f0 e7 e6 e5 e4 e3 e2 e1 e0 - - movq [rsi+4*rax], xmm1 - movhps [rdi+4*rax], xmm1 - - movq [rsi+2*rcx], xmm4 - movhps [rdi+2*rcx], xmm4 -%endmacro - - -;void 
vp8_mbloop_filter_vertical_edge_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -;) -global sym(vp8_mbloop_filter_vertical_edge_sse2) PRIVATE -sym(vp8_mbloop_filter_vertical_edge_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, lf_var_size - - mov rsi, arg(0) ; src_ptr - movsxd rax, dword ptr arg(1) ; src_pixel_step - - lea rsi, [rsi - 4] - lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing - lea rcx, [rax*2+rax] - - ; Transpose - TRANSPOSE_16X8 1, 0 - - ; calculate filter mask and high edge variance - LFV_FILTER_MASK_HEV_MASK - - neg rax - ; start work on filters - MB_FILTER_AND_WRITEBACK 2 - - lea rsi, [rsi+rax*8] - lea rdi, [rdi+rax*8] - - ; transpose and write back - MBV_TRANSPOSE - - neg rax - - MBV_WRITEBACK_1 - - - lea rsi, [rsi+rax*8] - lea rdi, [rdi+rax*8] - MBV_WRITEBACK_2 - - add rsp, lf_var_size - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_mbloop_filter_vertical_edge_uv_sse2 -;( -; unsigned char *u, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; unsigned char *v -;) -global sym(vp8_mbloop_filter_vertical_edge_uv_sse2) PRIVATE -sym(vp8_mbloop_filter_vertical_edge_uv_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, lf_var_size - - mov rsi, arg(0) ; u_ptr - movsxd rax, dword ptr arg(1) ; src_pixel_step - - lea rsi, [rsi - 4] - lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing - lea rcx, [rax+2*rax] - - ; Transpose - TRANSPOSE_16X8 0, 0 - - ; calculate filter mask and high edge variance - LFV_FILTER_MASK_HEV_MASK - - ; start work on filters - MB_FILTER_AND_WRITEBACK 2 - - ; transpose and write back - 
MBV_TRANSPOSE - - mov rsi, arg(0) ;u_ptr - lea rsi, [rsi - 4] - lea rdi, [rsi + rax] - MBV_WRITEBACK_1 - mov rsi, arg(5) ;v_ptr - lea rsi, [rsi - 4] - lea rdi, [rsi + rax] - MBV_WRITEBACK_2 - - add rsp, lf_var_size - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_loop_filter_simple_horizontal_edge_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -;) -global sym(vp8_loop_filter_simple_horizontal_edge_sse2) PRIVATE -sym(vp8_loop_filter_simple_horizontal_edge_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - SAVE_XMM 7 - GET_GOT rbx - ; end prolog - - mov rcx, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - movdqa xmm6, [GLOBAL(tfe)] - lea rdx, [rcx + rax] - neg rax - - ; calculate mask - movdqa xmm0, [rdx] ; q1 - mov rdx, arg(2) ;blimit - movdqa xmm1, [rcx+2*rax] ; p1 - - movdqa xmm2, xmm1 - movdqa xmm3, xmm0 - - psubusb xmm0, xmm1 ; q1-=p1 - psubusb xmm1, xmm3 ; p1-=q1 - por xmm1, xmm0 ; abs(p1-q1) - pand xmm1, xmm6 ; set lsb of each byte to zero - psrlw xmm1, 1 ; abs(p1-q1)/2 - - movdqa xmm7, XMMWORD PTR [rdx] - - movdqa xmm5, [rcx+rax] ; p0 - movdqa xmm4, [rcx] ; q0 - movdqa xmm0, xmm4 ; q0 - movdqa xmm6, xmm5 ; p0 - psubusb xmm5, xmm4 ; p0-=q0 - psubusb xmm4, xmm6 ; q0-=p0 - por xmm5, xmm4 ; abs(p0 - q0) - - movdqa xmm4, [GLOBAL(t80)] - - paddusb xmm5, xmm5 ; abs(p0-q0)*2 - paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit - pxor xmm7, xmm7 - pcmpeqb xmm5, xmm7 - - - ; start work on filters - pxor xmm2, xmm4 ; p1 offset to convert to signed values - pxor xmm3, xmm4 ; q1 offset to convert to signed values - psubsb xmm2, xmm3 ; p1 - q1 - - pxor xmm6, xmm4 ; offset to convert to signed values - pxor xmm0, xmm4 ; offset to convert to signed values - movdqa xmm3, xmm0 ; q0 - psubsb xmm0, xmm6 ; q0 - p0 - paddsb xmm2, xmm0 ; p1 - q1 + 1 * (q0 - p0) - 
paddsb xmm2, xmm0 ; p1 - q1 + 2 * (q0 - p0) - paddsb xmm2, xmm0 ; p1 - q1 + 3 * (q0 - p0) - pand xmm5, xmm2 ; mask filter values we don't care about - - movdqa xmm0, xmm5 - paddsb xmm5, [GLOBAL(t3)] ; 3* (q0 - p0) + (p1 - q1) + 4 - paddsb xmm0, [GLOBAL(t4)] ; +3 instead of +4 - - movdqa xmm1, [GLOBAL(te0)] - movdqa xmm2, [GLOBAL(t1f)] - -; pxor xmm7, xmm7 - pcmpgtb xmm7, xmm0 ;save sign - pand xmm7, xmm1 ;preserve the upper 3 bits - psrlw xmm0, 3 - pand xmm0, xmm2 ;clear out upper 3 bits - por xmm0, xmm7 ;add sign - psubsb xmm3, xmm0 ; q0-= q0sz add - - pxor xmm7, xmm7 - pcmpgtb xmm7, xmm5 ;save sign - pand xmm7, xmm1 ;preserve the upper 3 bits - psrlw xmm5, 3 - pand xmm5, xmm2 ;clear out upper 3 bits - por xmm5, xmm7 ;add sign - paddsb xmm6, xmm5 ; p0+= p0 add - - pxor xmm3, xmm4 ; unoffset - movdqa [rcx], xmm3 ; write back - - pxor xmm6, xmm4 ; unoffset - movdqa [rcx+rax], xmm6 ; write back - - ; begin epilog - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_loop_filter_simple_vertical_edge_sse2 -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -;) -global sym(vp8_loop_filter_simple_vertical_edge_sse2) PRIVATE -sym(vp8_loop_filter_simple_vertical_edge_sse2): - push rbp ; save old base pointer value. - mov rbp, rsp ; set new base pointer value. - SHADOW_ARGS_TO_STACK 3 - SAVE_XMM 7 - GET_GOT rbx ; save callee-saved reg - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 32 ; reserve 32 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? 
- - lea rsi, [rsi - 2 ] - lea rdi, [rsi + rax] - lea rdx, [rsi + rax*4] - lea rcx, [rdx + rax] - - movd xmm0, [rsi] ; (high 96 bits unused) 03 02 01 00 - movd xmm1, [rdx] ; (high 96 bits unused) 43 42 41 40 - movd xmm2, [rdi] ; 13 12 11 10 - movd xmm3, [rcx] ; 53 52 51 50 - punpckldq xmm0, xmm1 ; (high 64 bits unused) 43 42 41 40 03 02 01 00 - punpckldq xmm2, xmm3 ; 53 52 51 50 13 12 11 10 - - movd xmm4, [rsi + rax*2] ; 23 22 21 20 - movd xmm5, [rdx + rax*2] ; 63 62 61 60 - movd xmm6, [rdi + rax*2] ; 33 32 31 30 - movd xmm7, [rcx + rax*2] ; 73 72 71 70 - punpckldq xmm4, xmm5 ; 63 62 61 60 23 22 21 20 - punpckldq xmm6, xmm7 ; 73 72 71 70 33 32 31 30 - - punpcklbw xmm0, xmm2 ; 53 43 52 42 51 41 50 40 13 03 12 02 11 01 10 00 - punpcklbw xmm4, xmm6 ; 73 63 72 62 71 61 70 60 33 23 32 22 31 21 30 20 - - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm4 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 - punpckhwd xmm1, xmm4 ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 - - movdqa xmm2, xmm0 - punpckldq xmm0, xmm1 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00 - punpckhdq xmm2, xmm1 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 - - lea rsi, [rsi + rax*8] - lea rdi, [rsi + rax] - lea rdx, [rsi + rax*4] - lea rcx, [rdx + rax] - - movd xmm4, [rsi] ; 83 82 81 80 - movd xmm1, [rdx] ; c3 c2 c1 c0 - movd xmm6, [rdi] ; 93 92 91 90 - movd xmm3, [rcx] ; d3 d2 d1 d0 - punpckldq xmm4, xmm1 ; c3 c2 c1 c0 83 82 81 80 - punpckldq xmm6, xmm3 ; d3 d2 d1 d0 93 92 91 90 - - movd xmm1, [rsi + rax*2] ; a3 a2 a1 a0 - movd xmm5, [rdx + rax*2] ; e3 e2 e1 e0 - movd xmm3, [rdi + rax*2] ; b3 b2 b1 b0 - movd xmm7, [rcx + rax*2] ; f3 f2 f1 f0 - punpckldq xmm1, xmm5 ; e3 e2 e1 e0 a3 a2 a1 a0 - punpckldq xmm3, xmm7 ; f3 f2 f1 f0 b3 b2 b1 b0 - - punpcklbw xmm4, xmm6 ; d3 c3 d2 c2 d1 c1 d0 c0 93 83 92 82 91 81 90 80 - punpcklbw xmm1, xmm3 ; f3 e3 f2 e2 f1 e1 f0 e0 b3 a3 b2 a2 b1 a1 b0 a0 - - movdqa xmm7, xmm4 - punpcklwd xmm4, xmm1 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80 - punpckhwd xmm7, 
xmm1 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 - - movdqa xmm6, xmm4 - punpckldq xmm4, xmm7 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80 - punpckhdq xmm6, xmm7 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82 - - movdqa xmm1, xmm0 - movdqa xmm3, xmm2 - - punpcklqdq xmm0, xmm4 ; p1 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 - punpckhqdq xmm1, xmm4 ; p0 f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 - punpcklqdq xmm2, xmm6 ; q0 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - punpckhqdq xmm3, xmm6 ; q1 f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 - - mov rdx, arg(2) ;blimit - - ; calculate mask - movdqa xmm6, xmm0 ; p1 - movdqa xmm7, xmm3 ; q1 - psubusb xmm7, xmm0 ; q1-=p1 - psubusb xmm6, xmm3 ; p1-=q1 - por xmm6, xmm7 ; abs(p1-q1) - pand xmm6, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw xmm6, 1 ; abs(p1-q1)/2 - - movdqa xmm7, [rdx] - - movdqa xmm5, xmm1 ; p0 - movdqa xmm4, xmm2 ; q0 - psubusb xmm5, xmm2 ; p0-=q0 - psubusb xmm4, xmm1 ; q0-=p0 - por xmm5, xmm4 ; abs(p0 - q0) - paddusb xmm5, xmm5 ; abs(p0-q0)*2 - paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - movdqa xmm4, [GLOBAL(t80)] - - psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit - pxor xmm7, xmm7 - pcmpeqb xmm5, xmm7 ; mm5 = mask - - ; start work on filters - movdqa t0, xmm0 - movdqa t1, xmm3 - - pxor xmm0, xmm4 ; p1 offset to convert to signed values - pxor xmm3, xmm4 ; q1 offset to convert to signed values - psubsb xmm0, xmm3 ; p1 - q1 - - pxor xmm1, xmm4 ; offset to convert to signed values - pxor xmm2, xmm4 ; offset to convert to signed values - - movdqa xmm3, xmm2 ; offseted ; q0 - psubsb xmm2, xmm1 ; q0 - p0 - paddsb xmm0, xmm2 ; p1 - q1 + 1 * (q0 - p0) - paddsb xmm0, xmm2 ; p1 - q1 + 2 * (q0 - p0) - paddsb xmm0, xmm2 ; p1 - q1 + 3 * (q0 - p0) - pand xmm5, xmm0 ; mask filter values we don't care about - - movdqa xmm0, xmm5 - paddsb xmm5, [GLOBAL(t3)] ; 3* (q0 - p0) + (p1 - q1) + 4 - paddsb xmm0, [GLOBAL(t4)] ; +3 instead of +4 - - movdqa xmm6, 
[GLOBAL(te0)] - movdqa xmm2, [GLOBAL(t1f)] - -; pxor xmm7, xmm7 - pcmpgtb xmm7, xmm0 ;save sign - pand xmm7, xmm6 ;preserve the upper 3 bits - psrlw xmm0, 3 - pand xmm0, xmm2 ;clear out upper 3 bits - por xmm0, xmm7 ;add sign - psubsb xmm3, xmm0 ; q0-= q0sz add - - pxor xmm7, xmm7 - pcmpgtb xmm7, xmm5 ;save sign - pand xmm7, xmm6 ;preserve the upper 3 bits - psrlw xmm5, 3 - pand xmm5, xmm2 ;clear out upper 3 bits - por xmm5, xmm7 ;add sign - paddsb xmm1, xmm5 ; p0+= p0 add - - pxor xmm3, xmm4 ; unoffset q0 - pxor xmm1, xmm4 ; unoffset p0 - - movdqa xmm0, t0 ; p1 - movdqa xmm4, t1 ; q1 - - ; write out order: xmm0 xmm2 xmm1 xmm3 - lea rdx, [rsi + rax*4] - - ; transpose back to write out - ; p1 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 - ; p0 f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 - ; q0 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - ; q1 f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 - movdqa xmm6, xmm0 - punpcklbw xmm0, xmm1 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 - punpckhbw xmm6, xmm1 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 - - movdqa xmm5, xmm3 - punpcklbw xmm3, xmm4 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 - punpckhbw xmm5, xmm4 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 - - movdqa xmm2, xmm0 - punpcklwd xmm0, xmm3 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 - punpckhwd xmm2, xmm3 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 - - movdqa xmm3, xmm6 - punpcklwd xmm6, xmm5 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 - punpckhwd xmm3, xmm5 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 - - movd [rsi], xmm6 ; write the second 8-line result - movd [rdx], xmm3 - psrldq xmm6, 4 - psrldq xmm3, 4 - movd [rdi], xmm6 - movd [rcx], xmm3 - psrldq xmm6, 4 - psrldq xmm3, 4 - movd [rsi + rax*2], xmm6 - movd [rdx + rax*2], xmm3 - psrldq xmm6, 4 - psrldq xmm3, 4 - movd [rdi + rax*2], xmm6 - movd [rcx + rax*2], xmm3 - - neg rax - lea rsi, [rsi + rax*8] - neg rax - lea rdi, [rsi + rax] - lea rdx, [rsi + rax*4] 
- lea rcx, [rdx + rax] - - movd [rsi], xmm0 ; write the first 8-line result - movd [rdx], xmm2 - psrldq xmm0, 4 - psrldq xmm2, 4 - movd [rdi], xmm0 - movd [rcx], xmm2 - psrldq xmm0, 4 - psrldq xmm2, 4 - movd [rsi + rax*2], xmm0 - movd [rdx + rax*2], xmm2 - psrldq xmm0, 4 - psrldq xmm2, 4 - movd [rdi + rax*2], xmm0 - movd [rcx + rax*2], xmm2 - - add rsp, 32 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -tfe: - times 16 db 0xfe -align 16 -t80: - times 16 db 0x80 -align 16 -t1s: - times 16 db 0x01 -align 16 -t3: - times 16 db 0x03 -align 16 -t4: - times 16 db 0x04 -align 16 -ones: - times 8 dw 0x0001 -align 16 -s9: - times 8 dw 0x0900 -align 16 -s63: - times 8 dw 0x003f -align 16 -te0: - times 16 db 0xe0 -align 16 -t1f: - times 16 db 0x1f diff --git a/thirdparty/libvpx/vp8/common/x86/loopfilter_x86.c b/thirdparty/libvpx/vp8/common/x86/loopfilter_x86.c deleted file mode 100644 index 6586004600..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/loopfilter_x86.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vpx_config.h" -#include "vp8/common/loopfilter.h" - -#define prototype_loopfilter(sym) \ - void sym(unsigned char *src, int pitch, const unsigned char *blimit,\ - const unsigned char *limit, const unsigned char *thresh, int count) - -#define prototype_loopfilter_nc(sym) \ - void sym(unsigned char *src, int pitch, const unsigned char *blimit,\ - const unsigned char *limit, const unsigned char *thresh) - -#define prototype_simple_loopfilter(sym) \ - void sym(unsigned char *y, int ystride, const unsigned char *blimit) - -prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx); -prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx); -prototype_loopfilter(vp8_loop_filter_vertical_edge_mmx); -prototype_loopfilter(vp8_loop_filter_horizontal_edge_mmx); -prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx); -prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx); - -#if HAVE_SSE2 && ARCH_X86_64 -prototype_loopfilter(vp8_loop_filter_bv_y_sse2); -prototype_loopfilter(vp8_loop_filter_bh_y_sse2); -#else -prototype_loopfilter_nc(vp8_loop_filter_vertical_edge_sse2); -prototype_loopfilter_nc(vp8_loop_filter_horizontal_edge_sse2); -#endif -prototype_loopfilter_nc(vp8_mbloop_filter_vertical_edge_sse2); -prototype_loopfilter_nc(vp8_mbloop_filter_horizontal_edge_sse2); - -extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2; -extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2; -extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_sse2; -extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2; - -#if HAVE_MMX -/* Horizontal MB filtering */ -void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, 
lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); -} - - -/* Vertical MB Filtering */ -void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); -} - - -/* Horizontal B Filtering */ -void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); -} - - -void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) -{ - vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, blimit); - vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, blimit); - vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, blimit); -} - - -/* Vertical B Filtering */ -void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - 
vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); - - if (u_ptr) - vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); - - if (v_ptr) - vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); -} - - -void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) -{ - vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, blimit); - vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, blimit); - vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, blimit); -} -#endif - - -/* Horizontal MB filtering */ -#if HAVE_SSE2 -void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr); - - if (u_ptr) - vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr); -} - - -/* Vertical MB Filtering */ -void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr); - - if (u_ptr) - vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr); -} - - -/* Horizontal B Filtering */ -void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ -#if ARCH_X86_64 - vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); -#else - 
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); - vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); - vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); -#endif - - if (u_ptr) - vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4 * uv_stride); -} - - -void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) -{ - vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, blimit); - vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, blimit); - vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, blimit); -} - - -/* Vertical B Filtering */ -void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ -#if ARCH_X86_64 - vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); -#else - vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); - vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); - vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); -#endif - - if (u_ptr) - vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4); -} - - -void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) -{ - vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, blimit); - vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, blimit); - vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, blimit); -} - -#endif diff --git a/thirdparty/libvpx/vp8/common/x86/recon_mmx.asm 
b/thirdparty/libvpx/vp8/common/x86/recon_mmx.asm deleted file mode 100644 index 15e98713c7..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/recon_mmx.asm +++ /dev/null @@ -1,274 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - - -;void copy_mem8x8_mmx( -; unsigned char *src, -; int src_stride, -; unsigned char *dst, -; int dst_stride -; ) -global sym(vp8_copy_mem8x8_mmx) PRIVATE -sym(vp8_copy_mem8x8_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src; - movq mm0, [rsi] - - movsxd rax, dword ptr arg(1) ;src_stride; - mov rdi, arg(2) ;dst; - - movq mm1, [rsi+rax] - movq mm2, [rsi+rax*2] - - movsxd rcx, dword ptr arg(3) ;dst_stride - lea rsi, [rsi+rax*2] - - movq [rdi], mm0 - add rsi, rax - - movq [rdi+rcx], mm1 - movq [rdi+rcx*2], mm2 - - - lea rdi, [rdi+rcx*2] - movq mm3, [rsi] - - add rdi, rcx - movq mm4, [rsi+rax] - - movq mm5, [rsi+rax*2] - movq [rdi], mm3 - - lea rsi, [rsi+rax*2] - movq [rdi+rcx], mm4 - - movq [rdi+rcx*2], mm5 - lea rdi, [rdi+rcx*2] - - movq mm0, [rsi+rax] - movq mm1, [rsi+rax*2] - - movq [rdi+rcx], mm0 - movq [rdi+rcx*2],mm1 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void copy_mem8x4_mmx( -; unsigned char *src, -; int src_stride, -; unsigned char *dst, -; int dst_stride -; ) -global sym(vp8_copy_mem8x4_mmx) PRIVATE -sym(vp8_copy_mem8x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src; - movq mm0, [rsi] - - movsxd rax, dword ptr arg(1) ;src_stride; - mov rdi, arg(2) ;dst; - 
- movq mm1, [rsi+rax] - movq mm2, [rsi+rax*2] - - movsxd rcx, dword ptr arg(3) ;dst_stride - lea rsi, [rsi+rax*2] - - movq [rdi], mm0 - movq [rdi+rcx], mm1 - - movq [rdi+rcx*2], mm2 - lea rdi, [rdi+rcx*2] - - movq mm3, [rsi+rax] - movq [rdi+rcx], mm3 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void copy_mem16x16_mmx( -; unsigned char *src, -; int src_stride, -; unsigned char *dst, -; int dst_stride -; ) -global sym(vp8_copy_mem16x16_mmx) PRIVATE -sym(vp8_copy_mem16x16_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src; - movsxd rax, dword ptr arg(1) ;src_stride; - - mov rdi, arg(2) ;dst; - movsxd rcx, dword ptr arg(3) ;dst_stride - - movq mm0, [rsi] - movq mm3, [rsi+8]; - - movq mm1, [rsi+rax] - movq mm4, [rsi+rax+8] - - movq mm2, [rsi+rax*2] - movq mm5, [rsi+rax*2+8] - - lea rsi, [rsi+rax*2] - add rsi, rax - - movq [rdi], mm0 - movq [rdi+8], mm3 - - movq [rdi+rcx], mm1 - movq [rdi+rcx+8], mm4 - - movq [rdi+rcx*2], mm2 - movq [rdi+rcx*2+8], mm5 - - lea rdi, [rdi+rcx*2] - add rdi, rcx - - movq mm0, [rsi] - movq mm3, [rsi+8]; - - movq mm1, [rsi+rax] - movq mm4, [rsi+rax+8] - - movq mm2, [rsi+rax*2] - movq mm5, [rsi+rax*2+8] - - lea rsi, [rsi+rax*2] - add rsi, rax - - movq [rdi], mm0 - movq [rdi+8], mm3 - - movq [rdi+rcx], mm1 - movq [rdi+rcx+8], mm4 - - movq [rdi+rcx*2], mm2 - movq [rdi+rcx*2+8], mm5 - - lea rdi, [rdi+rcx*2] - add rdi, rcx - - movq mm0, [rsi] - movq mm3, [rsi+8]; - - movq mm1, [rsi+rax] - movq mm4, [rsi+rax+8] - - movq mm2, [rsi+rax*2] - movq mm5, [rsi+rax*2+8] - - lea rsi, [rsi+rax*2] - add rsi, rax - - movq [rdi], mm0 - movq [rdi+8], mm3 - - movq [rdi+rcx], mm1 - movq [rdi+rcx+8], mm4 - - movq [rdi+rcx*2], mm2 - movq [rdi+rcx*2+8], mm5 - - lea rdi, [rdi+rcx*2] - add rdi, rcx - - movq mm0, [rsi] - movq mm3, [rsi+8]; - - movq mm1, [rsi+rax] - movq mm4, [rsi+rax+8] - - movq mm2, [rsi+rax*2] - movq mm5, [rsi+rax*2+8] - - lea rsi, [rsi+rax*2] - add rsi, 
rax - - movq [rdi], mm0 - movq [rdi+8], mm3 - - movq [rdi+rcx], mm1 - movq [rdi+rcx+8], mm4 - - movq [rdi+rcx*2], mm2 - movq [rdi+rcx*2+8], mm5 - - lea rdi, [rdi+rcx*2] - add rdi, rcx - - movq mm0, [rsi] - movq mm3, [rsi+8]; - - movq mm1, [rsi+rax] - movq mm4, [rsi+rax+8] - - movq mm2, [rsi+rax*2] - movq mm5, [rsi+rax*2+8] - - lea rsi, [rsi+rax*2] - add rsi, rax - - movq [rdi], mm0 - movq [rdi+8], mm3 - - movq [rdi+rcx], mm1 - movq [rdi+rcx+8], mm4 - - movq [rdi+rcx*2], mm2 - movq [rdi+rcx*2+8], mm5 - - lea rdi, [rdi+rcx*2] - add rdi, rcx - - movq mm0, [rsi] - movq mm3, [rsi+8]; - - movq [rdi], mm0 - movq [rdi+8], mm3 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret diff --git a/thirdparty/libvpx/vp8/common/x86/recon_sse2.asm b/thirdparty/libvpx/vp8/common/x86/recon_sse2.asm deleted file mode 100644 index cb89537f76..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/recon_sse2.asm +++ /dev/null @@ -1,116 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -;void copy_mem16x16_sse2( -; unsigned char *src, -; int src_stride, -; unsigned char *dst, -; int dst_stride -; ) -global sym(vp8_copy_mem16x16_sse2) PRIVATE -sym(vp8_copy_mem16x16_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src; - movdqu xmm0, [rsi] - - movsxd rax, dword ptr arg(1) ;src_stride; - mov rdi, arg(2) ;dst; - - movdqu xmm1, [rsi+rax] - movdqu xmm2, [rsi+rax*2] - - movsxd rcx, dword ptr arg(3) ;dst_stride - lea rsi, [rsi+rax*2] - - movdqa [rdi], xmm0 - add rsi, rax - - movdqa [rdi+rcx], xmm1 - movdqa [rdi+rcx*2],xmm2 - - lea rdi, [rdi+rcx*2] - movdqu xmm3, [rsi] - - add rdi, rcx - movdqu xmm4, [rsi+rax] - - movdqu xmm5, [rsi+rax*2] - lea rsi, [rsi+rax*2] - - movdqa [rdi], xmm3 - add rsi, rax - - movdqa [rdi+rcx], xmm4 - movdqa [rdi+rcx*2],xmm5 - - lea rdi, [rdi+rcx*2] - movdqu xmm0, [rsi] - - add rdi, rcx - movdqu xmm1, [rsi+rax] - - movdqu xmm2, [rsi+rax*2] - lea rsi, [rsi+rax*2] - - movdqa [rdi], xmm0 - add rsi, rax - - movdqa [rdi+rcx], xmm1 - - movdqa [rdi+rcx*2], xmm2 - movdqu xmm3, [rsi] - - movdqu xmm4, [rsi+rax] - lea rdi, [rdi+rcx*2] - - add rdi, rcx - movdqu xmm5, [rsi+rax*2] - - lea rsi, [rsi+rax*2] - movdqa [rdi], xmm3 - - add rsi, rax - movdqa [rdi+rcx], xmm4 - - movdqa [rdi+rcx*2],xmm5 - movdqu xmm0, [rsi] - - lea rdi, [rdi+rcx*2] - movdqu xmm1, [rsi+rax] - - add rdi, rcx - movdqu xmm2, [rsi+rax*2] - - lea rsi, [rsi+rax*2] - movdqa [rdi], xmm0 - - movdqa [rdi+rcx], xmm1 - movdqa [rdi+rcx*2],xmm2 - - movdqu xmm3, [rsi+rax] - lea rdi, [rdi+rcx*2] - - movdqa [rdi+rcx], xmm3 - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret diff --git a/thirdparty/libvpx/vp8/common/x86/subpixel_mmx.asm b/thirdparty/libvpx/vp8/common/x86/subpixel_mmx.asm deleted file mode 100644 index 47dd452297..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/subpixel_mmx.asm +++ /dev/null @@ -1,702 +0,0 @@ -; -; Copyright (c) 2010 
The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" -extern sym(vp8_bilinear_filters_x86_8) - - -%define BLOCK_HEIGHT_WIDTH 4 -%define vp8_filter_weight 128 -%define VP8_FILTER_SHIFT 7 - - -;void vp8_filter_block1d_h6_mmx -;( -; unsigned char *src_ptr, -; unsigned short *output_ptr, -; unsigned int src_pixels_per_line, -; unsigned int pixel_step, -; unsigned int output_height, -; unsigned int output_width, -; short * vp8_filter -;) -global sym(vp8_filter_block1d_h6_mmx) PRIVATE -sym(vp8_filter_block1d_h6_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rdx, arg(6) ;vp8_filter - - movq mm1, [rdx + 16] ; do both the negative taps first!!! - movq mm2, [rdx + 32] ; - movq mm6, [rdx + 48] ; - movq mm7, [rdx + 64] ; - - mov rdi, arg(1) ;output_ptr - mov rsi, arg(0) ;src_ptr - movsxd rcx, dword ptr arg(4) ;output_height - movsxd rax, dword ptr arg(5) ;output_width ; destination pitch? - pxor mm0, mm0 ; mm0 = 00000000 - -.nextrow: - movq mm3, [rsi-2] ; mm3 = p-2..p5 - movq mm4, mm3 ; mm4 = p-2..p5 - psrlq mm3, 8 ; mm3 = p-1..p5 - punpcklbw mm3, mm0 ; mm3 = p-1..p2 - pmullw mm3, mm1 ; mm3 *= kernel 1 modifiers. 
- - movq mm5, mm4 ; mm5 = p-2..p5 - punpckhbw mm4, mm0 ; mm5 = p2..p5 - pmullw mm4, mm7 ; mm5 *= kernel 4 modifiers - paddsw mm3, mm4 ; mm3 += mm5 - - movq mm4, mm5 ; mm4 = p-2..p5; - psrlq mm5, 16 ; mm5 = p0..p5; - punpcklbw mm5, mm0 ; mm5 = p0..p3 - pmullw mm5, mm2 ; mm5 *= kernel 2 modifiers - paddsw mm3, mm5 ; mm3 += mm5 - - movq mm5, mm4 ; mm5 = p-2..p5 - psrlq mm4, 24 ; mm4 = p1..p5 - punpcklbw mm4, mm0 ; mm4 = p1..p4 - pmullw mm4, mm6 ; mm5 *= kernel 3 modifiers - paddsw mm3, mm4 ; mm3 += mm5 - - ; do outer positive taps - movd mm4, [rsi+3] - punpcklbw mm4, mm0 ; mm5 = p3..p6 - pmullw mm4, [rdx+80] ; mm5 *= kernel 0 modifiers - paddsw mm3, mm4 ; mm3 += mm5 - - punpcklbw mm5, mm0 ; mm5 = p-2..p1 - pmullw mm5, [rdx] ; mm5 *= kernel 5 modifiers - paddsw mm3, mm5 ; mm3 += mm5 - - paddsw mm3, [GLOBAL(rd)] ; mm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128 - packuswb mm3, mm0 ; pack and unpack to saturate - punpcklbw mm3, mm0 ; - - movq [rdi], mm3 ; store the results in the destination - -%if ABI_IS_32BIT - add rsi, dword ptr arg(2) ;src_pixels_per_line ; next line - add rdi, rax; -%else - movsxd r8, dword ptr arg(2) ;src_pixels_per_line - add rdi, rax; - - add rsi, r8 ; next line -%endif - - dec rcx ; decrement count - jnz .nextrow ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_filter_block1dc_v6_mmx -;( -; short *src_ptr, -; unsigned char *output_ptr, -; int output_pitch, -; unsigned int pixels_per_line, -; unsigned int pixel_step, -; unsigned int output_height, -; unsigned int output_width, -; short * vp8_filter -;) -global sym(vp8_filter_block1dc_v6_mmx) PRIVATE -sym(vp8_filter_block1dc_v6_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - movq mm5, [GLOBAL(rd)] - push rbx - mov rbx, arg(7) ;vp8_filter - movq mm1, [rbx + 16] ; do both the negative taps first!!! 
- movq mm2, [rbx + 32] ; - movq mm6, [rbx + 48] ; - movq mm7, [rbx + 64] ; - - movsxd rdx, dword ptr arg(3) ;pixels_per_line - mov rdi, arg(1) ;output_ptr - mov rsi, arg(0) ;src_ptr - sub rsi, rdx - sub rsi, rdx - movsxd rcx, DWORD PTR arg(5) ;output_height - movsxd rax, DWORD PTR arg(2) ;output_pitch ; destination pitch? - pxor mm0, mm0 ; mm0 = 00000000 - - -.nextrow_cv: - movq mm3, [rsi+rdx] ; mm3 = p0..p8 = row -1 - pmullw mm3, mm1 ; mm3 *= kernel 1 modifiers. - - - movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 2 - pmullw mm4, mm7 ; mm4 *= kernel 4 modifiers. - paddsw mm3, mm4 ; mm3 += mm4 - - movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 0 - pmullw mm4, mm2 ; mm4 *= kernel 2 modifiers. - paddsw mm3, mm4 ; mm3 += mm4 - - movq mm4, [rsi] ; mm4 = p0..p3 = row -2 - pmullw mm4, [rbx] ; mm4 *= kernel 0 modifiers. - paddsw mm3, mm4 ; mm3 += mm4 - - - add rsi, rdx ; move source forward 1 line to avoid 3 * pitch - movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 1 - pmullw mm4, mm6 ; mm4 *= kernel 3 modifiers. - paddsw mm3, mm4 ; mm3 += mm4 - - movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 3 - pmullw mm4, [rbx +80] ; mm4 *= kernel 3 modifiers. - paddsw mm3, mm4 ; mm3 += mm4 - - - paddsw mm3, mm5 ; mm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128 - packuswb mm3, mm0 ; pack and saturate - - movd [rdi],mm3 ; store the results in the destination - ; the subsequent iterations repeat 3 out of 4 of these reads. Since the - ; recon block should be in cache this shouldn't cost much. Its obviously - ; avoidable!!!. 
- lea rdi, [rdi+rax] ; - dec rcx ; decrement count - jnz .nextrow_cv ; next row - - pop rbx - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void bilinear_predict8x8_mmx -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -global sym(vp8_bilinear_predict8x8_mmx) PRIVATE -sym(vp8_bilinear_predict8x8_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]; - ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]; - - movsxd rax, dword ptr arg(2) ;xoffset - mov rdi, arg(4) ;dst_ptr ; - - shl rax, 5 ; offset * 32 - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] - - add rax, rcx ; HFilter - mov rsi, arg(0) ;src_ptr ; - - movsxd rdx, dword ptr arg(5) ;dst_pitch - movq mm1, [rax] ; - - movq mm2, [rax+16] ; - movsxd rax, dword ptr arg(3) ;yoffset - - pxor mm0, mm0 ; - - shl rax, 5 ; offset*32 - add rax, rcx ; VFilter - - lea rcx, [rdi+rdx*8] ; - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; - - - - ; get the first horizontal line done ; - movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movq mm4, mm3 ; make a copy of current line - - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - punpckhbw mm4, mm0 ; - - pmullw mm3, mm1 ; - pmullw mm4, mm1 ; - - movq mm5, [rsi+1] ; - movq mm6, mm5 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 ; - - pmullw mm5, mm2 ; - pmullw mm6, mm2 ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP8_FILTER_SHIFT ; - - movq mm7, mm3 ; - packuswb mm7, mm4 ; - - add rsi, rdx ; next line -.next_row_8x8: - movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movq mm4, mm3 ; make a copy of current line - - punpcklbw mm3, mm0 ; xx 00 01 02 03 
04 05 06 - punpckhbw mm4, mm0 ; - - pmullw mm3, mm1 ; - pmullw mm4, mm1 ; - - movq mm5, [rsi+1] ; - movq mm6, mm5 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 ; - - pmullw mm5, mm2 ; - pmullw mm6, mm2 ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - movq mm5, mm7 ; - movq mm6, mm7 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 - - pmullw mm5, [rax] ; - pmullw mm6, [rax] ; - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP8_FILTER_SHIFT ; - - movq mm7, mm3 ; - packuswb mm7, mm4 ; - - - pmullw mm3, [rax+16] ; - pmullw mm4, [rax+16] ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP8_FILTER_SHIFT ; - - packuswb mm3, mm4 - - movq [rdi], mm3 ; store the results in the destination - -%if ABI_IS_32BIT - add rsi, rdx ; next line - add rdi, dword ptr arg(5) ;dst_pitch ; -%else - movsxd r8, dword ptr arg(5) ;dst_pitch - add rsi, rdx ; next line - add rdi, r8 ;dst_pitch -%endif - cmp rdi, rcx ; - jne .next_row_8x8 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void bilinear_predict8x4_mmx -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -global sym(vp8_bilinear_predict8x4_mmx) PRIVATE -sym(vp8_bilinear_predict8x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]; - ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]; - - movsxd rax, dword ptr arg(2) ;xoffset - mov rdi, arg(4) ;dst_ptr ; - - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] - shl rax, 5 - - mov rsi, arg(0) ;src_ptr ; - add rax, rcx - - movsxd rdx, dword ptr arg(5) ;dst_pitch - movq mm1, [rax] ; - - movq mm2, [rax+16] ; - movsxd 
rax, dword ptr arg(3) ;yoffset - - pxor mm0, mm0 ; - shl rax, 5 - - add rax, rcx - lea rcx, [rdi+rdx*4] ; - - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; - - ; get the first horizontal line done ; - movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movq mm4, mm3 ; make a copy of current line - - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - punpckhbw mm4, mm0 ; - - pmullw mm3, mm1 ; - pmullw mm4, mm1 ; - - movq mm5, [rsi+1] ; - movq mm6, mm5 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 ; - - pmullw mm5, mm2 ; - pmullw mm6, mm2 ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP8_FILTER_SHIFT ; - - movq mm7, mm3 ; - packuswb mm7, mm4 ; - - add rsi, rdx ; next line -.next_row_8x4: - movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movq mm4, mm3 ; make a copy of current line - - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - punpckhbw mm4, mm0 ; - - pmullw mm3, mm1 ; - pmullw mm4, mm1 ; - - movq mm5, [rsi+1] ; - movq mm6, mm5 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 ; - - pmullw mm5, mm2 ; - pmullw mm6, mm2 ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - movq mm5, mm7 ; - movq mm6, mm7 ; - - punpcklbw mm5, mm0 ; - punpckhbw mm6, mm0 - - pmullw mm5, [rax] ; - pmullw mm6, [rax] ; - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP8_FILTER_SHIFT ; - - movq mm7, mm3 ; - packuswb mm7, mm4 ; - - - pmullw mm3, [rax+16] ; - pmullw mm4, [rax+16] ; - - paddw mm3, mm5 ; - paddw mm4, mm6 ; - - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw mm4, [GLOBAL(rd)] ; - psraw mm4, VP8_FILTER_SHIFT ; - - packuswb mm3, mm4 - - movq [rdi], mm3 ; store the results in the destination - -%if ABI_IS_32BIT - add rsi, rdx ; next line - add rdi, dword ptr arg(5) ;dst_pitch ; 
-%else - movsxd r8, dword ptr arg(5) ;dst_pitch - add rsi, rdx ; next line - add rdi, r8 -%endif - cmp rdi, rcx ; - jne .next_row_8x4 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void bilinear_predict4x4_mmx -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -global sym(vp8_bilinear_predict4x4_mmx) PRIVATE -sym(vp8_bilinear_predict4x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]; - ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]; - - movsxd rax, dword ptr arg(2) ;xoffset - mov rdi, arg(4) ;dst_ptr ; - - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] - shl rax, 5 - - add rax, rcx ; HFilter - mov rsi, arg(0) ;src_ptr ; - - movsxd rdx, dword ptr arg(5) ;ldst_pitch - movq mm1, [rax] ; - - movq mm2, [rax+16] ; - movsxd rax, dword ptr arg(3) ;yoffset - - pxor mm0, mm0 ; - shl rax, 5 - - add rax, rcx - lea rcx, [rdi+rdx*4] ; - - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ; - - ; get the first horizontal line done ; - movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - - pmullw mm3, mm1 ; - movd mm5, [rsi+1] ; - - punpcklbw mm5, mm0 ; - pmullw mm5, mm2 ; - - paddw mm3, mm5 ; - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - movq mm7, mm3 ; - packuswb mm7, mm0 ; - - add rsi, rdx ; next line -.next_row_4x4: - movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06 - - pmullw mm3, mm1 ; - movd mm5, [rsi+1] ; - - punpcklbw mm5, mm0 ; - pmullw mm5, mm2 ; - - paddw mm3, mm5 ; - - movq mm5, mm7 ; - punpcklbw mm5, mm0 ; - - pmullw mm5, [rax] ; - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - 
movq mm7, mm3 ; - - packuswb mm7, mm0 ; - - pmullw mm3, [rax+16] ; - paddw mm3, mm5 ; - - - paddw mm3, [GLOBAL(rd)] ; xmm3 += round value - psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - packuswb mm3, mm0 - movd [rdi], mm3 ; store the results in the destination - -%if ABI_IS_32BIT - add rsi, rdx ; next line - add rdi, dword ptr arg(5) ;dst_pitch ; -%else - movsxd r8, dword ptr arg(5) ;dst_pitch ; - add rsi, rdx ; next line - add rdi, r8 -%endif - - cmp rdi, rcx ; - jne .next_row_4x4 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - - -SECTION_RODATA -align 16 -rd: - times 4 dw 0x40 - -align 16 -global HIDDEN_DATA(sym(vp8_six_tap_mmx)) -sym(vp8_six_tap_mmx): - times 8 dw 0 - times 8 dw 0 - times 8 dw 128 - times 8 dw 0 - times 8 dw 0 - times 8 dw 0 - - times 8 dw 0 - times 8 dw -6 - times 8 dw 123 - times 8 dw 12 - times 8 dw -1 - times 8 dw 0 - - times 8 dw 2 - times 8 dw -11 - times 8 dw 108 - times 8 dw 36 - times 8 dw -8 - times 8 dw 1 - - times 8 dw 0 - times 8 dw -9 - times 8 dw 93 - times 8 dw 50 - times 8 dw -6 - times 8 dw 0 - - times 8 dw 3 - times 8 dw -16 - times 8 dw 77 - times 8 dw 77 - times 8 dw -16 - times 8 dw 3 - - times 8 dw 0 - times 8 dw -6 - times 8 dw 50 - times 8 dw 93 - times 8 dw -9 - times 8 dw 0 - - times 8 dw 1 - times 8 dw -8 - times 8 dw 36 - times 8 dw 108 - times 8 dw -11 - times 8 dw 2 - - times 8 dw 0 - times 8 dw -1 - times 8 dw 12 - times 8 dw 123 - times 8 dw -6 - times 8 dw 0 - - diff --git a/thirdparty/libvpx/vp8/common/x86/subpixel_sse2.asm b/thirdparty/libvpx/vp8/common/x86/subpixel_sse2.asm deleted file mode 100644 index 69f8d103c1..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/subpixel_sse2.asm +++ /dev/null @@ -1,1372 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. 
An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" -extern sym(vp8_bilinear_filters_x86_8) - -%define BLOCK_HEIGHT_WIDTH 4 -%define VP8_FILTER_WEIGHT 128 -%define VP8_FILTER_SHIFT 7 - - -;/************************************************************************************ -; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The -; input pixel array has output_height rows. This routine assumes that output_height is an -; even number. This function handles 8 pixels in horizontal direction, calculating ONE -; rows each iteration to take advantage of the 128 bits operations. -;*************************************************************************************/ -;void vp8_filter_block1d8_h6_sse2 -;( -; unsigned char *src_ptr, -; unsigned short *output_ptr, -; unsigned int src_pixels_per_line, -; unsigned int pixel_step, -; unsigned int output_height, -; unsigned int output_width, -; short *vp8_filter -;) -global sym(vp8_filter_block1d8_h6_sse2) PRIVATE -sym(vp8_filter_block1d8_h6_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rdx, arg(6) ;vp8_filter - mov rsi, arg(0) ;src_ptr - - mov rdi, arg(1) ;output_ptr - - movsxd rcx, dword ptr arg(4) ;output_height - movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(5) ;output_width -%endif - pxor xmm0, xmm0 ; clear xmm0 for unpack - -.filter_block1d8_h6_rowloop: - movq xmm3, MMWORD PTR [rsi - 2] - movq xmm1, MMWORD PTR [rsi + 6] - - prefetcht2 [rsi+rax-2] - - pslldq xmm1, 8 - por xmm1, xmm3 - - movdqa xmm4, xmm1 - movdqa xmm5, xmm1 - - movdqa xmm6, xmm1 - movdqa xmm7, xmm1 - - punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 - psrldq xmm4, 1 ; xx 0d 0c 
0b 0a 09 08 07 06 05 04 03 02 01 00 -1 - - pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 - punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 - - psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 - pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 - - - punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 - psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 - - pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 - - punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 - psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 - - pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 - - punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 - psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 - - - pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 - - punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 - pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 - - - paddsw xmm4, xmm7 - paddsw xmm4, xmm5 - - paddsw xmm4, xmm3 - paddsw xmm4, xmm6 - - paddsw xmm4, xmm1 - paddsw xmm4, [GLOBAL(rd)] - - psraw xmm4, 7 - - packuswb xmm4, xmm0 - punpcklbw xmm4, xmm0 - - movdqa XMMWORD Ptr [rdi], xmm4 - lea rsi, [rsi + rax] - -%if ABI_IS_32BIT - add rdi, DWORD Ptr arg(5) ;[output_width] -%else - add rdi, r8 -%endif - dec rcx - - jnz .filter_block1d8_h6_rowloop ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_filter_block1d16_h6_sse2 -;( -; unsigned char *src_ptr, -; unsigned short *output_ptr, -; unsigned int src_pixels_per_line, -; unsigned int pixel_step, -; unsigned int output_height, -; unsigned int output_width, -; short *vp8_filter -;) -;/************************************************************************************ -; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The -; input pixel array has output_height rows. 
This routine assumes that output_height is an -; even number. This function handles 8 pixels in horizontal direction, calculating ONE -; rows each iteration to take advantage of the 128 bits operations. -;*************************************************************************************/ -global sym(vp8_filter_block1d16_h6_sse2) PRIVATE -sym(vp8_filter_block1d16_h6_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rdx, arg(6) ;vp8_filter - mov rsi, arg(0) ;src_ptr - - mov rdi, arg(1) ;output_ptr - - movsxd rcx, dword ptr arg(4) ;output_height - movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(5) ;output_width -%endif - - pxor xmm0, xmm0 ; clear xmm0 for unpack - -.filter_block1d16_h6_sse2_rowloop: - movq xmm3, MMWORD PTR [rsi - 2] - movq xmm1, MMWORD PTR [rsi + 6] - - movq xmm2, MMWORD PTR [rsi +14] - pslldq xmm2, 8 - - por xmm2, xmm1 - prefetcht2 [rsi+rax-2] - - pslldq xmm1, 8 - por xmm1, xmm3 - - movdqa xmm4, xmm1 - movdqa xmm5, xmm1 - - movdqa xmm6, xmm1 - movdqa xmm7, xmm1 - - punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 - psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 - - pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 - punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 - - psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 - pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 - - - punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 - psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 - - pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 - - punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 - psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 - - pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 - - punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 - psrldq xmm1, 5 ; xx xx xx xx xx 0d 
0c 0b 0a 09 08 07 06 05 04 03 - - - pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 - - punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 - pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 - - paddsw xmm4, xmm7 - paddsw xmm4, xmm5 - - paddsw xmm4, xmm3 - paddsw xmm4, xmm6 - - paddsw xmm4, xmm1 - paddsw xmm4, [GLOBAL(rd)] - - psraw xmm4, 7 - - packuswb xmm4, xmm0 - punpcklbw xmm4, xmm0 - - movdqa XMMWORD Ptr [rdi], xmm4 - - movdqa xmm3, xmm2 - movdqa xmm4, xmm2 - - movdqa xmm5, xmm2 - movdqa xmm6, xmm2 - - movdqa xmm7, xmm2 - - punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 - psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 - - pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 - punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 - - psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 - pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 - - - punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 - psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 - - pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 - - punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 - psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 - - pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 - - punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 - psrldq xmm2, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 - - pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 - - punpcklbw xmm2, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 - pmullw xmm2, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 - - - paddsw xmm4, xmm7 - paddsw xmm4, xmm5 - - paddsw xmm4, xmm3 - paddsw xmm4, xmm6 - - paddsw xmm4, xmm2 - paddsw xmm4, [GLOBAL(rd)] - - psraw xmm4, 7 - - packuswb xmm4, xmm0 - punpcklbw xmm4, xmm0 - - movdqa XMMWORD Ptr [rdi+16], xmm4 - - lea rsi, [rsi + rax] -%if ABI_IS_32BIT - add rdi, DWORD Ptr arg(5) ;[output_width] -%else - add rdi, r8 -%endif - - dec rcx - jnz .filter_block1d16_h6_sse2_rowloop ; 
next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_filter_block1d8_v6_sse2 -;( -; short *src_ptr, -; unsigned char *output_ptr, -; int dst_ptich, -; unsigned int pixels_per_line, -; unsigned int pixel_step, -; unsigned int output_height, -; unsigned int output_width, -; short * vp8_filter -;) -;/************************************************************************************ -; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixels. The -; input pixel array has output_height rows. -;*************************************************************************************/ -global sym(vp8_filter_block1d8_v6_sse2) PRIVATE -sym(vp8_filter_block1d8_v6_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rax, arg(7) ;vp8_filter - movsxd rdx, dword ptr arg(3) ;pixels_per_line - - mov rdi, arg(1) ;output_ptr - mov rsi, arg(0) ;src_ptr - - sub rsi, rdx - sub rsi, rdx - - movsxd rcx, DWORD PTR arg(5) ;[output_height] - pxor xmm0, xmm0 ; clear xmm0 - - movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(2) ; dst_ptich -%endif - -.vp8_filter_block1d8_v6_sse2_loop: - movdqa xmm1, XMMWORD PTR [rsi] - pmullw xmm1, [rax] - - movdqa xmm2, XMMWORD PTR [rsi + rdx] - pmullw xmm2, [rax + 16] - - movdqa xmm3, XMMWORD PTR [rsi + rdx * 2] - pmullw xmm3, [rax + 32] - - movdqa xmm5, XMMWORD PTR [rsi + rdx * 4] - pmullw xmm5, [rax + 64] - - add rsi, rdx - movdqa xmm4, XMMWORD PTR [rsi + rdx * 2] - - pmullw xmm4, [rax + 48] - movdqa xmm6, XMMWORD PTR [rsi + rdx * 4] - - pmullw xmm6, [rax + 80] - - paddsw xmm2, xmm5 - paddsw xmm2, xmm3 - - paddsw xmm2, xmm1 - paddsw xmm2, xmm4 - - paddsw xmm2, xmm6 - paddsw xmm2, xmm7 - - psraw xmm2, 7 - packuswb xmm2, xmm0 ; pack and saturate - - movq QWORD PTR [rdi], xmm2 ; store the results in the destination -%if ABI_IS_32BIT - add rdi, DWORD PTR 
arg(2) ;[dst_ptich] -%else - add rdi, r8 -%endif - dec rcx ; decrement count - jnz .vp8_filter_block1d8_v6_sse2_loop ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_filter_block1d16_v6_sse2 -;( -; unsigned short *src_ptr, -; unsigned char *output_ptr, -; int dst_ptich, -; unsigned int pixels_per_line, -; unsigned int pixel_step, -; unsigned int output_height, -; unsigned int output_width, -; const short *vp8_filter -;) -;/************************************************************************************ -; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The -; input pixel array has output_height rows. -;*************************************************************************************/ -global sym(vp8_filter_block1d16_v6_sse2) PRIVATE -sym(vp8_filter_block1d16_v6_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rax, arg(7) ;vp8_filter - movsxd rdx, dword ptr arg(3) ;pixels_per_line - - mov rdi, arg(1) ;output_ptr - mov rsi, arg(0) ;src_ptr - - sub rsi, rdx - sub rsi, rdx - - movsxd rcx, DWORD PTR arg(5) ;[output_height] -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(2) ; dst_ptich -%endif - -.vp8_filter_block1d16_v6_sse2_loop: -; The order for adding 6-tap is 2 5 3 1 4 6. Read in data in that order. 
- movdqa xmm1, XMMWORD PTR [rsi + rdx] ; line 2 - movdqa xmm2, XMMWORD PTR [rsi + rdx + 16] - pmullw xmm1, [rax + 16] - pmullw xmm2, [rax + 16] - - movdqa xmm3, XMMWORD PTR [rsi + rdx * 4] ; line 5 - movdqa xmm4, XMMWORD PTR [rsi + rdx * 4 + 16] - pmullw xmm3, [rax + 64] - pmullw xmm4, [rax + 64] - - movdqa xmm5, XMMWORD PTR [rsi + rdx * 2] ; line 3 - movdqa xmm6, XMMWORD PTR [rsi + rdx * 2 + 16] - pmullw xmm5, [rax + 32] - pmullw xmm6, [rax + 32] - - movdqa xmm7, XMMWORD PTR [rsi] ; line 1 - movdqa xmm0, XMMWORD PTR [rsi + 16] - pmullw xmm7, [rax] - pmullw xmm0, [rax] - - paddsw xmm1, xmm3 - paddsw xmm2, xmm4 - paddsw xmm1, xmm5 - paddsw xmm2, xmm6 - paddsw xmm1, xmm7 - paddsw xmm2, xmm0 - - add rsi, rdx - - movdqa xmm3, XMMWORD PTR [rsi + rdx * 2] ; line 4 - movdqa xmm4, XMMWORD PTR [rsi + rdx * 2 + 16] - pmullw xmm3, [rax + 48] - pmullw xmm4, [rax + 48] - - movdqa xmm5, XMMWORD PTR [rsi + rdx * 4] ; line 6 - movdqa xmm6, XMMWORD PTR [rsi + rdx * 4 + 16] - pmullw xmm5, [rax + 80] - pmullw xmm6, [rax + 80] - - movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] - pxor xmm0, xmm0 ; clear xmm0 - - paddsw xmm1, xmm3 - paddsw xmm2, xmm4 - paddsw xmm1, xmm5 - paddsw xmm2, xmm6 - - paddsw xmm1, xmm7 - paddsw xmm2, xmm7 - - psraw xmm1, 7 - psraw xmm2, 7 - - packuswb xmm1, xmm2 ; pack and saturate - movdqa XMMWORD PTR [rdi], xmm1 ; store the results in the destination -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(2) ;[dst_ptich] -%else - add rdi, r8 -%endif - dec rcx ; decrement count - jnz .vp8_filter_block1d16_v6_sse2_loop ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_filter_block1d8_h6_only_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; int dst_ptich, -; unsigned int output_height, -; const short *vp8_filter -;) -; First-pass filter only when yoffset==0 -global sym(vp8_filter_block1d8_h6_only_sse2) PRIVATE -sym(vp8_filter_block1d8_h6_only_sse2): - 
push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rdx, arg(5) ;vp8_filter - mov rsi, arg(0) ;src_ptr - - mov rdi, arg(2) ;output_ptr - - movsxd rcx, dword ptr arg(4) ;output_height - movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(3) ;dst_ptich -%endif - pxor xmm0, xmm0 ; clear xmm0 for unpack - -.filter_block1d8_h6_only_rowloop: - movq xmm3, MMWORD PTR [rsi - 2] - movq xmm1, MMWORD PTR [rsi + 6] - - prefetcht2 [rsi+rax-2] - - pslldq xmm1, 8 - por xmm1, xmm3 - - movdqa xmm4, xmm1 - movdqa xmm5, xmm1 - - movdqa xmm6, xmm1 - movdqa xmm7, xmm1 - - punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 - psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 - - pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 - punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 - - psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 - pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 - - - punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 - psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 - - pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 - - punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 - psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 - - pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 - - punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 - psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 - - - pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 - - punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 - pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 - - - paddsw xmm4, xmm7 - paddsw xmm4, xmm5 - - paddsw xmm4, xmm3 - paddsw xmm4, xmm6 - - paddsw xmm4, xmm1 - paddsw xmm4, [GLOBAL(rd)] - - psraw xmm4, 7 - - packuswb xmm4, xmm0 - - movq QWORD PTR [rdi], xmm4 ; store the results in the destination - lea rsi, 
[rsi + rax] - -%if ABI_IS_32BIT - add rdi, DWORD Ptr arg(3) ;dst_ptich -%else - add rdi, r8 -%endif - dec rcx - - jnz .filter_block1d8_h6_only_rowloop ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_filter_block1d16_h6_only_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; int dst_ptich, -; unsigned int output_height, -; const short *vp8_filter -;) -; First-pass filter only when yoffset==0 -global sym(vp8_filter_block1d16_h6_only_sse2) PRIVATE -sym(vp8_filter_block1d16_h6_only_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rdx, arg(5) ;vp8_filter - mov rsi, arg(0) ;src_ptr - - mov rdi, arg(2) ;output_ptr - - movsxd rcx, dword ptr arg(4) ;output_height - movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(3) ;dst_ptich -%endif - - pxor xmm0, xmm0 ; clear xmm0 for unpack - -.filter_block1d16_h6_only_sse2_rowloop: - movq xmm3, MMWORD PTR [rsi - 2] - movq xmm1, MMWORD PTR [rsi + 6] - - movq xmm2, MMWORD PTR [rsi +14] - pslldq xmm2, 8 - - por xmm2, xmm1 - prefetcht2 [rsi+rax-2] - - pslldq xmm1, 8 - por xmm1, xmm3 - - movdqa xmm4, xmm1 - movdqa xmm5, xmm1 - - movdqa xmm6, xmm1 - movdqa xmm7, xmm1 - - punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 - psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 - - pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 - punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 - - psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 - pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 - - punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 - psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 - - pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 - - punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 
xx05 xx04 xx03 xx02 xx01 - psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 - - pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 - - punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 - psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 - - pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 - - punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 - pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 - - paddsw xmm4, xmm7 - paddsw xmm4, xmm5 - - paddsw xmm4, xmm3 - paddsw xmm4, xmm6 - - paddsw xmm4, xmm1 - paddsw xmm4, [GLOBAL(rd)] - - psraw xmm4, 7 - - packuswb xmm4, xmm0 ; lower 8 bytes - - movq QWORD Ptr [rdi], xmm4 ; store the results in the destination - - movdqa xmm3, xmm2 - movdqa xmm4, xmm2 - - movdqa xmm5, xmm2 - movdqa xmm6, xmm2 - - movdqa xmm7, xmm2 - - punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 - psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 - - pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 - punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 - - psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 - pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 - - punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 - psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 - - pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 - - punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 - psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 - - pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 - - punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 - psrldq xmm2, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 - - pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 - - punpcklbw xmm2, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 - pmullw xmm2, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 - - paddsw xmm4, xmm7 - paddsw xmm4, xmm5 - - paddsw xmm4, xmm3 - paddsw xmm4, xmm6 - - paddsw xmm4, xmm2 - paddsw xmm4, [GLOBAL(rd)] - - 
psraw xmm4, 7 - - packuswb xmm4, xmm0 ; higher 8 bytes - - movq QWORD Ptr [rdi+8], xmm4 ; store the results in the destination - - lea rsi, [rsi + rax] -%if ABI_IS_32BIT - add rdi, DWORD Ptr arg(3) ;dst_ptich -%else - add rdi, r8 -%endif - - dec rcx - jnz .filter_block1d16_h6_only_sse2_rowloop ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_filter_block1d8_v6_only_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; int dst_ptich, -; unsigned int output_height, -; const short *vp8_filter -;) -; Second-pass filter only when xoffset==0 -global sym(vp8_filter_block1d8_v6_only_sse2) PRIVATE -sym(vp8_filter_block1d8_v6_only_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - - movsxd rcx, dword ptr arg(4) ;output_height - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line - - mov rax, arg(5) ;vp8_filter - - pxor xmm0, xmm0 ; clear xmm0 - - movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(3) ; dst_ptich -%endif - -.vp8_filter_block1d8_v6_only_sse2_loop: - movq xmm1, MMWORD PTR [rsi] - movq xmm2, MMWORD PTR [rsi + rdx] - movq xmm3, MMWORD PTR [rsi + rdx * 2] - movq xmm5, MMWORD PTR [rsi + rdx * 4] - add rsi, rdx - movq xmm4, MMWORD PTR [rsi + rdx * 2] - movq xmm6, MMWORD PTR [rsi + rdx * 4] - - punpcklbw xmm1, xmm0 - pmullw xmm1, [rax] - - punpcklbw xmm2, xmm0 - pmullw xmm2, [rax + 16] - - punpcklbw xmm3, xmm0 - pmullw xmm3, [rax + 32] - - punpcklbw xmm5, xmm0 - pmullw xmm5, [rax + 64] - - punpcklbw xmm4, xmm0 - pmullw xmm4, [rax + 48] - - punpcklbw xmm6, xmm0 - pmullw xmm6, [rax + 80] - - paddsw xmm2, xmm5 - paddsw xmm2, xmm3 - - paddsw xmm2, xmm1 - paddsw xmm2, xmm4 - - paddsw xmm2, xmm6 - paddsw xmm2, xmm7 - - psraw xmm2, 7 - packuswb xmm2, xmm0 ; pack and saturate - - movq 
QWORD PTR [rdi], xmm2 ; store the results in the destination -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(3) ;[dst_ptich] -%else - add rdi, r8 -%endif - dec rcx ; decrement count - jnz .vp8_filter_block1d8_v6_only_sse2_loop ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_unpack_block1d16_h6_sse2 -;( -; unsigned char *src_ptr, -; unsigned short *output_ptr, -; unsigned int src_pixels_per_line, -; unsigned int output_height, -; unsigned int output_width -;) -global sym(vp8_unpack_block1d16_h6_sse2) PRIVATE -sym(vp8_unpack_block1d16_h6_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(1) ;output_ptr - - movsxd rcx, dword ptr arg(3) ;output_height - movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source - - pxor xmm0, xmm0 ; clear xmm0 for unpack -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(4) ;output_width ; Pitch for Source -%endif - -.unpack_block1d16_h6_sse2_rowloop: - movq xmm1, MMWORD PTR [rsi] ; 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 -2 - movq xmm3, MMWORD PTR [rsi+8] ; make copy of xmm1 - - punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 - punpcklbw xmm1, xmm0 - - movdqa XMMWORD Ptr [rdi], xmm1 - movdqa XMMWORD Ptr [rdi + 16], xmm3 - - lea rsi, [rsi + rax] -%if ABI_IS_32BIT - add rdi, DWORD Ptr arg(4) ;[output_width] -%else - add rdi, r8 -%endif - dec rcx - jnz .unpack_block1d16_h6_sse2_rowloop ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_bilinear_predict16x16_sse2 -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -extern sym(vp8_bilinear_filters_x86_8) -global sym(vp8_bilinear_predict16x16_sse2) PRIVATE -sym(vp8_bilinear_predict16x16_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - 
SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset] - ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset] - - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] - movsxd rax, dword ptr arg(2) ;xoffset - - cmp rax, 0 ;skip first_pass filter if xoffset=0 - je .b16x16_sp_only - - shl rax, 5 - add rax, rcx ;HFilter - - mov rdi, arg(4) ;dst_ptr - mov rsi, arg(0) ;src_ptr - movsxd rdx, dword ptr arg(5) ;dst_pitch - - movdqa xmm1, [rax] - movdqa xmm2, [rax+16] - - movsxd rax, dword ptr arg(3) ;yoffset - - cmp rax, 0 ;skip second_pass filter if yoffset=0 - je .b16x16_fp_only - - shl rax, 5 - add rax, rcx ;VFilter - - lea rcx, [rdi+rdx*8] - lea rcx, [rcx+rdx*8] - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line - - pxor xmm0, xmm0 - -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(5) ;dst_pitch -%endif - ; get the first horizontal line done - movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movdqa xmm4, xmm3 ; make a copy of current line - - punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 - punpckhbw xmm4, xmm0 - - pmullw xmm3, xmm1 - pmullw xmm4, xmm1 - - movdqu xmm5, [rsi+1] - movdqa xmm6, xmm5 - - punpcklbw xmm5, xmm0 - punpckhbw xmm6, xmm0 - - pmullw xmm5, xmm2 - pmullw xmm6, xmm2 - - paddw xmm3, xmm5 - paddw xmm4, xmm6 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw xmm4, [GLOBAL(rd)] - psraw xmm4, VP8_FILTER_SHIFT - - movdqa xmm7, xmm3 - packuswb xmm7, xmm4 - - add rsi, rdx ; next line -.next_row: - movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movdqa xmm4, xmm3 ; make a copy of current line - - punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 - punpckhbw xmm4, xmm0 - - pmullw xmm3, xmm1 - pmullw xmm4, xmm1 - - movdqu xmm5, [rsi+1] - movdqa xmm6, xmm5 - - punpcklbw xmm5, xmm0 - punpckhbw xmm6, xmm0 - - pmullw xmm5, xmm2 - pmullw xmm6, xmm2 - - paddw xmm3, xmm5 - paddw xmm4, xmm6 - - 
movdqa xmm5, xmm7 - movdqa xmm6, xmm7 - - punpcklbw xmm5, xmm0 - punpckhbw xmm6, xmm0 - - pmullw xmm5, [rax] - pmullw xmm6, [rax] - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw xmm4, [GLOBAL(rd)] - psraw xmm4, VP8_FILTER_SHIFT - - movdqa xmm7, xmm3 - packuswb xmm7, xmm4 - - pmullw xmm3, [rax+16] - pmullw xmm4, [rax+16] - - paddw xmm3, xmm5 - paddw xmm4, xmm6 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw xmm4, [GLOBAL(rd)] - psraw xmm4, VP8_FILTER_SHIFT - - packuswb xmm3, xmm4 - movdqa [rdi], xmm3 ; store the results in the destination - - add rsi, rdx ; next line -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(5) ;dst_pitch -%else - add rdi, r8 -%endif - - cmp rdi, rcx - jne .next_row - - jmp .done - -.b16x16_sp_only: - movsxd rax, dword ptr arg(3) ;yoffset - shl rax, 5 - add rax, rcx ;VFilter - - mov rdi, arg(4) ;dst_ptr - mov rsi, arg(0) ;src_ptr - movsxd rdx, dword ptr arg(5) ;dst_pitch - - movdqa xmm1, [rax] - movdqa xmm2, [rax+16] - - lea rcx, [rdi+rdx*8] - lea rcx, [rcx+rdx*8] - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - - pxor xmm0, xmm0 - - ; get the first horizontal line done - movdqu xmm7, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - - add rsi, rax ; next line -.next_row_spo: - movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - - movdqa xmm5, xmm7 - movdqa xmm6, xmm7 - - movdqa xmm4, xmm3 ; make a copy of current line - movdqa xmm7, xmm3 - - punpcklbw xmm5, xmm0 - punpckhbw xmm6, xmm0 - punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 - punpckhbw xmm4, xmm0 - - pmullw xmm5, xmm1 - pmullw xmm6, xmm1 - pmullw xmm3, xmm2 - pmullw xmm4, xmm2 - - paddw xmm3, xmm5 - paddw xmm4, xmm6 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw xmm4, [GLOBAL(rd)] - psraw xmm4, VP8_FILTER_SHIFT - - packuswb xmm3, xmm4 - movdqa [rdi], xmm3 ; store the results in the 
destination - - add rsi, rax ; next line - add rdi, rdx ;dst_pitch - cmp rdi, rcx - jne .next_row_spo - - jmp .done - -.b16x16_fp_only: - lea rcx, [rdi+rdx*8] - lea rcx, [rcx+rdx*8] - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - pxor xmm0, xmm0 - -.next_row_fpo: - movdqu xmm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 - movdqa xmm4, xmm3 ; make a copy of current line - - punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 - punpckhbw xmm4, xmm0 - - pmullw xmm3, xmm1 - pmullw xmm4, xmm1 - - movdqu xmm5, [rsi+1] - movdqa xmm6, xmm5 - - punpcklbw xmm5, xmm0 - punpckhbw xmm6, xmm0 - - pmullw xmm5, xmm2 - pmullw xmm6, xmm2 - - paddw xmm3, xmm5 - paddw xmm4, xmm6 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw xmm4, [GLOBAL(rd)] - psraw xmm4, VP8_FILTER_SHIFT - - packuswb xmm3, xmm4 - movdqa [rdi], xmm3 ; store the results in the destination - - add rsi, rax ; next line - add rdi, rdx ; dst_pitch - cmp rdi, rcx - jne .next_row_fpo - -.done: - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_bilinear_predict8x8_sse2 -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -global sym(vp8_bilinear_predict8x8_sse2) PRIVATE -sym(vp8_bilinear_predict8x8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 144 ; reserve 144 bytes - - ;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset] - ;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset] - lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))] - - mov rsi, arg(0) ;src_ptr - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line - - ;Read 9-line unaligned data in and put them on stack. This gives a big - ;performance boost. 
- movdqu xmm0, [rsi] - lea rax, [rdx + rdx*2] - movdqu xmm1, [rsi+rdx] - movdqu xmm2, [rsi+rdx*2] - add rsi, rax - movdqu xmm3, [rsi] - movdqu xmm4, [rsi+rdx] - movdqu xmm5, [rsi+rdx*2] - add rsi, rax - movdqu xmm6, [rsi] - movdqu xmm7, [rsi+rdx] - - movdqa XMMWORD PTR [rsp], xmm0 - - movdqu xmm0, [rsi+rdx*2] - - movdqa XMMWORD PTR [rsp+16], xmm1 - movdqa XMMWORD PTR [rsp+32], xmm2 - movdqa XMMWORD PTR [rsp+48], xmm3 - movdqa XMMWORD PTR [rsp+64], xmm4 - movdqa XMMWORD PTR [rsp+80], xmm5 - movdqa XMMWORD PTR [rsp+96], xmm6 - movdqa XMMWORD PTR [rsp+112], xmm7 - movdqa XMMWORD PTR [rsp+128], xmm0 - - movsxd rax, dword ptr arg(2) ;xoffset - shl rax, 5 - add rax, rcx ;HFilter - - mov rdi, arg(4) ;dst_ptr - movsxd rdx, dword ptr arg(5) ;dst_pitch - - movdqa xmm1, [rax] - movdqa xmm2, [rax+16] - - movsxd rax, dword ptr arg(3) ;yoffset - shl rax, 5 - add rax, rcx ;VFilter - - lea rcx, [rdi+rdx*8] - - movdqa xmm5, [rax] - movdqa xmm6, [rax+16] - - pxor xmm0, xmm0 - - ; get the first horizontal line done - movdqa xmm3, XMMWORD PTR [rsp] - movdqa xmm4, xmm3 ; make a copy of current line - psrldq xmm4, 1 - - punpcklbw xmm3, xmm0 ; 00 01 02 03 04 05 06 07 - punpcklbw xmm4, xmm0 ; 01 02 03 04 05 06 07 08 - - pmullw xmm3, xmm1 - pmullw xmm4, xmm2 - - paddw xmm3, xmm4 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - movdqa xmm7, xmm3 - add rsp, 16 ; next line -.next_row8x8: - movdqa xmm3, XMMWORD PTR [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 - movdqa xmm4, xmm3 ; make a copy of current line - psrldq xmm4, 1 - - punpcklbw xmm3, xmm0 ; 00 01 02 03 04 05 06 07 - punpcklbw xmm4, xmm0 ; 01 02 03 04 05 06 07 08 - - pmullw xmm3, xmm1 - pmullw xmm4, xmm2 - - paddw xmm3, xmm4 - pmullw xmm7, xmm5 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - movdqa xmm4, xmm3 - - pmullw xmm3, xmm6 - paddw xmm3, xmm7 - - movdqa xmm7, xmm4 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value 
- psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - packuswb xmm3, xmm0 - movq [rdi], xmm3 ; store the results in the destination - - add rsp, 16 ; next line - add rdi, rdx - - cmp rdi, rcx - jne .next_row8x8 - - ;add rsp, 144 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - -SECTION_RODATA -align 16 -rd: - times 8 dw 0x40 diff --git a/thirdparty/libvpx/vp8/common/x86/subpixel_ssse3.asm b/thirdparty/libvpx/vp8/common/x86/subpixel_ssse3.asm deleted file mode 100644 index c06f24556e..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/subpixel_ssse3.asm +++ /dev/null @@ -1,1508 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -%define BLOCK_HEIGHT_WIDTH 4 -%define VP8_FILTER_WEIGHT 128 -%define VP8_FILTER_SHIFT 7 - - -;/************************************************************************************ -; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The -; input pixel array has output_height rows. This routine assumes that output_height is an -; even number. This function handles 8 pixels in horizontal direction, calculating ONE -; rows each iteration to take advantage of the 128 bits operations. 
-; -; This is an implementation of some of the SSE optimizations first seen in ffvp8 -; -;*************************************************************************************/ -;void vp8_filter_block1d8_h6_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; unsigned int vp8_filter_index -;) -global sym(vp8_filter_block1d8_h6_ssse3) PRIVATE -sym(vp8_filter_block1d8_h6_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - movsxd rdx, DWORD PTR arg(5) ;table index - xor rsi, rsi - shl rdx, 4 - - movdqa xmm7, [GLOBAL(rd)] - - lea rax, [GLOBAL(k0_k5)] - add rax, rdx - mov rdi, arg(2) ;output_ptr - - cmp esi, DWORD PTR [rax] - je vp8_filter_block1d8_h4_ssse3 - - movdqa xmm4, XMMWORD PTR [rax] ;k0_k5 - movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 - movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - movsxd rcx, dword ptr arg(4) ;output_height - - movsxd rdx, dword ptr arg(3) ;output_pitch - - sub rdi, rdx -;xmm3 free -.filter_block1d8_h6_rowloop_ssse3: - movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5 - - movq xmm2, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10 - - punpcklbw xmm0, xmm2 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10 - - movdqa xmm1, xmm0 - pmaddubsw xmm0, xmm4 - - movdqa xmm2, xmm1 - pshufb xmm1, [GLOBAL(shuf2bfrom1)] - - pshufb xmm2, [GLOBAL(shuf3bfrom1)] - pmaddubsw xmm1, xmm5 - - lea rdi, [rdi + rdx] - pmaddubsw xmm2, xmm6 - - lea rsi, [rsi + rax] - dec rcx - - paddsw xmm0, xmm1 - paddsw xmm2, xmm7 - - paddsw xmm0, xmm2 - - psraw xmm0, 7 - - packuswb xmm0, xmm0 - - movq MMWORD Ptr [rdi], xmm0 - jnz .filter_block1d8_h6_rowloop_ssse3 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -vp8_filter_block1d8_h4_ssse3: - movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 - 
movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 - - movdqa xmm3, XMMWORD PTR [GLOBAL(shuf2bfrom1)] - movdqa xmm4, XMMWORD PTR [GLOBAL(shuf3bfrom1)] - - mov rsi, arg(0) ;src_ptr - - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - movsxd rcx, dword ptr arg(4) ;output_height - - movsxd rdx, dword ptr arg(3) ;output_pitch - - sub rdi, rdx - -.filter_block1d8_h4_rowloop_ssse3: - movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5 - - movq xmm1, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10 - - punpcklbw xmm0, xmm1 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10 - - movdqa xmm2, xmm0 - pshufb xmm0, xmm3 - - pshufb xmm2, xmm4 - pmaddubsw xmm0, xmm5 - - lea rdi, [rdi + rdx] - pmaddubsw xmm2, xmm6 - - lea rsi, [rsi + rax] - dec rcx - - paddsw xmm0, xmm7 - - paddsw xmm0, xmm2 - - psraw xmm0, 7 - - packuswb xmm0, xmm0 - - movq MMWORD Ptr [rdi], xmm0 - - jnz .filter_block1d8_h4_rowloop_ssse3 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret -;void vp8_filter_block1d16_h6_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; unsigned int vp8_filter_index -;) -global sym(vp8_filter_block1d16_h6_ssse3) PRIVATE -sym(vp8_filter_block1d16_h6_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - movsxd rdx, DWORD PTR arg(5) ;table index - xor rsi, rsi - shl rdx, 4 ; - - lea rax, [GLOBAL(k0_k5)] - add rax, rdx - - mov rdi, arg(2) ;output_ptr - - mov rsi, arg(0) ;src_ptr - - movdqa xmm4, XMMWORD PTR [rax] ;k0_k5 - movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 - movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 - - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - movsxd rcx, dword ptr arg(4) ;output_height - movsxd rdx, dword ptr arg(3) ;output_pitch - -.filter_block1d16_h6_rowloop_ssse3: - movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5 - - movq xmm3, MMWORD PTR [rsi + 3] ; 3 4 
5 6 7 8 9 10 - - punpcklbw xmm0, xmm3 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10 - - movdqa xmm1, xmm0 - pmaddubsw xmm0, xmm4 - - movdqa xmm2, xmm1 - pshufb xmm1, [GLOBAL(shuf2bfrom1)] - - pshufb xmm2, [GLOBAL(shuf3bfrom1)] - movq xmm3, MMWORD PTR [rsi + 6] - - pmaddubsw xmm1, xmm5 - movq xmm7, MMWORD PTR [rsi + 11] - - pmaddubsw xmm2, xmm6 - punpcklbw xmm3, xmm7 - - paddsw xmm0, xmm1 - movdqa xmm1, xmm3 - - pmaddubsw xmm3, xmm4 - paddsw xmm0, xmm2 - - movdqa xmm2, xmm1 - paddsw xmm0, [GLOBAL(rd)] - - pshufb xmm1, [GLOBAL(shuf2bfrom1)] - pshufb xmm2, [GLOBAL(shuf3bfrom1)] - - psraw xmm0, 7 - pmaddubsw xmm1, xmm5 - - pmaddubsw xmm2, xmm6 - packuswb xmm0, xmm0 - - lea rsi, [rsi + rax] - paddsw xmm3, xmm1 - - paddsw xmm3, xmm2 - - paddsw xmm3, [GLOBAL(rd)] - - psraw xmm3, 7 - - packuswb xmm3, xmm3 - - punpcklqdq xmm0, xmm3 - - movdqa XMMWORD Ptr [rdi], xmm0 - - lea rdi, [rdi + rdx] - dec rcx - jnz .filter_block1d16_h6_rowloop_ssse3 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_filter_block1d4_h6_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; unsigned int vp8_filter_index -;) -global sym(vp8_filter_block1d4_h6_ssse3) PRIVATE -sym(vp8_filter_block1d4_h6_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - movsxd rdx, DWORD PTR arg(5) ;table index - xor rsi, rsi - shl rdx, 4 ; - - lea rax, [GLOBAL(k0_k5)] - add rax, rdx - movdqa xmm7, [GLOBAL(rd)] - - cmp esi, DWORD PTR [rax] - je .vp8_filter_block1d4_h4_ssse3 - - movdqa xmm4, XMMWORD PTR [rax] ;k0_k5 - movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 - movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - movsxd rcx, dword ptr arg(4) ;output_height - - movsxd rdx, dword ptr 
arg(3) ;output_pitch - -;xmm3 free -.filter_block1d4_h6_rowloop_ssse3: - movdqu xmm0, XMMWORD PTR [rsi - 2] - - movdqa xmm1, xmm0 - pshufb xmm0, [GLOBAL(shuf1b)] - - movdqa xmm2, xmm1 - pshufb xmm1, [GLOBAL(shuf2b)] - pmaddubsw xmm0, xmm4 - pshufb xmm2, [GLOBAL(shuf3b)] - pmaddubsw xmm1, xmm5 - -;-- - pmaddubsw xmm2, xmm6 - - lea rsi, [rsi + rax] -;-- - paddsw xmm0, xmm1 - paddsw xmm0, xmm7 - pxor xmm1, xmm1 - paddsw xmm0, xmm2 - psraw xmm0, 7 - packuswb xmm0, xmm0 - - movd DWORD PTR [rdi], xmm0 - - add rdi, rdx - dec rcx - jnz .filter_block1d4_h6_rowloop_ssse3 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -.vp8_filter_block1d4_h4_ssse3: - movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 - movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 - movdqa xmm0, XMMWORD PTR [GLOBAL(shuf2b)] - movdqa xmm3, XMMWORD PTR [GLOBAL(shuf3b)] - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - movsxd rax, dword ptr arg(1) ;src_pixels_per_line - movsxd rcx, dword ptr arg(4) ;output_height - - movsxd rdx, dword ptr arg(3) ;output_pitch - -.filter_block1d4_h4_rowloop_ssse3: - movdqu xmm1, XMMWORD PTR [rsi - 2] - - movdqa xmm2, xmm1 - pshufb xmm1, xmm0 ;;[GLOBAL(shuf2b)] - pshufb xmm2, xmm3 ;;[GLOBAL(shuf3b)] - pmaddubsw xmm1, xmm5 - -;-- - pmaddubsw xmm2, xmm6 - - lea rsi, [rsi + rax] -;-- - paddsw xmm1, xmm7 - paddsw xmm1, xmm2 - psraw xmm1, 7 - packuswb xmm1, xmm1 - - movd DWORD PTR [rdi], xmm1 - - add rdi, rdx - dec rcx - jnz .filter_block1d4_h4_rowloop_ssse3 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - - - -;void vp8_filter_block1d16_v6_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; unsigned int vp8_filter_index -;) -global sym(vp8_filter_block1d16_v6_ssse3) PRIVATE -sym(vp8_filter_block1d16_v6_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - 
GET_GOT rbx - push rsi - push rdi - ; end prolog - - movsxd rdx, DWORD PTR arg(5) ;table index - xor rsi, rsi - shl rdx, 4 ; - - lea rax, [GLOBAL(k0_k5)] - add rax, rdx - - cmp esi, DWORD PTR [rax] - je .vp8_filter_block1d16_v4_ssse3 - - movdqa xmm5, XMMWORD PTR [rax] ;k0_k5 - movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 - movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 - - mov rsi, arg(0) ;src_ptr - movsxd rdx, DWORD PTR arg(1) ;pixels_per_line - mov rdi, arg(2) ;output_ptr - -%if ABI_IS_32BIT=0 - movsxd r8, DWORD PTR arg(3) ;out_pitch -%endif - mov rax, rsi - movsxd rcx, DWORD PTR arg(4) ;output_height - add rax, rdx - - -.vp8_filter_block1d16_v6_ssse3_loop: - movq xmm1, MMWORD PTR [rsi] ;A - movq xmm2, MMWORD PTR [rsi + rdx] ;B - movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C - movq xmm4, MMWORD PTR [rax + rdx * 2] ;D - movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E - - punpcklbw xmm2, xmm4 ;B D - punpcklbw xmm3, xmm0 ;C E - - movq xmm0, MMWORD PTR [rax + rdx * 4] ;F - - pmaddubsw xmm3, xmm6 - punpcklbw xmm1, xmm0 ;A F - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm5 - - paddsw xmm2, xmm3 - paddsw xmm2, xmm1 - paddsw xmm2, [GLOBAL(rd)] - psraw xmm2, 7 - packuswb xmm2, xmm2 - - movq MMWORD PTR [rdi], xmm2 ;store the results - - movq xmm1, MMWORD PTR [rsi + 8] ;A - movq xmm2, MMWORD PTR [rsi + rdx + 8] ;B - movq xmm3, MMWORD PTR [rsi + rdx * 2 + 8] ;C - movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D - movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E - - punpcklbw xmm2, xmm4 ;B D - punpcklbw xmm3, xmm0 ;C E - - movq xmm0, MMWORD PTR [rax + rdx * 4 + 8] ;F - pmaddubsw xmm3, xmm6 - punpcklbw xmm1, xmm0 ;A F - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm5 - - add rsi, rdx - add rax, rdx -;-- -;-- - paddsw xmm2, xmm3 - paddsw xmm2, xmm1 - paddsw xmm2, [GLOBAL(rd)] - psraw xmm2, 7 - packuswb xmm2, xmm2 - - movq MMWORD PTR [rdi+8], xmm2 - -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(3) ;out_pitch -%else - add rdi, r8 -%endif - dec rcx - jnz .vp8_filter_block1d16_v6_ssse3_loop - - ; begin epilog - pop rdi 
- pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -.vp8_filter_block1d16_v4_ssse3: - movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 - movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 - - mov rsi, arg(0) ;src_ptr - movsxd rdx, DWORD PTR arg(1) ;pixels_per_line - mov rdi, arg(2) ;output_ptr - -%if ABI_IS_32BIT=0 - movsxd r8, DWORD PTR arg(3) ;out_pitch -%endif - mov rax, rsi - movsxd rcx, DWORD PTR arg(4) ;output_height - add rax, rdx - -.vp8_filter_block1d16_v4_ssse3_loop: - movq xmm2, MMWORD PTR [rsi + rdx] ;B - movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C - movq xmm4, MMWORD PTR [rax + rdx * 2] ;D - movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E - - punpcklbw xmm2, xmm4 ;B D - punpcklbw xmm3, xmm0 ;C E - - pmaddubsw xmm3, xmm6 - pmaddubsw xmm2, xmm7 - movq xmm5, MMWORD PTR [rsi + rdx + 8] ;B - movq xmm1, MMWORD PTR [rsi + rdx * 2 + 8] ;C - movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D - movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E - - paddsw xmm2, [GLOBAL(rd)] - paddsw xmm2, xmm3 - psraw xmm2, 7 - packuswb xmm2, xmm2 - - punpcklbw xmm5, xmm4 ;B D - punpcklbw xmm1, xmm0 ;C E - - pmaddubsw xmm1, xmm6 - pmaddubsw xmm5, xmm7 - - movdqa xmm4, [GLOBAL(rd)] - add rsi, rdx - add rax, rdx -;-- -;-- - paddsw xmm5, xmm1 - paddsw xmm5, xmm4 - psraw xmm5, 7 - packuswb xmm5, xmm5 - - punpcklqdq xmm2, xmm5 - - movdqa XMMWORD PTR [rdi], xmm2 - -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(3) ;out_pitch -%else - add rdi, r8 -%endif - dec rcx - jnz .vp8_filter_block1d16_v4_ssse3_loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_filter_block1d8_v6_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; unsigned int vp8_filter_index -;) -global sym(vp8_filter_block1d8_v6_ssse3) PRIVATE -sym(vp8_filter_block1d8_v6_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; 
end prolog - - movsxd rdx, DWORD PTR arg(5) ;table index - xor rsi, rsi - shl rdx, 4 ; - - lea rax, [GLOBAL(k0_k5)] - add rax, rdx - - movsxd rdx, DWORD PTR arg(1) ;pixels_per_line - mov rdi, arg(2) ;output_ptr -%if ABI_IS_32BIT=0 - movsxd r8, DWORD PTR arg(3) ; out_pitch -%endif - movsxd rcx, DWORD PTR arg(4) ;[output_height] - - cmp esi, DWORD PTR [rax] - je .vp8_filter_block1d8_v4_ssse3 - - movdqa xmm5, XMMWORD PTR [rax] ;k0_k5 - movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 - movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 - - mov rsi, arg(0) ;src_ptr - - mov rax, rsi - add rax, rdx - -.vp8_filter_block1d8_v6_ssse3_loop: - movq xmm1, MMWORD PTR [rsi] ;A - movq xmm2, MMWORD PTR [rsi + rdx] ;B - movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C - movq xmm4, MMWORD PTR [rax + rdx * 2] ;D - movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E - - punpcklbw xmm2, xmm4 ;B D - punpcklbw xmm3, xmm0 ;C E - - movq xmm0, MMWORD PTR [rax + rdx * 4] ;F - movdqa xmm4, [GLOBAL(rd)] - - pmaddubsw xmm3, xmm6 - punpcklbw xmm1, xmm0 ;A F - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm5 - add rsi, rdx - add rax, rdx -;-- -;-- - paddsw xmm2, xmm3 - paddsw xmm2, xmm1 - paddsw xmm2, xmm4 - psraw xmm2, 7 - packuswb xmm2, xmm2 - - movq MMWORD PTR [rdi], xmm2 - -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(3) ;[out_pitch] -%else - add rdi, r8 -%endif - dec rcx - jnz .vp8_filter_block1d8_v6_ssse3_loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -.vp8_filter_block1d8_v4_ssse3: - movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 - movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 - movdqa xmm5, [GLOBAL(rd)] - - mov rsi, arg(0) ;src_ptr - - mov rax, rsi - add rax, rdx - -.vp8_filter_block1d8_v4_ssse3_loop: - movq xmm2, MMWORD PTR [rsi + rdx] ;B - movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C - movq xmm4, MMWORD PTR [rax + rdx * 2] ;D - movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E - - punpcklbw xmm2, xmm4 ;B D - punpcklbw xmm3, xmm0 ;C E - - pmaddubsw xmm3, xmm6 - pmaddubsw xmm2, xmm7 - add rsi, 
rdx - add rax, rdx -;-- -;-- - paddsw xmm2, xmm3 - paddsw xmm2, xmm5 - psraw xmm2, 7 - packuswb xmm2, xmm2 - - movq MMWORD PTR [rdi], xmm2 - -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(3) ;[out_pitch] -%else - add rdi, r8 -%endif - dec rcx - jnz .vp8_filter_block1d8_v4_ssse3_loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret -;void vp8_filter_block1d4_v6_ssse3 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; unsigned int vp8_filter_index -;) -global sym(vp8_filter_block1d4_v6_ssse3) PRIVATE -sym(vp8_filter_block1d4_v6_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - movsxd rdx, DWORD PTR arg(5) ;table index - xor rsi, rsi - shl rdx, 4 ; - - lea rax, [GLOBAL(k0_k5)] - add rax, rdx - - movsxd rdx, DWORD PTR arg(1) ;pixels_per_line - mov rdi, arg(2) ;output_ptr -%if ABI_IS_32BIT=0 - movsxd r8, DWORD PTR arg(3) ; out_pitch -%endif - movsxd rcx, DWORD PTR arg(4) ;[output_height] - - cmp esi, DWORD PTR [rax] - je .vp8_filter_block1d4_v4_ssse3 - - movq mm5, MMWORD PTR [rax] ;k0_k5 - movq mm6, MMWORD PTR [rax+256] ;k2_k4 - movq mm7, MMWORD PTR [rax+128] ;k1_k3 - - mov rsi, arg(0) ;src_ptr - - mov rax, rsi - add rax, rdx - -.vp8_filter_block1d4_v6_ssse3_loop: - movd mm1, DWORD PTR [rsi] ;A - movd mm2, DWORD PTR [rsi + rdx] ;B - movd mm3, DWORD PTR [rsi + rdx * 2] ;C - movd mm4, DWORD PTR [rax + rdx * 2] ;D - movd mm0, DWORD PTR [rsi + rdx * 4] ;E - - punpcklbw mm2, mm4 ;B D - punpcklbw mm3, mm0 ;C E - - movd mm0, DWORD PTR [rax + rdx * 4] ;F - - movq mm4, [GLOBAL(rd)] - - pmaddubsw mm3, mm6 - punpcklbw mm1, mm0 ;A F - pmaddubsw mm2, mm7 - pmaddubsw mm1, mm5 - add rsi, rdx - add rax, rdx -;-- -;-- - paddsw mm2, mm3 - paddsw mm2, mm1 - paddsw mm2, mm4 - psraw mm2, 7 - packuswb mm2, mm2 - - movd DWORD PTR [rdi], mm2 - -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(3) 
;[out_pitch] -%else - add rdi, r8 -%endif - dec rcx - jnz .vp8_filter_block1d4_v6_ssse3_loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -.vp8_filter_block1d4_v4_ssse3: - movq mm6, MMWORD PTR [rax+256] ;k2_k4 - movq mm7, MMWORD PTR [rax+128] ;k1_k3 - movq mm5, MMWORD PTR [GLOBAL(rd)] - - mov rsi, arg(0) ;src_ptr - - mov rax, rsi - add rax, rdx - -.vp8_filter_block1d4_v4_ssse3_loop: - movd mm2, DWORD PTR [rsi + rdx] ;B - movd mm3, DWORD PTR [rsi + rdx * 2] ;C - movd mm4, DWORD PTR [rax + rdx * 2] ;D - movd mm0, DWORD PTR [rsi + rdx * 4] ;E - - punpcklbw mm2, mm4 ;B D - punpcklbw mm3, mm0 ;C E - - pmaddubsw mm3, mm6 - pmaddubsw mm2, mm7 - add rsi, rdx - add rax, rdx -;-- -;-- - paddsw mm2, mm3 - paddsw mm2, mm5 - psraw mm2, 7 - packuswb mm2, mm2 - - movd DWORD PTR [rdi], mm2 - -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(3) ;[out_pitch] -%else - add rdi, r8 -%endif - dec rcx - jnz .vp8_filter_block1d4_v4_ssse3_loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_bilinear_predict16x16_ssse3 -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -global sym(vp8_bilinear_predict16x16_ssse3) PRIVATE -sym(vp8_bilinear_predict16x16_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)] - movsxd rax, dword ptr arg(2) ; xoffset - - cmp rax, 0 ; skip first_pass filter if xoffset=0 - je .b16x16_sp_only - - shl rax, 4 - lea rax, [rax + rcx] ; HFilter - - mov rdi, arg(4) ; dst_ptr - mov rsi, arg(0) ; src_ptr - movsxd rdx, dword ptr arg(5) ; dst_pitch - - movdqa xmm1, [rax] - - movsxd rax, dword ptr arg(3) ; yoffset - - cmp rax, 0 ; skip second_pass filter if yoffset=0 - je .b16x16_fp_only - - shl rax, 4 - lea rax, [rax + rcx] ; VFilter - - lea rcx, [rdi+rdx*8] - lea rcx, [rcx+rdx*8] - movsxd 
rdx, dword ptr arg(1) ; src_pixels_per_line - - movdqa xmm2, [rax] - -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(5) ; dst_pitch -%endif - movq xmm3, [rsi] ; 00 01 02 03 04 05 06 07 - movq xmm5, [rsi+1] ; 01 02 03 04 05 06 07 08 - - punpcklbw xmm3, xmm5 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08 - movq xmm4, [rsi+8] ; 08 09 10 11 12 13 14 15 - - movq xmm5, [rsi+9] ; 09 10 11 12 13 14 15 16 - - lea rsi, [rsi + rdx] ; next line - - pmaddubsw xmm3, xmm1 ; 00 02 04 06 08 10 12 14 - - punpcklbw xmm4, xmm5 ; 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16 - pmaddubsw xmm4, xmm1 ; 01 03 05 07 09 11 13 15 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value - psraw xmm4, VP8_FILTER_SHIFT ; xmm4 /= 128 - - movdqa xmm7, xmm3 - packuswb xmm7, xmm4 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 - -.next_row: - movq xmm6, [rsi] ; 00 01 02 03 04 05 06 07 - movq xmm5, [rsi+1] ; 01 02 03 04 05 06 07 08 - - punpcklbw xmm6, xmm5 - movq xmm4, [rsi+8] ; 08 09 10 11 12 13 14 15 - - movq xmm5, [rsi+9] ; 09 10 11 12 13 14 15 16 - lea rsi, [rsi + rdx] ; next line - - pmaddubsw xmm6, xmm1 - - punpcklbw xmm4, xmm5 - pmaddubsw xmm4, xmm1 - - paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value - psraw xmm6, VP8_FILTER_SHIFT ; xmm6 /= 128 - - paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value - psraw xmm4, VP8_FILTER_SHIFT ; xmm4 /= 128 - - packuswb xmm6, xmm4 - movdqa xmm5, xmm7 - - punpcklbw xmm5, xmm6 - pmaddubsw xmm5, xmm2 - - punpckhbw xmm7, xmm6 - pmaddubsw xmm7, xmm2 - - paddw xmm5, [GLOBAL(rd)] ; xmm5 += round value - psraw xmm5, VP8_FILTER_SHIFT ; xmm5 /= 128 - - paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value - psraw xmm7, VP8_FILTER_SHIFT ; xmm7 /= 128 - - packuswb xmm5, xmm7 - movdqa xmm7, xmm6 - - movdqa [rdi], xmm5 ; store the results in the destination -%if ABI_IS_32BIT - add rdi, DWORD PTR arg(5) ; dst_pitch -%else - add rdi, r8 -%endif - - cmp rdi, rcx - jne .next_row - - jmp .done 
- -.b16x16_sp_only: - movsxd rax, dword ptr arg(3) ; yoffset - shl rax, 4 - lea rax, [rax + rcx] ; VFilter - - mov rdi, arg(4) ; dst_ptr - mov rsi, arg(0) ; src_ptr - movsxd rdx, dword ptr arg(5) ; dst_pitch - - movdqa xmm1, [rax] ; VFilter - - lea rcx, [rdi+rdx*8] - lea rcx, [rcx+rdx*8] - movsxd rax, dword ptr arg(1) ; src_pixels_per_line - - ; get the first horizontal line done - movq xmm4, [rsi] ; load row 0 - movq xmm2, [rsi + 8] ; load row 0 - - lea rsi, [rsi + rax] ; next line -.next_row_sp: - movq xmm3, [rsi] ; load row + 1 - movq xmm5, [rsi + 8] ; load row + 1 - - punpcklbw xmm4, xmm3 - punpcklbw xmm2, xmm5 - - pmaddubsw xmm4, xmm1 - movq xmm7, [rsi + rax] ; load row + 2 - - pmaddubsw xmm2, xmm1 - movq xmm6, [rsi + rax + 8] ; load row + 2 - - punpcklbw xmm3, xmm7 - punpcklbw xmm5, xmm6 - - pmaddubsw xmm3, xmm1 - paddw xmm4, [GLOBAL(rd)] - - pmaddubsw xmm5, xmm1 - paddw xmm2, [GLOBAL(rd)] - - psraw xmm4, VP8_FILTER_SHIFT - psraw xmm2, VP8_FILTER_SHIFT - - packuswb xmm4, xmm2 - paddw xmm3, [GLOBAL(rd)] - - movdqa [rdi], xmm4 ; store row 0 - paddw xmm5, [GLOBAL(rd)] - - psraw xmm3, VP8_FILTER_SHIFT - psraw xmm5, VP8_FILTER_SHIFT - - packuswb xmm3, xmm5 - movdqa xmm4, xmm7 - - movdqa [rdi + rdx],xmm3 ; store row 1 - lea rsi, [rsi + 2*rax] - - movdqa xmm2, xmm6 - lea rdi, [rdi + 2*rdx] - - cmp rdi, rcx - jne .next_row_sp - - jmp .done - -.b16x16_fp_only: - lea rcx, [rdi+rdx*8] - lea rcx, [rcx+rdx*8] - movsxd rax, dword ptr arg(1) ; src_pixels_per_line - -.next_row_fp: - movq xmm2, [rsi] ; 00 01 02 03 04 05 06 07 - movq xmm4, [rsi+1] ; 01 02 03 04 05 06 07 08 - - punpcklbw xmm2, xmm4 - movq xmm3, [rsi+8] ; 08 09 10 11 12 13 14 15 - - pmaddubsw xmm2, xmm1 - movq xmm4, [rsi+9] ; 09 10 11 12 13 14 15 16 - - lea rsi, [rsi + rax] ; next line - punpcklbw xmm3, xmm4 - - pmaddubsw xmm3, xmm1 - movq xmm5, [rsi] - - paddw xmm2, [GLOBAL(rd)] - movq xmm7, [rsi+1] - - movq xmm6, [rsi+8] - psraw xmm2, VP8_FILTER_SHIFT - - punpcklbw xmm5, xmm7 - movq xmm7, [rsi+9] - - paddw 
xmm3, [GLOBAL(rd)] - pmaddubsw xmm5, xmm1 - - psraw xmm3, VP8_FILTER_SHIFT - punpcklbw xmm6, xmm7 - - packuswb xmm2, xmm3 - pmaddubsw xmm6, xmm1 - - movdqa [rdi], xmm2 ; store the results in the destination - paddw xmm5, [GLOBAL(rd)] - - lea rdi, [rdi + rdx] ; dst_pitch - psraw xmm5, VP8_FILTER_SHIFT - - paddw xmm6, [GLOBAL(rd)] - psraw xmm6, VP8_FILTER_SHIFT - - packuswb xmm5, xmm6 - lea rsi, [rsi + rax] ; next line - - movdqa [rdi], xmm5 ; store the results in the destination - lea rdi, [rdi + rdx] ; dst_pitch - - cmp rdi, rcx - - jne .next_row_fp - -.done: - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vp8_bilinear_predict8x8_ssse3 -;( -; unsigned char *src_ptr, -; int src_pixels_per_line, -; int xoffset, -; int yoffset, -; unsigned char *dst_ptr, -; int dst_pitch -;) -global sym(vp8_bilinear_predict8x8_ssse3) PRIVATE -sym(vp8_bilinear_predict8x8_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 144 ; reserve 144 bytes - - lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)] - - mov rsi, arg(0) ;src_ptr - movsxd rdx, dword ptr arg(1) ;src_pixels_per_line - - ;Read 9-line unaligned data in and put them on stack. This gives a big - ;performance boost. 
- movdqu xmm0, [rsi] - lea rax, [rdx + rdx*2] - movdqu xmm1, [rsi+rdx] - movdqu xmm2, [rsi+rdx*2] - add rsi, rax - movdqu xmm3, [rsi] - movdqu xmm4, [rsi+rdx] - movdqu xmm5, [rsi+rdx*2] - add rsi, rax - movdqu xmm6, [rsi] - movdqu xmm7, [rsi+rdx] - - movdqa XMMWORD PTR [rsp], xmm0 - - movdqu xmm0, [rsi+rdx*2] - - movdqa XMMWORD PTR [rsp+16], xmm1 - movdqa XMMWORD PTR [rsp+32], xmm2 - movdqa XMMWORD PTR [rsp+48], xmm3 - movdqa XMMWORD PTR [rsp+64], xmm4 - movdqa XMMWORD PTR [rsp+80], xmm5 - movdqa XMMWORD PTR [rsp+96], xmm6 - movdqa XMMWORD PTR [rsp+112], xmm7 - movdqa XMMWORD PTR [rsp+128], xmm0 - - movsxd rax, dword ptr arg(2) ; xoffset - cmp rax, 0 ; skip first_pass filter if xoffset=0 - je .b8x8_sp_only - - shl rax, 4 - add rax, rcx ; HFilter - - mov rdi, arg(4) ; dst_ptr - movsxd rdx, dword ptr arg(5) ; dst_pitch - - movdqa xmm0, [rax] - - movsxd rax, dword ptr arg(3) ; yoffset - cmp rax, 0 ; skip second_pass filter if yoffset=0 - je .b8x8_fp_only - - shl rax, 4 - lea rax, [rax + rcx] ; VFilter - - lea rcx, [rdi+rdx*8] - - movdqa xmm1, [rax] - - ; get the first horizontal line done - movdqa xmm3, [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 - movdqa xmm5, xmm3 ; 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 xx - - psrldq xmm5, 1 - lea rsp, [rsp + 16] ; next line - - punpcklbw xmm3, xmm5 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08 - pmaddubsw xmm3, xmm0 ; 00 02 04 06 08 10 12 14 - - paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value - psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - - movdqa xmm7, xmm3 - packuswb xmm7, xmm7 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 - -.next_row: - movdqa xmm6, [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 - lea rsp, [rsp + 16] ; next line - - movdqa xmm5, xmm6 - - psrldq xmm5, 1 - - punpcklbw xmm6, xmm5 - pmaddubsw xmm6, xmm0 - - paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value - psraw xmm6, VP8_FILTER_SHIFT ; xmm6 /= 128 - - packuswb xmm6, xmm6 - - punpcklbw xmm7, xmm6 - pmaddubsw xmm7, xmm1 - - paddw 
xmm7, [GLOBAL(rd)] ; xmm7 += round value - psraw xmm7, VP8_FILTER_SHIFT ; xmm7 /= 128 - - packuswb xmm7, xmm7 - - movq [rdi], xmm7 ; store the results in the destination - lea rdi, [rdi + rdx] - - movdqa xmm7, xmm6 - - cmp rdi, rcx - jne .next_row - - jmp .done8x8 - -.b8x8_sp_only: - movsxd rax, dword ptr arg(3) ; yoffset - shl rax, 4 - lea rax, [rax + rcx] ; VFilter - - mov rdi, arg(4) ;dst_ptr - movsxd rdx, dword ptr arg(5) ; dst_pitch - - movdqa xmm0, [rax] ; VFilter - - movq xmm1, XMMWORD PTR [rsp] - movq xmm2, XMMWORD PTR [rsp+16] - - movq xmm3, XMMWORD PTR [rsp+32] - punpcklbw xmm1, xmm2 - - movq xmm4, XMMWORD PTR [rsp+48] - punpcklbw xmm2, xmm3 - - movq xmm5, XMMWORD PTR [rsp+64] - punpcklbw xmm3, xmm4 - - movq xmm6, XMMWORD PTR [rsp+80] - punpcklbw xmm4, xmm5 - - movq xmm7, XMMWORD PTR [rsp+96] - punpcklbw xmm5, xmm6 - - pmaddubsw xmm1, xmm0 - pmaddubsw xmm2, xmm0 - - pmaddubsw xmm3, xmm0 - pmaddubsw xmm4, xmm0 - - pmaddubsw xmm5, xmm0 - punpcklbw xmm6, xmm7 - - pmaddubsw xmm6, xmm0 - paddw xmm1, [GLOBAL(rd)] - - paddw xmm2, [GLOBAL(rd)] - psraw xmm1, VP8_FILTER_SHIFT - - paddw xmm3, [GLOBAL(rd)] - psraw xmm2, VP8_FILTER_SHIFT - - paddw xmm4, [GLOBAL(rd)] - psraw xmm3, VP8_FILTER_SHIFT - - paddw xmm5, [GLOBAL(rd)] - psraw xmm4, VP8_FILTER_SHIFT - - paddw xmm6, [GLOBAL(rd)] - psraw xmm5, VP8_FILTER_SHIFT - - psraw xmm6, VP8_FILTER_SHIFT - packuswb xmm1, xmm1 - - packuswb xmm2, xmm2 - movq [rdi], xmm1 - - packuswb xmm3, xmm3 - movq [rdi+rdx], xmm2 - - packuswb xmm4, xmm4 - movq xmm1, XMMWORD PTR [rsp+112] - - lea rdi, [rdi + 2*rdx] - movq xmm2, XMMWORD PTR [rsp+128] - - packuswb xmm5, xmm5 - movq [rdi], xmm3 - - packuswb xmm6, xmm6 - movq [rdi+rdx], xmm4 - - lea rdi, [rdi + 2*rdx] - punpcklbw xmm7, xmm1 - - movq [rdi], xmm5 - pmaddubsw xmm7, xmm0 - - movq [rdi+rdx], xmm6 - punpcklbw xmm1, xmm2 - - pmaddubsw xmm1, xmm0 - paddw xmm7, [GLOBAL(rd)] - - psraw xmm7, VP8_FILTER_SHIFT - paddw xmm1, [GLOBAL(rd)] - - psraw xmm1, VP8_FILTER_SHIFT - packuswb xmm7, xmm7 - 
- packuswb xmm1, xmm1 - lea rdi, [rdi + 2*rdx] - - movq [rdi], xmm7 - - movq [rdi+rdx], xmm1 - lea rsp, [rsp + 144] - - jmp .done8x8 - -.b8x8_fp_only: - lea rcx, [rdi+rdx*8] - -.next_row_fp: - movdqa xmm1, XMMWORD PTR [rsp] - movdqa xmm3, XMMWORD PTR [rsp+16] - - movdqa xmm2, xmm1 - movdqa xmm5, XMMWORD PTR [rsp+32] - - psrldq xmm2, 1 - movdqa xmm7, XMMWORD PTR [rsp+48] - - movdqa xmm4, xmm3 - psrldq xmm4, 1 - - movdqa xmm6, xmm5 - psrldq xmm6, 1 - - punpcklbw xmm1, xmm2 - pmaddubsw xmm1, xmm0 - - punpcklbw xmm3, xmm4 - pmaddubsw xmm3, xmm0 - - punpcklbw xmm5, xmm6 - pmaddubsw xmm5, xmm0 - - movdqa xmm2, xmm7 - psrldq xmm2, 1 - - punpcklbw xmm7, xmm2 - pmaddubsw xmm7, xmm0 - - paddw xmm1, [GLOBAL(rd)] - psraw xmm1, VP8_FILTER_SHIFT - - paddw xmm3, [GLOBAL(rd)] - psraw xmm3, VP8_FILTER_SHIFT - - paddw xmm5, [GLOBAL(rd)] - psraw xmm5, VP8_FILTER_SHIFT - - paddw xmm7, [GLOBAL(rd)] - psraw xmm7, VP8_FILTER_SHIFT - - packuswb xmm1, xmm1 - packuswb xmm3, xmm3 - - packuswb xmm5, xmm5 - movq [rdi], xmm1 - - packuswb xmm7, xmm7 - movq [rdi+rdx], xmm3 - - lea rdi, [rdi + 2*rdx] - movq [rdi], xmm5 - - lea rsp, [rsp + 4*16] - movq [rdi+rdx], xmm7 - - lea rdi, [rdi + 2*rdx] - cmp rdi, rcx - - jne .next_row_fp - - lea rsp, [rsp + 16] - -.done8x8: - ;add rsp, 144 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -shuf1b: - db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12 -shuf2b: - db 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11 -shuf3b: - db 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10 - -align 16 -shuf2bfrom1: - db 4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11, 9,13 -align 16 -shuf3bfrom1: - db 2, 6, 4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11 - -align 16 -rd: - times 8 dw 0x40 - -align 16 -k0_k5: - times 8 db 0, 0 ;placeholder - times 8 db 0, 0 - times 8 db 2, 1 - times 8 db 0, 0 - times 8 db 3, 3 - times 8 db 0, 0 - times 8 db 1, 2 - times 8 db 0, 0 -k1_k3: - times 8 db 0, 0 ;placeholder - 
times 8 db -6, 12 - times 8 db -11, 36 - times 8 db -9, 50 - times 8 db -16, 77 - times 8 db -6, 93 - times 8 db -8, 108 - times 8 db -1, 123 -k2_k4: - times 8 db 128, 0 ;placeholder - times 8 db 123, -1 - times 8 db 108, -8 - times 8 db 93, -6 - times 8 db 77, -16 - times 8 db 50, -9 - times 8 db 36, -11 - times 8 db 12, -6 -align 16 -vp8_bilinear_filters_ssse3: - times 8 db 128, 0 - times 8 db 112, 16 - times 8 db 96, 32 - times 8 db 80, 48 - times 8 db 64, 64 - times 8 db 48, 80 - times 8 db 32, 96 - times 8 db 16, 112 - diff --git a/thirdparty/libvpx/vp8/common/x86/vp8_asm_stubs.c b/thirdparty/libvpx/vp8/common/x86/vp8_asm_stubs.c deleted file mode 100644 index fb0b57eb1c..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/vp8_asm_stubs.c +++ /dev/null @@ -1,625 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "vpx_ports/mem.h" -#include "filter_x86.h" - -extern const short vp8_six_tap_mmx[8][6*8]; - -extern void vp8_filter_block1d_h6_mmx -( - unsigned char *src_ptr, - unsigned short *output_ptr, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -); -extern void vp8_filter_block1dc_v6_mmx -( - unsigned short *src_ptr, - unsigned char *output_ptr, - int output_pitch, - unsigned int pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -); -extern void vp8_filter_block1d8_h6_sse2 -( - unsigned char *src_ptr, - unsigned short *output_ptr, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -); -extern void vp8_filter_block1d16_h6_sse2 -( - unsigned char *src_ptr, - unsigned short *output_ptr, - unsigned int src_pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -); -extern void vp8_filter_block1d8_v6_sse2 -( - unsigned short *src_ptr, - unsigned char *output_ptr, - int dst_ptich, - unsigned int pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -); -extern void vp8_filter_block1d16_v6_sse2 -( - unsigned short *src_ptr, - unsigned char *output_ptr, - int dst_ptich, - unsigned int pixels_per_line, - unsigned int pixel_step, - unsigned int output_height, - unsigned int output_width, - const short *vp8_filter -); -extern void vp8_unpack_block1d16_h6_sse2 -( - unsigned char *src_ptr, - unsigned short *output_ptr, - unsigned int src_pixels_per_line, - unsigned int output_height, - unsigned int output_width -); -extern void vp8_filter_block1d8_h6_only_sse2 -( - unsigned char *src_ptr, - unsigned 
int src_pixels_per_line, - unsigned char *output_ptr, - int dst_ptich, - unsigned int output_height, - const short *vp8_filter -); -extern void vp8_filter_block1d16_h6_only_sse2 -( - unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - int dst_ptich, - unsigned int output_height, - const short *vp8_filter -); -extern void vp8_filter_block1d8_v6_only_sse2 -( - unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - int dst_ptich, - unsigned int output_height, - const short *vp8_filter -); - - -#if HAVE_MMX -void vp8_sixtap_predict4x4_mmx -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - DECLARE_ALIGNED(16, unsigned short, FData2[16*16]); /* Temp data bufffer used in filtering */ - const short *HFilter, *VFilter; - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter); - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1dc_v6_mmx(FData2 + 8, dst_ptr, dst_pitch, 8, 4 , 4, 4, VFilter); - -} - - -void vp8_sixtap_predict16x16_mmx -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - - DECLARE_ALIGNED(16, unsigned short, FData2[24*24]); /* Temp data bufffer used in filtering */ - - const short *HFilter, *VFilter; - - - HFilter = vp8_six_tap_mmx[xoffset]; - - vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter); - vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 21, 32, HFilter); - vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 8, FData2 + 8, src_pixels_per_line, 1, 21, 32, HFilter); - vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 12, FData2 + 12, src_pixels_per_line, 1, 21, 32, HFilter); - - 
VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1dc_v6_mmx(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, 16, VFilter); - vp8_filter_block1dc_v6_mmx(FData2 + 36, dst_ptr + 4, dst_pitch, 32, 16 , 16, 16, VFilter); - vp8_filter_block1dc_v6_mmx(FData2 + 40, dst_ptr + 8, dst_pitch, 32, 16 , 16, 16, VFilter); - vp8_filter_block1dc_v6_mmx(FData2 + 44, dst_ptr + 12, dst_pitch, 32, 16 , 16, 16, VFilter); - -} - - -void vp8_sixtap_predict8x8_mmx -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - - DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */ - - const short *HFilter, *VFilter; - - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter); - vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 13, 16, HFilter); - - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1dc_v6_mmx(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, 8, VFilter); - vp8_filter_block1dc_v6_mmx(FData2 + 20, dst_ptr + 4, dst_pitch, 16, 8 , 8, 8, VFilter); - -} - - -void vp8_sixtap_predict8x4_mmx -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - - DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */ - - const short *HFilter, *VFilter; - - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter); - vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 9, 16, HFilter); - - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1dc_v6_mmx(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, 8, VFilter); - vp8_filter_block1dc_v6_mmx(FData2 + 20, dst_ptr + 4, dst_pitch, 16, 8 , 4, 8, 
VFilter); - -} - - - -void vp8_bilinear_predict16x16_mmx -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - vp8_bilinear_predict8x8_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pitch); - vp8_bilinear_predict8x8_mmx(src_ptr + 8, src_pixels_per_line, xoffset, yoffset, dst_ptr + 8, dst_pitch); - vp8_bilinear_predict8x8_mmx(src_ptr + 8 * src_pixels_per_line, src_pixels_per_line, xoffset, yoffset, dst_ptr + dst_pitch * 8, dst_pitch); - vp8_bilinear_predict8x8_mmx(src_ptr + 8 * src_pixels_per_line + 8, src_pixels_per_line, xoffset, yoffset, dst_ptr + dst_pitch * 8 + 8, dst_pitch); -} -#endif - - -#if HAVE_SSE2 -void vp8_sixtap_predict16x16_sse2 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch - -) -{ - DECLARE_ALIGNED(16, unsigned short, FData2[24*24]); /* Temp data bufffer used in filtering */ - - const short *HFilter, *VFilter; - - if (xoffset) - { - if (yoffset) - { - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter); - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter); - } - else - { - /* First-pass only */ - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, HFilter); - } - } - else - { - /* Second-pass only */ - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 21, 32); - vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter); - } -} - - -void vp8_sixtap_predict8x8_sse2 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - 
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */ - const short *HFilter, *VFilter; - - if (xoffset) - { - if (yoffset) - { - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter); - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, dst_pitch, VFilter); - } - else - { - /* First-pass only */ - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, HFilter); - } - } - else - { - /* Second-pass only */ - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, VFilter); - } -} - - -void vp8_sixtap_predict8x4_sse2 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data bufffer used in filtering */ - const short *HFilter, *VFilter; - - if (xoffset) - { - if (yoffset) - { - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter); - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, dst_pitch, VFilter); - } - else - { - /* First-pass only */ - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, HFilter); - } - } - else - { - /* Second-pass only */ - VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, VFilter); - } -} - -#endif - -#if HAVE_SSSE3 - -extern void vp8_filter_block1d8_h6_ssse3 -( - unsigned char *src_ptr, - unsigned int src_pixels_per_line, - 
unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - unsigned int vp8_filter_index -); - -extern void vp8_filter_block1d16_h6_ssse3 -( - unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - unsigned int vp8_filter_index -); - -extern void vp8_filter_block1d16_v6_ssse3 -( - unsigned char *src_ptr, - unsigned int src_pitch, - unsigned char *output_ptr, - unsigned int out_pitch, - unsigned int output_height, - unsigned int vp8_filter_index -); - -extern void vp8_filter_block1d8_v6_ssse3 -( - unsigned char *src_ptr, - unsigned int src_pitch, - unsigned char *output_ptr, - unsigned int out_pitch, - unsigned int output_height, - unsigned int vp8_filter_index -); - -extern void vp8_filter_block1d4_h6_ssse3 -( - unsigned char *src_ptr, - unsigned int src_pixels_per_line, - unsigned char *output_ptr, - unsigned int output_pitch, - unsigned int output_height, - unsigned int vp8_filter_index -); - -extern void vp8_filter_block1d4_v6_ssse3 -( - unsigned char *src_ptr, - unsigned int src_pitch, - unsigned char *output_ptr, - unsigned int out_pitch, - unsigned int output_height, - unsigned int vp8_filter_index -); - -void vp8_sixtap_predict16x16_ssse3 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch - -) -{ - DECLARE_ALIGNED(16, unsigned char, FData2[24*24]); - - if (xoffset) - { - if (yoffset) - { - vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), - src_pixels_per_line, FData2, - 16, 21, xoffset); - vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, - 16, yoffset); - } - else - { - /* First-pass only */ - vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, - dst_ptr, dst_pitch, 16, xoffset); - } - } - else - { - if (yoffset) - { - /* Second-pass only */ - vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line), - 
src_pixels_per_line, - dst_ptr, dst_pitch, 16, yoffset); - } - else - { - /* ssse3 second-pass only function couldn't handle (xoffset==0 && - * yoffset==0) case correctly. Add copy function here to guarantee - * six-tap function handles all possible offsets. */ - vp8_copy_mem16x16(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); - } - } -} - -void vp8_sixtap_predict8x8_ssse3 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - DECLARE_ALIGNED(16, unsigned char, FData2[256]); - - if (xoffset) - { - if (yoffset) - { - vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), - src_pixels_per_line, FData2, - 8, 13, xoffset); - vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, - 8, yoffset); - } - else - { - vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, - dst_ptr, dst_pitch, 8, xoffset); - } - } - else - { - if (yoffset) - { - /* Second-pass only */ - vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), - src_pixels_per_line, - dst_ptr, dst_pitch, 8, yoffset); - } - else - { - /* ssse3 second-pass only function couldn't handle (xoffset==0 && - * yoffset==0) case correctly. Add copy function here to guarantee - * six-tap function handles all possible offsets. 
*/ - vp8_copy_mem8x8(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); - } - } -} - - -void vp8_sixtap_predict8x4_ssse3 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - DECLARE_ALIGNED(16, unsigned char, FData2[256]); - - if (xoffset) - { - if (yoffset) - { - vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), - src_pixels_per_line, FData2, - 8, 9, xoffset); - vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, - 4, yoffset); - } - else - { - /* First-pass only */ - vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, - dst_ptr, dst_pitch, 4, xoffset); - } - } - else - { - if (yoffset) - { - /* Second-pass only */ - vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), - src_pixels_per_line, - dst_ptr, dst_pitch, 4, yoffset); - } - else - { - /* ssse3 second-pass only function couldn't handle (xoffset==0 && - * yoffset==0) case correctly. Add copy function here to guarantee - * six-tap function handles all possible offsets. */ - vp8_copy_mem8x4(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); - } - } -} - -void vp8_sixtap_predict4x4_ssse3 -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pitch -) -{ - DECLARE_ALIGNED(16, unsigned char, FData2[4*9]); - - if (xoffset) - { - if (yoffset) - { - vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), - src_pixels_per_line, - FData2, 4, 9, xoffset); - vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, - 4, yoffset); - } - else - { - vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, - dst_ptr, dst_pitch, 4, xoffset); - } - } - else - { - if (yoffset) - { - vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), - src_pixels_per_line, - dst_ptr, dst_pitch, 4, yoffset); - } - else - { - /* ssse3 second-pass only function couldn't handle (xoffset==0 && - * yoffset==0) case correctly. 
Add copy function here to guarantee - * six-tap function handles all possible offsets. */ - int r; - - for (r = 0; r < 4; r++) - { - dst_ptr[0] = src_ptr[0]; - dst_ptr[1] = src_ptr[1]; - dst_ptr[2] = src_ptr[2]; - dst_ptr[3] = src_ptr[3]; - dst_ptr += dst_pitch; - src_ptr += src_pixels_per_line; - } - } - } -} - -#endif diff --git a/thirdparty/libvpx/vp8/common/x86/vp8_loopfilter_mmx.asm b/thirdparty/libvpx/vp8/common/x86/vp8_loopfilter_mmx.asm deleted file mode 100644 index 88a07b9f3f..0000000000 --- a/thirdparty/libvpx/vp8/common/x86/vp8_loopfilter_mmx.asm +++ /dev/null @@ -1,1753 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - - -;void vp8_loop_filter_horizontal_edge_mmx -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; int count -;) -global sym(vp8_loop_filter_horizontal_edge_mmx) PRIVATE -sym(vp8_loop_filter_horizontal_edge_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 32 ; reserve 32 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? 
- - movsxd rcx, dword ptr arg(5) ;count -.next8_h: - mov rdx, arg(3) ;limit - movq mm7, [rdx] - mov rdi, rsi ; rdi points to row +1 for indirect addressing - add rdi, rax - - ; calculate breakout conditions - movq mm2, [rdi+2*rax] ; q3 - movq mm1, [rsi+2*rax] ; q2 - movq mm6, mm1 ; q2 - psubusb mm1, mm2 ; q2-=q3 - psubusb mm2, mm6 ; q3-=q2 - por mm1, mm2 ; abs(q3-q2) - psubusb mm1, mm7 ; - - - movq mm4, [rsi+rax] ; q1 - movq mm3, mm4 ; q1 - psubusb mm4, mm6 ; q1-=q2 - psubusb mm6, mm3 ; q2-=q1 - por mm4, mm6 ; abs(q2-q1) - - psubusb mm4, mm7 - por mm1, mm4 - - movq mm4, [rsi] ; q0 - movq mm0, mm4 ; q0 - psubusb mm4, mm3 ; q0-=q1 - psubusb mm3, mm0 ; q1-=q0 - por mm4, mm3 ; abs(q0-q1) - movq t0, mm4 ; save to t0 - psubusb mm4, mm7 - por mm1, mm4 - - - neg rax ; negate pitch to deal with above border - - movq mm2, [rsi+4*rax] ; p3 - movq mm4, [rdi+4*rax] ; p2 - movq mm5, mm4 ; p2 - psubusb mm4, mm2 ; p2-=p3 - psubusb mm2, mm5 ; p3-=p2 - por mm4, mm2 ; abs(p3 - p2) - psubusb mm4, mm7 - por mm1, mm4 - - - movq mm4, [rsi+2*rax] ; p1 - movq mm3, mm4 ; p1 - psubusb mm4, mm5 ; p1-=p2 - psubusb mm5, mm3 ; p2-=p1 - por mm4, mm5 ; abs(p2 - p1) - psubusb mm4, mm7 - por mm1, mm4 - - movq mm2, mm3 ; p1 - - movq mm4, [rsi+rax] ; p0 - movq mm5, mm4 ; p0 - psubusb mm4, mm3 ; p0-=p1 - psubusb mm3, mm5 ; p1-=p0 - por mm4, mm3 ; abs(p1 - p0) - movq t1, mm4 ; save to t1 - psubusb mm4, mm7 - por mm1, mm4 - - movq mm3, [rdi] ; q1 - movq mm4, mm3 ; q1 - psubusb mm3, mm2 ; q1-=p1 - psubusb mm2, mm4 ; p1-=q1 - por mm2, mm3 ; abs(p1-q1) - pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw mm2, 1 ; abs(p1-q1)/2 - - movq mm6, mm5 ; p0 - movq mm3, [rsi] ; q0 - psubusb mm5, mm3 ; p0-=q0 - psubusb mm3, mm6 ; q0-=p0 - por mm5, mm3 ; abs(p0 - q0) - paddusb mm5, mm5 ; abs(p0-q0)*2 - paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - mov rdx, arg(2) ;blimit ; get blimit - movq mm7, [rdx] ; blimit - - psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit - por mm1, mm5 - pxor 
mm5, mm5 - pcmpeqb mm1, mm5 ; mask mm1 - - ; calculate high edge variance - mov rdx, arg(4) ;thresh ; get thresh - movq mm7, [rdx] ; - movq mm4, t0 ; get abs (q1 - q0) - psubusb mm4, mm7 - movq mm3, t1 ; get abs (p1 - p0) - psubusb mm3, mm7 - paddb mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - - pcmpeqb mm4, mm5 - - pcmpeqb mm5, mm5 - pxor mm4, mm5 - - - ; start work on filters - movq mm2, [rsi+2*rax] ; p1 - movq mm7, [rdi] ; q1 - pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values - psubsb mm2, mm7 ; p1 - q1 - pand mm2, mm4 ; high var mask (hvm)(p1 - q1) - pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values - movq mm3, mm0 ; q0 - psubsb mm0, mm6 ; q0 - p0 - paddsb mm2, mm0 ; 1 * (q0 - p0) + hvm(p1 - q1) - paddsb mm2, mm0 ; 2 * (q0 - p0) + hvm(p1 - q1) - paddsb mm2, mm0 ; 3 * (q0 - p0) + hvm(p1 - q1) - pand mm1, mm2 ; mask filter values we don't care about - movq mm2, mm1 - paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 - paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 - - pxor mm0, mm0 ; - pxor mm5, mm5 - punpcklbw mm0, mm2 ; - punpckhbw mm5, mm2 ; - psraw mm0, 11 ; - psraw mm5, 11 - packsswb mm0, mm5 - movq mm2, mm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3; - - pxor mm0, mm0 ; 0 - movq mm5, mm1 ; abcdefgh - punpcklbw mm0, mm1 ; e0f0g0h0 - psraw mm0, 11 ; sign extended shift right by 3 - pxor mm1, mm1 ; 0 - punpckhbw mm1, mm5 ; a0b0c0d0 - psraw mm1, 11 ; sign extended shift right by 3 - movq mm5, mm0 ; save results - - packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 - paddsw mm5, [GLOBAL(ones)] - paddsw mm1, [GLOBAL(ones)] - psraw mm5, 1 ; partial shifted one more time for 2nd tap - psraw mm1, 1 ; partial shifted one more time for 2nd tap - packsswb mm5, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4 - pandn mm4, mm5 ; high edge variance additive - - paddsb mm6, mm2 ; p0+= 
p0 add - pxor mm6, [GLOBAL(t80)] ; unoffset - movq [rsi+rax], mm6 ; write back - - movq mm6, [rsi+2*rax] ; p1 - pxor mm6, [GLOBAL(t80)] ; reoffset - paddsb mm6, mm4 ; p1+= p1 add - pxor mm6, [GLOBAL(t80)] ; unoffset - movq [rsi+2*rax], mm6 ; write back - - psubsb mm3, mm0 ; q0-= q0 add - pxor mm3, [GLOBAL(t80)] ; unoffset - movq [rsi], mm3 ; write back - - psubsb mm7, mm4 ; q1-= q1 add - pxor mm7, [GLOBAL(t80)] ; unoffset - movq [rdi], mm7 ; write back - - add rsi,8 - neg rax - dec rcx - jnz .next8_h - - add rsp, 32 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_loop_filter_vertical_edge_mmx -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; int count -;) -global sym(vp8_loop_filter_vertical_edge_mmx) PRIVATE -sym(vp8_loop_filter_vertical_edge_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 64 ; reserve 64 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; - %define srct [rsp + 32] ;__declspec(align(16)) char srct[32]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? 
- - lea rsi, [rsi + rax*4 - 4] - - movsxd rcx, dword ptr arg(5) ;count -.next8_v: - mov rdi, rsi ; rdi points to row +1 for indirect addressing - add rdi, rax - - - ;transpose - movq mm6, [rsi+2*rax] ; 67 66 65 64 63 62 61 60 - movq mm7, mm6 ; 77 76 75 74 73 72 71 70 - - punpckhbw mm7, [rdi+2*rax] ; 77 67 76 66 75 65 74 64 - punpcklbw mm6, [rdi+2*rax] ; 73 63 72 62 71 61 70 60 - - movq mm4, [rsi] ; 47 46 45 44 43 42 41 40 - movq mm5, mm4 ; 47 46 45 44 43 42 41 40 - - punpckhbw mm5, [rsi+rax] ; 57 47 56 46 55 45 54 44 - punpcklbw mm4, [rsi+rax] ; 53 43 52 42 51 41 50 40 - - movq mm3, mm5 ; 57 47 56 46 55 45 54 44 - punpckhwd mm5, mm7 ; 77 67 57 47 76 66 56 46 - - punpcklwd mm3, mm7 ; 75 65 55 45 74 64 54 44 - movq mm2, mm4 ; 53 43 52 42 51 41 50 40 - - punpckhwd mm4, mm6 ; 73 63 53 43 72 62 52 42 - punpcklwd mm2, mm6 ; 71 61 51 41 70 60 50 40 - - neg rax - movq mm6, [rsi+rax*2] ; 27 26 25 24 23 22 21 20 - - movq mm1, mm6 ; 27 26 25 24 23 22 21 20 - punpckhbw mm6, [rsi+rax] ; 37 27 36 36 35 25 34 24 - - punpcklbw mm1, [rsi+rax] ; 33 23 32 22 31 21 30 20 - movq mm7, [rsi+rax*4]; ; 07 06 05 04 03 02 01 00 - - punpckhbw mm7, [rdi+rax*4] ; 17 07 16 06 15 05 14 04 - movq mm0, mm7 ; 17 07 16 06 15 05 14 04 - - punpckhwd mm7, mm6 ; 37 27 17 07 36 26 16 06 - punpcklwd mm0, mm6 ; 35 25 15 05 34 24 14 04 - - movq mm6, mm7 ; 37 27 17 07 36 26 16 06 - punpckhdq mm7, mm5 ; 77 67 57 47 37 27 17 07 = q3 - - punpckldq mm6, mm5 ; 76 66 56 46 36 26 16 06 = q2 - - movq mm5, mm6 ; 76 66 56 46 36 26 16 06 - psubusb mm5, mm7 ; q2-q3 - - psubusb mm7, mm6 ; q3-q2 - por mm7, mm5; ; mm7=abs (q3-q2) - - movq mm5, mm0 ; 35 25 15 05 34 24 14 04 - punpckhdq mm5, mm3 ; 75 65 55 45 35 25 15 05 = q1 - - punpckldq mm0, mm3 ; 74 64 54 44 34 24 15 04 = q0 - movq mm3, mm5 ; 75 65 55 45 35 25 15 05 = q1 - - psubusb mm3, mm6 ; q1-q2 - psubusb mm6, mm5 ; q2-q1 - - por mm6, mm3 ; mm6=abs(q2-q1) - lea rdx, srct - - movq [rdx+24], mm5 ; save q1 - movq [rdx+16], mm0 ; save q0 - - movq mm3, [rsi+rax*4] ; 07 06 
05 04 03 02 01 00 - punpcklbw mm3, [rdi+rax*4] ; 13 03 12 02 11 01 10 00 - - movq mm0, mm3 ; 13 03 12 02 11 01 10 00 - punpcklwd mm0, mm1 ; 31 21 11 01 30 20 10 00 - - punpckhwd mm3, mm1 ; 33 23 13 03 32 22 12 02 - movq mm1, mm0 ; 31 21 11 01 30 20 10 00 - - punpckldq mm0, mm2 ; 70 60 50 40 30 20 10 00 =p3 - punpckhdq mm1, mm2 ; 71 61 51 41 31 21 11 01 =p2 - - movq mm2, mm1 ; 71 61 51 41 31 21 11 01 =p2 - psubusb mm2, mm0 ; p2-p3 - - psubusb mm0, mm1 ; p3-p2 - por mm0, mm2 ; mm0=abs(p3-p2) - - movq mm2, mm3 ; 33 23 13 03 32 22 12 02 - punpckldq mm2, mm4 ; 72 62 52 42 32 22 12 02 = p1 - - punpckhdq mm3, mm4 ; 73 63 53 43 33 23 13 03 = p0 - movq [rdx+8], mm3 ; save p0 - - movq [rdx], mm2 ; save p1 - movq mm5, mm2 ; mm5 = p1 - - psubusb mm2, mm1 ; p1-p2 - psubusb mm1, mm5 ; p2-p1 - - por mm1, mm2 ; mm1=abs(p2-p1) - mov rdx, arg(3) ;limit - - movq mm4, [rdx] ; mm4 = limit - psubusb mm7, mm4 - - psubusb mm0, mm4 - psubusb mm1, mm4 - - psubusb mm6, mm4 - por mm7, mm6 - - por mm0, mm1 - por mm0, mm7 ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit - - movq mm1, mm5 ; p1 - - movq mm7, mm3 ; mm3=mm7=p0 - psubusb mm7, mm5 ; p0 - p1 - - psubusb mm5, mm3 ; p1 - p0 - por mm5, mm7 ; abs(p1-p0) - - movq t0, mm5 ; save abs(p1-p0) - lea rdx, srct - - psubusb mm5, mm4 - por mm0, mm5 ; mm0=mask - - movq mm5, [rdx+16] ; mm5=q0 - movq mm7, [rdx+24] ; mm7=q1 - - movq mm6, mm5 ; mm6=q0 - movq mm2, mm7 ; q1 - psubusb mm5, mm7 ; q0-q1 - - psubusb mm7, mm6 ; q1-q0 - por mm7, mm5 ; abs(q1-q0) - - movq t1, mm7 ; save abs(q1-q0) - psubusb mm7, mm4 - - por mm0, mm7 ; mask - - movq mm5, mm2 ; q1 - psubusb mm5, mm1 ; q1-=p1 - psubusb mm1, mm2 ; p1-=q1 - por mm5, mm1 ; abs(p1-q1) - pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw mm5, 1 ; abs(p1-q1)/2 - - mov rdx, arg(2) ;blimit ; - - movq mm4, [rdx] ;blimit - movq mm1, mm3 ; mm1=mm3=p0 - - movq mm7, mm6 ; mm7=mm6=q0 - psubusb mm1, mm7 ; p0-q0 - - psubusb mm7, mm3 ; q0-p0 - por mm1, mm7 ; 
abs(q0-p0) - paddusb mm1, mm1 ; abs(q0-p0)*2 - paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit - por mm1, mm0; ; mask - - pxor mm0, mm0 - pcmpeqb mm1, mm0 - - ; calculate high edge variance - mov rdx, arg(4) ;thresh ; get thresh - movq mm7, [rdx] - ; - movq mm4, t0 ; get abs (q1 - q0) - psubusb mm4, mm7 - - movq mm3, t1 ; get abs (p1 - p0) - psubusb mm3, mm7 - - por mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - pcmpeqb mm4, mm0 - - pcmpeqb mm0, mm0 - pxor mm4, mm0 - - - - ; start work on filters - lea rdx, srct - - movq mm2, [rdx] ; p1 - movq mm7, [rdx+24] ; q1 - - movq mm6, [rdx+8] ; p0 - movq mm0, [rdx+16] ; q0 - - pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values - - psubsb mm2, mm7 ; p1 - q1 - pand mm2, mm4 ; high var mask (hvm)(p1 - q1) - - pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values - - movq mm3, mm0 ; q0 - psubsb mm0, mm6 ; q0 - p0 - - paddsb mm2, mm0 ; 1 * (q0 - p0) + hvm(p1 - q1) - paddsb mm2, mm0 ; 2 * (q0 - p0) + hvm(p1 - q1) - - paddsb mm2, mm0 ; 3 * (q0 - p0) + hvm(p1 - q1) - pand mm1, mm2 ; mask filter values we don't care about - - movq mm2, mm1 - paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 - - paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 - pxor mm0, mm0 ; - - pxor mm5, mm5 - punpcklbw mm0, mm2 ; - - punpckhbw mm5, mm2 ; - psraw mm0, 11 ; - - psraw mm5, 11 - packsswb mm0, mm5 - - movq mm2, mm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3; - - pxor mm0, mm0 ; 0 - movq mm5, mm1 ; abcdefgh - - punpcklbw mm0, mm1 ; e0f0g0h0 - psraw mm0, 11 ; sign extended shift right by 3 - - pxor mm1, mm1 ; 0 - punpckhbw mm1, mm5 ; a0b0c0d0 - - psraw mm1, 11 ; sign extended shift right by 3 - movq mm5, mm0 ; save results - - packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 - paddsw mm5, 
[GLOBAL(ones)] - - paddsw mm1, [GLOBAL(ones)] - psraw mm5, 1 ; partial shifted one more time for 2nd tap - - psraw mm1, 1 ; partial shifted one more time for 2nd tap - packsswb mm5, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4 - - pandn mm4, mm5 ; high edge variance additive - - paddsb mm6, mm2 ; p0+= p0 add - pxor mm6, [GLOBAL(t80)] ; unoffset - - ; mm6=p0 ; - movq mm1, [rdx] ; p1 - pxor mm1, [GLOBAL(t80)] ; reoffset - - paddsb mm1, mm4 ; p1+= p1 add - pxor mm1, [GLOBAL(t80)] ; unoffset - ; mm6 = p0 mm1 = p1 - - psubsb mm3, mm0 ; q0-= q0 add - pxor mm3, [GLOBAL(t80)] ; unoffset - - ; mm3 = q0 - psubsb mm7, mm4 ; q1-= q1 add - pxor mm7, [GLOBAL(t80)] ; unoffset - ; mm7 = q1 - - ; transpose and write back - ; mm1 = 72 62 52 42 32 22 12 02 - ; mm6 = 73 63 53 43 33 23 13 03 - ; mm3 = 74 64 54 44 34 24 14 04 - ; mm7 = 75 65 55 45 35 25 15 05 - - movq mm2, mm1 ; 72 62 52 42 32 22 12 02 - punpcklbw mm2, mm6 ; 33 32 23 22 13 12 03 02 - - movq mm4, mm3 ; 74 64 54 44 34 24 14 04 - punpckhbw mm1, mm6 ; 73 72 63 62 53 52 43 42 - - punpcklbw mm4, mm7 ; 35 34 25 24 15 14 05 04 - punpckhbw mm3, mm7 ; 75 74 65 64 55 54 45 44 - - movq mm6, mm2 ; 33 32 23 22 13 12 03 02 - punpcklwd mm2, mm4 ; 15 14 13 12 05 04 03 02 - - punpckhwd mm6, mm4 ; 35 34 33 32 25 24 23 22 - movq mm5, mm1 ; 73 72 63 62 53 52 43 42 - - punpcklwd mm1, mm3 ; 55 54 53 52 45 44 43 42 - punpckhwd mm5, mm3 ; 75 74 73 72 65 64 63 62 - - - ; mm2 = 15 14 13 12 05 04 03 02 - ; mm6 = 35 34 33 32 25 24 23 22 - ; mm5 = 55 54 53 52 45 44 43 42 - ; mm1 = 75 74 73 72 65 64 63 62 - - - - movd [rsi+rax*4+2], mm2 - psrlq mm2, 32 - - movd [rdi+rax*4+2], mm2 - movd [rsi+rax*2+2], mm6 - - psrlq mm6, 32 - movd [rsi+rax+2],mm6 - - movd [rsi+2], mm1 - psrlq mm1, 32 - - movd [rdi+2], mm1 - neg rax - - movd [rdi+rax+2],mm5 - psrlq mm5, 32 - - movd [rdi+rax*2+2], mm5 - - lea rsi, [rsi+rax*8] - dec rcx - jnz .next8_v - - add rsp, 64 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void 
vp8_mbloop_filter_horizontal_edge_mmx -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; int count -;) -global sym(vp8_mbloop_filter_horizontal_edge_mmx) PRIVATE -sym(vp8_mbloop_filter_horizontal_edge_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 32 ; reserve 32 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - - movsxd rcx, dword ptr arg(5) ;count -.next8_mbh: - mov rdx, arg(3) ;limit - movq mm7, [rdx] - mov rdi, rsi ; rdi points to row +1 for indirect addressing - add rdi, rax - - ; calculate breakout conditions - movq mm2, [rdi+2*rax] ; q3 - - movq mm1, [rsi+2*rax] ; q2 - movq mm6, mm1 ; q2 - psubusb mm1, mm2 ; q2-=q3 - psubusb mm2, mm6 ; q3-=q2 - por mm1, mm2 ; abs(q3-q2) - psubusb mm1, mm7 - - - ; mm1 = abs(q3-q2), mm6 =q2, mm7 = limit - movq mm4, [rsi+rax] ; q1 - movq mm3, mm4 ; q1 - psubusb mm4, mm6 ; q1-=q2 - psubusb mm6, mm3 ; q2-=q1 - por mm4, mm6 ; abs(q2-q1) - psubusb mm4, mm7 - por mm1, mm4 - - - ; mm1 = mask, mm3=q1, mm7 = limit - - movq mm4, [rsi] ; q0 - movq mm0, mm4 ; q0 - psubusb mm4, mm3 ; q0-=q1 - psubusb mm3, mm0 ; q1-=q0 - por mm4, mm3 ; abs(q0-q1) - movq t0, mm4 ; save to t0 - psubusb mm4, mm7 - por mm1, mm4 - - - ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) - - neg rax ; negate pitch to deal with above border - - movq mm2, [rsi+4*rax] ; p3 - movq mm4, [rdi+4*rax] ; p2 - movq mm5, mm4 ; p2 - psubusb mm4, mm2 ; p2-=p3 - psubusb mm2, mm5 ; p3-=p2 - por mm4, mm2 ; abs(p3 - p2) - psubusb mm4, mm7 - por mm1, mm4 - ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) - - movq mm4, [rsi+2*rax] ; p1 - movq mm3, mm4 ; p1 - psubusb mm4, mm5 ; p1-=p2 - psubusb mm5, mm3 ; p2-=p1 - por mm4, mm5 ; abs(p2 - p1) - 
psubusb mm4, mm7 - por mm1, mm4 - - movq mm2, mm3 ; p1 - - - ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) - - movq mm4, [rsi+rax] ; p0 - movq mm5, mm4 ; p0 - psubusb mm4, mm3 ; p0-=p1 - psubusb mm3, mm5 ; p1-=p0 - por mm4, mm3 ; abs(p1 - p0) - movq t1, mm4 ; save to t1 - psubusb mm4, mm7 - por mm1, mm4 - ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) t1 = abs(p1-p0) - ; mm5 = p0 - movq mm3, [rdi] ; q1 - movq mm4, mm3 ; q1 - psubusb mm3, mm2 ; q1-=p1 - psubusb mm2, mm4 ; p1-=q1 - por mm2, mm3 ; abs(p1-q1) - pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw mm2, 1 ; abs(p1-q1)/2 - - movq mm6, mm5 ; p0 - movq mm3, mm0 ; q0 - psubusb mm5, mm3 ; p0-=q0 - psubusb mm3, mm6 ; q0-=p0 - por mm5, mm3 ; abs(p0 - q0) - paddusb mm5, mm5 ; abs(p0-q0)*2 - paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - mov rdx, arg(2) ;blimit ; get blimit - movq mm7, [rdx] ; blimit - - psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit - por mm1, mm5 - pxor mm5, mm5 - pcmpeqb mm1, mm5 ; mask mm1 - - ; mm1 = mask, mm0=q0, mm7 = blimit, t0 = abs(q0-q1) t1 = abs(p1-p0) - ; mm6 = p0, - - ; calculate high edge variance - mov rdx, arg(4) ;thresh ; get thresh - movq mm7, [rdx] ; - movq mm4, t0 ; get abs (q1 - q0) - psubusb mm4, mm7 - movq mm3, t1 ; get abs (p1 - p0) - psubusb mm3, mm7 - paddb mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - - pcmpeqb mm4, mm5 - - pcmpeqb mm5, mm5 - pxor mm4, mm5 - - - - ; mm1 = mask, mm0=q0, mm7 = thresh, t0 = abs(q0-q1) t1 = abs(p1-p0) - ; mm6 = p0, mm4=hev - ; start work on filters - movq mm2, [rsi+2*rax] ; p1 - movq mm7, [rdi] ; q1 - pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values - psubsb mm2, mm7 ; p1 - q1 - - pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values - movq mm3, mm0 ; q0 - psubsb mm0, mm6 ; q0 - p0 - paddsb mm2, mm0 ; 1 * (q0 - p0) + (p1 - q1) - paddsb 
mm2, mm0 ; 2 * (q0 - p0) - paddsb mm2, mm0 ; 3 * (q0 - p0) + (p1 - q1) - pand mm1, mm2 ; mask filter values we don't care about - - - ; mm1 = vp8_filter, mm4=hev, mm6=ps0, mm3=qs0 - movq mm2, mm1 ; vp8_filter - pand mm2, mm4; ; Filter2 = vp8_filter & hev - - movq mm5, mm2 ; - paddsb mm5, [GLOBAL(t3)]; - - pxor mm0, mm0 ; 0 - pxor mm7, mm7 ; 0 - - punpcklbw mm0, mm5 ; e0f0g0h0 - psraw mm0, 11 ; sign extended shift right by 3 - punpckhbw mm7, mm5 ; a0b0c0d0 - psraw mm7, 11 ; sign extended shift right by 3 - packsswb mm0, mm7 ; Filter2 >>=3; - - movq mm5, mm0 ; Filter2 - - paddsb mm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) - pxor mm0, mm0 ; 0 - pxor mm7, mm7 ; 0 - - punpcklbw mm0, mm2 ; e0f0g0h0 - psraw mm0, 11 ; sign extended shift right by 3 - punpckhbw mm7, mm2 ; a0b0c0d0 - psraw mm7, 11 ; sign extended shift right by 3 - packsswb mm0, mm7 ; Filter2 >>=3; - - ; mm0= filter2 mm1 = vp8_filter, mm3 =qs0 mm5=s mm4 =hev mm6=ps0 - psubsb mm3, mm0 ; qs0 =qs0 - filter1 - paddsb mm6, mm5 ; ps0 =ps0 + Fitler2 - - ; mm1=vp8_filter, mm3=qs0, mm4 =hev mm6=ps0 - ; vp8_filter &= ~hev; - ; Filter2 = vp8_filter; - pandn mm4, mm1 ; vp8_filter&=~hev - - - ; mm3=qs0, mm4=filter2, mm6=ps0 - - ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7); - ; s = vp8_signed_char_clamp(qs0 - u); - ; *oq0 = s^0x80; - ; s = vp8_signed_char_clamp(ps0 + u); - ; *op0 = s^0x80; - pxor mm0, mm0 - - pxor mm1, mm1 - pxor mm2, mm2 - punpcklbw mm1, mm4 - punpckhbw mm2, mm4 - pmulhw mm1, [GLOBAL(s27)] - pmulhw mm2, [GLOBAL(s27)] - paddw mm1, [GLOBAL(s63)] - paddw mm2, [GLOBAL(s63)] - psraw mm1, 7 - psraw mm2, 7 - packsswb mm1, mm2 - - psubsb mm3, mm1 - paddsb mm6, mm1 - - pxor mm3, [GLOBAL(t80)] - pxor mm6, [GLOBAL(t80)] - movq [rsi+rax], mm6 - movq [rsi], mm3 - - ; roughly 2/7th difference across boundary - ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7); - ; s = vp8_signed_char_clamp(qs1 - u); - ; *oq1 = s^0x80; - ; s = vp8_signed_char_clamp(ps1 + u); - ; *op1 = s^0x80; - pxor mm1, mm1 - pxor 
mm2, mm2 - punpcklbw mm1, mm4 - punpckhbw mm2, mm4 - pmulhw mm1, [GLOBAL(s18)] - pmulhw mm2, [GLOBAL(s18)] - paddw mm1, [GLOBAL(s63)] - paddw mm2, [GLOBAL(s63)] - psraw mm1, 7 - psraw mm2, 7 - packsswb mm1, mm2 - - movq mm3, [rdi] - movq mm6, [rsi+rax*2] ; p1 - - pxor mm3, [GLOBAL(t80)] - pxor mm6, [GLOBAL(t80)] - - paddsb mm6, mm1 - psubsb mm3, mm1 - - pxor mm6, [GLOBAL(t80)] - pxor mm3, [GLOBAL(t80)] - movq [rdi], mm3 - movq [rsi+rax*2], mm6 - - ; roughly 1/7th difference across boundary - ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7); - ; s = vp8_signed_char_clamp(qs2 - u); - ; *oq2 = s^0x80; - ; s = vp8_signed_char_clamp(ps2 + u); - ; *op2 = s^0x80; - pxor mm1, mm1 - pxor mm2, mm2 - punpcklbw mm1, mm4 - punpckhbw mm2, mm4 - pmulhw mm1, [GLOBAL(s9)] - pmulhw mm2, [GLOBAL(s9)] - paddw mm1, [GLOBAL(s63)] - paddw mm2, [GLOBAL(s63)] - psraw mm1, 7 - psraw mm2, 7 - packsswb mm1, mm2 - - - movq mm6, [rdi+rax*4] - neg rax - movq mm3, [rdi+rax ] - - pxor mm6, [GLOBAL(t80)] - pxor mm3, [GLOBAL(t80)] - - paddsb mm6, mm1 - psubsb mm3, mm1 - - pxor mm6, [GLOBAL(t80)] - pxor mm3, [GLOBAL(t80)] - movq [rdi+rax ], mm3 - neg rax - movq [rdi+rax*4], mm6 - -;EARLY_BREAK_OUT: - neg rax - add rsi,8 - dec rcx - jnz .next8_mbh - - add rsp, 32 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_mbloop_filter_vertical_edge_mmx -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit, -; const char *limit, -; const char *thresh, -; int count -;) -global sym(vp8_mbloop_filter_vertical_edge_mmx) PRIVATE -sym(vp8_mbloop_filter_vertical_edge_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 96 ; reserve 96 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; - %define srct [rsp + 32] ;__declspec(align(16)) char srct[64]; - - mov rsi, arg(0) ;src_ptr - 
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - - lea rsi, [rsi + rax*4 - 4] - - movsxd rcx, dword ptr arg(5) ;count -.next8_mbv: - lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing - - ;transpose - movq mm0, [rdi+2*rax] ; 77 76 75 74 73 72 71 70 - movq mm6, [rsi+2*rax] ; 67 66 65 64 63 62 61 60 - - movq mm7, mm6 ; 77 76 75 74 73 72 71 70 - punpckhbw mm7, mm0 ; 77 67 76 66 75 65 74 64 - - punpcklbw mm6, mm0 ; 73 63 72 62 71 61 70 60 - movq mm0, [rsi+rax] ; 57 56 55 54 53 52 51 50 - - movq mm4, [rsi] ; 47 46 45 44 43 42 41 40 - movq mm5, mm4 ; 47 46 45 44 43 42 41 40 - - punpckhbw mm5, mm0 ; 57 47 56 46 55 45 54 44 - punpcklbw mm4, mm0 ; 53 43 52 42 51 41 50 40 - - movq mm3, mm5 ; 57 47 56 46 55 45 54 44 - punpckhwd mm5, mm7 ; 77 67 57 47 76 66 56 46 - - punpcklwd mm3, mm7 ; 75 65 55 45 74 64 54 44 - movq mm2, mm4 ; 53 43 52 42 51 41 50 40 - - punpckhwd mm4, mm6 ; 73 63 53 43 72 62 52 42 - punpcklwd mm2, mm6 ; 71 61 51 41 70 60 50 40 - - neg rax - - movq mm7, [rsi+rax] ; 37 36 35 34 33 32 31 30 - movq mm6, [rsi+rax*2] ; 27 26 25 24 23 22 21 20 - - movq mm1, mm6 ; 27 26 25 24 23 22 21 20 - punpckhbw mm6, mm7 ; 37 27 36 36 35 25 34 24 - - punpcklbw mm1, mm7 ; 33 23 32 22 31 21 30 20 - - movq mm7, [rsi+rax*4]; ; 07 06 05 04 03 02 01 00 - punpckhbw mm7, [rdi+rax*4] ; 17 07 16 06 15 05 14 04 - - movq mm0, mm7 ; 17 07 16 06 15 05 14 04 - punpckhwd mm7, mm6 ; 37 27 17 07 36 26 16 06 - - punpcklwd mm0, mm6 ; 35 25 15 05 34 24 14 04 - movq mm6, mm7 ; 37 27 17 07 36 26 16 06 - - punpckhdq mm7, mm5 ; 77 67 57 47 37 27 17 07 = q3 - punpckldq mm6, mm5 ; 76 66 56 46 36 26 16 06 = q2 - - lea rdx, srct - movq mm5, mm6 ; 76 66 56 46 36 26 16 06 - - movq [rdx+56], mm7 - psubusb mm5, mm7 ; q2-q3 - - - movq [rdx+48], mm6 - psubusb mm7, mm6 ; q3-q2 - - por mm7, mm5; ; mm7=abs (q3-q2) - movq mm5, mm0 ; 35 25 15 05 34 24 14 04 - - punpckhdq mm5, mm3 ; 75 65 55 45 35 25 15 05 = q1 - punpckldq mm0, mm3 ; 74 64 54 44 34 24 15 04 = q0 - - movq mm3, mm5 ; 
75 65 55 45 35 25 15 05 = q1 - psubusb mm3, mm6 ; q1-q2 - - psubusb mm6, mm5 ; q2-q1 - por mm6, mm3 ; mm6=abs(q2-q1) - - movq [rdx+40], mm5 ; save q1 - movq [rdx+32], mm0 ; save q0 - - movq mm3, [rsi+rax*4] ; 07 06 05 04 03 02 01 00 - punpcklbw mm3, [rdi+rax*4] ; 13 03 12 02 11 01 10 00 - - movq mm0, mm3 ; 13 03 12 02 11 01 10 00 - punpcklwd mm0, mm1 ; 31 21 11 01 30 20 10 00 - - punpckhwd mm3, mm1 ; 33 23 13 03 32 22 12 02 - movq mm1, mm0 ; 31 21 11 01 30 20 10 00 - - punpckldq mm0, mm2 ; 70 60 50 40 30 20 10 00 =p3 - punpckhdq mm1, mm2 ; 71 61 51 41 31 21 11 01 =p2 - - movq [rdx], mm0 ; save p3 - movq [rdx+8], mm1 ; save p2 - - movq mm2, mm1 ; 71 61 51 41 31 21 11 01 =p2 - psubusb mm2, mm0 ; p2-p3 - - psubusb mm0, mm1 ; p3-p2 - por mm0, mm2 ; mm0=abs(p3-p2) - - movq mm2, mm3 ; 33 23 13 03 32 22 12 02 - punpckldq mm2, mm4 ; 72 62 52 42 32 22 12 02 = p1 - - punpckhdq mm3, mm4 ; 73 63 53 43 33 23 13 03 = p0 - movq [rdx+24], mm3 ; save p0 - - movq [rdx+16], mm2 ; save p1 - movq mm5, mm2 ; mm5 = p1 - - psubusb mm2, mm1 ; p1-p2 - psubusb mm1, mm5 ; p2-p1 - - por mm1, mm2 ; mm1=abs(p2-p1) - mov rdx, arg(3) ;limit - - movq mm4, [rdx] ; mm4 = limit - psubusb mm7, mm4 ; abs(q3-q2) > limit - - psubusb mm0, mm4 ; abs(p3-p2) > limit - psubusb mm1, mm4 ; abs(p2-p1) > limit - - psubusb mm6, mm4 ; abs(q2-q1) > limit - por mm7, mm6 ; or - - por mm0, mm1 ; - por mm0, mm7 ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit - - movq mm1, mm5 ; p1 - - movq mm7, mm3 ; mm3=mm7=p0 - psubusb mm7, mm5 ; p0 - p1 - - psubusb mm5, mm3 ; p1 - p0 - por mm5, mm7 ; abs(p1-p0) - - movq t0, mm5 ; save abs(p1-p0) - lea rdx, srct - - psubusb mm5, mm4 ; mm5 = abs(p1-p0) > limit - por mm0, mm5 ; mm0=mask - - movq mm5, [rdx+32] ; mm5=q0 - movq mm7, [rdx+40] ; mm7=q1 - - movq mm6, mm5 ; mm6=q0 - movq mm2, mm7 ; q1 - psubusb mm5, mm7 ; q0-q1 - - psubusb mm7, mm6 ; q1-q0 - por mm7, mm5 ; abs(q1-q0) - - movq t1, mm7 ; save abs(q1-q0) - psubusb mm7, mm4 ; mm7=abs(q1-q0)> 
limit - - por mm0, mm7 ; mask - - movq mm5, mm2 ; q1 - psubusb mm5, mm1 ; q1-=p1 - psubusb mm1, mm2 ; p1-=q1 - por mm5, mm1 ; abs(p1-q1) - pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw mm5, 1 ; abs(p1-q1)/2 - - mov rdx, arg(2) ;blimit ; - - movq mm4, [rdx] ;blimit - movq mm1, mm3 ; mm1=mm3=p0 - - movq mm7, mm6 ; mm7=mm6=q0 - psubusb mm1, mm7 ; p0-q0 - - psubusb mm7, mm3 ; q0-p0 - por mm1, mm7 ; abs(q0-p0) - paddusb mm1, mm1 ; abs(q0-p0)*2 - paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit - por mm1, mm0; ; mask - - pxor mm0, mm0 - pcmpeqb mm1, mm0 - - ; calculate high edge variance - mov rdx, arg(4) ;thresh ; get thresh - movq mm7, [rdx] - ; - movq mm4, t0 ; get abs (q1 - q0) - psubusb mm4, mm7 ; abs(q1 - q0) > thresh - - movq mm3, t1 ; get abs (p1 - p0) - psubusb mm3, mm7 ; abs(p1 - p0)> thresh - - por mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - pcmpeqb mm4, mm0 - - pcmpeqb mm0, mm0 - pxor mm4, mm0 - - - - - ; start work on filters - lea rdx, srct - - ; start work on filters - movq mm2, [rdx+16] ; p1 - movq mm7, [rdx+40] ; q1 - pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values - psubsb mm2, mm7 ; p1 - q1 - - movq mm6, [rdx+24] ; p0 - movq mm0, [rdx+32] ; q0 - pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values - - movq mm3, mm0 ; q0 - psubsb mm0, mm6 ; q0 - p0 - paddsb mm2, mm0 ; 1 * (q0 - p0) + (p1 - q1) - paddsb mm2, mm0 ; 2 * (q0 - p0) - paddsb mm2, mm0 ; 3 * (q0 - p0) + (p1 - q1) - pand mm1, mm2 ; mask filter values we don't care about - - ; mm1 = vp8_filter, mm4=hev, mm6=ps0, mm3=qs0 - movq mm2, mm1 ; vp8_filter - pand mm2, mm4; ; Filter2 = vp8_filter & hev - - movq mm5, mm2 ; - paddsb mm5, [GLOBAL(t3)]; - - pxor mm0, mm0 ; 0 - pxor mm7, mm7 ; 0 - - punpcklbw mm0, mm5 ; e0f0g0h0 - psraw mm0, 11 ; sign extended 
shift right by 3 - punpckhbw mm7, mm5 ; a0b0c0d0 - psraw mm7, 11 ; sign extended shift right by 3 - packsswb mm0, mm7 ; Filter2 >>=3; - - movq mm5, mm0 ; Filter2 - - paddsb mm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) - pxor mm0, mm0 ; 0 - pxor mm7, mm7 ; 0 - - punpcklbw mm0, mm2 ; e0f0g0h0 - psraw mm0, 11 ; sign extended shift right by 3 - punpckhbw mm7, mm2 ; a0b0c0d0 - psraw mm7, 11 ; sign extended shift right by 3 - packsswb mm0, mm7 ; Filter2 >>=3; - - ; mm0= filter2 mm1 = vp8_filter, mm3 =qs0 mm5=s mm4 =hev mm6=ps0 - psubsb mm3, mm0 ; qs0 =qs0 - filter1 - paddsb mm6, mm5 ; ps0 =ps0 + Fitler2 - - ; mm1=vp8_filter, mm3=qs0, mm4 =hev mm6=ps0 - ; vp8_filter &= ~hev; - ; Filter2 = vp8_filter; - pandn mm4, mm1 ; vp8_filter&=~hev - - - ; mm3=qs0, mm4=filter2, mm6=ps0 - - ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7); - ; s = vp8_signed_char_clamp(qs0 - u); - ; *oq0 = s^0x80; - ; s = vp8_signed_char_clamp(ps0 + u); - ; *op0 = s^0x80; - pxor mm0, mm0 - - pxor mm1, mm1 - pxor mm2, mm2 - punpcklbw mm1, mm4 - punpckhbw mm2, mm4 - pmulhw mm1, [GLOBAL(s27)] - pmulhw mm2, [GLOBAL(s27)] - paddw mm1, [GLOBAL(s63)] - paddw mm2, [GLOBAL(s63)] - psraw mm1, 7 - psraw mm2, 7 - packsswb mm1, mm2 - - psubsb mm3, mm1 - paddsb mm6, mm1 - - pxor mm3, [GLOBAL(t80)] - pxor mm6, [GLOBAL(t80)] - movq [rdx+24], mm6 - movq [rdx+32], mm3 - - ; roughly 2/7th difference across boundary - ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7); - ; s = vp8_signed_char_clamp(qs1 - u); - ; *oq1 = s^0x80; - ; s = vp8_signed_char_clamp(ps1 + u); - ; *op1 = s^0x80; - pxor mm1, mm1 - pxor mm2, mm2 - punpcklbw mm1, mm4 - punpckhbw mm2, mm4 - pmulhw mm1, [GLOBAL(s18)] - pmulhw mm2, [GLOBAL(s18)] - paddw mm1, [GLOBAL(s63)] - paddw mm2, [GLOBAL(s63)] - psraw mm1, 7 - psraw mm2, 7 - packsswb mm1, mm2 - - movq mm3, [rdx + 40] - movq mm6, [rdx + 16] ; p1 - pxor mm3, [GLOBAL(t80)] - pxor mm6, [GLOBAL(t80)] - - paddsb mm6, mm1 - psubsb mm3, mm1 - - pxor mm6, [GLOBAL(t80)] - pxor mm3, [GLOBAL(t80)] 
- movq [rdx + 40], mm3 - movq [rdx + 16], mm6 - - ; roughly 1/7th difference across boundary - ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7); - ; s = vp8_signed_char_clamp(qs2 - u); - ; *oq2 = s^0x80; - ; s = vp8_signed_char_clamp(ps2 + u); - ; *op2 = s^0x80; - pxor mm1, mm1 - pxor mm2, mm2 - punpcklbw mm1, mm4 - punpckhbw mm2, mm4 - pmulhw mm1, [GLOBAL(s9)] - pmulhw mm2, [GLOBAL(s9)] - paddw mm1, [GLOBAL(s63)] - paddw mm2, [GLOBAL(s63)] - psraw mm1, 7 - psraw mm2, 7 - packsswb mm1, mm2 - - movq mm6, [rdx+ 8] - movq mm3, [rdx+48] - - pxor mm6, [GLOBAL(t80)] - pxor mm3, [GLOBAL(t80)] - - paddsb mm6, mm1 - psubsb mm3, mm1 - - pxor mm6, [GLOBAL(t80)] ; mm6 = 71 61 51 41 31 21 11 01 - pxor mm3, [GLOBAL(t80)] ; mm3 = 76 66 56 46 36 26 15 06 - - ; transpose and write back - movq mm0, [rdx] ; mm0 = 70 60 50 40 30 20 10 00 - movq mm1, mm0 ; mm0 = 70 60 50 40 30 20 10 00 - - punpcklbw mm0, mm6 ; mm0 = 31 30 21 20 11 10 01 00 - punpckhbw mm1, mm6 ; mm3 = 71 70 61 60 51 50 41 40 - - movq mm2, [rdx+16] ; mm2 = 72 62 52 42 32 22 12 02 - movq mm6, mm2 ; mm3 = 72 62 52 42 32 22 12 02 - - punpcklbw mm2, [rdx+24] ; mm2 = 33 32 23 22 13 12 03 02 - punpckhbw mm6, [rdx+24] ; mm3 = 73 72 63 62 53 52 43 42 - - movq mm5, mm0 ; mm5 = 31 30 21 20 11 10 01 00 - punpcklwd mm0, mm2 ; mm0 = 13 12 11 10 03 02 01 00 - - punpckhwd mm5, mm2 ; mm5 = 33 32 31 30 23 22 21 20 - movq mm4, mm1 ; mm4 = 71 70 61 60 51 50 41 40 - - punpcklwd mm1, mm6 ; mm1 = 53 52 51 50 43 42 41 40 - punpckhwd mm4, mm6 ; mm4 = 73 72 71 70 63 62 61 60 - - movq mm2, [rdx+32] ; mm2 = 74 64 54 44 34 24 14 04 - punpcklbw mm2, [rdx+40] ; mm2 = 35 34 25 24 15 14 05 04 - - movq mm6, mm3 ; mm6 = 76 66 56 46 36 26 15 06 - punpcklbw mm6, [rdx+56] ; mm6 = 37 36 27 26 17 16 07 06 - - movq mm7, mm2 ; mm7 = 35 34 25 24 15 14 05 04 - punpcklwd mm2, mm6 ; mm2 = 17 16 15 14 07 06 05 04 - - punpckhwd mm7, mm6 ; mm7 = 37 36 35 34 27 26 25 24 - movq mm6, mm0 ; mm6 = 13 12 11 10 03 02 01 00 - - punpckldq mm0, mm2 ; mm0 = 07 06 05 04 03 02 
01 00 - punpckhdq mm6, mm2 ; mm6 = 17 16 15 14 13 12 11 10 - - movq [rsi+rax*4], mm0 ; write out - movq [rdi+rax*4], mm6 ; write out - - movq mm0, mm5 ; mm0 = 33 32 31 30 23 22 21 20 - punpckldq mm0, mm7 ; mm0 = 27 26 25 24 23 22 20 20 - - punpckhdq mm5, mm7 ; mm5 = 37 36 35 34 33 32 31 30 - movq [rsi+rax*2], mm0 ; write out - - movq [rdi+rax*2], mm5 ; write out - movq mm2, [rdx+32] ; mm2 = 74 64 54 44 34 24 14 04 - - punpckhbw mm2, [rdx+40] ; mm2 = 75 74 65 64 54 54 45 44 - punpckhbw mm3, [rdx+56] ; mm3 = 77 76 67 66 57 56 47 46 - - movq mm5, mm2 ; mm5 = 75 74 65 64 54 54 45 44 - punpcklwd mm2, mm3 ; mm2 = 57 56 55 54 47 46 45 44 - - punpckhwd mm5, mm3 ; mm5 = 77 76 75 74 67 66 65 64 - movq mm0, mm1 ; mm0= 53 52 51 50 43 42 41 40 - - movq mm3, mm4 ; mm4 = 73 72 71 70 63 62 61 60 - punpckldq mm0, mm2 ; mm0 = 47 46 45 44 43 42 41 40 - - punpckhdq mm1, mm2 ; mm1 = 57 56 55 54 53 52 51 50 - movq [rsi], mm0 ; write out - - movq [rdi], mm1 ; write out - neg rax - - punpckldq mm3, mm5 ; mm3 = 67 66 65 64 63 62 61 60 - punpckhdq mm4, mm5 ; mm4 = 77 76 75 74 73 72 71 60 - - movq [rsi+rax*2], mm3 - movq [rdi+rax*2], mm4 - - lea rsi, [rsi+rax*8] - dec rcx - - jnz .next8_mbv - - add rsp, 96 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_loop_filter_simple_horizontal_edge_mmx -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit -;) -global sym(vp8_loop_filter_simple_horizontal_edge_mmx) PRIVATE -sym(vp8_loop_filter_simple_horizontal_edge_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? 
- - mov rcx, 2 ; count -.nexts8_h: - mov rdx, arg(2) ;blimit ; get blimit - movq mm3, [rdx] ; - - mov rdi, rsi ; rdi points to row +1 for indirect addressing - add rdi, rax - neg rax - - ; calculate mask - movq mm1, [rsi+2*rax] ; p1 - movq mm0, [rdi] ; q1 - movq mm2, mm1 - movq mm7, mm0 - movq mm4, mm0 - psubusb mm0, mm1 ; q1-=p1 - psubusb mm1, mm4 ; p1-=q1 - por mm1, mm0 ; abs(p1-q1) - pand mm1, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw mm1, 1 ; abs(p1-q1)/2 - - movq mm5, [rsi+rax] ; p0 - movq mm4, [rsi] ; q0 - movq mm0, mm4 ; q0 - movq mm6, mm5 ; p0 - psubusb mm5, mm4 ; p0-=q0 - psubusb mm4, mm6 ; q0-=p0 - por mm5, mm4 ; abs(p0 - q0) - paddusb mm5, mm5 ; abs(p0-q0)*2 - paddusb mm5, mm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit - pxor mm3, mm3 - pcmpeqb mm5, mm3 - - ; start work on filters - pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values - pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values - psubsb mm2, mm7 ; p1 - q1 - - pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values - movq mm3, mm0 ; q0 - psubsb mm0, mm6 ; q0 - p0 - paddsb mm2, mm0 ; p1 - q1 + 1 * (q0 - p0) - paddsb mm2, mm0 ; p1 - q1 + 2 * (q0 - p0) - paddsb mm2, mm0 ; p1 - q1 + 3 * (q0 - p0) - pand mm5, mm2 ; mask filter values we don't care about - - ; do + 4 side - paddsb mm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 - - movq mm0, mm5 ; get a copy of filters - psllw mm0, 8 ; shift left 8 - psraw mm0, 3 ; arithmetic shift right 11 - psrlw mm0, 8 - movq mm1, mm5 ; get a copy of filters - psraw mm1, 11 ; arithmetic shift right 11 - psllw mm1, 8 ; shift left 8 to put it back - - por mm0, mm1 ; put the two together to get result - - psubsb mm3, mm0 ; q0-= q0 add - pxor mm3, [GLOBAL(t80)] ; unoffset - movq [rsi], mm3 ; write back - - - ; now do +3 side - psubsb mm5, [GLOBAL(t1s)] ; +3 instead of +4 - - movq mm0, mm5 ; get a copy of filters - 
psllw mm0, 8 ; shift left 8 - psraw mm0, 3 ; arithmetic shift right 11 - psrlw mm0, 8 - psraw mm5, 11 ; arithmetic shift right 11 - psllw mm5, 8 ; shift left 8 to put it back - por mm0, mm5 ; put the two together to get result - - - paddsb mm6, mm0 ; p0+= p0 add - pxor mm6, [GLOBAL(t80)] ; unoffset - movq [rsi+rax], mm6 ; write back - - add rsi,8 - neg rax - dec rcx - jnz .nexts8_h - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_loop_filter_simple_vertical_edge_mmx -;( -; unsigned char *src_ptr, -; int src_pixel_step, -; const char *blimit -;) -global sym(vp8_loop_filter_simple_vertical_edge_mmx) PRIVATE -sym(vp8_loop_filter_simple_vertical_edge_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 32 ; reserve 32 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? 
- - lea rsi, [rsi + rax*4- 2]; ; - mov rcx, 2 ; count -.nexts8_v: - - lea rdi, [rsi + rax]; - movd mm0, [rdi + rax * 2] ; xx xx xx xx 73 72 71 70 - - movd mm6, [rsi + rax * 2] ; xx xx xx xx 63 62 61 60 - punpcklbw mm6, mm0 ; 73 63 72 62 71 61 70 60 - - movd mm0, [rsi + rax] ; xx xx xx xx 53 52 51 50 - movd mm4, [rsi] ; xx xx xx xx 43 42 41 40 - - punpcklbw mm4, mm0 ; 53 43 52 42 51 41 50 40 - movq mm5, mm4 ; 53 43 52 42 51 41 50 40 - - punpcklwd mm4, mm6 ; 71 61 51 41 70 60 50 40 - punpckhwd mm5, mm6 ; 73 63 53 43 72 62 52 42 - - neg rax - - movd mm7, [rsi + rax] ; xx xx xx xx 33 32 31 30 - movd mm6, [rsi + rax * 2] ; xx xx xx xx 23 22 21 20 - - punpcklbw mm6, mm7 ; 33 23 32 22 31 21 30 20 - movd mm1, [rdi + rax * 4] ; xx xx xx xx 13 12 11 10 - - movd mm0, [rsi + rax * 4] ; xx xx xx xx 03 02 01 00 - punpcklbw mm0, mm1 ; 13 03 12 02 11 01 10 00 - - movq mm2, mm0 ; 13 03 12 02 11 01 10 00 - punpcklwd mm0, mm6 ; 31 21 11 01 30 20 10 00 - - punpckhwd mm2, mm6 ; 33 23 13 03 32 22 12 02 - movq mm1, mm0 ; 13 03 12 02 11 01 10 00 - - punpckldq mm0, mm4 ; 70 60 50 40 30 20 10 00 = p1 - movq mm3, mm2 ; 33 23 13 03 32 22 12 02 - - punpckhdq mm1, mm4 ; 71 61 51 41 31 21 11 01 = p0 - punpckldq mm2, mm5 ; 72 62 52 42 32 22 12 02 = q0 - - punpckhdq mm3, mm5 ; 73 63 53 43 33 23 13 03 = q1 - - - ; calculate mask - movq mm6, mm0 ; p1 - movq mm7, mm3 ; q1 - psubusb mm7, mm6 ; q1-=p1 - psubusb mm6, mm3 ; p1-=q1 - por mm6, mm7 ; abs(p1-q1) - pand mm6, [GLOBAL(tfe)] ; set lsb of each byte to zero - psrlw mm6, 1 ; abs(p1-q1)/2 - - movq mm5, mm1 ; p0 - movq mm4, mm2 ; q0 - - psubusb mm5, mm2 ; p0-=q0 - psubusb mm4, mm1 ; q0-=p0 - - por mm5, mm4 ; abs(p0 - q0) - paddusb mm5, mm5 ; abs(p0-q0)*2 - paddusb mm5, mm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - mov rdx, arg(2) ;blimit ; get blimit - movq mm7, [rdx] - - psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit - pxor mm7, mm7 - pcmpeqb mm5, mm7 ; mm5 = mask - - ; start work on filters - movq t0, mm0 - movq t1, mm3 - - pxor mm0, 
[GLOBAL(t80)] ; p1 offset to convert to signed values - pxor mm3, [GLOBAL(t80)] ; q1 offset to convert to signed values - - psubsb mm0, mm3 ; p1 - q1 - movq mm6, mm1 ; p0 - - movq mm7, mm2 ; q0 - pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - - pxor mm7, [GLOBAL(t80)] ; offset to convert to signed values - movq mm3, mm7 ; offseted ; q0 - - psubsb mm7, mm6 ; q0 - p0 - paddsb mm0, mm7 ; p1 - q1 + 1 * (q0 - p0) - - paddsb mm0, mm7 ; p1 - q1 + 2 * (q0 - p0) - paddsb mm0, mm7 ; p1 - q1 + 3 * (q0 - p0) - - pand mm5, mm0 ; mask filter values we don't care about - - paddsb mm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 - - movq mm0, mm5 ; get a copy of filters - psllw mm0, 8 ; shift left 8 - psraw mm0, 3 ; arithmetic shift right 11 - psrlw mm0, 8 - - movq mm7, mm5 ; get a copy of filters - psraw mm7, 11 ; arithmetic shift right 11 - psllw mm7, 8 ; shift left 8 to put it back - - por mm0, mm7 ; put the two together to get result - - psubsb mm3, mm0 ; q0-= q0sz add - pxor mm3, [GLOBAL(t80)] ; unoffset - - ; now do +3 side - psubsb mm5, [GLOBAL(t1s)] ; +3 instead of +4 - - movq mm0, mm5 ; get a copy of filters - psllw mm0, 8 ; shift left 8 - psraw mm0, 3 ; arithmetic shift right 11 - psrlw mm0, 8 - - psraw mm5, 11 ; arithmetic shift right 11 - psllw mm5, 8 ; shift left 8 to put it back - por mm0, mm5 ; put the two together to get result - - paddsb mm6, mm0 ; p0+= p0 add - pxor mm6, [GLOBAL(t80)] ; unoffset - - - movq mm0, t0 - movq mm4, t1 - - ; mm0 = 70 60 50 40 30 20 10 00 - ; mm6 = 71 61 51 41 31 21 11 01 - ; mm3 = 72 62 52 42 32 22 12 02 - ; mm4 = 73 63 53 43 33 23 13 03 - ; transpose back to write out - - movq mm1, mm0 ; - punpcklbw mm0, mm6 ; 31 30 21 20 11 10 01 00 - - punpckhbw mm1, mm6 ; 71 70 61 60 51 50 41 40 - movq mm2, mm3 ; - - punpcklbw mm2, mm4 ; 33 32 23 22 13 12 03 02 - movq mm5, mm1 ; 71 70 61 60 51 50 41 40 - - punpckhbw mm3, mm4 ; 73 72 63 62 53 52 43 42 - movq mm6, mm0 ; 31 30 21 20 11 10 01 00 - - punpcklwd mm0, mm2 ; 13 12 11 10 03 02 
01 00 - punpckhwd mm6, mm2 ; 33 32 31 30 23 22 21 20 - - movd [rsi+rax*4], mm0 ; write 03 02 01 00 - punpcklwd mm1, mm3 ; 53 52 51 50 43 42 41 40 - - psrlq mm0, 32 ; xx xx xx xx 13 12 11 10 - punpckhwd mm5, mm3 ; 73 72 71 70 63 62 61 60 - - movd [rdi+rax*4], mm0 ; write 13 12 11 10 - movd [rsi+rax*2], mm6 ; write 23 22 21 20 - - psrlq mm6, 32 ; 33 32 31 30 - movd [rsi], mm1 ; write 43 42 41 40 - - movd [rsi + rax], mm6 ; write 33 32 31 30 - neg rax - - movd [rsi + rax*2], mm5 ; write 63 62 61 60 - psrlq mm1, 32 ; 53 52 51 50 - - movd [rdi], mm1 ; write out 53 52 51 50 - psrlq mm5, 32 ; 73 72 71 70 - - movd [rdi + rax*2], mm5 ; write 73 72 71 70 - - lea rsi, [rsi+rax*8] ; next 8 - - dec rcx - jnz .nexts8_v - - add rsp, 32 - pop rsp - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - - -;void fast_loop_filter_vertical_edges_mmx(unsigned char *y_ptr, -; int y_stride, -; loop_filter_info *lfi) -;{ -; -; -; vp8_loop_filter_simple_vertical_edge_mmx(y_ptr+4, y_stride, lfi->flim,lfi->lim,lfi->thr,2); -; vp8_loop_filter_simple_vertical_edge_mmx(y_ptr+8, y_stride, lfi->flim,lfi->lim,lfi->thr,2); -; vp8_loop_filter_simple_vertical_edge_mmx(y_ptr+12, y_stride, lfi->flim,lfi->lim,lfi->thr,2); -;} - -SECTION_RODATA -align 16 -tfe: - times 8 db 0xfe -align 16 -t80: - times 8 db 0x80 -align 16 -t1s: - times 8 db 0x01 -align 16 -t3: - times 8 db 0x03 -align 16 -t4: - times 8 db 0x04 -align 16 -ones: - times 4 dw 0x0001 -align 16 -s27: - times 4 dw 0x1b00 -align 16 -s18: - times 4 dw 0x1200 -align 16 -s9: - times 4 dw 0x0900 -align 16 -s63: - times 4 dw 0x003f diff --git a/thirdparty/libvpx/vp8/decoder/dboolhuff.c b/thirdparty/libvpx/vp8/decoder/dboolhuff.c deleted file mode 100644 index 5cdd2a2491..0000000000 --- a/thirdparty/libvpx/vp8/decoder/dboolhuff.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "dboolhuff.h" -#include "vp8/common/common.h" -#include "vpx_dsp/vpx_dsp_common.h" - -int vp8dx_start_decode(BOOL_DECODER *br, - const unsigned char *source, - unsigned int source_sz, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) -{ - br->user_buffer_end = source+source_sz; - br->user_buffer = source; - br->value = 0; - br->count = -8; - br->range = 255; - br->decrypt_cb = decrypt_cb; - br->decrypt_state = decrypt_state; - - if (source_sz && !source) - return 1; - - /* Populate the buffer */ - vp8dx_bool_decoder_fill(br); - - return 0; -} - -void vp8dx_bool_decoder_fill(BOOL_DECODER *br) -{ - const unsigned char *bufptr = br->user_buffer; - VP8_BD_VALUE value = br->value; - int count = br->count; - int shift = VP8_BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT); - size_t bytes_left = br->user_buffer_end - bufptr; - size_t bits_left = bytes_left * CHAR_BIT; - int x = shift + CHAR_BIT - (int)bits_left; - int loop_end = 0; - unsigned char decrypted[sizeof(VP8_BD_VALUE) + 1]; - - if (br->decrypt_cb) { - size_t n = VPXMIN(sizeof(decrypted), bytes_left); - br->decrypt_cb(br->decrypt_state, bufptr, decrypted, (int)n); - bufptr = decrypted; - } - - if(x >= 0) - { - count += VP8_LOTS_OF_BITS; - loop_end = x; - } - - if (x < 0 || bits_left) - { - while(shift >= loop_end) - { - count += CHAR_BIT; - value |= (VP8_BD_VALUE)*bufptr << shift; - ++bufptr; - ++br->user_buffer; - shift -= CHAR_BIT; - } - } - - br->value = value; - br->count = count; -} diff --git a/thirdparty/libvpx/vp8/decoder/dboolhuff.h b/thirdparty/libvpx/vp8/decoder/dboolhuff.h deleted file mode 100644 index 1b1bbf868e..0000000000 --- 
a/thirdparty/libvpx/vp8/decoder/dboolhuff.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_DECODER_DBOOLHUFF_H_ -#define VP8_DECODER_DBOOLHUFF_H_ - -#include <stddef.h> -#include <limits.h> - -#include "./vpx_config.h" -#include "vpx_ports/mem.h" -#include "vpx/vp8dx.h" -#include "vpx/vpx_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef size_t VP8_BD_VALUE; - -#define VP8_BD_VALUE_SIZE ((int)sizeof(VP8_BD_VALUE)*CHAR_BIT) - -/*This is meant to be a large, positive constant that can still be efficiently - loaded as an immediate (on platforms like ARM, for example). 
- Even relatively modest values like 100 would work fine.*/ -#define VP8_LOTS_OF_BITS (0x40000000) - -typedef struct -{ - const unsigned char *user_buffer_end; - const unsigned char *user_buffer; - VP8_BD_VALUE value; - int count; - unsigned int range; - vpx_decrypt_cb decrypt_cb; - void *decrypt_state; -} BOOL_DECODER; - -DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]); - -int vp8dx_start_decode(BOOL_DECODER *br, - const unsigned char *source, - unsigned int source_sz, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state); - -void vp8dx_bool_decoder_fill(BOOL_DECODER *br); - - -static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) { - unsigned int bit = 0; - VP8_BD_VALUE value; - unsigned int split; - VP8_BD_VALUE bigsplit; - int count; - unsigned int range; - - split = 1 + (((br->range - 1) * probability) >> 8); - - if(br->count < 0) - vp8dx_bool_decoder_fill(br); - - value = br->value; - count = br->count; - - bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); - - range = split; - - if (value >= bigsplit) - { - range = br->range - split; - value = value - bigsplit; - bit = 1; - } - - { - register int shift = vp8_norm[range]; - range <<= shift; - value <<= shift; - count -= shift; - } - br->value = value; - br->count = count; - br->range = range; - - return bit; -} - -static INLINE int vp8_decode_value(BOOL_DECODER *br, int bits) -{ - int z = 0; - int bit; - - for (bit = bits - 1; bit >= 0; bit--) - { - z |= (vp8dx_decode_bool(br, 0x80) << bit); - } - - return z; -} - -static INLINE int vp8dx_bool_error(BOOL_DECODER *br) -{ - /* Check if we have reached the end of the buffer. - * - * Variable 'count' stores the number of bits in the 'value' buffer, minus - * 8. The top byte is part of the algorithm, and the remainder is buffered - * to be shifted into it. So if count == 8, the top 16 bits of 'value' are - * occupied, 8 for the algorithm and 8 in the buffer. 
- * - * When reading a byte from the user's buffer, count is filled with 8 and - * one byte is filled into the value buffer. When we reach the end of the - * data, count is additionally filled with VP8_LOTS_OF_BITS. So when - * count == VP8_LOTS_OF_BITS - 1, the user's data has been exhausted. - */ - if ((br->count > VP8_BD_VALUE_SIZE) && (br->count < VP8_LOTS_OF_BITS)) - { - /* We have tried to decode bits after the end of - * stream was encountered. - */ - return 1; - } - - /* No error. */ - return 0; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_DECODER_DBOOLHUFF_H_ diff --git a/thirdparty/libvpx/vp8/decoder/decodeframe.c b/thirdparty/libvpx/vp8/decoder/decodeframe.c deleted file mode 100644 index 51acdbb9c8..0000000000 --- a/thirdparty/libvpx/vp8/decoder/decodeframe.c +++ /dev/null @@ -1,1397 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#include "./vpx_scale_rtcd.h" -#include "onyxd_int.h" -#include "vp8/common/header.h" -#include "vp8/common/reconintra4x4.h" -#include "vp8/common/reconinter.h" -#include "detokenize.h" -#include "vp8/common/common.h" -#include "vp8/common/invtrans.h" -#include "vp8/common/alloccommon.h" -#include "vp8/common/entropymode.h" -#include "vp8/common/quant_common.h" -#include "vpx_scale/vpx_scale.h" -#include "vp8/common/reconintra.h" -#include "vp8/common/setupintrarecon.h" - -#include "decodemv.h" -#include "vp8/common/extend.h" -#if CONFIG_ERROR_CONCEALMENT -#include "error_concealment.h" -#endif -#include "vpx_mem/vpx_mem.h" -#include "vp8/common/threading.h" -#include "decoderthreading.h" -#include "dboolhuff.h" -#include "vpx_dsp/vpx_dsp_common.h" - -#include <assert.h> -#include <stdio.h> - -void vp8cx_init_de_quantizer(VP8D_COMP *pbi) -{ - int Q; - VP8_COMMON *const pc = & pbi->common; - - for (Q = 0; Q < QINDEX_RANGE; Q++) - { - pc->Y1dequant[Q][0] = (short)vp8_dc_quant(Q, pc->y1dc_delta_q); - pc->Y2dequant[Q][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q); - pc->UVdequant[Q][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q); - - pc->Y1dequant[Q][1] = (short)vp8_ac_yquant(Q); - pc->Y2dequant[Q][1] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q); - pc->UVdequant[Q][1] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q); - } -} - -void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd) -{ - int i; - int QIndex; - MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; - VP8_COMMON *const pc = & pbi->common; - - /* Decide whether to use the default or alternate baseline Q value. */ - if (xd->segmentation_enabled) - { - /* Abs Value */ - if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA) - QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id]; - - /* Delta Value */ - else - QIndex = pc->base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id]; - - QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? 
QIndex : MAXQ) : 0; /* Clamp to valid range */ - } - else - QIndex = pc->base_qindex; - - /* Set up the macroblock dequant constants */ - xd->dequant_y1_dc[0] = 1; - xd->dequant_y1[0] = pc->Y1dequant[QIndex][0]; - xd->dequant_y2[0] = pc->Y2dequant[QIndex][0]; - xd->dequant_uv[0] = pc->UVdequant[QIndex][0]; - - for (i = 1; i < 16; i++) - { - xd->dequant_y1_dc[i] = - xd->dequant_y1[i] = pc->Y1dequant[QIndex][1]; - xd->dequant_y2[i] = pc->Y2dequant[QIndex][1]; - xd->dequant_uv[i] = pc->UVdequant[QIndex][1]; - } -} - -static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, - unsigned int mb_idx) -{ - MB_PREDICTION_MODE mode; - int i; -#if CONFIG_ERROR_CONCEALMENT - int corruption_detected = 0; -#else - (void)mb_idx; -#endif - - if (xd->mode_info_context->mbmi.mb_skip_coeff) - { - vp8_reset_mb_tokens_context(xd); - } - else if (!vp8dx_bool_error(xd->current_bc)) - { - int eobtotal; - eobtotal = vp8_decode_mb_tokens(pbi, xd); - - /* Special case: Force the loopfilter to skip when eobtotal is zero */ - xd->mode_info_context->mbmi.mb_skip_coeff = (eobtotal==0); - } - - mode = xd->mode_info_context->mbmi.mode; - - if (xd->segmentation_enabled) - vp8_mb_init_dequantizer(pbi, xd); - - -#if CONFIG_ERROR_CONCEALMENT - - if(pbi->ec_active) - { - int throw_residual; - /* When we have independent partitions we can apply residual even - * though other partitions within the frame are corrupt. - */ - throw_residual = (!pbi->independent_partitions && - pbi->frame_corrupt_residual); - throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc)); - - if ((mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual)) - { - /* MB with corrupt residuals or corrupt mode/motion vectors. - * Better to use the predictor as reconstruction. 
- */ - pbi->frame_corrupt_residual = 1; - memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); - - corruption_detected = 1; - - /* force idct to be skipped for B_PRED and use the - * prediction only for reconstruction - * */ - memset(xd->eobs, 0, 25); - } - } -#endif - - /* do prediction */ - if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) - { - vp8_build_intra_predictors_mbuv_s(xd, - xd->recon_above[1], - xd->recon_above[2], - xd->recon_left[1], - xd->recon_left[2], - xd->recon_left_stride[1], - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride); - - if (mode != B_PRED) - { - vp8_build_intra_predictors_mby_s(xd, - xd->recon_above[0], - xd->recon_left[0], - xd->recon_left_stride[0], - xd->dst.y_buffer, - xd->dst.y_stride); - } - else - { - short *DQC = xd->dequant_y1; - int dst_stride = xd->dst.y_stride; - - /* clear out residual eob info */ - if(xd->mode_info_context->mbmi.mb_skip_coeff) - memset(xd->eobs, 0, 25); - - intra_prediction_down_copy(xd, xd->recon_above[0] + 16); - - for (i = 0; i < 16; i++) - { - BLOCKD *b = &xd->block[i]; - unsigned char *dst = xd->dst.y_buffer + b->offset; - B_PREDICTION_MODE b_mode = - xd->mode_info_context->bmi[i].as_mode; - unsigned char *Above = dst - dst_stride; - unsigned char *yleft = dst - 1; - int left_stride = dst_stride; - unsigned char top_left = Above[-1]; - - vp8_intra4x4_predict(Above, yleft, left_stride, b_mode, - dst, dst_stride, top_left); - - if (xd->eobs[i]) - { - if (xd->eobs[i] > 1) - { - vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride); - } - else - { - vp8_dc_only_idct_add - (b->qcoeff[0] * DQC[0], - dst, dst_stride, - dst, dst_stride); - memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); - } - } - } - } - } - else - { - vp8_build_inter_predictors_mb(xd); - } - - -#if CONFIG_ERROR_CONCEALMENT - if (corruption_detected) - { - return; - } -#endif - - if(!xd->mode_info_context->mbmi.mb_skip_coeff) - { - /* dequantization and idct */ - if (mode != B_PRED) - { - short *DQC = xd->dequant_y1; - - if (mode != 
SPLITMV) - { - BLOCKD *b = &xd->block[24]; - - /* do 2nd order transform on the dc block */ - if (xd->eobs[24] > 1) - { - vp8_dequantize_b(b, xd->dequant_y2); - - vp8_short_inv_walsh4x4(&b->dqcoeff[0], - xd->qcoeff); - memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0])); - } - else - { - b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0]; - vp8_short_inv_walsh4x4_1(&b->dqcoeff[0], - xd->qcoeff); - memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); - } - - /* override the dc dequant constant in order to preserve the - * dc components - */ - DQC = xd->dequant_y1_dc; - } - - vp8_dequant_idct_add_y_block - (xd->qcoeff, DQC, - xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs); - } - - vp8_dequant_idct_add_uv_block - (xd->qcoeff+16*16, xd->dequant_uv, - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->eobs+16); - } -} - -static int get_delta_q(vp8_reader *bc, int prev, int *q_update) -{ - int ret_val = 0; - - if (vp8_read_bit(bc)) - { - ret_val = vp8_read_literal(bc, 4); - - if (vp8_read_bit(bc)) - ret_val = -ret_val; - } - - /* Trigger a quantizer update if the delta-q value has changed */ - if (ret_val != prev) - *q_update = 1; - - return ret_val; -} - -#ifdef PACKET_TESTING -#include <stdio.h> -FILE *vpxlog = 0; -#endif - -static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf) -{ - int i; - unsigned char *src_ptr1; - unsigned char *dest_ptr1; - - unsigned int Border; - int plane_stride; - - /***********/ - /* Y Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - src_ptr1 = ybf->y_buffer - Border; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - - for (i = 0; i < (int)Border; i++) - { - memcpy(dest_ptr1, src_ptr1, plane_stride); - dest_ptr1 += plane_stride; - } - - - /***********/ - /* U Plane */ - /***********/ - plane_stride = ybf->uv_stride; - Border /= 2; - src_ptr1 = ybf->u_buffer - Border; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - - for (i = 0; i < (int)(Border); i++) - { - memcpy(dest_ptr1, src_ptr1, 
plane_stride); - dest_ptr1 += plane_stride; - } - - /***********/ - /* V Plane */ - /***********/ - - src_ptr1 = ybf->v_buffer - Border; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - - for (i = 0; i < (int)(Border); i++) - { - memcpy(dest_ptr1, src_ptr1, plane_stride); - dest_ptr1 += plane_stride; - } -} - -static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf) -{ - int i; - unsigned char *src_ptr1, *src_ptr2; - unsigned char *dest_ptr2; - - unsigned int Border; - int plane_stride; - int plane_height; - - /***********/ - /* Y Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - plane_height = ybf->y_height; - - src_ptr1 = ybf->y_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)Border; i++) - { - memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr2 += plane_stride; - } - - - /***********/ - /* U Plane */ - /***********/ - plane_stride = ybf->uv_stride; - plane_height = ybf->uv_height; - Border /= 2; - - src_ptr1 = ybf->u_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) - { - memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr2 += plane_stride; - } - - /***********/ - /* V Plane */ - /***********/ - - src_ptr1 = ybf->v_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) - { - memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr2 += plane_stride; - } -} - -static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf, - unsigned char *y_src, - unsigned char *u_src, - unsigned char *v_src) -{ - int i; - unsigned char *src_ptr1, *src_ptr2; - unsigned char *dest_ptr1, *dest_ptr2; - - unsigned int Border; - int plane_stride; - int plane_height; - int plane_width; - - /***********/ - /* Y 
Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - plane_height = 16; - plane_width = ybf->y_width; - - /* copy the left and right most columns out */ - src_ptr1 = y_src; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - memset(dest_ptr1, src_ptr1[0], Border); - memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /***********/ - /* U Plane */ - /***********/ - plane_stride = ybf->uv_stride; - plane_height = 8; - plane_width = ybf->uv_width; - Border /= 2; - - /* copy the left and right most columns out */ - src_ptr1 = u_src; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - memset(dest_ptr1, src_ptr1[0], Border); - memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /***********/ - /* V Plane */ - /***********/ - - /* copy the left and right most columns out */ - src_ptr1 = v_src; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - memset(dest_ptr1, src_ptr1[0], Border); - memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } -} - -static void decode_mb_rows(VP8D_COMP *pbi) -{ - VP8_COMMON *const pc = & pbi->common; - MACROBLOCKD *const xd = & pbi->mb; - - MODE_INFO *lf_mic = xd->mode_info_context; - - int ibc = 0; - int num_part = 1 << pc->multi_token_partition; - - int recon_yoffset, recon_uvoffset; - int mb_row, mb_col; - int mb_idx = 0; - - YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; - - int recon_y_stride = 
yv12_fb_new->y_stride; - int recon_uv_stride = yv12_fb_new->uv_stride; - - unsigned char *ref_buffer[MAX_REF_FRAMES][3]; - unsigned char *dst_buffer[3]; - unsigned char *lf_dst[3]; - unsigned char *eb_dst[3]; - int i; - int ref_fb_corrupted[MAX_REF_FRAMES]; - - ref_fb_corrupted[INTRA_FRAME] = 0; - - for(i = 1; i < MAX_REF_FRAMES; i++) - { - YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i]; - - ref_buffer[i][0] = this_fb->y_buffer; - ref_buffer[i][1] = this_fb->u_buffer; - ref_buffer[i][2] = this_fb->v_buffer; - - ref_fb_corrupted[i] = this_fb->corrupted; - } - - /* Set up the buffer pointers */ - eb_dst[0] = lf_dst[0] = dst_buffer[0] = yv12_fb_new->y_buffer; - eb_dst[1] = lf_dst[1] = dst_buffer[1] = yv12_fb_new->u_buffer; - eb_dst[2] = lf_dst[2] = dst_buffer[2] = yv12_fb_new->v_buffer; - - xd->up_available = 0; - - /* Initialize the loop filter for this frame. */ - if(pc->filter_level) - vp8_loop_filter_frame_init(pc, xd, pc->filter_level); - - vp8_setup_intra_recon_top_line(yv12_fb_new); - - /* Decode the individual macro block */ - for (mb_row = 0; mb_row < pc->mb_rows; mb_row++) - { - if (num_part > 1) - { - xd->current_bc = & pbi->mbc[ibc]; - ibc++; - - if (ibc == num_part) - ibc = 0; - } - - recon_yoffset = mb_row * recon_y_stride * 16; - recon_uvoffset = mb_row * recon_uv_stride * 8; - - /* reset contexts */ - xd->above_context = pc->above_context; - memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); - - xd->left_available = 0; - - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; - - xd->recon_above[0] = dst_buffer[0] + recon_yoffset; - xd->recon_above[1] = dst_buffer[1] + recon_uvoffset; - xd->recon_above[2] = dst_buffer[2] + recon_uvoffset; - - xd->recon_left[0] = xd->recon_above[0] - 1; - xd->recon_left[1] = xd->recon_above[1] - 1; - xd->recon_left[2] = xd->recon_above[2] - 1; - - xd->recon_above[0] -= xd->dst.y_stride; - xd->recon_above[1] -= xd->dst.uv_stride; - xd->recon_above[2] -= 
xd->dst.uv_stride; - - /* TODO: move to outside row loop */ - xd->recon_left_stride[0] = xd->dst.y_stride; - xd->recon_left_stride[1] = xd->dst.uv_stride; - - setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1], - xd->recon_left[2], xd->dst.y_stride, - xd->dst.uv_stride); - - for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) - { - /* Distance of Mb to the various image edges. - * These are specified to 8th pel as they are always compared to values - * that are in 1/8th pel units - */ - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; - -#if CONFIG_ERROR_CONCEALMENT - { - int corrupt_residual = (!pbi->independent_partitions && - pbi->frame_corrupt_residual) || - vp8dx_bool_error(xd->current_bc); - if (pbi->ec_active && - xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME && - corrupt_residual) - { - /* We have an intra block with corrupt coefficients, better to - * conceal with an inter block. Interpolate MVs from neighboring - * MBs. - * - * Note that for the first mb with corrupt residual in a frame, - * we might not discover that before decoding the residual. That - * happens after this check, and therefore no inter concealment - * will be done. - */ - vp8_interpolate_motion(xd, - mb_row, mb_col, - pc->mb_rows, pc->mb_cols); - } - } -#endif - - xd->dst.y_buffer = dst_buffer[0] + recon_yoffset; - xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset; - xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset; - - if (xd->mode_info_context->mbmi.ref_frame >= LAST_FRAME) { - const MV_REFERENCE_FRAME ref = xd->mode_info_context->mbmi.ref_frame; - xd->pre.y_buffer = ref_buffer[ref][0] + recon_yoffset; - xd->pre.u_buffer = ref_buffer[ref][1] + recon_uvoffset; - xd->pre.v_buffer = ref_buffer[ref][2] + recon_uvoffset; - } else { - // ref_frame is INTRA_FRAME, pre buffer should not be used. 
- xd->pre.y_buffer = 0; - xd->pre.u_buffer = 0; - xd->pre.v_buffer = 0; - } - - /* propagate errors from reference frames */ - xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame]; - - decode_macroblock(pbi, xd, mb_idx); - - mb_idx++; - xd->left_available = 1; - - /* check if the boolean decoder has suffered an error */ - xd->corrupted |= vp8dx_bool_error(xd->current_bc); - - xd->recon_above[0] += 16; - xd->recon_above[1] += 8; - xd->recon_above[2] += 8; - xd->recon_left[0] += 16; - xd->recon_left[1] += 8; - xd->recon_left[2] += 8; - - recon_yoffset += 16; - recon_uvoffset += 8; - - ++xd->mode_info_context; /* next mb */ - - xd->above_context++; - } - - /* adjust to the next row of mbs */ - vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16, - xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); - - ++xd->mode_info_context; /* skip prediction column */ - xd->up_available = 1; - - if(pc->filter_level) - { - if(mb_row > 0) - { - if (pc->filter_type == NORMAL_LOOPFILTER) - vp8_loop_filter_row_normal(pc, lf_mic, mb_row-1, - recon_y_stride, recon_uv_stride, - lf_dst[0], lf_dst[1], lf_dst[2]); - else - vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, - recon_y_stride, recon_uv_stride, - lf_dst[0], lf_dst[1], lf_dst[2]); - if(mb_row > 1) - { - yv12_extend_frame_left_right_c(yv12_fb_new, - eb_dst[0], - eb_dst[1], - eb_dst[2]); - - eb_dst[0] += recon_y_stride * 16; - eb_dst[1] += recon_uv_stride * 8; - eb_dst[2] += recon_uv_stride * 8; - } - - lf_dst[0] += recon_y_stride * 16; - lf_dst[1] += recon_uv_stride * 8; - lf_dst[2] += recon_uv_stride * 8; - lf_mic += pc->mb_cols; - lf_mic++; /* Skip border mb */ - } - } - else - { - if(mb_row > 0) - { - /**/ - yv12_extend_frame_left_right_c(yv12_fb_new, - eb_dst[0], - eb_dst[1], - eb_dst[2]); - eb_dst[0] += recon_y_stride * 16; - eb_dst[1] += recon_uv_stride * 8; - eb_dst[2] += recon_uv_stride * 8; - } - } - } - - if(pc->filter_level) - { - if (pc->filter_type == NORMAL_LOOPFILTER) - vp8_loop_filter_row_normal(pc, 
lf_mic, mb_row-1, recon_y_stride, - recon_uv_stride, lf_dst[0], lf_dst[1], - lf_dst[2]); - else - vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, recon_y_stride, - recon_uv_stride, lf_dst[0], lf_dst[1], - lf_dst[2]); - - yv12_extend_frame_left_right_c(yv12_fb_new, - eb_dst[0], - eb_dst[1], - eb_dst[2]); - eb_dst[0] += recon_y_stride * 16; - eb_dst[1] += recon_uv_stride * 8; - eb_dst[2] += recon_uv_stride * 8; - } - yv12_extend_frame_left_right_c(yv12_fb_new, - eb_dst[0], - eb_dst[1], - eb_dst[2]); - yv12_extend_frame_top_c(yv12_fb_new); - yv12_extend_frame_bottom_c(yv12_fb_new); - -} - -static unsigned int read_partition_size(VP8D_COMP *pbi, - const unsigned char *cx_size) -{ - unsigned char temp[3]; - if (pbi->decrypt_cb) - { - pbi->decrypt_cb(pbi->decrypt_state, cx_size, temp, 3); - cx_size = temp; - } - return cx_size[0] + (cx_size[1] << 8) + (cx_size[2] << 16); -} - -static int read_is_valid(const unsigned char *start, - size_t len, - const unsigned char *end) -{ - return (start + len > start && start + len <= end); -} - -static unsigned int read_available_partition_size( - VP8D_COMP *pbi, - const unsigned char *token_part_sizes, - const unsigned char *fragment_start, - const unsigned char *first_fragment_end, - const unsigned char *fragment_end, - int i, - int num_part) -{ - VP8_COMMON* pc = &pbi->common; - const unsigned char *partition_size_ptr = token_part_sizes + i * 3; - unsigned int partition_size = 0; - ptrdiff_t bytes_left = fragment_end - fragment_start; - /* Calculate the length of this partition. The last partition - * size is implicit. If the partition size can't be read, then - * either use the remaining data in the buffer (for EC mode) - * or throw an error. 
- */ - if (i < num_part - 1) - { - if (read_is_valid(partition_size_ptr, 3, first_fragment_end)) - partition_size = read_partition_size(pbi, partition_size_ptr); - else if (pbi->ec_active) - partition_size = (unsigned int)bytes_left; - else - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated partition size data"); - } - else - partition_size = (unsigned int)bytes_left; - - /* Validate the calculated partition length. If the buffer - * described by the partition can't be fully read, then restrict - * it to the portion that can be (for EC mode) or throw an error. - */ - if (!read_is_valid(fragment_start, partition_size, fragment_end)) - { - if (pbi->ec_active) - partition_size = (unsigned int)bytes_left; - else - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt partition " - "%d length", i + 1); - } - return partition_size; -} - - -static void setup_token_decoder(VP8D_COMP *pbi, - const unsigned char* token_part_sizes) -{ - vp8_reader *bool_decoder = &pbi->mbc[0]; - unsigned int partition_idx; - unsigned int fragment_idx; - unsigned int num_token_partitions; - const unsigned char *first_fragment_end = pbi->fragments.ptrs[0] + - pbi->fragments.sizes[0]; - - TOKEN_PARTITION multi_token_partition = - (TOKEN_PARTITION)vp8_read_literal(&pbi->mbc[8], 2); - if (!vp8dx_bool_error(&pbi->mbc[8])) - pbi->common.multi_token_partition = multi_token_partition; - num_token_partitions = 1 << pbi->common.multi_token_partition; - - /* Check for partitions within the fragments and unpack the fragments - * so that each fragment pointer points to its corresponding partition. */ - for (fragment_idx = 0; fragment_idx < pbi->fragments.count; ++fragment_idx) - { - unsigned int fragment_size = pbi->fragments.sizes[fragment_idx]; - const unsigned char *fragment_end = pbi->fragments.ptrs[fragment_idx] + - fragment_size; - /* Special case for handling the first partition since we have already - * read its size. 
*/ - if (fragment_idx == 0) - { - /* Size of first partition + token partition sizes element */ - ptrdiff_t ext_first_part_size = token_part_sizes - - pbi->fragments.ptrs[0] + 3 * (num_token_partitions - 1); - fragment_size -= (unsigned int)ext_first_part_size; - if (fragment_size > 0) - { - pbi->fragments.sizes[0] = (unsigned int)ext_first_part_size; - /* The fragment contains an additional partition. Move to - * next. */ - fragment_idx++; - pbi->fragments.ptrs[fragment_idx] = pbi->fragments.ptrs[0] + - pbi->fragments.sizes[0]; - } - } - /* Split the chunk into partitions read from the bitstream */ - while (fragment_size > 0) - { - ptrdiff_t partition_size = read_available_partition_size( - pbi, - token_part_sizes, - pbi->fragments.ptrs[fragment_idx], - first_fragment_end, - fragment_end, - fragment_idx - 1, - num_token_partitions); - pbi->fragments.sizes[fragment_idx] = (unsigned int)partition_size; - fragment_size -= (unsigned int)partition_size; - assert(fragment_idx <= num_token_partitions); - if (fragment_size > 0) - { - /* The fragment contains an additional partition. - * Move to next. 
*/ - fragment_idx++; - pbi->fragments.ptrs[fragment_idx] = - pbi->fragments.ptrs[fragment_idx - 1] + partition_size; - } - } - } - - pbi->fragments.count = num_token_partitions + 1; - - for (partition_idx = 1; partition_idx < pbi->fragments.count; ++partition_idx) - { - if (vp8dx_start_decode(bool_decoder, - pbi->fragments.ptrs[partition_idx], - pbi->fragments.sizes[partition_idx], - pbi->decrypt_cb, pbi->decrypt_state)) - vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR, - "Failed to allocate bool decoder %d", - partition_idx); - - bool_decoder++; - } - -#if CONFIG_MULTITHREAD - /* Clamp number of decoder threads */ - if (pbi->decoding_thread_count > num_token_partitions - 1) - pbi->decoding_thread_count = num_token_partitions - 1; -#endif -} - - -static void init_frame(VP8D_COMP *pbi) -{ - VP8_COMMON *const pc = & pbi->common; - MACROBLOCKD *const xd = & pbi->mb; - - if (pc->frame_type == KEY_FRAME) - { - /* Various keyframe initializations */ - memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); - - vp8_init_mbmode_probs(pc); - - vp8_default_coef_probs(pc); - - /* reset the segment feature data to 0 with delta coding (Default state). */ - memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); - xd->mb_segement_abs_delta = SEGMENT_DELTADATA; - - /* reset the mode ref deltasa for loop filter */ - memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas)); - memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas)); - - /* All buffers are implicitly updated on key frames. 
*/ - pc->refresh_golden_frame = 1; - pc->refresh_alt_ref_frame = 1; - pc->copy_buffer_to_gf = 0; - pc->copy_buffer_to_arf = 0; - - /* Note that Golden and Altref modes cannot be used on a key frame so - * ref_frame_sign_bias[] is undefined and meaningless - */ - pc->ref_frame_sign_bias[GOLDEN_FRAME] = 0; - pc->ref_frame_sign_bias[ALTREF_FRAME] = 0; - } - else - { - /* To enable choice of different interploation filters */ - if (!pc->use_bilinear_mc_filter) - { - xd->subpixel_predict = vp8_sixtap_predict4x4; - xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; - xd->subpixel_predict8x8 = vp8_sixtap_predict8x8; - xd->subpixel_predict16x16 = vp8_sixtap_predict16x16; - } - else - { - xd->subpixel_predict = vp8_bilinear_predict4x4; - xd->subpixel_predict8x4 = vp8_bilinear_predict8x4; - xd->subpixel_predict8x8 = vp8_bilinear_predict8x8; - xd->subpixel_predict16x16 = vp8_bilinear_predict16x16; - } - - if (pbi->decoded_key_frame && pbi->ec_enabled && !pbi->ec_active) - pbi->ec_active = 1; - } - - xd->left_context = &pc->left_context; - xd->mode_info_context = pc->mi; - xd->frame_type = pc->frame_type; - xd->mode_info_context->mbmi.mode = DC_PRED; - xd->mode_info_stride = pc->mode_info_stride; - xd->corrupted = 0; /* init without corruption */ - - xd->fullpixel_mask = 0xffffffff; - if(pc->full_pixel) - xd->fullpixel_mask = 0xfffffff8; - -} - -int vp8_decode_frame(VP8D_COMP *pbi) -{ - vp8_reader *const bc = &pbi->mbc[8]; - VP8_COMMON *const pc = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - const unsigned char *data = pbi->fragments.ptrs[0]; - const unsigned int data_sz = pbi->fragments.sizes[0]; - const unsigned char *data_end = data + data_sz; - ptrdiff_t first_partition_length_in_bytes; - - int i, j, k, l; - const int *const mb_feature_data_bits = vp8_mb_feature_data_bits; - int corrupt_tokens = 0; - int prev_independent_partitions = pbi->independent_partitions; - - YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; - - /* start with no corruption of 
current frame */ - xd->corrupted = 0; - yv12_fb_new->corrupted = 0; - - if (data_end - data < 3) - { - if (!pbi->ec_active) - { - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet"); - } - - /* Declare the missing frame as an inter frame since it will - be handled as an inter frame when we have estimated its - motion vectors. */ - pc->frame_type = INTER_FRAME; - pc->version = 0; - pc->show_frame = 1; - first_partition_length_in_bytes = 0; - } - else - { - unsigned char clear_buffer[10]; - const unsigned char *clear = data; - if (pbi->decrypt_cb) - { - int n = (int)VPXMIN(sizeof(clear_buffer), data_sz); - pbi->decrypt_cb(pbi->decrypt_state, data, clear_buffer, n); - clear = clear_buffer; - } - - pc->frame_type = (FRAME_TYPE)(clear[0] & 1); - pc->version = (clear[0] >> 1) & 7; - pc->show_frame = (clear[0] >> 4) & 1; - first_partition_length_in_bytes = - (clear[0] | (clear[1] << 8) | (clear[2] << 16)) >> 5; - - if (!pbi->ec_active && - (data + first_partition_length_in_bytes > data_end - || data + first_partition_length_in_bytes < data)) - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt partition 0 length"); - - data += 3; - clear += 3; - - vp8_setup_version(pc); - - - if (pc->frame_type == KEY_FRAME) - { - /* vet via sync code */ - /* When error concealment is enabled we should only check the sync - * code if we have enough bits available - */ - if (!pbi->ec_active || data + 3 < data_end) - { - if (clear[0] != 0x9d || clear[1] != 0x01 || clear[2] != 0x2a) - vpx_internal_error(&pc->error, VPX_CODEC_UNSUP_BITSTREAM, - "Invalid frame sync code"); - } - - /* If error concealment is enabled we should only parse the new size - * if we have enough data. Otherwise we will end up with the wrong - * size. 
- */ - if (!pbi->ec_active || data + 6 < data_end) - { - pc->Width = (clear[3] | (clear[4] << 8)) & 0x3fff; - pc->horiz_scale = clear[4] >> 6; - pc->Height = (clear[5] | (clear[6] << 8)) & 0x3fff; - pc->vert_scale = clear[6] >> 6; - } - data += 7; - } - else - { - memcpy(&xd->pre, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); - memcpy(&xd->dst, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); - } - } - if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME)) - { - return -1; - } - - init_frame(pbi); - - if (vp8dx_start_decode(bc, data, (unsigned int)(data_end - data), - pbi->decrypt_cb, pbi->decrypt_state)) - vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate bool decoder 0"); - if (pc->frame_type == KEY_FRAME) { - (void)vp8_read_bit(bc); // colorspace - pc->clamp_type = (CLAMP_TYPE)vp8_read_bit(bc); - } - - /* Is segmentation enabled */ - xd->segmentation_enabled = (unsigned char)vp8_read_bit(bc); - - if (xd->segmentation_enabled) - { - /* Signal whether or not the segmentation map is being explicitly updated this frame. 
*/ - xd->update_mb_segmentation_map = (unsigned char)vp8_read_bit(bc); - xd->update_mb_segmentation_data = (unsigned char)vp8_read_bit(bc); - - if (xd->update_mb_segmentation_data) - { - xd->mb_segement_abs_delta = (unsigned char)vp8_read_bit(bc); - - memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); - - /* For each segmentation feature (Quant and loop filter level) */ - for (i = 0; i < MB_LVL_MAX; i++) - { - for (j = 0; j < MAX_MB_SEGMENTS; j++) - { - /* Frame level data */ - if (vp8_read_bit(bc)) - { - xd->segment_feature_data[i][j] = (signed char)vp8_read_literal(bc, mb_feature_data_bits[i]); - - if (vp8_read_bit(bc)) - xd->segment_feature_data[i][j] = -xd->segment_feature_data[i][j]; - } - else - xd->segment_feature_data[i][j] = 0; - } - } - } - - if (xd->update_mb_segmentation_map) - { - /* Which macro block level features are enabled */ - memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs)); - - /* Read the probs used to decode the segment id for each macro block. */ - for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) - { - /* If not explicitly set value is defaulted to 255 by memset above */ - if (vp8_read_bit(bc)) - xd->mb_segment_tree_probs[i] = (vp8_prob)vp8_read_literal(bc, 8); - } - } - } - else - { - /* No segmentation updates on this frame */ - xd->update_mb_segmentation_map = 0; - xd->update_mb_segmentation_data = 0; - } - - /* Read the loop filter level and type */ - pc->filter_type = (LOOPFILTERTYPE) vp8_read_bit(bc); - pc->filter_level = vp8_read_literal(bc, 6); - pc->sharpness_level = vp8_read_literal(bc, 3); - - /* Read in loop filter deltas applied at the MB level based on mode or ref frame. 
*/ - xd->mode_ref_lf_delta_update = 0; - xd->mode_ref_lf_delta_enabled = (unsigned char)vp8_read_bit(bc); - - if (xd->mode_ref_lf_delta_enabled) - { - /* Do the deltas need to be updated */ - xd->mode_ref_lf_delta_update = (unsigned char)vp8_read_bit(bc); - - if (xd->mode_ref_lf_delta_update) - { - /* Send update */ - for (i = 0; i < MAX_REF_LF_DELTAS; i++) - { - if (vp8_read_bit(bc)) - { - /*sign = vp8_read_bit( bc );*/ - xd->ref_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6); - - if (vp8_read_bit(bc)) /* Apply sign */ - xd->ref_lf_deltas[i] = xd->ref_lf_deltas[i] * -1; - } - } - - /* Send update */ - for (i = 0; i < MAX_MODE_LF_DELTAS; i++) - { - if (vp8_read_bit(bc)) - { - /*sign = vp8_read_bit( bc );*/ - xd->mode_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6); - - if (vp8_read_bit(bc)) /* Apply sign */ - xd->mode_lf_deltas[i] = xd->mode_lf_deltas[i] * -1; - } - } - } - } - - setup_token_decoder(pbi, data + first_partition_length_in_bytes); - - xd->current_bc = &pbi->mbc[0]; - - /* Read the default quantizers. */ - { - int Q, q_update; - - Q = vp8_read_literal(bc, 7); /* AC 1st order Q = default */ - pc->base_qindex = Q; - q_update = 0; - pc->y1dc_delta_q = get_delta_q(bc, pc->y1dc_delta_q, &q_update); - pc->y2dc_delta_q = get_delta_q(bc, pc->y2dc_delta_q, &q_update); - pc->y2ac_delta_q = get_delta_q(bc, pc->y2ac_delta_q, &q_update); - pc->uvdc_delta_q = get_delta_q(bc, pc->uvdc_delta_q, &q_update); - pc->uvac_delta_q = get_delta_q(bc, pc->uvac_delta_q, &q_update); - - if (q_update) - vp8cx_init_de_quantizer(pbi); - - /* MB level dequantizer setup */ - vp8_mb_init_dequantizer(pbi, &pbi->mb); - } - - /* Determine if the golden frame or ARF buffer should be updated and how. - * For all non key frames the GF and ARF refresh flags and sign bias - * flags must be set explicitly. 
- */ - if (pc->frame_type != KEY_FRAME) - { - /* Should the GF or ARF be updated from the current frame */ - pc->refresh_golden_frame = vp8_read_bit(bc); -#if CONFIG_ERROR_CONCEALMENT - /* Assume we shouldn't refresh golden if the bit is missing */ - xd->corrupted |= vp8dx_bool_error(bc); - if (pbi->ec_active && xd->corrupted) - pc->refresh_golden_frame = 0; -#endif - - pc->refresh_alt_ref_frame = vp8_read_bit(bc); -#if CONFIG_ERROR_CONCEALMENT - /* Assume we shouldn't refresh altref if the bit is missing */ - xd->corrupted |= vp8dx_bool_error(bc); - if (pbi->ec_active && xd->corrupted) - pc->refresh_alt_ref_frame = 0; -#endif - - /* Buffer to buffer copy flags. */ - pc->copy_buffer_to_gf = 0; - - if (!pc->refresh_golden_frame) - pc->copy_buffer_to_gf = vp8_read_literal(bc, 2); - -#if CONFIG_ERROR_CONCEALMENT - /* Assume we shouldn't copy to the golden if the bit is missing */ - xd->corrupted |= vp8dx_bool_error(bc); - if (pbi->ec_active && xd->corrupted) - pc->copy_buffer_to_gf = 0; -#endif - - pc->copy_buffer_to_arf = 0; - - if (!pc->refresh_alt_ref_frame) - pc->copy_buffer_to_arf = vp8_read_literal(bc, 2); - -#if CONFIG_ERROR_CONCEALMENT - /* Assume we shouldn't copy to the alt-ref if the bit is missing */ - xd->corrupted |= vp8dx_bool_error(bc); - if (pbi->ec_active && xd->corrupted) - pc->copy_buffer_to_arf = 0; -#endif - - - pc->ref_frame_sign_bias[GOLDEN_FRAME] = vp8_read_bit(bc); - pc->ref_frame_sign_bias[ALTREF_FRAME] = vp8_read_bit(bc); - } - - pc->refresh_entropy_probs = vp8_read_bit(bc); -#if CONFIG_ERROR_CONCEALMENT - /* Assume we shouldn't refresh the probabilities if the bit is - * missing */ - xd->corrupted |= vp8dx_bool_error(bc); - if (pbi->ec_active && xd->corrupted) - pc->refresh_entropy_probs = 0; -#endif - if (pc->refresh_entropy_probs == 0) - { - memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc)); - } - - pc->refresh_last_frame = pc->frame_type == KEY_FRAME || vp8_read_bit(bc); - -#if CONFIG_ERROR_CONCEALMENT - /* Assume we should refresh the last 
frame if the bit is missing */ - xd->corrupted |= vp8dx_bool_error(bc); - if (pbi->ec_active && xd->corrupted) - pc->refresh_last_frame = 1; -#endif - - if (0) - { - FILE *z = fopen("decodestats.stt", "a"); - fprintf(z, "%6d F:%d,G:%d,A:%d,L:%d,Q:%d\n", - pc->current_video_frame, - pc->frame_type, - pc->refresh_golden_frame, - pc->refresh_alt_ref_frame, - pc->refresh_last_frame, - pc->base_qindex); - fclose(z); - } - - { - pbi->independent_partitions = 1; - - /* read coef probability tree */ - for (i = 0; i < BLOCK_TYPES; i++) - for (j = 0; j < COEF_BANDS; j++) - for (k = 0; k < PREV_COEF_CONTEXTS; k++) - for (l = 0; l < ENTROPY_NODES; l++) - { - - vp8_prob *const p = pc->fc.coef_probs [i][j][k] + l; - - if (vp8_read(bc, vp8_coef_update_probs [i][j][k][l])) - { - *p = (vp8_prob)vp8_read_literal(bc, 8); - - } - if (k > 0 && *p != pc->fc.coef_probs[i][j][k-1][l]) - pbi->independent_partitions = 0; - - } - } - - /* clear out the coeff buffer */ - memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); - - vp8_decode_mode_mvs(pbi); - -#if CONFIG_ERROR_CONCEALMENT - if (pbi->ec_active && - pbi->mvs_corrupt_from_mb < (unsigned int)pc->mb_cols * pc->mb_rows) - { - /* Motion vectors are missing in this frame. We will try to estimate - * them and then continue decoding the frame as usual */ - vp8_estimate_missing_mvs(pbi); - } -#endif - - memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols); - pbi->frame_corrupt_residual = 0; - -#if CONFIG_MULTITHREAD - if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION) - { - unsigned int thread; - vp8mt_decode_mb_rows(pbi, xd); - vp8_yv12_extend_frame_borders(yv12_fb_new); - for (thread = 0; thread < pbi->decoding_thread_count; ++thread) - corrupt_tokens |= pbi->mb_row_di[thread].mbd.corrupted; - } - else -#endif - { - decode_mb_rows(pbi); - corrupt_tokens |= xd->corrupted; - } - - /* Collect information about decoder corruption. */ - /* 1. Check first boolean decoder for errors. 
*/ - yv12_fb_new->corrupted = vp8dx_bool_error(bc); - /* 2. Check the macroblock information */ - yv12_fb_new->corrupted |= corrupt_tokens; - - if (!pbi->decoded_key_frame) - { - if (pc->frame_type == KEY_FRAME && - !yv12_fb_new->corrupted) - pbi->decoded_key_frame = 1; - else - vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME, - "A stream must start with a complete key frame"); - } - - /* vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos); */ - - if (pc->refresh_entropy_probs == 0) - { - memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc)); - pbi->independent_partitions = prev_independent_partitions; - } - -#ifdef PACKET_TESTING - { - FILE *f = fopen("decompressor.VP8", "ab"); - unsigned int size = pbi->bc2.pos + pbi->bc.pos + 8; - fwrite((void *) &size, 4, 1, f); - fwrite((void *) pbi->Source, size, 1, f); - fclose(f); - } -#endif - - return 0; -} diff --git a/thirdparty/libvpx/vp8/decoder/decodemv.c b/thirdparty/libvpx/vp8/decoder/decodemv.c deleted file mode 100644 index 1d155e7e16..0000000000 --- a/thirdparty/libvpx/vp8/decoder/decodemv.c +++ /dev/null @@ -1,670 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "treereader.h" -#include "vp8/common/entropymv.h" -#include "vp8/common/entropymode.h" -#include "onyxd_int.h" -#include "vp8/common/findnearmv.h" - -#if CONFIG_DEBUG -#include <assert.h> -#endif -static B_PREDICTION_MODE read_bmode(vp8_reader *bc, const vp8_prob *p) -{ - const int i = vp8_treed_read(bc, vp8_bmode_tree, p); - - return (B_PREDICTION_MODE)i; -} - -static MB_PREDICTION_MODE read_ymode(vp8_reader *bc, const vp8_prob *p) -{ - const int i = vp8_treed_read(bc, vp8_ymode_tree, p); - - return (MB_PREDICTION_MODE)i; -} - -static MB_PREDICTION_MODE read_kf_ymode(vp8_reader *bc, const vp8_prob *p) -{ - const int i = vp8_treed_read(bc, vp8_kf_ymode_tree, p); - - return (MB_PREDICTION_MODE)i; -} - -static MB_PREDICTION_MODE read_uv_mode(vp8_reader *bc, const vp8_prob *p) -{ - const int i = vp8_treed_read(bc, vp8_uv_mode_tree, p); - - return (MB_PREDICTION_MODE)i; -} - -static void read_kf_modes(VP8D_COMP *pbi, MODE_INFO *mi) -{ - vp8_reader *const bc = & pbi->mbc[8]; - const int mis = pbi->common.mode_info_stride; - - mi->mbmi.ref_frame = INTRA_FRAME; - mi->mbmi.mode = read_kf_ymode(bc, vp8_kf_ymode_prob); - - if (mi->mbmi.mode == B_PRED) - { - int i = 0; - mi->mbmi.is_4x4 = 1; - - do - { - const B_PREDICTION_MODE A = above_block_mode(mi, i, mis); - const B_PREDICTION_MODE L = left_block_mode(mi, i); - - mi->bmi[i].as_mode = - read_bmode(bc, vp8_kf_bmode_prob [A] [L]); - } - while (++i < 16); - } - - mi->mbmi.uv_mode = read_uv_mode(bc, vp8_kf_uv_mode_prob); -} - -static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc) -{ - const vp8_prob *const p = (const vp8_prob *) mvc; - int x = 0; - - if (vp8_read(r, p [mvpis_short])) /* Large */ - { - int i = 0; - - do - { - x += vp8_read(r, p [MVPbits + i]) << i; - } - while (++i < 3); - - i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */ - - do - { - x += vp8_read(r, p [MVPbits + i]) << i; - } - while (--i > 3); - - if (!(x & 0xFFF0) || vp8_read(r, p [MVPbits + 3])) - x += 8; 
- } - else /* small */ - x = vp8_treed_read(r, vp8_small_mvtree, p + MVPshort); - - if (x && vp8_read(r, p [MVPsign])) - x = -x; - - return x; -} - -static void read_mv(vp8_reader *r, MV *mv, const MV_CONTEXT *mvc) -{ - mv->row = (short)(read_mvcomponent(r, mvc) * 2); - mv->col = (short)(read_mvcomponent(r, ++mvc) * 2); -} - - -static void read_mvcontexts(vp8_reader *bc, MV_CONTEXT *mvc) -{ - int i = 0; - - do - { - const vp8_prob *up = vp8_mv_update_probs[i].prob; - vp8_prob *p = (vp8_prob *)(mvc + i); - vp8_prob *const pstop = p + MVPcount; - - do - { - if (vp8_read(bc, *up++)) - { - const vp8_prob x = (vp8_prob)vp8_read_literal(bc, 7); - - *p = x ? x << 1 : 1; - } - } - while (++p < pstop); - } - while (++i < 2); -} - -static const unsigned char mbsplit_fill_count[4] = {8, 8, 4, 1}; -static const unsigned char mbsplit_fill_offset[4][16] = { - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15}, - { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}, - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} -}; - - -static void mb_mode_mv_init(VP8D_COMP *pbi) -{ - vp8_reader *const bc = & pbi->mbc[8]; - MV_CONTEXT *const mvc = pbi->common.fc.mvc; - -#if CONFIG_ERROR_CONCEALMENT - /* Default is that no macroblock is corrupt, therefore we initialize - * mvs_corrupt_from_mb to something very big, which we can be sure is - * outside the frame. 
*/ - pbi->mvs_corrupt_from_mb = UINT_MAX; -#endif - /* Read the mb_no_coeff_skip flag */ - pbi->common.mb_no_coeff_skip = (int)vp8_read_bit(bc); - - pbi->prob_skip_false = 0; - if (pbi->common.mb_no_coeff_skip) - pbi->prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8); - - if(pbi->common.frame_type != KEY_FRAME) - { - pbi->prob_intra = (vp8_prob)vp8_read_literal(bc, 8); - pbi->prob_last = (vp8_prob)vp8_read_literal(bc, 8); - pbi->prob_gf = (vp8_prob)vp8_read_literal(bc, 8); - - if (vp8_read_bit(bc)) - { - int i = 0; - - do - { - pbi->common.fc.ymode_prob[i] = - (vp8_prob) vp8_read_literal(bc, 8); - } - while (++i < 4); - } - - if (vp8_read_bit(bc)) - { - int i = 0; - - do - { - pbi->common.fc.uv_mode_prob[i] = - (vp8_prob) vp8_read_literal(bc, 8); - } - while (++i < 3); - } - - read_mvcontexts(bc, mvc); - } -} - -const vp8_prob vp8_sub_mv_ref_prob3 [8][VP8_SUBMVREFS-1] = -{ - { 147, 136, 18 }, /* SUBMVREF_NORMAL */ - { 223, 1 , 34 }, /* SUBMVREF_LEFT_ABOVE_SAME */ - { 106, 145, 1 }, /* SUBMVREF_LEFT_ZED */ - { 208, 1 , 1 }, /* SUBMVREF_LEFT_ABOVE_ZED */ - { 179, 121, 1 }, /* SUBMVREF_ABOVE_ZED */ - { 223, 1 , 34 }, /* SUBMVREF_LEFT_ABOVE_SAME */ - { 179, 121, 1 }, /* SUBMVREF_ABOVE_ZED */ - { 208, 1 , 1 } /* SUBMVREF_LEFT_ABOVE_ZED */ -}; - -static -const vp8_prob * get_sub_mv_ref_prob(const int left, const int above) -{ - int lez = (left == 0); - int aez = (above == 0); - int lea = (left == above); - const vp8_prob * prob; - - prob = vp8_sub_mv_ref_prob3[(aez << 2) | - (lez << 1) | - (lea)]; - - return prob; -} - -static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi, - const MODE_INFO *left_mb, const MODE_INFO *above_mb, - MB_MODE_INFO *mbmi, int_mv best_mv, - MV_CONTEXT *const mvc, int mb_to_left_edge, - int mb_to_right_edge, int mb_to_top_edge, - int mb_to_bottom_edge) -{ - int s; /* split configuration (16x8, 8x16, 8x8, 4x4) */ - int num_p; /* number of partitions in the split configuration - (see vp8_mbsplit_count) */ - int j = 0; - - s = 3; - 
num_p = 16; - if( vp8_read(bc, 110) ) - { - s = 2; - num_p = 4; - if( vp8_read(bc, 111) ) - { - s = vp8_read(bc, 150); - num_p = 2; - } - } - - do /* for each subset j */ - { - int_mv leftmv, abovemv; - int_mv blockmv; - int k; /* first block in subset j */ - - const vp8_prob *prob; - k = vp8_mbsplit_offset[s][j]; - - if (!(k & 3)) - { - /* On L edge, get from MB to left of us */ - if(left_mb->mbmi.mode != SPLITMV) - leftmv.as_int = left_mb->mbmi.mv.as_int; - else - leftmv.as_int = (left_mb->bmi + k + 4 - 1)->mv.as_int; - } - else - leftmv.as_int = (mi->bmi + k - 1)->mv.as_int; - - if (!(k >> 2)) - { - /* On top edge, get from MB above us */ - if(above_mb->mbmi.mode != SPLITMV) - abovemv.as_int = above_mb->mbmi.mv.as_int; - else - abovemv.as_int = (above_mb->bmi + k + 16 - 4)->mv.as_int; - } - else - abovemv.as_int = (mi->bmi + k - 4)->mv.as_int; - - prob = get_sub_mv_ref_prob(leftmv.as_int, abovemv.as_int); - - if( vp8_read(bc, prob[0]) ) - { - if( vp8_read(bc, prob[1]) ) - { - blockmv.as_int = 0; - if( vp8_read(bc, prob[2]) ) - { - blockmv.as_mv.row = read_mvcomponent(bc, &mvc[0]) * 2; - blockmv.as_mv.row += best_mv.as_mv.row; - blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) * 2; - blockmv.as_mv.col += best_mv.as_mv.col; - } - } - else - { - blockmv.as_int = abovemv.as_int; - } - } - else - { - blockmv.as_int = leftmv.as_int; - } - - mbmi->need_to_clamp_mvs |= vp8_check_mv_bounds(&blockmv, - mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - - { - /* Fill (uniform) modes, mvs of jth subset. - Must do it here because ensuing subsets can - refer back to us via "left" or "above". 
*/ - const unsigned char *fill_offset; - unsigned int fill_count = mbsplit_fill_count[s]; - - fill_offset = &mbsplit_fill_offset[s] - [(unsigned char)j * mbsplit_fill_count[s]]; - - do { - mi->bmi[ *fill_offset].mv.as_int = blockmv.as_int; - fill_offset++; - }while (--fill_count); - } - - } - while (++j < num_p); - - mbmi->partitioning = s; -} - -static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi) -{ - vp8_reader *const bc = & pbi->mbc[8]; - mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, pbi->prob_intra); - if (mbmi->ref_frame) /* inter MB */ - { - enum {CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV}; - int cnt[4]; - int *cntx = cnt; - int_mv near_mvs[4]; - int_mv *nmv = near_mvs; - const int mis = pbi->mb.mode_info_stride; - const MODE_INFO *above = mi - mis; - const MODE_INFO *left = mi - 1; - const MODE_INFO *aboveleft = above - 1; - int *ref_frame_sign_bias = pbi->common.ref_frame_sign_bias; - - mbmi->need_to_clamp_mvs = 0; - - if (vp8_read(bc, pbi->prob_last)) - { - mbmi->ref_frame = - (MV_REFERENCE_FRAME)((int)(2 + vp8_read(bc, pbi->prob_gf))); - } - - /* Zero accumulators */ - nmv[0].as_int = nmv[1].as_int = nmv[2].as_int = 0; - cnt[0] = cnt[1] = cnt[2] = cnt[3] = 0; - - /* Process above */ - if (above->mbmi.ref_frame != INTRA_FRAME) - { - if (above->mbmi.mv.as_int) - { - (++nmv)->as_int = above->mbmi.mv.as_int; - mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], - mbmi->ref_frame, nmv, ref_frame_sign_bias); - ++cntx; - } - - *cntx += 2; - } - - /* Process left */ - if (left->mbmi.ref_frame != INTRA_FRAME) - { - if (left->mbmi.mv.as_int) - { - int_mv this_mv; - - this_mv.as_int = left->mbmi.mv.as_int; - mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], - mbmi->ref_frame, &this_mv, ref_frame_sign_bias); - - if (this_mv.as_int != nmv->as_int) - { - (++nmv)->as_int = this_mv.as_int; - ++cntx; - } - - *cntx += 2; - } - else - cnt[CNT_INTRA] += 2; - } - - /* Process above left */ - if (aboveleft->mbmi.ref_frame != INTRA_FRAME) 
- { - if (aboveleft->mbmi.mv.as_int) - { - int_mv this_mv; - - this_mv.as_int = aboveleft->mbmi.mv.as_int; - mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], - mbmi->ref_frame, &this_mv, ref_frame_sign_bias); - - if (this_mv.as_int != nmv->as_int) - { - (++nmv)->as_int = this_mv.as_int; - ++cntx; - } - - *cntx += 1; - } - else - cnt[CNT_INTRA] += 1; - } - - if( vp8_read(bc, vp8_mode_contexts [cnt[CNT_INTRA]] [0]) ) - { - - /* If we have three distinct MV's ... */ - /* See if above-left MV can be merged with NEAREST */ - cnt[CNT_NEAREST] += ( (cnt[CNT_SPLITMV] > 0) & - (nmv->as_int == near_mvs[CNT_NEAREST].as_int)); - - /* Swap near and nearest if necessary */ - if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) - { - int tmp; - tmp = cnt[CNT_NEAREST]; - cnt[CNT_NEAREST] = cnt[CNT_NEAR]; - cnt[CNT_NEAR] = tmp; - tmp = near_mvs[CNT_NEAREST].as_int; - near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int; - near_mvs[CNT_NEAR].as_int = tmp; - } - - if( vp8_read(bc, vp8_mode_contexts [cnt[CNT_NEAREST]] [1]) ) - { - - if( vp8_read(bc, vp8_mode_contexts [cnt[CNT_NEAR]] [2]) ) - { - int mb_to_top_edge; - int mb_to_bottom_edge; - int mb_to_left_edge; - int mb_to_right_edge; - MV_CONTEXT *const mvc = pbi->common.fc.mvc; - int near_index; - - mb_to_top_edge = pbi->mb.mb_to_top_edge; - mb_to_bottom_edge = pbi->mb.mb_to_bottom_edge; - mb_to_top_edge -= LEFT_TOP_MARGIN; - mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN; - mb_to_right_edge = pbi->mb.mb_to_right_edge; - mb_to_right_edge += RIGHT_BOTTOM_MARGIN; - mb_to_left_edge = pbi->mb.mb_to_left_edge; - mb_to_left_edge -= LEFT_TOP_MARGIN; - - /* Use near_mvs[0] to store the "best" MV */ - near_index = CNT_INTRA + - (cnt[CNT_NEAREST] >= cnt[CNT_INTRA]); - - vp8_clamp_mv2(&near_mvs[near_index], &pbi->mb); - - cnt[CNT_SPLITMV] = ((above->mbmi.mode == SPLITMV) - + (left->mbmi.mode == SPLITMV)) * 2 - + (aboveleft->mbmi.mode == SPLITMV); - - if( vp8_read(bc, vp8_mode_contexts [cnt[CNT_SPLITMV]] [3]) ) - { - decode_split_mv(bc, mi, left, above, 
- mbmi, - near_mvs[near_index], - mvc, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - mbmi->mv.as_int = mi->bmi[15].mv.as_int; - mbmi->mode = SPLITMV; - mbmi->is_4x4 = 1; - } - else - { - int_mv *const mbmi_mv = & mbmi->mv; - read_mv(bc, &mbmi_mv->as_mv, (const MV_CONTEXT *) mvc); - mbmi_mv->as_mv.row += near_mvs[near_index].as_mv.row; - mbmi_mv->as_mv.col += near_mvs[near_index].as_mv.col; - - /* Don't need to check this on NEARMV and NEARESTMV - * modes since those modes clamp the MV. The NEWMV mode - * does not, so signal to the prediction stage whether - * special handling may be required. - */ - mbmi->need_to_clamp_mvs = - vp8_check_mv_bounds(mbmi_mv, mb_to_left_edge, - mb_to_right_edge, - mb_to_top_edge, - mb_to_bottom_edge); - mbmi->mode = NEWMV; - } - } - else - { - mbmi->mode = NEARMV; - mbmi->mv.as_int = near_mvs[CNT_NEAR].as_int; - vp8_clamp_mv2(&mbmi->mv, &pbi->mb); - } - } - else - { - mbmi->mode = NEARESTMV; - mbmi->mv.as_int = near_mvs[CNT_NEAREST].as_int; - vp8_clamp_mv2(&mbmi->mv, &pbi->mb); - } - } - else - { - mbmi->mode = ZEROMV; - mbmi->mv.as_int = 0; - } - -#if CONFIG_ERROR_CONCEALMENT - if(pbi->ec_enabled && (mbmi->mode != SPLITMV)) - { - mi->bmi[ 0].mv.as_int = - mi->bmi[ 1].mv.as_int = - mi->bmi[ 2].mv.as_int = - mi->bmi[ 3].mv.as_int = - mi->bmi[ 4].mv.as_int = - mi->bmi[ 5].mv.as_int = - mi->bmi[ 6].mv.as_int = - mi->bmi[ 7].mv.as_int = - mi->bmi[ 8].mv.as_int = - mi->bmi[ 9].mv.as_int = - mi->bmi[10].mv.as_int = - mi->bmi[11].mv.as_int = - mi->bmi[12].mv.as_int = - mi->bmi[13].mv.as_int = - mi->bmi[14].mv.as_int = - mi->bmi[15].mv.as_int = mbmi->mv.as_int; - } -#endif - } - else - { - /* required for left and above block mv */ - mbmi->mv.as_int = 0; - - /* MB is intra coded */ - if ((mbmi->mode = read_ymode(bc, pbi->common.fc.ymode_prob)) == B_PRED) - { - int j = 0; - mbmi->is_4x4 = 1; - do - { - mi->bmi[j].as_mode = read_bmode(bc, pbi->common.fc.bmode_prob); - } - while (++j < 16); - } - - mbmi->uv_mode = 
read_uv_mode(bc, pbi->common.fc.uv_mode_prob); - } - -} - -static void read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x) -{ - /* Is segmentation enabled */ - if (x->segmentation_enabled && x->update_mb_segmentation_map) - { - /* If so then read the segment id. */ - if (vp8_read(r, x->mb_segment_tree_probs[0])) - mi->segment_id = - (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2])); - else - mi->segment_id = - (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1])); - } -} - -static void decode_mb_mode_mvs(VP8D_COMP *pbi, MODE_INFO *mi, - MB_MODE_INFO *mbmi) -{ - (void)mbmi; - - /* Read the Macroblock segmentation map if it is being updated explicitly - * this frame (reset to 0 above by default) - * By default on a key frame reset all MBs to segment 0 - */ - if (pbi->mb.update_mb_segmentation_map) - read_mb_features(&pbi->mbc[8], &mi->mbmi, &pbi->mb); - else if(pbi->common.frame_type == KEY_FRAME) - mi->mbmi.segment_id = 0; - - /* Read the macroblock coeff skip flag if this feature is in use, - * else default to 0 */ - if (pbi->common.mb_no_coeff_skip) - mi->mbmi.mb_skip_coeff = vp8_read(&pbi->mbc[8], pbi->prob_skip_false); - else - mi->mbmi.mb_skip_coeff = 0; - - mi->mbmi.is_4x4 = 0; - if(pbi->common.frame_type == KEY_FRAME) - read_kf_modes(pbi, mi); - else - read_mb_modes_mv(pbi, mi, &mi->mbmi); - -} - -void vp8_decode_mode_mvs(VP8D_COMP *pbi) -{ - MODE_INFO *mi = pbi->common.mi; - int mb_row = -1; - int mb_to_right_edge_start; - - mb_mode_mv_init(pbi); - - pbi->mb.mb_to_top_edge = 0; - pbi->mb.mb_to_bottom_edge = ((pbi->common.mb_rows - 1) * 16) << 3; - mb_to_right_edge_start = ((pbi->common.mb_cols - 1) * 16) << 3; - - while (++mb_row < pbi->common.mb_rows) - { - int mb_col = -1; - - pbi->mb.mb_to_left_edge = 0; - pbi->mb.mb_to_right_edge = mb_to_right_edge_start; - - while (++mb_col < pbi->common.mb_cols) - { -#if CONFIG_ERROR_CONCEALMENT - int mb_num = mb_row * pbi->common.mb_cols + mb_col; -#endif - - decode_mb_mode_mvs(pbi, mi, 
&mi->mbmi); - -#if CONFIG_ERROR_CONCEALMENT - /* look for corruption. set mvs_corrupt_from_mb to the current - * mb_num if the frame is corrupt from this macroblock. */ - if (vp8dx_bool_error(&pbi->mbc[8]) && mb_num < - (int)pbi->mvs_corrupt_from_mb) - { - pbi->mvs_corrupt_from_mb = mb_num; - /* no need to continue since the partition is corrupt from - * here on. - */ - return; - } -#endif - - pbi->mb.mb_to_left_edge -= (16 << 3); - pbi->mb.mb_to_right_edge -= (16 << 3); - mi++; /* next macroblock */ - } - pbi->mb.mb_to_top_edge -= (16 << 3); - pbi->mb.mb_to_bottom_edge -= (16 << 3); - - mi++; /* skip left predictor each row */ - } -} diff --git a/thirdparty/libvpx/vp8/decoder/decodemv.h b/thirdparty/libvpx/vp8/decoder/decodemv.h deleted file mode 100644 index f33b07351d..0000000000 --- a/thirdparty/libvpx/vp8/decoder/decodemv.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP8_DECODER_DECODEMV_H_ -#define VP8_DECODER_DECODEMV_H_ - -#include "onyxd_int.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_decode_mode_mvs(VP8D_COMP *); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_DECODER_DECODEMV_H_ diff --git a/thirdparty/libvpx/vp8/decoder/decoderthreading.h b/thirdparty/libvpx/vp8/decoder/decoderthreading.h deleted file mode 100644 index c563cf6e93..0000000000 --- a/thirdparty/libvpx/vp8/decoder/decoderthreading.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP8_DECODER_DECODERTHREADING_H_ -#define VP8_DECODER_DECODERTHREADING_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#if CONFIG_MULTITHREAD -void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd); -void vp8_decoder_remove_threads(VP8D_COMP *pbi); -void vp8_decoder_create_threads(VP8D_COMP *pbi); -void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows); -void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows); -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_DECODER_DECODERTHREADING_H_ diff --git a/thirdparty/libvpx/vp8/decoder/detokenize.c b/thirdparty/libvpx/vp8/decoder/detokenize.c deleted file mode 100644 index fcc7533c50..0000000000 --- a/thirdparty/libvpx/vp8/decoder/detokenize.c +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vp8/common/blockd.h" -#include "onyxd_int.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/mem.h" -#include "detokenize.h" - -void vp8_reset_mb_tokens_context(MACROBLOCKD *x) -{ - ENTROPY_CONTEXT *a_ctx = ((ENTROPY_CONTEXT *)x->above_context); - ENTROPY_CONTEXT *l_ctx = ((ENTROPY_CONTEXT *)x->left_context); - - memset(a_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1); - memset(l_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1); - - /* Clear entropy contexts for Y2 blocks */ - if (!x->mode_info_context->mbmi.is_4x4) - { - a_ctx[8] = l_ctx[8] = 0; - } -} - -/* - ------------------------------------------------------------------------------ - Residual decoding (Paragraph 13.2 / 13.3) -*/ -static const uint8_t kBands[16 + 1] = { - 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, - 0 /* extra entry as sentinel */ -}; - -static const uint8_t kCat3[] = { 173, 148, 140, 0 }; -static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 }; -static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 }; -static const uint8_t kCat6[] = - { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; -static const uint8_t* const kCat3456[] = { kCat3, kCat4, kCat5, kCat6 }; -static const uint8_t kZigzag[16] = { - 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 -}; - -#define VP8GetBit vp8dx_decode_bool -#define NUM_PROBAS 11 -#define NUM_CTX 3 - -/* for const-casting */ -typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; - -static int GetSigned(BOOL_DECODER *br, int value_to_sign) -{ - int split = (br->range + 1) >> 1; - VP8_BD_VALUE bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); - int v; - - if(br->count < 0) - vp8dx_bool_decoder_fill(br); - - if ( br->value < bigsplit ) - { - br->range = split; - v= value_to_sign; - } - else - { - br->range = br->range-split; - br->value = br->value-bigsplit; - v = -value_to_sign; - } - br->range +=br->range; - br->value +=br->value; - br->count--; - - return v; -} -/* - Returns the position of the last non-zero 
coeff plus one - (and 0 if there's no coeff at all) -*/ -static int GetCoeffs(BOOL_DECODER *br, ProbaArray prob, - int ctx, int n, int16_t* out) -{ - const uint8_t* p = prob[n][ctx]; - if (!VP8GetBit(br, p[0])) - { /* first EOB is more a 'CBP' bit. */ - return 0; - } - while (1) - { - ++n; - if (!VP8GetBit(br, p[1])) - { - p = prob[kBands[n]][0]; - } - else - { /* non zero coeff */ - int v, j; - if (!VP8GetBit(br, p[2])) - { - p = prob[kBands[n]][1]; - v = 1; - } - else - { - if (!VP8GetBit(br, p[3])) - { - if (!VP8GetBit(br, p[4])) - { - v = 2; - } - else - { - v = 3 + VP8GetBit(br, p[5]); - } - } - else - { - if (!VP8GetBit(br, p[6])) - { - if (!VP8GetBit(br, p[7])) - { - v = 5 + VP8GetBit(br, 159); - } else - { - v = 7 + 2 * VP8GetBit(br, 165); - v += VP8GetBit(br, 145); - } - } - else - { - const uint8_t* tab; - const int bit1 = VP8GetBit(br, p[8]); - const int bit0 = VP8GetBit(br, p[9 + bit1]); - const int cat = 2 * bit1 + bit0; - v = 0; - for (tab = kCat3456[cat]; *tab; ++tab) - { - v += v + VP8GetBit(br, *tab); - } - v += 3 + (8 << cat); - } - } - p = prob[kBands[n]][2]; - } - j = kZigzag[n - 1]; - - out[j] = GetSigned(br, v); - - if (n == 16 || !VP8GetBit(br, p[0])) - { /* EOB */ - return n; - } - } - if (n == 16) - { - return 16; - } - } -} - -int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x) -{ - BOOL_DECODER *bc = x->current_bc; - const FRAME_CONTEXT * const fc = &dx->common.fc; - char *eobs = x->eobs; - - int i; - int nonzeros; - int eobtotal = 0; - - short *qcoeff_ptr; - ProbaArray coef_probs; - ENTROPY_CONTEXT *a_ctx = ((ENTROPY_CONTEXT *)x->above_context); - ENTROPY_CONTEXT *l_ctx = ((ENTROPY_CONTEXT *)x->left_context); - ENTROPY_CONTEXT *a; - ENTROPY_CONTEXT *l; - int skip_dc = 0; - - qcoeff_ptr = &x->qcoeff[0]; - - if (!x->mode_info_context->mbmi.is_4x4) - { - a = a_ctx + 8; - l = l_ctx + 8; - - coef_probs = fc->coef_probs [1]; - - nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), 0, qcoeff_ptr + 24 * 16); - *a = *l = (nonzeros > 0); - - 
eobs[24] = nonzeros; - eobtotal += nonzeros - 16; - - coef_probs = fc->coef_probs [0]; - skip_dc = 1; - } - else - { - coef_probs = fc->coef_probs [3]; - skip_dc = 0; - } - - for (i = 0; i < 16; ++i) - { - a = a_ctx + (i&3); - l = l_ctx + ((i&0xc)>>2); - - nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), skip_dc, qcoeff_ptr); - *a = *l = (nonzeros > 0); - - nonzeros += skip_dc; - eobs[i] = nonzeros; - eobtotal += nonzeros; - qcoeff_ptr += 16; - } - - coef_probs = fc->coef_probs [2]; - - a_ctx += 4; - l_ctx += 4; - for (i = 16; i < 24; ++i) - { - a = a_ctx + ((i > 19)<<1) + (i&1); - l = l_ctx + ((i > 19)<<1) + ((i&3)>1); - - nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), 0, qcoeff_ptr); - *a = *l = (nonzeros > 0); - - eobs[i] = nonzeros; - eobtotal += nonzeros; - qcoeff_ptr += 16; - } - - return eobtotal; -} - diff --git a/thirdparty/libvpx/vp8/decoder/detokenize.h b/thirdparty/libvpx/vp8/decoder/detokenize.h deleted file mode 100644 index f0b125444f..0000000000 --- a/thirdparty/libvpx/vp8/decoder/detokenize.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP8_DECODER_DETOKENIZE_H_ -#define VP8_DECODER_DETOKENIZE_H_ - -#include "onyxd_int.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_reset_mb_tokens_context(MACROBLOCKD *x); -int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_DECODER_DETOKENIZE_H_ diff --git a/thirdparty/libvpx/vp8/decoder/onyxd_if.c b/thirdparty/libvpx/vp8/decoder/onyxd_if.c deleted file mode 100644 index 3468268a2a..0000000000 --- a/thirdparty/libvpx/vp8/decoder/onyxd_if.c +++ /dev/null @@ -1,521 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "vp8/common/onyxc_int.h" -#if CONFIG_POSTPROC -#include "vp8/common/postproc.h" -#endif -#include "vp8/common/onyxd.h" -#include "onyxd_int.h" -#include "vpx_mem/vpx_mem.h" -#include "vp8/common/alloccommon.h" -#include "vp8/common/loopfilter.h" -#include "vp8/common/swapyv12buffer.h" -#include "vp8/common/threading.h" -#include "decoderthreading.h" -#include <stdio.h> -#include <assert.h> - -#include "vp8/common/quant_common.h" -#include "vp8/common/reconintra.h" -#include "./vpx_dsp_rtcd.h" -#include "./vpx_scale_rtcd.h" -#include "vpx_scale/vpx_scale.h" -#include "vp8/common/systemdependent.h" -#include "vpx_ports/vpx_once.h" -#include "vpx_ports/vpx_timer.h" -#include "detokenize.h" -#if CONFIG_ERROR_CONCEALMENT -#include "error_concealment.h" -#endif -#if ARCH_ARM -#include "vpx_ports/arm.h" -#endif - -extern void vp8_init_loop_filter(VP8_COMMON *cm); -extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi); -static int get_free_fb (VP8_COMMON *cm); -static void ref_cnt_fb (int *buf, 
int *idx, int new_idx); - -static void initialize_dec(void) { - static volatile int init_done = 0; - - if (!init_done) - { - vpx_dsp_rtcd(); - vp8_init_intra_predictors(); - init_done = 1; - } -} - -static void remove_decompressor(VP8D_COMP *pbi) -{ -#if CONFIG_ERROR_CONCEALMENT - vp8_de_alloc_overlap_lists(pbi); -#endif - vp8_remove_common(&pbi->common); - vpx_free(pbi); -} - -static struct VP8D_COMP * create_decompressor(VP8D_CONFIG *oxcf) -{ - VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP)); - - if (!pbi) - return NULL; - - memset(pbi, 0, sizeof(VP8D_COMP)); - - if (setjmp(pbi->common.error.jmp)) - { - pbi->common.error.setjmp = 0; - remove_decompressor(pbi); - return 0; - } - - pbi->common.error.setjmp = 1; - - vp8_create_common(&pbi->common); - - pbi->common.current_video_frame = 0; - pbi->ready_for_new_data = 1; - - /* vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid - * unnecessary calling of vp8cx_init_de_quantizer() for every frame. - */ - vp8cx_init_de_quantizer(pbi); - - vp8_loop_filter_init(&pbi->common); - - pbi->common.error.setjmp = 0; - -#if CONFIG_ERROR_CONCEALMENT - pbi->ec_enabled = oxcf->error_concealment; - pbi->overlaps = NULL; -#else - (void)oxcf; - pbi->ec_enabled = 0; -#endif - /* Error concealment is activated after a key frame has been - * decoded without errors when error concealment is enabled. - */ - pbi->ec_active = 0; - - pbi->decoded_key_frame = 0; - - /* Independent partitions is activated when a frame updates the - * token probability table to have equal probabilities over the - * PREV_COEF context. 
- */ - pbi->independent_partitions = 0; - - vp8_setup_block_dptrs(&pbi->mb); - - once(initialize_dec); - - return pbi; -} - -vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) -{ - VP8_COMMON *cm = &pbi->common; - int ref_fb_idx; - - if (ref_frame_flag == VP8_LAST_FRAME) - ref_fb_idx = cm->lst_fb_idx; - else if (ref_frame_flag == VP8_GOLD_FRAME) - ref_fb_idx = cm->gld_fb_idx; - else if (ref_frame_flag == VP8_ALTR_FRAME) - ref_fb_idx = cm->alt_fb_idx; - else{ - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, - "Invalid reference frame"); - return pbi->common.error.error_code; - } - - if(cm->yv12_fb[ref_fb_idx].y_height != sd->y_height || - cm->yv12_fb[ref_fb_idx].y_width != sd->y_width || - cm->yv12_fb[ref_fb_idx].uv_height != sd->uv_height || - cm->yv12_fb[ref_fb_idx].uv_width != sd->uv_width){ - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, - "Incorrect buffer dimensions"); - } - else - vp8_yv12_copy_frame(&cm->yv12_fb[ref_fb_idx], sd); - - return pbi->common.error.error_code; -} - - -vpx_codec_err_t vp8dx_set_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) -{ - VP8_COMMON *cm = &pbi->common; - int *ref_fb_ptr = NULL; - int free_fb; - - if (ref_frame_flag == VP8_LAST_FRAME) - ref_fb_ptr = &cm->lst_fb_idx; - else if (ref_frame_flag == VP8_GOLD_FRAME) - ref_fb_ptr = &cm->gld_fb_idx; - else if (ref_frame_flag == VP8_ALTR_FRAME) - ref_fb_ptr = &cm->alt_fb_idx; - else{ - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, - "Invalid reference frame"); - return pbi->common.error.error_code; - } - - if(cm->yv12_fb[*ref_fb_ptr].y_height != sd->y_height || - cm->yv12_fb[*ref_fb_ptr].y_width != sd->y_width || - cm->yv12_fb[*ref_fb_ptr].uv_height != sd->uv_height || - cm->yv12_fb[*ref_fb_ptr].uv_width != sd->uv_width){ - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, - "Incorrect buffer dimensions"); - } - else{ - /* Find an empty frame 
buffer. */ - free_fb = get_free_fb(cm); - /* Decrease fb_idx_ref_cnt since it will be increased again in - * ref_cnt_fb() below. */ - cm->fb_idx_ref_cnt[free_fb]--; - - /* Manage the reference counters and copy image. */ - ref_cnt_fb (cm->fb_idx_ref_cnt, ref_fb_ptr, free_fb); - vp8_yv12_copy_frame(sd, &cm->yv12_fb[*ref_fb_ptr]); - } - - return pbi->common.error.error_code; -} - -static int get_free_fb (VP8_COMMON *cm) -{ - int i; - for (i = 0; i < NUM_YV12_BUFFERS; i++) - if (cm->fb_idx_ref_cnt[i] == 0) - break; - - assert(i < NUM_YV12_BUFFERS); - cm->fb_idx_ref_cnt[i] = 1; - return i; -} - -static void ref_cnt_fb (int *buf, int *idx, int new_idx) -{ - if (buf[*idx] > 0) - buf[*idx]--; - - *idx = new_idx; - - buf[new_idx]++; -} - -/* If any buffer copy / swapping is signalled it should be done here. */ -static int swap_frame_buffers (VP8_COMMON *cm) -{ - int err = 0; - - /* The alternate reference frame or golden frame can be updated - * using the new, last, or golden/alt ref frame. If it - * is updated using the newly decoded frame it is a refresh. - * An update using the last or golden/alt ref frame is a copy. 
- */ - if (cm->copy_buffer_to_arf) - { - int new_fb = 0; - - if (cm->copy_buffer_to_arf == 1) - new_fb = cm->lst_fb_idx; - else if (cm->copy_buffer_to_arf == 2) - new_fb = cm->gld_fb_idx; - else - err = -1; - - ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->alt_fb_idx, new_fb); - } - - if (cm->copy_buffer_to_gf) - { - int new_fb = 0; - - if (cm->copy_buffer_to_gf == 1) - new_fb = cm->lst_fb_idx; - else if (cm->copy_buffer_to_gf == 2) - new_fb = cm->alt_fb_idx; - else - err = -1; - - ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->gld_fb_idx, new_fb); - } - - if (cm->refresh_golden_frame) - ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->gld_fb_idx, cm->new_fb_idx); - - if (cm->refresh_alt_ref_frame) - ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->alt_fb_idx, cm->new_fb_idx); - - if (cm->refresh_last_frame) - { - ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->lst_fb_idx, cm->new_fb_idx); - - cm->frame_to_show = &cm->yv12_fb[cm->lst_fb_idx]; - } - else - cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx]; - - cm->fb_idx_ref_cnt[cm->new_fb_idx]--; - - return err; -} - -static int check_fragments_for_errors(VP8D_COMP *pbi) -{ - if (!pbi->ec_active && - pbi->fragments.count <= 1 && pbi->fragments.sizes[0] == 0) - { - VP8_COMMON *cm = &pbi->common; - - /* If error concealment is disabled we won't signal missing frames - * to the decoder. - */ - if (cm->fb_idx_ref_cnt[cm->lst_fb_idx] > 1) - { - /* The last reference shares buffer with another reference - * buffer. Move it to its own buffer before setting it as - * corrupt, otherwise we will make multiple buffers corrupt. - */ - const int prev_idx = cm->lst_fb_idx; - cm->fb_idx_ref_cnt[prev_idx]--; - cm->lst_fb_idx = get_free_fb(cm); - vp8_yv12_copy_frame(&cm->yv12_fb[prev_idx], - &cm->yv12_fb[cm->lst_fb_idx]); - } - /* This is used to signal that we are missing frames. - * We do not know if the missing frame(s) was supposed to update - * any of the reference buffers, but we act conservative and - * mark only the last buffer as corrupted. 
- */ - cm->yv12_fb[cm->lst_fb_idx].corrupted = 1; - - /* Signal that we have no frame to show. */ - cm->show_frame = 0; - - /* Nothing more to do. */ - return 0; - } - - return 1; -} - -int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size, - const uint8_t *source, - int64_t time_stamp) -{ - VP8_COMMON *cm = &pbi->common; - int retcode = -1; - (void)size; - (void)source; - - pbi->common.error.error_code = VPX_CODEC_OK; - - retcode = check_fragments_for_errors(pbi); - if(retcode <= 0) - return retcode; - - cm->new_fb_idx = get_free_fb (cm); - - /* setup reference frames for vp8_decode_frame */ - pbi->dec_fb_ref[INTRA_FRAME] = &cm->yv12_fb[cm->new_fb_idx]; - pbi->dec_fb_ref[LAST_FRAME] = &cm->yv12_fb[cm->lst_fb_idx]; - pbi->dec_fb_ref[GOLDEN_FRAME] = &cm->yv12_fb[cm->gld_fb_idx]; - pbi->dec_fb_ref[ALTREF_FRAME] = &cm->yv12_fb[cm->alt_fb_idx]; - - if (setjmp(pbi->common.error.jmp)) - { - /* We do not know if the missing frame(s) was supposed to update - * any of the reference buffers, but we act conservative and - * mark only the last buffer as corrupted. 
- */ - cm->yv12_fb[cm->lst_fb_idx].corrupted = 1; - - if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) - cm->fb_idx_ref_cnt[cm->new_fb_idx]--; - - goto decode_exit; - } - - pbi->common.error.setjmp = 1; - - retcode = vp8_decode_frame(pbi); - - if (retcode < 0) - { - if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) - cm->fb_idx_ref_cnt[cm->new_fb_idx]--; - - pbi->common.error.error_code = VPX_CODEC_ERROR; - goto decode_exit; - } - - if (swap_frame_buffers (cm)) - { - pbi->common.error.error_code = VPX_CODEC_ERROR; - goto decode_exit; - } - - vp8_clear_system_state(); - - if (cm->show_frame) - { - cm->current_video_frame++; - cm->show_frame_mi = cm->mi; - } - - #if CONFIG_ERROR_CONCEALMENT - /* swap the mode infos to storage for future error concealment */ - if (pbi->ec_enabled && pbi->common.prev_mi) - { - MODE_INFO* tmp = pbi->common.prev_mi; - int row, col; - pbi->common.prev_mi = pbi->common.mi; - pbi->common.mi = tmp; - - /* Propagate the segment_ids to the next frame */ - for (row = 0; row < pbi->common.mb_rows; ++row) - { - for (col = 0; col < pbi->common.mb_cols; ++col) - { - const int i = row*pbi->common.mode_info_stride + col; - pbi->common.mi[i].mbmi.segment_id = - pbi->common.prev_mi[i].mbmi.segment_id; - } - } - } -#endif - - pbi->ready_for_new_data = 0; - pbi->last_time_stamp = time_stamp; - -decode_exit: - pbi->common.error.setjmp = 0; - vp8_clear_system_state(); - return retcode; -} -int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags) -{ - int ret = -1; - - if (pbi->ready_for_new_data == 1) - return ret; - - /* ie no raw frame to show!!! 
*/ - if (pbi->common.show_frame == 0) - return ret; - - pbi->ready_for_new_data = 1; - *time_stamp = pbi->last_time_stamp; - *time_end_stamp = 0; - -#if CONFIG_POSTPROC - ret = vp8_post_proc_frame(&pbi->common, sd, flags); -#else - (void)flags; - - if (pbi->common.frame_to_show) - { - *sd = *pbi->common.frame_to_show; - sd->y_width = pbi->common.Width; - sd->y_height = pbi->common.Height; - sd->uv_height = pbi->common.Height / 2; - ret = 0; - } - else - { - ret = -1; - } - -#endif /*!CONFIG_POSTPROC*/ - vp8_clear_system_state(); - return ret; -} - - -/* This function as written isn't decoder specific, but the encoder has - * much faster ways of computing this, so it's ok for it to live in a - * decode specific file. - */ -int vp8dx_references_buffer( VP8_COMMON *oci, int ref_frame ) -{ - const MODE_INFO *mi = oci->mi; - int mb_row, mb_col; - - for (mb_row = 0; mb_row < oci->mb_rows; mb_row++) - { - for (mb_col = 0; mb_col < oci->mb_cols; mb_col++,mi++) - { - if( mi->mbmi.ref_frame == ref_frame) - return 1; - } - mi++; - } - return 0; - -} - -int vp8_create_decoder_instances(struct frame_buffers *fb, VP8D_CONFIG *oxcf) -{ - if(!fb->use_frame_threads) - { - /* decoder instance for single thread mode */ - fb->pbi[0] = create_decompressor(oxcf); - if(!fb->pbi[0]) - return VPX_CODEC_ERROR; - -#if CONFIG_MULTITHREAD - /* enable row-based threading only when use_frame_threads - * is disabled */ - fb->pbi[0]->max_threads = oxcf->max_threads; - vp8_decoder_create_threads(fb->pbi[0]); -#endif - } - else - { - /* TODO : create frame threads and decoder instances for each - * thread here */ - } - - return VPX_CODEC_OK; -} - -int vp8_remove_decoder_instances(struct frame_buffers *fb) -{ - if(!fb->use_frame_threads) - { - VP8D_COMP *pbi = fb->pbi[0]; - - if (!pbi) - return VPX_CODEC_ERROR; -#if CONFIG_MULTITHREAD - if (pbi->b_multithreaded_rd) - vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows); - vp8_decoder_remove_threads(pbi); -#endif - - /* decoder instance for single 
thread mode */ - remove_decompressor(pbi); - } - else - { - /* TODO : remove frame threads and decoder instances for each - * thread here */ - } - - return VPX_CODEC_OK; -} diff --git a/thirdparty/libvpx/vp8/decoder/onyxd_int.h b/thirdparty/libvpx/vp8/decoder/onyxd_int.h deleted file mode 100644 index 313fe01c07..0000000000 --- a/thirdparty/libvpx/vp8/decoder/onyxd_int.h +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_DECODER_ONYXD_INT_H_ -#define VP8_DECODER_ONYXD_INT_H_ - -#include "vpx_config.h" -#include "vp8/common/onyxd.h" -#include "treereader.h" -#include "vp8/common/onyxc_int.h" -#include "vp8/common/threading.h" - -#if CONFIG_ERROR_CONCEALMENT -#include "ec_types.h" -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct -{ - int ithread; - void *ptr1; - void *ptr2; -} DECODETHREAD_DATA; - -typedef struct -{ - MACROBLOCKD mbd; -} MB_ROW_DEC; - - -typedef struct -{ - int enabled; - unsigned int count; - const unsigned char *ptrs[MAX_PARTITIONS]; - unsigned int sizes[MAX_PARTITIONS]; -} FRAGMENT_DATA; - -#define MAX_FB_MT_DEC 32 - -struct frame_buffers -{ - /* - * this struct will be populated with frame buffer management - * info in future commits. 
*/ - - /* enable/disable frame-based threading */ - int use_frame_threads; - - /* decoder instances */ - struct VP8D_COMP *pbi[MAX_FB_MT_DEC]; - -}; - -typedef struct VP8D_COMP -{ - DECLARE_ALIGNED(16, MACROBLOCKD, mb); - - YV12_BUFFER_CONFIG *dec_fb_ref[NUM_YV12_BUFFERS]; - - DECLARE_ALIGNED(16, VP8_COMMON, common); - - /* the last partition will be used for the modes/mvs */ - vp8_reader mbc[MAX_PARTITIONS]; - - VP8D_CONFIG oxcf; - - FRAGMENT_DATA fragments; - -#if CONFIG_MULTITHREAD - /* variable for threading */ - - int b_multithreaded_rd; - int max_threads; - int current_mb_col_main; - unsigned int decoding_thread_count; - int allocated_decoding_thread_count; - - int mt_baseline_filter_level[MAX_MB_SEGMENTS]; - int sync_range; - int *mt_current_mb_col; /* Each row remembers its already decoded column. */ - pthread_mutex_t *pmutex; - pthread_mutex_t mt_mutex; /* mutex for b_multithreaded_rd */ - - unsigned char **mt_yabove_row; /* mb_rows x width */ - unsigned char **mt_uabove_row; - unsigned char **mt_vabove_row; - unsigned char **mt_yleft_col; /* mb_rows x 16 */ - unsigned char **mt_uleft_col; /* mb_rows x 8 */ - unsigned char **mt_vleft_col; /* mb_rows x 8 */ - - MB_ROW_DEC *mb_row_di; - DECODETHREAD_DATA *de_thread_data; - - pthread_t *h_decoding_thread; - sem_t *h_event_start_decoding; - sem_t h_event_end_decoding; - /* end of threading data */ -#endif - - int64_t last_time_stamp; - int ready_for_new_data; - - vp8_prob prob_intra; - vp8_prob prob_last; - vp8_prob prob_gf; - vp8_prob prob_skip_false; - -#if CONFIG_ERROR_CONCEALMENT - MB_OVERLAP *overlaps; - /* the mb num from which modes and mvs (first partition) are corrupt */ - unsigned int mvs_corrupt_from_mb; -#endif - int ec_enabled; - int ec_active; - int decoded_key_frame; - int independent_partitions; - int frame_corrupt_residual; - - vpx_decrypt_cb decrypt_cb; - void *decrypt_state; -} VP8D_COMP; - -int vp8_decode_frame(VP8D_COMP *cpi); - -int vp8_create_decoder_instances(struct frame_buffers *fb, 
VP8D_CONFIG *oxcf); -int vp8_remove_decoder_instances(struct frame_buffers *fb); - -#if CONFIG_DEBUG -#define CHECK_MEM_ERROR(lval,expr) do {\ - lval = (expr); \ - if(!lval) \ - vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,\ - "Failed to allocate "#lval" at %s:%d", \ - __FILE__,__LINE__);\ - } while(0) -#else -#define CHECK_MEM_ERROR(lval,expr) do {\ - lval = (expr); \ - if(!lval) \ - vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,\ - "Failed to allocate "#lval);\ - } while(0) -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_DECODER_ONYXD_INT_H_ diff --git a/thirdparty/libvpx/vp8/decoder/threading.c b/thirdparty/libvpx/vp8/decoder/threading.c deleted file mode 100644 index 3c1b8387ec..0000000000 --- a/thirdparty/libvpx/vp8/decoder/threading.c +++ /dev/null @@ -1,928 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vpx_config.h" -#include "vp8_rtcd.h" -#if !defined(WIN32) && CONFIG_OS_SUPPORT == 1 -# include <unistd.h> -#endif -#include "onyxd_int.h" -#include "vpx_mem/vpx_mem.h" -#include "vp8/common/threading.h" - -#include "vp8/common/loopfilter.h" -#include "vp8/common/extend.h" -#include "vpx_ports/vpx_timer.h" -#include "detokenize.h" -#include "vp8/common/reconintra4x4.h" -#include "vp8/common/reconinter.h" -#include "vp8/common/reconintra.h" -#include "vp8/common/setupintrarecon.h" -#if CONFIG_ERROR_CONCEALMENT -#include "error_concealment.h" -#endif - -#define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n))) -#define CALLOC_ARRAY_ALIGNED(p, n, algn) do { \ - CHECK_MEM_ERROR((p), vpx_memalign((algn), sizeof(*(p)) * (n))); \ - memset((p), 0, (n) * sizeof(*(p))); \ -} while (0) - - -void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd); - -static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count) -{ - VP8_COMMON *const pc = & pbi->common; - int i; - - for (i = 0; i < count; i++) - { - MACROBLOCKD *mbd = &mbrd[i].mbd; - mbd->subpixel_predict = xd->subpixel_predict; - mbd->subpixel_predict8x4 = xd->subpixel_predict8x4; - mbd->subpixel_predict8x8 = xd->subpixel_predict8x8; - mbd->subpixel_predict16x16 = xd->subpixel_predict16x16; - - mbd->frame_type = pc->frame_type; - mbd->pre = xd->pre; - mbd->dst = xd->dst; - - mbd->segmentation_enabled = xd->segmentation_enabled; - mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta; - memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); - - /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/ - memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas)); - /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/ - memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas)); - /*unsigned char mode_ref_lf_delta_enabled; - unsigned char mode_ref_lf_delta_update;*/ - 
mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled; - mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update; - - mbd->current_bc = &pbi->mbc[0]; - - memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); - memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); - memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); - memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); - - mbd->fullpixel_mask = 0xffffffff; - - if (pc->full_pixel) - mbd->fullpixel_mask = 0xfffffff8; - - } - - for (i = 0; i < pc->mb_rows; i++) - pbi->mt_current_mb_col[i] = -1; -} - -static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, - unsigned int mb_idx) -{ - MB_PREDICTION_MODE mode; - int i; -#if CONFIG_ERROR_CONCEALMENT - int corruption_detected = 0; -#else - (void)mb_idx; -#endif - - if (xd->mode_info_context->mbmi.mb_skip_coeff) - { - vp8_reset_mb_tokens_context(xd); - } - else if (!vp8dx_bool_error(xd->current_bc)) - { - int eobtotal; - eobtotal = vp8_decode_mb_tokens(pbi, xd); - - /* Special case: Force the loopfilter to skip when eobtotal is zero */ - xd->mode_info_context->mbmi.mb_skip_coeff = (eobtotal==0); - } - - mode = xd->mode_info_context->mbmi.mode; - - if (xd->segmentation_enabled) - vp8_mb_init_dequantizer(pbi, xd); - - -#if CONFIG_ERROR_CONCEALMENT - - if(pbi->ec_active) - { - int throw_residual; - /* When we have independent partitions we can apply residual even - * though other partitions within the frame are corrupt. - */ - throw_residual = (!pbi->independent_partitions && - pbi->frame_corrupt_residual); - throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc)); - - if ((mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual)) - { - /* MB with corrupt residuals or corrupt mode/motion vectors. - * Better to use the predictor as reconstruction. 
- */ - pbi->frame_corrupt_residual = 1; - memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); - - corruption_detected = 1; - - /* force idct to be skipped for B_PRED and use the - * prediction only for reconstruction - * */ - memset(xd->eobs, 0, 25); - } - } -#endif - - /* do prediction */ - if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) - { - vp8_build_intra_predictors_mbuv_s(xd, - xd->recon_above[1], - xd->recon_above[2], - xd->recon_left[1], - xd->recon_left[2], - xd->recon_left_stride[1], - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride); - - if (mode != B_PRED) - { - vp8_build_intra_predictors_mby_s(xd, - xd->recon_above[0], - xd->recon_left[0], - xd->recon_left_stride[0], - xd->dst.y_buffer, - xd->dst.y_stride); - } - else - { - short *DQC = xd->dequant_y1; - int dst_stride = xd->dst.y_stride; - - /* clear out residual eob info */ - if(xd->mode_info_context->mbmi.mb_skip_coeff) - memset(xd->eobs, 0, 25); - - intra_prediction_down_copy(xd, xd->recon_above[0] + 16); - - for (i = 0; i < 16; i++) - { - BLOCKD *b = &xd->block[i]; - unsigned char *dst = xd->dst.y_buffer + b->offset; - B_PREDICTION_MODE b_mode = - xd->mode_info_context->bmi[i].as_mode; - unsigned char *Above; - unsigned char *yleft; - int left_stride; - unsigned char top_left; - - /*Caution: For some b_mode, it needs 8 pixels (4 above + 4 above-right).*/ - if (i < 4 && pbi->common.filter_level) - Above = xd->recon_above[0] + b->offset; - else - Above = dst - dst_stride; - - if (i%4==0 && pbi->common.filter_level) - { - yleft = xd->recon_left[0] + i; - left_stride = 1; - } - else - { - yleft = dst - 1; - left_stride = dst_stride; - } - - if ((i==4 || i==8 || i==12) && pbi->common.filter_level) - top_left = *(xd->recon_left[0] + i - 1); - else - top_left = Above[-1]; - - vp8_intra4x4_predict(Above, yleft, left_stride, - b_mode, dst, dst_stride, top_left); - - if (xd->eobs[i] ) - { - if (xd->eobs[i] > 1) - { - vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride); - } - else - { - 
vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0], - dst, dst_stride, dst, dst_stride); - memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); - } - } - } - } - } - else - { - vp8_build_inter_predictors_mb(xd); - } - - -#if CONFIG_ERROR_CONCEALMENT - if (corruption_detected) - { - return; - } -#endif - - if(!xd->mode_info_context->mbmi.mb_skip_coeff) - { - /* dequantization and idct */ - if (mode != B_PRED) - { - short *DQC = xd->dequant_y1; - - if (mode != SPLITMV) - { - BLOCKD *b = &xd->block[24]; - - /* do 2nd order transform on the dc block */ - if (xd->eobs[24] > 1) - { - vp8_dequantize_b(b, xd->dequant_y2); - - vp8_short_inv_walsh4x4(&b->dqcoeff[0], - xd->qcoeff); - memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0])); - } - else - { - b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0]; - vp8_short_inv_walsh4x4_1(&b->dqcoeff[0], - xd->qcoeff); - memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); - } - - /* override the dc dequant constant in order to preserve the - * dc components - */ - DQC = xd->dequant_y1_dc; - } - - vp8_dequant_idct_add_y_block - (xd->qcoeff, DQC, - xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs); - } - - vp8_dequant_idct_add_uv_block - (xd->qcoeff+16*16, xd->dequant_uv, - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->eobs+16); - } -} - -static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row) -{ - const int *last_row_current_mb_col; - int *current_mb_col; - int mb_row; - VP8_COMMON *pc = &pbi->common; - const int nsync = pbi->sync_range; - const int first_row_no_sync_above = pc->mb_cols + nsync; - int num_part = 1 << pbi->common.multi_token_partition; - int last_mb_row = start_mb_row; - - YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; - YV12_BUFFER_CONFIG *yv12_fb_lst = pbi->dec_fb_ref[LAST_FRAME]; - - int recon_y_stride = yv12_fb_new->y_stride; - int recon_uv_stride = yv12_fb_new->uv_stride; - - unsigned char *ref_buffer[MAX_REF_FRAMES][3]; - unsigned char *dst_buffer[3]; - int i; - int 
ref_fb_corrupted[MAX_REF_FRAMES]; - - ref_fb_corrupted[INTRA_FRAME] = 0; - - for(i = 1; i < MAX_REF_FRAMES; i++) - { - YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i]; - - ref_buffer[i][0] = this_fb->y_buffer; - ref_buffer[i][1] = this_fb->u_buffer; - ref_buffer[i][2] = this_fb->v_buffer; - - ref_fb_corrupted[i] = this_fb->corrupted; - } - - dst_buffer[0] = yv12_fb_new->y_buffer; - dst_buffer[1] = yv12_fb_new->u_buffer; - dst_buffer[2] = yv12_fb_new->v_buffer; - - xd->up_available = (start_mb_row != 0); - - xd->mode_info_context = pc->mi + pc->mode_info_stride * start_mb_row; - xd->mode_info_stride = pc->mode_info_stride; - - for (mb_row = start_mb_row; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1)) - { - int recon_yoffset, recon_uvoffset; - int mb_col; - int filter_level; - loop_filter_info_n *lfi_n = &pc->lf_info; - - /* save last row processed by this thread */ - last_mb_row = mb_row; - /* select bool coder for current partition */ - xd->current_bc = &pbi->mbc[mb_row%num_part]; - - if (mb_row > 0) - last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1]; - else - last_row_current_mb_col = &first_row_no_sync_above; - - current_mb_col = &pbi->mt_current_mb_col[mb_row]; - - recon_yoffset = mb_row * recon_y_stride * 16; - recon_uvoffset = mb_row * recon_uv_stride * 8; - - /* reset contexts */ - xd->above_context = pc->above_context; - memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); - - xd->left_available = 0; - - xd->mb_to_top_edge = -((mb_row * 16)) << 3; - xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; - - if (pbi->common.filter_level) - { - xd->recon_above[0] = pbi->mt_yabove_row[mb_row] + 0*16 +32; - xd->recon_above[1] = pbi->mt_uabove_row[mb_row] + 0*8 +16; - xd->recon_above[2] = pbi->mt_vabove_row[mb_row] + 0*8 +16; - - xd->recon_left[0] = pbi->mt_yleft_col[mb_row]; - xd->recon_left[1] = pbi->mt_uleft_col[mb_row]; - xd->recon_left[2] = pbi->mt_vleft_col[mb_row]; - - /* TODO: move to outside row loop */ - 
xd->recon_left_stride[0] = 1; - xd->recon_left_stride[1] = 1; - } - else - { - xd->recon_above[0] = dst_buffer[0] + recon_yoffset; - xd->recon_above[1] = dst_buffer[1] + recon_uvoffset; - xd->recon_above[2] = dst_buffer[2] + recon_uvoffset; - - xd->recon_left[0] = xd->recon_above[0] - 1; - xd->recon_left[1] = xd->recon_above[1] - 1; - xd->recon_left[2] = xd->recon_above[2] - 1; - - xd->recon_above[0] -= xd->dst.y_stride; - xd->recon_above[1] -= xd->dst.uv_stride; - xd->recon_above[2] -= xd->dst.uv_stride; - - /* TODO: move to outside row loop */ - xd->recon_left_stride[0] = xd->dst.y_stride; - xd->recon_left_stride[1] = xd->dst.uv_stride; - - setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1], - xd->recon_left[2], xd->dst.y_stride, - xd->dst.uv_stride); - } - - for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) { - if (((mb_col - 1) % nsync) == 0) { - pthread_mutex_t *mutex = &pbi->pmutex[mb_row]; - protected_write(mutex, current_mb_col, mb_col - 1); - } - - if (mb_row && !(mb_col & (nsync - 1))) { - pthread_mutex_t *mutex = &pbi->pmutex[mb_row-1]; - sync_read(mutex, mb_col, last_row_current_mb_col, nsync); - } - - /* Distance of MB to the various image edges. - * These are specified to 8th pel as they are always - * compared to values that are in 1/8th pel units. - */ - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; - - #if CONFIG_ERROR_CONCEALMENT - { - int corrupt_residual = - (!pbi->independent_partitions && - pbi->frame_corrupt_residual) || - vp8dx_bool_error(xd->current_bc); - if (pbi->ec_active && - (xd->mode_info_context->mbmi.ref_frame == - INTRA_FRAME) && - corrupt_residual) - { - /* We have an intra block with corrupt - * coefficients, better to conceal with an inter - * block. - * Interpolate MVs from neighboring MBs - * - * Note that for the first mb with corrupt - * residual in a frame, we might not discover - * that before decoding the residual. 
That - * happens after this check, and therefore no - * inter concealment will be done. - */ - vp8_interpolate_motion(xd, - mb_row, mb_col, - pc->mb_rows, pc->mb_cols); - } - } - #endif - - - xd->dst.y_buffer = dst_buffer[0] + recon_yoffset; - xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset; - xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset; - - xd->pre.y_buffer = ref_buffer[xd->mode_info_context->mbmi.ref_frame][0] + recon_yoffset; - xd->pre.u_buffer = ref_buffer[xd->mode_info_context->mbmi.ref_frame][1] + recon_uvoffset; - xd->pre.v_buffer = ref_buffer[xd->mode_info_context->mbmi.ref_frame][2] + recon_uvoffset; - - /* propagate errors from reference frames */ - xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame]; - - mt_decode_macroblock(pbi, xd, 0); - - xd->left_available = 1; - - /* check if the boolean decoder has suffered an error */ - xd->corrupted |= vp8dx_bool_error(xd->current_bc); - - xd->recon_above[0] += 16; - xd->recon_above[1] += 8; - xd->recon_above[2] += 8; - - if (!pbi->common.filter_level) - { - xd->recon_left[0] += 16; - xd->recon_left[1] += 8; - xd->recon_left[2] += 8; - } - - if (pbi->common.filter_level) - { - int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED && - xd->mode_info_context->mbmi.mode != SPLITMV && - xd->mode_info_context->mbmi.mb_skip_coeff); - - const int mode_index = lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode]; - const int seg = xd->mode_info_context->mbmi.segment_id; - const int ref_frame = xd->mode_info_context->mbmi.ref_frame; - - filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; - - if( mb_row != pc->mb_rows-1 ) - { - /* Save decoded MB last row data for next-row decoding */ - memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16); - memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8); - memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * 
recon_uv_stride), 8); - } - - /* save left_col for next MB decoding */ - if(mb_col != pc->mb_cols-1) - { - MODE_INFO *next = xd->mode_info_context +1; - - if (next->mbmi.ref_frame == INTRA_FRAME) - { - for (i = 0; i < 16; i++) - pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15]; - for (i = 0; i < 8; i++) - { - pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7]; - pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7]; - } - } - } - - /* loopfilter on this macroblock. */ - if (filter_level) - { - if(pc->filter_type == NORMAL_LOOPFILTER) - { - loop_filter_info lfi; - FRAME_TYPE frame_type = pc->frame_type; - const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; - lfi.mblim = lfi_n->mblim[filter_level]; - lfi.blim = lfi_n->blim[filter_level]; - lfi.lim = lfi_n->lim[filter_level]; - lfi.hev_thr = lfi_n->hev_thr[hev_index]; - - if (mb_col > 0) - vp8_loop_filter_mbv - (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); - - if (!skip_lf) - vp8_loop_filter_bv - (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); - - /* don't apply across umv border */ - if (mb_row > 0) - vp8_loop_filter_mbh - (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); - - if (!skip_lf) - vp8_loop_filter_bh - (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); - } - else - { - if (mb_col > 0) - vp8_loop_filter_simple_mbv - (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - vp8_loop_filter_simple_bv - (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]); - - /* don't apply across umv border */ - if (mb_row > 0) - vp8_loop_filter_simple_mbh - (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]); - - if (!skip_lf) - vp8_loop_filter_simple_bh - (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]); 
- } - } - - } - - recon_yoffset += 16; - recon_uvoffset += 8; - - ++xd->mode_info_context; /* next mb */ - - xd->above_context++; - } - - /* adjust to the next row of mbs */ - if (pbi->common.filter_level) - { - if(mb_row != pc->mb_rows-1) - { - int lasty = yv12_fb_lst->y_width + VP8BORDERINPIXELS; - int lastuv = (yv12_fb_lst->y_width>>1) + (VP8BORDERINPIXELS>>1); - - for (i = 0; i < 4; i++) - { - pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1]; - pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1]; - pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1]; - } - } - } - else - vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16, - xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); - - /* last MB of row is ready just after extension is done */ - protected_write(&pbi->pmutex[mb_row], current_mb_col, mb_col + nsync); - - ++xd->mode_info_context; /* skip prediction column */ - xd->up_available = 1; - - /* since we have multithread */ - xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count; - } - - /* signal end of frame decoding if this thread processed the last mb_row */ - if (last_mb_row == (pc->mb_rows - 1)) - sem_post(&pbi->h_event_end_decoding); - -} - - -static THREAD_FUNCTION thread_decoding_proc(void *p_data) -{ - int ithread = ((DECODETHREAD_DATA *)p_data)->ithread; - VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1); - MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2); - ENTROPY_CONTEXT_PLANES mb_row_left_context; - - while (1) - { - if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) - break; - - if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) - { - if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) - break; - else - { - MACROBLOCKD *xd = &mbrd->mbd; - xd->left_context = &mb_row_left_context; - - mt_decode_mb_rows(pbi, xd, ithread+1); - } - } - } - - return 0 ; -} - - 
-void vp8_decoder_create_threads(VP8D_COMP *pbi) -{ - int core_count = 0; - unsigned int ithread; - - pbi->b_multithreaded_rd = 0; - pbi->allocated_decoding_thread_count = 0; - pthread_mutex_init(&pbi->mt_mutex, NULL); - - /* limit decoding threads to the max number of token partitions */ - core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads; - - /* limit decoding threads to the available cores */ - if (core_count > pbi->common.processor_core_count) - core_count = pbi->common.processor_core_count; - - if (core_count > 1) - { - pbi->b_multithreaded_rd = 1; - pbi->decoding_thread_count = core_count - 1; - - CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count); - CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count); - CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32); - CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count); - - for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++) - { - sem_init(&pbi->h_event_start_decoding[ithread], 0, 0); - - vp8_setup_block_dptrs(&pbi->mb_row_di[ithread].mbd); - - pbi->de_thread_data[ithread].ithread = ithread; - pbi->de_thread_data[ithread].ptr1 = (void *)pbi; - pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread]; - - pthread_create(&pbi->h_decoding_thread[ithread], 0, thread_decoding_proc, (&pbi->de_thread_data[ithread])); - } - - sem_init(&pbi->h_event_end_decoding, 0, 0); - - pbi->allocated_decoding_thread_count = pbi->decoding_thread_count; - } -} - - -void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) -{ - int i; - - if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) - { - /* De-allocate mutex */ - if (pbi->pmutex != NULL) { - for (i = 0; i < mb_rows; i++) { - pthread_mutex_destroy(&pbi->pmutex[i]); - } - vpx_free(pbi->pmutex); - pbi->pmutex = NULL; - } - - vpx_free(pbi->mt_current_mb_col); - pbi->mt_current_mb_col = NULL ; - - /* Free above_row buffers. 
*/ - if (pbi->mt_yabove_row) - { - for (i=0; i< mb_rows; i++) - { - vpx_free(pbi->mt_yabove_row[i]); - pbi->mt_yabove_row[i] = NULL ; - } - vpx_free(pbi->mt_yabove_row); - pbi->mt_yabove_row = NULL ; - } - - if (pbi->mt_uabove_row) - { - for (i=0; i< mb_rows; i++) - { - vpx_free(pbi->mt_uabove_row[i]); - pbi->mt_uabove_row[i] = NULL ; - } - vpx_free(pbi->mt_uabove_row); - pbi->mt_uabove_row = NULL ; - } - - if (pbi->mt_vabove_row) - { - for (i=0; i< mb_rows; i++) - { - vpx_free(pbi->mt_vabove_row[i]); - pbi->mt_vabove_row[i] = NULL ; - } - vpx_free(pbi->mt_vabove_row); - pbi->mt_vabove_row = NULL ; - } - - /* Free left_col buffers. */ - if (pbi->mt_yleft_col) - { - for (i=0; i< mb_rows; i++) - { - vpx_free(pbi->mt_yleft_col[i]); - pbi->mt_yleft_col[i] = NULL ; - } - vpx_free(pbi->mt_yleft_col); - pbi->mt_yleft_col = NULL ; - } - - if (pbi->mt_uleft_col) - { - for (i=0; i< mb_rows; i++) - { - vpx_free(pbi->mt_uleft_col[i]); - pbi->mt_uleft_col[i] = NULL ; - } - vpx_free(pbi->mt_uleft_col); - pbi->mt_uleft_col = NULL ; - } - - if (pbi->mt_vleft_col) - { - for (i=0; i< mb_rows; i++) - { - vpx_free(pbi->mt_vleft_col[i]); - pbi->mt_vleft_col[i] = NULL ; - } - vpx_free(pbi->mt_vleft_col); - pbi->mt_vleft_col = NULL ; - } - } -} - - -void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) -{ - VP8_COMMON *const pc = & pbi->common; - int i; - int uv_width; - - if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) - { - vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows); - - /* our internal buffers are always multiples of 16 */ - if ((width & 0xf) != 0) - width += 16 - (width & 0xf); - - if (width < 640) pbi->sync_range = 1; - else if (width <= 1280) pbi->sync_range = 8; - else if (width <= 2560) pbi->sync_range =16; - else pbi->sync_range = 32; - - uv_width = width >>1; - - /* Allocate mutex */ - CHECK_MEM_ERROR(pbi->pmutex, vpx_malloc(sizeof(*pbi->pmutex) * - pc->mb_rows)); - if (pbi->pmutex) { - for (i = 0; i < pc->mb_rows; i++) { - 
pthread_mutex_init(&pbi->pmutex[i], NULL); - } - } - - /* Allocate an int for each mb row. */ - CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows); - - /* Allocate memory for above_row buffers. */ - CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows); - for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)))); - - CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows); - for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS))); - - CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows); - for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS))); - - /* Allocate memory for left_col buffers. */ - CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows); - for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1)); - - CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows); - for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); - - CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows); - for (i = 0; i < pc->mb_rows; i++) - CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); - } -} - - -void vp8_decoder_remove_threads(VP8D_COMP *pbi) -{ - /* shutdown MB Decoding thread; */ - if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) - { - int i; - - protected_write(&pbi->mt_mutex, &pbi->b_multithreaded_rd, 0); - - /* allow all threads to exit */ - for (i = 0; i < pbi->allocated_decoding_thread_count; i++) - { - sem_post(&pbi->h_event_start_decoding[i]); - pthread_join(pbi->h_decoding_thread[i], NULL); - } - - for (i = 0; i < pbi->allocated_decoding_thread_count; i++) - { - sem_destroy(&pbi->h_event_start_decoding[i]); - } - - sem_destroy(&pbi->h_event_end_decoding); - - vpx_free(pbi->h_decoding_thread); 
- pbi->h_decoding_thread = NULL; - - vpx_free(pbi->h_event_start_decoding); - pbi->h_event_start_decoding = NULL; - - vpx_free(pbi->mb_row_di); - pbi->mb_row_di = NULL ; - - vpx_free(pbi->de_thread_data); - pbi->de_thread_data = NULL; - } - pthread_mutex_destroy(&pbi->mt_mutex); -} - -void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) -{ - VP8_COMMON *pc = &pbi->common; - unsigned int i; - int j; - - int filter_level = pc->filter_level; - YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; - - if (filter_level) - { - /* Set above_row buffer to 127 for decoding first MB row */ - memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, yv12_fb_new->y_width + 5); - memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5); - memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5); - - for (j=1; j<pc->mb_rows; j++) - { - memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1); - memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); - memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); - } - - /* Set left_col to 129 initially */ - for (j=0; j<pc->mb_rows; j++) - { - memset(pbi->mt_yleft_col[j], (unsigned char)129, 16); - memset(pbi->mt_uleft_col[j], (unsigned char)129, 8); - memset(pbi->mt_vleft_col[j], (unsigned char)129, 8); - } - - /* Initialize the loop filter for this frame. 
*/ - vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level); - } - else - vp8_setup_intra_recon_top_line(yv12_fb_new); - - setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count); - - for (i = 0; i < pbi->decoding_thread_count; i++) - sem_post(&pbi->h_event_start_decoding[i]); - - mt_decode_mb_rows(pbi, xd, 0); - - sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */ -} diff --git a/thirdparty/libvpx/vp8/decoder/treereader.h b/thirdparty/libvpx/vp8/decoder/treereader.h deleted file mode 100644 index f7d23c3698..0000000000 --- a/thirdparty/libvpx/vp8/decoder/treereader.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP8_DECODER_TREEREADER_H_ -#define VP8_DECODER_TREEREADER_H_ - -#include "./vpx_config.h" -#include "vp8/common/treecoder.h" -#include "dboolhuff.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef BOOL_DECODER vp8_reader; - -#define vp8_read vp8dx_decode_bool -#define vp8_read_literal vp8_decode_value -#define vp8_read_bit(R) vp8_read(R, vp8_prob_half) - - -/* Intent of tree data structure is to make decoding trivial. */ - -static INLINE int vp8_treed_read( - vp8_reader *const r, /* !!! must return a 0 or 1 !!! 
*/ - vp8_tree t, - const vp8_prob *const p -) -{ - register vp8_tree_index i = 0; - - while ((i = t[ i + vp8_read(r, p[i>>1])]) > 0) ; - - return -i; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP8_DECODER_TREEREADER_H_ diff --git a/thirdparty/libvpx/vp8/vp8_dx_iface.c b/thirdparty/libvpx/vp8/vp8_dx_iface.c deleted file mode 100644 index fc9288d62b..0000000000 --- a/thirdparty/libvpx/vp8/vp8_dx_iface.c +++ /dev/null @@ -1,828 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include <assert.h> -#include <stdlib.h> -#include <string.h> -#include "./vp8_rtcd.h" -#include "./vpx_dsp_rtcd.h" -#include "./vpx_scale_rtcd.h" -#include "vpx/vpx_decoder.h" -#include "vpx/vp8dx.h" -#include "vpx/internal/vpx_codec_internal.h" -#include "vpx_version.h" -#include "common/alloccommon.h" -#include "common/common.h" -#include "common/onyxd.h" -#include "decoder/onyxd_int.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_mem/vpx_mem.h" -#if CONFIG_ERROR_CONCEALMENT -#include "decoder/error_concealment.h" -#endif -#include "decoder/decoderthreading.h" - -#define VP8_CAP_POSTPROC (CONFIG_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0) -#define VP8_CAP_ERROR_CONCEALMENT (CONFIG_ERROR_CONCEALMENT ? 
\ - VPX_CODEC_CAP_ERROR_CONCEALMENT : 0) - -typedef vpx_codec_stream_info_t vp8_stream_info_t; - -/* Structures for handling memory allocations */ -typedef enum -{ - VP8_SEG_ALG_PRIV = 256, - VP8_SEG_MAX -} mem_seg_id_t; -#define NELEMENTS(x) ((int)(sizeof(x)/sizeof(x[0]))) - -struct vpx_codec_alg_priv -{ - vpx_codec_priv_t base; - vpx_codec_dec_cfg_t cfg; - vp8_stream_info_t si; - int decoder_init; - int postproc_cfg_set; - vp8_postproc_cfg_t postproc_cfg; -#if CONFIG_POSTPROC_VISUALIZER - unsigned int dbg_postproc_flag; - int dbg_color_ref_frame_flag; - int dbg_color_mb_modes_flag; - int dbg_color_b_modes_flag; - int dbg_display_mv_flag; -#endif - vpx_decrypt_cb decrypt_cb; - void *decrypt_state; - vpx_image_t img; - int img_setup; - struct frame_buffers yv12_frame_buffers; - void *user_priv; - FRAGMENT_DATA fragments; -}; - -static int vp8_init_ctx(vpx_codec_ctx_t *ctx) -{ - vpx_codec_alg_priv_t *priv = - (vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv)); - if (!priv) return 1; - - ctx->priv = (vpx_codec_priv_t *)priv; - ctx->priv->init_flags = ctx->init_flags; - - priv->si.sz = sizeof(priv->si); - priv->decrypt_cb = NULL; - priv->decrypt_state = NULL; - - if (ctx->config.dec) - { - /* Update the reference to the config structure to an internal copy. */ - priv->cfg = *ctx->config.dec; - ctx->config.dec = &priv->cfg; - } - - return 0; -} - -static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx, - vpx_codec_priv_enc_mr_cfg_t *data) -{ - vpx_codec_err_t res = VPX_CODEC_OK; - vpx_codec_alg_priv_t *priv = NULL; - (void) data; - - vp8_rtcd(); - vpx_dsp_rtcd(); - vpx_scale_rtcd(); - - /* This function only allocates space for the vpx_codec_alg_priv_t - * structure. More memory may be required at the time the stream - * information becomes known. 
- */ - if (!ctx->priv) { - if (vp8_init_ctx(ctx)) return VPX_CODEC_MEM_ERROR; - priv = (vpx_codec_alg_priv_t *)ctx->priv; - - /* initialize number of fragments to zero */ - priv->fragments.count = 0; - /* is input fragments enabled? */ - priv->fragments.enabled = - (priv->base.init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS); - - /*post processing level initialized to do nothing */ - } else { - priv = (vpx_codec_alg_priv_t *)ctx->priv; - } - - priv->yv12_frame_buffers.use_frame_threads = - (ctx->priv->init_flags & VPX_CODEC_USE_FRAME_THREADING); - - /* for now, disable frame threading */ - priv->yv12_frame_buffers.use_frame_threads = 0; - - if (priv->yv12_frame_buffers.use_frame_threads && - ((ctx->priv->init_flags & VPX_CODEC_USE_ERROR_CONCEALMENT) || - (ctx->priv->init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS))) { - /* row-based threading, error concealment, and input fragments will - * not be supported when using frame-based threading */ - res = VPX_CODEC_INVALID_PARAM; - } - - return res; -} - -static vpx_codec_err_t vp8_destroy(vpx_codec_alg_priv_t *ctx) -{ - vp8_remove_decoder_instances(&ctx->yv12_frame_buffers); - - vpx_free(ctx); - - return VPX_CODEC_OK; -} - -static vpx_codec_err_t vp8_peek_si_internal(const uint8_t *data, - unsigned int data_sz, - vpx_codec_stream_info_t *si, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) -{ - vpx_codec_err_t res = VPX_CODEC_OK; - - assert(data != NULL); - - if(data + data_sz <= data) - { - res = VPX_CODEC_INVALID_PARAM; - } - else - { - /* Parse uncompresssed part of key frame header. - * 3 bytes:- including version, frame type and an offset - * 3 bytes:- sync code (0x9d, 0x01, 0x2a) - * 4 bytes:- including image width and height in the lowest 14 bits - * of each 2-byte value. 
- */ - uint8_t clear_buffer[10]; - const uint8_t *clear = data; - if (decrypt_cb) - { - int n = VPXMIN(sizeof(clear_buffer), data_sz); - decrypt_cb(decrypt_state, data, clear_buffer, n); - clear = clear_buffer; - } - si->is_kf = 0; - - if (data_sz >= 10 && !(clear[0] & 0x01)) /* I-Frame */ - { - si->is_kf = 1; - - /* vet via sync code */ - if (clear[3] != 0x9d || clear[4] != 0x01 || clear[5] != 0x2a) - return VPX_CODEC_UNSUP_BITSTREAM; - - si->w = (clear[6] | (clear[7] << 8)) & 0x3fff; - si->h = (clear[8] | (clear[9] << 8)) & 0x3fff; - - /*printf("w=%d, h=%d\n", si->w, si->h);*/ - if (!(si->h | si->w)) - res = VPX_CODEC_UNSUP_BITSTREAM; - } - else - { - res = VPX_CODEC_UNSUP_BITSTREAM; - } - } - - return res; -} - -static vpx_codec_err_t vp8_peek_si(const uint8_t *data, - unsigned int data_sz, - vpx_codec_stream_info_t *si) { - return vp8_peek_si_internal(data, data_sz, si, NULL, NULL); -} - -static vpx_codec_err_t vp8_get_si(vpx_codec_alg_priv_t *ctx, - vpx_codec_stream_info_t *si) -{ - - unsigned int sz; - - if (si->sz >= sizeof(vp8_stream_info_t)) - sz = sizeof(vp8_stream_info_t); - else - sz = sizeof(vpx_codec_stream_info_t); - - memcpy(si, &ctx->si, sz); - si->sz = sz; - - return VPX_CODEC_OK; -} - - -static vpx_codec_err_t -update_error_state(vpx_codec_alg_priv_t *ctx, - const struct vpx_internal_error_info *error) -{ - vpx_codec_err_t res; - - if ((res = error->error_code)) - ctx->base.err_detail = error->has_detail - ? 
error->detail - : NULL; - - return res; -} - -static void yuvconfig2image(vpx_image_t *img, - const YV12_BUFFER_CONFIG *yv12, - void *user_priv) -{ - /** vpx_img_wrap() doesn't allow specifying independent strides for - * the Y, U, and V planes, nor other alignment adjustments that - * might be representable by a YV12_BUFFER_CONFIG, so we just - * initialize all the fields.*/ - img->fmt = VPX_IMG_FMT_I420; - img->w = yv12->y_stride; - img->h = (yv12->y_height + 2 * VP8BORDERINPIXELS + 15) & ~15; - img->d_w = img->r_w = yv12->y_width; - img->d_h = img->r_h = yv12->y_height; - img->x_chroma_shift = 1; - img->y_chroma_shift = 1; - img->planes[VPX_PLANE_Y] = yv12->y_buffer; - img->planes[VPX_PLANE_U] = yv12->u_buffer; - img->planes[VPX_PLANE_V] = yv12->v_buffer; - img->planes[VPX_PLANE_ALPHA] = NULL; - img->stride[VPX_PLANE_Y] = yv12->y_stride; - img->stride[VPX_PLANE_U] = yv12->uv_stride; - img->stride[VPX_PLANE_V] = yv12->uv_stride; - img->stride[VPX_PLANE_ALPHA] = yv12->y_stride; - img->bit_depth = 8; - img->bps = 12; - img->user_priv = user_priv; - img->img_data = yv12->buffer_alloc; - img->img_data_owner = 0; - img->self_allocd = 0; -} - -static int -update_fragments(vpx_codec_alg_priv_t *ctx, - const uint8_t *data, - unsigned int data_sz, - vpx_codec_err_t *res) -{ - *res = VPX_CODEC_OK; - - if (ctx->fragments.count == 0) - { - /* New frame, reset fragment pointers and sizes */ - memset((void*)ctx->fragments.ptrs, 0, sizeof(ctx->fragments.ptrs)); - memset(ctx->fragments.sizes, 0, sizeof(ctx->fragments.sizes)); - } - if (ctx->fragments.enabled && !(data == NULL && data_sz == 0)) - { - /* Store a pointer to this fragment and return. We haven't - * received the complete frame yet, so we will wait with decoding. 
- */ - ctx->fragments.ptrs[ctx->fragments.count] = data; - ctx->fragments.sizes[ctx->fragments.count] = data_sz; - ctx->fragments.count++; - if (ctx->fragments.count > (1 << EIGHT_PARTITION) + 1) - { - ctx->fragments.count = 0; - *res = VPX_CODEC_INVALID_PARAM; - return -1; - } - return 0; - } - - if (!ctx->fragments.enabled && (data == NULL && data_sz == 0)) - { - return 0; - } - - if (!ctx->fragments.enabled) - { - ctx->fragments.ptrs[0] = data; - ctx->fragments.sizes[0] = data_sz; - ctx->fragments.count = 1; - } - - return 1; -} - -static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, - const uint8_t *data, - unsigned int data_sz, - void *user_priv, - long deadline) -{ - vpx_codec_err_t res = VPX_CODEC_OK; - unsigned int resolution_change = 0; - unsigned int w, h; - - if (!ctx->fragments.enabled && (data == NULL && data_sz == 0)) - { - return 0; - } - - /* Update the input fragment data */ - if(update_fragments(ctx, data, data_sz, &res) <= 0) - return res; - - /* Determine the stream parameters. Note that we rely on peek_si to - * validate that we have a buffer that does not wrap around the top - * of the heap. 
- */ - w = ctx->si.w; - h = ctx->si.h; - - res = vp8_peek_si_internal(ctx->fragments.ptrs[0], ctx->fragments.sizes[0], - &ctx->si, ctx->decrypt_cb, ctx->decrypt_state); - - if((res == VPX_CODEC_UNSUP_BITSTREAM) && !ctx->si.is_kf) - { - /* the peek function returns an error for non keyframes, however for - * this case, it is not an error */ - res = VPX_CODEC_OK; - } - - if(!ctx->decoder_init && !ctx->si.is_kf) - res = VPX_CODEC_UNSUP_BITSTREAM; - - if ((ctx->si.h != h) || (ctx->si.w != w)) - resolution_change = 1; - - /* Initialize the decoder instance on the first frame*/ - if (!res && !ctx->decoder_init) - { - VP8D_CONFIG oxcf; - - oxcf.Width = ctx->si.w; - oxcf.Height = ctx->si.h; - oxcf.Version = 9; - oxcf.postprocess = 0; - oxcf.max_threads = ctx->cfg.threads; - oxcf.error_concealment = - (ctx->base.init_flags & VPX_CODEC_USE_ERROR_CONCEALMENT); - - /* If postprocessing was enabled by the application and a - * configuration has not been provided, default it. - */ - if (!ctx->postproc_cfg_set - && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) { - ctx->postproc_cfg.post_proc_flag = - VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE; - ctx->postproc_cfg.deblocking_level = 4; - ctx->postproc_cfg.noise_level = 0; - } - - res = vp8_create_decoder_instances(&ctx->yv12_frame_buffers, &oxcf); - ctx->decoder_init = 1; - } - - /* Set these even if already initialized. The caller may have changed the - * decrypt config between frames. 
- */ - if (ctx->decoder_init) { - ctx->yv12_frame_buffers.pbi[0]->decrypt_cb = ctx->decrypt_cb; - ctx->yv12_frame_buffers.pbi[0]->decrypt_state = ctx->decrypt_state; - } - - if (!res) - { - VP8D_COMP *pbi = ctx->yv12_frame_buffers.pbi[0]; - if (resolution_change) - { - VP8_COMMON *const pc = & pbi->common; - MACROBLOCKD *const xd = & pbi->mb; -#if CONFIG_MULTITHREAD - int i; -#endif - pc->Width = ctx->si.w; - pc->Height = ctx->si.h; - { - int prev_mb_rows = pc->mb_rows; - - if (setjmp(pbi->common.error.jmp)) - { - pbi->common.error.setjmp = 0; - vp8_clear_system_state(); - /* same return value as used in vp8dx_receive_compressed_data */ - return -1; - } - - pbi->common.error.setjmp = 1; - - if (pc->Width <= 0) - { - pc->Width = w; - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Invalid frame width"); - } - - if (pc->Height <= 0) - { - pc->Height = h; - vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, - "Invalid frame height"); - } - - if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height)) - vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate frame buffers"); - - xd->pre = pc->yv12_fb[pc->lst_fb_idx]; - xd->dst = pc->yv12_fb[pc->new_fb_idx]; - -#if CONFIG_MULTITHREAD - for (i = 0; i < pbi->allocated_decoding_thread_count; i++) - { - pbi->mb_row_di[i].mbd.dst = pc->yv12_fb[pc->new_fb_idx]; - vp8_build_block_doffsets(&pbi->mb_row_di[i].mbd); - } -#endif - vp8_build_block_doffsets(&pbi->mb); - - /* allocate memory for last frame MODE_INFO array */ -#if CONFIG_ERROR_CONCEALMENT - - if (pbi->ec_enabled) - { - /* old prev_mip was released by vp8_de_alloc_frame_buffers() - * called in vp8_alloc_frame_buffers() */ - pc->prev_mip = vpx_calloc( - (pc->mb_cols + 1) * (pc->mb_rows + 1), - sizeof(MODE_INFO)); - - if (!pc->prev_mip) - { - vp8_de_alloc_frame_buffers(pc); - vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate" - "last frame MODE_INFO array"); - } - - pc->prev_mi = pc->prev_mip + pc->mode_info_stride + 
1; - - if (vp8_alloc_overlap_lists(pbi)) - vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate overlap lists " - "for error concealment"); - } - -#endif - -#if CONFIG_MULTITHREAD - if (pbi->b_multithreaded_rd) - vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows); -#else - (void)prev_mb_rows; -#endif - } - - pbi->common.error.setjmp = 0; - - /* required to get past the first get_free_fb() call */ - pbi->common.fb_idx_ref_cnt[0] = 0; - } - - /* update the pbi fragment data */ - pbi->fragments = ctx->fragments; - - ctx->user_priv = user_priv; - if (vp8dx_receive_compressed_data(pbi, data_sz, data, deadline)) - { - res = update_error_state(ctx, &pbi->common.error); - } - - /* get ready for the next series of fragments */ - ctx->fragments.count = 0; - } - - return res; -} - -static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx, - vpx_codec_iter_t *iter) -{ - vpx_image_t *img = NULL; - - /* iter acts as a flip flop, so an image is only returned on the first - * call to get_frame. - */ - if (!(*iter) && ctx->yv12_frame_buffers.pbi[0]) - { - YV12_BUFFER_CONFIG sd; - int64_t time_stamp = 0, time_end_stamp = 0; - vp8_ppflags_t flags; - vp8_zero(flags); - - if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) - { - flags.post_proc_flag= ctx->postproc_cfg.post_proc_flag -#if CONFIG_POSTPROC_VISUALIZER - - | ((ctx->dbg_color_ref_frame_flag != 0) ? VP8D_DEBUG_CLR_FRM_REF_BLKS : 0) - | ((ctx->dbg_color_mb_modes_flag != 0) ? VP8D_DEBUG_CLR_BLK_MODES : 0) - | ((ctx->dbg_color_b_modes_flag != 0) ? VP8D_DEBUG_CLR_BLK_MODES : 0) - | ((ctx->dbg_display_mv_flag != 0) ? 
VP8D_DEBUG_DRAW_MV : 0) -#endif - ; - flags.deblocking_level = ctx->postproc_cfg.deblocking_level; - flags.noise_level = ctx->postproc_cfg.noise_level; -#if CONFIG_POSTPROC_VISUALIZER - flags.display_ref_frame_flag= ctx->dbg_color_ref_frame_flag; - flags.display_mb_modes_flag = ctx->dbg_color_mb_modes_flag; - flags.display_b_modes_flag = ctx->dbg_color_b_modes_flag; - flags.display_mv_flag = ctx->dbg_display_mv_flag; -#endif - } - - if (0 == vp8dx_get_raw_frame(ctx->yv12_frame_buffers.pbi[0], &sd, - &time_stamp, &time_end_stamp, &flags)) - { - yuvconfig2image(&ctx->img, &sd, ctx->user_priv); - - img = &ctx->img; - *iter = img; - } - } - - return img; -} - -static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, - YV12_BUFFER_CONFIG *yv12) -{ - const int y_w = img->d_w; - const int y_h = img->d_h; - const int uv_w = (img->d_w + 1) / 2; - const int uv_h = (img->d_h + 1) / 2; - vpx_codec_err_t res = VPX_CODEC_OK; - yv12->y_buffer = img->planes[VPX_PLANE_Y]; - yv12->u_buffer = img->planes[VPX_PLANE_U]; - yv12->v_buffer = img->planes[VPX_PLANE_V]; - - yv12->y_crop_width = y_w; - yv12->y_crop_height = y_h; - yv12->y_width = y_w; - yv12->y_height = y_h; - yv12->uv_crop_width = uv_w; - yv12->uv_crop_height = uv_h; - yv12->uv_width = uv_w; - yv12->uv_height = uv_h; - - yv12->y_stride = img->stride[VPX_PLANE_Y]; - yv12->uv_stride = img->stride[VPX_PLANE_U]; - - yv12->border = (img->stride[VPX_PLANE_Y] - img->d_w) / 2; - return res; -} - - -static vpx_codec_err_t vp8_set_reference(vpx_codec_alg_priv_t *ctx, - va_list args) -{ - - vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); - - if (data && !ctx->yv12_frame_buffers.use_frame_threads) - { - vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data; - YV12_BUFFER_CONFIG sd; - - image2yuvconfig(&frame->img, &sd); - - return vp8dx_set_reference(ctx->yv12_frame_buffers.pbi[0], - frame->frame_type, &sd); - } - else - return VPX_CODEC_INVALID_PARAM; - -} - -static vpx_codec_err_t vp8_get_reference(vpx_codec_alg_priv_t *ctx, 
- va_list args) -{ - - vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); - - if (data && !ctx->yv12_frame_buffers.use_frame_threads) - { - vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data; - YV12_BUFFER_CONFIG sd; - - image2yuvconfig(&frame->img, &sd); - - return vp8dx_get_reference(ctx->yv12_frame_buffers.pbi[0], - frame->frame_type, &sd); - } - else - return VPX_CODEC_INVALID_PARAM; - -} - -static vpx_codec_err_t vp8_set_postproc(vpx_codec_alg_priv_t *ctx, - va_list args) -{ -#if CONFIG_POSTPROC - vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); - - if (data) - { - ctx->postproc_cfg_set = 1; - ctx->postproc_cfg = *((vp8_postproc_cfg_t *)data); - return VPX_CODEC_OK; - } - else - return VPX_CODEC_INVALID_PARAM; - -#else - (void)ctx; - (void)args; - return VPX_CODEC_INCAPABLE; -#endif -} - - -static vpx_codec_err_t vp8_set_dbg_color_ref_frame(vpx_codec_alg_priv_t *ctx, - va_list args) { -#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC - ctx->dbg_color_ref_frame_flag = va_arg(args, int); - return VPX_CODEC_OK; -#else - (void)ctx; - (void)args; - return VPX_CODEC_INCAPABLE; -#endif -} - -static vpx_codec_err_t vp8_set_dbg_color_mb_modes(vpx_codec_alg_priv_t *ctx, - va_list args) { -#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC - ctx->dbg_color_mb_modes_flag = va_arg(args, int); - return VPX_CODEC_OK; -#else - (void)ctx; - (void)args; - return VPX_CODEC_INCAPABLE; -#endif -} - -static vpx_codec_err_t vp8_set_dbg_color_b_modes(vpx_codec_alg_priv_t *ctx, - va_list args) { -#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC - ctx->dbg_color_b_modes_flag = va_arg(args, int); - return VPX_CODEC_OK; -#else - (void)ctx; - (void)args; - return VPX_CODEC_INCAPABLE; -#endif -} - -static vpx_codec_err_t vp8_set_dbg_display_mv(vpx_codec_alg_priv_t *ctx, - va_list args) { -#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC - ctx->dbg_display_mv_flag = va_arg(args, int); - return VPX_CODEC_OK; -#else - (void)ctx; - (void)args; - return VPX_CODEC_INCAPABLE; 
-#endif -} - -static vpx_codec_err_t vp8_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, - va_list args) -{ - int *update_info = va_arg(args, int *); - - if (update_info && !ctx->yv12_frame_buffers.use_frame_threads) - { - VP8D_COMP *pbi = (VP8D_COMP *)ctx->yv12_frame_buffers.pbi[0]; - - *update_info = pbi->common.refresh_alt_ref_frame * (int) VP8_ALTR_FRAME - + pbi->common.refresh_golden_frame * (int) VP8_GOLD_FRAME - + pbi->common.refresh_last_frame * (int) VP8_LAST_FRAME; - - return VPX_CODEC_OK; - } - else - return VPX_CODEC_INVALID_PARAM; -} - -extern int vp8dx_references_buffer( VP8_COMMON *oci, int ref_frame ); -static vpx_codec_err_t vp8_get_last_ref_frame(vpx_codec_alg_priv_t *ctx, - va_list args) -{ - int *ref_info = va_arg(args, int *); - - if (ref_info && !ctx->yv12_frame_buffers.use_frame_threads) - { - VP8D_COMP *pbi = (VP8D_COMP *)ctx->yv12_frame_buffers.pbi[0]; - VP8_COMMON *oci = &pbi->common; - *ref_info = - (vp8dx_references_buffer( oci, ALTREF_FRAME )?VP8_ALTR_FRAME:0) | - (vp8dx_references_buffer( oci, GOLDEN_FRAME )?VP8_GOLD_FRAME:0) | - (vp8dx_references_buffer( oci, LAST_FRAME )?VP8_LAST_FRAME:0); - - return VPX_CODEC_OK; - } - else - return VPX_CODEC_INVALID_PARAM; -} - -static vpx_codec_err_t vp8_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, - va_list args) -{ - - int *corrupted = va_arg(args, int *); - VP8D_COMP *pbi = (VP8D_COMP *)ctx->yv12_frame_buffers.pbi[0]; - - if (corrupted && pbi) - { - const YV12_BUFFER_CONFIG *const frame = pbi->common.frame_to_show; - if (frame == NULL) return VPX_CODEC_ERROR; - *corrupted = frame->corrupted; - return VPX_CODEC_OK; - } - else - return VPX_CODEC_INVALID_PARAM; - -} - -static vpx_codec_err_t vp8_set_decryptor(vpx_codec_alg_priv_t *ctx, - va_list args) -{ - vpx_decrypt_init *init = va_arg(args, vpx_decrypt_init *); - - if (init) - { - ctx->decrypt_cb = init->decrypt_cb; - ctx->decrypt_state = init->decrypt_state; - } - else - { - ctx->decrypt_cb = NULL; - ctx->decrypt_state = NULL; - } - return 
VPX_CODEC_OK; -} - -vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] = -{ - {VP8_SET_REFERENCE, vp8_set_reference}, - {VP8_COPY_REFERENCE, vp8_get_reference}, - {VP8_SET_POSTPROC, vp8_set_postproc}, - {VP8_SET_DBG_COLOR_REF_FRAME, vp8_set_dbg_color_ref_frame}, - {VP8_SET_DBG_COLOR_MB_MODES, vp8_set_dbg_color_mb_modes}, - {VP8_SET_DBG_COLOR_B_MODES, vp8_set_dbg_color_b_modes}, - {VP8_SET_DBG_DISPLAY_MV, vp8_set_dbg_display_mv}, - {VP8D_GET_LAST_REF_UPDATES, vp8_get_last_ref_updates}, - {VP8D_GET_FRAME_CORRUPTED, vp8_get_frame_corrupted}, - {VP8D_GET_LAST_REF_USED, vp8_get_last_ref_frame}, - {VPXD_SET_DECRYPTOR, vp8_set_decryptor}, - { -1, NULL}, -}; - - -#ifndef VERSION_STRING -#define VERSION_STRING -#endif -CODEC_INTERFACE(vpx_codec_vp8_dx) = -{ - "WebM Project VP8 Decoder" VERSION_STRING, - VPX_CODEC_INTERNAL_ABI_VERSION, - VPX_CODEC_CAP_DECODER | VP8_CAP_POSTPROC | VP8_CAP_ERROR_CONCEALMENT | - VPX_CODEC_CAP_INPUT_FRAGMENTS, - /* vpx_codec_caps_t caps; */ - vp8_init, /* vpx_codec_init_fn_t init; */ - vp8_destroy, /* vpx_codec_destroy_fn_t destroy; */ - vp8_ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */ - { - vp8_peek_si, /* vpx_codec_peek_si_fn_t peek_si; */ - vp8_get_si, /* vpx_codec_get_si_fn_t get_si; */ - vp8_decode, /* vpx_codec_decode_fn_t decode; */ - vp8_get_frame, /* vpx_codec_frame_get_fn_t frame_get; */ - NULL, - }, - { /* encoder functions */ - 0, - NULL, /* vpx_codec_enc_cfg_map_t */ - NULL, /* vpx_codec_encode_fn_t */ - NULL, /* vpx_codec_get_cx_data_fn_t */ - NULL, /* vpx_codec_enc_config_set_fn_t */ - NULL, /* vpx_codec_get_global_headers_fn_t */ - NULL, /* vpx_codec_get_preview_frame_fn_t */ - NULL /* vpx_codec_enc_mr_get_mem_loc_fn_t */ - } -}; diff --git a/thirdparty/libvpx/vp8_rtcd.h b/thirdparty/libvpx/vp8_rtcd.h deleted file mode 100644 index c5eeb5e579..0000000000 --- a/thirdparty/libvpx/vp8_rtcd.h +++ /dev/null @@ -1,9 +0,0 @@ -#include "vpx_config.h" - -#if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64) - #include "rtcd/vp8_rtcd_x86.h" 
-#elif defined(WEBM_ARMASM) && ARCH_ARM - #include "rtcd/vp8_rtcd_arm.h" -#else - #include "rtcd/vp8_rtcd_c.h" -#endif diff --git a/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c b/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c deleted file mode 100644 index 1761fada2f..0000000000 --- a/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> -#include <assert.h> - -#include "./vp9_rtcd.h" -#include "./vpx_config.h" -#include "vp9/common/vp9_common.h" - -static int16_t sinpi_1_9 = 0x14a3; -static int16_t sinpi_2_9 = 0x26c9; -static int16_t sinpi_3_9 = 0x3441; -static int16_t sinpi_4_9 = 0x3b6c; -static int16_t cospi_8_64 = 0x3b21; -static int16_t cospi_16_64 = 0x2d41; -static int16_t cospi_24_64 = 0x187e; - -static INLINE void TRANSPOSE4X4( - int16x8_t *q8s16, - int16x8_t *q9s16) { - int32x4_t q8s32, q9s32; - int16x4x2_t d0x2s16, d1x2s16; - int32x4x2_t q0x2s32; - - d0x2s16 = vtrn_s16(vget_low_s16(*q8s16), vget_high_s16(*q8s16)); - d1x2s16 = vtrn_s16(vget_low_s16(*q9s16), vget_high_s16(*q9s16)); - - q8s32 = vreinterpretq_s32_s16(vcombine_s16(d0x2s16.val[0], d0x2s16.val[1])); - q9s32 = vreinterpretq_s32_s16(vcombine_s16(d1x2s16.val[0], d1x2s16.val[1])); - q0x2s32 = vtrnq_s32(q8s32, q9s32); - - *q8s16 = vreinterpretq_s16_s32(q0x2s32.val[0]); - *q9s16 = vreinterpretq_s16_s32(q0x2s32.val[1]); - return; -} - -static INLINE void GENERATE_COSINE_CONSTANTS( - int16x4_t *d0s16, - int16x4_t *d1s16, - int16x4_t *d2s16) { - *d0s16 = vdup_n_s16(cospi_8_64); - *d1s16 = 
vdup_n_s16(cospi_16_64); - *d2s16 = vdup_n_s16(cospi_24_64); - return; -} - -static INLINE void GENERATE_SINE_CONSTANTS( - int16x4_t *d3s16, - int16x4_t *d4s16, - int16x4_t *d5s16, - int16x8_t *q3s16) { - *d3s16 = vdup_n_s16(sinpi_1_9); - *d4s16 = vdup_n_s16(sinpi_2_9); - *q3s16 = vdupq_n_s16(sinpi_3_9); - *d5s16 = vdup_n_s16(sinpi_4_9); - return; -} - -static INLINE void IDCT4x4_1D( - int16x4_t *d0s16, - int16x4_t *d1s16, - int16x4_t *d2s16, - int16x8_t *q8s16, - int16x8_t *q9s16) { - int16x4_t d16s16, d17s16, d18s16, d19s16, d23s16, d24s16; - int16x4_t d26s16, d27s16, d28s16, d29s16; - int32x4_t q10s32, q13s32, q14s32, q15s32; - int16x8_t q13s16, q14s16; - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - - d23s16 = vadd_s16(d16s16, d18s16); - d24s16 = vsub_s16(d16s16, d18s16); - - q15s32 = vmull_s16(d17s16, *d2s16); - q10s32 = vmull_s16(d17s16, *d0s16); - q13s32 = vmull_s16(d23s16, *d1s16); - q14s32 = vmull_s16(d24s16, *d1s16); - q15s32 = vmlsl_s16(q15s32, d19s16, *d0s16); - q10s32 = vmlal_s16(q10s32, d19s16, *d2s16); - - d26s16 = vqrshrn_n_s32(q13s32, 14); - d27s16 = vqrshrn_n_s32(q14s32, 14); - d29s16 = vqrshrn_n_s32(q15s32, 14); - d28s16 = vqrshrn_n_s32(q10s32, 14); - - q13s16 = vcombine_s16(d26s16, d27s16); - q14s16 = vcombine_s16(d28s16, d29s16); - *q8s16 = vaddq_s16(q13s16, q14s16); - *q9s16 = vsubq_s16(q13s16, q14s16); - *q9s16 = vcombine_s16(vget_high_s16(*q9s16), - vget_low_s16(*q9s16)); // vswp - return; -} - -static INLINE void IADST4x4_1D( - int16x4_t *d3s16, - int16x4_t *d4s16, - int16x4_t *d5s16, - int16x8_t *q3s16, - int16x8_t *q8s16, - int16x8_t *q9s16) { - int16x4_t d6s16, d16s16, d17s16, d18s16, d19s16; - int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32; - - d6s16 = vget_low_s16(*q3s16); - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - - q10s32 = 
vmull_s16(*d3s16, d16s16); - q11s32 = vmull_s16(*d4s16, d16s16); - q12s32 = vmull_s16(d6s16, d17s16); - q13s32 = vmull_s16(*d5s16, d18s16); - q14s32 = vmull_s16(*d3s16, d18s16); - q15s32 = vmovl_s16(d16s16); - q15s32 = vaddw_s16(q15s32, d19s16); - q8s32 = vmull_s16(*d4s16, d19s16); - q15s32 = vsubw_s16(q15s32, d18s16); - q9s32 = vmull_s16(*d5s16, d19s16); - - q10s32 = vaddq_s32(q10s32, q13s32); - q10s32 = vaddq_s32(q10s32, q8s32); - q11s32 = vsubq_s32(q11s32, q14s32); - q8s32 = vdupq_n_s32(sinpi_3_9); - q11s32 = vsubq_s32(q11s32, q9s32); - q15s32 = vmulq_s32(q15s32, q8s32); - - q13s32 = vaddq_s32(q10s32, q12s32); - q10s32 = vaddq_s32(q10s32, q11s32); - q14s32 = vaddq_s32(q11s32, q12s32); - q10s32 = vsubq_s32(q10s32, q12s32); - - d16s16 = vqrshrn_n_s32(q13s32, 14); - d17s16 = vqrshrn_n_s32(q14s32, 14); - d18s16 = vqrshrn_n_s32(q15s32, 14); - d19s16 = vqrshrn_n_s32(q10s32, 14); - - *q8s16 = vcombine_s16(d16s16, d17s16); - *q9s16 = vcombine_s16(d18s16, d19s16); - return; -} - -void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, - int dest_stride, int tx_type) { - uint8x8_t d26u8, d27u8; - int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16; - uint32x2_t d26u32, d27u32; - int16x8_t q3s16, q8s16, q9s16; - uint16x8_t q8u16, q9u16; - - d26u32 = d27u32 = vdup_n_u32(0); - - q8s16 = vld1q_s16(input); - q9s16 = vld1q_s16(input + 8); - - TRANSPOSE4X4(&q8s16, &q9s16); - - switch (tx_type) { - case 0: // idct_idct is not supported. 
Fall back to C - vp9_iht4x4_16_add_c(input, dest, dest_stride, tx_type); - return; - break; - case 1: // iadst_idct - // generate constants - GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16); - GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); - - // first transform rows - IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16); - - // transpose the matrix - TRANSPOSE4X4(&q8s16, &q9s16); - - // then transform columns - IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); - break; - case 2: // idct_iadst - // generate constantsyy - GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16); - GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); - - // first transform rows - IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); - - // transpose the matrix - TRANSPOSE4X4(&q8s16, &q9s16); - - // then transform columns - IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16); - break; - case 3: // iadst_iadst - // generate constants - GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); - - // first transform rows - IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); - - // transpose the matrix - TRANSPOSE4X4(&q8s16, &q9s16); - - // then transform columns - IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); - break; - default: // iadst_idct - assert(0); - break; - } - - q8s16 = vrshrq_n_s16(q8s16, 4); - q9s16 = vrshrq_n_s16(q9s16, 4); - - d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 0); - dest += dest_stride; - d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 1); - dest += dest_stride; - d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 0); - dest += dest_stride; - d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 1); - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32)); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u32(d27u32)); - - d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - - vst1_lane_u32((uint32_t 
*)dest, vreinterpret_u32_u8(d27u8), 1); - dest -= dest_stride; - vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 0); - dest -= dest_stride; - vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 1); - dest -= dest_stride; - vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 0); - return; -} diff --git a/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c b/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c deleted file mode 100644 index 04b342c3d3..0000000000 --- a/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c +++ /dev/null @@ -1,624 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <arm_neon.h> -#include <assert.h> - -#include "./vp9_rtcd.h" -#include "./vpx_config.h" -#include "vp9/common/vp9_common.h" - -static int16_t cospi_2_64 = 16305; -static int16_t cospi_4_64 = 16069; -static int16_t cospi_6_64 = 15679; -static int16_t cospi_8_64 = 15137; -static int16_t cospi_10_64 = 14449; -static int16_t cospi_12_64 = 13623; -static int16_t cospi_14_64 = 12665; -static int16_t cospi_16_64 = 11585; -static int16_t cospi_18_64 = 10394; -static int16_t cospi_20_64 = 9102; -static int16_t cospi_22_64 = 7723; -static int16_t cospi_24_64 = 6270; -static int16_t cospi_26_64 = 4756; -static int16_t cospi_28_64 = 3196; -static int16_t cospi_30_64 = 1606; - -static INLINE void TRANSPOSE8X8( - int16x8_t *q8s16, - int16x8_t *q9s16, - int16x8_t *q10s16, - int16x8_t *q11s16, - int16x8_t *q12s16, - int16x8_t *q13s16, - int16x8_t *q14s16, - int16x8_t *q15s16) { - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32; - int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16; - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - d20s16 = vget_low_s16(*q10s16); - d21s16 = vget_high_s16(*q10s16); - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - d30s16 = vget_low_s16(*q15s16); - d31s16 = vget_high_s16(*q15s16); - - *q8s16 = vcombine_s16(d16s16, d24s16); // vswp d17, d24 - *q9s16 = vcombine_s16(d18s16, d26s16); // vswp d19, d26 - *q10s16 = vcombine_s16(d20s16, d28s16); // vswp d21, d28 - *q11s16 = vcombine_s16(d22s16, d30s16); // vswp d23, d30 - *q12s16 = vcombine_s16(d17s16, d25s16); - *q13s16 = 
vcombine_s16(d19s16, d27s16); - *q14s16 = vcombine_s16(d21s16, d29s16); - *q15s16 = vcombine_s16(d23s16, d31s16); - - q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q8s16), - vreinterpretq_s32_s16(*q10s16)); - q1x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q9s16), - vreinterpretq_s32_s16(*q11s16)); - q2x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q12s16), - vreinterpretq_s32_s16(*q14s16)); - q3x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q13s16), - vreinterpretq_s32_s16(*q15s16)); - - q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]), // q8 - vreinterpretq_s16_s32(q1x2s32.val[0])); // q9 - q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]), // q10 - vreinterpretq_s16_s32(q1x2s32.val[1])); // q11 - q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]), // q12 - vreinterpretq_s16_s32(q3x2s32.val[0])); // q13 - q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]), // q14 - vreinterpretq_s16_s32(q3x2s32.val[1])); // q15 - - *q8s16 = q0x2s16.val[0]; - *q9s16 = q0x2s16.val[1]; - *q10s16 = q1x2s16.val[0]; - *q11s16 = q1x2s16.val[1]; - *q12s16 = q2x2s16.val[0]; - *q13s16 = q2x2s16.val[1]; - *q14s16 = q3x2s16.val[0]; - *q15s16 = q3x2s16.val[1]; - return; -} - -static INLINE void IDCT8x8_1D( - int16x8_t *q8s16, - int16x8_t *q9s16, - int16x8_t *q10s16, - int16x8_t *q11s16, - int16x8_t *q12s16, - int16x8_t *q13s16, - int16x8_t *q14s16, - int16x8_t *q15s16) { - int16x4_t d0s16, d1s16, d2s16, d3s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; - int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32; - int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32; - - d0s16 = vdup_n_s16(cospi_28_64); - d1s16 = vdup_n_s16(cospi_4_64); - d2s16 = vdup_n_s16(cospi_12_64); - d3s16 = vdup_n_s16(cospi_20_64); - - d16s16 = vget_low_s16(*q8s16); - 
d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - d20s16 = vget_low_s16(*q10s16); - d21s16 = vget_high_s16(*q10s16); - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - d30s16 = vget_low_s16(*q15s16); - d31s16 = vget_high_s16(*q15s16); - - q2s32 = vmull_s16(d18s16, d0s16); - q3s32 = vmull_s16(d19s16, d0s16); - q5s32 = vmull_s16(d26s16, d2s16); - q6s32 = vmull_s16(d27s16, d2s16); - - q2s32 = vmlsl_s16(q2s32, d30s16, d1s16); - q3s32 = vmlsl_s16(q3s32, d31s16, d1s16); - q5s32 = vmlsl_s16(q5s32, d22s16, d3s16); - q6s32 = vmlsl_s16(q6s32, d23s16, d3s16); - - d8s16 = vqrshrn_n_s32(q2s32, 14); - d9s16 = vqrshrn_n_s32(q3s32, 14); - d10s16 = vqrshrn_n_s32(q5s32, 14); - d11s16 = vqrshrn_n_s32(q6s32, 14); - q4s16 = vcombine_s16(d8s16, d9s16); - q5s16 = vcombine_s16(d10s16, d11s16); - - q2s32 = vmull_s16(d18s16, d1s16); - q3s32 = vmull_s16(d19s16, d1s16); - q9s32 = vmull_s16(d26s16, d3s16); - q13s32 = vmull_s16(d27s16, d3s16); - - q2s32 = vmlal_s16(q2s32, d30s16, d0s16); - q3s32 = vmlal_s16(q3s32, d31s16, d0s16); - q9s32 = vmlal_s16(q9s32, d22s16, d2s16); - q13s32 = vmlal_s16(q13s32, d23s16, d2s16); - - d14s16 = vqrshrn_n_s32(q2s32, 14); - d15s16 = vqrshrn_n_s32(q3s32, 14); - d12s16 = vqrshrn_n_s32(q9s32, 14); - d13s16 = vqrshrn_n_s32(q13s32, 14); - q6s16 = vcombine_s16(d12s16, d13s16); - q7s16 = vcombine_s16(d14s16, d15s16); - - d0s16 = vdup_n_s16(cospi_16_64); - - q2s32 = vmull_s16(d16s16, d0s16); - q3s32 = vmull_s16(d17s16, d0s16); - q13s32 = vmull_s16(d16s16, d0s16); - q15s32 = vmull_s16(d17s16, d0s16); - - q2s32 = vmlal_s16(q2s32, d24s16, d0s16); - q3s32 = vmlal_s16(q3s32, d25s16, d0s16); - q13s32 = vmlsl_s16(q13s32, d24s16, d0s16); - q15s32 = vmlsl_s16(q15s32, d25s16, d0s16); - - d0s16 = 
vdup_n_s16(cospi_24_64); - d1s16 = vdup_n_s16(cospi_8_64); - - d18s16 = vqrshrn_n_s32(q2s32, 14); - d19s16 = vqrshrn_n_s32(q3s32, 14); - d22s16 = vqrshrn_n_s32(q13s32, 14); - d23s16 = vqrshrn_n_s32(q15s32, 14); - *q9s16 = vcombine_s16(d18s16, d19s16); - *q11s16 = vcombine_s16(d22s16, d23s16); - - q2s32 = vmull_s16(d20s16, d0s16); - q3s32 = vmull_s16(d21s16, d0s16); - q8s32 = vmull_s16(d20s16, d1s16); - q12s32 = vmull_s16(d21s16, d1s16); - - q2s32 = vmlsl_s16(q2s32, d28s16, d1s16); - q3s32 = vmlsl_s16(q3s32, d29s16, d1s16); - q8s32 = vmlal_s16(q8s32, d28s16, d0s16); - q12s32 = vmlal_s16(q12s32, d29s16, d0s16); - - d26s16 = vqrshrn_n_s32(q2s32, 14); - d27s16 = vqrshrn_n_s32(q3s32, 14); - d30s16 = vqrshrn_n_s32(q8s32, 14); - d31s16 = vqrshrn_n_s32(q12s32, 14); - *q13s16 = vcombine_s16(d26s16, d27s16); - *q15s16 = vcombine_s16(d30s16, d31s16); - - q0s16 = vaddq_s16(*q9s16, *q15s16); - q1s16 = vaddq_s16(*q11s16, *q13s16); - q2s16 = vsubq_s16(*q11s16, *q13s16); - q3s16 = vsubq_s16(*q9s16, *q15s16); - - *q13s16 = vsubq_s16(q4s16, q5s16); - q4s16 = vaddq_s16(q4s16, q5s16); - *q14s16 = vsubq_s16(q7s16, q6s16); - q7s16 = vaddq_s16(q7s16, q6s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - - d16s16 = vdup_n_s16(cospi_16_64); - - q9s32 = vmull_s16(d28s16, d16s16); - q10s32 = vmull_s16(d29s16, d16s16); - q11s32 = vmull_s16(d28s16, d16s16); - q12s32 = vmull_s16(d29s16, d16s16); - - q9s32 = vmlsl_s16(q9s32, d26s16, d16s16); - q10s32 = vmlsl_s16(q10s32, d27s16, d16s16); - q11s32 = vmlal_s16(q11s32, d26s16, d16s16); - q12s32 = vmlal_s16(q12s32, d27s16, d16s16); - - d10s16 = vqrshrn_n_s32(q9s32, 14); - d11s16 = vqrshrn_n_s32(q10s32, 14); - d12s16 = vqrshrn_n_s32(q11s32, 14); - d13s16 = vqrshrn_n_s32(q12s32, 14); - q5s16 = vcombine_s16(d10s16, d11s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - *q8s16 = vaddq_s16(q0s16, q7s16); - *q9s16 = vaddq_s16(q1s16, q6s16); - *q10s16 = 
vaddq_s16(q2s16, q5s16); - *q11s16 = vaddq_s16(q3s16, q4s16); - *q12s16 = vsubq_s16(q3s16, q4s16); - *q13s16 = vsubq_s16(q2s16, q5s16); - *q14s16 = vsubq_s16(q1s16, q6s16); - *q15s16 = vsubq_s16(q0s16, q7s16); - return; -} - -static INLINE void IADST8X8_1D( - int16x8_t *q8s16, - int16x8_t *q9s16, - int16x8_t *q10s16, - int16x8_t *q11s16, - int16x8_t *q12s16, - int16x8_t *q13s16, - int16x8_t *q14s16, - int16x8_t *q15s16) { - int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int16x8_t q2s16, q4s16, q5s16, q6s16; - int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q7s32, q8s32; - int32x4_t q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32; - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - d20s16 = vget_low_s16(*q10s16); - d21s16 = vget_high_s16(*q10s16); - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - d30s16 = vget_low_s16(*q15s16); - d31s16 = vget_high_s16(*q15s16); - - d14s16 = vdup_n_s16(cospi_2_64); - d15s16 = vdup_n_s16(cospi_30_64); - - q1s32 = vmull_s16(d30s16, d14s16); - q2s32 = vmull_s16(d31s16, d14s16); - q3s32 = vmull_s16(d30s16, d15s16); - q4s32 = vmull_s16(d31s16, d15s16); - - d30s16 = vdup_n_s16(cospi_18_64); - d31s16 = vdup_n_s16(cospi_14_64); - - q1s32 = vmlal_s16(q1s32, d16s16, d15s16); - q2s32 = vmlal_s16(q2s32, d17s16, d15s16); - q3s32 = vmlsl_s16(q3s32, d16s16, d14s16); - q4s32 = vmlsl_s16(q4s32, d17s16, d14s16); - - q5s32 = vmull_s16(d22s16, d30s16); - q6s32 = vmull_s16(d23s16, 
d30s16); - q7s32 = vmull_s16(d22s16, d31s16); - q8s32 = vmull_s16(d23s16, d31s16); - - q5s32 = vmlal_s16(q5s32, d24s16, d31s16); - q6s32 = vmlal_s16(q6s32, d25s16, d31s16); - q7s32 = vmlsl_s16(q7s32, d24s16, d30s16); - q8s32 = vmlsl_s16(q8s32, d25s16, d30s16); - - q11s32 = vaddq_s32(q1s32, q5s32); - q12s32 = vaddq_s32(q2s32, q6s32); - q1s32 = vsubq_s32(q1s32, q5s32); - q2s32 = vsubq_s32(q2s32, q6s32); - - d22s16 = vqrshrn_n_s32(q11s32, 14); - d23s16 = vqrshrn_n_s32(q12s32, 14); - *q11s16 = vcombine_s16(d22s16, d23s16); - - q12s32 = vaddq_s32(q3s32, q7s32); - q15s32 = vaddq_s32(q4s32, q8s32); - q3s32 = vsubq_s32(q3s32, q7s32); - q4s32 = vsubq_s32(q4s32, q8s32); - - d2s16 = vqrshrn_n_s32(q1s32, 14); - d3s16 = vqrshrn_n_s32(q2s32, 14); - d24s16 = vqrshrn_n_s32(q12s32, 14); - d25s16 = vqrshrn_n_s32(q15s32, 14); - d6s16 = vqrshrn_n_s32(q3s32, 14); - d7s16 = vqrshrn_n_s32(q4s32, 14); - *q12s16 = vcombine_s16(d24s16, d25s16); - - d0s16 = vdup_n_s16(cospi_10_64); - d1s16 = vdup_n_s16(cospi_22_64); - q4s32 = vmull_s16(d26s16, d0s16); - q5s32 = vmull_s16(d27s16, d0s16); - q2s32 = vmull_s16(d26s16, d1s16); - q6s32 = vmull_s16(d27s16, d1s16); - - d30s16 = vdup_n_s16(cospi_26_64); - d31s16 = vdup_n_s16(cospi_6_64); - - q4s32 = vmlal_s16(q4s32, d20s16, d1s16); - q5s32 = vmlal_s16(q5s32, d21s16, d1s16); - q2s32 = vmlsl_s16(q2s32, d20s16, d0s16); - q6s32 = vmlsl_s16(q6s32, d21s16, d0s16); - - q0s32 = vmull_s16(d18s16, d30s16); - q13s32 = vmull_s16(d19s16, d30s16); - - q0s32 = vmlal_s16(q0s32, d28s16, d31s16); - q13s32 = vmlal_s16(q13s32, d29s16, d31s16); - - q10s32 = vmull_s16(d18s16, d31s16); - q9s32 = vmull_s16(d19s16, d31s16); - - q10s32 = vmlsl_s16(q10s32, d28s16, d30s16); - q9s32 = vmlsl_s16(q9s32, d29s16, d30s16); - - q14s32 = vaddq_s32(q2s32, q10s32); - q15s32 = vaddq_s32(q6s32, q9s32); - q2s32 = vsubq_s32(q2s32, q10s32); - q6s32 = vsubq_s32(q6s32, q9s32); - - d28s16 = vqrshrn_n_s32(q14s32, 14); - d29s16 = vqrshrn_n_s32(q15s32, 14); - d4s16 = vqrshrn_n_s32(q2s32, 14); - 
d5s16 = vqrshrn_n_s32(q6s32, 14); - *q14s16 = vcombine_s16(d28s16, d29s16); - - q9s32 = vaddq_s32(q4s32, q0s32); - q10s32 = vaddq_s32(q5s32, q13s32); - q4s32 = vsubq_s32(q4s32, q0s32); - q5s32 = vsubq_s32(q5s32, q13s32); - - d30s16 = vdup_n_s16(cospi_8_64); - d31s16 = vdup_n_s16(cospi_24_64); - - d18s16 = vqrshrn_n_s32(q9s32, 14); - d19s16 = vqrshrn_n_s32(q10s32, 14); - d8s16 = vqrshrn_n_s32(q4s32, 14); - d9s16 = vqrshrn_n_s32(q5s32, 14); - *q9s16 = vcombine_s16(d18s16, d19s16); - - q5s32 = vmull_s16(d2s16, d30s16); - q6s32 = vmull_s16(d3s16, d30s16); - q7s32 = vmull_s16(d2s16, d31s16); - q0s32 = vmull_s16(d3s16, d31s16); - - q5s32 = vmlal_s16(q5s32, d6s16, d31s16); - q6s32 = vmlal_s16(q6s32, d7s16, d31s16); - q7s32 = vmlsl_s16(q7s32, d6s16, d30s16); - q0s32 = vmlsl_s16(q0s32, d7s16, d30s16); - - q1s32 = vmull_s16(d4s16, d30s16); - q3s32 = vmull_s16(d5s16, d30s16); - q10s32 = vmull_s16(d4s16, d31s16); - q2s32 = vmull_s16(d5s16, d31s16); - - q1s32 = vmlsl_s16(q1s32, d8s16, d31s16); - q3s32 = vmlsl_s16(q3s32, d9s16, d31s16); - q10s32 = vmlal_s16(q10s32, d8s16, d30s16); - q2s32 = vmlal_s16(q2s32, d9s16, d30s16); - - *q8s16 = vaddq_s16(*q11s16, *q9s16); - *q11s16 = vsubq_s16(*q11s16, *q9s16); - q4s16 = vaddq_s16(*q12s16, *q14s16); - *q12s16 = vsubq_s16(*q12s16, *q14s16); - - q14s32 = vaddq_s32(q5s32, q1s32); - q15s32 = vaddq_s32(q6s32, q3s32); - q5s32 = vsubq_s32(q5s32, q1s32); - q6s32 = vsubq_s32(q6s32, q3s32); - - d18s16 = vqrshrn_n_s32(q14s32, 14); - d19s16 = vqrshrn_n_s32(q15s32, 14); - d10s16 = vqrshrn_n_s32(q5s32, 14); - d11s16 = vqrshrn_n_s32(q6s32, 14); - *q9s16 = vcombine_s16(d18s16, d19s16); - - q1s32 = vaddq_s32(q7s32, q10s32); - q3s32 = vaddq_s32(q0s32, q2s32); - q7s32 = vsubq_s32(q7s32, q10s32); - q0s32 = vsubq_s32(q0s32, q2s32); - - d28s16 = vqrshrn_n_s32(q1s32, 14); - d29s16 = vqrshrn_n_s32(q3s32, 14); - d14s16 = vqrshrn_n_s32(q7s32, 14); - d15s16 = vqrshrn_n_s32(q0s32, 14); - *q14s16 = vcombine_s16(d28s16, d29s16); - - d30s16 = vdup_n_s16(cospi_16_64); 
- - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - q2s32 = vmull_s16(d22s16, d30s16); - q3s32 = vmull_s16(d23s16, d30s16); - q13s32 = vmull_s16(d22s16, d30s16); - q1s32 = vmull_s16(d23s16, d30s16); - - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - q2s32 = vmlal_s16(q2s32, d24s16, d30s16); - q3s32 = vmlal_s16(q3s32, d25s16, d30s16); - q13s32 = vmlsl_s16(q13s32, d24s16, d30s16); - q1s32 = vmlsl_s16(q1s32, d25s16, d30s16); - - d4s16 = vqrshrn_n_s32(q2s32, 14); - d5s16 = vqrshrn_n_s32(q3s32, 14); - d24s16 = vqrshrn_n_s32(q13s32, 14); - d25s16 = vqrshrn_n_s32(q1s32, 14); - q2s16 = vcombine_s16(d4s16, d5s16); - *q12s16 = vcombine_s16(d24s16, d25s16); - - q13s32 = vmull_s16(d10s16, d30s16); - q1s32 = vmull_s16(d11s16, d30s16); - q11s32 = vmull_s16(d10s16, d30s16); - q0s32 = vmull_s16(d11s16, d30s16); - - q13s32 = vmlal_s16(q13s32, d14s16, d30s16); - q1s32 = vmlal_s16(q1s32, d15s16, d30s16); - q11s32 = vmlsl_s16(q11s32, d14s16, d30s16); - q0s32 = vmlsl_s16(q0s32, d15s16, d30s16); - - d20s16 = vqrshrn_n_s32(q13s32, 14); - d21s16 = vqrshrn_n_s32(q1s32, 14); - d12s16 = vqrshrn_n_s32(q11s32, 14); - d13s16 = vqrshrn_n_s32(q0s32, 14); - *q10s16 = vcombine_s16(d20s16, d21s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - q5s16 = vdupq_n_s16(0); - - *q9s16 = vsubq_s16(q5s16, *q9s16); - *q11s16 = vsubq_s16(q5s16, q2s16); - *q13s16 = vsubq_s16(q5s16, q6s16); - *q15s16 = vsubq_s16(q5s16, q4s16); - return; -} - -void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, - int dest_stride, int tx_type) { - int i; - uint8_t *d1, *d2; - uint8x8_t d0u8, d1u8, d2u8, d3u8; - uint64x1_t d0u64, d1u64, d2u64, d3u64; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - uint16x8_t q8u16, q9u16, q10u16, q11u16; - - q8s16 = vld1q_s16(input); - q9s16 = vld1q_s16(input + 8); - q10s16 = vld1q_s16(input + 8 * 2); - q11s16 = vld1q_s16(input + 8 * 3); - q12s16 = vld1q_s16(input + 8 * 4); - q13s16 = vld1q_s16(input + 8 * 5); - 
q14s16 = vld1q_s16(input + 8 * 6); - q15s16 = vld1q_s16(input + 8 * 7); - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - switch (tx_type) { - case 0: // idct_idct is not supported. Fall back to C - vp9_iht8x8_64_add_c(input, dest, dest_stride, tx_type); - return; - break; - case 1: // iadst_idct - // generate IDCT constants - // GENERATE_IDCT_CONSTANTS - - // first transform rows - IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // transpose the matrix - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // generate IADST constants - // GENERATE_IADST_CONSTANTS - - // then transform columns - IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - break; - case 2: // idct_iadst - // generate IADST constants - // GENERATE_IADST_CONSTANTS - - // first transform rows - IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // transpose the matrix - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // generate IDCT constants - // GENERATE_IDCT_CONSTANTS - - // then transform columns - IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - break; - case 3: // iadst_iadst - // generate IADST constants - // GENERATE_IADST_CONSTANTS - - // first transform rows - IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // transpose the matrix - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // then transform columns - IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - break; - default: // iadst_idct - assert(0); - break; - } - - q8s16 = vrshrq_n_s16(q8s16, 5); - q9s16 = vrshrq_n_s16(q9s16, 5); - q10s16 = vrshrq_n_s16(q10s16, 5); - q11s16 = vrshrq_n_s16(q11s16, 5); - q12s16 = vrshrq_n_s16(q12s16, 5); - q13s16 = 
vrshrq_n_s16(q13s16, 5); - q14s16 = vrshrq_n_s16(q14s16, 5); - q15s16 = vrshrq_n_s16(q15s16, 5); - - for (d1 = d2 = dest, i = 0; i < 2; i++) { - if (i != 0) { - q8s16 = q12s16; - q9s16 = q13s16; - q10s16 = q14s16; - q11s16 = q15s16; - } - - d0u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d1u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d2u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d3u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), - vreinterpret_u8_u64(d0u64)); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), - vreinterpret_u8_u64(d1u64)); - q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), - vreinterpret_u8_u64(d2u64)); - q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), - vreinterpret_u8_u64(d3u64)); - - d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); - d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); - - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); - d2 += dest_stride; - } - return; -} diff --git a/thirdparty/libvpx/vp9/common/vp9_alloccommon.c b/thirdparty/libvpx/vp9/common/vp9_alloccommon.c deleted file mode 100644 index 7dd1005d3f..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_alloccommon.c +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "./vpx_config.h" -#include "vpx_mem/vpx_mem.h" - -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_entropymode.h" -#include "vp9/common/vp9_entropymv.h" -#include "vp9/common/vp9_onyxc_int.h" - -// TODO(hkuang): Don't need to lock the whole pool after implementing atomic -// frame reference count. -void lock_buffer_pool(BufferPool *const pool) { -#if CONFIG_MULTITHREAD - pthread_mutex_lock(&pool->pool_mutex); -#else - (void)pool; -#endif -} - -void unlock_buffer_pool(BufferPool *const pool) { -#if CONFIG_MULTITHREAD - pthread_mutex_unlock(&pool->pool_mutex); -#else - (void)pool; -#endif -} - -void vp9_set_mb_mi(VP9_COMMON *cm, int width, int height) { - const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2); - const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2); - - cm->mi_cols = aligned_width >> MI_SIZE_LOG2; - cm->mi_rows = aligned_height >> MI_SIZE_LOG2; - cm->mi_stride = calc_mi_size(cm->mi_cols); - - cm->mb_cols = (cm->mi_cols + 1) >> 1; - cm->mb_rows = (cm->mi_rows + 1) >> 1; - cm->MBs = cm->mb_rows * cm->mb_cols; -} - -static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) { - int i; - - for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) { - cm->seg_map_array[i] = (uint8_t *)vpx_calloc(seg_map_size, 1); - if (cm->seg_map_array[i] == NULL) - return 1; - } - cm->seg_map_alloc_size = seg_map_size; - - // Init the index. 
- cm->seg_map_idx = 0; - cm->prev_seg_map_idx = 1; - - cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx]; - if (!cm->frame_parallel_decode) - cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx]; - - return 0; -} - -static void free_seg_map(VP9_COMMON *cm) { - int i; - - for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) { - vpx_free(cm->seg_map_array[i]); - cm->seg_map_array[i] = NULL; - } - - cm->current_frame_seg_map = NULL; - - if (!cm->frame_parallel_decode) { - cm->last_frame_seg_map = NULL; - } -} - -void vp9_free_ref_frame_buffers(BufferPool *pool) { - int i; - - for (i = 0; i < FRAME_BUFFERS; ++i) { - if (pool->frame_bufs[i].ref_count > 0 && - pool->frame_bufs[i].raw_frame_buffer.data != NULL) { - pool->release_fb_cb(pool->cb_priv, &pool->frame_bufs[i].raw_frame_buffer); - pool->frame_bufs[i].ref_count = 0; - } - vpx_free(pool->frame_bufs[i].mvs); - pool->frame_bufs[i].mvs = NULL; - vpx_free_frame_buffer(&pool->frame_bufs[i].buf); - } -} - -void vp9_free_postproc_buffers(VP9_COMMON *cm) { -#if CONFIG_VP9_POSTPROC - vpx_free_frame_buffer(&cm->post_proc_buffer); - vpx_free_frame_buffer(&cm->post_proc_buffer_int); -#else - (void)cm; -#endif -} - -void vp9_free_context_buffers(VP9_COMMON *cm) { - cm->free_mi(cm); - free_seg_map(cm); - vpx_free(cm->above_context); - cm->above_context = NULL; - vpx_free(cm->above_seg_context); - cm->above_seg_context = NULL; - vpx_free(cm->lf.lfm); - cm->lf.lfm = NULL; -} - - -int vp9_alloc_loop_filter(VP9_COMMON *cm) { - vpx_free(cm->lf.lfm); - // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The - // stride and rows are rounded up / truncated to a multiple of 8. 
- cm->lf.lfm_stride = (cm->mi_cols + (MI_BLOCK_SIZE - 1)) >> 3; - cm->lf.lfm = (LOOP_FILTER_MASK *)vpx_calloc( - ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride, - sizeof(*cm->lf.lfm)); - if (!cm->lf.lfm) - return 1; - return 0; -} - -int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) { - int new_mi_size; - - vp9_set_mb_mi(cm, width, height); - new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows); - if (cm->mi_alloc_size < new_mi_size) { - cm->free_mi(cm); - if (cm->alloc_mi(cm, new_mi_size)) - goto fail; - } - - if (cm->seg_map_alloc_size < cm->mi_rows * cm->mi_cols) { - // Create the segmentation map structure and set to 0. - free_seg_map(cm); - if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols)) - goto fail; - } - - if (cm->above_context_alloc_cols < cm->mi_cols) { - vpx_free(cm->above_context); - cm->above_context = (ENTROPY_CONTEXT *)vpx_calloc( - 2 * mi_cols_aligned_to_sb(cm->mi_cols) * MAX_MB_PLANE, - sizeof(*cm->above_context)); - if (!cm->above_context) goto fail; - - vpx_free(cm->above_seg_context); - cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc( - mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_seg_context)); - if (!cm->above_seg_context) goto fail; - cm->above_context_alloc_cols = cm->mi_cols; - } - - if (vp9_alloc_loop_filter(cm)) - goto fail; - - return 0; - - fail: - vp9_free_context_buffers(cm); - return 1; -} - -void vp9_remove_common(VP9_COMMON *cm) { - vp9_free_context_buffers(cm); - - vpx_free(cm->fc); - cm->fc = NULL; - vpx_free(cm->frame_contexts); - cm->frame_contexts = NULL; -} - -void vp9_init_context_buffers(VP9_COMMON *cm) { - cm->setup_mi(cm); - if (cm->last_frame_seg_map && !cm->frame_parallel_decode) - memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols); -} - -void vp9_swap_current_and_last_seg_map(VP9_COMMON *cm) { - // Swap indices. 
- const int tmp = cm->seg_map_idx; - cm->seg_map_idx = cm->prev_seg_map_idx; - cm->prev_seg_map_idx = tmp; - - cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx]; - cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx]; -} diff --git a/thirdparty/libvpx/vp9/common/vp9_alloccommon.h b/thirdparty/libvpx/vp9/common/vp9_alloccommon.h deleted file mode 100644 index e53955b998..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_alloccommon.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_COMMON_VP9_ALLOCCOMMON_H_ -#define VP9_COMMON_VP9_ALLOCCOMMON_H_ - -#define INVALID_IDX -1 // Invalid buffer index. 
- -#ifdef __cplusplus -extern "C" { -#endif - -struct VP9Common; -struct BufferPool; - -void vp9_remove_common(struct VP9Common *cm); - -int vp9_alloc_loop_filter(struct VP9Common *cm); -int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height); -void vp9_init_context_buffers(struct VP9Common *cm); -void vp9_free_context_buffers(struct VP9Common *cm); - -void vp9_free_ref_frame_buffers(struct BufferPool *pool); -void vp9_free_postproc_buffers(struct VP9Common *cm); - -int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height); -void vp9_free_state_buffers(struct VP9Common *cm); - -void vp9_set_mb_mi(struct VP9Common *cm, int width, int height); - -void vp9_swap_current_and_last_seg_map(struct VP9Common *cm); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_ALLOCCOMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_blockd.c b/thirdparty/libvpx/vp9/common/vp9_blockd.c deleted file mode 100644 index 7bab27d4fd..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_blockd.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "vp9/common/vp9_blockd.h" - -PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi, - const MODE_INFO *left_mi, int b) { - if (b == 0 || b == 2) { - if (!left_mi || is_inter_block(left_mi)) - return DC_PRED; - - return get_y_mode(left_mi, b + 1); - } else { - assert(b == 1 || b == 3); - return cur_mi->bmi[b - 1].as_mode; - } -} - -PREDICTION_MODE vp9_above_block_mode(const MODE_INFO *cur_mi, - const MODE_INFO *above_mi, int b) { - if (b == 0 || b == 1) { - if (!above_mi || is_inter_block(above_mi)) - return DC_PRED; - - return get_y_mode(above_mi, b + 2); - } else { - assert(b == 2 || b == 3); - return cur_mi->bmi[b - 2].as_mode; - } -} - -void vp9_foreach_transformed_block_in_plane( - const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, - foreach_transformed_block_visitor visit, void *arg) { - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const MODE_INFO* mi = xd->mi[0]; - // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") - // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 - // transform size varies per plane, look it up in a common way. - const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) - : mi->tx_size; - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); - const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; - const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; - const int step = 1 << (tx_size << 1); - int i = 0, r, c; - - // If mb_to_right_edge is < 0 we are in a situation in which - // the current block size extends into the UMV and we won't - // visit the sub blocks that are wholly within the UMV. - const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : - xd->mb_to_right_edge >> (5 + pd->subsampling_x)); - const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 
0 : - xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); - const int extra_step = ((num_4x4_w - max_blocks_wide) >> tx_size) * step; - - // Keep track of the row and column of the blocks we use so that we know - // if we are in the unrestricted motion border. - for (r = 0; r < max_blocks_high; r += (1 << tx_size)) { - // Skip visiting the sub blocks that are wholly within the UMV. - for (c = 0; c < max_blocks_wide; c += (1 << tx_size)) { - visit(plane, i, plane_bsize, tx_size, arg); - i += step; - } - i += extra_step; - } -} - -void vp9_foreach_transformed_block(const MACROBLOCKD* const xd, - BLOCK_SIZE bsize, - foreach_transformed_block_visitor visit, - void *arg) { - int plane; - - for (plane = 0; plane < MAX_MB_PLANE; ++plane) - vp9_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg); -} - -void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, - int aoff, int loff) { - ENTROPY_CONTEXT *const a = pd->above_context + aoff; - ENTROPY_CONTEXT *const l = pd->left_context + loff; - const int tx_size_in_blocks = 1 << tx_size; - - // above - if (has_eob && xd->mb_to_right_edge < 0) { - int i; - const int blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize] + - (xd->mb_to_right_edge >> (5 + pd->subsampling_x)); - int above_contexts = tx_size_in_blocks; - if (above_contexts + aoff > blocks_wide) - above_contexts = blocks_wide - aoff; - - for (i = 0; i < above_contexts; ++i) - a[i] = has_eob; - for (i = above_contexts; i < tx_size_in_blocks; ++i) - a[i] = 0; - } else { - memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); - } - - // left - if (has_eob && xd->mb_to_bottom_edge < 0) { - int i; - const int blocks_high = num_4x4_blocks_high_lookup[plane_bsize] + - (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); - int left_contexts = tx_size_in_blocks; - if (left_contexts + loff > blocks_high) - left_contexts = blocks_high - loff; - - for (i = 0; i < left_contexts; ++i) - 
l[i] = has_eob; - for (i = left_contexts; i < tx_size_in_blocks; ++i) - l[i] = 0; - } else { - memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); - } -} - -void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y) { - int i; - - for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].subsampling_x = i ? ss_x : 0; - xd->plane[i].subsampling_y = i ? ss_y : 0; - } -} diff --git a/thirdparty/libvpx/vp9/common/vp9_blockd.h b/thirdparty/libvpx/vp9/common/vp9_blockd.h deleted file mode 100644 index 3d26fb2b5d..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_blockd.h +++ /dev/null @@ -1,305 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_COMMON_VP9_BLOCKD_H_ -#define VP9_COMMON_VP9_BLOCKD_H_ - -#include "./vpx_config.h" - -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_ports/mem.h" -#include "vpx_scale/yv12config.h" - -#include "vp9/common/vp9_common_data.h" -#include "vp9/common/vp9_entropy.h" -#include "vp9/common/vp9_entropymode.h" -#include "vp9/common/vp9_mv.h" -#include "vp9/common/vp9_scale.h" -#include "vp9/common/vp9_seg_common.h" -#include "vp9/common/vp9_tile_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_MB_PLANE 3 - -typedef enum { - KEY_FRAME = 0, - INTER_FRAME = 1, - FRAME_TYPES, -} FRAME_TYPE; - -static INLINE int is_inter_mode(PREDICTION_MODE mode) { - return mode >= NEARESTMV && mode <= NEWMV; -} - -/* For keyframes, intra block modes are predicted by the (already decoded) - modes for the Y blocks to the left and above us; for interframes, there - is a single probability table. 
*/ - -typedef struct { - PREDICTION_MODE as_mode; - int_mv as_mv[2]; // first, second inter predictor motion vectors -} b_mode_info; - -// Note that the rate-distortion optimization loop, bit-stream writer, and -// decoder implementation modules critically rely on the defined entry values -// specified herein. They should be refactored concurrently. - -#define NONE -1 -#define INTRA_FRAME 0 -#define LAST_FRAME 1 -#define GOLDEN_FRAME 2 -#define ALTREF_FRAME 3 -#define MAX_REF_FRAMES 4 -typedef int8_t MV_REFERENCE_FRAME; - -// This structure now relates to 8x8 block regions. -typedef struct MODE_INFO { - // Common for both INTER and INTRA blocks - BLOCK_SIZE sb_type; - PREDICTION_MODE mode; - TX_SIZE tx_size; - int8_t skip; - int8_t segment_id; - int8_t seg_id_predicted; // valid only when temporal_update is enabled - - // Only for INTRA blocks - PREDICTION_MODE uv_mode; - - // Only for INTER blocks - INTERP_FILTER interp_filter; - MV_REFERENCE_FRAME ref_frame[2]; - - // TODO(slavarnway): Delete and use bmi[3].as_mv[] instead. - int_mv mv[2]; - - b_mode_info bmi[4]; -} MODE_INFO; - -static INLINE PREDICTION_MODE get_y_mode(const MODE_INFO *mi, int block) { - return mi->sb_type < BLOCK_8X8 ? 
mi->bmi[block].as_mode - : mi->mode; -} - -static INLINE int is_inter_block(const MODE_INFO *mi) { - return mi->ref_frame[0] > INTRA_FRAME; -} - -static INLINE int has_second_ref(const MODE_INFO *mi) { - return mi->ref_frame[1] > INTRA_FRAME; -} - -PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi, - const MODE_INFO *left_mi, int b); - -PREDICTION_MODE vp9_above_block_mode(const MODE_INFO *cur_mi, - const MODE_INFO *above_mi, int b); - -enum mv_precision { - MV_PRECISION_Q3, - MV_PRECISION_Q4 -}; - -struct buf_2d { - uint8_t *buf; - int stride; -}; - -struct macroblockd_plane { - tran_low_t *dqcoeff; - int subsampling_x; - int subsampling_y; - struct buf_2d dst; - struct buf_2d pre[2]; - ENTROPY_CONTEXT *above_context; - ENTROPY_CONTEXT *left_context; - int16_t seg_dequant[MAX_SEGMENTS][2]; - - // number of 4x4s in current block - uint16_t n4_w, n4_h; - // log2 of n4_w, n4_h - uint8_t n4_wl, n4_hl; - - // encoder - const int16_t *dequant; -}; - -#define BLOCK_OFFSET(x, i) ((x) + (i) * 16) - -typedef struct RefBuffer { - // TODO(dkovalev): idx is not really required and should be removed, now it - // is used in vp9_onyxd_if.c - int idx; - YV12_BUFFER_CONFIG *buf; - struct scale_factors sf; -} RefBuffer; - -typedef struct macroblockd { - struct macroblockd_plane plane[MAX_MB_PLANE]; - uint8_t bmode_blocks_wl; - uint8_t bmode_blocks_hl; - - FRAME_COUNTS *counts; - TileInfo tile; - - int mi_stride; - - MODE_INFO **mi; - MODE_INFO *left_mi; - MODE_INFO *above_mi; - - unsigned int max_blocks_wide; - unsigned int max_blocks_high; - - const vpx_prob (*partition_probs)[PARTITION_TYPES - 1]; - - /* Distance of MB away from frame edges */ - int mb_to_left_edge; - int mb_to_right_edge; - int mb_to_top_edge; - int mb_to_bottom_edge; - - FRAME_CONTEXT *fc; - - /* pointers to reference frames */ - RefBuffer *block_refs[2]; - - /* pointer to current frame */ - const YV12_BUFFER_CONFIG *cur_buf; - - ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; - ENTROPY_CONTEXT 
left_context[MAX_MB_PLANE][16]; - - PARTITION_CONTEXT *above_seg_context; - PARTITION_CONTEXT left_seg_context[8]; - -#if CONFIG_VP9_HIGHBITDEPTH - /* Bit depth: 8, 10, 12 */ - int bd; -#endif - - int lossless; - int corrupted; - - struct vpx_internal_error_info *error_info; -} MACROBLOCKD; - -static INLINE PLANE_TYPE get_plane_type(int plane) { - return (PLANE_TYPE)(plane > 0); -} - -static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize, - PARTITION_TYPE partition) { - return subsize_lookup[partition][bsize]; -} - -extern const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES]; - -static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, - const MACROBLOCKD *xd) { - const MODE_INFO *const mi = xd->mi[0]; - - if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi)) - return DCT_DCT; - - return intra_mode_to_tx_type_lookup[mi->mode]; -} - -static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type, - const MACROBLOCKD *xd, int ib) { - const MODE_INFO *const mi = xd->mi[0]; - - if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi)) - return DCT_DCT; - - return intra_mode_to_tx_type_lookup[get_y_mode(mi, ib)]; -} - -void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y); - -static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize, - int xss, int yss) { - if (bsize < BLOCK_8X8) { - return TX_4X4; - } else { - const BLOCK_SIZE plane_bsize = ss_size_lookup[bsize][xss][yss]; - return VPXMIN(y_tx_size, max_txsize_lookup[plane_bsize]); - } -} - -static INLINE TX_SIZE get_uv_tx_size(const MODE_INFO *mi, - const struct macroblockd_plane *pd) { - return get_uv_tx_size_impl(mi->tx_size, mi->sb_type, pd->subsampling_x, - pd->subsampling_y); -} - -static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize, - const struct macroblockd_plane *pd) { - return ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y]; -} - -static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) { - int i; - for (i = 0; i 
< MAX_MB_PLANE; i++) { - struct macroblockd_plane *const pd = &xd->plane[i]; - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); - memset(pd->above_context, 0, - sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide_lookup[plane_bsize]); - memset(pd->left_context, 0, - sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high_lookup[plane_bsize]); - } -} - -static INLINE const vpx_prob *get_y_mode_probs(const MODE_INFO *mi, - const MODE_INFO *above_mi, - const MODE_INFO *left_mi, - int block) { - const PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi, block); - const PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi, block); - return vp9_kf_y_mode_prob[above][left]; -} - -typedef void (*foreach_transformed_block_visitor)(int plane, int block, - BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, - void *arg); - -void vp9_foreach_transformed_block_in_plane( - const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, - foreach_transformed_block_visitor visit, void *arg); - - -void vp9_foreach_transformed_block( - const MACROBLOCKD* const xd, BLOCK_SIZE bsize, - foreach_transformed_block_visitor visit, void *arg); - -static INLINE void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, int block, - int *x, int *y) { - const int bwl = b_width_log2_lookup[plane_bsize]; - const int tx_cols_log2 = bwl - tx_size; - const int tx_cols = 1 << tx_cols_log2; - const int raster_mb = block >> (tx_size << 1); - *x = (raster_mb & (tx_cols - 1)) << tx_size; - *y = (raster_mb >> tx_cols_log2) << tx_size; -} - -void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, - int aoff, int loff); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_BLOCKD_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_common.h b/thirdparty/libvpx/vp9/common/vp9_common.h deleted file mode 100644 index 908fa80a31..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_common.h +++ /dev/null @@ -1,74 
+0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_COMMON_H_ -#define VP9_COMMON_VP9_COMMON_H_ - -/* Interface header for common constant data structures and lookup tables */ - -#include <assert.h> - -#include "./vpx_config.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx/vpx_integer.h" -#include "vpx_ports/bitops.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Only need this for fixed-size arrays, for structs just assign. -#define vp9_copy(dest, src) { \ - assert(sizeof(dest) == sizeof(src)); \ - memcpy(dest, src, sizeof(src)); \ - } - -// Use this for variably-sized arrays. -#define vp9_copy_array(dest, src, n) { \ - assert(sizeof(*dest) == sizeof(*src)); \ - memcpy(dest, src, n * sizeof(*src)); \ - } - -#define vp9_zero(dest) memset(&(dest), 0, sizeof(dest)) -#define vp9_zero_array(dest, n) memset(dest, 0, n * sizeof(*dest)) - -static INLINE int get_unsigned_bits(unsigned int num_values) { - return num_values > 0 ? 
get_msb(num_values) + 1 : 0; -} - -#if CONFIG_DEBUG -#define CHECK_MEM_ERROR(cm, lval, expr) do { \ - lval = (expr); \ - if (!lval) \ - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, \ - "Failed to allocate "#lval" at %s:%d", \ - __FILE__, __LINE__); \ - } while (0) -#else -#define CHECK_MEM_ERROR(cm, lval, expr) do { \ - lval = (expr); \ - if (!lval) \ - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, \ - "Failed to allocate "#lval); \ - } while (0) -#endif - -#define VP9_SYNC_CODE_0 0x49 -#define VP9_SYNC_CODE_1 0x83 -#define VP9_SYNC_CODE_2 0x42 - -#define VP9_FRAME_MARKER 0x2 - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_common_data.c b/thirdparty/libvpx/vp9/common/vp9_common_data.c deleted file mode 100644 index 3409d04844..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_common_data.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "vp9/common/vp9_common_data.h" -#include "vpx_dsp/vpx_dsp_common.h" - -// Log 2 conversion lookup tables for block width and height -const uint8_t b_width_log2_lookup[BLOCK_SIZES] = - {0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4}; -const uint8_t b_height_log2_lookup[BLOCK_SIZES] = - {0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4}; -const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] = - {1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16}; -const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] = - {1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16}; -// Log 2 conversion lookup tables for modeinfo width and height -const uint8_t mi_width_log2_lookup[BLOCK_SIZES] = - {0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3}; -const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] = - {1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8}; -const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] = - {1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8}; - -// VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize))) -const uint8_t size_group_lookup[BLOCK_SIZES] = - {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3}; - -const uint8_t num_pels_log2_lookup[BLOCK_SIZES] = - {4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12}; - -const PARTITION_TYPE partition_lookup[][BLOCK_SIZES] = { - { // 4X4 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 - PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, - PARTITION_INVALID - }, { // 8X8 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 - PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, - PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID - }, { // 16X16 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 - PARTITION_SPLIT, 
PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, - PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID - }, { // 32X32 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 - PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, - PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, - PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, - PARTITION_INVALID, PARTITION_INVALID - }, { // 64X64 - // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 - PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, - PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, - PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, - PARTITION_NONE - } -}; - -const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] = { - { // PARTITION_NONE - BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, - BLOCK_8X8, BLOCK_8X16, BLOCK_16X8, - BLOCK_16X16, BLOCK_16X32, BLOCK_32X16, - BLOCK_32X32, BLOCK_32X64, BLOCK_64X32, - BLOCK_64X64, - }, { // PARTITION_HORZ - BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_64X32, - }, { // PARTITION_VERT - BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_32X64, - }, { // PARTITION_SPLIT - BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_4X4, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_8X8, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_16X16, BLOCK_INVALID, BLOCK_INVALID, - BLOCK_32X32, - } -}; - -const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = { - TX_4X4, TX_4X4, TX_4X4, - TX_8X8, TX_8X8, TX_8X8, - TX_16X16, TX_16X16, TX_16X16, - TX_32X32, TX_32X32, TX_32X32, TX_32X32 -}; - -const BLOCK_SIZE 
txsize_to_bsize[TX_SIZES] = { - BLOCK_4X4, // TX_4X4 - BLOCK_8X8, // TX_8X8 - BLOCK_16X16, // TX_16X16 - BLOCK_32X32, // TX_32X32 -}; - -const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = { - TX_4X4, // ONLY_4X4 - TX_8X8, // ALLOW_8X8 - TX_16X16, // ALLOW_16X16 - TX_32X32, // ALLOW_32X32 - TX_32X32, // TX_MODE_SELECT -}; - -const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = { -// ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1 -// ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1 - {{BLOCK_4X4, BLOCK_INVALID}, {BLOCK_INVALID, BLOCK_INVALID}}, - {{BLOCK_4X8, BLOCK_4X4}, {BLOCK_INVALID, BLOCK_INVALID}}, - {{BLOCK_8X4, BLOCK_INVALID}, {BLOCK_4X4, BLOCK_INVALID}}, - {{BLOCK_8X8, BLOCK_8X4}, {BLOCK_4X8, BLOCK_4X4}}, - {{BLOCK_8X16, BLOCK_8X8}, {BLOCK_INVALID, BLOCK_4X8}}, - {{BLOCK_16X8, BLOCK_INVALID}, {BLOCK_8X8, BLOCK_8X4}}, - {{BLOCK_16X16, BLOCK_16X8}, {BLOCK_8X16, BLOCK_8X8}}, - {{BLOCK_16X32, BLOCK_16X16}, {BLOCK_INVALID, BLOCK_8X16}}, - {{BLOCK_32X16, BLOCK_INVALID}, {BLOCK_16X16, BLOCK_16X8}}, - {{BLOCK_32X32, BLOCK_32X16}, {BLOCK_16X32, BLOCK_16X16}}, - {{BLOCK_32X64, BLOCK_32X32}, {BLOCK_INVALID, BLOCK_16X32}}, - {{BLOCK_64X32, BLOCK_INVALID}, {BLOCK_32X32, BLOCK_32X16}}, - {{BLOCK_64X64, BLOCK_64X32}, {BLOCK_32X64, BLOCK_32X32}}, -}; - -// Generates 4 bit field in which each bit set to 1 represents -// a blocksize partition 1111 means we split 64x64, 32x32, 16x16 -// and 8x8. 
1000 means we just split the 64x64 to 32x32 -const struct { - PARTITION_CONTEXT above; - PARTITION_CONTEXT left; -} partition_context_lookup[BLOCK_SIZES]= { - {15, 15}, // 4X4 - {0b1111, 0b1111} - {15, 14}, // 4X8 - {0b1111, 0b1110} - {14, 15}, // 8X4 - {0b1110, 0b1111} - {14, 14}, // 8X8 - {0b1110, 0b1110} - {14, 12}, // 8X16 - {0b1110, 0b1100} - {12, 14}, // 16X8 - {0b1100, 0b1110} - {12, 12}, // 16X16 - {0b1100, 0b1100} - {12, 8 }, // 16X32 - {0b1100, 0b1000} - {8, 12}, // 32X16 - {0b1000, 0b1100} - {8, 8 }, // 32X32 - {0b1000, 0b1000} - {8, 0 }, // 32X64 - {0b1000, 0b0000} - {0, 8 }, // 64X32 - {0b0000, 0b1000} - {0, 0 }, // 64X64 - {0b0000, 0b0000} -}; - -#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH -const uint8_t need_top_left[INTRA_MODES] = { - 0, // DC_PRED - 0, // V_PRED - 0, // H_PRED - 0, // D45_PRED - 1, // D135_PRED - 1, // D117_PRED - 1, // D153_PRED - 0, // D207_PRED - 0, // D63_PRED - 1, // TM_PRED -}; -#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vp9/common/vp9_common_data.h b/thirdparty/libvpx/vp9/common/vp9_common_data.h deleted file mode 100644 index 0ae24dad54..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_common_data.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_COMMON_VP9_COMMON_DATA_H_ -#define VP9_COMMON_VP9_COMMON_DATA_H_ - -#include "vp9/common/vp9_enums.h" -#include "vpx/vpx_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -extern const uint8_t b_width_log2_lookup[BLOCK_SIZES]; -extern const uint8_t b_height_log2_lookup[BLOCK_SIZES]; -extern const uint8_t mi_width_log2_lookup[BLOCK_SIZES]; -extern const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES]; -extern const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES]; -extern const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES]; -extern const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES]; -extern const uint8_t size_group_lookup[BLOCK_SIZES]; -extern const uint8_t num_pels_log2_lookup[BLOCK_SIZES]; -extern const PARTITION_TYPE partition_lookup[][BLOCK_SIZES]; -extern const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES]; -extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES]; -extern const BLOCK_SIZE txsize_to_bsize[TX_SIZES]; -extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES]; -extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2]; -#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH -extern const uint8_t need_top_left[INTRA_MODES]; -#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_COMMON_DATA_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_debugmodes.c b/thirdparty/libvpx/vp9/common/vp9_debugmodes.c deleted file mode 100644 index d9c1fd9686..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_debugmodes.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <stdio.h> - -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_onyxc_int.h" - -static void log_frame_info(VP9_COMMON *cm, const char *str, FILE *f) { - fprintf(f, "%s", str); - fprintf(f, "(Frame %d, Show:%d, Q:%d): \n", cm->current_video_frame, - cm->show_frame, cm->base_qindex); -} -/* This function dereferences a pointer to the mbmi structure - * and uses the passed in member offset to print out the value of an integer - * for each mbmi member value in the mi structure. - */ -static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor, - size_t member_offset) { - int mi_row, mi_col; - MODE_INFO **mi = cm->mi_grid_visible; - int rows = cm->mi_rows; - int cols = cm->mi_cols; - char prefix = descriptor[0]; - - log_frame_info(cm, descriptor, file); - for (mi_row = 0; mi_row < rows; mi_row++) { - fprintf(file, "%c ", prefix); - for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(file, "%2d ", - *((int*) ((char *) (mi[0]) + - member_offset))); - mi++; - } - fprintf(file, "\n"); - mi += 8; - } - fprintf(file, "\n"); -} - -void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) { - int mi_row; - int mi_col; - FILE *mvs = fopen(file, "a"); - MODE_INFO **mi = cm->mi_grid_visible; - int rows = cm->mi_rows; - int cols = cm->mi_cols; - - print_mi_data(cm, mvs, "Partitions:", offsetof(MODE_INFO, sb_type)); - print_mi_data(cm, mvs, "Modes:", offsetof(MODE_INFO, mode)); - print_mi_data(cm, mvs, "Ref frame:", offsetof(MODE_INFO, ref_frame[0])); - print_mi_data(cm, mvs, "Transform:", offsetof(MODE_INFO, tx_size)); - print_mi_data(cm, mvs, "UV Modes:", offsetof(MODE_INFO, uv_mode)); - - // output skip infomation. 
- log_frame_info(cm, "Skips:", mvs); - for (mi_row = 0; mi_row < rows; mi_row++) { - fprintf(mvs, "S "); - for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(mvs, "%2d ", mi[0]->skip); - mi++; - } - fprintf(mvs, "\n"); - mi += 8; - } - fprintf(mvs, "\n"); - - // output motion vectors. - log_frame_info(cm, "Vectors ", mvs); - mi = cm->mi_grid_visible; - for (mi_row = 0; mi_row < rows; mi_row++) { - fprintf(mvs, "V "); - for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(mvs, "%4d:%4d ", mi[0]->mv[0].as_mv.row, - mi[0]->mv[0].as_mv.col); - mi++; - } - fprintf(mvs, "\n"); - mi += 8; - } - fprintf(mvs, "\n"); - - fclose(mvs); -} diff --git a/thirdparty/libvpx/vp9/common/vp9_entropy.c b/thirdparty/libvpx/vp9/common/vp9_entropy.c deleted file mode 100644 index 7b490af34f..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_entropy.c +++ /dev/null @@ -1,802 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "vp9/common/vp9_entropy.h" -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_onyxc_int.h" -#include "vp9/common/vp9_entropymode.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx/vpx_integer.h" - -// Unconstrained Node Tree -const vpx_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = { - 2, 6, // 0 = LOW_VAL - -TWO_TOKEN, 4, // 1 = TWO - -THREE_TOKEN, -FOUR_TOKEN, // 2 = THREE - 8, 10, // 3 = HIGH_LOW - -CATEGORY1_TOKEN, -CATEGORY2_TOKEN, // 4 = CAT_ONE - 12, 14, // 5 = CAT_THREEFOUR - -CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 6 = CAT_THREE - -CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 7 = CAT_FIVE -}; - -const vpx_prob vp9_cat1_prob[] = { 159 }; -const vpx_prob vp9_cat2_prob[] = { 165, 145 }; -const vpx_prob vp9_cat3_prob[] = { 173, 148, 140 }; -const vpx_prob vp9_cat4_prob[] = { 176, 155, 140, 135 }; -const vpx_prob vp9_cat5_prob[] = { 180, 157, 141, 134, 130 }; -const vpx_prob vp9_cat6_prob[] = { - 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 -}; -#if CONFIG_VP9_HIGHBITDEPTH -const vpx_prob vp9_cat6_prob_high12[] = { - 255, 255, 255, 255, 254, 254, 254, 252, 249, - 243, 230, 196, 177, 153, 140, 133, 130, 129 -}; -#endif - -const uint8_t vp9_coefband_trans_8x8plus[1024] = { - 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 5, - // beyond MAXBAND_INDEX+1 all values are filled as 5 - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, -}; - -const uint8_t vp9_coefband_trans_4x4[16] = { - 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, -}; - -const uint8_t vp9_pt_energy_class[ENTROPY_TOKENS] = { - 0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5 -}; - -// Model obtained from a 2-sided zero-centerd distribuition derived -// from a Pareto distribution. The cdf of the distribution is: -// cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta] -// -// For a given beta and a given probablity of the 1-node, the alpha -// is first solved, and then the {alpha, beta} pair is used to generate -// the probabilities for the rest of the nodes. 
- -// beta = 8 - -// Every odd line in this table can be generated from the even lines -// by averaging : -// vp9_pareto8_full[l][node] = (vp9_pareto8_full[l-1][node] + -// vp9_pareto8_full[l+1][node] ) >> 1; -const vpx_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = { - { 3, 86, 128, 6, 86, 23, 88, 29}, - { 6, 86, 128, 11, 87, 42, 91, 52}, - { 9, 86, 129, 17, 88, 61, 94, 76}, - { 12, 86, 129, 22, 88, 77, 97, 93}, - { 15, 87, 129, 28, 89, 93, 100, 110}, - { 17, 87, 129, 33, 90, 105, 103, 123}, - { 20, 88, 130, 38, 91, 118, 106, 136}, - { 23, 88, 130, 43, 91, 128, 108, 146}, - { 26, 89, 131, 48, 92, 139, 111, 156}, - { 28, 89, 131, 53, 93, 147, 114, 163}, - { 31, 90, 131, 58, 94, 156, 117, 171}, - { 34, 90, 131, 62, 94, 163, 119, 177}, - { 37, 90, 132, 66, 95, 171, 122, 184}, - { 39, 90, 132, 70, 96, 177, 124, 189}, - { 42, 91, 132, 75, 97, 183, 127, 194}, - { 44, 91, 132, 79, 97, 188, 129, 198}, - { 47, 92, 133, 83, 98, 193, 132, 202}, - { 49, 92, 133, 86, 99, 197, 134, 205}, - { 52, 93, 133, 90, 100, 201, 137, 208}, - { 54, 93, 133, 94, 100, 204, 139, 211}, - { 57, 94, 134, 98, 101, 208, 142, 214}, - { 59, 94, 134, 101, 102, 211, 144, 216}, - { 62, 94, 135, 105, 103, 214, 146, 218}, - { 64, 94, 135, 108, 103, 216, 148, 220}, - { 66, 95, 135, 111, 104, 219, 151, 222}, - { 68, 95, 135, 114, 105, 221, 153, 223}, - { 71, 96, 136, 117, 106, 224, 155, 225}, - { 73, 96, 136, 120, 106, 225, 157, 226}, - { 76, 97, 136, 123, 107, 227, 159, 228}, - { 78, 97, 136, 126, 108, 229, 160, 229}, - { 80, 98, 137, 129, 109, 231, 162, 231}, - { 82, 98, 137, 131, 109, 232, 164, 232}, - { 84, 98, 138, 134, 110, 234, 166, 233}, - { 86, 98, 138, 137, 111, 235, 168, 234}, - { 89, 99, 138, 140, 112, 236, 170, 235}, - { 91, 99, 138, 142, 112, 237, 171, 235}, - { 93, 100, 139, 145, 113, 238, 173, 236}, - { 95, 100, 139, 147, 114, 239, 174, 237}, - { 97, 101, 140, 149, 115, 240, 176, 238}, - { 99, 101, 140, 151, 115, 241, 177, 238}, - {101, 102, 140, 154, 116, 242, 179, 239}, - {103, 
102, 140, 156, 117, 242, 180, 239}, - {105, 103, 141, 158, 118, 243, 182, 240}, - {107, 103, 141, 160, 118, 243, 183, 240}, - {109, 104, 141, 162, 119, 244, 185, 241}, - {111, 104, 141, 164, 119, 244, 186, 241}, - {113, 104, 142, 166, 120, 245, 187, 242}, - {114, 104, 142, 168, 121, 245, 188, 242}, - {116, 105, 143, 170, 122, 246, 190, 243}, - {118, 105, 143, 171, 122, 246, 191, 243}, - {120, 106, 143, 173, 123, 247, 192, 244}, - {121, 106, 143, 175, 124, 247, 193, 244}, - {123, 107, 144, 177, 125, 248, 195, 244}, - {125, 107, 144, 178, 125, 248, 196, 244}, - {127, 108, 145, 180, 126, 249, 197, 245}, - {128, 108, 145, 181, 127, 249, 198, 245}, - {130, 109, 145, 183, 128, 249, 199, 245}, - {132, 109, 145, 184, 128, 249, 200, 245}, - {134, 110, 146, 186, 129, 250, 201, 246}, - {135, 110, 146, 187, 130, 250, 202, 246}, - {137, 111, 147, 189, 131, 251, 203, 246}, - {138, 111, 147, 190, 131, 251, 204, 246}, - {140, 112, 147, 192, 132, 251, 205, 247}, - {141, 112, 147, 193, 132, 251, 206, 247}, - {143, 113, 148, 194, 133, 251, 207, 247}, - {144, 113, 148, 195, 134, 251, 207, 247}, - {146, 114, 149, 197, 135, 252, 208, 248}, - {147, 114, 149, 198, 135, 252, 209, 248}, - {149, 115, 149, 199, 136, 252, 210, 248}, - {150, 115, 149, 200, 137, 252, 210, 248}, - {152, 115, 150, 201, 138, 252, 211, 248}, - {153, 115, 150, 202, 138, 252, 212, 248}, - {155, 116, 151, 204, 139, 253, 213, 249}, - {156, 116, 151, 205, 139, 253, 213, 249}, - {158, 117, 151, 206, 140, 253, 214, 249}, - {159, 117, 151, 207, 141, 253, 215, 249}, - {161, 118, 152, 208, 142, 253, 216, 249}, - {162, 118, 152, 209, 142, 253, 216, 249}, - {163, 119, 153, 210, 143, 253, 217, 249}, - {164, 119, 153, 211, 143, 253, 217, 249}, - {166, 120, 153, 212, 144, 254, 218, 250}, - {167, 120, 153, 212, 145, 254, 219, 250}, - {168, 121, 154, 213, 146, 254, 220, 250}, - {169, 121, 154, 214, 146, 254, 220, 250}, - {171, 122, 155, 215, 147, 254, 221, 250}, - {172, 122, 155, 216, 147, 254, 221, 250}, - {173, 123, 155, 217, 148, 
254, 222, 250}, - {174, 123, 155, 217, 149, 254, 222, 250}, - {176, 124, 156, 218, 150, 254, 223, 250}, - {177, 124, 156, 219, 150, 254, 223, 250}, - {178, 125, 157, 220, 151, 254, 224, 251}, - {179, 125, 157, 220, 151, 254, 224, 251}, - {180, 126, 157, 221, 152, 254, 225, 251}, - {181, 126, 157, 221, 152, 254, 225, 251}, - {183, 127, 158, 222, 153, 254, 226, 251}, - {184, 127, 158, 223, 154, 254, 226, 251}, - {185, 128, 159, 224, 155, 255, 227, 251}, - {186, 128, 159, 224, 155, 255, 227, 251}, - {187, 129, 160, 225, 156, 255, 228, 251}, - {188, 130, 160, 225, 156, 255, 228, 251}, - {189, 131, 160, 226, 157, 255, 228, 251}, - {190, 131, 160, 226, 158, 255, 228, 251}, - {191, 132, 161, 227, 159, 255, 229, 251}, - {192, 132, 161, 227, 159, 255, 229, 251}, - {193, 133, 162, 228, 160, 255, 230, 252}, - {194, 133, 162, 229, 160, 255, 230, 252}, - {195, 134, 163, 230, 161, 255, 231, 252}, - {196, 134, 163, 230, 161, 255, 231, 252}, - {197, 135, 163, 231, 162, 255, 231, 252}, - {198, 135, 163, 231, 162, 255, 231, 252}, - {199, 136, 164, 232, 163, 255, 232, 252}, - {200, 136, 164, 232, 164, 255, 232, 252}, - {201, 137, 165, 233, 165, 255, 233, 252}, - {201, 137, 165, 233, 165, 255, 233, 252}, - {202, 138, 166, 233, 166, 255, 233, 252}, - {203, 138, 166, 233, 166, 255, 233, 252}, - {204, 139, 166, 234, 167, 255, 234, 252}, - {205, 139, 166, 234, 167, 255, 234, 252}, - {206, 140, 167, 235, 168, 255, 235, 252}, - {206, 140, 167, 235, 168, 255, 235, 252}, - {207, 141, 168, 236, 169, 255, 235, 252}, - {208, 141, 168, 236, 170, 255, 235, 252}, - {209, 142, 169, 237, 171, 255, 236, 252}, - {209, 143, 169, 237, 171, 255, 236, 252}, - {210, 144, 169, 237, 172, 255, 236, 252}, - {211, 144, 169, 237, 172, 255, 236, 252}, - {212, 145, 170, 238, 173, 255, 237, 252}, - {213, 145, 170, 238, 173, 255, 237, 252}, - {214, 146, 171, 239, 174, 255, 237, 253}, - {214, 146, 171, 239, 174, 255, 237, 253}, - {215, 147, 172, 240, 175, 255, 238, 253}, - {215, 147, 172, 240, 175, 255, 238, 253}, - 
{216, 148, 173, 240, 176, 255, 238, 253}, - {217, 148, 173, 240, 176, 255, 238, 253}, - {218, 149, 173, 241, 177, 255, 239, 253}, - {218, 149, 173, 241, 178, 255, 239, 253}, - {219, 150, 174, 241, 179, 255, 239, 253}, - {219, 151, 174, 241, 179, 255, 239, 253}, - {220, 152, 175, 242, 180, 255, 240, 253}, - {221, 152, 175, 242, 180, 255, 240, 253}, - {222, 153, 176, 242, 181, 255, 240, 253}, - {222, 153, 176, 242, 181, 255, 240, 253}, - {223, 154, 177, 243, 182, 255, 240, 253}, - {223, 154, 177, 243, 182, 255, 240, 253}, - {224, 155, 178, 244, 183, 255, 241, 253}, - {224, 155, 178, 244, 183, 255, 241, 253}, - {225, 156, 178, 244, 184, 255, 241, 253}, - {225, 157, 178, 244, 184, 255, 241, 253}, - {226, 158, 179, 244, 185, 255, 242, 253}, - {227, 158, 179, 244, 185, 255, 242, 253}, - {228, 159, 180, 245, 186, 255, 242, 253}, - {228, 159, 180, 245, 186, 255, 242, 253}, - {229, 160, 181, 245, 187, 255, 242, 253}, - {229, 160, 181, 245, 187, 255, 242, 253}, - {230, 161, 182, 246, 188, 255, 243, 253}, - {230, 162, 182, 246, 188, 255, 243, 253}, - {231, 163, 183, 246, 189, 255, 243, 253}, - {231, 163, 183, 246, 189, 255, 243, 253}, - {232, 164, 184, 247, 190, 255, 243, 253}, - {232, 164, 184, 247, 190, 255, 243, 253}, - {233, 165, 185, 247, 191, 255, 244, 253}, - {233, 165, 185, 247, 191, 255, 244, 253}, - {234, 166, 185, 247, 192, 255, 244, 253}, - {234, 167, 185, 247, 192, 255, 244, 253}, - {235, 168, 186, 248, 193, 255, 244, 253}, - {235, 168, 186, 248, 193, 255, 244, 253}, - {236, 169, 187, 248, 194, 255, 244, 253}, - {236, 169, 187, 248, 194, 255, 244, 253}, - {236, 170, 188, 248, 195, 255, 245, 253}, - {236, 170, 188, 248, 195, 255, 245, 253}, - {237, 171, 189, 249, 196, 255, 245, 254}, - {237, 172, 189, 249, 196, 255, 245, 254}, - {238, 173, 190, 249, 197, 255, 245, 254}, - {238, 173, 190, 249, 197, 255, 245, 254}, - {239, 174, 191, 249, 198, 255, 245, 254}, - {239, 174, 191, 249, 198, 255, 245, 254}, - {240, 175, 192, 249, 199, 255, 246, 254}, - {240, 176, 192, 
249, 199, 255, 246, 254}, - {240, 177, 193, 250, 200, 255, 246, 254}, - {240, 177, 193, 250, 200, 255, 246, 254}, - {241, 178, 194, 250, 201, 255, 246, 254}, - {241, 178, 194, 250, 201, 255, 246, 254}, - {242, 179, 195, 250, 202, 255, 246, 254}, - {242, 180, 195, 250, 202, 255, 246, 254}, - {242, 181, 196, 250, 203, 255, 247, 254}, - {242, 181, 196, 250, 203, 255, 247, 254}, - {243, 182, 197, 251, 204, 255, 247, 254}, - {243, 183, 197, 251, 204, 255, 247, 254}, - {244, 184, 198, 251, 205, 255, 247, 254}, - {244, 184, 198, 251, 205, 255, 247, 254}, - {244, 185, 199, 251, 206, 255, 247, 254}, - {244, 185, 199, 251, 206, 255, 247, 254}, - {245, 186, 200, 251, 207, 255, 247, 254}, - {245, 187, 200, 251, 207, 255, 247, 254}, - {246, 188, 201, 252, 207, 255, 248, 254}, - {246, 188, 201, 252, 207, 255, 248, 254}, - {246, 189, 202, 252, 208, 255, 248, 254}, - {246, 190, 202, 252, 208, 255, 248, 254}, - {247, 191, 203, 252, 209, 255, 248, 254}, - {247, 191, 203, 252, 209, 255, 248, 254}, - {247, 192, 204, 252, 210, 255, 248, 254}, - {247, 193, 204, 252, 210, 255, 248, 254}, - {248, 194, 205, 252, 211, 255, 248, 254}, - {248, 194, 205, 252, 211, 255, 248, 254}, - {248, 195, 206, 252, 212, 255, 249, 254}, - {248, 196, 206, 252, 212, 255, 249, 254}, - {249, 197, 207, 253, 213, 255, 249, 254}, - {249, 197, 207, 253, 213, 255, 249, 254}, - {249, 198, 208, 253, 214, 255, 249, 254}, - {249, 199, 209, 253, 214, 255, 249, 254}, - {250, 200, 210, 253, 215, 255, 249, 254}, - {250, 200, 210, 253, 215, 255, 249, 254}, - {250, 201, 211, 253, 215, 255, 249, 254}, - {250, 202, 211, 253, 215, 255, 249, 254}, - {250, 203, 212, 253, 216, 255, 249, 254}, - {250, 203, 212, 253, 216, 255, 249, 254}, - {251, 204, 213, 253, 217, 255, 250, 254}, - {251, 205, 213, 253, 217, 255, 250, 254}, - {251, 206, 214, 254, 218, 255, 250, 254}, - {251, 206, 215, 254, 218, 255, 250, 254}, - {252, 207, 216, 254, 219, 255, 250, 254}, - {252, 208, 216, 254, 219, 255, 250, 254}, - {252, 209, 217, 254, 220, 255, 250, 
254}, - {252, 210, 217, 254, 220, 255, 250, 254}, - {252, 211, 218, 254, 221, 255, 250, 254}, - {252, 212, 218, 254, 221, 255, 250, 254}, - {253, 213, 219, 254, 222, 255, 250, 254}, - {253, 213, 220, 254, 222, 255, 250, 254}, - {253, 214, 221, 254, 223, 255, 250, 254}, - {253, 215, 221, 254, 223, 255, 250, 254}, - {253, 216, 222, 254, 224, 255, 251, 254}, - {253, 217, 223, 254, 224, 255, 251, 254}, - {253, 218, 224, 254, 225, 255, 251, 254}, - {253, 219, 224, 254, 225, 255, 251, 254}, - {254, 220, 225, 254, 225, 255, 251, 254}, - {254, 221, 226, 254, 225, 255, 251, 254}, - {254, 222, 227, 255, 226, 255, 251, 254}, - {254, 223, 227, 255, 226, 255, 251, 254}, - {254, 224, 228, 255, 227, 255, 251, 254}, - {254, 225, 229, 255, 227, 255, 251, 254}, - {254, 226, 230, 255, 228, 255, 251, 254}, - {254, 227, 230, 255, 229, 255, 251, 254}, - {255, 228, 231, 255, 230, 255, 251, 254}, - {255, 229, 232, 255, 230, 255, 251, 254}, - {255, 230, 233, 255, 231, 255, 252, 254}, - {255, 231, 234, 255, 231, 255, 252, 254}, - {255, 232, 235, 255, 232, 255, 252, 254}, - {255, 233, 236, 255, 232, 255, 252, 254}, - {255, 235, 237, 255, 233, 255, 252, 254}, - {255, 236, 238, 255, 234, 255, 252, 254}, - {255, 238, 240, 255, 235, 255, 252, 255}, - {255, 239, 241, 255, 235, 255, 252, 254}, - {255, 241, 243, 255, 236, 255, 252, 254}, - {255, 243, 245, 255, 237, 255, 252, 254}, - {255, 246, 247, 255, 239, 255, 253, 255}, -}; - -static const vp9_coeff_probs_model default_coef_probs_4x4[PLANE_TYPES] = { - { // Y plane - { // Intra - { // Band 0 - { 195, 29, 183 }, { 84, 49, 136 }, { 8, 42, 71 } - }, { // Band 1 - { 31, 107, 169 }, { 35, 99, 159 }, { 17, 82, 140 }, - { 8, 66, 114 }, { 2, 44, 76 }, { 1, 19, 32 } - }, { // Band 2 - { 40, 132, 201 }, { 29, 114, 187 }, { 13, 91, 157 }, - { 7, 75, 127 }, { 3, 58, 95 }, { 1, 28, 47 } - }, { // Band 3 - { 69, 142, 221 }, { 42, 122, 201 }, { 15, 91, 159 }, - { 6, 67, 121 }, { 1, 42, 77 }, { 1, 17, 31 } - }, { // Band 4 - { 102, 148, 228 }, { 67, 117, 204 
}, { 17, 82, 154 }, - { 6, 59, 114 }, { 2, 39, 75 }, { 1, 15, 29 } - }, { // Band 5 - { 156, 57, 233 }, { 119, 57, 212 }, { 58, 48, 163 }, - { 29, 40, 124 }, { 12, 30, 81 }, { 3, 12, 31 } - } - }, { // Inter - { // Band 0 - { 191, 107, 226 }, { 124, 117, 204 }, { 25, 99, 155 } - }, { // Band 1 - { 29, 148, 210 }, { 37, 126, 194 }, { 8, 93, 157 }, - { 2, 68, 118 }, { 1, 39, 69 }, { 1, 17, 33 } - }, { // Band 2 - { 41, 151, 213 }, { 27, 123, 193 }, { 3, 82, 144 }, - { 1, 58, 105 }, { 1, 32, 60 }, { 1, 13, 26 } - }, { // Band 3 - { 59, 159, 220 }, { 23, 126, 198 }, { 4, 88, 151 }, - { 1, 66, 114 }, { 1, 38, 71 }, { 1, 18, 34 } - }, { // Band 4 - { 114, 136, 232 }, { 51, 114, 207 }, { 11, 83, 155 }, - { 3, 56, 105 }, { 1, 33, 65 }, { 1, 17, 34 } - }, { // Band 5 - { 149, 65, 234 }, { 121, 57, 215 }, { 61, 49, 166 }, - { 28, 36, 114 }, { 12, 25, 76 }, { 3, 16, 42 } - } - } - }, { // UV plane - { // Intra - { // Band 0 - { 214, 49, 220 }, { 132, 63, 188 }, { 42, 65, 137 } - }, { // Band 1 - { 85, 137, 221 }, { 104, 131, 216 }, { 49, 111, 192 }, - { 21, 87, 155 }, { 2, 49, 87 }, { 1, 16, 28 } - }, { // Band 2 - { 89, 163, 230 }, { 90, 137, 220 }, { 29, 100, 183 }, - { 10, 70, 135 }, { 2, 42, 81 }, { 1, 17, 33 } - }, { // Band 3 - { 108, 167, 237 }, { 55, 133, 222 }, { 15, 97, 179 }, - { 4, 72, 135 }, { 1, 45, 85 }, { 1, 19, 38 } - }, { // Band 4 - { 124, 146, 240 }, { 66, 124, 224 }, { 17, 88, 175 }, - { 4, 58, 122 }, { 1, 36, 75 }, { 1, 18, 37 } - }, { // Band 5 - { 141, 79, 241 }, { 126, 70, 227 }, { 66, 58, 182 }, - { 30, 44, 136 }, { 12, 34, 96 }, { 2, 20, 47 } - } - }, { // Inter - { // Band 0 - { 229, 99, 249 }, { 143, 111, 235 }, { 46, 109, 192 } - }, { // Band 1 - { 82, 158, 236 }, { 94, 146, 224 }, { 25, 117, 191 }, - { 9, 87, 149 }, { 3, 56, 99 }, { 1, 33, 57 } - }, { // Band 2 - { 83, 167, 237 }, { 68, 145, 222 }, { 10, 103, 177 }, - { 2, 72, 131 }, { 1, 41, 79 }, { 1, 20, 39 } - }, { // Band 3 - { 99, 167, 239 }, { 47, 141, 224 }, { 10, 104, 178 }, - { 2, 73, 
133 }, { 1, 44, 85 }, { 1, 22, 47 } - }, { // Band 4 - { 127, 145, 243 }, { 71, 129, 228 }, { 17, 93, 177 }, - { 3, 61, 124 }, { 1, 41, 84 }, { 1, 21, 52 } - }, { // Band 5 - { 157, 78, 244 }, { 140, 72, 231 }, { 69, 58, 184 }, - { 31, 44, 137 }, { 14, 38, 105 }, { 8, 23, 61 } - } - } - } -}; - -static const vp9_coeff_probs_model default_coef_probs_8x8[PLANE_TYPES] = { - { // Y plane - { // Intra - { // Band 0 - { 125, 34, 187 }, { 52, 41, 133 }, { 6, 31, 56 } - }, { // Band 1 - { 37, 109, 153 }, { 51, 102, 147 }, { 23, 87, 128 }, - { 8, 67, 101 }, { 1, 41, 63 }, { 1, 19, 29 } - }, { // Band 2 - { 31, 154, 185 }, { 17, 127, 175 }, { 6, 96, 145 }, - { 2, 73, 114 }, { 1, 51, 82 }, { 1, 28, 45 } - }, { // Band 3 - { 23, 163, 200 }, { 10, 131, 185 }, { 2, 93, 148 }, - { 1, 67, 111 }, { 1, 41, 69 }, { 1, 14, 24 } - }, { // Band 4 - { 29, 176, 217 }, { 12, 145, 201 }, { 3, 101, 156 }, - { 1, 69, 111 }, { 1, 39, 63 }, { 1, 14, 23 } - }, { // Band 5 - { 57, 192, 233 }, { 25, 154, 215 }, { 6, 109, 167 }, - { 3, 78, 118 }, { 1, 48, 69 }, { 1, 21, 29 } - } - }, { // Inter - { // Band 0 - { 202, 105, 245 }, { 108, 106, 216 }, { 18, 90, 144 } - }, { // Band 1 - { 33, 172, 219 }, { 64, 149, 206 }, { 14, 117, 177 }, - { 5, 90, 141 }, { 2, 61, 95 }, { 1, 37, 57 } - }, { // Band 2 - { 33, 179, 220 }, { 11, 140, 198 }, { 1, 89, 148 }, - { 1, 60, 104 }, { 1, 33, 57 }, { 1, 12, 21 } - }, { // Band 3 - { 30, 181, 221 }, { 8, 141, 198 }, { 1, 87, 145 }, - { 1, 58, 100 }, { 1, 31, 55 }, { 1, 12, 20 } - }, { // Band 4 - { 32, 186, 224 }, { 7, 142, 198 }, { 1, 86, 143 }, - { 1, 58, 100 }, { 1, 31, 55 }, { 1, 12, 22 } - }, { // Band 5 - { 57, 192, 227 }, { 20, 143, 204 }, { 3, 96, 154 }, - { 1, 68, 112 }, { 1, 42, 69 }, { 1, 19, 32 } - } - } - }, { // UV plane - { // Intra - { // Band 0 - { 212, 35, 215 }, { 113, 47, 169 }, { 29, 48, 105 } - }, { // Band 1 - { 74, 129, 203 }, { 106, 120, 203 }, { 49, 107, 178 }, - { 19, 84, 144 }, { 4, 50, 84 }, { 1, 15, 25 } - }, { // Band 2 - { 71, 172, 
217 }, { 44, 141, 209 }, { 15, 102, 173 }, - { 6, 76, 133 }, { 2, 51, 89 }, { 1, 24, 42 } - }, { // Band 3 - { 64, 185, 231 }, { 31, 148, 216 }, { 8, 103, 175 }, - { 3, 74, 131 }, { 1, 46, 81 }, { 1, 18, 30 } - }, { // Band 4 - { 65, 196, 235 }, { 25, 157, 221 }, { 5, 105, 174 }, - { 1, 67, 120 }, { 1, 38, 69 }, { 1, 15, 30 } - }, { // Band 5 - { 65, 204, 238 }, { 30, 156, 224 }, { 7, 107, 177 }, - { 2, 70, 124 }, { 1, 42, 73 }, { 1, 18, 34 } - } - }, { // Inter - { // Band 0 - { 225, 86, 251 }, { 144, 104, 235 }, { 42, 99, 181 } - }, { // Band 1 - { 85, 175, 239 }, { 112, 165, 229 }, { 29, 136, 200 }, - { 12, 103, 162 }, { 6, 77, 123 }, { 2, 53, 84 } - }, { // Band 2 - { 75, 183, 239 }, { 30, 155, 221 }, { 3, 106, 171 }, - { 1, 74, 128 }, { 1, 44, 76 }, { 1, 17, 28 } - }, { // Band 3 - { 73, 185, 240 }, { 27, 159, 222 }, { 2, 107, 172 }, - { 1, 75, 127 }, { 1, 42, 73 }, { 1, 17, 29 } - }, { // Band 4 - { 62, 190, 238 }, { 21, 159, 222 }, { 2, 107, 172 }, - { 1, 72, 122 }, { 1, 40, 71 }, { 1, 18, 32 } - }, { // Band 5 - { 61, 199, 240 }, { 27, 161, 226 }, { 4, 113, 180 }, - { 1, 76, 129 }, { 1, 46, 80 }, { 1, 23, 41 } - } - } - } -}; - -static const vp9_coeff_probs_model default_coef_probs_16x16[PLANE_TYPES] = { - { // Y plane - { // Intra - { // Band 0 - { 7, 27, 153 }, { 5, 30, 95 }, { 1, 16, 30 } - }, { // Band 1 - { 50, 75, 127 }, { 57, 75, 124 }, { 27, 67, 108 }, - { 10, 54, 86 }, { 1, 33, 52 }, { 1, 12, 18 } - }, { // Band 2 - { 43, 125, 151 }, { 26, 108, 148 }, { 7, 83, 122 }, - { 2, 59, 89 }, { 1, 38, 60 }, { 1, 17, 27 } - }, { // Band 3 - { 23, 144, 163 }, { 13, 112, 154 }, { 2, 75, 117 }, - { 1, 50, 81 }, { 1, 31, 51 }, { 1, 14, 23 } - }, { // Band 4 - { 18, 162, 185 }, { 6, 123, 171 }, { 1, 78, 125 }, - { 1, 51, 86 }, { 1, 31, 54 }, { 1, 14, 23 } - }, { // Band 5 - { 15, 199, 227 }, { 3, 150, 204 }, { 1, 91, 146 }, - { 1, 55, 95 }, { 1, 30, 53 }, { 1, 11, 20 } - } - }, { // Inter - { // Band 0 - { 19, 55, 240 }, { 19, 59, 196 }, { 3, 52, 105 } - }, { // 
Band 1 - { 41, 166, 207 }, { 104, 153, 199 }, { 31, 123, 181 }, - { 14, 101, 152 }, { 5, 72, 106 }, { 1, 36, 52 } - }, { // Band 2 - { 35, 176, 211 }, { 12, 131, 190 }, { 2, 88, 144 }, - { 1, 60, 101 }, { 1, 36, 60 }, { 1, 16, 28 } - }, { // Band 3 - { 28, 183, 213 }, { 8, 134, 191 }, { 1, 86, 142 }, - { 1, 56, 96 }, { 1, 30, 53 }, { 1, 12, 20 } - }, { // Band 4 - { 20, 190, 215 }, { 4, 135, 192 }, { 1, 84, 139 }, - { 1, 53, 91 }, { 1, 28, 49 }, { 1, 11, 20 } - }, { // Band 5 - { 13, 196, 216 }, { 2, 137, 192 }, { 1, 86, 143 }, - { 1, 57, 99 }, { 1, 32, 56 }, { 1, 13, 24 } - } - } - }, { // UV plane - { // Intra - { // Band 0 - { 211, 29, 217 }, { 96, 47, 156 }, { 22, 43, 87 } - }, { // Band 1 - { 78, 120, 193 }, { 111, 116, 186 }, { 46, 102, 164 }, - { 15, 80, 128 }, { 2, 49, 76 }, { 1, 18, 28 } - }, { // Band 2 - { 71, 161, 203 }, { 42, 132, 192 }, { 10, 98, 150 }, - { 3, 69, 109 }, { 1, 44, 70 }, { 1, 18, 29 } - }, { // Band 3 - { 57, 186, 211 }, { 30, 140, 196 }, { 4, 93, 146 }, - { 1, 62, 102 }, { 1, 38, 65 }, { 1, 16, 27 } - }, { // Band 4 - { 47, 199, 217 }, { 14, 145, 196 }, { 1, 88, 142 }, - { 1, 57, 98 }, { 1, 36, 62 }, { 1, 15, 26 } - }, { // Band 5 - { 26, 219, 229 }, { 5, 155, 207 }, { 1, 94, 151 }, - { 1, 60, 104 }, { 1, 36, 62 }, { 1, 16, 28 } - } - }, { // Inter - { // Band 0 - { 233, 29, 248 }, { 146, 47, 220 }, { 43, 52, 140 } - }, { // Band 1 - { 100, 163, 232 }, { 179, 161, 222 }, { 63, 142, 204 }, - { 37, 113, 174 }, { 26, 89, 137 }, { 18, 68, 97 } - }, { // Band 2 - { 85, 181, 230 }, { 32, 146, 209 }, { 7, 100, 164 }, - { 3, 71, 121 }, { 1, 45, 77 }, { 1, 18, 30 } - }, { // Band 3 - { 65, 187, 230 }, { 20, 148, 207 }, { 2, 97, 159 }, - { 1, 68, 116 }, { 1, 40, 70 }, { 1, 14, 29 } - }, { // Band 4 - { 40, 194, 227 }, { 8, 147, 204 }, { 1, 94, 155 }, - { 1, 65, 112 }, { 1, 39, 66 }, { 1, 14, 26 } - }, { // Band 5 - { 16, 208, 228 }, { 3, 151, 207 }, { 1, 98, 160 }, - { 1, 67, 117 }, { 1, 41, 74 }, { 1, 17, 31 } - } - } - } -}; - -static const 
vp9_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = { - { // Y plane - { // Intra - { // Band 0 - { 17, 38, 140 }, { 7, 34, 80 }, { 1, 17, 29 } - }, { // Band 1 - { 37, 75, 128 }, { 41, 76, 128 }, { 26, 66, 116 }, - { 12, 52, 94 }, { 2, 32, 55 }, { 1, 10, 16 } - }, { // Band 2 - { 50, 127, 154 }, { 37, 109, 152 }, { 16, 82, 121 }, - { 5, 59, 85 }, { 1, 35, 54 }, { 1, 13, 20 } - }, { // Band 3 - { 40, 142, 167 }, { 17, 110, 157 }, { 2, 71, 112 }, - { 1, 44, 72 }, { 1, 27, 45 }, { 1, 11, 17 } - }, { // Band 4 - { 30, 175, 188 }, { 9, 124, 169 }, { 1, 74, 116 }, - { 1, 48, 78 }, { 1, 30, 49 }, { 1, 11, 18 } - }, { // Band 5 - { 10, 222, 223 }, { 2, 150, 194 }, { 1, 83, 128 }, - { 1, 48, 79 }, { 1, 27, 45 }, { 1, 11, 17 } - } - }, { // Inter - { // Band 0 - { 36, 41, 235 }, { 29, 36, 193 }, { 10, 27, 111 } - }, { // Band 1 - { 85, 165, 222 }, { 177, 162, 215 }, { 110, 135, 195 }, - { 57, 113, 168 }, { 23, 83, 120 }, { 10, 49, 61 } - }, { // Band 2 - { 85, 190, 223 }, { 36, 139, 200 }, { 5, 90, 146 }, - { 1, 60, 103 }, { 1, 38, 65 }, { 1, 18, 30 } - }, { // Band 3 - { 72, 202, 223 }, { 23, 141, 199 }, { 2, 86, 140 }, - { 1, 56, 97 }, { 1, 36, 61 }, { 1, 16, 27 } - }, { // Band 4 - { 55, 218, 225 }, { 13, 145, 200 }, { 1, 86, 141 }, - { 1, 57, 99 }, { 1, 35, 61 }, { 1, 13, 22 } - }, { // Band 5 - { 15, 235, 212 }, { 1, 132, 184 }, { 1, 84, 139 }, - { 1, 57, 97 }, { 1, 34, 56 }, { 1, 14, 23 } - } - } - }, { // UV plane - { // Intra - { // Band 0 - { 181, 21, 201 }, { 61, 37, 123 }, { 10, 38, 71 } - }, { // Band 1 - { 47, 106, 172 }, { 95, 104, 173 }, { 42, 93, 159 }, - { 18, 77, 131 }, { 4, 50, 81 }, { 1, 17, 23 } - }, { // Band 2 - { 62, 147, 199 }, { 44, 130, 189 }, { 28, 102, 154 }, - { 18, 75, 115 }, { 2, 44, 65 }, { 1, 12, 19 } - }, { // Band 3 - { 55, 153, 210 }, { 24, 130, 194 }, { 3, 93, 146 }, - { 1, 61, 97 }, { 1, 31, 50 }, { 1, 10, 16 } - }, { // Band 4 - { 49, 186, 223 }, { 17, 148, 204 }, { 1, 96, 142 }, - { 1, 53, 83 }, { 1, 26, 44 }, { 1, 11, 17 } 
- }, { // Band 5 - { 13, 217, 212 }, { 2, 136, 180 }, { 1, 78, 124 }, - { 1, 50, 83 }, { 1, 29, 49 }, { 1, 14, 23 } - } - }, { // Inter - { // Band 0 - { 197, 13, 247 }, { 82, 17, 222 }, { 25, 17, 162 } - }, { // Band 1 - { 126, 186, 247 }, { 234, 191, 243 }, { 176, 177, 234 }, - { 104, 158, 220 }, { 66, 128, 186 }, { 55, 90, 137 } - }, { // Band 2 - { 111, 197, 242 }, { 46, 158, 219 }, { 9, 104, 171 }, - { 2, 65, 125 }, { 1, 44, 80 }, { 1, 17, 91 } - }, { // Band 3 - { 104, 208, 245 }, { 39, 168, 224 }, { 3, 109, 162 }, - { 1, 79, 124 }, { 1, 50, 102 }, { 1, 43, 102 } - }, { // Band 4 - { 84, 220, 246 }, { 31, 177, 231 }, { 2, 115, 180 }, - { 1, 79, 134 }, { 1, 55, 77 }, { 1, 60, 79 } - }, { // Band 5 - { 43, 243, 240 }, { 8, 180, 217 }, { 1, 115, 166 }, - { 1, 84, 121 }, { 1, 51, 67 }, { 1, 16, 6 } - } - } - } -}; - -static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) { - assert(p != 0); - memcpy(probs, vp9_pareto8_full[p - 1], MODEL_NODES * sizeof(vpx_prob)); -} - -void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full) { - if (full != model) - memcpy(full, model, sizeof(vpx_prob) * UNCONSTRAINED_NODES); - extend_to_full_distribution(&full[UNCONSTRAINED_NODES], model[PIVOT_NODE]); -} - -void vp9_default_coef_probs(VP9_COMMON *cm) { - vp9_copy(cm->fc->coef_probs[TX_4X4], default_coef_probs_4x4); - vp9_copy(cm->fc->coef_probs[TX_8X8], default_coef_probs_8x8); - vp9_copy(cm->fc->coef_probs[TX_16X16], default_coef_probs_16x16); - vp9_copy(cm->fc->coef_probs[TX_32X32], default_coef_probs_32x32); -} - -#define COEF_COUNT_SAT 24 -#define COEF_MAX_UPDATE_FACTOR 112 -#define COEF_COUNT_SAT_KEY 24 -#define COEF_MAX_UPDATE_FACTOR_KEY 112 -#define COEF_COUNT_SAT_AFTER_KEY 24 -#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128 - -static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE tx_size, - unsigned int count_sat, - unsigned int update_factor) { - const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx]; - vp9_coeff_probs_model *const 
probs = cm->fc->coef_probs[tx_size]; - const vp9_coeff_probs_model *const pre_probs = pre_fc->coef_probs[tx_size]; - vp9_coeff_count_model *counts = cm->counts.coef[tx_size]; - unsigned int (*eob_counts)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] = - cm->counts.eob_branch[tx_size]; - int i, j, k, l, m; - - for (i = 0; i < PLANE_TYPES; ++i) - for (j = 0; j < REF_TYPES; ++j) - for (k = 0; k < COEF_BANDS; ++k) - for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { - const int n0 = counts[i][j][k][l][ZERO_TOKEN]; - const int n1 = counts[i][j][k][l][ONE_TOKEN]; - const int n2 = counts[i][j][k][l][TWO_TOKEN]; - const int neob = counts[i][j][k][l][EOB_MODEL_TOKEN]; - const unsigned int branch_ct[UNCONSTRAINED_NODES][2] = { - { neob, eob_counts[i][j][k][l] - neob }, - { n0, n1 + n2 }, - { n1, n2 } - }; - for (m = 0; m < UNCONSTRAINED_NODES; ++m) - probs[i][j][k][l][m] = merge_probs(pre_probs[i][j][k][l][m], - branch_ct[m], - count_sat, update_factor); - } -} - -void vp9_adapt_coef_probs(VP9_COMMON *cm) { - TX_SIZE t; - unsigned int count_sat, update_factor; - - if (frame_is_intra_only(cm)) { - update_factor = COEF_MAX_UPDATE_FACTOR_KEY; - count_sat = COEF_COUNT_SAT_KEY; - } else if (cm->last_frame_type == KEY_FRAME) { - update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; /* adapt quickly */ - count_sat = COEF_COUNT_SAT_AFTER_KEY; - } else { - update_factor = COEF_MAX_UPDATE_FACTOR; - count_sat = COEF_COUNT_SAT; - } - for (t = TX_4X4; t <= TX_32X32; t++) - adapt_coef_probs(cm, t, count_sat, update_factor); -} diff --git a/thirdparty/libvpx/vp9/common/vp9_entropy.h b/thirdparty/libvpx/vp9/common/vp9_entropy.h deleted file mode 100644 index 63b3bff5d9..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_entropy.h +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_ENTROPY_H_ -#define VP9_COMMON_VP9_ENTROPY_H_ - -#include "vpx/vpx_integer.h" -#include "vpx_dsp/prob.h" - -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_enums.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define DIFF_UPDATE_PROB 252 - -// Coefficient token alphabet -#define ZERO_TOKEN 0 // 0 Extra Bits 0+0 -#define ONE_TOKEN 1 // 1 Extra Bits 0+1 -#define TWO_TOKEN 2 // 2 Extra Bits 0+1 -#define THREE_TOKEN 3 // 3 Extra Bits 0+1 -#define FOUR_TOKEN 4 // 4 Extra Bits 0+1 -#define CATEGORY1_TOKEN 5 // 5-6 Extra Bits 1+1 -#define CATEGORY2_TOKEN 6 // 7-10 Extra Bits 2+1 -#define CATEGORY3_TOKEN 7 // 11-18 Extra Bits 3+1 -#define CATEGORY4_TOKEN 8 // 19-34 Extra Bits 4+1 -#define CATEGORY5_TOKEN 9 // 35-66 Extra Bits 5+1 -#define CATEGORY6_TOKEN 10 // 67+ Extra Bits 14+1 -#define EOB_TOKEN 11 // EOB Extra Bits 0+0 - -#define ENTROPY_TOKENS 12 - -#define ENTROPY_NODES 11 - -DECLARE_ALIGNED(16, extern const uint8_t, vp9_pt_energy_class[ENTROPY_TOKENS]); - -#define CAT1_MIN_VAL 5 -#define CAT2_MIN_VAL 7 -#define CAT3_MIN_VAL 11 -#define CAT4_MIN_VAL 19 -#define CAT5_MIN_VAL 35 -#define CAT6_MIN_VAL 67 - -// Extra bit probabilities. 
-DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat1_prob[1]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat2_prob[2]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat3_prob[3]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat4_prob[4]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat5_prob[5]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob[14]); - -#if CONFIG_VP9_HIGHBITDEPTH -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat1_prob_high10[1]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat2_prob_high10[2]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat3_prob_high10[3]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat4_prob_high10[4]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat5_prob_high10[5]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob_high10[16]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat1_prob_high12[1]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat2_prob_high12[2]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat3_prob_high12[3]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat4_prob_high12[4]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat5_prob_high12[5]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob_high12[18]); -#endif // CONFIG_VP9_HIGHBITDEPTH - -#define EOB_MODEL_TOKEN 3 - -#define DCT_MAX_VALUE 16384 -#if CONFIG_VP9_HIGHBITDEPTH -#define DCT_MAX_VALUE_HIGH10 65536 -#define DCT_MAX_VALUE_HIGH12 262144 -#endif // CONFIG_VP9_HIGHBITDEPTH - -/* Coefficients are predicted via a 3-dimensional probability table. */ - -#define REF_TYPES 2 // intra=0, inter=1 - -/* Middle dimension reflects the coefficient position within the transform. */ -#define COEF_BANDS 6 - -/* Inside dimension is measure of nearby complexity, that reflects the energy - of nearby coefficients are nonzero. For the first coefficient (DC, unless - block type is 0), we look at the (already encoded) blocks above and to the - left of the current block. 
The context index is then the number (0,1,or 2) - of these blocks having nonzero coefficients. - After decoding a coefficient, the measure is determined by the size of the - most recently decoded coefficient. - Note that the intuitive meaning of this measure changes as coefficients - are decoded, e.g., prior to the first token, a zero means that my neighbors - are empty while, after the first token, because of the use of end-of-block, - a zero means we just decoded a zero and hence guarantees that a non-zero - coefficient will appear later in this block. However, this shift - in meaning is perfectly OK because our context depends also on the - coefficient band (and since zigzag positions 0, 1, and 2 are in - distinct bands). */ - -#define COEFF_CONTEXTS 6 -#define BAND_COEFF_CONTEXTS(band) ((band) == 0 ? 3 : COEFF_CONTEXTS) - -// #define ENTROPY_STATS - -typedef unsigned int vp9_coeff_count[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] - [ENTROPY_TOKENS]; -typedef unsigned int vp9_coeff_stats[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] - [ENTROPY_NODES][2]; - -#define SUBEXP_PARAM 4 /* Subexponential code parameter */ -#define MODULUS_PARAM 13 /* Modulus parameter */ - -struct VP9Common; -void vp9_default_coef_probs(struct VP9Common *cm); -void vp9_adapt_coef_probs(struct VP9Common *cm); - -// This is the index in the scan order beyond which all coefficients for -// 8x8 transform and above are in the top band. -// This macro is currently unused but may be used by certain implementations -#define MAXBAND_INDEX 21 - -DECLARE_ALIGNED(16, extern const uint8_t, vp9_coefband_trans_8x8plus[1024]); -DECLARE_ALIGNED(16, extern const uint8_t, vp9_coefband_trans_4x4[16]); - -static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) { - return tx_size == TX_4X4 ? 
vp9_coefband_trans_4x4 - : vp9_coefband_trans_8x8plus; -} - -// 128 lists of probabilities are stored for the following ONE node probs: -// 1, 3, 5, 7, ..., 253, 255 -// In between probabilities are interpolated linearly - -#define COEFF_PROB_MODELS 255 - -#define UNCONSTRAINED_NODES 3 - -#define PIVOT_NODE 2 // which node is pivot - -#define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES) -extern const vpx_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)]; -extern const vpx_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES]; - -typedef vpx_prob vp9_coeff_probs_model[REF_TYPES][COEF_BANDS] - [COEFF_CONTEXTS][UNCONSTRAINED_NODES]; - -typedef unsigned int vp9_coeff_count_model[REF_TYPES][COEF_BANDS] - [COEFF_CONTEXTS] - [UNCONSTRAINED_NODES + 1]; - -void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full); - -typedef char ENTROPY_CONTEXT; - -static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a, - ENTROPY_CONTEXT b) { - return (a != 0) + (b != 0); -} - -static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a, - const ENTROPY_CONTEXT *l) { - ENTROPY_CONTEXT above_ec = 0, left_ec = 0; - - switch (tx_size) { - case TX_4X4: - above_ec = a[0] != 0; - left_ec = l[0] != 0; - break; - case TX_8X8: - above_ec = !!*(const uint16_t *)a; - left_ec = !!*(const uint16_t *)l; - break; - case TX_16X16: - above_ec = !!*(const uint32_t *)a; - left_ec = !!*(const uint32_t *)l; - break; - case TX_32X32: - above_ec = !!*(const uint64_t *)a; - left_ec = !!*(const uint64_t *)l; - break; - default: - assert(0 && "Invalid transform size."); - break; - } - - return combine_entropy_contexts(above_ec, left_ec); -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_ENTROPY_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_entropymode.c b/thirdparty/libvpx/vp9/common/vp9_entropymode.c deleted file mode 100644 index 670348bafd..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_entropymode.c +++ /dev/null @@ -1,469 +0,0 @@ 
-/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_mem/vpx_mem.h" - -#include "vp9/common/vp9_onyxc_int.h" -#include "vp9/common/vp9_seg_common.h" - -const vpx_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1] = { - { // above = dc - { 137, 30, 42, 148, 151, 207, 70, 52, 91 }, // left = dc - { 92, 45, 102, 136, 116, 180, 74, 90, 100 }, // left = v - { 73, 32, 19, 187, 222, 215, 46, 34, 100 }, // left = h - { 91, 30, 32, 116, 121, 186, 93, 86, 94 }, // left = d45 - { 72, 35, 36, 149, 68, 206, 68, 63, 105 }, // left = d135 - { 73, 31, 28, 138, 57, 124, 55, 122, 151 }, // left = d117 - { 67, 23, 21, 140, 126, 197, 40, 37, 171 }, // left = d153 - { 86, 27, 28, 128, 154, 212, 45, 43, 53 }, // left = d207 - { 74, 32, 27, 107, 86, 160, 63, 134, 102 }, // left = d63 - { 59, 67, 44, 140, 161, 202, 78, 67, 119 } // left = tm - }, { // above = v - { 63, 36, 126, 146, 123, 158, 60, 90, 96 }, // left = dc - { 43, 46, 168, 134, 107, 128, 69, 142, 92 }, // left = v - { 44, 29, 68, 159, 201, 177, 50, 57, 77 }, // left = h - { 58, 38, 76, 114, 97, 172, 78, 133, 92 }, // left = d45 - { 46, 41, 76, 140, 63, 184, 69, 112, 57 }, // left = d135 - { 38, 32, 85, 140, 46, 112, 54, 151, 133 }, // left = d117 - { 39, 27, 61, 131, 110, 175, 44, 75, 136 }, // left = d153 - { 52, 30, 74, 113, 130, 175, 51, 64, 58 }, // left = d207 - { 47, 35, 80, 100, 74, 143, 64, 163, 74 }, // left = d63 - { 36, 61, 116, 114, 128, 162, 80, 125, 82 } // left = tm - }, { // above = h - { 82, 26, 26, 171, 208, 204, 44, 32, 105 }, // left = dc - { 55, 44, 68, 166, 179, 192, 57, 57, 108 }, // left = v - { 42, 26, 11, 199, 241, 
228, 23, 15, 85 }, // left = h - { 68, 42, 19, 131, 160, 199, 55, 52, 83 }, // left = d45 - { 58, 50, 25, 139, 115, 232, 39, 52, 118 }, // left = d135 - { 50, 35, 33, 153, 104, 162, 64, 59, 131 }, // left = d117 - { 44, 24, 16, 150, 177, 202, 33, 19, 156 }, // left = d153 - { 55, 27, 12, 153, 203, 218, 26, 27, 49 }, // left = d207 - { 53, 49, 21, 110, 116, 168, 59, 80, 76 }, // left = d63 - { 38, 72, 19, 168, 203, 212, 50, 50, 107 } // left = tm - }, { // above = d45 - { 103, 26, 36, 129, 132, 201, 83, 80, 93 }, // left = dc - { 59, 38, 83, 112, 103, 162, 98, 136, 90 }, // left = v - { 62, 30, 23, 158, 200, 207, 59, 57, 50 }, // left = h - { 67, 30, 29, 84, 86, 191, 102, 91, 59 }, // left = d45 - { 60, 32, 33, 112, 71, 220, 64, 89, 104 }, // left = d135 - { 53, 26, 34, 130, 56, 149, 84, 120, 103 }, // left = d117 - { 53, 21, 23, 133, 109, 210, 56, 77, 172 }, // left = d153 - { 77, 19, 29, 112, 142, 228, 55, 66, 36 }, // left = d207 - { 61, 29, 29, 93, 97, 165, 83, 175, 162 }, // left = d63 - { 47, 47, 43, 114, 137, 181, 100, 99, 95 } // left = tm - }, { // above = d135 - { 69, 23, 29, 128, 83, 199, 46, 44, 101 }, // left = dc - { 53, 40, 55, 139, 69, 183, 61, 80, 110 }, // left = v - { 40, 29, 19, 161, 180, 207, 43, 24, 91 }, // left = h - { 60, 34, 19, 105, 61, 198, 53, 64, 89 }, // left = d45 - { 52, 31, 22, 158, 40, 209, 58, 62, 89 }, // left = d135 - { 44, 31, 29, 147, 46, 158, 56, 102, 198 }, // left = d117 - { 35, 19, 12, 135, 87, 209, 41, 45, 167 }, // left = d153 - { 55, 25, 21, 118, 95, 215, 38, 39, 66 }, // left = d207 - { 51, 38, 25, 113, 58, 164, 70, 93, 97 }, // left = d63 - { 47, 54, 34, 146, 108, 203, 72, 103, 151 } // left = tm - }, { // above = d117 - { 64, 19, 37, 156, 66, 138, 49, 95, 133 }, // left = dc - { 46, 27, 80, 150, 55, 124, 55, 121, 135 }, // left = v - { 36, 23, 27, 165, 149, 166, 54, 64, 118 }, // left = h - { 53, 21, 36, 131, 63, 163, 60, 109, 81 }, // left = d45 - { 40, 26, 35, 154, 40, 185, 51, 97, 123 }, // left = d135 - { 35, 19, 
34, 179, 19, 97, 48, 129, 124 }, // left = d117 - { 36, 20, 26, 136, 62, 164, 33, 77, 154 }, // left = d153 - { 45, 18, 32, 130, 90, 157, 40, 79, 91 }, // left = d207 - { 45, 26, 28, 129, 45, 129, 49, 147, 123 }, // left = d63 - { 38, 44, 51, 136, 74, 162, 57, 97, 121 } // left = tm - }, { // above = d153 - { 75, 17, 22, 136, 138, 185, 32, 34, 166 }, // left = dc - { 56, 39, 58, 133, 117, 173, 48, 53, 187 }, // left = v - { 35, 21, 12, 161, 212, 207, 20, 23, 145 }, // left = h - { 56, 29, 19, 117, 109, 181, 55, 68, 112 }, // left = d45 - { 47, 29, 17, 153, 64, 220, 59, 51, 114 }, // left = d135 - { 46, 16, 24, 136, 76, 147, 41, 64, 172 }, // left = d117 - { 34, 17, 11, 108, 152, 187, 13, 15, 209 }, // left = d153 - { 51, 24, 14, 115, 133, 209, 32, 26, 104 }, // left = d207 - { 55, 30, 18, 122, 79, 179, 44, 88, 116 }, // left = d63 - { 37, 49, 25, 129, 168, 164, 41, 54, 148 } // left = tm - }, { // above = d207 - { 82, 22, 32, 127, 143, 213, 39, 41, 70 }, // left = dc - { 62, 44, 61, 123, 105, 189, 48, 57, 64 }, // left = v - { 47, 25, 17, 175, 222, 220, 24, 30, 86 }, // left = h - { 68, 36, 17, 106, 102, 206, 59, 74, 74 }, // left = d45 - { 57, 39, 23, 151, 68, 216, 55, 63, 58 }, // left = d135 - { 49, 30, 35, 141, 70, 168, 82, 40, 115 }, // left = d117 - { 51, 25, 15, 136, 129, 202, 38, 35, 139 }, // left = d153 - { 68, 26, 16, 111, 141, 215, 29, 28, 28 }, // left = d207 - { 59, 39, 19, 114, 75, 180, 77, 104, 42 }, // left = d63 - { 40, 61, 26, 126, 152, 206, 61, 59, 93 } // left = tm - }, { // above = d63 - { 78, 23, 39, 111, 117, 170, 74, 124, 94 }, // left = dc - { 48, 34, 86, 101, 92, 146, 78, 179, 134 }, // left = v - { 47, 22, 24, 138, 187, 178, 68, 69, 59 }, // left = h - { 56, 25, 33, 105, 112, 187, 95, 177, 129 }, // left = d45 - { 48, 31, 27, 114, 63, 183, 82, 116, 56 }, // left = d135 - { 43, 28, 37, 121, 63, 123, 61, 192, 169 }, // left = d117 - { 42, 17, 24, 109, 97, 177, 56, 76, 122 }, // left = d153 - { 58, 18, 28, 105, 139, 182, 70, 92, 63 }, // 
left = d207 - { 46, 23, 32, 74, 86, 150, 67, 183, 88 }, // left = d63 - { 36, 38, 48, 92, 122, 165, 88, 137, 91 } // left = tm - }, { // above = tm - { 65, 70, 60, 155, 159, 199, 61, 60, 81 }, // left = dc - { 44, 78, 115, 132, 119, 173, 71, 112, 93 }, // left = v - { 39, 38, 21, 184, 227, 206, 42, 32, 64 }, // left = h - { 58, 47, 36, 124, 137, 193, 80, 82, 78 }, // left = d45 - { 49, 50, 35, 144, 95, 205, 63, 78, 59 }, // left = d135 - { 41, 53, 52, 148, 71, 142, 65, 128, 51 }, // left = d117 - { 40, 36, 28, 143, 143, 202, 40, 55, 137 }, // left = d153 - { 52, 34, 29, 129, 183, 227, 42, 35, 43 }, // left = d207 - { 42, 44, 44, 104, 105, 164, 64, 130, 80 }, // left = d63 - { 43, 81, 53, 140, 169, 204, 68, 84, 72 } // left = tm - } -}; - -const vpx_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1] = { - { 144, 11, 54, 157, 195, 130, 46, 58, 108 }, // y = dc - { 118, 15, 123, 148, 131, 101, 44, 93, 131 }, // y = v - { 113, 12, 23, 188, 226, 142, 26, 32, 125 }, // y = h - { 120, 11, 50, 123, 163, 135, 64, 77, 103 }, // y = d45 - { 113, 9, 36, 155, 111, 157, 32, 44, 161 }, // y = d135 - { 116, 9, 55, 176, 76, 96, 37, 61, 149 }, // y = d117 - { 115, 9, 28, 141, 161, 167, 21, 25, 193 }, // y = d153 - { 120, 12, 32, 145, 195, 142, 32, 38, 86 }, // y = d207 - { 116, 12, 64, 120, 140, 125, 49, 115, 121 }, // y = d63 - { 102, 19, 66, 162, 182, 122, 35, 59, 128 } // y = tm -}; - -static const vpx_prob default_if_y_probs[BLOCK_SIZE_GROUPS][INTRA_MODES - 1] = { - { 65, 32, 18, 144, 162, 194, 41, 51, 98 }, // block_size < 8x8 - { 132, 68, 18, 165, 217, 196, 45, 40, 78 }, // block_size < 16x16 - { 173, 80, 19, 176, 240, 193, 64, 35, 46 }, // block_size < 32x32 - { 221, 135, 38, 194, 248, 121, 96, 85, 29 } // block_size >= 32x32 -}; - -static const vpx_prob default_if_uv_probs[INTRA_MODES][INTRA_MODES - 1] = { - { 120, 7, 76, 176, 208, 126, 28, 54, 103 }, // y = dc - { 48, 12, 154, 155, 139, 90, 34, 117, 119 }, // y = v - { 67, 6, 25, 204, 243, 158, 13, 21, 96 }, // y = h - 
{ 97, 5, 44, 131, 176, 139, 48, 68, 97 }, // y = d45 - { 83, 5, 42, 156, 111, 152, 26, 49, 152 }, // y = d135 - { 80, 5, 58, 178, 74, 83, 33, 62, 145 }, // y = d117 - { 86, 5, 32, 154, 192, 168, 14, 22, 163 }, // y = d153 - { 85, 5, 32, 156, 216, 148, 19, 29, 73 }, // y = d207 - { 77, 7, 64, 116, 132, 122, 37, 126, 120 }, // y = d63 - { 101, 21, 107, 181, 192, 103, 19, 67, 125 } // y = tm -}; - -const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS] - [PARTITION_TYPES - 1] = { - // 8x8 -> 4x4 - { 158, 97, 94 }, // a/l both not split - { 93, 24, 99 }, // a split, l not split - { 85, 119, 44 }, // l split, a not split - { 62, 59, 67 }, // a/l both split - // 16x16 -> 8x8 - { 149, 53, 53 }, // a/l both not split - { 94, 20, 48 }, // a split, l not split - { 83, 53, 24 }, // l split, a not split - { 52, 18, 18 }, // a/l both split - // 32x32 -> 16x16 - { 150, 40, 39 }, // a/l both not split - { 78, 12, 26 }, // a split, l not split - { 67, 33, 11 }, // l split, a not split - { 24, 7, 5 }, // a/l both split - // 64x64 -> 32x32 - { 174, 35, 49 }, // a/l both not split - { 68, 11, 27 }, // a split, l not split - { 57, 15, 9 }, // l split, a not split - { 12, 3, 3 }, // a/l both split -}; - -static const vpx_prob default_partition_probs[PARTITION_CONTEXTS] - [PARTITION_TYPES - 1] = { - // 8x8 -> 4x4 - { 199, 122, 141 }, // a/l both not split - { 147, 63, 159 }, // a split, l not split - { 148, 133, 118 }, // l split, a not split - { 121, 104, 114 }, // a/l both split - // 16x16 -> 8x8 - { 174, 73, 87 }, // a/l both not split - { 92, 41, 83 }, // a split, l not split - { 82, 99, 50 }, // l split, a not split - { 53, 39, 39 }, // a/l both split - // 32x32 -> 16x16 - { 177, 58, 59 }, // a/l both not split - { 68, 26, 63 }, // a split, l not split - { 52, 79, 25 }, // l split, a not split - { 17, 14, 12 }, // a/l both split - // 64x64 -> 32x32 - { 222, 34, 30 }, // a/l both not split - { 72, 16, 44 }, // a split, l not split - { 58, 32, 12 }, // l split, a not split - { 10, 
7, 6 }, // a/l both split -}; - -static const vpx_prob default_inter_mode_probs[INTER_MODE_CONTEXTS] - [INTER_MODES - 1] = { - {2, 173, 34}, // 0 = both zero mv - {7, 145, 85}, // 1 = one zero mv + one a predicted mv - {7, 166, 63}, // 2 = two predicted mvs - {7, 94, 66}, // 3 = one predicted/zero and one new mv - {8, 64, 46}, // 4 = two new mvs - {17, 81, 31}, // 5 = one intra neighbour + x - {25, 29, 30}, // 6 = two intra neighbours -}; - -/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */ -const vpx_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = { - -DC_PRED, 2, /* 0 = DC_NODE */ - -TM_PRED, 4, /* 1 = TM_NODE */ - -V_PRED, 6, /* 2 = V_NODE */ - 8, 12, /* 3 = COM_NODE */ - -H_PRED, 10, /* 4 = H_NODE */ - -D135_PRED, -D117_PRED, /* 5 = D135_NODE */ - -D45_PRED, 14, /* 6 = D45_NODE */ - -D63_PRED, 16, /* 7 = D63_NODE */ - -D153_PRED, -D207_PRED /* 8 = D153_NODE */ -}; - -const vpx_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)] = { - -INTER_OFFSET(ZEROMV), 2, - -INTER_OFFSET(NEARESTMV), 4, - -INTER_OFFSET(NEARMV), -INTER_OFFSET(NEWMV) -}; - -const vpx_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)] = { - -PARTITION_NONE, 2, - -PARTITION_HORZ, 4, - -PARTITION_VERT, -PARTITION_SPLIT -}; - -static const vpx_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = { - 9, 102, 187, 225 -}; - -static const vpx_prob default_comp_inter_p[COMP_INTER_CONTEXTS] = { - 239, 183, 119, 96, 41 -}; - -static const vpx_prob default_comp_ref_p[REF_CONTEXTS] = { - 50, 126, 123, 221, 226 -}; - -static const vpx_prob default_single_ref_p[REF_CONTEXTS][2] = { - { 33, 16 }, - { 77, 74 }, - { 142, 142 }, - { 172, 170 }, - { 238, 247 } -}; - -static const struct tx_probs default_tx_probs = { - { { 3, 136, 37 }, - { 5, 52, 13 } }, - - { { 20, 152 }, - { 15, 101 } }, - - { { 100 }, - { 66 } } -}; - -void tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p, - unsigned int (*ct_32x32p)[2]) { - ct_32x32p[0][0] = 
tx_count_32x32p[TX_4X4]; - ct_32x32p[0][1] = tx_count_32x32p[TX_8X8] + - tx_count_32x32p[TX_16X16] + - tx_count_32x32p[TX_32X32]; - ct_32x32p[1][0] = tx_count_32x32p[TX_8X8]; - ct_32x32p[1][1] = tx_count_32x32p[TX_16X16] + - tx_count_32x32p[TX_32X32]; - ct_32x32p[2][0] = tx_count_32x32p[TX_16X16]; - ct_32x32p[2][1] = tx_count_32x32p[TX_32X32]; -} - -void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p, - unsigned int (*ct_16x16p)[2]) { - ct_16x16p[0][0] = tx_count_16x16p[TX_4X4]; - ct_16x16p[0][1] = tx_count_16x16p[TX_8X8] + tx_count_16x16p[TX_16X16]; - ct_16x16p[1][0] = tx_count_16x16p[TX_8X8]; - ct_16x16p[1][1] = tx_count_16x16p[TX_16X16]; -} - -void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, - unsigned int (*ct_8x8p)[2]) { - ct_8x8p[0][0] = tx_count_8x8p[TX_4X4]; - ct_8x8p[0][1] = tx_count_8x8p[TX_8X8]; -} - -static const vpx_prob default_skip_probs[SKIP_CONTEXTS] = { - 192, 128, 64 -}; - -static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] - [SWITCHABLE_FILTERS - 1] = { - { 235, 162, }, - { 36, 255, }, - { 34, 3, }, - { 149, 144, }, -}; - -static void init_mode_probs(FRAME_CONTEXT *fc) { - vp9_copy(fc->uv_mode_prob, default_if_uv_probs); - vp9_copy(fc->y_mode_prob, default_if_y_probs); - vp9_copy(fc->switchable_interp_prob, default_switchable_interp_prob); - vp9_copy(fc->partition_prob, default_partition_probs); - vp9_copy(fc->intra_inter_prob, default_intra_inter_p); - vp9_copy(fc->comp_inter_prob, default_comp_inter_p); - vp9_copy(fc->comp_ref_prob, default_comp_ref_p); - vp9_copy(fc->single_ref_prob, default_single_ref_p); - fc->tx_probs = default_tx_probs; - vp9_copy(fc->skip_probs, default_skip_probs); - vp9_copy(fc->inter_mode_probs, default_inter_mode_probs); -} - -const vpx_tree_index vp9_switchable_interp_tree - [TREE_SIZE(SWITCHABLE_FILTERS)] = { - -EIGHTTAP, 2, - -EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP -}; - -void vp9_adapt_mode_probs(VP9_COMMON *cm) { - int i, j; - FRAME_CONTEXT *fc 
= cm->fc; - const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx]; - const FRAME_COUNTS *counts = &cm->counts; - - for (i = 0; i < INTRA_INTER_CONTEXTS; i++) - fc->intra_inter_prob[i] = mode_mv_merge_probs(pre_fc->intra_inter_prob[i], - counts->intra_inter[i]); - for (i = 0; i < COMP_INTER_CONTEXTS; i++) - fc->comp_inter_prob[i] = mode_mv_merge_probs(pre_fc->comp_inter_prob[i], - counts->comp_inter[i]); - for (i = 0; i < REF_CONTEXTS; i++) - fc->comp_ref_prob[i] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i], - counts->comp_ref[i]); - for (i = 0; i < REF_CONTEXTS; i++) - for (j = 0; j < 2; j++) - fc->single_ref_prob[i][j] = mode_mv_merge_probs( - pre_fc->single_ref_prob[i][j], counts->single_ref[i][j]); - - for (i = 0; i < INTER_MODE_CONTEXTS; i++) - vpx_tree_merge_probs(vp9_inter_mode_tree, pre_fc->inter_mode_probs[i], - counts->inter_mode[i], fc->inter_mode_probs[i]); - - for (i = 0; i < BLOCK_SIZE_GROUPS; i++) - vpx_tree_merge_probs(vp9_intra_mode_tree, pre_fc->y_mode_prob[i], - counts->y_mode[i], fc->y_mode_prob[i]); - - for (i = 0; i < INTRA_MODES; ++i) - vpx_tree_merge_probs(vp9_intra_mode_tree, pre_fc->uv_mode_prob[i], - counts->uv_mode[i], fc->uv_mode_prob[i]); - - for (i = 0; i < PARTITION_CONTEXTS; i++) - vpx_tree_merge_probs(vp9_partition_tree, pre_fc->partition_prob[i], - counts->partition[i], fc->partition_prob[i]); - - if (cm->interp_filter == SWITCHABLE) { - for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) - vpx_tree_merge_probs(vp9_switchable_interp_tree, - pre_fc->switchable_interp_prob[i], - counts->switchable_interp[i], - fc->switchable_interp_prob[i]); - } - - if (cm->tx_mode == TX_MODE_SELECT) { - int j; - unsigned int branch_ct_8x8p[TX_SIZES - 3][2]; - unsigned int branch_ct_16x16p[TX_SIZES - 2][2]; - unsigned int branch_ct_32x32p[TX_SIZES - 1][2]; - - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { - tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], branch_ct_8x8p); - for (j = 0; j < TX_SIZES - 3; ++j) - fc->tx_probs.p8x8[i][j] = 
mode_mv_merge_probs( - pre_fc->tx_probs.p8x8[i][j], branch_ct_8x8p[j]); - - tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], branch_ct_16x16p); - for (j = 0; j < TX_SIZES - 2; ++j) - fc->tx_probs.p16x16[i][j] = mode_mv_merge_probs( - pre_fc->tx_probs.p16x16[i][j], branch_ct_16x16p[j]); - - tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], branch_ct_32x32p); - for (j = 0; j < TX_SIZES - 1; ++j) - fc->tx_probs.p32x32[i][j] = mode_mv_merge_probs( - pre_fc->tx_probs.p32x32[i][j], branch_ct_32x32p[j]); - } - } - - for (i = 0; i < SKIP_CONTEXTS; ++i) - fc->skip_probs[i] = mode_mv_merge_probs( - pre_fc->skip_probs[i], counts->skip[i]); -} - -static void set_default_lf_deltas(struct loopfilter *lf) { - lf->mode_ref_delta_enabled = 1; - lf->mode_ref_delta_update = 1; - - lf->ref_deltas[INTRA_FRAME] = 1; - lf->ref_deltas[LAST_FRAME] = 0; - lf->ref_deltas[GOLDEN_FRAME] = -1; - lf->ref_deltas[ALTREF_FRAME] = -1; - - lf->mode_deltas[0] = 0; - lf->mode_deltas[1] = 0; -} - -void vp9_setup_past_independence(VP9_COMMON *cm) { - // Reset the segment feature data to the default stats: - // Features disabled, 0, with delta coding (Default state). - struct loopfilter *const lf = &cm->lf; - - int i; - vp9_clearall_segfeatures(&cm->seg); - cm->seg.abs_delta = SEGMENT_DELTADATA; - - if (cm->last_frame_seg_map && !cm->frame_parallel_decode) - memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); - - if (cm->current_frame_seg_map) - memset(cm->current_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); - - // Reset the mode ref deltas for loop filter - vp9_zero(lf->last_ref_deltas); - vp9_zero(lf->last_mode_deltas); - set_default_lf_deltas(lf); - - // To force update of the sharpness - lf->last_sharpness_level = -1; - - vp9_default_coef_probs(cm); - init_mode_probs(cm->fc); - vp9_init_mv_probs(cm); - cm->fc->initialized = 1; - - if (cm->frame_type == KEY_FRAME || - cm->error_resilient_mode || cm->reset_frame_context == 3) { - // Reset all frame contexts. 
- for (i = 0; i < FRAME_CONTEXTS; ++i) - cm->frame_contexts[i] = *cm->fc; - } else if (cm->reset_frame_context == 2) { - // Reset only the frame context specified in the frame header. - cm->frame_contexts[cm->frame_context_idx] = *cm->fc; - } - - // prev_mip will only be allocated in encoder. - if (frame_is_intra_only(cm) && cm->prev_mip && !cm->frame_parallel_decode) - memset(cm->prev_mip, 0, - cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->prev_mip)); - - vp9_zero(cm->ref_frame_sign_bias); - - cm->frame_context_idx = 0; -} diff --git a/thirdparty/libvpx/vp9/common/vp9_entropymode.h b/thirdparty/libvpx/vp9/common/vp9_entropymode.h deleted file mode 100644 index 0285be1557..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_entropymode.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_COMMON_VP9_ENTROPYMODE_H_ -#define VP9_COMMON_VP9_ENTROPYMODE_H_ - -#include "vp9/common/vp9_entropy.h" -#include "vp9/common/vp9_entropymv.h" -#include "vp9/common/vp9_filter.h" -#include "vpx_dsp/vpx_filter.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define BLOCK_SIZE_GROUPS 4 - -#define TX_SIZE_CONTEXTS 2 - -#define INTER_OFFSET(mode) ((mode) - NEARESTMV) - -struct VP9Common; - -struct tx_probs { - vpx_prob p32x32[TX_SIZE_CONTEXTS][TX_SIZES - 1]; - vpx_prob p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 2]; - vpx_prob p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 3]; -}; - -struct tx_counts { - unsigned int p32x32[TX_SIZE_CONTEXTS][TX_SIZES]; - unsigned int p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 1]; - unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2]; - unsigned int tx_totals[TX_SIZES]; -}; - -typedef struct frame_contexts { - vpx_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1]; - vpx_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; - vpx_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1]; - vp9_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES]; - vpx_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] - [SWITCHABLE_FILTERS - 1]; - vpx_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1]; - vpx_prob intra_inter_prob[INTRA_INTER_CONTEXTS]; - vpx_prob comp_inter_prob[COMP_INTER_CONTEXTS]; - vpx_prob single_ref_prob[REF_CONTEXTS][2]; - vpx_prob comp_ref_prob[REF_CONTEXTS]; - struct tx_probs tx_probs; - vpx_prob skip_probs[SKIP_CONTEXTS]; - nmv_context nmvc; - int initialized; -} FRAME_CONTEXT; - -typedef struct FRAME_COUNTS { - unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES]; - unsigned int uv_mode[INTRA_MODES][INTRA_MODES]; - unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES]; - vp9_coeff_count_model coef[TX_SIZES][PLANE_TYPES]; - unsigned int eob_branch[TX_SIZES][PLANE_TYPES][REF_TYPES] - [COEF_BANDS][COEFF_CONTEXTS]; - unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS] - [SWITCHABLE_FILTERS]; - unsigned int 
inter_mode[INTER_MODE_CONTEXTS][INTER_MODES]; - unsigned int intra_inter[INTRA_INTER_CONTEXTS][2]; - unsigned int comp_inter[COMP_INTER_CONTEXTS][2]; - unsigned int single_ref[REF_CONTEXTS][2][2]; - unsigned int comp_ref[REF_CONTEXTS][2]; - struct tx_counts tx; - unsigned int skip[SKIP_CONTEXTS][2]; - nmv_context_counts mv; -} FRAME_COUNTS; - -extern const vpx_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; -extern const vpx_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES] - [INTRA_MODES - 1]; -extern const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS] - [PARTITION_TYPES - 1]; -extern const vpx_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)]; -extern const vpx_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)]; -extern const vpx_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)]; -extern const vpx_tree_index vp9_switchable_interp_tree - [TREE_SIZE(SWITCHABLE_FILTERS)]; - -void vp9_setup_past_independence(struct VP9Common *cm); - -void vp9_adapt_mode_probs(struct VP9Common *cm); - -void tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p, - unsigned int (*ct_32x32p)[2]); -void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p, - unsigned int (*ct_16x16p)[2]); -void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, - unsigned int (*ct_8x8p)[2]); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_ENTROPYMODE_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_entropymv.c b/thirdparty/libvpx/vp9/common/vp9_entropymv.c deleted file mode 100644 index 566ae91cf7..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_entropymv.c +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vp9/common/vp9_onyxc_int.h" -#include "vp9/common/vp9_entropymv.h" - -const vpx_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = { - -MV_JOINT_ZERO, 2, - -MV_JOINT_HNZVZ, 4, - -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ -}; - -const vpx_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)] = { - -MV_CLASS_0, 2, - -MV_CLASS_1, 4, - 6, 8, - -MV_CLASS_2, -MV_CLASS_3, - 10, 12, - -MV_CLASS_4, -MV_CLASS_5, - -MV_CLASS_6, 14, - 16, 18, - -MV_CLASS_7, -MV_CLASS_8, - -MV_CLASS_9, -MV_CLASS_10, -}; - -const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = { - -0, -1, -}; - -const vpx_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { - -0, 2, - -1, 4, - -2, -3 -}; - -static const nmv_context default_nmv_context = { - {32, 64, 96}, - { - { // Vertical component - 128, // sign - {224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, // class - {216}, // class0 - {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, // bits - {{128, 128, 64}, {96, 112, 64}}, // class0_fp - {64, 96, 64}, // fp - 160, // class0_hp bit - 128, // hp - }, - { // Horizontal component - 128, // sign - {216, 128, 176, 160, 176, 176, 192, 198, 198, 208}, // class - {208}, // class0 - {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, // bits - {{128, 128, 64}, {96, 112, 64}}, // class0_fp - {64, 96, 64}, // fp - 160, // class0_hp bit - 128, // hp - } - }, -}; - -static const uint8_t log_in_base_2[] = { - 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 
9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10 -}; - -static INLINE int mv_class_base(MV_CLASS_TYPE c) { - return c ? CLASS0_SIZE << (c + 2) : 0; -} - -MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) { - const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096) ? - MV_CLASS_10 : (MV_CLASS_TYPE)log_in_base_2[z >> 3]; - if (offset) - *offset = z - mv_class_base(c); - return c; -} - -static void inc_mv_component(int v, nmv_component_counts *comp_counts, - int incr, int usehp) { - int s, z, c, o, d, e, f; - assert(v != 0); /* should not be zero */ - s = v < 0; - comp_counts->sign[s] += incr; - z = (s ? 
-v : v) - 1; /* magnitude - 1 */ - - c = vp9_get_mv_class(z, &o); - comp_counts->classes[c] += incr; - - d = (o >> 3); /* int mv data */ - f = (o >> 1) & 3; /* fractional pel mv data */ - e = (o & 1); /* high precision mv data */ - - if (c == MV_CLASS_0) { - comp_counts->class0[d] += incr; - comp_counts->class0_fp[d][f] += incr; - comp_counts->class0_hp[e] += usehp * incr; - } else { - int i; - int b = c + CLASS0_BITS - 1; // number of bits - for (i = 0; i < b; ++i) - comp_counts->bits[i][((d >> i) & 1)] += incr; - comp_counts->fp[f] += incr; - comp_counts->hp[e] += usehp * incr; - } -} - -void vp9_inc_mv(const MV *mv, nmv_context_counts *counts) { - if (counts != NULL) { - const MV_JOINT_TYPE j = vp9_get_mv_joint(mv); - ++counts->joints[j]; - - if (mv_joint_vertical(j)) { - inc_mv_component(mv->row, &counts->comps[0], 1, 1); - } - - if (mv_joint_horizontal(j)) { - inc_mv_component(mv->col, &counts->comps[1], 1, 1); - } - } -} - -void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) { - int i, j; - - nmv_context *fc = &cm->fc->nmvc; - const nmv_context *pre_fc = &cm->frame_contexts[cm->frame_context_idx].nmvc; - const nmv_context_counts *counts = &cm->counts.mv; - - vpx_tree_merge_probs(vp9_mv_joint_tree, pre_fc->joints, counts->joints, - fc->joints); - - for (i = 0; i < 2; ++i) { - nmv_component *comp = &fc->comps[i]; - const nmv_component *pre_comp = &pre_fc->comps[i]; - const nmv_component_counts *c = &counts->comps[i]; - - comp->sign = mode_mv_merge_probs(pre_comp->sign, c->sign); - vpx_tree_merge_probs(vp9_mv_class_tree, pre_comp->classes, c->classes, - comp->classes); - vpx_tree_merge_probs(vp9_mv_class0_tree, pre_comp->class0, c->class0, - comp->class0); - - for (j = 0; j < MV_OFFSET_BITS; ++j) - comp->bits[j] = mode_mv_merge_probs(pre_comp->bits[j], c->bits[j]); - - for (j = 0; j < CLASS0_SIZE; ++j) - vpx_tree_merge_probs(vp9_mv_fp_tree, pre_comp->class0_fp[j], - c->class0_fp[j], comp->class0_fp[j]); - - vpx_tree_merge_probs(vp9_mv_fp_tree, pre_comp->fp, 
c->fp, comp->fp); - - if (allow_hp) { - comp->class0_hp = mode_mv_merge_probs(pre_comp->class0_hp, c->class0_hp); - comp->hp = mode_mv_merge_probs(pre_comp->hp, c->hp); - } - } -} - -void vp9_init_mv_probs(VP9_COMMON *cm) { - cm->fc->nmvc = default_nmv_context; -} diff --git a/thirdparty/libvpx/vp9/common/vp9_entropymv.h b/thirdparty/libvpx/vp9/common/vp9_entropymv.h deleted file mode 100644 index 2f05ad44b6..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_entropymv.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_COMMON_VP9_ENTROPYMV_H_ -#define VP9_COMMON_VP9_ENTROPYMV_H_ - -#include "./vpx_config.h" - -#include "vpx_dsp/prob.h" - -#include "vp9/common/vp9_mv.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct VP9Common; - -void vp9_init_mv_probs(struct VP9Common *cm); - -void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp); - -// Integer pel reference mv threshold for use of high-precision 1/8 mv -#define COMPANDED_MVREF_THRESH 8 - -static INLINE int use_mv_hp(const MV *ref) { - return (abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH && - (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH; -} - -#define MV_UPDATE_PROB 252 - -/* Symbols for coding which components are zero jointly */ -#define MV_JOINTS 4 -typedef enum { - MV_JOINT_ZERO = 0, /* Zero vector */ - MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */ - MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */ - MV_JOINT_HNZVNZ = 3, /* Both components nonzero */ -} MV_JOINT_TYPE; - -static INLINE int mv_joint_vertical(MV_JOINT_TYPE type) { - return type == MV_JOINT_HZVNZ || type == MV_JOINT_HNZVNZ; -} - 
-static INLINE int mv_joint_horizontal(MV_JOINT_TYPE type) { - return type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ; -} - -/* Symbols for coding magnitude class of nonzero components */ -#define MV_CLASSES 11 -typedef enum { - MV_CLASS_0 = 0, /* (0, 2] integer pel */ - MV_CLASS_1 = 1, /* (2, 4] integer pel */ - MV_CLASS_2 = 2, /* (4, 8] integer pel */ - MV_CLASS_3 = 3, /* (8, 16] integer pel */ - MV_CLASS_4 = 4, /* (16, 32] integer pel */ - MV_CLASS_5 = 5, /* (32, 64] integer pel */ - MV_CLASS_6 = 6, /* (64, 128] integer pel */ - MV_CLASS_7 = 7, /* (128, 256] integer pel */ - MV_CLASS_8 = 8, /* (256, 512] integer pel */ - MV_CLASS_9 = 9, /* (512, 1024] integer pel */ - MV_CLASS_10 = 10, /* (1024,2048] integer pel */ -} MV_CLASS_TYPE; - -#define CLASS0_BITS 1 /* bits at integer precision for class 0 */ -#define CLASS0_SIZE (1 << CLASS0_BITS) -#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2) -#define MV_FP_SIZE 4 - -#define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2) -#define MV_MAX ((1 << MV_MAX_BITS) - 1) -#define MV_VALS ((MV_MAX << 1) + 1) - -#define MV_IN_USE_BITS 14 -#define MV_UPP ((1 << MV_IN_USE_BITS) - 1) -#define MV_LOW (-(1 << MV_IN_USE_BITS)) - -extern const vpx_tree_index vp9_mv_joint_tree[]; -extern const vpx_tree_index vp9_mv_class_tree[]; -extern const vpx_tree_index vp9_mv_class0_tree[]; -extern const vpx_tree_index vp9_mv_fp_tree[]; - -typedef struct { - vpx_prob sign; - vpx_prob classes[MV_CLASSES - 1]; - vpx_prob class0[CLASS0_SIZE - 1]; - vpx_prob bits[MV_OFFSET_BITS]; - vpx_prob class0_fp[CLASS0_SIZE][MV_FP_SIZE - 1]; - vpx_prob fp[MV_FP_SIZE - 1]; - vpx_prob class0_hp; - vpx_prob hp; -} nmv_component; - -typedef struct { - vpx_prob joints[MV_JOINTS - 1]; - nmv_component comps[2]; -} nmv_context; - -static INLINE MV_JOINT_TYPE vp9_get_mv_joint(const MV *mv) { - if (mv->row == 0) { - return mv->col == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ; - } else { - return mv->col == 0 ? 
MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ; - } -} - -MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset); - -typedef struct { - unsigned int sign[2]; - unsigned int classes[MV_CLASSES]; - unsigned int class0[CLASS0_SIZE]; - unsigned int bits[MV_OFFSET_BITS][2]; - unsigned int class0_fp[CLASS0_SIZE][MV_FP_SIZE]; - unsigned int fp[MV_FP_SIZE]; - unsigned int class0_hp[2]; - unsigned int hp[2]; -} nmv_component_counts; - -typedef struct { - unsigned int joints[MV_JOINTS]; - nmv_component_counts comps[2]; -} nmv_context_counts; - -void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_ENTROPYMV_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_enums.h b/thirdparty/libvpx/vp9/common/vp9_enums.h deleted file mode 100644 index d089f23f97..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_enums.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_ENUMS_H_ -#define VP9_COMMON_VP9_ENUMS_H_ - -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MI_SIZE_LOG2 3 -#define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2) // 64 = 2^6 - -#define MI_SIZE (1 << MI_SIZE_LOG2) // pixels per mi-unit -#define MI_BLOCK_SIZE (1 << MI_BLOCK_SIZE_LOG2) // mi-units per max block - -#define MI_MASK (MI_BLOCK_SIZE - 1) - -// Bitstream profiles indicated by 2-3 bits in the uncompressed header. -// 00: Profile 0. 8-bit 4:2:0 only. -// 10: Profile 1. 8-bit 4:4:4, 4:2:2, and 4:4:0. -// 01: Profile 2. 10-bit and 12-bit color only, with 4:2:0 sampling. -// 110: Profile 3. 
10-bit and 12-bit color only, with 4:2:2/4:4:4/4:4:0 -// sampling. -// 111: Undefined profile. -typedef enum BITSTREAM_PROFILE { - PROFILE_0, - PROFILE_1, - PROFILE_2, - PROFILE_3, - MAX_PROFILES -} BITSTREAM_PROFILE; - -#define BLOCK_4X4 0 -#define BLOCK_4X8 1 -#define BLOCK_8X4 2 -#define BLOCK_8X8 3 -#define BLOCK_8X16 4 -#define BLOCK_16X8 5 -#define BLOCK_16X16 6 -#define BLOCK_16X32 7 -#define BLOCK_32X16 8 -#define BLOCK_32X32 9 -#define BLOCK_32X64 10 -#define BLOCK_64X32 11 -#define BLOCK_64X64 12 -#define BLOCK_SIZES 13 -#define BLOCK_INVALID BLOCK_SIZES -typedef uint8_t BLOCK_SIZE; - -typedef enum PARTITION_TYPE { - PARTITION_NONE, - PARTITION_HORZ, - PARTITION_VERT, - PARTITION_SPLIT, - PARTITION_TYPES, - PARTITION_INVALID = PARTITION_TYPES -} PARTITION_TYPE; - -typedef char PARTITION_CONTEXT; -#define PARTITION_PLOFFSET 4 // number of probability models per block size -#define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET) - -// block transform size -typedef uint8_t TX_SIZE; -#define TX_4X4 ((TX_SIZE)0) // 4x4 transform -#define TX_8X8 ((TX_SIZE)1) // 8x8 transform -#define TX_16X16 ((TX_SIZE)2) // 16x16 transform -#define TX_32X32 ((TX_SIZE)3) // 32x32 transform -#define TX_SIZES ((TX_SIZE)4) - -// frame transform mode -typedef enum { - ONLY_4X4 = 0, // only 4x4 transform used - ALLOW_8X8 = 1, // allow block transform size up to 8x8 - ALLOW_16X16 = 2, // allow block transform size up to 16x16 - ALLOW_32X32 = 3, // allow block transform size up to 32x32 - TX_MODE_SELECT = 4, // transform specified for each block - TX_MODES = 5, -} TX_MODE; - -typedef enum { - DCT_DCT = 0, // DCT in both horizontal and vertical - ADST_DCT = 1, // ADST in vertical, DCT in horizontal - DCT_ADST = 2, // DCT in vertical, ADST in horizontal - ADST_ADST = 3, // ADST in both directions - TX_TYPES = 4 -} TX_TYPE; - -typedef enum { - VP9_LAST_FLAG = 1 << 0, - VP9_GOLD_FLAG = 1 << 1, - VP9_ALT_FLAG = 1 << 2, -} VP9_REFFRAME; - -typedef enum { - PLANE_TYPE_Y = 0, - PLANE_TYPE_UV = 1, 
- PLANE_TYPES -} PLANE_TYPE; - -#define DC_PRED 0 // Average of above and left pixels -#define V_PRED 1 // Vertical -#define H_PRED 2 // Horizontal -#define D45_PRED 3 // Directional 45 deg = round(arctan(1/1) * 180/pi) -#define D135_PRED 4 // Directional 135 deg = 180 - 45 -#define D117_PRED 5 // Directional 117 deg = 180 - 63 -#define D153_PRED 6 // Directional 153 deg = 180 - 27 -#define D207_PRED 7 // Directional 207 deg = 180 + 27 -#define D63_PRED 8 // Directional 63 deg = round(arctan(2/1) * 180/pi) -#define TM_PRED 9 // True-motion -#define NEARESTMV 10 -#define NEARMV 11 -#define ZEROMV 12 -#define NEWMV 13 -#define MB_MODE_COUNT 14 -typedef uint8_t PREDICTION_MODE; - -#define INTRA_MODES (TM_PRED + 1) - -#define INTER_MODES (1 + NEWMV - NEARESTMV) - -#define SKIP_CONTEXTS 3 -#define INTER_MODE_CONTEXTS 7 - -/* Segment Feature Masks */ -#define MAX_MV_REF_CANDIDATES 2 - -#define INTRA_INTER_CONTEXTS 4 -#define COMP_INTER_CONTEXTS 5 -#define REF_CONTEXTS 5 - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_ENUMS_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_filter.c b/thirdparty/libvpx/vp9/common/vp9_filter.c deleted file mode 100644 index 4b2198fc40..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_filter.c +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <assert.h> - -#include "vp9/common/vp9_filter.h" - -DECLARE_ALIGNED(256, static const InterpKernel, - bilinear_filters[SUBPEL_SHIFTS]) = { - { 0, 0, 0, 128, 0, 0, 0, 0 }, - { 0, 0, 0, 120, 8, 0, 0, 0 }, - { 0, 0, 0, 112, 16, 0, 0, 0 }, - { 0, 0, 0, 104, 24, 0, 0, 0 }, - { 0, 0, 0, 96, 32, 0, 0, 0 }, - { 0, 0, 0, 88, 40, 0, 0, 0 }, - { 0, 0, 0, 80, 48, 0, 0, 0 }, - { 0, 0, 0, 72, 56, 0, 0, 0 }, - { 0, 0, 0, 64, 64, 0, 0, 0 }, - { 0, 0, 0, 56, 72, 0, 0, 0 }, - { 0, 0, 0, 48, 80, 0, 0, 0 }, - { 0, 0, 0, 40, 88, 0, 0, 0 }, - { 0, 0, 0, 32, 96, 0, 0, 0 }, - { 0, 0, 0, 24, 104, 0, 0, 0 }, - { 0, 0, 0, 16, 112, 0, 0, 0 }, - { 0, 0, 0, 8, 120, 0, 0, 0 } -}; - -// Lagrangian interpolation filter -DECLARE_ALIGNED(256, static const InterpKernel, - sub_pel_filters_8[SUBPEL_SHIFTS]) = { - { 0, 0, 0, 128, 0, 0, 0, 0}, - { 0, 1, -5, 126, 8, -3, 1, 0}, - { -1, 3, -10, 122, 18, -6, 2, 0}, - { -1, 4, -13, 118, 27, -9, 3, -1}, - { -1, 4, -16, 112, 37, -11, 4, -1}, - { -1, 5, -18, 105, 48, -14, 4, -1}, - { -1, 5, -19, 97, 58, -16, 5, -1}, - { -1, 6, -19, 88, 68, -18, 5, -1}, - { -1, 6, -19, 78, 78, -19, 6, -1}, - { -1, 5, -18, 68, 88, -19, 6, -1}, - { -1, 5, -16, 58, 97, -19, 5, -1}, - { -1, 4, -14, 48, 105, -18, 5, -1}, - { -1, 4, -11, 37, 112, -16, 4, -1}, - { -1, 3, -9, 27, 118, -13, 4, -1}, - { 0, 2, -6, 18, 122, -10, 3, -1}, - { 0, 1, -3, 8, 126, -5, 1, 0} -}; - -// DCT based filter -DECLARE_ALIGNED(256, static const InterpKernel, - sub_pel_filters_8s[SUBPEL_SHIFTS]) = { - {0, 0, 0, 128, 0, 0, 0, 0}, - {-1, 3, -7, 127, 8, -3, 1, 0}, - {-2, 5, -13, 125, 17, -6, 3, -1}, - {-3, 7, -17, 121, 27, -10, 5, -2}, - {-4, 9, -20, 115, 37, -13, 6, -2}, - {-4, 10, -23, 108, 48, -16, 8, -3}, - {-4, 10, -24, 100, 59, -19, 9, -3}, - {-4, 11, -24, 90, 70, -21, 10, -4}, - {-4, 11, -23, 80, 80, -23, 11, -4}, - {-4, 10, -21, 70, 90, -24, 11, -4}, - {-3, 9, -19, 59, 100, -24, 10, -4}, - {-3, 8, -16, 48, 108, -23, 10, -4}, - {-2, 6, -13, 37, 115, -20, 9, -4}, - {-2, 5, -10, 27, 121, 
-17, 7, -3}, - {-1, 3, -6, 17, 125, -13, 5, -2}, - {0, 1, -3, 8, 127, -7, 3, -1} -}; - -// freqmultiplier = 0.5 -DECLARE_ALIGNED(256, static const InterpKernel, - sub_pel_filters_8lp[SUBPEL_SHIFTS]) = { - { 0, 0, 0, 128, 0, 0, 0, 0}, - {-3, -1, 32, 64, 38, 1, -3, 0}, - {-2, -2, 29, 63, 41, 2, -3, 0}, - {-2, -2, 26, 63, 43, 4, -4, 0}, - {-2, -3, 24, 62, 46, 5, -4, 0}, - {-2, -3, 21, 60, 49, 7, -4, 0}, - {-1, -4, 18, 59, 51, 9, -4, 0}, - {-1, -4, 16, 57, 53, 12, -4, -1}, - {-1, -4, 14, 55, 55, 14, -4, -1}, - {-1, -4, 12, 53, 57, 16, -4, -1}, - { 0, -4, 9, 51, 59, 18, -4, -1}, - { 0, -4, 7, 49, 60, 21, -3, -2}, - { 0, -4, 5, 46, 62, 24, -3, -2}, - { 0, -4, 4, 43, 63, 26, -2, -2}, - { 0, -3, 2, 41, 63, 29, -2, -2}, - { 0, -3, 1, 38, 64, 32, -1, -3} -}; - - -const InterpKernel *vp9_filter_kernels[4] = { - sub_pel_filters_8, - sub_pel_filters_8lp, - sub_pel_filters_8s, - bilinear_filters -}; diff --git a/thirdparty/libvpx/vp9/common/vp9_filter.h b/thirdparty/libvpx/vp9/common/vp9_filter.h deleted file mode 100644 index efa24bc67b..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_filter.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_COMMON_VP9_FILTER_H_ -#define VP9_COMMON_VP9_FILTER_H_ - -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" -#include "vpx_dsp/vpx_filter.h" -#include "vpx_ports/mem.h" - - -#ifdef __cplusplus -extern "C" { -#endif - -#define EIGHTTAP 0 -#define EIGHTTAP_SMOOTH 1 -#define EIGHTTAP_SHARP 2 -#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */ -#define BILINEAR 3 -// The codec can operate in four possible inter prediction filter mode: -// 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three. -#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1) -#define SWITCHABLE 4 /* should be the last one */ - -typedef uint8_t INTERP_FILTER; - -extern const InterpKernel *vp9_filter_kernels[4]; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_FILTER_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_frame_buffers.c b/thirdparty/libvpx/vp9/common/vp9_frame_buffers.c deleted file mode 100644 index 0f41d66985..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_frame_buffers.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <assert.h> - -#include "vp9/common/vp9_frame_buffers.h" -#include "vpx_mem/vpx_mem.h" - -int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list) { - assert(list != NULL); - vp9_free_internal_frame_buffers(list); - - list->num_internal_frame_buffers = - VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS; - list->int_fb = - (InternalFrameBuffer *)vpx_calloc(list->num_internal_frame_buffers, - sizeof(*list->int_fb)); - return (list->int_fb == NULL); -} - -void vp9_free_internal_frame_buffers(InternalFrameBufferList *list) { - int i; - - assert(list != NULL); - - for (i = 0; i < list->num_internal_frame_buffers; ++i) { - vpx_free(list->int_fb[i].data); - list->int_fb[i].data = NULL; - } - vpx_free(list->int_fb); - list->int_fb = NULL; -} - -int vp9_get_frame_buffer(void *cb_priv, size_t min_size, - vpx_codec_frame_buffer_t *fb) { - int i; - InternalFrameBufferList *const int_fb_list = - (InternalFrameBufferList *)cb_priv; - if (int_fb_list == NULL) - return -1; - - // Find a free frame buffer. - for (i = 0; i < int_fb_list->num_internal_frame_buffers; ++i) { - if (!int_fb_list->int_fb[i].in_use) - break; - } - - if (i == int_fb_list->num_internal_frame_buffers) - return -1; - - if (int_fb_list->int_fb[i].size < min_size) { - int_fb_list->int_fb[i].data = - (uint8_t *)vpx_realloc(int_fb_list->int_fb[i].data, min_size); - if (!int_fb_list->int_fb[i].data) - return -1; - - // This memset is needed for fixing valgrind error from C loop filter - // due to access uninitialized memory in frame border. It could be - // removed if border is totally removed. - memset(int_fb_list->int_fb[i].data, 0, min_size); - int_fb_list->int_fb[i].size = min_size; - } - - fb->data = int_fb_list->int_fb[i].data; - fb->size = int_fb_list->int_fb[i].size; - int_fb_list->int_fb[i].in_use = 1; - - // Set the frame buffer's private data to point at the internal frame buffer. 
- fb->priv = &int_fb_list->int_fb[i]; - return 0; -} - -int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb) { - InternalFrameBuffer *const int_fb = (InternalFrameBuffer *)fb->priv; - (void)cb_priv; - if (int_fb) - int_fb->in_use = 0; - return 0; -} diff --git a/thirdparty/libvpx/vp9/common/vp9_frame_buffers.h b/thirdparty/libvpx/vp9/common/vp9_frame_buffers.h deleted file mode 100644 index e2cfe61b66..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_frame_buffers.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_FRAME_BUFFERS_H_ -#define VP9_COMMON_VP9_FRAME_BUFFERS_H_ - -#include "vpx/vpx_frame_buffer.h" -#include "vpx/vpx_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct InternalFrameBuffer { - uint8_t *data; - size_t size; - int in_use; -} InternalFrameBuffer; - -typedef struct InternalFrameBufferList { - int num_internal_frame_buffers; - InternalFrameBuffer *int_fb; -} InternalFrameBufferList; - -// Initializes |list|. Returns 0 on success. -int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list); - -// Free any data allocated to the frame buffers. -void vp9_free_internal_frame_buffers(InternalFrameBufferList *list); - -// Callback used by libvpx to request an external frame buffer. |cb_priv| -// Callback private data, which points to an InternalFrameBufferList. -// |min_size| is the minimum size in bytes needed to decode the next frame. -// |fb| pointer to the frame buffer. 
-int vp9_get_frame_buffer(void *cb_priv, size_t min_size, - vpx_codec_frame_buffer_t *fb); - -// Callback used by libvpx when there are no references to the frame buffer. -// |cb_priv| is not used. |fb| pointer to the frame buffer. -int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_FRAME_BUFFERS_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_idct.c b/thirdparty/libvpx/vp9/common/vp9_idct.c deleted file mode 100644 index 1b420143bb..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_idct.c +++ /dev/null @@ -1,405 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <math.h> - -#include "./vp9_rtcd.h" -#include "./vpx_dsp_rtcd.h" -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_idct.h" -#include "vpx_dsp/inv_txfm.h" -#include "vpx_ports/mem.h" - -void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, - int tx_type) { - const transform_2d IHT_4[] = { - { idct4_c, idct4_c }, // DCT_DCT = 0 - { iadst4_c, idct4_c }, // ADST_DCT = 1 - { idct4_c, iadst4_c }, // DCT_ADST = 2 - { iadst4_c, iadst4_c } // ADST_ADST = 3 - }; - - int i, j; - tran_low_t out[4 * 4]; - tran_low_t *outptr = out; - tran_low_t temp_in[4], temp_out[4]; - - // inverse transform row vectors - for (i = 0; i < 4; ++i) { - IHT_4[tx_type].rows(input, outptr); - input += 4; - outptr += 4; - } - - // inverse transform column vectors - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j * 4 + i]; - IHT_4[tx_type].cols(temp_in, temp_out); - for (j = 0; j < 4; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 4)); - } - } -} - -static const transform_2d IHT_8[] = { - { idct8_c, idct8_c }, // DCT_DCT = 0 - { iadst8_c, idct8_c }, // ADST_DCT = 1 - { idct8_c, iadst8_c }, // DCT_ADST = 2 - { iadst8_c, iadst8_c } // ADST_ADST = 3 -}; - -void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, - int tx_type) { - int i, j; - tran_low_t out[8 * 8]; - tran_low_t *outptr = out; - tran_low_t temp_in[8], temp_out[8]; - const transform_2d ht = IHT_8[tx_type]; - - // inverse transform row vectors - for (i = 0; i < 8; ++i) { - ht.rows(input, outptr); - input += 8; - outptr += 8; - } - - // inverse transform column vectors - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; - ht.cols(temp_in, temp_out); - for (j = 0; j < 8; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 5)); - } - } -} - -static const transform_2d IHT_16[] = { - { idct16_c, idct16_c }, // 
DCT_DCT = 0 - { iadst16_c, idct16_c }, // ADST_DCT = 1 - { idct16_c, iadst16_c }, // DCT_ADST = 2 - { iadst16_c, iadst16_c } // ADST_ADST = 3 -}; - -void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, - int tx_type) { - int i, j; - tran_low_t out[16 * 16]; - tran_low_t *outptr = out; - tran_low_t temp_in[16], temp_out[16]; - const transform_2d ht = IHT_16[tx_type]; - - // Rows - for (i = 0; i < 16; ++i) { - ht.rows(input, outptr); - input += 16; - outptr += 16; - } - - // Columns - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; - ht.cols(temp_in, temp_out); - for (j = 0; j < 16; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 6)); - } - } -} - -// idct -void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob) { - if (eob > 1) - vpx_idct4x4_16_add(input, dest, stride); - else - vpx_idct4x4_1_add(input, dest, stride); -} - - -void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob) { - if (eob > 1) - vpx_iwht4x4_16_add(input, dest, stride); - else - vpx_iwht4x4_1_add(input, dest, stride); -} - -void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob) { - // If dc is 1, then input[0] is the reconstructed value, do not need - // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. - - // The calculation can be simplified if there are not many non-zero dct - // coefficients. Use eobs to decide what to do. - // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. - // Combine that with code here. 
- if (eob == 1) - // DC only DCT coefficient - vpx_idct8x8_1_add(input, dest, stride); - else if (eob <= 12) - vpx_idct8x8_12_add(input, dest, stride); - else - vpx_idct8x8_64_add(input, dest, stride); -} - -void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob) { - /* The calculation can be simplified if there are not many non-zero dct - * coefficients. Use eobs to separate different cases. */ - if (eob == 1) - /* DC only DCT coefficient. */ - vpx_idct16x16_1_add(input, dest, stride); - else if (eob <= 10) - vpx_idct16x16_10_add(input, dest, stride); - else - vpx_idct16x16_256_add(input, dest, stride); -} - -void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob) { - if (eob == 1) - vpx_idct32x32_1_add(input, dest, stride); - else if (eob <= 34) - // non-zero coeff only in upper-left 8x8 - vpx_idct32x32_34_add(input, dest, stride); - else if (eob <= 135) - // non-zero coeff only in upper-left 16x16 - vpx_idct32x32_135_add(input, dest, stride); - else - vpx_idct32x32_1024_add(input, dest, stride); -} - -// iht -void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, - int stride, int eob) { - if (tx_type == DCT_DCT) - vp9_idct4x4_add(input, dest, stride, eob); - else - vp9_iht4x4_16_add(input, dest, stride, tx_type); -} - -void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, - int stride, int eob) { - if (tx_type == DCT_DCT) { - vp9_idct8x8_add(input, dest, stride, eob); - } else { - vp9_iht8x8_64_add(input, dest, stride, tx_type); - } -} - -void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, - int stride, int eob) { - if (tx_type == DCT_DCT) { - vp9_idct16x16_add(input, dest, stride, eob); - } else { - vp9_iht16x16_256_add(input, dest, stride, tx_type); - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int tx_type, int bd) { - const highbd_transform_2d 
IHT_4[] = { - { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0 - { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1 - { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2 - { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3 - }; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - int i, j; - tran_low_t out[4 * 4]; - tran_low_t *outptr = out; - tran_low_t temp_in[4], temp_out[4]; - - // Inverse transform row vectors. - for (i = 0; i < 4; ++i) { - IHT_4[tx_type].rows(input, outptr, bd); - input += 4; - outptr += 4; - } - - // Inverse transform column vectors. - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j * 4 + i]; - IHT_4[tx_type].cols(temp_in, temp_out, bd); - for (j = 0; j < 4; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); - } - } -} - -static const highbd_transform_2d HIGH_IHT_8[] = { - { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0 - { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1 - { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2 - { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3 -}; - -void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int tx_type, int bd) { - int i, j; - tran_low_t out[8 * 8]; - tran_low_t *outptr = out; - tran_low_t temp_in[8], temp_out[8]; - const highbd_transform_2d ht = HIGH_IHT_8[tx_type]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - // Inverse transform row vectors. - for (i = 0; i < 8; ++i) { - ht.rows(input, outptr, bd); - input += 8; - outptr += 8; - } - - // Inverse transform column vectors. 
- for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; - ht.cols(temp_in, temp_out, bd); - for (j = 0; j < 8; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); - } - } -} - -static const highbd_transform_2d HIGH_IHT_16[] = { - { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0 - { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1 - { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2 - { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3 -}; - -void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int tx_type, int bd) { - int i, j; - tran_low_t out[16 * 16]; - tran_low_t *outptr = out; - tran_low_t temp_in[16], temp_out[16]; - const highbd_transform_2d ht = HIGH_IHT_16[tx_type]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - // Rows - for (i = 0; i < 16; ++i) { - ht.rows(input, outptr, bd); - input += 16; - outptr += 16; - } - - // Columns - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; - ht.cols(temp_in, temp_out, bd); - for (j = 0; j < 16; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); - } - } -} - -// idct -void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd) { - if (eob > 1) - vpx_highbd_idct4x4_16_add(input, dest, stride, bd); - else - vpx_highbd_idct4x4_1_add(input, dest, stride, bd); -} - - -void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd) { - if (eob > 1) - vpx_highbd_iwht4x4_16_add(input, dest, stride, bd); - else - vpx_highbd_iwht4x4_1_add(input, dest, stride, bd); -} - -void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd) { - // If dc is 1, then input[0] is the reconstructed value, do not need - // dequantization. 
Also, when dc is 1, dc is counted in eobs, namely eobs >=1. - - // The calculation can be simplified if there are not many non-zero dct - // coefficients. Use eobs to decide what to do. - // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. - // Combine that with code here. - // DC only DCT coefficient - if (eob == 1) { - vpx_highbd_idct8x8_1_add(input, dest, stride, bd); - } else if (eob <= 10) { - vpx_highbd_idct8x8_10_add(input, dest, stride, bd); - } else { - vpx_highbd_idct8x8_64_add(input, dest, stride, bd); - } -} - -void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest, - int stride, int eob, int bd) { - // The calculation can be simplified if there are not many non-zero dct - // coefficients. Use eobs to separate different cases. - // DC only DCT coefficient. - if (eob == 1) { - vpx_highbd_idct16x16_1_add(input, dest, stride, bd); - } else if (eob <= 10) { - vpx_highbd_idct16x16_10_add(input, dest, stride, bd); - } else { - vpx_highbd_idct16x16_256_add(input, dest, stride, bd); - } -} - -void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest, - int stride, int eob, int bd) { - // Non-zero coeff only in upper-left 8x8 - if (eob == 1) { - vpx_highbd_idct32x32_1_add(input, dest, stride, bd); - } else if (eob <= 34) { - vpx_highbd_idct32x32_34_add(input, dest, stride, bd); - } else { - vpx_highbd_idct32x32_1024_add(input, dest, stride, bd); - } -} - -// iht -void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, - uint8_t *dest, int stride, int eob, int bd) { - if (tx_type == DCT_DCT) - vp9_highbd_idct4x4_add(input, dest, stride, eob, bd); - else - vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd); -} - -void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, - uint8_t *dest, int stride, int eob, int bd) { - if (tx_type == DCT_DCT) { - vp9_highbd_idct8x8_add(input, dest, stride, eob, bd); - } else { - vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd); - } -} - 
-void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, - uint8_t *dest, int stride, int eob, int bd) { - if (tx_type == DCT_DCT) { - vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); - } else { - vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vp9/common/vp9_idct.h b/thirdparty/libvpx/vp9/common/vp9_idct.h deleted file mode 100644 index b5a3fbf362..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_idct.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_IDCT_H_ -#define VP9_COMMON_VP9_IDCT_H_ - -#include <assert.h> - -#include "./vpx_config.h" -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_enums.h" -#include "vpx_dsp/inv_txfm.h" -#include "vpx_dsp/txfm_common.h" -#include "vpx_ports/mem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef void (*transform_1d)(const tran_low_t*, tran_low_t*); - -typedef struct { - transform_1d cols, rows; // vertical and horizontal -} transform_2d; - -#if CONFIG_VP9_HIGHBITDEPTH -typedef void (*highbd_transform_1d)(const tran_low_t*, tran_low_t*, int bd); - -typedef struct { - highbd_transform_1d cols, rows; // vertical and horizontal -} highbd_transform_2d; -#endif // CONFIG_VP9_HIGHBITDEPTH - -void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob); -void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob); -void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob); -void vp9_idct16x16_add(const tran_low_t 
*input, uint8_t *dest, int stride, - int eob); -void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob); - -void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, - int stride, int eob); -void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, - int stride, int eob); -void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, - int stride, int eob); - -#if CONFIG_VP9_HIGHBITDEPTH -void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd); -void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd); -void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd); -void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest, - int stride, int eob, int bd); -void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest, - int stride, int eob, int bd); -void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, - uint8_t *dest, int stride, int eob, int bd); -void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, - uint8_t *dest, int stride, int eob, int bd); -void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, - uint8_t *dest, int stride, int eob, int bd); -#endif // CONFIG_VP9_HIGHBITDEPTH -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_IDCT_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_loopfilter.c b/thirdparty/libvpx/vp9/common/vp9_loopfilter.c deleted file mode 100644 index 183dec4e71..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_loopfilter.c +++ /dev/null @@ -1,1697 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "vp9/common/vp9_loopfilter.h" -#include "vp9/common/vp9_onyxc_int.h" -#include "vp9/common/vp9_reconinter.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/mem.h" - -#include "vp9/common/vp9_seg_common.h" - -// 64 bit masks for left transform size. Each 1 represents a position where -// we should apply a loop filter across the left border of an 8x8 block -// boundary. -// -// In the case of TX_16X16-> ( in low order byte first we end up with -// a mask that looks like this -// -// 10101010 -// 10101010 -// 10101010 -// 10101010 -// 10101010 -// 10101010 -// 10101010 -// 10101010 -// -// A loopfilter should be applied to every other 8x8 horizontally. -static const uint64_t left_64x64_txform_mask[TX_SIZES]= { - 0xffffffffffffffffULL, // TX_4X4 - 0xffffffffffffffffULL, // TX_8x8 - 0x5555555555555555ULL, // TX_16x16 - 0x1111111111111111ULL, // TX_32x32 -}; - -// 64 bit masks for above transform size. Each 1 represents a position where -// we should apply a loop filter across the top border of an 8x8 block -// boundary. -// -// In the case of TX_32x32 -> ( in low order byte first we end up with -// a mask that looks like this -// -// 11111111 -// 00000000 -// 00000000 -// 00000000 -// 11111111 -// 00000000 -// 00000000 -// 00000000 -// -// A loopfilter should be applied to every other 4 the row vertically. -static const uint64_t above_64x64_txform_mask[TX_SIZES]= { - 0xffffffffffffffffULL, // TX_4X4 - 0xffffffffffffffffULL, // TX_8x8 - 0x00ff00ff00ff00ffULL, // TX_16x16 - 0x000000ff000000ffULL, // TX_32x32 -}; - -// 64 bit masks for prediction sizes (left). Each 1 represents a position -// where left border of an 8x8 block. 
These are aligned to the right most -// appropriate bit, and then shifted into place. -// -// In the case of TX_16x32 -> ( low order byte first ) we end up with -// a mask that looks like this : -// -// 10000000 -// 10000000 -// 10000000 -// 10000000 -// 00000000 -// 00000000 -// 00000000 -// 00000000 -static const uint64_t left_prediction_mask[BLOCK_SIZES] = { - 0x0000000000000001ULL, // BLOCK_4X4, - 0x0000000000000001ULL, // BLOCK_4X8, - 0x0000000000000001ULL, // BLOCK_8X4, - 0x0000000000000001ULL, // BLOCK_8X8, - 0x0000000000000101ULL, // BLOCK_8X16, - 0x0000000000000001ULL, // BLOCK_16X8, - 0x0000000000000101ULL, // BLOCK_16X16, - 0x0000000001010101ULL, // BLOCK_16X32, - 0x0000000000000101ULL, // BLOCK_32X16, - 0x0000000001010101ULL, // BLOCK_32X32, - 0x0101010101010101ULL, // BLOCK_32X64, - 0x0000000001010101ULL, // BLOCK_64X32, - 0x0101010101010101ULL, // BLOCK_64X64 -}; - -// 64 bit mask to shift and set for each prediction size. -static const uint64_t above_prediction_mask[BLOCK_SIZES] = { - 0x0000000000000001ULL, // BLOCK_4X4 - 0x0000000000000001ULL, // BLOCK_4X8 - 0x0000000000000001ULL, // BLOCK_8X4 - 0x0000000000000001ULL, // BLOCK_8X8 - 0x0000000000000001ULL, // BLOCK_8X16, - 0x0000000000000003ULL, // BLOCK_16X8 - 0x0000000000000003ULL, // BLOCK_16X16 - 0x0000000000000003ULL, // BLOCK_16X32, - 0x000000000000000fULL, // BLOCK_32X16, - 0x000000000000000fULL, // BLOCK_32X32, - 0x000000000000000fULL, // BLOCK_32X64, - 0x00000000000000ffULL, // BLOCK_64X32, - 0x00000000000000ffULL, // BLOCK_64X64 -}; -// 64 bit mask to shift and set for each prediction size. A bit is set for -// each 8x8 block that would be in the left most block of the given block -// size in the 64x64 block. 
-static const uint64_t size_mask[BLOCK_SIZES] = { - 0x0000000000000001ULL, // BLOCK_4X4 - 0x0000000000000001ULL, // BLOCK_4X8 - 0x0000000000000001ULL, // BLOCK_8X4 - 0x0000000000000001ULL, // BLOCK_8X8 - 0x0000000000000101ULL, // BLOCK_8X16, - 0x0000000000000003ULL, // BLOCK_16X8 - 0x0000000000000303ULL, // BLOCK_16X16 - 0x0000000003030303ULL, // BLOCK_16X32, - 0x0000000000000f0fULL, // BLOCK_32X16, - 0x000000000f0f0f0fULL, // BLOCK_32X32, - 0x0f0f0f0f0f0f0f0fULL, // BLOCK_32X64, - 0x00000000ffffffffULL, // BLOCK_64X32, - 0xffffffffffffffffULL, // BLOCK_64X64 -}; - -// These are used for masking the left and above borders. -static const uint64_t left_border = 0x1111111111111111ULL; -static const uint64_t above_border = 0x000000ff000000ffULL; - -// 16 bit masks for uv transform sizes. -static const uint16_t left_64x64_txform_mask_uv[TX_SIZES]= { - 0xffff, // TX_4X4 - 0xffff, // TX_8x8 - 0x5555, // TX_16x16 - 0x1111, // TX_32x32 -}; - -static const uint16_t above_64x64_txform_mask_uv[TX_SIZES]= { - 0xffff, // TX_4X4 - 0xffff, // TX_8x8 - 0x0f0f, // TX_16x16 - 0x000f, // TX_32x32 -}; - -// 16 bit left mask to shift and set for each uv prediction size. -static const uint16_t left_prediction_mask_uv[BLOCK_SIZES] = { - 0x0001, // BLOCK_4X4, - 0x0001, // BLOCK_4X8, - 0x0001, // BLOCK_8X4, - 0x0001, // BLOCK_8X8, - 0x0001, // BLOCK_8X16, - 0x0001, // BLOCK_16X8, - 0x0001, // BLOCK_16X16, - 0x0011, // BLOCK_16X32, - 0x0001, // BLOCK_32X16, - 0x0011, // BLOCK_32X32, - 0x1111, // BLOCK_32X64 - 0x0011, // BLOCK_64X32, - 0x1111, // BLOCK_64X64 -}; -// 16 bit above mask to shift and set for uv each prediction size. 
-static const uint16_t above_prediction_mask_uv[BLOCK_SIZES] = { - 0x0001, // BLOCK_4X4 - 0x0001, // BLOCK_4X8 - 0x0001, // BLOCK_8X4 - 0x0001, // BLOCK_8X8 - 0x0001, // BLOCK_8X16, - 0x0001, // BLOCK_16X8 - 0x0001, // BLOCK_16X16 - 0x0001, // BLOCK_16X32, - 0x0003, // BLOCK_32X16, - 0x0003, // BLOCK_32X32, - 0x0003, // BLOCK_32X64, - 0x000f, // BLOCK_64X32, - 0x000f, // BLOCK_64X64 -}; - -// 64 bit mask to shift and set for each uv prediction size -static const uint16_t size_mask_uv[BLOCK_SIZES] = { - 0x0001, // BLOCK_4X4 - 0x0001, // BLOCK_4X8 - 0x0001, // BLOCK_8X4 - 0x0001, // BLOCK_8X8 - 0x0001, // BLOCK_8X16, - 0x0001, // BLOCK_16X8 - 0x0001, // BLOCK_16X16 - 0x0011, // BLOCK_16X32, - 0x0003, // BLOCK_32X16, - 0x0033, // BLOCK_32X32, - 0x3333, // BLOCK_32X64, - 0x00ff, // BLOCK_64X32, - 0xffff, // BLOCK_64X64 -}; -static const uint16_t left_border_uv = 0x1111; -static const uint16_t above_border_uv = 0x000f; - -static const int mode_lf_lut[MB_MODE_COUNT] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES - 1, 1, 0, 1 // INTER_MODES (ZEROMV == 0) -}; - -static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) { - int lvl; - - // For each possible value for the loop filter fill out limits - for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) { - // Set loop filter parameters that control sharpness. 
- int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4)); - - if (sharpness_lvl > 0) { - if (block_inside_limit > (9 - sharpness_lvl)) - block_inside_limit = (9 - sharpness_lvl); - } - - if (block_inside_limit < 1) - block_inside_limit = 1; - - memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH); - memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit), - SIMD_WIDTH); - } -} - -static uint8_t get_filter_level(const loop_filter_info_n *lfi_n, - const MODE_INFO *mi) { - return lfi_n->lvl[mi->segment_id][mi->ref_frame[0]] - [mode_lf_lut[mi->mode]]; -} - -void vp9_loop_filter_init(VP9_COMMON *cm) { - loop_filter_info_n *lfi = &cm->lf_info; - struct loopfilter *lf = &cm->lf; - int lvl; - - // init limits for given sharpness - update_sharpness(lfi, lf->sharpness_level); - lf->last_sharpness_level = lf->sharpness_level; - - // init hev threshold const vectors - for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) - memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH); -} - -void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) { - int seg_id; - // n_shift is the multiplier for lf_deltas - // the multiplier is 1 for when filter_lvl is between 0 and 31; - // 2 when filter_lvl is between 32 and 63 - const int scale = 1 << (default_filt_lvl >> 5); - loop_filter_info_n *const lfi = &cm->lf_info; - struct loopfilter *const lf = &cm->lf; - const struct segmentation *const seg = &cm->seg; - - // update limits if sharpness has changed - if (lf->last_sharpness_level != lf->sharpness_level) { - update_sharpness(lfi, lf->sharpness_level); - lf->last_sharpness_level = lf->sharpness_level; - } - - for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) { - int lvl_seg = default_filt_lvl; - if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) { - const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF); - lvl_seg = clamp(seg->abs_delta == SEGMENT_ABSDATA ? 
- data : default_filt_lvl + data, - 0, MAX_LOOP_FILTER); - } - - if (!lf->mode_ref_delta_enabled) { - // we could get rid of this if we assume that deltas are set to - // zero when not in use; encoder always uses deltas - memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id])); - } else { - int ref, mode; - const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale; - lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER); - - for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) { - for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { - const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale - + lf->mode_deltas[mode] * scale; - lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); - } - } - } - } -} - -static void filter_selectively_vert_row2(int subsampling_factor, - uint8_t *s, int pitch, - unsigned int mask_16x16, - unsigned int mask_8x8, - unsigned int mask_4x4, - unsigned int mask_4x4_int, - const loop_filter_thresh *lfthr, - const uint8_t *lfl) { - const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff; - const int lfl_forward = subsampling_factor ? 
4 : 8; - const unsigned int dual_one = 1 | (1 << lfl_forward); - unsigned int mask; - uint8_t *ss[2]; - ss[0] = s; - - for (mask = - (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff; - mask; mask = (mask & ~dual_one) >> 1) { - if (mask & dual_one) { - const loop_filter_thresh *lfis[2]; - lfis[0] = lfthr + *lfl; - lfis[1] = lfthr + *(lfl + lfl_forward); - ss[1] = ss[0] + 8 * pitch; - - if (mask_16x16 & dual_one) { - if ((mask_16x16 & dual_one) == dual_one) { - vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, - lfis[0]->hev_thr); - } else { - const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)]; - vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim, - lfi->lim, lfi->hev_thr); - } - } - - if (mask_8x8 & dual_one) { - if ((mask_8x8 & dual_one) == dual_one) { - vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, - lfis[0]->hev_thr, lfis[1]->mblim, - lfis[1]->lim, lfis[1]->hev_thr); - } else { - const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)]; - vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - } - } - - if (mask_4x4 & dual_one) { - if ((mask_4x4 & dual_one) == dual_one) { - vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, - lfis[0]->hev_thr, lfis[1]->mblim, - lfis[1]->lim, lfis[1]->hev_thr); - } else { - const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)]; - vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - } - } - - if (mask_4x4_int & dual_one) { - if ((mask_4x4_int & dual_one) == dual_one) { - vpx_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim, - lfis[0]->lim, lfis[0]->hev_thr, - lfis[1]->mblim, lfis[1]->lim, - lfis[1]->hev_thr); - } else { - const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)]; - vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr); - } - } - } - - ss[0] += 8; - lfl += 1; - mask_16x16 >>= 1; - mask_8x8 >>= 1; - mask_4x4 >>= 1; - 
mask_4x4_int >>= 1; - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void highbd_filter_selectively_vert_row2(int subsampling_factor, - uint16_t *s, int pitch, - unsigned int mask_16x16, - unsigned int mask_8x8, - unsigned int mask_4x4, - unsigned int mask_4x4_int, - const loop_filter_thresh *lfthr, - const uint8_t *lfl, int bd) { - const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff; - const int lfl_forward = subsampling_factor ? 4 : 8; - const unsigned int dual_one = 1 | (1 << lfl_forward); - unsigned int mask; - uint16_t *ss[2]; - ss[0] = s; - - for (mask = - (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff; - mask; mask = (mask & ~dual_one) >> 1) { - if (mask & dual_one) { - const loop_filter_thresh *lfis[2]; - lfis[0] = lfthr + *lfl; - lfis[1] = lfthr + *(lfl + lfl_forward); - ss[1] = ss[0] + 8 * pitch; - - if (mask_16x16 & dual_one) { - if ((mask_16x16 & dual_one) == dual_one) { - vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, - lfis[0]->lim, lfis[0]->hev_thr, bd); - } else { - const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)]; - vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, bd); - } - } - - if (mask_8x8 & dual_one) { - if ((mask_8x8 & dual_one) == dual_one) { - vpx_highbd_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, - lfis[0]->lim, lfis[0]->hev_thr, - lfis[1]->mblim, lfis[1]->lim, - lfis[1]->hev_thr, bd); - } else { - const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)]; - vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, bd); - } - } - - if (mask_4x4 & dual_one) { - if ((mask_4x4 & dual_one) == dual_one) { - vpx_highbd_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, - lfis[0]->lim, lfis[0]->hev_thr, - lfis[1]->mblim, lfis[1]->lim, - lfis[1]->hev_thr, bd); - } else { - const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)]; - vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, - lfi->lim, 
lfi->hev_thr, bd); - } - } - - if (mask_4x4_int & dual_one) { - if ((mask_4x4_int & dual_one) == dual_one) { - vpx_highbd_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim, - lfis[0]->lim, lfis[0]->hev_thr, - lfis[1]->mblim, lfis[1]->lim, - lfis[1]->hev_thr, bd); - } else { - const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)]; - vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, - lfi->mblim, lfi->lim, lfi->hev_thr, bd); - } - } - } - - ss[0] += 8; - lfl += 1; - mask_16x16 >>= 1; - mask_8x8 >>= 1; - mask_4x4 >>= 1; - mask_4x4_int >>= 1; - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -static void filter_selectively_horiz(uint8_t *s, int pitch, - unsigned int mask_16x16, - unsigned int mask_8x8, - unsigned int mask_4x4, - unsigned int mask_4x4_int, - const loop_filter_thresh *lfthr, - const uint8_t *lfl) { - unsigned int mask; - int count; - - for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; - mask; mask >>= count) { - count = 1; - if (mask & 1) { - const loop_filter_thresh *lfi = lfthr + *lfl; - - if (mask_16x16 & 1) { - if ((mask_16x16 & 3) == 3) { - vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - count = 2; - } else { - vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - } - } else if (mask_8x8 & 1) { - if ((mask_8x8 & 3) == 3) { - // Next block's thresholds. 
- const loop_filter_thresh *lfin = lfthr + *(lfl + 1); - - vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, lfin->mblim, lfin->lim, - lfin->hev_thr); - - if ((mask_4x4_int & 3) == 3) { - vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, lfin->mblim, - lfin->lim, lfin->hev_thr); - } else { - if (mask_4x4_int & 1) - vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - else if (mask_4x4_int & 2) - vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr); - } - count = 2; - } else { - vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); - - if (mask_4x4_int & 1) - vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - } - } else if (mask_4x4 & 1) { - if ((mask_4x4 & 3) == 3) { - // Next block's thresholds. - const loop_filter_thresh *lfin = lfthr + *(lfl + 1); - - vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, lfin->mblim, lfin->lim, - lfin->hev_thr); - if ((mask_4x4_int & 3) == 3) { - vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, lfin->mblim, - lfin->lim, lfin->hev_thr); - } else { - if (mask_4x4_int & 1) - vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - else if (mask_4x4_int & 2) - vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr); - } - count = 2; - } else { - vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); - - if (mask_4x4_int & 1) - vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - } - } else { - vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); - } - } - s += 8 * count; - lfl += count; - mask_16x16 >>= count; - mask_8x8 >>= count; - mask_4x4 >>= count; - mask_4x4_int >>= count; - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void highbd_filter_selectively_horiz(uint16_t *s, int 
pitch, - unsigned int mask_16x16, - unsigned int mask_8x8, - unsigned int mask_4x4, - unsigned int mask_4x4_int, - const loop_filter_thresh *lfthr, - const uint8_t *lfl, int bd) { - unsigned int mask; - int count; - - for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; - mask; mask >>= count) { - count = 1; - if (mask & 1) { - const loop_filter_thresh *lfi = lfthr + *lfl; - - if (mask_16x16 & 1) { - if ((mask_16x16 & 3) == 3) { - vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - count = 2; - } else { - vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - } - } else if (mask_8x8 & 1) { - if ((mask_8x8 & 3) == 3) { - // Next block's thresholds. - const loop_filter_thresh *lfin = lfthr + *(lfl + 1); - - vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, lfin->mblim, lfin->lim, - lfin->hev_thr, bd); - - if ((mask_4x4_int & 3) == 3) { - vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, - lfin->mblim, lfin->lim, - lfin->hev_thr, bd); - } else { - if (mask_4x4_int & 1) { - vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, bd); - } else if (mask_4x4_int & 2) { - vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, bd); - } - } - count = 2; - } else { - vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - - if (mask_4x4_int & 1) { - vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, bd); - } - } - } else if (mask_4x4 & 1) { - if ((mask_4x4 & 3) == 3) { - // Next block's thresholds. 
- const loop_filter_thresh *lfin = lfthr + *(lfl + 1); - - vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, lfin->mblim, lfin->lim, - lfin->hev_thr, bd); - if ((mask_4x4_int & 3) == 3) { - vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, - lfin->mblim, lfin->lim, - lfin->hev_thr, bd); - } else { - if (mask_4x4_int & 1) { - vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, bd); - } else if (mask_4x4_int & 2) { - vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, bd); - } - } - count = 2; - } else { - vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - - if (mask_4x4_int & 1) { - vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, bd); - } - } - } else { - vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - } - } - s += 8 * count; - lfl += count; - mask_16x16 >>= count; - mask_8x8 >>= count; - mask_4x4 >>= count; - mask_4x4_int >>= count; - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -// This function ors into the current lfm structure, where to do loop -// filters for the specific mi we are looking at. It uses information -// including the block_size_type (32x16, 32x32, etc.), the transform size, -// whether there were any coefficients encoded, and the loop filter strength -// block we are currently looking at. Shift is used to position the -// 1's we produce. 
-static void build_masks(const loop_filter_info_n *const lfi_n, - const MODE_INFO *mi, const int shift_y, - const int shift_uv, - LOOP_FILTER_MASK *lfm) { - const BLOCK_SIZE block_size = mi->sb_type; - const TX_SIZE tx_size_y = mi->tx_size; - const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1); - const int filter_level = get_filter_level(lfi_n, mi); - uint64_t *const left_y = &lfm->left_y[tx_size_y]; - uint64_t *const above_y = &lfm->above_y[tx_size_y]; - uint64_t *const int_4x4_y = &lfm->int_4x4_y; - uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; - uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; - uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; - int i; - - // If filter level is 0 we don't loop filter. - if (!filter_level) { - return; - } else { - const int w = num_8x8_blocks_wide_lookup[block_size]; - const int h = num_8x8_blocks_high_lookup[block_size]; - int index = shift_y; - for (i = 0; i < h; i++) { - memset(&lfm->lfl_y[index], filter_level, w); - index += 8; - } - } - - // These set 1 in the current block size for the block size edges. - // For instance if the block size is 32x16, we'll set: - // above = 1111 - // 0000 - // and - // left = 1000 - // = 1000 - // NOTE : In this example the low bit is left most ( 1000 ) is stored as - // 1, not 8... - // - // U and V set things on a 16 bit scale. - // - *above_y |= above_prediction_mask[block_size] << shift_y; - *above_uv |= above_prediction_mask_uv[block_size] << shift_uv; - *left_y |= left_prediction_mask[block_size] << shift_y; - *left_uv |= left_prediction_mask_uv[block_size] << shift_uv; - - // If the block has no coefficients and is not intra we skip applying - // the loop filter on block edges. - if (mi->skip && is_inter_block(mi)) - return; - - // Here we are adding a mask for the transform size. The transform - // size mask is set to be correct for a 64x64 prediction block size. 
We - // mask to match the size of the block we are working on and then shift it - // into place.. - *above_y |= (size_mask[block_size] & - above_64x64_txform_mask[tx_size_y]) << shift_y; - *above_uv |= (size_mask_uv[block_size] & - above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; - - *left_y |= (size_mask[block_size] & - left_64x64_txform_mask[tx_size_y]) << shift_y; - *left_uv |= (size_mask_uv[block_size] & - left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; - - // Here we are trying to determine what to do with the internal 4x4 block - // boundaries. These differ from the 4x4 boundaries on the outside edge of - // an 8x8 in that the internal ones can be skipped and don't depend on - // the prediction block size. - if (tx_size_y == TX_4X4) - *int_4x4_y |= size_mask[block_size] << shift_y; - - if (tx_size_uv == TX_4X4) - *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv; -} - -// This function does the same thing as the one above with the exception that -// it only affects the y masks. It exists because for blocks < 16x16 in size, -// we only update u and v masks on the first block. 
-static void build_y_mask(const loop_filter_info_n *const lfi_n, - const MODE_INFO *mi, const int shift_y, - LOOP_FILTER_MASK *lfm) { - const BLOCK_SIZE block_size = mi->sb_type; - const TX_SIZE tx_size_y = mi->tx_size; - const int filter_level = get_filter_level(lfi_n, mi); - uint64_t *const left_y = &lfm->left_y[tx_size_y]; - uint64_t *const above_y = &lfm->above_y[tx_size_y]; - uint64_t *const int_4x4_y = &lfm->int_4x4_y; - int i; - - if (!filter_level) { - return; - } else { - const int w = num_8x8_blocks_wide_lookup[block_size]; - const int h = num_8x8_blocks_high_lookup[block_size]; - int index = shift_y; - for (i = 0; i < h; i++) { - memset(&lfm->lfl_y[index], filter_level, w); - index += 8; - } - } - - *above_y |= above_prediction_mask[block_size] << shift_y; - *left_y |= left_prediction_mask[block_size] << shift_y; - - if (mi->skip && is_inter_block(mi)) - return; - - *above_y |= (size_mask[block_size] & - above_64x64_txform_mask[tx_size_y]) << shift_y; - - *left_y |= (size_mask[block_size] & - left_64x64_txform_mask[tx_size_y]) << shift_y; - - if (tx_size_y == TX_4X4) - *int_4x4_y |= size_mask[block_size] << shift_y; -} - -void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row, - const int mi_col, LOOP_FILTER_MASK *lfm) { - int i; - - // The largest loopfilter we have is 16x16 so we use the 16x16 mask - // for 32x32 transforms also. - lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32]; - lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32]; - lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32]; - lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32]; - - // We do at least 8 tap filter on every 32x32 even if the transform size - // is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and - // remove it from the 4x4. 
- lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border; - lfm->left_y[TX_4X4] &= ~left_border; - lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border; - lfm->above_y[TX_4X4] &= ~above_border; - lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv; - lfm->left_uv[TX_4X4] &= ~left_border_uv; - lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv; - lfm->above_uv[TX_4X4] &= ~above_border_uv; - - // We do some special edge handling. - if (mi_row + MI_BLOCK_SIZE > cm->mi_rows) { - const uint64_t rows = cm->mi_rows - mi_row; - - // Each pixel inside the border gets a 1, - const uint64_t mask_y = (((uint64_t) 1 << (rows << 3)) - 1); - const uint16_t mask_uv = (((uint16_t) 1 << (((rows + 1) >> 1) << 2)) - 1); - - // Remove values completely outside our border. - for (i = 0; i < TX_32X32; i++) { - lfm->left_y[i] &= mask_y; - lfm->above_y[i] &= mask_y; - lfm->left_uv[i] &= mask_uv; - lfm->above_uv[i] &= mask_uv; - } - lfm->int_4x4_y &= mask_y; - lfm->int_4x4_uv &= mask_uv; - - // We don't apply a wide loop filter on the last uv block row. If set - // apply the shorter one instead. - if (rows == 1) { - lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16]; - lfm->above_uv[TX_16X16] = 0; - } - if (rows == 5) { - lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00; - lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00); - } - } - - if (mi_col + MI_BLOCK_SIZE > cm->mi_cols) { - const uint64_t columns = cm->mi_cols - mi_col; - - // Each pixel inside the border gets a 1, the multiply copies the border - // to where we need it. - const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL; - const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111; - - // Internal edges are not applied on the last column of the image so - // we mask 1 more for the internal edges - const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111; - - // Remove the bits outside the image edge. 
- for (i = 0; i < TX_32X32; i++) { - lfm->left_y[i] &= mask_y; - lfm->above_y[i] &= mask_y; - lfm->left_uv[i] &= mask_uv; - lfm->above_uv[i] &= mask_uv; - } - lfm->int_4x4_y &= mask_y; - lfm->int_4x4_uv &= mask_uv_int; - - // We don't apply a wide loop filter on the last uv column. If set - // apply the shorter one instead. - if (columns == 1) { - lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16]; - lfm->left_uv[TX_16X16] = 0; - } - if (columns == 5) { - lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc); - lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc); - } - } - // We don't apply a loop filter on the first column in the image, mask that - // out. - if (mi_col == 0) { - for (i = 0; i < TX_32X32; i++) { - lfm->left_y[i] &= 0xfefefefefefefefeULL; - lfm->left_uv[i] &= 0xeeee; - } - } - - // Assert if we try to apply 2 different loop filters at the same position. - assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8])); - assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4])); - assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4])); - assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16])); - assert(!(lfm->left_uv[TX_16X16]&lfm->left_uv[TX_8X8])); - assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4])); - assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4])); - assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16])); - assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8])); - assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4])); - assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4])); - assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16])); - assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8])); - assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4])); - assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4])); - assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16])); -} - -// This function sets up the bit masks for the entire 64x64 region represented -// by mi_row, mi_col. 
-void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, - MODE_INFO **mi, const int mode_info_stride, - LOOP_FILTER_MASK *lfm) { - int idx_32, idx_16, idx_8; - const loop_filter_info_n *const lfi_n = &cm->lf_info; - MODE_INFO **mip = mi; - MODE_INFO **mip2 = mi; - - // These are offsets to the next mi in the 64x64 block. It is what gets - // added to the mi ptr as we go through each loop. It helps us to avoid - // setting up special row and column counters for each index. The last step - // brings us out back to the starting position. - const int offset_32[] = {4, (mode_info_stride << 2) - 4, 4, - -(mode_info_stride << 2) - 4}; - const int offset_16[] = {2, (mode_info_stride << 1) - 2, 2, - -(mode_info_stride << 1) - 2}; - const int offset[] = {1, mode_info_stride - 1, 1, -mode_info_stride - 1}; - - // Following variables represent shifts to position the current block - // mask over the appropriate block. A shift of 36 to the left will move - // the bits for the final 32 by 32 block in the 64x64 up 4 rows and left - // 4 rows to the appropriate spot. - const int shift_32_y[] = {0, 4, 32, 36}; - const int shift_16_y[] = {0, 2, 16, 18}; - const int shift_8_y[] = {0, 1, 8, 9}; - const int shift_32_uv[] = {0, 2, 8, 10}; - const int shift_16_uv[] = {0, 1, 4, 5}; - const int max_rows = (mi_row + MI_BLOCK_SIZE > cm->mi_rows ? - cm->mi_rows - mi_row : MI_BLOCK_SIZE); - const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ? 
- cm->mi_cols - mi_col : MI_BLOCK_SIZE); - - vp9_zero(*lfm); - assert(mip[0] != NULL); - - switch (mip[0]->sb_type) { - case BLOCK_64X64: - build_masks(lfi_n, mip[0] , 0, 0, lfm); - break; - case BLOCK_64X32: - build_masks(lfi_n, mip[0], 0, 0, lfm); - mip2 = mip + mode_info_stride * 4; - if (4 >= max_rows) - break; - build_masks(lfi_n, mip2[0], 32, 8, lfm); - break; - case BLOCK_32X64: - build_masks(lfi_n, mip[0], 0, 0, lfm); - mip2 = mip + 4; - if (4 >= max_cols) - break; - build_masks(lfi_n, mip2[0], 4, 2, lfm); - break; - default: - for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) { - const int shift_y = shift_32_y[idx_32]; - const int shift_uv = shift_32_uv[idx_32]; - const int mi_32_col_offset = ((idx_32 & 1) << 2); - const int mi_32_row_offset = ((idx_32 >> 1) << 2); - if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows) - continue; - switch (mip[0]->sb_type) { - case BLOCK_32X32: - build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); - break; - case BLOCK_32X16: - build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); - if (mi_32_row_offset + 2 >= max_rows) - continue; - mip2 = mip + mode_info_stride * 2; - build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm); - break; - case BLOCK_16X32: - build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); - if (mi_32_col_offset + 2 >= max_cols) - continue; - mip2 = mip + 2; - build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm); - break; - default: - for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) { - const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16]; - const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16]; - const int mi_16_col_offset = mi_32_col_offset + - ((idx_16 & 1) << 1); - const int mi_16_row_offset = mi_32_row_offset + - ((idx_16 >> 1) << 1); - - if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows) - continue; - - switch (mip[0]->sb_type) { - case BLOCK_16X16: - build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); - break; - 
case BLOCK_16X8: - build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); - if (mi_16_row_offset + 1 >= max_rows) - continue; - mip2 = mip + mode_info_stride; - build_y_mask(lfi_n, mip2[0], shift_y+8, lfm); - break; - case BLOCK_8X16: - build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); - if (mi_16_col_offset +1 >= max_cols) - continue; - mip2 = mip + 1; - build_y_mask(lfi_n, mip2[0], shift_y+1, lfm); - break; - default: { - const int shift_y = shift_32_y[idx_32] + - shift_16_y[idx_16] + - shift_8_y[0]; - build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); - mip += offset[0]; - for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) { - const int shift_y = shift_32_y[idx_32] + - shift_16_y[idx_16] + - shift_8_y[idx_8]; - const int mi_8_col_offset = mi_16_col_offset + - ((idx_8 & 1)); - const int mi_8_row_offset = mi_16_row_offset + - ((idx_8 >> 1)); - - if (mi_8_col_offset >= max_cols || - mi_8_row_offset >= max_rows) - continue; - build_y_mask(lfi_n, mip[0], shift_y, lfm); - } - break; - } - } - } - break; - } - } - break; - } -} - -static void filter_selectively_vert(uint8_t *s, int pitch, - unsigned int mask_16x16, - unsigned int mask_8x8, - unsigned int mask_4x4, - unsigned int mask_4x4_int, - const loop_filter_thresh *lfthr, - const uint8_t *lfl) { - unsigned int mask; - - for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; - mask; mask >>= 1) { - const loop_filter_thresh *lfi = lfthr + *lfl; - - if (mask & 1) { - if (mask_16x16 & 1) { - vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); - } else if (mask_8x8 & 1) { - vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); - } else if (mask_4x4 & 1) { - vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); - } - } - if (mask_4x4_int & 1) - vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); - s += 8; - lfl += 1; - mask_16x16 >>= 1; - mask_8x8 >>= 1; - mask_4x4 >>= 1; - mask_4x4_int >>= 1; - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void 
highbd_filter_selectively_vert(uint16_t *s, int pitch, - unsigned int mask_16x16, - unsigned int mask_8x8, - unsigned int mask_4x4, - unsigned int mask_4x4_int, - const loop_filter_thresh *lfthr, - const uint8_t *lfl, int bd) { - unsigned int mask; - - for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; - mask; mask >>= 1) { - const loop_filter_thresh *lfi = lfthr + *lfl; - - if (mask & 1) { - if (mask_16x16 & 1) { - vpx_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - } else if (mask_8x8 & 1) { - vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - } else if (mask_4x4 & 1) { - vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - } - } - if (mask_4x4_int & 1) - vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, bd); - s += 8; - lfl += 1; - mask_16x16 >>= 1; - mask_8x8 >>= 1; - mask_4x4 >>= 1; - mask_4x4_int >>= 1; - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -void vp9_filter_block_plane_non420(VP9_COMMON *cm, - struct macroblockd_plane *plane, - MODE_INFO **mi_8x8, - int mi_row, int mi_col) { - const int ss_x = plane->subsampling_x; - const int ss_y = plane->subsampling_y; - const int row_step = 1 << ss_y; - const int col_step = 1 << ss_x; - const int row_step_stride = cm->mi_stride * row_step; - struct buf_2d *const dst = &plane->dst; - uint8_t* const dst0 = dst->buf; - unsigned int mask_16x16[MI_BLOCK_SIZE] = {0}; - unsigned int mask_8x8[MI_BLOCK_SIZE] = {0}; - unsigned int mask_4x4[MI_BLOCK_SIZE] = {0}; - unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0}; - uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE]; - int r, c; - - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { - unsigned int mask_16x16_c = 0; - unsigned int mask_8x8_c = 0; - unsigned int mask_4x4_c = 0; - unsigned int border_mask; - - // Determine the vertical edges that need filtering - for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { - 
const MODE_INFO *mi = mi_8x8[c]; - const BLOCK_SIZE sb_type = mi[0].sb_type; - const int skip_this = mi[0].skip && is_inter_block(mi); - // left edge of current unit is block/partition edge -> no skip - const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ? - !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1; - const int skip_this_c = skip_this && !block_edge_left; - // top edge of current unit is block/partition edge -> no skip - const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ? - !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1; - const int skip_this_r = skip_this && !block_edge_above; - const TX_SIZE tx_size = get_uv_tx_size(mi, plane); - const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1; - const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; - - // Filter level can vary per MI - if (!(lfl[(r << 3) + (c >> ss_x)] = - get_filter_level(&cm->lf_info, mi))) - continue; - - // Build masks based on the transform size of each block - if (tx_size == TX_32X32) { - if (!skip_this_c && ((c >> ss_x) & 3) == 0) { - if (!skip_border_4x4_c) - mask_16x16_c |= 1 << (c >> ss_x); - else - mask_8x8_c |= 1 << (c >> ss_x); - } - if (!skip_this_r && ((r >> ss_y) & 3) == 0) { - if (!skip_border_4x4_r) - mask_16x16[r] |= 1 << (c >> ss_x); - else - mask_8x8[r] |= 1 << (c >> ss_x); - } - } else if (tx_size == TX_16X16) { - if (!skip_this_c && ((c >> ss_x) & 1) == 0) { - if (!skip_border_4x4_c) - mask_16x16_c |= 1 << (c >> ss_x); - else - mask_8x8_c |= 1 << (c >> ss_x); - } - if (!skip_this_r && ((r >> ss_y) & 1) == 0) { - if (!skip_border_4x4_r) - mask_16x16[r] |= 1 << (c >> ss_x); - else - mask_8x8[r] |= 1 << (c >> ss_x); - } - } else { - // force 8x8 filtering on 32x32 boundaries - if (!skip_this_c) { - if (tx_size == TX_8X8 || ((c >> ss_x) & 3) == 0) - mask_8x8_c |= 1 << (c >> ss_x); - else - mask_4x4_c |= 1 << (c >> ss_x); - } - - if (!skip_this_r) { - if (tx_size == TX_8X8 || ((r >> ss_y) & 3) == 0) - 
mask_8x8[r] |= 1 << (c >> ss_x); - else - mask_4x4[r] |= 1 << (c >> ss_x); - } - - if (!skip_this && tx_size < TX_8X8 && !skip_border_4x4_c) - mask_4x4_int[r] |= 1 << (c >> ss_x); - } - } - - // Disable filtering on the leftmost column - border_mask = ~(mi_col == 0); -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_vert(CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - mask_16x16_c & border_mask, - mask_8x8_c & border_mask, - mask_4x4_c & border_mask, - mask_4x4_int[r], - cm->lf_info.lfthr, &lfl[r << 3], - (int)cm->bit_depth); - } else { -#endif // CONFIG_VP9_HIGHBITDEPTH - filter_selectively_vert(dst->buf, dst->stride, - mask_16x16_c & border_mask, - mask_8x8_c & border_mask, - mask_4x4_c & border_mask, - mask_4x4_int[r], - cm->lf_info.lfthr, &lfl[r << 3]); -#if CONFIG_VP9_HIGHBITDEPTH - } -#endif // CONFIG_VP9_HIGHBITDEPTH - dst->buf += 8 * dst->stride; - mi_8x8 += row_step_stride; - } - - // Now do horizontal pass - dst->buf = dst0; - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { - const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; - const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 
0 : mask_4x4_int[r]; - - unsigned int mask_16x16_r; - unsigned int mask_8x8_r; - unsigned int mask_4x4_r; - - if (mi_row + r == 0) { - mask_16x16_r = 0; - mask_8x8_r = 0; - mask_4x4_r = 0; - } else { - mask_16x16_r = mask_16x16[r]; - mask_8x8_r = mask_8x8[r]; - mask_4x4_r = mask_4x4[r]; - } -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int_r, - cm->lf_info.lfthr, &lfl[r << 3], - (int)cm->bit_depth); - } else { -#endif // CONFIG_VP9_HIGHBITDEPTH - filter_selectively_horiz(dst->buf, dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int_r, - cm->lf_info.lfthr, &lfl[r << 3]); -#if CONFIG_VP9_HIGHBITDEPTH - } -#endif // CONFIG_VP9_HIGHBITDEPTH - dst->buf += 8 * dst->stride; - } -} - -void vp9_filter_block_plane_ss00(VP9_COMMON *const cm, - struct macroblockd_plane *const plane, - int mi_row, - LOOP_FILTER_MASK *lfm) { - struct buf_2d *const dst = &plane->dst; - uint8_t *const dst0 = dst->buf; - int r; - uint64_t mask_16x16 = lfm->left_y[TX_16X16]; - uint64_t mask_8x8 = lfm->left_y[TX_8X8]; - uint64_t mask_4x4 = lfm->left_y[TX_4X4]; - uint64_t mask_4x4_int = lfm->int_4x4_y; - - assert(plane->subsampling_x == 0 && plane->subsampling_y == 0); - - // Vertical pass: do 2 rows at one time - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { - // Disable filtering on the leftmost column. 
-#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_vert_row2(plane->subsampling_x, - CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - (unsigned int)mask_16x16, - (unsigned int)mask_8x8, - (unsigned int)mask_4x4, - (unsigned int)mask_4x4_int, - cm->lf_info.lfthr, - &lfm->lfl_y[r << 3], - (int)cm->bit_depth); - } else { -#endif // CONFIG_VP9_HIGHBITDEPTH - filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride, - (unsigned int)mask_16x16, - (unsigned int)mask_8x8, - (unsigned int)mask_4x4, - (unsigned int)mask_4x4_int, - cm->lf_info.lfthr, &lfm->lfl_y[r << 3]); -#if CONFIG_VP9_HIGHBITDEPTH - } -#endif // CONFIG_VP9_HIGHBITDEPTH - dst->buf += 16 * dst->stride; - mask_16x16 >>= 16; - mask_8x8 >>= 16; - mask_4x4 >>= 16; - mask_4x4_int >>= 16; - } - - // Horizontal pass - dst->buf = dst0; - mask_16x16 = lfm->above_y[TX_16X16]; - mask_8x8 = lfm->above_y[TX_8X8]; - mask_4x4 = lfm->above_y[TX_4X4]; - mask_4x4_int = lfm->int_4x4_y; - - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) { - unsigned int mask_16x16_r; - unsigned int mask_8x8_r; - unsigned int mask_4x4_r; - - if (mi_row + r == 0) { - mask_16x16_r = 0; - mask_8x8_r = 0; - mask_4x4_r = 0; - } else { - mask_16x16_r = mask_16x16 & 0xff; - mask_8x8_r = mask_8x8 & 0xff; - mask_4x4_r = mask_4x4 & 0xff; - } - -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int & 0xff, - cm->lf_info.lfthr, &lfm->lfl_y[r << 3], - (int)cm->bit_depth); - } else { -#endif // CONFIG_VP9_HIGHBITDEPTH - filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int & 0xff, - cm->lf_info.lfthr, &lfm->lfl_y[r << 3]); -#if CONFIG_VP9_HIGHBITDEPTH - } -#endif // CONFIG_VP9_HIGHBITDEPTH - - dst->buf += 8 * dst->stride; - mask_16x16 >>= 8; - mask_8x8 >>= 8; - mask_4x4 >>= 8; - mask_4x4_int >>= 8; - } -} - 
-void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, - struct macroblockd_plane *const plane, - int mi_row, - LOOP_FILTER_MASK *lfm) { - struct buf_2d *const dst = &plane->dst; - uint8_t *const dst0 = dst->buf; - int r, c; - uint8_t lfl_uv[16]; - - uint16_t mask_16x16 = lfm->left_uv[TX_16X16]; - uint16_t mask_8x8 = lfm->left_uv[TX_8X8]; - uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; - uint16_t mask_4x4_int = lfm->int_4x4_uv; - - assert(plane->subsampling_x == 1 && plane->subsampling_y == 1); - - // Vertical pass: do 2 rows at one time - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { - for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { - lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; - lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)]; - } - - // Disable filtering on the leftmost column. -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_vert_row2(plane->subsampling_x, - CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, - (unsigned int)mask_16x16, - (unsigned int)mask_8x8, - (unsigned int)mask_4x4, - (unsigned int)mask_4x4_int, - cm->lf_info.lfthr, &lfl_uv[r << 1], - (int)cm->bit_depth); - } else { -#endif // CONFIG_VP9_HIGHBITDEPTH - filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride, - (unsigned int)mask_16x16, - (unsigned int)mask_8x8, - (unsigned int)mask_4x4, - (unsigned int)mask_4x4_int, - cm->lf_info.lfthr, &lfl_uv[r << 1]); -#if CONFIG_VP9_HIGHBITDEPTH - } -#endif // CONFIG_VP9_HIGHBITDEPTH - - dst->buf += 16 * dst->stride; - mask_16x16 >>= 8; - mask_8x8 >>= 8; - mask_4x4 >>= 8; - mask_4x4_int >>= 8; - } - - // Horizontal pass - dst->buf = dst0; - mask_16x16 = lfm->above_uv[TX_16X16]; - mask_8x8 = lfm->above_uv[TX_8X8]; - mask_4x4 = lfm->above_uv[TX_4X4]; - mask_4x4_int = lfm->int_4x4_uv; - - for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { - const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1; - const unsigned int mask_4x4_int_r = - 
skip_border_4x4_r ? 0 : (mask_4x4_int & 0xf); - unsigned int mask_16x16_r; - unsigned int mask_8x8_r; - unsigned int mask_4x4_r; - - if (mi_row + r == 0) { - mask_16x16_r = 0; - mask_8x8_r = 0; - mask_4x4_r = 0; - } else { - mask_16x16_r = mask_16x16 & 0xf; - mask_8x8_r = mask_8x8 & 0xf; - mask_4x4_r = mask_4x4 & 0xf; - } - -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), - dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int_r, - cm->lf_info.lfthr, &lfl_uv[r << 1], - (int)cm->bit_depth); - } else { -#endif // CONFIG_VP9_HIGHBITDEPTH - filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, - &lfl_uv[r << 1]); -#if CONFIG_VP9_HIGHBITDEPTH - } -#endif // CONFIG_VP9_HIGHBITDEPTH - - dst->buf += 8 * dst->stride; - mask_16x16 >>= 4; - mask_8x8 >>= 4; - mask_4x4 >>= 4; - mask_4x4_int >>= 4; - } -} - -static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP9_COMMON *cm, - struct macroblockd_plane planes[MAX_MB_PLANE], - int start, int stop, int y_only) { - const int num_planes = y_only ? 1 : MAX_MB_PLANE; - enum lf_path path; - int mi_row, mi_col; - - if (y_only) - path = LF_PATH_444; - else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1) - path = LF_PATH_420; - else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0) - path = LF_PATH_444; - else - path = LF_PATH_SLOW; - - for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { - MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; - LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0); - - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE, ++lfm) { - int plane; - - vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); - - // TODO(jimbankoski): For 444 only need to do y mask. 
- vp9_adjust_mask(cm, mi_row, mi_col, lfm); - - vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm); - for (plane = 1; plane < num_planes; ++plane) { - switch (path) { - case LF_PATH_420: - vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, lfm); - break; - case LF_PATH_444: - vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, lfm); - break; - case LF_PATH_SLOW: - vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, - mi_row, mi_col); - break; - } - } - } - } -} - -void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, - VP9_COMMON *cm, MACROBLOCKD *xd, - int frame_filter_level, - int y_only, int partial_frame) { - int start_mi_row, end_mi_row, mi_rows_to_filter; - if (!frame_filter_level) return; - start_mi_row = 0; - mi_rows_to_filter = cm->mi_rows; - if (partial_frame && cm->mi_rows > 8) { - start_mi_row = cm->mi_rows >> 1; - start_mi_row &= 0xfffffff8; - mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); - } - end_mi_row = start_mi_row + mi_rows_to_filter; - loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only); -} - -// Used by the encoder to build the loopfilter masks. -// TODO(slavarnway): Do the encoder the same way the decoder does it and -// build the masks in line as part of the encode process. 
-void vp9_build_mask_frame(VP9_COMMON *cm, int frame_filter_level, - int partial_frame) { - int start_mi_row, end_mi_row, mi_rows_to_filter; - int mi_col, mi_row; - if (!frame_filter_level) return; - start_mi_row = 0; - mi_rows_to_filter = cm->mi_rows; - if (partial_frame && cm->mi_rows > 8) { - start_mi_row = cm->mi_rows >> 1; - start_mi_row &= 0xfffffff8; - mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); - } - end_mi_row = start_mi_row + mi_rows_to_filter; - - vp9_loop_filter_frame_init(cm, frame_filter_level); - - for (mi_row = start_mi_row; mi_row < end_mi_row; mi_row += MI_BLOCK_SIZE) { - MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { - // vp9_setup_mask() zeros lfm - vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, - get_lfm(&cm->lf, mi_row, mi_col)); - } - } -} - -// 8x8 blocks in a superblock. A "1" represents the first block in a 16x16 -// or greater area. -static const uint8_t first_block_in_16x16[8][8] = { - {1, 0, 1, 0, 1, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {1, 0, 1, 0, 1, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {1, 0, 1, 0, 1, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {1, 0, 1, 0, 1, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 0} -}; - -// This function sets up the bit masks for a block represented -// by mi_row, mi_col in a 64x64 region. -// TODO(SJL): This function only works for yv12. 
-void vp9_build_mask(VP9_COMMON *cm, const MODE_INFO *mi, int mi_row, - int mi_col, int bw, int bh) { - const BLOCK_SIZE block_size = mi->sb_type; - const TX_SIZE tx_size_y = mi->tx_size; - const loop_filter_info_n *const lfi_n = &cm->lf_info; - const int filter_level = get_filter_level(lfi_n, mi); - const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1); - LOOP_FILTER_MASK *const lfm = get_lfm(&cm->lf, mi_row, mi_col); - uint64_t *const left_y = &lfm->left_y[tx_size_y]; - uint64_t *const above_y = &lfm->above_y[tx_size_y]; - uint64_t *const int_4x4_y = &lfm->int_4x4_y; - uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; - uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; - uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; - const int row_in_sb = (mi_row & 7); - const int col_in_sb = (mi_col & 7); - const int shift_y = col_in_sb + (row_in_sb << 3); - const int shift_uv = (col_in_sb >> 1) + ((row_in_sb >> 1) << 2); - const int build_uv = first_block_in_16x16[row_in_sb][col_in_sb]; - - if (!filter_level) { - return; - } else { - int index = shift_y; - int i; - for (i = 0; i < bh; i++) { - memset(&lfm->lfl_y[index], filter_level, bw); - index += 8; - } - } - - // These set 1 in the current block size for the block size edges. - // For instance if the block size is 32x16, we'll set: - // above = 1111 - // 0000 - // and - // left = 1000 - // = 1000 - // NOTE : In this example the low bit is left most ( 1000 ) is stored as - // 1, not 8... - // - // U and V set things on a 16 bit scale. - // - *above_y |= above_prediction_mask[block_size] << shift_y; - *left_y |= left_prediction_mask[block_size] << shift_y; - - if (build_uv) { - *above_uv |= above_prediction_mask_uv[block_size] << shift_uv; - *left_uv |= left_prediction_mask_uv[block_size] << shift_uv; - } - - // If the block has no coefficients and is not intra we skip applying - // the loop filter on block edges. 
- if (mi->skip && is_inter_block(mi)) - return; - - // Add a mask for the transform size. The transform size mask is set to - // be correct for a 64x64 prediction block size. Mask to match the size of - // the block we are working on and then shift it into place. - *above_y |= (size_mask[block_size] & - above_64x64_txform_mask[tx_size_y]) << shift_y; - *left_y |= (size_mask[block_size] & - left_64x64_txform_mask[tx_size_y]) << shift_y; - - if (build_uv) { - *above_uv |= (size_mask_uv[block_size] & - above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; - - *left_uv |= (size_mask_uv[block_size] & - left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; - } - - // Try to determine what to do with the internal 4x4 block boundaries. These - // differ from the 4x4 boundaries on the outside edge of an 8x8 in that the - // internal ones can be skipped and don't depend on the prediction block size. - if (tx_size_y == TX_4X4) - *int_4x4_y |= size_mask[block_size] << shift_y; - - if (build_uv && tx_size_uv == TX_4X4) - *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv; -} - -void vp9_loop_filter_data_reset( - LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer, - struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]) { - lf_data->frame_buffer = frame_buffer; - lf_data->cm = cm; - lf_data->start = 0; - lf_data->stop = 0; - lf_data->y_only = 0; - memcpy(lf_data->planes, planes, sizeof(lf_data->planes)); -} - -void vp9_reset_lfm(VP9_COMMON *const cm) { - if (cm->lf.filter_level) { - memset(cm->lf.lfm, 0, - ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride * - sizeof(*cm->lf.lfm)); - } -} - -int vp9_loop_filter_worker(LFWorkerData *const lf_data, void *unused) { - (void)unused; - loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, - lf_data->start, lf_data->stop, lf_data->y_only); - return 1; -} diff --git a/thirdparty/libvpx/vp9/common/vp9_loopfilter.h b/thirdparty/libvpx/vp9/common/vp9_loopfilter.h deleted file 
mode 100644 index fca8830fa1..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_loopfilter.h +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_LOOPFILTER_H_ -#define VP9_COMMON_VP9_LOOPFILTER_H_ - -#include "vpx_ports/mem.h" -#include "./vpx_config.h" - -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_seg_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_LOOP_FILTER 63 -#define MAX_SHARPNESS 7 - -#define SIMD_WIDTH 16 - -#define MAX_REF_LF_DELTAS 4 -#define MAX_MODE_LF_DELTAS 2 - -enum lf_path { - LF_PATH_420, - LF_PATH_444, - LF_PATH_SLOW, -}; - -// Need to align this structure so when it is declared and -// passed it can be loaded into vector registers. -typedef struct { - DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, mblim[SIMD_WIDTH]); - DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, lim[SIMD_WIDTH]); - DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, hev_thr[SIMD_WIDTH]); -} loop_filter_thresh; - -typedef struct { - loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1]; - uint8_t lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS]; -} loop_filter_info_n; - -// This structure holds bit masks for all 8x8 blocks in a 64x64 region. -// Each 1 bit represents a position in which we want to apply the loop filter. -// Left_ entries refer to whether we apply a filter on the border to the -// left of the block. Above_ entries refer to whether or not to apply a -// filter on the above border. Int_ entries refer to whether or not to -// apply borders on the 4x4 edges within the 8x8 block that each bit -// represents. 
-// Since each transform is accompanied by a potentially different type of -// loop filter there is a different entry in the array for each transform size. -typedef struct { - uint64_t left_y[TX_SIZES]; - uint64_t above_y[TX_SIZES]; - uint64_t int_4x4_y; - uint16_t left_uv[TX_SIZES]; - uint16_t above_uv[TX_SIZES]; - uint16_t int_4x4_uv; - uint8_t lfl_y[64]; -} LOOP_FILTER_MASK; - -struct loopfilter { - int filter_level; - int last_filt_level; - - int sharpness_level; - int last_sharpness_level; - - uint8_t mode_ref_delta_enabled; - uint8_t mode_ref_delta_update; - - // 0 = Intra, Last, GF, ARF - signed char ref_deltas[MAX_REF_LF_DELTAS]; - signed char last_ref_deltas[MAX_REF_LF_DELTAS]; - - // 0 = ZERO_MV, MV - signed char mode_deltas[MAX_MODE_LF_DELTAS]; - signed char last_mode_deltas[MAX_MODE_LF_DELTAS]; - - LOOP_FILTER_MASK *lfm; - int lfm_stride; -}; - -/* assorted loopfilter functions which get used elsewhere */ -struct VP9Common; -struct macroblockd; -struct VP9LfSyncData; - -// This function sets up the bit masks for the entire 64x64 region represented -// by mi_row, mi_col. -void vp9_setup_mask(struct VP9Common *const cm, - const int mi_row, const int mi_col, - MODE_INFO **mi_8x8, const int mode_info_stride, - LOOP_FILTER_MASK *lfm); - -void vp9_filter_block_plane_ss00(struct VP9Common *const cm, - struct macroblockd_plane *const plane, - int mi_row, - LOOP_FILTER_MASK *lfm); - -void vp9_filter_block_plane_ss11(struct VP9Common *const cm, - struct macroblockd_plane *const plane, - int mi_row, - LOOP_FILTER_MASK *lfm); - -void vp9_filter_block_plane_non420(struct VP9Common *cm, - struct macroblockd_plane *plane, - MODE_INFO **mi_8x8, - int mi_row, int mi_col); - -void vp9_loop_filter_init(struct VP9Common *cm); - -// Update the loop filter for the current frame. -// This should be called before vp9_loop_filter_frame(), vp9_build_mask_frame() -// calls this function directly. 
-void vp9_loop_filter_frame_init(struct VP9Common *cm, int default_filt_lvl); - -void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, - struct VP9Common *cm, - struct macroblockd *mbd, - int filter_level, - int y_only, int partial_frame); - -// Get the superblock lfm for a given mi_row, mi_col. -static INLINE LOOP_FILTER_MASK *get_lfm(const struct loopfilter *lf, - const int mi_row, const int mi_col) { - return &lf->lfm[(mi_col >> 3) + ((mi_row >> 3) * lf->lfm_stride)]; -} - -void vp9_build_mask(struct VP9Common *cm, const MODE_INFO *mi, int mi_row, - int mi_col, int bw, int bh); -void vp9_adjust_mask(struct VP9Common *const cm, const int mi_row, - const int mi_col, LOOP_FILTER_MASK *lfm); -void vp9_build_mask_frame(struct VP9Common *cm, int frame_filter_level, - int partial_frame); -void vp9_reset_lfm(struct VP9Common *const cm); - -typedef struct LoopFilterWorkerData { - YV12_BUFFER_CONFIG *frame_buffer; - struct VP9Common *cm; - struct macroblockd_plane planes[MAX_MB_PLANE]; - - int start; - int stop; - int y_only; -} LFWorkerData; - -void vp9_loop_filter_data_reset( - LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer, - struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]); - -// Operates on the rows described by 'lf_data'. -int vp9_loop_filter_worker(LFWorkerData *const lf_data, void *unused); -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_LOOPFILTER_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_mv.h b/thirdparty/libvpx/vp9/common/vp9_mv.h deleted file mode 100644 index 5d89da8c25..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_mv.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_MV_H_ -#define VP9_COMMON_VP9_MV_H_ - -#include "vpx/vpx_integer.h" - -#include "vp9/common/vp9_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct mv { - int16_t row; - int16_t col; -} MV; - -typedef union int_mv { - uint32_t as_int; - MV as_mv; -} int_mv; /* facilitates faster equality tests and copies */ - -typedef struct mv32 { - int32_t row; - int32_t col; -} MV32; - -static INLINE int is_zero_mv(const MV *mv) { - return *((const uint32_t *)mv) == 0; -} - -static INLINE int is_equal_mv(const MV *a, const MV *b) { - return *((const uint32_t *)a) == *((const uint32_t *)b); -} - -static INLINE void clamp_mv(MV *mv, int min_col, int max_col, - int min_row, int max_row) { - mv->col = clamp(mv->col, min_col, max_col); - mv->row = clamp(mv->row, min_row, max_row); -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_MV_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_mvref_common.c b/thirdparty/libvpx/vp9/common/vp9_mvref_common.c deleted file mode 100644 index 0eb01a51ba..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_mvref_common.c +++ /dev/null @@ -1,201 +0,0 @@ - -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vp9/common/vp9_mvref_common.h" - -// This function searches the neighborhood of a given MB/SB -// to try and find candidate reference vectors. 
-static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, - MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, - int block, int mi_row, int mi_col, - uint8_t *mode_context) { - const int *ref_sign_bias = cm->ref_frame_sign_bias; - int i, refmv_count = 0; - const POSITION *const mv_ref_search = mv_ref_blocks[mi->sb_type]; - int different_ref_found = 0; - int context_counter = 0; - const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ? - cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL; - const TileInfo *const tile = &xd->tile; - - // Blank the reference vector list - memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); - - // The nearest 2 blocks are treated differently - // if the size < 8x8 we get the mv from the bmi substructure, - // and we also need to keep a mode count. - for (i = 0; i < 2; ++i) { - const POSITION *const mv_ref = &mv_ref_search[i]; - if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * - xd->mi_stride]; - // Keep counts for entropy encoding. - context_counter += mode_2_counter[candidate_mi->mode]; - different_ref_found = 1; - - if (candidate_mi->ref_frame[0] == ref_frame) - ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block), - refmv_count, mv_ref_list, Done); - else if (candidate_mi->ref_frame[1] == ref_frame) - ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block), - refmv_count, mv_ref_list, Done); - } - } - - // Check the rest of the neighbors in much the same way - // as before except we don't need to keep track of sub blocks or - // mode counts. 
- for (; i < MVREF_NEIGHBOURS; ++i) { - const POSITION *const mv_ref = &mv_ref_search[i]; - if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *const candidate_mi = - xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; - different_ref_found = 1; - - if (candidate_mi->ref_frame[0] == ref_frame) - ADD_MV_REF_LIST(candidate_mi->mv[0], refmv_count, mv_ref_list, Done); - else if (candidate_mi->ref_frame[1] == ref_frame) - ADD_MV_REF_LIST(candidate_mi->mv[1], refmv_count, mv_ref_list, Done); - } - } - - // Check the last frame's mode and mv info. - if (cm->use_prev_frame_mvs) { - if (prev_frame_mvs->ref_frame[0] == ref_frame) { - ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done); - } else if (prev_frame_mvs->ref_frame[1] == ref_frame) { - ADD_MV_REF_LIST(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done); - } - } - - // Since we couldn't find 2 mvs from the same reference frame - // go back through the neighbors and find motion vectors from - // different reference frames. - if (different_ref_found) { - for (i = 0; i < MVREF_NEIGHBOURS; ++i) { - const POSITION *mv_ref = &mv_ref_search[i]; - if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *const candidate_mi = - xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; - - // If the candidate is INTRA we don't want to consider its mv. - IF_DIFF_REF_FRAME_ADD_MV(candidate_mi, ref_frame, ref_sign_bias, - refmv_count, mv_ref_list, Done); - } - } - } - - // Since we still don't have a candidate we'll try the last frame. 
- if (cm->use_prev_frame_mvs) { - if (prev_frame_mvs->ref_frame[0] != ref_frame && - prev_frame_mvs->ref_frame[0] > INTRA_FRAME) { - int_mv mv = prev_frame_mvs->mv[0]; - if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] != - ref_sign_bias[ref_frame]) { - mv.as_mv.row *= -1; - mv.as_mv.col *= -1; - } - ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done); - } - - if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME && - prev_frame_mvs->ref_frame[1] != ref_frame && - prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) { - int_mv mv = prev_frame_mvs->mv[1]; - if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] != - ref_sign_bias[ref_frame]) { - mv.as_mv.row *= -1; - mv.as_mv.col *= -1; - } - ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done); - } - } - - Done: - - mode_context[ref_frame] = counter_to_context[context_counter]; - - // Clamp vectors - for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) - clamp_mv_ref(&mv_ref_list[i].as_mv, xd); -} - -void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, - MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, - int mi_row, int mi_col, - uint8_t *mode_context) { - find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1, - mi_row, mi_col, mode_context); -} - -void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, - int_mv *mvlist, int_mv *nearest_mv, - int_mv *near_mv) { - int i; - // Make sure all the candidates are properly clamped etc - for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { - lower_mv_precision(&mvlist[i].as_mv, allow_hp); - clamp_mv2(&mvlist[i].as_mv, xd); - } - *nearest_mv = mvlist[0]; - *near_mv = mvlist[1]; -} - -void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, - int block, int ref, int mi_row, int mi_col, - int_mv *nearest_mv, int_mv *near_mv, - uint8_t *mode_context) { - int_mv mv_list[MAX_MV_REF_CANDIDATES]; - MODE_INFO *const mi = xd->mi[0]; - b_mode_info *bmi = mi->bmi; - int n; - - assert(MAX_MV_REF_CANDIDATES == 2); - - find_mv_refs_idx(cm, xd, mi, 
mi->ref_frame[ref], mv_list, block, - mi_row, mi_col, mode_context); - - near_mv->as_int = 0; - switch (block) { - case 0: - nearest_mv->as_int = mv_list[0].as_int; - near_mv->as_int = mv_list[1].as_int; - break; - case 1: - case 2: - nearest_mv->as_int = bmi[0].as_mv[ref].as_int; - for (n = 0; n < MAX_MV_REF_CANDIDATES; ++n) - if (nearest_mv->as_int != mv_list[n].as_int) { - near_mv->as_int = mv_list[n].as_int; - break; - } - break; - case 3: { - int_mv candidates[2 + MAX_MV_REF_CANDIDATES]; - candidates[0] = bmi[1].as_mv[ref]; - candidates[1] = bmi[0].as_mv[ref]; - candidates[2] = mv_list[0]; - candidates[3] = mv_list[1]; - - nearest_mv->as_int = bmi[2].as_mv[ref].as_int; - for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n) - if (nearest_mv->as_int != candidates[n].as_int) { - near_mv->as_int = candidates[n].as_int; - break; - } - break; - } - default: - assert(0 && "Invalid block index."); - } -} diff --git a/thirdparty/libvpx/vp9/common/vp9_mvref_common.h b/thirdparty/libvpx/vp9/common/vp9_mvref_common.h deleted file mode 100644 index 4380843e24..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_mvref_common.h +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ -#ifndef VP9_COMMON_VP9_MVREF_COMMON_H_ -#define VP9_COMMON_VP9_MVREF_COMMON_H_ - -#include "vp9/common/vp9_onyxc_int.h" -#include "vp9/common/vp9_blockd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define LEFT_TOP_MARGIN ((VP9_ENC_BORDER_IN_PIXELS - VP9_INTERP_EXTEND) << 3) -#define RIGHT_BOTTOM_MARGIN ((VP9_ENC_BORDER_IN_PIXELS -\ - VP9_INTERP_EXTEND) << 3) - -#define MVREF_NEIGHBOURS 8 - -typedef struct position { - int row; - int col; -} POSITION; - -typedef enum { - BOTH_ZERO = 0, - ZERO_PLUS_PREDICTED = 1, - BOTH_PREDICTED = 2, - NEW_PLUS_NON_INTRA = 3, - BOTH_NEW = 4, - INTRA_PLUS_NON_INTRA = 5, - BOTH_INTRA = 6, - INVALID_CASE = 9 -} motion_vector_context; - -// This is used to figure out a context for the ref blocks. The code flattens -// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by -// adding 9 for each intra block, 3 for each zero mv and 1 for each new -// motion vector. This single number is then converted into a context -// with a single lookup ( counter_to_context ). -static const int mode_2_counter[MB_MODE_COUNT] = { - 9, // DC_PRED - 9, // V_PRED - 9, // H_PRED - 9, // D45_PRED - 9, // D135_PRED - 9, // D117_PRED - 9, // D153_PRED - 9, // D207_PRED - 9, // D63_PRED - 9, // TM_PRED - 0, // NEARESTMV - 0, // NEARMV - 3, // ZEROMV - 1, // NEWMV -}; - -// There are 3^3 different combinations of 3 counts that can be either 0,1 or -// 2. However the actual count can never be greater than 2 so the highest -// counter we need is 18. 9 is an invalid counter that's never used. 
-static const int counter_to_context[19] = { - BOTH_PREDICTED, // 0 - NEW_PLUS_NON_INTRA, // 1 - BOTH_NEW, // 2 - ZERO_PLUS_PREDICTED, // 3 - NEW_PLUS_NON_INTRA, // 4 - INVALID_CASE, // 5 - BOTH_ZERO, // 6 - INVALID_CASE, // 7 - INVALID_CASE, // 8 - INTRA_PLUS_NON_INTRA, // 9 - INTRA_PLUS_NON_INTRA, // 10 - INVALID_CASE, // 11 - INTRA_PLUS_NON_INTRA, // 12 - INVALID_CASE, // 13 - INVALID_CASE, // 14 - INVALID_CASE, // 15 - INVALID_CASE, // 16 - INVALID_CASE, // 17 - BOTH_INTRA // 18 -}; - -static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = { - // 4X4 - {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, - // 4X8 - {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, - // 8X4 - {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, - // 8X8 - {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}}, - // 8X16 - {{0, -1}, {-1, 0}, {1, -1}, {-1, -1}, {0, -2}, {-2, 0}, {-2, -1}, {-1, -2}}, - // 16X8 - {{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}}, - // 16X16 - {{-1, 0}, {0, -1}, {-1, 1}, {1, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, - // 16X32 - {{0, -1}, {-1, 0}, {2, -1}, {-1, -1}, {-1, 1}, {0, -3}, {-3, 0}, {-3, -3}}, - // 32X16 - {{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, - // 32X32 - {{-1, 1}, {1, -1}, {-1, 2}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}}, - // 32X64 - {{0, -1}, {-1, 0}, {4, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {2, -1}}, - // 64X32 - {{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}}, - // 64X64 - {{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}} -}; - -static const int idx_n_column_to_subblock[4][2] = { - {1, 2}, - {1, 3}, - {3, 2}, - {3, 3} -}; - -// clamp_mv_ref -#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units - -static INLINE void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) { - clamp_mv(mv, 
xd->mb_to_left_edge - MV_BORDER, - xd->mb_to_right_edge + MV_BORDER, - xd->mb_to_top_edge - MV_BORDER, - xd->mb_to_bottom_edge + MV_BORDER); -} - -// This function returns either the appropriate sub block or block's mv -// on whether the block_size < 8x8 and we have check_sub_blocks set. -static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv, - int search_col, int block_idx) { - return block_idx >= 0 && candidate->sb_type < BLOCK_8X8 - ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]] - .as_mv[which_mv] - : candidate->mv[which_mv]; -} - - -// Performs mv sign inversion if indicated by the reference frame combination. -static INLINE int_mv scale_mv(const MODE_INFO *mi, int ref, - const MV_REFERENCE_FRAME this_ref_frame, - const int *ref_sign_bias) { - int_mv mv = mi->mv[ref]; - if (ref_sign_bias[mi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) { - mv.as_mv.row *= -1; - mv.as_mv.col *= -1; - } - return mv; -} - -// This macro is used to add a motion vector mv_ref list if it isn't -// already in the list. If it's the second motion vector it will also -// skip all additional processing and jump to Done! -#define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done) \ - do { \ - if (refmv_count) { \ - if ((mv).as_int != (mv_ref_list)[0].as_int) { \ - (mv_ref_list)[(refmv_count)] = (mv); \ - goto Done; \ - } \ - } else { \ - (mv_ref_list)[(refmv_count)++] = (mv); \ - } \ - } while (0) - -// If either reference frame is different, not INTRA, and they -// are different from each other scale and add the mv to our list. 
-#define IF_DIFF_REF_FRAME_ADD_MV(mbmi, ref_frame, ref_sign_bias, refmv_count, \ - mv_ref_list, Done) \ - do { \ - if (is_inter_block(mbmi)) { \ - if ((mbmi)->ref_frame[0] != ref_frame) \ - ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \ - refmv_count, mv_ref_list, Done); \ - if (has_second_ref(mbmi) && \ - (mbmi)->ref_frame[1] != ref_frame && \ - (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \ - ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \ - refmv_count, mv_ref_list, Done); \ - } \ - } while (0) - - -// Checks that the given mi_row, mi_col and search point -// are inside the borders of the tile. -static INLINE int is_inside(const TileInfo *const tile, - int mi_col, int mi_row, int mi_rows, - const POSITION *mi_pos) { - return !(mi_row + mi_pos->row < 0 || - mi_col + mi_pos->col < tile->mi_col_start || - mi_row + mi_pos->row >= mi_rows || - mi_col + mi_pos->col >= tile->mi_col_end); -} - -// TODO(jingning): this mv clamping function should be block size dependent. -static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) { - clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN, - xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, - xd->mb_to_top_edge - LEFT_TOP_MARGIN, - xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); -} - -static INLINE void lower_mv_precision(MV *mv, int allow_hp) { - const int use_hp = allow_hp && use_mv_hp(mv); - if (!use_hp) { - if (mv->row & 1) - mv->row += (mv->row > 0 ? -1 : 1); - if (mv->col & 1) - mv->col += (mv->col > 0 ? 
-1 : 1); - } -} - -typedef void (*find_mv_refs_sync)(void *const data, int mi_row); -void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, - MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, int mi_row, int mi_col, - uint8_t *mode_context); - -// check a list of motion vectors by sad score using a number rows of pixels -// above and a number cols of pixels in the left to select the one with best -// score to use as ref motion vector -void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, - int_mv *mvlist, int_mv *nearest_mv, int_mv *near_mv); - -void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, - int block, int ref, int mi_row, int mi_col, - int_mv *nearest_mv, int_mv *near_mv, - uint8_t *mode_context); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_MVREF_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_onyxc_int.h b/thirdparty/libvpx/vp9/common/vp9_onyxc_int.h deleted file mode 100644 index 3fd935e628..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_onyxc_int.h +++ /dev/null @@ -1,446 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_COMMON_VP9_ONYXC_INT_H_ -#define VP9_COMMON_VP9_ONYXC_INT_H_ - -#include "./vpx_config.h" -#include "vpx/internal/vpx_codec_internal.h" -#include "vpx_util/vpx_thread.h" -#include "./vp9_rtcd.h" -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_loopfilter.h" -#include "vp9/common/vp9_entropymv.h" -#include "vp9/common/vp9_entropy.h" -#include "vp9/common/vp9_entropymode.h" -#include "vp9/common/vp9_frame_buffers.h" -#include "vp9/common/vp9_quant_common.h" -#include "vp9/common/vp9_tile_common.h" - -#if CONFIG_VP9_POSTPROC -#include "vp9/common/vp9_postproc.h" -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -#define REFS_PER_FRAME 3 - -#define REF_FRAMES_LOG2 3 -#define REF_FRAMES (1 << REF_FRAMES_LOG2) - -// 4 scratch frames for the new frames to support a maximum of 4 cores decoding -// in parallel, 3 for scaled references on the encoder. -// TODO(hkuang): Add ondemand frame buffers instead of hardcoding the number -// of framebuffers. -// TODO(jkoleszar): These 3 extra references could probably come from the -// normal reference pool. -#define FRAME_BUFFERS (REF_FRAMES + 7) - -#define FRAME_CONTEXTS_LOG2 2 -#define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2) - -#define NUM_PING_PONG_BUFFERS 2 - -extern const struct { - PARTITION_CONTEXT above; - PARTITION_CONTEXT left; -} partition_context_lookup[BLOCK_SIZES]; - - -typedef enum { - SINGLE_REFERENCE = 0, - COMPOUND_REFERENCE = 1, - REFERENCE_MODE_SELECT = 2, - REFERENCE_MODES = 3, -} REFERENCE_MODE; - -typedef struct { - int_mv mv[2]; - MV_REFERENCE_FRAME ref_frame[2]; -} MV_REF; - -typedef struct { - int ref_count; - MV_REF *mvs; - int mi_rows; - int mi_cols; - vpx_codec_frame_buffer_t raw_frame_buffer; - YV12_BUFFER_CONFIG buf; - - // The Following variables will only be used in frame parallel decode. - - // frame_worker_owner indicates which FrameWorker owns this buffer. NULL means - // that no FrameWorker owns, or is decoding, this buffer. 
- VPxWorker *frame_worker_owner; - - // row and col indicate which position frame has been decoded to in real - // pixel unit. They are reset to -1 when decoding begins and set to INT_MAX - // when the frame is fully decoded. - int row; - int col; -} RefCntBuffer; - -typedef struct BufferPool { - // Protect BufferPool from being accessed by several FrameWorkers at - // the same time during frame parallel decode. - // TODO(hkuang): Try to use atomic variable instead of locking the whole pool. -#if CONFIG_MULTITHREAD - pthread_mutex_t pool_mutex; -#endif - - // Private data associated with the frame buffer callbacks. - void *cb_priv; - - vpx_get_frame_buffer_cb_fn_t get_fb_cb; - vpx_release_frame_buffer_cb_fn_t release_fb_cb; - - RefCntBuffer frame_bufs[FRAME_BUFFERS]; - - // Frame buffers allocated internally by the codec. - InternalFrameBufferList int_frame_buffers; -} BufferPool; - -typedef struct VP9Common { - struct vpx_internal_error_info error; - vpx_color_space_t color_space; - vpx_color_range_t color_range; - int width; - int height; - int render_width; - int render_height; - int last_width; - int last_height; - - // TODO(jkoleszar): this implies chroma ss right now, but could vary per - // plane. Revisit as part of the future change to YV12_BUFFER_CONFIG to - // support additional planes. - int subsampling_x; - int subsampling_y; - -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth; // Marks if we need to use 16bit frame buffers. -#endif - - YV12_BUFFER_CONFIG *frame_to_show; - RefCntBuffer *prev_frame; - - // TODO(hkuang): Combine this with cur_buf in macroblockd. - RefCntBuffer *cur_frame; - - int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */ - - // Prepare ref_frame_map for the next frame. - // Only used in frame parallel decode. - int next_ref_frame_map[REF_FRAMES]; - - // TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and - // roll new_fb_idx into it. 
- - // Each frame can reference REFS_PER_FRAME buffers - RefBuffer frame_refs[REFS_PER_FRAME]; - - int new_fb_idx; - -#if CONFIG_VP9_POSTPROC - YV12_BUFFER_CONFIG post_proc_buffer; - YV12_BUFFER_CONFIG post_proc_buffer_int; -#endif - - FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/ - FRAME_TYPE frame_type; - - int show_frame; - int last_show_frame; - int show_existing_frame; - - // Flag signaling that the frame is encoded using only INTRA modes. - uint8_t intra_only; - uint8_t last_intra_only; - - int allow_high_precision_mv; - - // Flag signaling that the frame context should be reset to default values. - // 0 or 1 implies don't reset, 2 reset just the context specified in the - // frame header, 3 reset all contexts. - int reset_frame_context; - - // MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in - // MODE_INFO (8-pixel) units. - int MBs; - int mb_rows, mi_rows; - int mb_cols, mi_cols; - int mi_stride; - - /* profile settings */ - TX_MODE tx_mode; - - int base_qindex; - int y_dc_delta_q; - int uv_dc_delta_q; - int uv_ac_delta_q; - int16_t y_dequant[MAX_SEGMENTS][2]; - int16_t uv_dequant[MAX_SEGMENTS][2]; - - /* We allocate a MODE_INFO struct for each macroblock, together with - an extra row on top and column on the left to simplify prediction. */ - int mi_alloc_size; - MODE_INFO *mip; /* Base of allocated array */ - MODE_INFO *mi; /* Corresponds to upper left visible macroblock */ - - // TODO(agrange): Move prev_mi into encoder structure. - // prev_mip and prev_mi will only be allocated in VP9 encoder. - MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */ - MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */ - - // Separate mi functions between encoder and decoder. - int (*alloc_mi)(struct VP9Common *cm, int mi_size); - void (*free_mi)(struct VP9Common *cm); - void (*setup_mi)(struct VP9Common *cm); - - // Grid of pointers to 8x8 MODE_INFO structs. 
Any 8x8 not in the visible - // area will be NULL. - MODE_INFO **mi_grid_base; - MODE_INFO **mi_grid_visible; - MODE_INFO **prev_mi_grid_base; - MODE_INFO **prev_mi_grid_visible; - - // Whether to use previous frame's motion vectors for prediction. - int use_prev_frame_mvs; - - // Persistent mb segment id map used in prediction. - int seg_map_idx; - int prev_seg_map_idx; - - uint8_t *seg_map_array[NUM_PING_PONG_BUFFERS]; - uint8_t *last_frame_seg_map; - uint8_t *current_frame_seg_map; - int seg_map_alloc_size; - - INTERP_FILTER interp_filter; - - loop_filter_info_n lf_info; - - int refresh_frame_context; /* Two state 0 = NO, 1 = YES */ - - int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */ - - struct loopfilter lf; - struct segmentation seg; - - // TODO(hkuang): Remove this as it is the same as frame_parallel_decode - // in pbi. - int frame_parallel_decode; // frame-based threading. - - // Context probabilities for reference frame prediction - MV_REFERENCE_FRAME comp_fixed_ref; - MV_REFERENCE_FRAME comp_var_ref[2]; - REFERENCE_MODE reference_mode; - - FRAME_CONTEXT *fc; /* this frame entropy */ - FRAME_CONTEXT *frame_contexts; // FRAME_CONTEXTS - unsigned int frame_context_idx; /* Context to use/update */ - FRAME_COUNTS counts; - - unsigned int current_video_frame; - BITSTREAM_PROFILE profile; - - // VPX_BITS_8 in profile 0 or 1, VPX_BITS_10 or VPX_BITS_12 in profile 2 or 3. - vpx_bit_depth_t bit_depth; - vpx_bit_depth_t dequant_bit_depth; // bit_depth of current dequantizer - -#if CONFIG_VP9_POSTPROC - struct postproc_state postproc_state; -#endif - - int error_resilient_mode; - int frame_parallel_decoding_mode; - - int log2_tile_cols, log2_tile_rows; - int byte_alignment; - int skip_loop_filter; - - // Private data associated with the frame buffer callbacks. - void *cb_priv; - vpx_get_frame_buffer_cb_fn_t get_fb_cb; - vpx_release_frame_buffer_cb_fn_t release_fb_cb; - - // Handles memory for the codec. 
- InternalFrameBufferList int_frame_buffers; - - // External BufferPool passed from outside. - BufferPool *buffer_pool; - - PARTITION_CONTEXT *above_seg_context; - ENTROPY_CONTEXT *above_context; - int above_context_alloc_cols; -} VP9_COMMON; - -// TODO(hkuang): Don't need to lock the whole pool after implementing atomic -// frame reference count. -void lock_buffer_pool(BufferPool *const pool); -void unlock_buffer_pool(BufferPool *const pool); - -static INLINE YV12_BUFFER_CONFIG *get_ref_frame(VP9_COMMON *cm, int index) { - if (index < 0 || index >= REF_FRAMES) - return NULL; - if (cm->ref_frame_map[index] < 0) - return NULL; - assert(cm->ref_frame_map[index] < FRAME_BUFFERS); - return &cm->buffer_pool->frame_bufs[cm->ref_frame_map[index]].buf; -} - -static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) { - return &cm->buffer_pool->frame_bufs[cm->new_fb_idx].buf; -} - -static INLINE int get_free_fb(VP9_COMMON *cm) { - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; - int i; - - lock_buffer_pool(cm->buffer_pool); - for (i = 0; i < FRAME_BUFFERS; ++i) - if (frame_bufs[i].ref_count == 0) - break; - - if (i != FRAME_BUFFERS) { - frame_bufs[i].ref_count = 1; - } else { - // Reset i to be INVALID_IDX to indicate no free buffer found. 
- i = INVALID_IDX; - } - - unlock_buffer_pool(cm->buffer_pool); - return i; -} - -static INLINE void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) { - const int ref_index = *idx; - - if (ref_index >= 0 && bufs[ref_index].ref_count > 0) - bufs[ref_index].ref_count--; - - *idx = new_idx; - - bufs[new_idx].ref_count++; -} - -static INLINE int mi_cols_aligned_to_sb(int n_mis) { - return ALIGN_POWER_OF_TWO(n_mis, MI_BLOCK_SIZE_LOG2); -} - -static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) { - return cm->frame_type == KEY_FRAME || cm->intra_only; -} - -static INLINE void set_partition_probs(const VP9_COMMON *const cm, - MACROBLOCKD *const xd) { - xd->partition_probs = - frame_is_intra_only(cm) ? - &vp9_kf_partition_probs[0] : - (const vpx_prob (*)[PARTITION_TYPES - 1])cm->fc->partition_prob; -} - -static INLINE void vp9_init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd, - tran_low_t *dqcoeff) { - int i; - - for (i = 0; i < MAX_MB_PLANE; ++i) { - xd->plane[i].dqcoeff = dqcoeff; - xd->above_context[i] = cm->above_context + - i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols); - - if (get_plane_type(i) == PLANE_TYPE_Y) { - memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant)); - } else { - memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant)); - } - xd->fc = cm->fc; - } - - xd->above_seg_context = cm->above_seg_context; - xd->mi_stride = cm->mi_stride; - xd->error_info = &cm->error; - - set_partition_probs(cm, xd); -} - -static INLINE const vpx_prob* get_partition_probs(const MACROBLOCKD *xd, - int ctx) { - return xd->partition_probs[ctx]; -} - -static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) { - const int above_idx = mi_col * 2; - const int left_idx = (mi_row * 2) & 15; - int i; - for (i = 0; i < MAX_MB_PLANE; ++i) { - struct macroblockd_plane *const pd = &xd->plane[i]; - pd->above_context = &xd->above_context[i][above_idx >> pd->subsampling_x]; - pd->left_context = 
&xd->left_context[i][left_idx >> pd->subsampling_y]; - } -} - -static INLINE int calc_mi_size(int len) { - // len is in mi units. - return len + MI_BLOCK_SIZE; -} - -static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, - int mi_row, int bh, - int mi_col, int bw, - int mi_rows, int mi_cols) { - xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); - xd->mb_to_bottom_edge = ((mi_rows - bh - mi_row) * MI_SIZE) * 8; - xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); - xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8; - - // Are edges available for intra prediction? - xd->above_mi = (mi_row != 0) ? xd->mi[-xd->mi_stride] : NULL; - xd->left_mi = (mi_col > tile->mi_col_start) ? xd->mi[-1] : NULL; -} - -static INLINE void update_partition_context(MACROBLOCKD *xd, - int mi_row, int mi_col, - BLOCK_SIZE subsize, - BLOCK_SIZE bsize) { - PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col; - PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK); - - // num_4x4_blocks_wide_lookup[bsize] / 2 - const int bs = num_8x8_blocks_wide_lookup[bsize]; - - // update the partition context at the end notes. set partition bits - // of block sizes larger than the current one to be one, and partition - // bits of smaller block sizes to be zero. 
- memset(above_ctx, partition_context_lookup[subsize].above, bs); - memset(left_ctx, partition_context_lookup[subsize].left, bs); -} - -static INLINE int partition_plane_context(const MACROBLOCKD *xd, - int mi_row, int mi_col, - BLOCK_SIZE bsize) { - const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col; - const PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK); - const int bsl = mi_width_log2_lookup[bsize]; - int above = (*above_ctx >> bsl) & 1 , left = (*left_ctx >> bsl) & 1; - - assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]); - assert(bsl >= 0); - - return (left * 2 + above) + bsl * PARTITION_PLOFFSET; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_ONYXC_INT_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_ppflags.h b/thirdparty/libvpx/vp9/common/vp9_ppflags.h deleted file mode 100644 index 12b989f43a..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_ppflags.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_COMMON_VP9_PPFLAGS_H_ -#define VP9_COMMON_VP9_PPFLAGS_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -enum { - VP9D_NOFILTERING = 0, - VP9D_DEBLOCK = 1 << 0, - VP9D_DEMACROBLOCK = 1 << 1, - VP9D_ADDNOISE = 1 << 2, - VP9D_DEBUG_TXT_FRAME_INFO = 1 << 3, - VP9D_DEBUG_TXT_MBLK_MODES = 1 << 4, - VP9D_DEBUG_TXT_DC_DIFF = 1 << 5, - VP9D_DEBUG_TXT_RATE_INFO = 1 << 6, - VP9D_DEBUG_DRAW_MV = 1 << 7, - VP9D_DEBUG_CLR_BLK_MODES = 1 << 8, - VP9D_DEBUG_CLR_FRM_REF_BLKS = 1 << 9, - VP9D_MFQE = 1 << 10 -}; - -typedef struct { - int post_proc_flag; - int deblocking_level; - int noise_level; -} vp9_ppflags_t; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_PPFLAGS_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_pred_common.c b/thirdparty/libvpx/vp9/common/vp9_pred_common.c deleted file mode 100644 index 8f90e70e73..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_pred_common.c +++ /dev/null @@ -1,314 +0,0 @@ - -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_pred_common.h" -#include "vp9/common/vp9_seg_common.h" - -// Returns a context number for the given MB prediction signal -int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd) { - // Note: - // The mode info data structure has a one element border above and to the - // left of the entries corresponding to real macroblocks. - // The prediction flags in these dummy entries are initialized to 0. - const MODE_INFO *const left_mi = xd->left_mi; - const int left_type = left_mi && is_inter_block(left_mi) ? 
- left_mi->interp_filter : SWITCHABLE_FILTERS; - const MODE_INFO *const above_mi = xd->above_mi; - const int above_type = above_mi && is_inter_block(above_mi) ? - above_mi->interp_filter : SWITCHABLE_FILTERS; - - if (left_type == above_type) - return left_type; - else if (left_type == SWITCHABLE_FILTERS) - return above_type; - else if (above_type == SWITCHABLE_FILTERS) - return left_type; - else - return SWITCHABLE_FILTERS; -} - -int vp9_get_reference_mode_context(const VP9_COMMON *cm, - const MACROBLOCKD *xd) { - int ctx; - const MODE_INFO *const above_mi = xd->above_mi; - const MODE_INFO *const left_mi = xd->left_mi; - const int has_above = !!above_mi; - const int has_left = !!left_mi; - // Note: - // The mode info data structure has a one element border above and to the - // left of the entries corresponding to real macroblocks. - // The prediction flags in these dummy entries are initialized to 0. - if (has_above && has_left) { // both edges available - if (!has_second_ref(above_mi) && !has_second_ref(left_mi)) - // neither edge uses comp pred (0/1) - ctx = (above_mi->ref_frame[0] == cm->comp_fixed_ref) ^ - (left_mi->ref_frame[0] == cm->comp_fixed_ref); - else if (!has_second_ref(above_mi)) - // one of two edges uses comp pred (2/3) - ctx = 2 + (above_mi->ref_frame[0] == cm->comp_fixed_ref || - !is_inter_block(above_mi)); - else if (!has_second_ref(left_mi)) - // one of two edges uses comp pred (2/3) - ctx = 2 + (left_mi->ref_frame[0] == cm->comp_fixed_ref || - !is_inter_block(left_mi)); - else // both edges use comp pred (4) - ctx = 4; - } else if (has_above || has_left) { // one edge available - const MODE_INFO *edge_mi = has_above ? 
above_mi : left_mi; - - if (!has_second_ref(edge_mi)) - // edge does not use comp pred (0/1) - ctx = edge_mi->ref_frame[0] == cm->comp_fixed_ref; - else - // edge uses comp pred (3) - ctx = 3; - } else { // no edges available (1) - ctx = 1; - } - assert(ctx >= 0 && ctx < COMP_INTER_CONTEXTS); - return ctx; -} - -// Returns a context number for the given MB prediction signal -int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm, - const MACROBLOCKD *xd) { - int pred_context; - const MODE_INFO *const above_mi = xd->above_mi; - const MODE_INFO *const left_mi = xd->left_mi; - const int above_in_image = !!above_mi; - const int left_in_image = !!left_mi; - - // Note: - // The mode info data structure has a one element border above and to the - // left of the entries corresponding to real macroblocks. - // The prediction flags in these dummy entries are initialized to 0. - const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; - const int var_ref_idx = !fix_ref_idx; - - if (above_in_image && left_in_image) { // both edges available - const int above_intra = !is_inter_block(above_mi); - const int left_intra = !is_inter_block(left_mi); - - if (above_intra && left_intra) { // intra/intra (2) - pred_context = 2; - } else if (above_intra || left_intra) { // intra/inter - const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi; - - if (!has_second_ref(edge_mi)) // single pred (1/3) - pred_context = 1 + 2 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]); - else // comp pred (1/3) - pred_context = 1 + 2 * (edge_mi->ref_frame[var_ref_idx] - != cm->comp_var_ref[1]); - } else { // inter/inter - const int l_sg = !has_second_ref(left_mi); - const int a_sg = !has_second_ref(above_mi); - const MV_REFERENCE_FRAME vrfa = a_sg ? above_mi->ref_frame[0] - : above_mi->ref_frame[var_ref_idx]; - const MV_REFERENCE_FRAME vrfl = l_sg ? 
left_mi->ref_frame[0] - : left_mi->ref_frame[var_ref_idx]; - - if (vrfa == vrfl && cm->comp_var_ref[1] == vrfa) { - pred_context = 0; - } else if (l_sg && a_sg) { // single/single - if ((vrfa == cm->comp_fixed_ref && vrfl == cm->comp_var_ref[0]) || - (vrfl == cm->comp_fixed_ref && vrfa == cm->comp_var_ref[0])) - pred_context = 4; - else if (vrfa == vrfl) - pred_context = 3; - else - pred_context = 1; - } else if (l_sg || a_sg) { // single/comp - const MV_REFERENCE_FRAME vrfc = l_sg ? vrfa : vrfl; - const MV_REFERENCE_FRAME rfs = a_sg ? vrfa : vrfl; - if (vrfc == cm->comp_var_ref[1] && rfs != cm->comp_var_ref[1]) - pred_context = 1; - else if (rfs == cm->comp_var_ref[1] && vrfc != cm->comp_var_ref[1]) - pred_context = 2; - else - pred_context = 4; - } else if (vrfa == vrfl) { // comp/comp - pred_context = 4; - } else { - pred_context = 2; - } - } - } else if (above_in_image || left_in_image) { // one edge available - const MODE_INFO *edge_mi = above_in_image ? above_mi : left_mi; - - if (!is_inter_block(edge_mi)) { - pred_context = 2; - } else { - if (has_second_ref(edge_mi)) - pred_context = 4 * (edge_mi->ref_frame[var_ref_idx] - != cm->comp_var_ref[1]); - else - pred_context = 3 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]); - } - } else { // no edges available (2) - pred_context = 2; - } - assert(pred_context >= 0 && pred_context < REF_CONTEXTS); - - return pred_context; -} - -int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) { - int pred_context; - const MODE_INFO *const above_mi = xd->above_mi; - const MODE_INFO *const left_mi = xd->left_mi; - const int has_above = !!above_mi; - const int has_left = !!left_mi; - // Note: - // The mode info data structure has a one element border above and to the - // left of the entries corresponding to real macroblocks. - // The prediction flags in these dummy entries are initialized to 0. 
- if (has_above && has_left) { // both edges available - const int above_intra = !is_inter_block(above_mi); - const int left_intra = !is_inter_block(left_mi); - - if (above_intra && left_intra) { // intra/intra - pred_context = 2; - } else if (above_intra || left_intra) { // intra/inter or inter/intra - const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi; - if (!has_second_ref(edge_mi)) - pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME); - else - pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME || - edge_mi->ref_frame[1] == LAST_FRAME); - } else { // inter/inter - const int above_has_second = has_second_ref(above_mi); - const int left_has_second = has_second_ref(left_mi); - const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0]; - const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1]; - const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0]; - const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1]; - - if (above_has_second && left_has_second) { - pred_context = 1 + (above0 == LAST_FRAME || above1 == LAST_FRAME || - left0 == LAST_FRAME || left1 == LAST_FRAME); - } else if (above_has_second || left_has_second) { - const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0; - const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0; - const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1; - - if (rfs == LAST_FRAME) - pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME); - else - pred_context = (crf1 == LAST_FRAME || crf2 == LAST_FRAME); - } else { - pred_context = 2 * (above0 == LAST_FRAME) + 2 * (left0 == LAST_FRAME); - } - } - } else if (has_above || has_left) { // one edge available - const MODE_INFO *edge_mi = has_above ? 
above_mi : left_mi; - if (!is_inter_block(edge_mi)) { // intra - pred_context = 2; - } else { // inter - if (!has_second_ref(edge_mi)) - pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME); - else - pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME || - edge_mi->ref_frame[1] == LAST_FRAME); - } - } else { // no edges available - pred_context = 2; - } - - assert(pred_context >= 0 && pred_context < REF_CONTEXTS); - return pred_context; -} - -int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { - int pred_context; - const MODE_INFO *const above_mi = xd->above_mi; - const MODE_INFO *const left_mi = xd->left_mi; - const int has_above = !!above_mi; - const int has_left = !!left_mi; - - // Note: - // The mode info data structure has a one element border above and to the - // left of the entries corresponding to real macroblocks. - // The prediction flags in these dummy entries are initialized to 0. - if (has_above && has_left) { // both edges available - const int above_intra = !is_inter_block(above_mi); - const int left_intra = !is_inter_block(left_mi); - - if (above_intra && left_intra) { // intra/intra - pred_context = 2; - } else if (above_intra || left_intra) { // intra/inter or inter/intra - const MODE_INFO *edge_mi = above_intra ? 
left_mi : above_mi; - if (!has_second_ref(edge_mi)) { - if (edge_mi->ref_frame[0] == LAST_FRAME) - pred_context = 3; - else - pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME); - } else { - pred_context = 1 + 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME || - edge_mi->ref_frame[1] == GOLDEN_FRAME); - } - } else { // inter/inter - const int above_has_second = has_second_ref(above_mi); - const int left_has_second = has_second_ref(left_mi); - const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0]; - const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1]; - const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0]; - const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1]; - - if (above_has_second && left_has_second) { - if (above0 == left0 && above1 == left1) - pred_context = 3 * (above0 == GOLDEN_FRAME || - above1 == GOLDEN_FRAME || - left0 == GOLDEN_FRAME || - left1 == GOLDEN_FRAME); - else - pred_context = 2; - } else if (above_has_second || left_has_second) { - const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0; - const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0; - const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1; - - if (rfs == GOLDEN_FRAME) - pred_context = 3 + (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME); - else if (rfs == ALTREF_FRAME) - pred_context = crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME; - else - pred_context = 1 + 2 * (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME); - } else { - if (above0 == LAST_FRAME && left0 == LAST_FRAME) { - pred_context = 3; - } else if (above0 == LAST_FRAME || left0 == LAST_FRAME) { - const MV_REFERENCE_FRAME edge0 = (above0 == LAST_FRAME) ? left0 - : above0; - pred_context = 4 * (edge0 == GOLDEN_FRAME); - } else { - pred_context = 2 * (above0 == GOLDEN_FRAME) + - 2 * (left0 == GOLDEN_FRAME); - } - } - } - } else if (has_above || has_left) { // one edge available - const MODE_INFO *edge_mi = has_above ? 
above_mi : left_mi; - - if (!is_inter_block(edge_mi) || - (edge_mi->ref_frame[0] == LAST_FRAME && !has_second_ref(edge_mi))) - pred_context = 2; - else if (!has_second_ref(edge_mi)) - pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME); - else - pred_context = 3 * (edge_mi->ref_frame[0] == GOLDEN_FRAME || - edge_mi->ref_frame[1] == GOLDEN_FRAME); - } else { // no edges available (2) - pred_context = 2; - } - assert(pred_context >= 0 && pred_context < REF_CONTEXTS); - return pred_context; -} diff --git a/thirdparty/libvpx/vp9/common/vp9_pred_common.h b/thirdparty/libvpx/vp9/common/vp9_pred_common.h deleted file mode 100644 index f3c676e953..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_pred_common.h +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_COMMON_VP9_PRED_COMMON_H_ -#define VP9_COMMON_VP9_PRED_COMMON_H_ - -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_onyxc_int.h" -#include "vpx_dsp/vpx_dsp_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -static INLINE int get_segment_id(const VP9_COMMON *cm, - const uint8_t *segment_ids, - BLOCK_SIZE bsize, int mi_row, int mi_col) { - const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int bw = num_8x8_blocks_wide_lookup[bsize]; - const int bh = num_8x8_blocks_high_lookup[bsize]; - const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); - const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); - int x, y, segment_id = MAX_SEGMENTS; - - for (y = 0; y < ymis; ++y) - for (x = 0; x < xmis; ++x) - segment_id = - VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]); - - assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); - return segment_id; -} - -static INLINE int vp9_get_pred_context_seg_id(const MACROBLOCKD *xd) { - const MODE_INFO *const above_mi = xd->above_mi; - const MODE_INFO *const left_mi = xd->left_mi; - const int above_sip = (above_mi != NULL) ? - above_mi->seg_id_predicted : 0; - const int left_sip = (left_mi != NULL) ? left_mi->seg_id_predicted : 0; - - return above_sip + left_sip; -} - -static INLINE vpx_prob vp9_get_pred_prob_seg_id(const struct segmentation *seg, - const MACROBLOCKD *xd) { - return seg->pred_probs[vp9_get_pred_context_seg_id(xd)]; -} - -static INLINE int vp9_get_skip_context(const MACROBLOCKD *xd) { - const MODE_INFO *const above_mi = xd->above_mi; - const MODE_INFO *const left_mi = xd->left_mi; - const int above_skip = (above_mi != NULL) ? above_mi->skip : 0; - const int left_skip = (left_mi != NULL) ? 
left_mi->skip : 0; - return above_skip + left_skip; -} - -static INLINE vpx_prob vp9_get_skip_prob(const VP9_COMMON *cm, - const MACROBLOCKD *xd) { - return cm->fc->skip_probs[vp9_get_skip_context(xd)]; -} - -int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd); - -// The mode info data structure has a one element border above and to the -// left of the entries corresponding to real macroblocks. -// The prediction flags in these dummy entries are initialized to 0. -// 0 - inter/inter, inter/--, --/inter, --/-- -// 1 - intra/inter, inter/intra -// 2 - intra/--, --/intra -// 3 - intra/intra -static INLINE int get_intra_inter_context(const MACROBLOCKD *xd) { - const MODE_INFO *const above_mi = xd->above_mi; - const MODE_INFO *const left_mi = xd->left_mi; - const int has_above = !!above_mi; - const int has_left = !!left_mi; - - if (has_above && has_left) { // both edges available - const int above_intra = !is_inter_block(above_mi); - const int left_intra = !is_inter_block(left_mi); - return left_intra && above_intra ? 3 : left_intra || above_intra; - } else if (has_above || has_left) { // one edge available - return 2 * !is_inter_block(has_above ? 
above_mi : left_mi); - } - return 0; -} - -static INLINE vpx_prob vp9_get_intra_inter_prob(const VP9_COMMON *cm, - const MACROBLOCKD *xd) { - return cm->fc->intra_inter_prob[get_intra_inter_context(xd)]; -} - -int vp9_get_reference_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd); - -static INLINE vpx_prob vp9_get_reference_mode_prob(const VP9_COMMON *cm, - const MACROBLOCKD *xd) { - return cm->fc->comp_inter_prob[vp9_get_reference_mode_context(cm, xd)]; -} - -int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm, - const MACROBLOCKD *xd); - -static INLINE vpx_prob vp9_get_pred_prob_comp_ref_p(const VP9_COMMON *cm, - const MACROBLOCKD *xd) { - const int pred_context = vp9_get_pred_context_comp_ref_p(cm, xd); - return cm->fc->comp_ref_prob[pred_context]; -} - -int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd); - -static INLINE vpx_prob vp9_get_pred_prob_single_ref_p1(const VP9_COMMON *cm, - const MACROBLOCKD *xd) { - return cm->fc->single_ref_prob[vp9_get_pred_context_single_ref_p1(xd)][0]; -} - -int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd); - -static INLINE vpx_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm, - const MACROBLOCKD *xd) { - return cm->fc->single_ref_prob[vp9_get_pred_context_single_ref_p2(xd)][1]; -} - -// Returns a context number for the given MB prediction signal -// The mode info data structure has a one element border above and to the -// left of the entries corresponding to real blocks. -// The prediction flags in these dummy entries are initialized to 0. -static INLINE int get_tx_size_context(const MACROBLOCKD *xd) { - const int max_tx_size = max_txsize_lookup[xd->mi[0]->sb_type]; - const MODE_INFO *const above_mi = xd->above_mi; - const MODE_INFO *const left_mi = xd->left_mi; - const int has_above = !!above_mi; - const int has_left = !!left_mi; - int above_ctx = (has_above && !above_mi->skip) ? (int)above_mi->tx_size - : max_tx_size; - int left_ctx = (has_left && !left_mi->skip) ? 
(int)left_mi->tx_size - : max_tx_size; - if (!has_left) - left_ctx = above_ctx; - - if (!has_above) - above_ctx = left_ctx; - - return (above_ctx + left_ctx) > max_tx_size; -} - -static INLINE const vpx_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx, - const struct tx_probs *tx_probs) { - switch (max_tx_size) { - case TX_8X8: - return tx_probs->p8x8[ctx]; - case TX_16X16: - return tx_probs->p16x16[ctx]; - case TX_32X32: - return tx_probs->p32x32[ctx]; - default: - assert(0 && "Invalid max_tx_size."); - return NULL; - } -} - -static INLINE const vpx_prob *get_tx_probs2(TX_SIZE max_tx_size, - const MACROBLOCKD *xd, - const struct tx_probs *tx_probs) { - return get_tx_probs(max_tx_size, get_tx_size_context(xd), tx_probs); -} - -static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx, - struct tx_counts *tx_counts) { - switch (max_tx_size) { - case TX_8X8: - return tx_counts->p8x8[ctx]; - case TX_16X16: - return tx_counts->p16x16[ctx]; - case TX_32X32: - return tx_counts->p32x32[ctx]; - default: - assert(0 && "Invalid max_tx_size."); - return NULL; - } -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_PRED_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_quant_common.c b/thirdparty/libvpx/vp9/common/vp9_quant_common.c deleted file mode 100644 index d83f3c1a2f..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_quant_common.c +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_quant_common.h" -#include "vp9/common/vp9_seg_common.h" - -static const int16_t dc_qlookup[QINDEX_RANGE] = { - 4, 8, 8, 9, 10, 11, 12, 12, - 13, 14, 15, 16, 17, 18, 19, 19, - 20, 21, 22, 23, 24, 25, 26, 26, - 27, 28, 29, 30, 31, 32, 32, 33, - 34, 35, 36, 37, 38, 38, 39, 40, - 41, 42, 43, 43, 44, 45, 46, 47, - 48, 48, 49, 50, 51, 52, 53, 53, - 54, 55, 56, 57, 57, 58, 59, 60, - 61, 62, 62, 63, 64, 65, 66, 66, - 67, 68, 69, 70, 70, 71, 72, 73, - 74, 74, 75, 76, 77, 78, 78, 79, - 80, 81, 81, 82, 83, 84, 85, 85, - 87, 88, 90, 92, 93, 95, 96, 98, - 99, 101, 102, 104, 105, 107, 108, 110, - 111, 113, 114, 116, 117, 118, 120, 121, - 123, 125, 127, 129, 131, 134, 136, 138, - 140, 142, 144, 146, 148, 150, 152, 154, - 156, 158, 161, 164, 166, 169, 172, 174, - 177, 180, 182, 185, 187, 190, 192, 195, - 199, 202, 205, 208, 211, 214, 217, 220, - 223, 226, 230, 233, 237, 240, 243, 247, - 250, 253, 257, 261, 265, 269, 272, 276, - 280, 284, 288, 292, 296, 300, 304, 309, - 313, 317, 322, 326, 330, 335, 340, 344, - 349, 354, 359, 364, 369, 374, 379, 384, - 389, 395, 400, 406, 411, 417, 423, 429, - 435, 441, 447, 454, 461, 467, 475, 482, - 489, 497, 505, 513, 522, 530, 539, 549, - 559, 569, 579, 590, 602, 614, 626, 640, - 654, 668, 684, 700, 717, 736, 755, 775, - 796, 819, 843, 869, 896, 925, 955, 988, - 1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336, -}; - -#if CONFIG_VP9_HIGHBITDEPTH -static const int16_t dc_qlookup_10[QINDEX_RANGE] = { - 4, 9, 10, 13, 15, 17, 20, 22, - 25, 28, 31, 34, 37, 40, 43, 47, - 50, 53, 57, 60, 64, 68, 71, 75, - 78, 82, 86, 90, 93, 97, 101, 105, - 109, 113, 116, 120, 124, 128, 132, 136, - 140, 143, 147, 151, 155, 159, 163, 166, - 170, 174, 178, 182, 185, 189, 193, 197, - 200, 204, 208, 212, 215, 219, 223, 226, - 230, 233, 237, 241, 244, 248, 251, 255, - 259, 262, 266, 269, 273, 276, 280, 283, - 287, 290, 293, 297, 300, 304, 307, 310, - 314, 317, 321, 324, 327, 331, 334, 337, - 343, 350, 356, 
362, 369, 375, 381, 387, - 394, 400, 406, 412, 418, 424, 430, 436, - 442, 448, 454, 460, 466, 472, 478, 484, - 490, 499, 507, 516, 525, 533, 542, 550, - 559, 567, 576, 584, 592, 601, 609, 617, - 625, 634, 644, 655, 666, 676, 687, 698, - 708, 718, 729, 739, 749, 759, 770, 782, - 795, 807, 819, 831, 844, 856, 868, 880, - 891, 906, 920, 933, 947, 961, 975, 988, - 1001, 1015, 1030, 1045, 1061, 1076, 1090, 1105, - 1120, 1137, 1153, 1170, 1186, 1202, 1218, 1236, - 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, - 1398, 1416, 1436, 1456, 1476, 1496, 1516, 1537, - 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717, - 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, - 1958, 1990, 2021, 2054, 2088, 2123, 2159, 2197, - 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, - 2616, 2675, 2737, 2802, 2871, 2944, 3020, 3102, - 3188, 3280, 3375, 3478, 3586, 3702, 3823, 3953, - 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347, -}; - -static const int16_t dc_qlookup_12[QINDEX_RANGE] = { - 4, 12, 18, 25, 33, 41, 50, 60, - 70, 80, 91, 103, 115, 127, 140, 153, - 166, 180, 194, 208, 222, 237, 251, 266, - 281, 296, 312, 327, 343, 358, 374, 390, - 405, 421, 437, 453, 469, 484, 500, 516, - 532, 548, 564, 580, 596, 611, 627, 643, - 659, 674, 690, 706, 721, 737, 752, 768, - 783, 798, 814, 829, 844, 859, 874, 889, - 904, 919, 934, 949, 964, 978, 993, 1008, - 1022, 1037, 1051, 1065, 1080, 1094, 1108, 1122, - 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234, - 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, - 1368, 1393, 1419, 1444, 1469, 1494, 1519, 1544, - 1569, 1594, 1618, 1643, 1668, 1692, 1717, 1741, - 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, - 1957, 1992, 2027, 2061, 2096, 2130, 2165, 2199, - 2233, 2267, 2300, 2334, 2367, 2400, 2434, 2467, - 2499, 2532, 2575, 2618, 2661, 2704, 2746, 2788, - 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, - 3177, 3226, 3275, 3324, 3373, 3421, 3469, 3517, - 3565, 3621, 3677, 3733, 3788, 3843, 3897, 3951, - 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420, - 4479, 
4546, 4612, 4677, 4742, 4807, 4871, 4942, - 5013, 5083, 5153, 5222, 5291, 5367, 5442, 5517, - 5591, 5665, 5745, 5825, 5905, 5984, 6063, 6149, - 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, - 6966, 7064, 7163, 7269, 7376, 7483, 7599, 7715, - 7832, 7958, 8085, 8214, 8352, 8492, 8635, 8788, - 8945, 9104, 9275, 9450, 9639, 9832, 10031, 10245, - 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, - 12750, 13118, 13501, 13913, 14343, 14807, 15290, 15812, - 16356, 16943, 17575, 18237, 18949, 19718, 20521, 21387, -}; -#endif - -static const int16_t ac_qlookup[QINDEX_RANGE] = { - 4, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, - 23, 24, 25, 26, 27, 28, 29, 30, - 31, 32, 33, 34, 35, 36, 37, 38, - 39, 40, 41, 42, 43, 44, 45, 46, - 47, 48, 49, 50, 51, 52, 53, 54, - 55, 56, 57, 58, 59, 60, 61, 62, - 63, 64, 65, 66, 67, 68, 69, 70, - 71, 72, 73, 74, 75, 76, 77, 78, - 79, 80, 81, 82, 83, 84, 85, 86, - 87, 88, 89, 90, 91, 92, 93, 94, - 95, 96, 97, 98, 99, 100, 101, 102, - 104, 106, 108, 110, 112, 114, 116, 118, - 120, 122, 124, 126, 128, 130, 132, 134, - 136, 138, 140, 142, 144, 146, 148, 150, - 152, 155, 158, 161, 164, 167, 170, 173, - 176, 179, 182, 185, 188, 191, 194, 197, - 200, 203, 207, 211, 215, 219, 223, 227, - 231, 235, 239, 243, 247, 251, 255, 260, - 265, 270, 275, 280, 285, 290, 295, 300, - 305, 311, 317, 323, 329, 335, 341, 347, - 353, 359, 366, 373, 380, 387, 394, 401, - 408, 416, 424, 432, 440, 448, 456, 465, - 474, 483, 492, 501, 510, 520, 530, 540, - 550, 560, 571, 582, 593, 604, 615, 627, - 639, 651, 663, 676, 689, 702, 715, 729, - 743, 757, 771, 786, 801, 816, 832, 848, - 864, 881, 898, 915, 933, 951, 969, 988, - 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, - 1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343, - 1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567, - 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828, -}; - -#if CONFIG_VP9_HIGHBITDEPTH -static const int16_t ac_qlookup_10[QINDEX_RANGE] = { - 4, 9, 11, 13, 16, 18, 21, 24, - 27, 30, 33, 
37, 40, 44, 48, 51, - 55, 59, 63, 67, 71, 75, 79, 83, - 88, 92, 96, 100, 105, 109, 114, 118, - 122, 127, 131, 136, 140, 145, 149, 154, - 158, 163, 168, 172, 177, 181, 186, 190, - 195, 199, 204, 208, 213, 217, 222, 226, - 231, 235, 240, 244, 249, 253, 258, 262, - 267, 271, 275, 280, 284, 289, 293, 297, - 302, 306, 311, 315, 319, 324, 328, 332, - 337, 341, 345, 349, 354, 358, 362, 367, - 371, 375, 379, 384, 388, 392, 396, 401, - 409, 417, 425, 433, 441, 449, 458, 466, - 474, 482, 490, 498, 506, 514, 523, 531, - 539, 547, 555, 563, 571, 579, 588, 596, - 604, 616, 628, 640, 652, 664, 676, 688, - 700, 713, 725, 737, 749, 761, 773, 785, - 797, 809, 825, 841, 857, 873, 889, 905, - 922, 938, 954, 970, 986, 1002, 1018, 1038, - 1058, 1078, 1098, 1118, 1138, 1158, 1178, 1198, - 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, - 1411, 1435, 1463, 1491, 1519, 1547, 1575, 1603, - 1631, 1663, 1695, 1727, 1759, 1791, 1823, 1859, - 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, - 2199, 2239, 2283, 2327, 2371, 2415, 2459, 2507, - 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915, - 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, - 3455, 3523, 3591, 3659, 3731, 3803, 3876, 3952, - 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, - 4692, 4784, 4876, 4972, 5068, 5168, 5268, 5372, - 5476, 5584, 5692, 5804, 5916, 6032, 6148, 6268, - 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312, -}; - -static const int16_t ac_qlookup_12[QINDEX_RANGE] = { - 4, 13, 19, 27, 35, 44, 54, 64, - 75, 87, 99, 112, 126, 139, 154, 168, - 183, 199, 214, 230, 247, 263, 280, 297, - 314, 331, 349, 366, 384, 402, 420, 438, - 456, 475, 493, 511, 530, 548, 567, 586, - 604, 623, 642, 660, 679, 698, 716, 735, - 753, 772, 791, 809, 828, 846, 865, 884, - 902, 920, 939, 957, 976, 994, 1012, 1030, - 1049, 1067, 1085, 1103, 1121, 1139, 1157, 1175, - 1193, 1211, 1229, 1246, 1264, 1282, 1299, 1317, - 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457, - 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, - 1627, 1660, 1693, 1725, 1758, 1791, 
1824, 1856, - 1889, 1922, 1954, 1987, 2020, 2052, 2085, 2118, - 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, - 2411, 2459, 2508, 2556, 2605, 2653, 2701, 2750, - 2798, 2847, 2895, 2943, 2992, 3040, 3088, 3137, - 3185, 3234, 3298, 3362, 3426, 3491, 3555, 3619, - 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, - 4230, 4310, 4390, 4470, 4550, 4631, 4711, 4791, - 4871, 4967, 5064, 5160, 5256, 5352, 5448, 5544, - 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410, - 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, - 7579, 7723, 7867, 8011, 8155, 8315, 8475, 8635, - 8795, 8956, 9132, 9308, 9484, 9660, 9836, 10028, - 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, - 11885, 12109, 12333, 12573, 12813, 13053, 13309, 13565, - 13821, 14093, 14365, 14637, 14925, 15213, 15502, 15806, - 16110, 16414, 16734, 17054, 17390, 17726, 18062, 18414, - 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, - 21902, 22334, 22766, 23214, 23662, 24126, 24590, 25070, - 25551, 26047, 26559, 27071, 27599, 28143, 28687, 29247, -}; -#endif - -int16_t vp9_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) { -#if CONFIG_VP9_HIGHBITDEPTH - switch (bit_depth) { - case VPX_BITS_8: - return dc_qlookup[clamp(qindex + delta, 0, MAXQ)]; - case VPX_BITS_10: - return dc_qlookup_10[clamp(qindex + delta, 0, MAXQ)]; - case VPX_BITS_12: - return dc_qlookup_12[clamp(qindex + delta, 0, MAXQ)]; - default: - assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); - return -1; - } -#else - (void) bit_depth; - return dc_qlookup[clamp(qindex + delta, 0, MAXQ)]; -#endif -} - -int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) { -#if CONFIG_VP9_HIGHBITDEPTH - switch (bit_depth) { - case VPX_BITS_8: - return ac_qlookup[clamp(qindex + delta, 0, MAXQ)]; - case VPX_BITS_10: - return ac_qlookup_10[clamp(qindex + delta, 0, MAXQ)]; - case VPX_BITS_12: - return ac_qlookup_12[clamp(qindex + delta, 0, MAXQ)]; - default: - assert(0 && "bit_depth should be VPX_BITS_8, 
VPX_BITS_10 or VPX_BITS_12"); - return -1; - } -#else - (void) bit_depth; - return ac_qlookup[clamp(qindex + delta, 0, MAXQ)]; -#endif -} - -int vp9_get_qindex(const struct segmentation *seg, int segment_id, - int base_qindex) { - if (segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) { - const int data = get_segdata(seg, segment_id, SEG_LVL_ALT_Q); - const int seg_qindex = seg->abs_delta == SEGMENT_ABSDATA ? - data : base_qindex + data; - return clamp(seg_qindex, 0, MAXQ); - } else { - return base_qindex; - } -} - diff --git a/thirdparty/libvpx/vp9/common/vp9_quant_common.h b/thirdparty/libvpx/vp9/common/vp9_quant_common.h deleted file mode 100644 index 4bae4a8967..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_quant_common.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_COMMON_VP9_QUANT_COMMON_H_ -#define VP9_COMMON_VP9_QUANT_COMMON_H_ - -#include "vpx/vpx_codec.h" -#include "vp9/common/vp9_seg_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MINQ 0 -#define MAXQ 255 -#define QINDEX_RANGE (MAXQ - MINQ + 1) -#define QINDEX_BITS 8 - -int16_t vp9_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth); -int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth); - -int vp9_get_qindex(const struct segmentation *seg, int segment_id, - int base_qindex); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_QUANT_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_reconinter.c b/thirdparty/libvpx/vp9/common/vp9_reconinter.c deleted file mode 100644 index 84718e9703..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_reconinter.c +++ /dev/null @@ -1,309 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <assert.h> - -#include "./vpx_scale_rtcd.h" -#include "./vpx_config.h" - -#include "vpx/vpx_integer.h" - -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_reconinter.h" -#include "vp9/common/vp9_reconintra.h" - -#if CONFIG_VP9_HIGHBITDEPTH -void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const MV *src_mv, - const struct scale_factors *sf, - int w, int h, int ref, - const InterpKernel *kernel, - enum mv_precision precision, - int x, int y, int bd) { - const int is_q4 = precision == MV_PRECISION_Q4; - const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2, - is_q4 ? 
src_mv->col : src_mv->col * 2 }; - MV32 mv = vp9_scale_mv(&mv_q4, x, y, sf); - const int subpel_x = mv.col & SUBPEL_MASK; - const int subpel_y = mv.row & SUBPEL_MASK; - - src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); - - highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, - sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4, - bd); -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -void vp9_build_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const MV *src_mv, - const struct scale_factors *sf, - int w, int h, int ref, - const InterpKernel *kernel, - enum mv_precision precision, - int x, int y) { - const int is_q4 = precision == MV_PRECISION_Q4; - const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2, - is_q4 ? src_mv->col : src_mv->col * 2 }; - MV32 mv = vp9_scale_mv(&mv_q4, x, y, sf); - const int subpel_x = mv.col & SUBPEL_MASK; - const int subpel_y = mv.row & SUBPEL_MASK; - - src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); - - inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, - sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4); -} - -static INLINE int round_mv_comp_q4(int value) { - return (value < 0 ? value - 2 : value + 2) / 4; -} - -static MV mi_mv_pred_q4(const MODE_INFO *mi, int idx) { - MV res = { round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.row + - mi->bmi[1].as_mv[idx].as_mv.row + - mi->bmi[2].as_mv[idx].as_mv.row + - mi->bmi[3].as_mv[idx].as_mv.row), - round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.col + - mi->bmi[1].as_mv[idx].as_mv.col + - mi->bmi[2].as_mv[idx].as_mv.col + - mi->bmi[3].as_mv[idx].as_mv.col) }; - return res; -} - -static INLINE int round_mv_comp_q2(int value) { - return (value < 0 ? 
value - 1 : value + 1) / 2; -} - -static MV mi_mv_pred_q2(const MODE_INFO *mi, int idx, int block0, int block1) { - MV res = { round_mv_comp_q2(mi->bmi[block0].as_mv[idx].as_mv.row + - mi->bmi[block1].as_mv[idx].as_mv.row), - round_mv_comp_q2(mi->bmi[block0].as_mv[idx].as_mv.col + - mi->bmi[block1].as_mv[idx].as_mv.col) }; - return res; -} - -// TODO(jkoleszar): yet another mv clamping function :-( -MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, - int bw, int bh, int ss_x, int ss_y) { - // If the MV points so far into the UMV border that no visible pixels - // are used for reconstruction, the subpel part of the MV can be - // discarded and the MV limited to 16 pixels with equivalent results. - const int spel_left = (VP9_INTERP_EXTEND + bw) << SUBPEL_BITS; - const int spel_right = spel_left - SUBPEL_SHIFTS; - const int spel_top = (VP9_INTERP_EXTEND + bh) << SUBPEL_BITS; - const int spel_bottom = spel_top - SUBPEL_SHIFTS; - MV clamped_mv = { - src_mv->row * (1 << (1 - ss_y)), - src_mv->col * (1 << (1 - ss_x)) - }; - assert(ss_x <= 1); - assert(ss_y <= 1); - - clamp_mv(&clamped_mv, - xd->mb_to_left_edge * (1 << (1 - ss_x)) - spel_left, - xd->mb_to_right_edge * (1 << (1 - ss_x)) + spel_right, - xd->mb_to_top_edge * (1 << (1 - ss_y)) - spel_top, - xd->mb_to_bottom_edge * (1 << (1 - ss_y)) + spel_bottom); - - return clamped_mv; -} - -MV average_split_mvs(const struct macroblockd_plane *pd, - const MODE_INFO *mi, int ref, int block) { - const int ss_idx = ((pd->subsampling_x > 0) << 1) | (pd->subsampling_y > 0); - MV res = {0, 0}; - switch (ss_idx) { - case 0: - res = mi->bmi[block].as_mv[ref].as_mv; - break; - case 1: - res = mi_mv_pred_q2(mi, ref, block, block + 2); - break; - case 2: - res = mi_mv_pred_q2(mi, ref, block, block + 1); - break; - case 3: - res = mi_mv_pred_q4(mi, ref); - break; - default: - assert(ss_idx <= 3 && ss_idx >= 0); - } - return res; -} - -static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, - int bw, 
int bh, - int x, int y, int w, int h, - int mi_x, int mi_y) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - const MODE_INFO *mi = xd->mi[0]; - const int is_compound = has_second_ref(mi); - const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; - int ref; - - for (ref = 0; ref < 1 + is_compound; ++ref) { - const struct scale_factors *const sf = &xd->block_refs[ref]->sf; - struct buf_2d *const pre_buf = &pd->pre[ref]; - struct buf_2d *const dst_buf = &pd->dst; - uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; - const MV mv = mi->sb_type < BLOCK_8X8 - ? average_split_mvs(pd, mi, ref, block) - : mi->mv[ref].as_mv; - - // TODO(jkoleszar): This clamping is done in the incorrect place for the - // scaling case. It needs to be done on the scaled MV, not the pre-scaling - // MV. Note however that it performs the subsampling aware scaling so - // that the result is always q4. - // mv_precision precision is MV_PRECISION_Q4. - const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, - pd->subsampling_x, - pd->subsampling_y); - - uint8_t *pre; - MV32 scaled_mv; - int xs, ys, subpel_x, subpel_y; - const int is_scaled = vp9_is_scaled(sf); - - if (is_scaled) { - // Co-ordinate of containing block to pixel precision. 
- const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); - const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); -#if CONFIG_BETTER_HW_COMPATIBILITY - assert(xd->mi[0]->sb_type != BLOCK_4X8 && - xd->mi[0]->sb_type != BLOCK_8X4); - assert(mv_q4.row == mv.row * (1 << (1 - pd->subsampling_y)) && - mv_q4.col == mv.col * (1 << (1 - pd->subsampling_x))); -#endif - if (plane == 0) - pre_buf->buf = xd->block_refs[ref]->buf->y_buffer; - else if (plane == 1) - pre_buf->buf = xd->block_refs[ref]->buf->u_buffer; - else - pre_buf->buf = xd->block_refs[ref]->buf->v_buffer; - - pre_buf->buf += scaled_buffer_offset(x_start + x, y_start + y, - pre_buf->stride, sf); - pre = pre_buf->buf; - scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); - xs = sf->x_step_q4; - ys = sf->y_step_q4; - } else { - pre = pre_buf->buf + (y * pre_buf->stride + x); - scaled_mv.row = mv_q4.row; - scaled_mv.col = mv_q4.col; - xs = ys = 16; - } - subpel_x = scaled_mv.col & SUBPEL_MASK; - subpel_y = scaled_mv.row & SUBPEL_MASK; - pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride - + (scaled_mv.col >> SUBPEL_BITS); - -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - highbd_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys, - xd->bd); - } else { - inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); - } -#else - inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); -#endif // CONFIG_VP9_HIGHBITDEPTH - } -} - -static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, - int mi_row, int mi_col, - int plane_from, int plane_to) { - int plane; - const int mi_x = mi_col * MI_SIZE; - const int mi_y = mi_row * MI_SIZE; - for (plane = plane_from; plane <= plane_to; ++plane) { - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, - 
&xd->plane[plane]); - const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; - const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; - const int bw = 4 * num_4x4_w; - const int bh = 4 * num_4x4_h; - - if (xd->mi[0]->sb_type < BLOCK_8X8) { - int i = 0, x, y; - assert(bsize == BLOCK_8X8); - for (y = 0; y < num_4x4_h; ++y) - for (x = 0; x < num_4x4_w; ++x) - build_inter_predictors(xd, plane, i++, bw, bh, - 4 * x, 4 * y, 4, 4, mi_x, mi_y); - } else { - build_inter_predictors(xd, plane, 0, bw, bh, - 0, 0, bw, bh, mi_x, mi_y); - } - } -} - -void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, 0); -} - -void vp9_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize, int plane) { - build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, plane, plane); -} - -void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 1, - MAX_MB_PLANE - 1); -} - -void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, - MAX_MB_PLANE - 1); -} - -void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], - const YV12_BUFFER_CONFIG *src, - int mi_row, int mi_col) { - uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer, - src->v_buffer}; - const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride, - src->uv_stride}; - int i; - - for (i = 0; i < MAX_MB_PLANE; ++i) { - struct macroblockd_plane *const pd = &planes[i]; - setup_pred_plane(&pd->dst, buffers[i], strides[i], mi_row, mi_col, NULL, - pd->subsampling_x, pd->subsampling_y); - } -} - -void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, - const YV12_BUFFER_CONFIG *src, - int mi_row, int mi_col, - const struct scale_factors *sf) { - 
if (src != NULL) { - int i; - uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer, - src->v_buffer}; - const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride, - src->uv_stride}; - for (i = 0; i < MAX_MB_PLANE; ++i) { - struct macroblockd_plane *const pd = &xd->plane[i]; - setup_pred_plane(&pd->pre[idx], buffers[i], strides[i], mi_row, mi_col, - sf, pd->subsampling_x, pd->subsampling_y); - } - } -} diff --git a/thirdparty/libvpx/vp9/common/vp9_reconinter.h b/thirdparty/libvpx/vp9/common/vp9_reconinter.h deleted file mode 100644 index 07745e3aaa..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_reconinter.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_COMMON_VP9_RECONINTER_H_ -#define VP9_COMMON_VP9_RECONINTER_H_ - -#include "vp9/common/vp9_filter.h" -#include "vp9/common/vp9_onyxc_int.h" -#include "vpx/vpx_integer.h" -#include "vpx_dsp/vpx_filter.h" - -#ifdef __cplusplus -extern "C" { -#endif - -static INLINE void inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int subpel_x, - const int subpel_y, - const struct scale_factors *sf, - int w, int h, int ref, - const InterpKernel *kernel, - int xs, int ys) { - sf->predict[subpel_x != 0][subpel_y != 0][ref]( - src, src_stride, dst, dst_stride, - kernel[subpel_x], xs, kernel[subpel_y], ys, w, h); -} - -#if CONFIG_VP9_HIGHBITDEPTH -static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int subpel_x, - const int subpel_y, - const struct scale_factors *sf, - int w, int h, int ref, - const InterpKernel *kernel, - int xs, int ys, int bd) { - sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref]( - src, src_stride, dst, dst_stride, - kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd); -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -MV average_split_mvs(const struct macroblockd_plane *pd, const MODE_INFO *mi, - int ref, int block); - -MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, - int bw, int bh, int ss_x, int ss_y); - -void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize); - -void vp9_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize, int plane); - -void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize); - -void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize); - -void vp9_build_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const MV *mv_q3, - const struct scale_factors *sf, - int w, int h, int do_avg, - const InterpKernel 
*kernel, - enum mv_precision precision, - int x, int y); - -#if CONFIG_VP9_HIGHBITDEPTH -void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const MV *mv_q3, - const struct scale_factors *sf, - int w, int h, int do_avg, - const InterpKernel *kernel, - enum mv_precision precision, - int x, int y, int bd); -#endif - -static INLINE int scaled_buffer_offset(int x_offset, int y_offset, int stride, - const struct scale_factors *sf) { - const int x = sf ? sf->scale_value_x(x_offset, sf) : x_offset; - const int y = sf ? sf->scale_value_y(y_offset, sf) : y_offset; - return y * stride + x; -} - -static INLINE void setup_pred_plane(struct buf_2d *dst, - uint8_t *src, int stride, - int mi_row, int mi_col, - const struct scale_factors *scale, - int subsampling_x, int subsampling_y) { - const int x = (MI_SIZE * mi_col) >> subsampling_x; - const int y = (MI_SIZE * mi_row) >> subsampling_y; - dst->buf = src + scaled_buffer_offset(x, y, stride, scale); - dst->stride = stride; -} - -void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], - const YV12_BUFFER_CONFIG *src, - int mi_row, int mi_col); - -void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, - const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, - const struct scale_factors *sf); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_RECONINTER_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_reconintra.c b/thirdparty/libvpx/vp9/common/vp9_reconintra.c deleted file mode 100644 index 445785835a..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_reconintra.c +++ /dev/null @@ -1,445 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" - -#if CONFIG_VP9_HIGHBITDEPTH -#include "vpx_dsp/vpx_dsp_common.h" -#endif // CONFIG_VP9_HIGHBITDEPTH -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/mem.h" -#include "vpx_ports/vpx_once.h" - -#include "vp9/common/vp9_reconintra.h" -#include "vp9/common/vp9_onyxc_int.h" - -const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = { - DCT_DCT, // DC - ADST_DCT, // V - DCT_ADST, // H - DCT_DCT, // D45 - ADST_ADST, // D135 - ADST_DCT, // D117 - DCT_ADST, // D153 - DCT_ADST, // D207 - ADST_DCT, // D63 - ADST_ADST, // TM -}; - -enum { - NEED_LEFT = 1 << 1, - NEED_ABOVE = 1 << 2, - NEED_ABOVERIGHT = 1 << 3, -}; - -static const uint8_t extend_modes[INTRA_MODES] = { - NEED_ABOVE | NEED_LEFT, // DC - NEED_ABOVE, // V - NEED_LEFT, // H - NEED_ABOVERIGHT, // D45 - NEED_LEFT | NEED_ABOVE, // D135 - NEED_LEFT | NEED_ABOVE, // D117 - NEED_LEFT | NEED_ABOVE, // D153 - NEED_LEFT, // D207 - NEED_ABOVERIGHT, // D63 - NEED_LEFT | NEED_ABOVE, // TM -}; - -typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left); - -static intra_pred_fn pred[INTRA_MODES][TX_SIZES]; -static intra_pred_fn dc_pred[2][2][TX_SIZES]; - -#if CONFIG_VP9_HIGHBITDEPTH -typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride, - const uint16_t *above, const uint16_t *left, - int bd); -static intra_high_pred_fn pred_high[INTRA_MODES][4]; -static intra_high_pred_fn dc_pred_high[2][2][4]; -#endif // CONFIG_VP9_HIGHBITDEPTH - -static void vp9_init_intra_predictors_internal(void) { -#define INIT_ALL_SIZES(p, type) \ - p[TX_4X4] = vpx_##type##_predictor_4x4; \ - p[TX_8X8] = vpx_##type##_predictor_8x8; \ - p[TX_16X16] = vpx_##type##_predictor_16x16; \ - p[TX_32X32] = vpx_##type##_predictor_32x32 - - INIT_ALL_SIZES(pred[V_PRED], v); - INIT_ALL_SIZES(pred[H_PRED], h); - 
INIT_ALL_SIZES(pred[D207_PRED], d207); - INIT_ALL_SIZES(pred[D45_PRED], d45); - INIT_ALL_SIZES(pred[D63_PRED], d63); - INIT_ALL_SIZES(pred[D117_PRED], d117); - INIT_ALL_SIZES(pred[D135_PRED], d135); - INIT_ALL_SIZES(pred[D153_PRED], d153); - INIT_ALL_SIZES(pred[TM_PRED], tm); - - INIT_ALL_SIZES(dc_pred[0][0], dc_128); - INIT_ALL_SIZES(dc_pred[0][1], dc_top); - INIT_ALL_SIZES(dc_pred[1][0], dc_left); - INIT_ALL_SIZES(dc_pred[1][1], dc); - -#if CONFIG_VP9_HIGHBITDEPTH - INIT_ALL_SIZES(pred_high[V_PRED], highbd_v); - INIT_ALL_SIZES(pred_high[H_PRED], highbd_h); - INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207); - INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45); - INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63); - INIT_ALL_SIZES(pred_high[D117_PRED], highbd_d117); - INIT_ALL_SIZES(pred_high[D135_PRED], highbd_d135); - INIT_ALL_SIZES(pred_high[D153_PRED], highbd_d153); - INIT_ALL_SIZES(pred_high[TM_PRED], highbd_tm); - - INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128); - INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top); - INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left); - INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc); -#endif // CONFIG_VP9_HIGHBITDEPTH - -#undef intra_pred_allsizes -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void build_intra_predictors_high(const MACROBLOCKD *xd, - const uint8_t *ref8, - int ref_stride, - uint8_t *dst8, - int dst_stride, - PREDICTION_MODE mode, - TX_SIZE tx_size, - int up_available, - int left_available, - int right_available, - int x, int y, - int plane, int bd) { - int i; - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); - DECLARE_ALIGNED(16, uint16_t, left_col[32]); - DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]); - uint16_t *above_row = above_data + 16; - const uint16_t *const_above_row = above_row; - const int bs = 4 << tx_size; - int frame_width, frame_height; - int x0, y0; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const int need_left = extend_modes[mode] & 
NEED_LEFT; - const int need_above = extend_modes[mode] & NEED_ABOVE; - const int need_aboveright = extend_modes[mode] & NEED_ABOVERIGHT; - int base = 128 << (bd - 8); - // 127 127 127 .. 127 127 127 127 127 127 - // 129 A B .. Y Z - // 129 C D .. W X - // 129 E F .. U V - // 129 G H .. S T T T T T - // For 10 bit and 12 bit, 127 and 129 are replaced by base -1 and base + 1. - - // Get current frame pointer, width and height. - if (plane == 0) { - frame_width = xd->cur_buf->y_width; - frame_height = xd->cur_buf->y_height; - } else { - frame_width = xd->cur_buf->uv_width; - frame_height = xd->cur_buf->uv_height; - } - - // Get block position in current frame. - x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; - y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; - - // NEED_LEFT - if (need_left) { - if (left_available) { - if (xd->mb_to_bottom_edge < 0) { - /* slower path if the block needs border extension */ - if (y0 + bs <= frame_height) { - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; - } else { - const int extend_bottom = frame_height - y0; - for (i = 0; i < extend_bottom; ++i) - left_col[i] = ref[i * ref_stride - 1]; - for (; i < bs; ++i) - left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; - } - } else { - /* faster path if the block does not need extension */ - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; - } - } else { - vpx_memset16(left_col, base + 1, bs); - } - } - - // NEED_ABOVE - if (need_above) { - if (up_available) { - const uint16_t *above_ref = ref - ref_stride; - if (xd->mb_to_right_edge < 0) { - /* slower path if the block needs border extension */ - if (x0 + bs <= frame_width) { - memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - } else if (x0 <= frame_width) { - const int r = frame_width - x0; - memcpy(above_row, above_ref, r * sizeof(above_row[0])); - vpx_memset16(above_row + r, above_row[r - 1], x0 + bs - frame_width); - } - } else { - /* faster path if the block does 
not need extension */ - if (bs == 4 && right_available && left_available) { - const_above_row = above_ref; - } else { - memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - } - } - above_row[-1] = left_available ? above_ref[-1] : (base + 1); - } else { - vpx_memset16(above_row, base - 1, bs); - above_row[-1] = base - 1; - } - } - - // NEED_ABOVERIGHT - if (need_aboveright) { - if (up_available) { - const uint16_t *above_ref = ref - ref_stride; - if (xd->mb_to_right_edge < 0) { - /* slower path if the block needs border extension */ - if (x0 + 2 * bs <= frame_width) { - if (right_available && bs == 4) { - memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0])); - } else { - memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - vpx_memset16(above_row + bs, above_row[bs - 1], bs); - } - } else if (x0 + bs <= frame_width) { - const int r = frame_width - x0; - if (right_available && bs == 4) { - memcpy(above_row, above_ref, r * sizeof(above_row[0])); - vpx_memset16(above_row + r, above_row[r - 1], - x0 + 2 * bs - frame_width); - } else { - memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - vpx_memset16(above_row + bs, above_row[bs - 1], bs); - } - } else if (x0 <= frame_width) { - const int r = frame_width - x0; - memcpy(above_row, above_ref, r * sizeof(above_row[0])); - vpx_memset16(above_row + r, above_row[r - 1], - x0 + 2 * bs - frame_width); - } - above_row[-1] = left_available ? above_ref[-1] : (base + 1); - } else { - /* faster path if the block does not need extension */ - if (bs == 4 && right_available && left_available) { - const_above_row = above_ref; - } else { - memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - if (bs == 4 && right_available) - memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0])); - else - vpx_memset16(above_row + bs, above_row[bs - 1], bs); - above_row[-1] = left_available ? 
above_ref[-1] : (base + 1); - } - } - } else { - vpx_memset16(above_row, base - 1, bs * 2); - above_row[-1] = base - 1; - } - } - - // predict - if (mode == DC_PRED) { - dc_pred_high[left_available][up_available][tx_size](dst, dst_stride, - const_above_row, - left_col, xd->bd); - } else { - pred_high[mode][tx_size](dst, dst_stride, const_above_row, left_col, - xd->bd); - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, - int ref_stride, uint8_t *dst, int dst_stride, - PREDICTION_MODE mode, TX_SIZE tx_size, - int up_available, int left_available, - int right_available, int x, int y, - int plane) { - int i; - DECLARE_ALIGNED(16, uint8_t, left_col[32]); - DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]); - uint8_t *above_row = above_data + 16; - const uint8_t *const_above_row = above_row; - const int bs = 4 << tx_size; - int frame_width, frame_height; - int x0, y0; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - - // 127 127 127 .. 127 127 127 127 127 127 - // 129 A B .. Y Z - // 129 C D .. W X - // 129 E F .. U V - // 129 G H .. S T T T T T - // .. - - // Get current frame pointer, width and height. - if (plane == 0) { - frame_width = xd->cur_buf->y_width; - frame_height = xd->cur_buf->y_height; - } else { - frame_width = xd->cur_buf->uv_width; - frame_height = xd->cur_buf->uv_height; - } - - // Get block position in current frame. 
- x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; - y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; - - // NEED_LEFT - if (extend_modes[mode] & NEED_LEFT) { - if (left_available) { - if (xd->mb_to_bottom_edge < 0) { - /* slower path if the block needs border extension */ - if (y0 + bs <= frame_height) { - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; - } else { - const int extend_bottom = frame_height - y0; - for (i = 0; i < extend_bottom; ++i) - left_col[i] = ref[i * ref_stride - 1]; - for (; i < bs; ++i) - left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; - } - } else { - /* faster path if the block does not need extension */ - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; - } - } else { - memset(left_col, 129, bs); - } - } - - // NEED_ABOVE - if (extend_modes[mode] & NEED_ABOVE) { - if (up_available) { - const uint8_t *above_ref = ref - ref_stride; - if (xd->mb_to_right_edge < 0) { - /* slower path if the block needs border extension */ - if (x0 + bs <= frame_width) { - memcpy(above_row, above_ref, bs); - } else if (x0 <= frame_width) { - const int r = frame_width - x0; - memcpy(above_row, above_ref, r); - memset(above_row + r, above_row[r - 1], x0 + bs - frame_width); - } - } else { - /* faster path if the block does not need extension */ - if (bs == 4 && right_available && left_available) { - const_above_row = above_ref; - } else { - memcpy(above_row, above_ref, bs); - } - } - above_row[-1] = left_available ? 
above_ref[-1] : 129; - } else { - memset(above_row, 127, bs); - above_row[-1] = 127; - } - } - - // NEED_ABOVERIGHT - if (extend_modes[mode] & NEED_ABOVERIGHT) { - if (up_available) { - const uint8_t *above_ref = ref - ref_stride; - if (xd->mb_to_right_edge < 0) { - /* slower path if the block needs border extension */ - if (x0 + 2 * bs <= frame_width) { - if (right_available && bs == 4) { - memcpy(above_row, above_ref, 2 * bs); - } else { - memcpy(above_row, above_ref, bs); - memset(above_row + bs, above_row[bs - 1], bs); - } - } else if (x0 + bs <= frame_width) { - const int r = frame_width - x0; - if (right_available && bs == 4) { - memcpy(above_row, above_ref, r); - memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); - } else { - memcpy(above_row, above_ref, bs); - memset(above_row + bs, above_row[bs - 1], bs); - } - } else if (x0 <= frame_width) { - const int r = frame_width - x0; - memcpy(above_row, above_ref, r); - memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); - } - } else { - /* faster path if the block does not need extension */ - if (bs == 4 && right_available && left_available) { - const_above_row = above_ref; - } else { - memcpy(above_row, above_ref, bs); - if (bs == 4 && right_available) - memcpy(above_row + bs, above_ref + bs, bs); - else - memset(above_row + bs, above_row[bs - 1], bs); - } - } - above_row[-1] = left_available ? 
above_ref[-1] : 129; - } else { - memset(above_row, 127, bs * 2); - above_row[-1] = 127; - } - } - - // predict - if (mode == DC_PRED) { - dc_pred[left_available][up_available][tx_size](dst, dst_stride, - const_above_row, left_col); - } else { - pred[mode][tx_size](dst, dst_stride, const_above_row, left_col); - } -} - -void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, - TX_SIZE tx_size, PREDICTION_MODE mode, - const uint8_t *ref, int ref_stride, - uint8_t *dst, int dst_stride, - int aoff, int loff, int plane) { - const int bw = (1 << bwl_in); - const int txw = (1 << tx_size); - const int have_top = loff || (xd->above_mi != NULL); - const int have_left = aoff || (xd->left_mi != NULL); - const int have_right = (aoff + txw) < bw; - const int x = aoff * 4; - const int y = loff * 4; - -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode, - tx_size, have_top, have_left, have_right, - x, y, plane, xd->bd); - return; - } -#endif - build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size, - have_top, have_left, have_right, x, y, plane); -} - -void vp9_init_intra_predictors(void) { - once(vp9_init_intra_predictors_internal); -} diff --git a/thirdparty/libvpx/vp9/common/vp9_reconintra.h b/thirdparty/libvpx/vp9/common/vp9_reconintra.h deleted file mode 100644 index de453808b7..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_reconintra.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_COMMON_VP9_RECONINTRA_H_ -#define VP9_COMMON_VP9_RECONINTRA_H_ - -#include "vpx/vpx_integer.h" -#include "vp9/common/vp9_blockd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp9_init_intra_predictors(void); - -void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, - TX_SIZE tx_size, PREDICTION_MODE mode, - const uint8_t *ref, int ref_stride, - uint8_t *dst, int dst_stride, - int aoff, int loff, int plane); -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_RECONINTRA_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_rtcd.c b/thirdparty/libvpx/vp9/common/vp9_rtcd.c deleted file mode 100644 index 2dfa09f50e..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_rtcd.c +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#include "./vpx_config.h" -#define RTCD_C -#include "./vp9_rtcd.h" -#include "vpx_ports/vpx_once.h" - -void vp9_rtcd() { - // TODO(JBB): Remove this once, by insuring that both the encoder and - // decoder setup functions are protected by once(); - once(setup_rtcd_internal); -} diff --git a/thirdparty/libvpx/vp9/common/vp9_scale.c b/thirdparty/libvpx/vp9/common/vp9_scale.c deleted file mode 100644 index b763b925b3..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_scale.c +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_dsp_rtcd.h" -#include "vp9/common/vp9_filter.h" -#include "vp9/common/vp9_scale.h" -#include "vpx_dsp/vpx_filter.h" - -static INLINE int scaled_x(int val, const struct scale_factors *sf) { - return (int)((int64_t)val * sf->x_scale_fp >> REF_SCALE_SHIFT); -} - -static INLINE int scaled_y(int val, const struct scale_factors *sf) { - return (int)((int64_t)val * sf->y_scale_fp >> REF_SCALE_SHIFT); -} - -static int unscaled_value(int val, const struct scale_factors *sf) { - (void) sf; - return val; -} - -static int get_fixed_point_scale_factor(int other_size, int this_size) { - // Calculate scaling factor once for each reference frame - // and use fixed point scaling factors in decoding and encoding routines. - // Hardware implementations can calculate scale factor in device driver - // and use multiplication and shifting on hardware instead of division. - return (other_size << REF_SCALE_SHIFT) / this_size; -} - -MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf) { - const int x_off_q4 = scaled_x(x << SUBPEL_BITS, sf) & SUBPEL_MASK; - const int y_off_q4 = scaled_y(y << SUBPEL_BITS, sf) & SUBPEL_MASK; - const MV32 res = { - scaled_y(mv->row, sf) + y_off_q4, - scaled_x(mv->col, sf) + x_off_q4 - }; - return res; -} - -#if CONFIG_VP9_HIGHBITDEPTH -void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, - int other_w, int other_h, - int this_w, int this_h, - int use_highbd) { -#else -void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, - int other_w, int other_h, - int this_w, int this_h) { -#endif - if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) { - sf->x_scale_fp = REF_INVALID_SCALE; - sf->y_scale_fp = REF_INVALID_SCALE; - return; - } - - sf->x_scale_fp = get_fixed_point_scale_factor(other_w, this_w); - sf->y_scale_fp = get_fixed_point_scale_factor(other_h, this_h); - sf->x_step_q4 = 
scaled_x(16, sf); - sf->y_step_q4 = scaled_y(16, sf); - - if (vp9_is_scaled(sf)) { - sf->scale_value_x = scaled_x; - sf->scale_value_y = scaled_y; - } else { - sf->scale_value_x = unscaled_value; - sf->scale_value_y = unscaled_value; - } - - // TODO(agrange): Investigate the best choice of functions to use here - // for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what - // to do at full-pel offsets. The current selection, where the filter is - // applied in one direction only, and not at all for 0,0, seems to give the - // best quality, but it may be worth trying an additional mode that does - // do the filtering on full-pel. - - if (sf->x_step_q4 == 16) { - if (sf->y_step_q4 == 16) { - // No scaling in either direction. - sf->predict[0][0][0] = vpx_convolve_copy; - sf->predict[0][0][1] = vpx_convolve_avg; - sf->predict[0][1][0] = vpx_convolve8_vert; - sf->predict[0][1][1] = vpx_convolve8_avg_vert; - sf->predict[1][0][0] = vpx_convolve8_horiz; - sf->predict[1][0][1] = vpx_convolve8_avg_horiz; - } else { - // No scaling in x direction. Must always scale in the y direction. - sf->predict[0][0][0] = vpx_scaled_vert; - sf->predict[0][0][1] = vpx_scaled_avg_vert; - sf->predict[0][1][0] = vpx_scaled_vert; - sf->predict[0][1][1] = vpx_scaled_avg_vert; - sf->predict[1][0][0] = vpx_scaled_2d; - sf->predict[1][0][1] = vpx_scaled_avg_2d; - } - } else { - if (sf->y_step_q4 == 16) { - // No scaling in the y direction. Must always scale in the x direction. - sf->predict[0][0][0] = vpx_scaled_horiz; - sf->predict[0][0][1] = vpx_scaled_avg_horiz; - sf->predict[0][1][0] = vpx_scaled_2d; - sf->predict[0][1][1] = vpx_scaled_avg_2d; - sf->predict[1][0][0] = vpx_scaled_horiz; - sf->predict[1][0][1] = vpx_scaled_avg_horiz; - } else { - // Must always scale in both directions. 
- sf->predict[0][0][0] = vpx_scaled_2d; - sf->predict[0][0][1] = vpx_scaled_avg_2d; - sf->predict[0][1][0] = vpx_scaled_2d; - sf->predict[0][1][1] = vpx_scaled_avg_2d; - sf->predict[1][0][0] = vpx_scaled_2d; - sf->predict[1][0][1] = vpx_scaled_avg_2d; - } - } - - // 2D subpel motion always gets filtered in both directions - - if ((sf->x_step_q4 != 16) || (sf->y_step_q4 != 16)) { - sf->predict[1][1][0] = vpx_scaled_2d; - sf->predict[1][1][1] = vpx_scaled_avg_2d; - } else { - sf->predict[1][1][0] = vpx_convolve8; - sf->predict[1][1][1] = vpx_convolve8_avg; - } - -#if CONFIG_VP9_HIGHBITDEPTH - if (use_highbd) { - if (sf->x_step_q4 == 16) { - if (sf->y_step_q4 == 16) { - // No scaling in either direction. - sf->highbd_predict[0][0][0] = vpx_highbd_convolve_copy; - sf->highbd_predict[0][0][1] = vpx_highbd_convolve_avg; - sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert; - sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert; - sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz; - sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz; - } else { - // No scaling in x direction. Must always scale in the y direction. - sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_vert; - sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_vert; - sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert; - sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert; - sf->highbd_predict[1][0][0] = vpx_highbd_convolve8; - sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg; - } - } else { - if (sf->y_step_q4 == 16) { - // No scaling in the y direction. Must always scale in the x direction. 
- sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_horiz; - sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_horiz; - sf->highbd_predict[0][1][0] = vpx_highbd_convolve8; - sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg; - sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz; - sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz; - } else { - // Must always scale in both directions. - sf->highbd_predict[0][0][0] = vpx_highbd_convolve8; - sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg; - sf->highbd_predict[0][1][0] = vpx_highbd_convolve8; - sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg; - sf->highbd_predict[1][0][0] = vpx_highbd_convolve8; - sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg; - } - } - // 2D subpel motion always gets filtered in both directions. - sf->highbd_predict[1][1][0] = vpx_highbd_convolve8; - sf->highbd_predict[1][1][1] = vpx_highbd_convolve8_avg; - } -#endif -} diff --git a/thirdparty/libvpx/vp9/common/vp9_scale.h b/thirdparty/libvpx/vp9/common/vp9_scale.h deleted file mode 100644 index 5e91041079..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_scale.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_COMMON_VP9_SCALE_H_ -#define VP9_COMMON_VP9_SCALE_H_ - -#include "vp9/common/vp9_mv.h" -#include "vpx_dsp/vpx_convolve.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define REF_SCALE_SHIFT 14 -#define REF_NO_SCALE (1 << REF_SCALE_SHIFT) -#define REF_INVALID_SCALE -1 - -struct scale_factors { - int x_scale_fp; // horizontal fixed point scale factor - int y_scale_fp; // vertical fixed point scale factor - int x_step_q4; - int y_step_q4; - - int (*scale_value_x)(int val, const struct scale_factors *sf); - int (*scale_value_y)(int val, const struct scale_factors *sf); - - convolve_fn_t predict[2][2][2]; // horiz, vert, avg -#if CONFIG_VP9_HIGHBITDEPTH - highbd_convolve_fn_t highbd_predict[2][2][2]; // horiz, vert, avg -#endif -}; - -MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf); - -#if CONFIG_VP9_HIGHBITDEPTH -void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, - int other_w, int other_h, - int this_w, int this_h, - int use_high); -#else -void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, - int other_w, int other_h, - int this_w, int this_h); -#endif - -static INLINE int vp9_is_valid_scale(const struct scale_factors *sf) { - return sf->x_scale_fp != REF_INVALID_SCALE && - sf->y_scale_fp != REF_INVALID_SCALE; -} - -static INLINE int vp9_is_scaled(const struct scale_factors *sf) { - return vp9_is_valid_scale(sf) && - (sf->x_scale_fp != REF_NO_SCALE || sf->y_scale_fp != REF_NO_SCALE); -} - -static INLINE int valid_ref_frame_size(int ref_width, int ref_height, - int this_width, int this_height) { - return 2 * this_width >= ref_width && - 2 * this_height >= ref_height && - this_width <= 16 * ref_width && - this_height <= 16 * ref_height; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_SCALE_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_scan.c b/thirdparty/libvpx/vp9/common/vp9_scan.c deleted file mode 100644 index 8b8b09f4a3..0000000000 --- 
a/thirdparty/libvpx/vp9/common/vp9_scan.c +++ /dev/null @@ -1,725 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <assert.h> - -#include "vp9/common/vp9_scan.h" - -DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4[16]) = { - 0, 4, 1, 5, - 8, 2, 12, 9, - 3, 6, 13, 10, - 7, 14, 11, 15, -}; - -DECLARE_ALIGNED(16, static const int16_t, col_scan_4x4[16]) = { - 0, 4, 8, 1, - 12, 5, 9, 2, - 13, 6, 10, 3, - 7, 14, 11, 15, -}; - -DECLARE_ALIGNED(16, static const int16_t, row_scan_4x4[16]) = { - 0, 1, 4, 2, - 5, 3, 6, 8, - 9, 7, 12, 10, - 13, 11, 14, 15, -}; - -DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8[64]) = { - 0, 8, 1, 16, 9, 2, 17, 24, - 10, 3, 18, 25, 32, 11, 4, 26, - 33, 19, 40, 12, 34, 27, 5, 41, - 20, 48, 13, 35, 42, 28, 21, 6, - 49, 56, 36, 43, 29, 7, 14, 50, - 57, 44, 22, 37, 15, 51, 58, 30, - 45, 23, 52, 59, 38, 31, 60, 53, - 46, 39, 61, 54, 47, 62, 55, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, col_scan_8x8[64]) = { - 0, 8, 16, 1, 24, 9, 32, 17, - 2, 40, 25, 10, 33, 18, 48, 3, - 26, 41, 11, 56, 19, 34, 4, 49, - 27, 42, 12, 35, 20, 57, 50, 28, - 5, 43, 13, 36, 58, 51, 21, 44, - 6, 29, 59, 37, 14, 52, 22, 7, - 45, 60, 30, 15, 38, 53, 23, 46, - 31, 61, 39, 54, 47, 62, 55, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, row_scan_8x8[64]) = { - 0, 1, 2, 8, 9, 3, 16, 10, - 4, 17, 11, 24, 5, 18, 25, 12, - 19, 26, 32, 6, 13, 20, 33, 27, - 7, 34, 40, 21, 28, 41, 14, 35, - 48, 42, 29, 36, 49, 22, 43, 15, - 56, 37, 50, 44, 30, 57, 23, 51, - 58, 45, 38, 52, 31, 59, 53, 46, - 60, 39, 61, 47, 54, 55, 62, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, 
default_scan_16x16[256]) = { - 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, 80, - 50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, 21, 52, - 98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, 129, 38, 69, - 100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116, 101, 131, 160, 146, - 55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147, 176, 162, 87, 56, 25, - 133, 118, 177, 148, 72, 103, 41, 163, 10, 192, 178, 88, 57, 134, 149, 119, - 26, 164, 73, 104, 193, 42, 179, 208, 11, 135, 89, 165, 120, 150, 58, 194, - 180, 27, 74, 209, 105, 151, 136, 43, 90, 224, 166, 195, 181, 121, 210, 59, - 12, 152, 106, 167, 196, 75, 137, 225, 211, 240, 182, 122, 91, 28, 197, 13, - 226, 168, 183, 153, 44, 212, 138, 107, 241, 60, 29, 123, 198, 184, 227, 169, - 242, 76, 213, 154, 45, 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108, - 77, 155, 30, 15, 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140, - 230, 62, 216, 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141, - 63, 232, 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142, - 219, 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159, - 251, - 190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239, - 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, col_scan_16x16[256]) = { - 0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18, 81, - 34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51, 129, 4, - 67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161, 68, 115, 21, - 146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6, 116, 193, 147, 85, - 22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70, 7, 148, 194, 86, 179, - 225, 23, 133, 39, 164, 8, 102, 210, 241, 55, 195, 118, 149, 71, 180, 24, - 87, 226, 134, 165, 211, 40, 103, 56, 72, 150, 196, 242, 119, 9, 181, 227, - 88, 166, 25, 135, 41, 104, 212, 57, 151, 197, 120, 73, 243, 182, 136, 167, - 213, 89, 10, 228, 105, 152, 198, 26, 42, 121, 183, 244, 168, 58, 137, 229, - 74, 214, 90, 153, 199, 184, 
11, 106, 245, 27, 122, 230, 169, 43, 215, 59, - 200, 138, 185, 246, 75, 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170, - 60, 247, 232, 76, 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202, - 233, 171, 61, 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125, - 62, 172, 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79, - 126, 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205, - 236, - 159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239, - 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, row_scan_16x16[256]) = { - 0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48, 20, - 49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9, 66, 52, - 23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54, 83, 97, 69, - 25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26, 41, 56, 114, 100, - 13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72, 116, 14, 87, 130, 102, - 144, 73, 131, 117, 28, 58, 15, 88, 43, 145, 103, 132, 146, 118, 74, 160, - 89, 133, 104, 29, 59, 147, 119, 44, 161, 148, 90, 105, 134, 162, 120, 176, - 75, 135, 149, 30, 60, 163, 177, 45, 121, 91, 106, 164, 178, 150, 192, 136, - 165, 179, 31, 151, 193, 76, 122, 61, 137, 194, 107, 152, 180, 208, 46, 166, - 167, 195, 92, 181, 138, 209, 123, 153, 224, 196, 77, 168, 210, 182, 240, 108, - 197, 62, 154, 225, 183, 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170, - 124, 155, 199, 78, 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186, - 156, 229, 243, 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110, - 157, 245, 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111, - 158, - 188, 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220, - 175, - 190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254, - 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32[1024]) = { - 0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, 160, - 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, 162, 193, - 
68, 131, 37, 100, - 225, 194, 256, 163, 69, 132, 6, 226, 257, 288, 195, 101, 164, 38, - 258, 7, 227, 289, 133, 320, 70, 196, 165, 290, 259, 228, 39, 321, - 102, 352, 8, 197, - 71, 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292, - 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, 293, - 41, 417, 199, 136, - 262, 387, 448, 325, 356, 10, 73, 418, 231, 168, 449, 294, 388, 105, - 419, 263, 42, 200, 357, 450, 137, 480, 74, 326, 232, 11, 389, 169, - 295, 420, 106, 451, - 481, 358, 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421, - 75, 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391, - 453, 139, 44, 234, - 484, 297, 360, 171, 76, 515, 545, 266, 329, 454, 13, 423, 203, 108, - 546, 485, 576, 298, 235, 140, 361, 330, 172, 547, 45, 455, 267, 577, - 486, 77, 204, 362, - 608, 14, 299, 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173, - 610, 363, 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17, - 111, 238, 48, 143, - 80, 175, 112, 207, 49, 18, 239, 81, 113, 19, 50, 82, 114, 51, - 83, 115, 640, 516, 392, 268, 144, 20, 672, 641, 548, 517, 424, - 393, 300, 269, 176, 145, - 52, 21, 704, 673, 642, 580, 549, 518, 456, 425, 394, 332, 301, - 270, 208, 177, 146, 84, 53, 22, 736, 705, 674, 643, 612, 581, - 550, 519, 488, 457, 426, 395, - 364, 333, 302, 271, 240, 209, 178, 147, 116, 85, 54, 23, 737, - 706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303, 241, - 210, 179, 117, 86, 55, 738, 707, - 614, 583, 490, 459, 366, 335, 242, 211, 118, 87, 739, 615, 491, - 367, 243, 119, 768, 644, 520, 396, 272, 148, 24, 800, 769, 676, - 645, 552, 521, 428, 397, 304, - 273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584, 553, - 522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57, 26, - 864, 833, 802, 771, 740, 709, - 678, 647, 616, 585, 554, 523, 492, 461, 430, 399, 368, 337, 306, - 275, 244, 213, 182, 151, 120, 89, 58, 27, 865, 834, 803, 741, - 710, 679, 617, 586, 555, 493, - 462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 
866, 835, - 742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867, - 743, 619, 495, 371, 247, 123, - 896, 772, 648, 524, 400, 276, 152, 28, 928, 897, 804, 773, 680, - 649, 556, 525, 432, 401, 308, 277, 184, 153, 60, 29, 960, 929, - 898, 836, 805, 774, 712, 681, - 650, 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154, - 92, 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682, - 651, 620, 589, 558, 527, - 496, 465, 434, 403, 372, 341, 310, 279, 248, 217, 186, 155, 124, - 93, 62, 31, 993, 962, 931, 869, 838, 807, 745, 714, 683, 621, 590, - 559, 497, 466, 435, 373, - 342, 311, 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715, - 622, 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623, - 499, 375, 251, 127, - 900, 776, 652, 528, 404, 280, 156, 932, 901, 808, 777, 684, 653, 560, - 529, 436, 405, 312, 281, 188, 157, 964, 933, 902, 840, 809, 778, 716, - 685, 654, 592, 561, - 530, 468, 437, 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903, - 872, 841, 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469, - 438, 407, 376, 345, - 314, 283, 252, 221, 190, 159, 997, 966, 935, 873, 842, 811, 749, 718, - 687, 625, 594, 563, 501, 470, 439, 377, 346, 315, 253, 222, 191, 998, - 967, 874, 843, 750, - 719, 626, 595, 502, 471, 378, 347, 254, 223, 999, 875, 751, 627, 503, - 379, 255, 904, 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657, - 564, 533, 440, 409, - 316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658, 596, 565, 534, - 472, 441, 410, 348, 317, 286, 1000, 969, 938, 907, 876, 845, 814, 783, - 752, 721, 690, 659, - 628, 597, 566, 535, 504, 473, 442, 411, 380, 349, 318, 287, 1001, 970, - 939, 877, 846, 815, 753, 722, 691, 629, 598, 567, 505, 474, 443, 381, - 350, 319, 1002, 971, - 878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879, 755, 631, - 507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785, 692, 661, 568, - 537, 444, 413, 972, - 941, 910, 848, 817, 786, 724, 693, 662, 600, 569, 538, 476, 445, 414, - 
1004, 973, 942, 911, 880, 849, 818, 787, 756, 725, 694, 663, 632, 601, - 570, 539, 508, 477, - 446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602, 571, - 509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510, 479, - 1007, 883, 759, 635, 511, - 912, 788, 664, 540, 944, 913, 820, 789, 696, 665, 572, 541, 976, 945, - 914, 852, 821, 790, 728, 697, 666, 604, 573, 542, 1008, 977, 946, 915, - 884, 853, 822, 791, - 760, 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823, - 761, 730, 699, 637, 606, 575, 1010, 979, 886, 855, 762, 731, 638, 607, - 1011, 887, 763, 639, - 916, 792, 668, 948, 917, 824, 793, 700, 669, 980, 949, 918, 856, 825, - 794, 732, 701, 670, 1012, 981, 950, 919, 888, 857, 826, 795, 764, 733, - 702, 671, 1013, 982, - 951, 889, 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015, - 891, 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798, - 1016, 985, 954, 923, - 892, 861, 830, 799, 1017, 986, 955, 893, 862, 831, 1018, 987, 894, 863, - 1019, 895, 924, 956, 925, 988, 957, 926, 1020, 989, 958, 927, 1021, - 990, 959, 1022, 991, 1023, -}; - -// Neighborhood 2-tuples for various scans and blocksizes, -// in {top, left} order for each position in corresponding scan order. 
-DECLARE_ALIGNED(16, static const int16_t, - default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 4, 4, 4, 1, 1, 8, 8, 5, 8, 2, 2, 2, 5, 9, 12, 6, 9, - 3, 6, 10, 13, 7, 10, 11, 14, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - col_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 4, 4, 0, 0, 8, 8, 1, 1, 5, 5, 1, 1, 9, 9, 2, 2, 6, 6, 2, 2, 3, - 3, 10, 10, 7, 7, 11, 11, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - row_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 1, 1, 4, 4, 2, 2, 5, 5, 4, 4, 8, 8, 6, 6, 8, 8, 9, 9, 12, - 12, 10, 10, 13, 13, 14, 14, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - col_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 8, 8, 0, 0, 16, 16, 1, 1, 24, 24, 9, 9, 1, 1, 32, 32, 17, 17, 2, - 2, 25, 25, 10, 10, 40, 40, 2, 2, 18, 18, 33, 33, 3, 3, 48, 48, 11, 11, 26, - 26, 3, 3, 41, 41, 19, 19, 34, 34, 4, 4, 27, 27, 12, 12, 49, 49, 42, 42, 20, - 20, 4, 4, 35, 35, 5, 5, 28, 28, 50, 50, 43, 43, 13, 13, 36, 36, 5, 5, 21, 21, - 51, 51, 29, 29, 6, 6, 44, 44, 14, 14, 6, 6, 37, 37, 52, 52, 22, 22, 7, 7, 30, - 30, 45, 45, 15, 15, 38, 38, 23, 23, 53, 53, 31, 31, 46, 46, 39, 39, 54, 54, - 47, 47, 55, 55, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - row_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 0, 0, 8, 8, 2, 2, 8, 8, 9, 9, 3, 3, 16, 16, 10, 10, 16, 16, - 4, 4, 17, 17, 24, 24, 11, 11, 18, 18, 25, 25, 24, 24, 5, 5, 12, 12, 19, 19, - 32, 32, 26, 26, 6, 6, 33, 33, 32, 32, 20, 20, 27, 27, 40, 40, 13, 13, 34, 34, - 40, 40, 41, 41, 28, 28, 35, 35, 48, 48, 21, 21, 42, 42, 14, 14, 48, 48, 36, - 36, 49, 49, 43, 43, 29, 29, 56, 56, 22, 22, 50, 50, 57, 57, 44, 44, 37, 37, - 51, 51, 30, 30, 58, 58, 52, 52, 45, 45, 59, 59, 38, 38, 60, 60, 46, 46, 53, - 53, 54, 54, 61, 61, 62, 62, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 8, 8, 1, 8, 1, 1, 9, 16, 16, 16, 2, 9, 2, 
2, 10, 17, 17, - 24, 24, 24, 3, 10, 3, 3, 18, 25, 25, 32, 11, 18, 32, 32, 4, 11, 26, 33, 19, - 26, 4, 4, 33, 40, 12, 19, 40, 40, 5, 12, 27, 34, 34, 41, 20, 27, 13, 20, 5, - 5, 41, 48, 48, 48, 28, 35, 35, 42, 21, 28, 6, 6, 6, 13, 42, 49, 49, 56, 36, - 43, 14, 21, 29, 36, 7, 14, 43, 50, 50, 57, 22, 29, 37, 44, 15, 22, 44, 51, - 51, 58, 30, 37, 23, 30, 52, 59, 45, 52, 38, 45, 31, 38, 53, 60, 46, 53, 39, - 46, 54, 61, 47, 54, 55, 62, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - col_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 16, 16, 32, 32, 0, 0, 48, 48, 1, 1, 64, 64, - 17, 17, 80, 80, 33, 33, 1, 1, 49, 49, 96, 96, 2, 2, 65, 65, - 18, 18, 112, 112, 34, 34, 81, 81, 2, 2, 50, 50, 128, 128, 3, 3, - 97, 97, 19, 19, 66, 66, 144, 144, 82, 82, 35, 35, 113, 113, 3, 3, - 51, 51, 160, 160, 4, 4, 98, 98, 129, 129, 67, 67, 20, 20, 83, 83, - 114, 114, 36, 36, 176, 176, 4, 4, 145, 145, 52, 52, 99, 99, 5, 5, - 130, 130, 68, 68, 192, 192, 161, 161, 21, 21, 115, 115, 84, 84, 37, 37, - 146, 146, 208, 208, 53, 53, 5, 5, 100, 100, 177, 177, 131, 131, 69, 69, - 6, 6, 224, 224, 116, 116, 22, 22, 162, 162, 85, 85, 147, 147, 38, 38, - 193, 193, 101, 101, 54, 54, 6, 6, 132, 132, 178, 178, 70, 70, 163, 163, - 209, 209, 7, 7, 117, 117, 23, 23, 148, 148, 7, 7, 86, 86, 194, 194, - 225, 225, 39, 39, 179, 179, 102, 102, 133, 133, 55, 55, 164, 164, 8, 8, - 71, 71, 210, 210, 118, 118, 149, 149, 195, 195, 24, 24, 87, 87, 40, 40, - 56, 56, 134, 134, 180, 180, 226, 226, 103, 103, 8, 8, 165, 165, 211, 211, - 72, 72, 150, 150, 9, 9, 119, 119, 25, 25, 88, 88, 196, 196, 41, 41, - 135, 135, 181, 181, 104, 104, 57, 57, 227, 227, 166, 166, 120, 120, 151, 151, - 197, 197, 73, 73, 9, 9, 212, 212, 89, 89, 136, 136, 182, 182, 10, 10, - 26, 26, 105, 105, 167, 167, 228, 228, 152, 152, 42, 42, 121, 121, 213, 213, - 58, 58, 198, 198, 74, 74, 137, 137, 183, 183, 168, 168, 10, 10, 90, 90, - 229, 229, 11, 11, 106, 106, 214, 214, 153, 153, 27, 27, 199, 199, 43, 43, - 184, 184, 122, 122, 169, 
169, 230, 230, 59, 59, 11, 11, 75, 75, 138, 138, - 200, 200, 215, 215, 91, 91, 12, 12, 28, 28, 185, 185, 107, 107, 154, 154, - 44, 44, 231, 231, 216, 216, 60, 60, 123, 123, 12, 12, 76, 76, 201, 201, - 170, 170, 232, 232, 139, 139, 92, 92, 13, 13, 108, 108, 29, 29, 186, 186, - 217, 217, 155, 155, 45, 45, 13, 13, 61, 61, 124, 124, 14, 14, 233, 233, - 77, 77, 14, 14, 171, 171, 140, 140, 202, 202, 30, 30, 93, 93, 109, 109, - 46, 46, 156, 156, 62, 62, 187, 187, 15, 15, 125, 125, 218, 218, 78, 78, - 31, 31, 172, 172, 47, 47, 141, 141, 94, 94, 234, 234, 203, 203, 63, 63, - 110, 110, 188, 188, 157, 157, 126, 126, 79, 79, 173, 173, 95, 95, 219, 219, - 142, 142, 204, 204, 235, 235, 111, 111, 158, 158, 127, 127, 189, 189, 220, - 220, 143, 143, 174, 174, 205, 205, 236, 236, 159, 159, 190, 190, 221, 221, - 175, 175, 237, 237, 206, 206, 222, 222, 191, 191, 238, 238, 207, 207, 223, - 223, 239, 239, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - row_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 16, 16, 3, 3, 17, 17, - 16, 16, 4, 4, 32, 32, 18, 18, 5, 5, 33, 33, 32, 32, 19, 19, - 48, 48, 6, 6, 34, 34, 20, 20, 49, 49, 48, 48, 7, 7, 35, 35, - 64, 64, 21, 21, 50, 50, 36, 36, 64, 64, 8, 8, 65, 65, 51, 51, - 22, 22, 37, 37, 80, 80, 66, 66, 9, 9, 52, 52, 23, 23, 81, 81, - 67, 67, 80, 80, 38, 38, 10, 10, 53, 53, 82, 82, 96, 96, 68, 68, - 24, 24, 97, 97, 83, 83, 39, 39, 96, 96, 54, 54, 11, 11, 69, 69, - 98, 98, 112, 112, 84, 84, 25, 25, 40, 40, 55, 55, 113, 113, 99, 99, - 12, 12, 70, 70, 112, 112, 85, 85, 26, 26, 114, 114, 100, 100, 128, 128, - 41, 41, 56, 56, 71, 71, 115, 115, 13, 13, 86, 86, 129, 129, 101, 101, - 128, 128, 72, 72, 130, 130, 116, 116, 27, 27, 57, 57, 14, 14, 87, 87, - 42, 42, 144, 144, 102, 102, 131, 131, 145, 145, 117, 117, 73, 73, 144, 144, - 88, 88, 132, 132, 103, 103, 28, 28, 58, 58, 146, 146, 118, 118, 43, 43, - 160, 160, 147, 147, 89, 89, 104, 104, 133, 133, 161, 161, 119, 119, 160, 160, - 74, 74, 134, 134, 148, 148, 29, 
29, 59, 59, 162, 162, 176, 176, 44, 44, - 120, 120, 90, 90, 105, 105, 163, 163, 177, 177, 149, 149, 176, 176, 135, 135, - 164, 164, 178, 178, 30, 30, 150, 150, 192, 192, 75, 75, 121, 121, 60, 60, - 136, 136, 193, 193, 106, 106, 151, 151, 179, 179, 192, 192, 45, 45, 165, 165, - 166, 166, 194, 194, 91, 91, 180, 180, 137, 137, 208, 208, 122, 122, 152, 152, - 208, 208, 195, 195, 76, 76, 167, 167, 209, 209, 181, 181, 224, 224, 107, 107, - 196, 196, 61, 61, 153, 153, 224, 224, 182, 182, 168, 168, 210, 210, 46, 46, - 138, 138, 92, 92, 183, 183, 225, 225, 211, 211, 240, 240, 197, 197, 169, 169, - 123, 123, 154, 154, 198, 198, 77, 77, 212, 212, 184, 184, 108, 108, 226, 226, - 199, 199, 62, 62, 227, 227, 241, 241, 139, 139, 213, 213, 170, 170, 185, 185, - 155, 155, 228, 228, 242, 242, 124, 124, 93, 93, 200, 200, 243, 243, 214, 214, - 215, 215, 229, 229, 140, 140, 186, 186, 201, 201, 78, 78, 171, 171, 109, 109, - 156, 156, 244, 244, 216, 216, 230, 230, 94, 94, 245, 245, 231, 231, 125, 125, - 202, 202, 246, 246, 232, 232, 172, 172, 217, 217, 141, 141, 110, 110, 157, - 157, 187, 187, 247, 247, 126, 126, 233, 233, 218, 218, 248, 248, 188, 188, - 203, 203, 142, 142, 173, 173, 158, 158, 249, 249, 234, 234, 204, 204, 219, - 219, 174, 174, 189, 189, 250, 250, 220, 220, 190, 190, 205, 205, 235, 235, - 206, 206, 236, 236, 251, 251, 221, 221, 252, 252, 222, 222, 237, 237, 238, - 238, 253, 253, 254, 254, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 16, 16, 1, 16, 1, 1, 32, 32, 17, 32, - 2, 17, 2, 2, 48, 48, 18, 33, 33, 48, 3, 18, 49, 64, 64, 64, - 34, 49, 3, 3, 19, 34, 50, 65, 4, 19, 65, 80, 80, 80, 35, 50, - 4, 4, 20, 35, 66, 81, 81, 96, 51, 66, 96, 96, 5, 20, 36, 51, - 82, 97, 21, 36, 67, 82, 97, 112, 5, 5, 52, 67, 112, 112, 37, 52, - 6, 21, 83, 98, 98, 113, 68, 83, 6, 6, 113, 128, 22, 37, 53, 68, - 84, 99, 99, 114, 128, 128, 114, 129, 69, 84, 38, 53, 7, 22, 7, 7, - 129, 144, 23, 38, 54, 69, 100, 
115, 85, 100, 115, 130, 144, 144, 130, 145, - 39, 54, 70, 85, 8, 23, 55, 70, 116, 131, 101, 116, 145, 160, 24, 39, - 8, 8, 86, 101, 131, 146, 160, 160, 146, 161, 71, 86, 40, 55, 9, 24, - 117, 132, 102, 117, 161, 176, 132, 147, 56, 71, 87, 102, 25, 40, 147, 162, - 9, 9, 176, 176, 162, 177, 72, 87, 41, 56, 118, 133, 133, 148, 103, 118, - 10, 25, 148, 163, 57, 72, 88, 103, 177, 192, 26, 41, 163, 178, 192, 192, - 10, 10, 119, 134, 73, 88, 149, 164, 104, 119, 134, 149, 42, 57, 178, 193, - 164, 179, 11, 26, 58, 73, 193, 208, 89, 104, 135, 150, 120, 135, 27, 42, - 74, 89, 208, 208, 150, 165, 179, 194, 165, 180, 105, 120, 194, 209, 43, 58, - 11, 11, 136, 151, 90, 105, 151, 166, 180, 195, 59, 74, 121, 136, 209, 224, - 195, 210, 224, 224, 166, 181, 106, 121, 75, 90, 12, 27, 181, 196, 12, 12, - 210, 225, 152, 167, 167, 182, 137, 152, 28, 43, 196, 211, 122, 137, 91, 106, - 225, 240, 44, 59, 13, 28, 107, 122, 182, 197, 168, 183, 211, 226, 153, 168, - 226, 241, 60, 75, 197, 212, 138, 153, 29, 44, 76, 91, 13, 13, 183, 198, - 123, 138, 45, 60, 212, 227, 198, 213, 154, 169, 169, 184, 227, 242, 92, 107, - 61, 76, 139, 154, 14, 29, 14, 14, 184, 199, 213, 228, 108, 123, 199, 214, - 228, 243, 77, 92, 30, 45, 170, 185, 155, 170, 185, 200, 93, 108, 124, 139, - 214, 229, 46, 61, 200, 215, 229, 244, 15, 30, 109, 124, 62, 77, 140, 155, - 215, 230, 31, 46, 171, 186, 186, 201, 201, 216, 78, 93, 230, 245, 125, 140, - 47, 62, 216, 231, 156, 171, 94, 109, 231, 246, 141, 156, 63, 78, 202, 217, - 187, 202, 110, 125, 217, 232, 172, 187, 232, 247, 79, 94, 157, 172, 126, 141, - 203, 218, 95, 110, 233, 248, 218, 233, 142, 157, 111, 126, 173, 188, 188, 203, - 234, 249, 219, 234, 127, 142, 158, 173, 204, 219, 189, 204, 143, 158, 235, - 250, 174, 189, 205, 220, 159, 174, 220, 235, 221, 236, 175, 190, 190, 205, - 236, 251, 206, 221, 237, 252, 191, 206, 222, 237, 207, 222, 238, 253, 223, - 238, 239, 254, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, - default_scan_32x32_neighbors[1025 * 
MAX_NEIGHBORS]) = { - 0, 0, 0, 0, 0, 0, 32, 32, 1, 32, 1, 1, 64, 64, 33, 64, - 2, 33, 96, 96, 2, 2, 65, 96, 34, 65, 128, 128, 97, 128, 3, 34, - 66, 97, 3, 3, 35, 66, 98, 129, 129, 160, 160, 160, 4, 35, 67, 98, - 192, 192, 4, 4, 130, 161, 161, 192, 36, 67, 99, 130, 5, 36, 68, 99, - 193, 224, 162, 193, 224, 224, 131, 162, 37, 68, 100, 131, 5, 5, 194, 225, - 225, 256, 256, 256, 163, 194, 69, 100, 132, 163, 6, 37, 226, 257, 6, 6, - 195, 226, 257, 288, 101, 132, 288, 288, 38, 69, 164, 195, 133, 164, 258, 289, - 227, 258, 196, 227, 7, 38, 289, 320, 70, 101, 320, 320, 7, 7, 165, 196, - 39, 70, 102, 133, 290, 321, 259, 290, 228, 259, 321, 352, 352, 352, 197, 228, - 134, 165, 71, 102, 8, 39, 322, 353, 291, 322, 260, 291, 103, 134, 353, 384, - 166, 197, 229, 260, 40, 71, 8, 8, 384, 384, 135, 166, 354, 385, 323, 354, - 198, 229, 292, 323, 72, 103, 261, 292, 9, 40, 385, 416, 167, 198, 104, 135, - 230, 261, 355, 386, 416, 416, 293, 324, 324, 355, 9, 9, 41, 72, 386, 417, - 199, 230, 136, 167, 417, 448, 262, 293, 356, 387, 73, 104, 387, 418, 231, 262, - 10, 41, 168, 199, 325, 356, 418, 449, 105, 136, 448, 448, 42, 73, 294, 325, - 200, 231, 10, 10, 357, 388, 137, 168, 263, 294, 388, 419, 74, 105, 419, 450, - 449, 480, 326, 357, 232, 263, 295, 326, 169, 200, 11, 42, 106, 137, 480, 480, - 450, 481, 358, 389, 264, 295, 201, 232, 138, 169, 389, 420, 43, 74, 420, 451, - 327, 358, 11, 11, 481, 512, 233, 264, 451, 482, 296, 327, 75, 106, 170, 201, - 482, 513, 512, 512, 390, 421, 359, 390, 421, 452, 107, 138, 12, 43, 202, 233, - 452, 483, 265, 296, 328, 359, 139, 170, 44, 75, 483, 514, 513, 544, 234, 265, - 297, 328, 422, 453, 12, 12, 391, 422, 171, 202, 76, 107, 514, 545, 453, 484, - 544, 544, 266, 297, 203, 234, 108, 139, 329, 360, 298, 329, 140, 171, 515, - 546, 13, 44, 423, 454, 235, 266, 545, 576, 454, 485, 45, 76, 172, 203, 330, - 361, 576, 576, 13, 13, 267, 298, 546, 577, 77, 108, 204, 235, 455, 486, 577, - 608, 299, 330, 109, 140, 547, 578, 14, 45, 14, 14, 141, 172, 578, 609, 331, 
- 362, 46, 77, 173, 204, 15, 15, 78, 109, 205, 236, 579, 610, 110, 141, 15, 46, - 142, 173, 47, 78, 174, 205, 16, 16, 79, 110, 206, 237, 16, 47, 111, 142, - 48, 79, 143, 174, 80, 111, 175, 206, 17, 48, 17, 17, 207, 238, 49, 80, - 81, 112, 18, 18, 18, 49, 50, 81, 82, 113, 19, 50, 51, 82, 83, 114, 608, 608, - 484, 515, 360, 391, 236, 267, 112, 143, 19, 19, 640, 640, 609, 640, 516, 547, - 485, 516, 392, 423, 361, 392, 268, 299, 237, 268, 144, 175, 113, 144, 20, 51, - 20, 20, 672, 672, 641, 672, 610, 641, 548, 579, 517, 548, 486, 517, 424, 455, - 393, 424, 362, 393, 300, 331, 269, 300, 238, 269, 176, 207, 145, 176, 114, - 145, 52, 83, 21, 52, 21, 21, 704, 704, 673, 704, 642, 673, 611, 642, 580, - 611, 549, 580, 518, 549, 487, 518, 456, 487, 425, 456, 394, 425, 363, 394, - 332, 363, 301, 332, 270, 301, 239, 270, 208, 239, 177, 208, 146, 177, 115, - 146, 84, 115, 53, 84, 22, 53, 22, 22, 705, 736, 674, 705, 643, 674, 581, 612, - 550, 581, 519, 550, 457, 488, 426, 457, 395, 426, 333, 364, 302, 333, 271, - 302, 209, 240, 178, 209, 147, 178, 85, 116, 54, 85, 23, 54, 706, 737, 675, - 706, 582, 613, 551, 582, 458, 489, 427, 458, 334, 365, 303, 334, 210, 241, - 179, 210, 86, 117, 55, 86, 707, 738, 583, 614, 459, 490, 335, 366, 211, 242, - 87, 118, 736, 736, 612, 643, 488, 519, 364, 395, 240, 271, 116, 147, 23, 23, - 768, 768, 737, 768, 644, 675, 613, 644, 520, 551, 489, 520, 396, 427, 365, - 396, 272, 303, 241, 272, 148, 179, 117, 148, 24, 55, 24, 24, 800, 800, 769, - 800, 738, 769, 676, 707, 645, 676, 614, 645, 552, 583, 521, 552, 490, 521, - 428, 459, 397, 428, 366, 397, 304, 335, 273, 304, 242, 273, 180, 211, 149, - 180, 118, 149, 56, 87, 25, 56, 25, 25, 832, 832, 801, 832, 770, 801, 739, - 770, 708, 739, 677, 708, 646, 677, 615, 646, 584, 615, 553, 584, 522, 553, - 491, 522, 460, 491, 429, 460, 398, 429, 367, 398, 336, 367, 305, 336, 274, - 305, 243, 274, 212, 243, 181, 212, 150, 181, 119, 150, 88, 119, 57, 88, 26, - 57, 26, 26, 833, 864, 802, 833, 771, 802, 709, 740, 678, 
709, 647, 678, 585, - 616, 554, 585, 523, 554, 461, 492, 430, 461, 399, 430, 337, 368, 306, 337, - 275, 306, 213, 244, 182, 213, 151, 182, 89, 120, 58, 89, 27, 58, 834, 865, - 803, 834, 710, 741, 679, 710, 586, 617, 555, 586, 462, 493, 431, 462, 338, - 369, 307, 338, 214, 245, 183, 214, 90, 121, 59, 90, 835, 866, 711, 742, 587, - 618, 463, 494, 339, 370, 215, 246, 91, 122, 864, 864, 740, 771, 616, 647, - 492, 523, 368, 399, 244, 275, 120, 151, 27, 27, 896, 896, 865, 896, 772, 803, - 741, 772, 648, 679, 617, 648, 524, 555, 493, 524, 400, 431, 369, 400, 276, - 307, 245, 276, 152, 183, 121, 152, 28, 59, 28, 28, 928, 928, 897, 928, 866, - 897, 804, 835, 773, 804, 742, 773, 680, 711, 649, 680, 618, 649, 556, 587, - 525, 556, 494, 525, 432, 463, 401, 432, 370, 401, 308, 339, 277, 308, 246, - 277, 184, 215, 153, 184, 122, 153, 60, 91, 29, 60, 29, 29, 960, 960, 929, - 960, 898, 929, 867, 898, 836, 867, 805, 836, 774, 805, 743, 774, 712, 743, - 681, 712, 650, 681, 619, 650, 588, 619, 557, 588, 526, 557, 495, 526, 464, - 495, 433, 464, 402, 433, 371, 402, 340, 371, 309, 340, 278, 309, 247, 278, - 216, 247, 185, 216, 154, 185, 123, 154, 92, 123, 61, 92, 30, 61, 30, 30, - 961, 992, 930, 961, 899, 930, 837, 868, 806, 837, 775, 806, 713, 744, 682, - 713, 651, 682, 589, 620, 558, 589, 527, 558, 465, 496, 434, 465, 403, 434, - 341, 372, 310, 341, 279, 310, 217, 248, 186, 217, 155, 186, 93, 124, 62, 93, - 31, 62, 962, 993, 931, 962, 838, 869, 807, 838, 714, 745, 683, 714, 590, 621, - 559, 590, 466, 497, 435, 466, 342, 373, 311, 342, 218, 249, 187, 218, 94, - 125, 63, 94, 963, 994, 839, 870, 715, 746, 591, 622, 467, 498, 343, 374, 219, - 250, 95, 126, 868, 899, 744, 775, 620, 651, 496, 527, 372, 403, 248, 279, - 124, 155, 900, 931, 869, 900, 776, 807, 745, 776, 652, 683, 621, 652, 528, - 559, 497, 528, 404, 435, 373, 404, 280, 311, 249, 280, 156, 187, 125, 156, - 932, 963, 901, 932, 870, 901, 808, 839, 777, 808, 746, 777, 684, 715, 653, - 684, 622, 653, 560, 591, 529, 560, 498, 529, 
436, 467, 405, 436, 374, 405, - 312, 343, 281, 312, 250, 281, 188, 219, 157, 188, 126, 157, 964, 995, 933, - 964, 902, 933, 871, 902, 840, 871, 809, 840, 778, 809, 747, 778, 716, 747, - 685, 716, 654, 685, 623, 654, 592, 623, 561, 592, 530, 561, 499, 530, 468, - 499, 437, 468, 406, 437, 375, 406, 344, 375, 313, 344, 282, 313, 251, 282, - 220, 251, 189, 220, 158, 189, 127, 158, 965, 996, 934, 965, 903, 934, 841, - 872, 810, 841, 779, 810, 717, 748, 686, 717, 655, 686, 593, 624, 562, 593, - 531, 562, 469, 500, 438, 469, 407, 438, 345, 376, 314, 345, 283, 314, 221, - 252, 190, 221, 159, 190, 966, 997, 935, 966, 842, 873, 811, 842, 718, 749, - 687, 718, 594, 625, 563, 594, 470, 501, 439, 470, 346, 377, 315, 346, 222, - 253, 191, 222, 967, 998, 843, 874, 719, 750, 595, 626, 471, 502, 347, 378, - 223, 254, 872, 903, 748, 779, 624, 655, 500, 531, 376, 407, 252, 283, 904, - 935, 873, 904, 780, 811, 749, 780, 656, 687, 625, 656, 532, 563, 501, 532, - 408, 439, 377, 408, 284, 315, 253, 284, 936, 967, 905, 936, 874, 905, 812, - 843, 781, 812, 750, 781, 688, 719, 657, 688, 626, 657, 564, 595, 533, 564, - 502, 533, 440, 471, 409, 440, 378, 409, 316, 347, 285, 316, 254, 285, 968, - 999, 937, 968, 906, 937, 875, 906, 844, 875, 813, 844, 782, 813, 751, 782, - 720, 751, 689, 720, 658, 689, 627, 658, 596, 627, 565, 596, 534, 565, 503, - 534, 472, 503, 441, 472, 410, 441, 379, 410, 348, 379, 317, 348, 286, 317, - 255, 286, 969, 1000, 938, 969, 907, 938, 845, 876, 814, 845, 783, 814, 721, - 752, 690, 721, 659, 690, 597, 628, 566, 597, 535, 566, 473, 504, 442, 473, - 411, 442, 349, 380, 318, 349, 287, 318, 970, 1001, 939, 970, 846, 877, 815, - 846, 722, 753, 691, 722, 598, 629, 567, 598, 474, 505, 443, 474, 350, 381, - 319, 350, 971, 1002, 847, 878, 723, 754, 599, 630, 475, 506, 351, 382, 876, - 907, 752, 783, 628, 659, 504, 535, 380, 411, 908, 939, 877, 908, 784, 815, - 753, 784, 660, 691, 629, 660, 536, 567, 505, 536, 412, 443, 381, 412, 940, - 971, 909, 940, 878, 909, 816, 847, 785, 
816, 754, 785, 692, 723, 661, 692, - 630, 661, 568, 599, 537, 568, 506, 537, 444, 475, 413, 444, 382, 413, 972, - 1003, 941, 972, 910, 941, 879, 910, 848, 879, 817, 848, 786, 817, 755, 786, - 724, 755, 693, 724, 662, 693, 631, 662, 600, 631, 569, 600, 538, 569, 507, - 538, 476, 507, 445, 476, 414, 445, 383, 414, 973, 1004, 942, 973, 911, 942, - 849, 880, 818, 849, 787, 818, 725, 756, 694, 725, 663, 694, 601, 632, 570, - 601, 539, 570, 477, 508, 446, 477, 415, 446, 974, 1005, 943, 974, 850, 881, - 819, 850, 726, 757, 695, 726, 602, 633, 571, 602, 478, 509, 447, 478, 975, - 1006, 851, 882, 727, 758, 603, 634, 479, 510, 880, 911, 756, 787, 632, 663, - 508, 539, 912, 943, 881, 912, 788, 819, 757, 788, 664, 695, 633, 664, 540, - 571, 509, 540, 944, 975, 913, 944, 882, 913, 820, 851, 789, 820, 758, 789, - 696, 727, 665, 696, 634, 665, 572, 603, 541, 572, 510, 541, 976, 1007, 945, - 976, 914, 945, 883, 914, 852, 883, 821, 852, 790, 821, 759, 790, 728, 759, - 697, 728, 666, 697, 635, 666, 604, 635, 573, 604, 542, 573, 511, 542, 977, - 1008, 946, 977, 915, 946, 853, 884, 822, 853, 791, 822, 729, 760, 698, 729, - 667, 698, 605, 636, 574, 605, 543, 574, 978, 1009, 947, 978, 854, 885, 823, - 854, 730, 761, 699, 730, 606, 637, 575, 606, 979, 1010, 855, 886, 731, 762, - 607, 638, 884, 915, 760, 791, 636, 667, 916, 947, 885, 916, 792, 823, 761, - 792, 668, 699, 637, 668, 948, 979, 917, 948, 886, 917, 824, 855, 793, 824, - 762, 793, 700, 731, 669, 700, 638, 669, 980, 1011, 949, 980, 918, 949, 887, - 918, 856, 887, 825, 856, 794, 825, 763, 794, 732, 763, 701, 732, 670, 701, - 639, 670, 981, 1012, 950, 981, 919, 950, 857, 888, 826, 857, 795, 826, 733, - 764, 702, 733, 671, 702, 982, 1013, 951, 982, 858, 889, 827, 858, 734, 765, - 703, 734, 983, 1014, 859, 890, 735, 766, 888, 919, 764, 795, 920, 951, 889, - 920, 796, 827, 765, 796, 952, 983, 921, 952, 890, 921, 828, 859, 797, 828, - 766, 797, 984, 1015, 953, 984, 922, 953, 891, 922, 860, 891, 829, 860, 798, - 829, 767, 798, 985, 
1016, 954, 985, 923, 954, 861, 892, 830, 861, 799, 830, - 986, 1017, 955, 986, 862, 893, 831, 862, 987, 1018, 863, 894, 892, 923, 924, - 955, 893, 924, 956, 987, 925, 956, 894, 925, 988, 1019, 957, 988, 926, 957, - 895, 926, 989, 1020, 958, 989, 927, 958, 990, 1021, 959, 990, 991, 1022, 0, 0, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_4x4[16]) = { - 0, 2, 5, 8, 1, 3, 9, 12, 4, 7, 11, 14, 6, 10, 13, 15, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_col_iscan_4x4[16]) = { - 0, 3, 7, 11, 1, 5, 9, 12, 2, 6, 10, 14, 4, 8, 13, 15, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_row_iscan_4x4[16]) = { - 0, 1, 3, 5, 2, 4, 6, 9, 7, 8, 11, 13, 10, 12, 14, 15, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_col_iscan_8x8[64]) = { - 0, 3, 8, 15, 22, 32, 40, 47, 1, 5, 11, 18, 26, 34, 44, 51, - 2, 7, 13, 20, 28, 38, 46, 54, 4, 10, 16, 24, 31, 41, 50, 56, - 6, 12, 21, 27, 35, 43, 52, 58, 9, 17, 25, 33, 39, 48, 55, 60, - 14, 23, 30, 37, 45, 53, 59, 62, 19, 29, 36, 42, 49, 57, 61, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_row_iscan_8x8[64]) = { - 0, 1, 2, 5, 8, 12, 19, 24, 3, 4, 7, 10, 15, 20, 30, 39, - 6, 9, 13, 16, 21, 27, 37, 46, 11, 14, 17, 23, 28, 34, 44, 52, - 18, 22, 25, 31, 35, 41, 50, 57, 26, 29, 33, 38, 43, 49, 55, 59, - 32, 36, 42, 47, 51, 54, 60, 61, 40, 45, 48, 53, 56, 58, 62, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_8x8[64]) = { - 0, 2, 5, 9, 14, 22, 31, 37, 1, 4, 8, 13, 19, 26, 38, 44, - 3, 6, 10, 17, 24, 30, 42, 49, 7, 11, 15, 21, 29, 36, 47, 53, - 12, 16, 20, 27, 34, 43, 52, 57, 18, 23, 28, 35, 41, 48, 56, 60, - 25, 32, 39, 45, 50, 55, 59, 62, 33, 40, 46, 51, 54, 58, 61, 63, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_col_iscan_16x16[256]) = { - 0, 4, 11, 20, 31, 43, 59, 75, 85, 109, 130, 150, 165, 181, 195, 198, - 1, 6, 14, 23, 34, 47, 64, 81, 95, 114, 135, 153, 171, 188, 201, 212, - 2, 8, 16, 25, 38, 52, 67, 83, 101, 116, 136, 157, 172, 190, 205, 216, - 3, 10, 18, 29, 
41, 55, 71, 89, 103, 119, 141, 159, 176, 194, 208, 218, - 5, 12, 21, 32, 45, 58, 74, 93, 104, 123, 144, 164, 179, 196, 210, 223, - 7, 15, 26, 37, 49, 63, 78, 96, 112, 129, 146, 166, 182, 200, 215, 228, - 9, 19, 28, 39, 54, 69, 86, 102, 117, 132, 151, 170, 187, 206, 220, 230, - 13, 24, 35, 46, 60, 73, 91, 108, 122, 137, 154, 174, 189, 207, 224, 235, - 17, 30, 40, 53, 66, 82, 98, 115, 126, 142, 161, 180, 197, 213, 227, 237, - 22, 36, 48, 62, 76, 92, 105, 120, 133, 147, 167, 186, 203, 219, 232, 240, - 27, 44, 56, 70, 84, 99, 113, 127, 140, 156, 175, 193, 209, 226, 236, 244, - 33, 51, 68, 79, 94, 110, 125, 138, 149, 162, 184, 202, 217, 229, 241, 247, - 42, 61, 77, 90, 106, 121, 134, 148, 160, 173, 191, 211, 225, 238, 245, 251, - 50, 72, 87, 100, 118, 128, 145, 158, 168, 183, 204, 222, 233, 242, 249, 253, - 57, 80, 97, 111, 131, 143, 155, 169, 178, 192, 214, 231, 239, 246, 250, 254, - 65, 88, 107, 124, 139, 152, 163, 177, 185, 199, 221, 234, 243, 248, 252, 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_row_iscan_16x16[256]) = { - 0, 1, 2, 4, 6, 9, 12, 17, 22, 29, 36, 43, 54, 64, 76, 86, - 3, 5, 7, 11, 15, 19, 25, 32, 38, 48, 59, 68, 84, 99, 115, 130, - 8, 10, 13, 18, 23, 27, 33, 42, 51, 60, 72, 88, 103, 119, 142, 167, - 14, 16, 20, 26, 31, 37, 44, 53, 61, 73, 85, 100, 116, 135, 161, 185, - 21, 24, 30, 35, 40, 47, 55, 65, 74, 81, 94, 112, 133, 154, 179, 205, - 28, 34, 39, 45, 50, 58, 67, 77, 87, 96, 106, 121, 146, 169, 196, 212, - 41, 46, 49, 56, 63, 70, 79, 90, 98, 107, 122, 138, 159, 182, 207, 222, - 52, 57, 62, 69, 75, 83, 93, 102, 110, 120, 134, 150, 176, 195, 215, 226, - 66, 71, 78, 82, 91, 97, 108, 113, 127, 136, 148, 168, 188, 202, 221, 232, - 80, 89, 92, 101, 105, 114, 125, 131, 139, 151, 162, 177, 192, 208, 223, 234, - 95, 104, 109, 117, 123, 128, 143, 144, 155, 165, 175, 190, 206, 219, 233, 239, - 111, 118, 124, 129, 140, 147, 157, 164, 170, 181, 191, 203, 224, 230, 240, - 243, 126, 132, 137, 145, 153, 160, 174, 178, 184, 197, 204, 216, 231, 237, - 
244, 246, 141, 149, 156, 166, 172, 180, 189, 199, 200, 210, 220, 228, 238, - 242, 249, 251, 152, 163, 171, 183, 186, 193, 201, 211, 214, 218, 227, 236, - 245, 247, 252, 253, 158, 173, 187, 194, 198, 209, 213, 217, 225, 229, 235, - 241, 248, 250, 254, 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_16x16[256]) = { - 0, 2, 5, 9, 17, 24, 36, 44, 55, 72, 88, 104, 128, 143, 166, 179, - 1, 4, 8, 13, 20, 30, 40, 54, 66, 79, 96, 113, 141, 154, 178, 196, - 3, 7, 11, 18, 25, 33, 46, 57, 71, 86, 101, 119, 148, 164, 186, 201, - 6, 12, 16, 23, 31, 39, 53, 64, 78, 92, 110, 127, 153, 169, 193, 208, - 10, 14, 19, 28, 37, 47, 58, 67, 84, 98, 114, 133, 161, 176, 198, 214, - 15, 21, 26, 34, 43, 52, 65, 77, 91, 106, 120, 140, 165, 185, 205, 221, - 22, 27, 32, 41, 48, 60, 73, 85, 99, 116, 130, 151, 175, 190, 211, 225, - 29, 35, 42, 49, 59, 69, 81, 95, 108, 125, 139, 155, 182, 197, 217, 229, - 38, 45, 51, 61, 68, 80, 93, 105, 118, 134, 150, 168, 191, 207, 223, 234, - 50, 56, 63, 74, 83, 94, 109, 117, 129, 147, 163, 177, 199, 213, 228, 238, - 62, 70, 76, 87, 97, 107, 122, 131, 145, 159, 172, 188, 210, 222, 235, 242, - 75, 82, 90, 102, 112, 124, 138, 146, 157, 173, 187, 202, 219, 230, 240, 245, - 89, 100, 111, 123, 132, 142, 156, 167, 180, 189, 203, 216, 231, 237, 246, 250, - 103, 115, 126, 136, 149, 162, 171, 183, 194, 204, 215, 224, 236, 241, 248, - 252, 121, 135, 144, 158, 170, 181, 192, 200, 209, 218, 227, 233, 243, 244, - 251, 254, 137, 152, 160, 174, 184, 195, 206, 212, 220, 226, 232, 239, 247, - 249, 253, 255, -}; - -DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_32x32[1024]) = { - 0, 2, 5, 10, 17, 25, 38, 47, 62, 83, 101, 121, 145, 170, 193, 204, - 210, 219, 229, 233, 245, 257, 275, 299, 342, 356, 377, 405, 455, 471, 495, - 527, 1, 4, 8, 15, 22, 30, 45, 58, 74, 92, 112, 133, 158, 184, 203, 215, 222, - 228, 234, 237, 256, 274, 298, 317, 355, 376, 404, 426, 470, 494, 526, 551, - 3, 7, 12, 18, 28, 36, 52, 64, 82, 102, 118, 142, 164, 189, 208, 217, 
224, - 231, 235, 238, 273, 297, 316, 329, 375, 403, 425, 440, 493, 525, 550, 567, - 6, 11, 16, 23, 31, 43, 60, 73, 90, 109, 126, 150, 173, 196, 211, 220, 226, - 232, 236, 239, 296, 315, 328, 335, 402, 424, 439, 447, 524, 549, 566, 575, - 9, 14, 19, 29, 37, 50, 65, 78, 95, 116, 134, 157, 179, 201, 214, 223, 244, - 255, 272, 295, 341, 354, 374, 401, 454, 469, 492, 523, 582, 596, 617, 645, - 13, 20, 26, 35, 44, 54, 72, 85, 105, 123, 140, 163, 182, 205, 216, 225, - 254, 271, 294, 314, 353, 373, 400, 423, 468, 491, 522, 548, 595, 616, 644, - 666, 21, 27, 33, 42, 53, 63, 80, 94, 113, 132, 151, 172, 190, 209, 218, 227, - 270, 293, 313, 327, 372, 399, 422, 438, 490, 521, 547, 565, 615, 643, 665, - 680, 24, 32, 39, 48, 57, 71, 88, 104, 120, 139, 159, 178, 197, 212, 221, 230, - 292, 312, 326, 334, 398, 421, 437, 446, 520, 546, 564, 574, 642, 664, 679, - 687, 34, 40, 46, 56, 68, 81, 96, 111, 130, 147, 167, 186, 243, 253, 269, 291, - 340, 352, 371, 397, 453, 467, 489, 519, 581, 594, 614, 641, 693, 705, 723, - 747, 41, 49, 55, 67, 77, 91, 107, 124, 138, 161, 177, 194, 252, 268, 290, - 311, 351, 370, 396, 420, 466, 488, 518, 545, 593, 613, 640, 663, 704, 722, - 746, 765, 51, 59, 66, 76, 89, 99, 119, 131, 149, 168, 181, 200, 267, 289, - 310, 325, 369, 395, 419, 436, 487, 517, 544, 563, 612, 639, 662, 678, 721, - 745, 764, 777, 61, 69, 75, 87, 100, 114, 129, 144, 162, 180, 191, 207, 288, - 309, 324, 333, 394, 418, 435, 445, 516, 543, 562, 573, 638, 661, 677, 686, - 744, 763, 776, 783, 70, 79, 86, 97, 108, 122, 137, 155, 242, 251, 266, 287, - 339, 350, 368, 393, 452, 465, 486, 515, 580, 592, 611, 637, 692, 703, 720, - 743, 788, 798, 813, 833, 84, 93, 103, 110, 125, 141, 154, 171, 250, 265, 286, - 308, 349, 367, 392, 417, 464, 485, 514, 542, 591, 610, 636, 660, 702, 719, - 742, 762, 797, 812, 832, 848, 98, 106, 115, 127, 143, 156, 169, 185, 264, - 285, 307, 323, 366, 391, 416, 434, 484, 513, 541, 561, 609, 635, 659, 676, - 718, 741, 761, 775, 811, 831, 847, 858, 117, 128, 136, 148, 
160, 175, 188, - 198, 284, 306, 322, 332, 390, 415, 433, 444, 512, 540, 560, 572, 634, 658, - 675, 685, 740, 760, 774, 782, 830, 846, 857, 863, 135, 146, 152, 165, 241, - 249, 263, 283, 338, 348, 365, 389, 451, 463, 483, 511, 579, 590, 608, 633, - 691, 701, 717, 739, 787, 796, 810, 829, 867, 875, 887, 903, 153, 166, 174, - 183, 248, 262, 282, 305, 347, 364, 388, 414, 462, 482, 510, 539, 589, 607, - 632, 657, 700, 716, 738, 759, 795, 809, 828, 845, 874, 886, 902, 915, 176, - 187, 195, 202, 261, 281, 304, 321, 363, 387, 413, 432, 481, 509, 538, 559, - 606, 631, 656, 674, 715, 737, 758, 773, 808, 827, 844, 856, 885, 901, 914, - 923, 192, 199, 206, 213, 280, 303, 320, 331, 386, 412, 431, 443, 508, 537, - 558, 571, 630, 655, 673, 684, 736, 757, 772, 781, 826, 843, 855, 862, 900, - 913, 922, 927, 240, 247, 260, 279, 337, 346, 362, 385, 450, 461, 480, 507, - 578, 588, 605, 629, 690, 699, 714, 735, 786, 794, 807, 825, 866, 873, 884, - 899, 930, 936, 945, 957, 246, 259, 278, 302, 345, 361, 384, 411, 460, 479, - 506, 536, 587, 604, 628, 654, 698, 713, 734, 756, 793, 806, 824, 842, 872, - 883, 898, 912, 935, 944, 956, 966, 258, 277, 301, 319, 360, 383, 410, 430, - 478, 505, 535, 557, 603, 627, 653, 672, 712, 733, 755, 771, 805, 823, 841, - 854, 882, 897, 911, 921, 943, 955, 965, 972, 276, 300, 318, 330, 382, 409, - 429, 442, 504, 534, 556, 570, 626, 652, 671, 683, 732, 754, 770, 780, 822, - 840, 853, 861, 896, 910, 920, 926, 954, 964, 971, 975, 336, 344, 359, 381, - 449, 459, 477, 503, 577, 586, 602, 625, 689, 697, 711, 731, 785, 792, 804, - 821, 865, 871, 881, 895, 929, 934, 942, 953, 977, 981, 987, 995, 343, 358, - 380, 408, 458, 476, 502, 533, 585, 601, 624, 651, 696, 710, 730, 753, 791, - 803, 820, 839, 870, 880, 894, 909, 933, 941, 952, 963, 980, 986, 994, 1001, - 357, 379, 407, 428, 475, 501, 532, 555, 600, 623, 650, 670, 709, 729, 752, - 769, 802, 819, 838, 852, 879, 893, 908, 919, 940, 951, 962, 970, 985, 993, - 1000, 1005, 378, 406, 427, 441, 500, 531, 554, 569, 622, 
649, 669, 682, 728, - 751, 768, 779, 818, 837, 851, 860, 892, 907, 918, 925, 950, 961, 969, 974, - 992, 999, 1004, 1007, 448, 457, 474, 499, 576, 584, 599, 621, 688, 695, 708, - 727, 784, 790, 801, 817, 864, 869, 878, 891, 928, 932, 939, 949, 976, 979, - 984, 991, 1008, 1010, 1013, 1017, 456, 473, 498, 530, 583, 598, 620, 648, - 694, 707, 726, 750, 789, 800, 816, 836, 868, 877, 890, 906, 931, 938, 948, - 960, 978, 983, 990, 998, 1009, 1012, 1016, 1020, 472, 497, 529, 553, 597, - 619, 647, 668, 706, 725, 749, 767, 799, 815, 835, 850, 876, 889, 905, 917, - 937, 947, 959, 968, 982, 989, 997, 1003, 1011, 1015, 1019, 1022, 496, 528, - 552, 568, 618, 646, 667, 681, 724, 748, 766, 778, 814, 834, 849, 859, 888, - 904, 916, 924, 946, 958, 967, 973, 988, 996, 1002, 1006, 1014, 1018, 1021, - 1023, -}; - -const scan_order vp9_default_scan_orders[TX_SIZES] = { - {default_scan_4x4, vp9_default_iscan_4x4, default_scan_4x4_neighbors}, - {default_scan_8x8, vp9_default_iscan_8x8, default_scan_8x8_neighbors}, - {default_scan_16x16, vp9_default_iscan_16x16, default_scan_16x16_neighbors}, - {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, -}; - -const scan_order vp9_scan_orders[TX_SIZES][TX_TYPES] = { - { // TX_4X4 - {default_scan_4x4, vp9_default_iscan_4x4, default_scan_4x4_neighbors}, - {row_scan_4x4, vp9_row_iscan_4x4, row_scan_4x4_neighbors}, - {col_scan_4x4, vp9_col_iscan_4x4, col_scan_4x4_neighbors}, - {default_scan_4x4, vp9_default_iscan_4x4, default_scan_4x4_neighbors} - }, { // TX_8X8 - {default_scan_8x8, vp9_default_iscan_8x8, default_scan_8x8_neighbors}, - {row_scan_8x8, vp9_row_iscan_8x8, row_scan_8x8_neighbors}, - {col_scan_8x8, vp9_col_iscan_8x8, col_scan_8x8_neighbors}, - {default_scan_8x8, vp9_default_iscan_8x8, default_scan_8x8_neighbors} - }, { // TX_16X16 - {default_scan_16x16, vp9_default_iscan_16x16, default_scan_16x16_neighbors}, - {row_scan_16x16, vp9_row_iscan_16x16, row_scan_16x16_neighbors}, - {col_scan_16x16, vp9_col_iscan_16x16, 
col_scan_16x16_neighbors}, - {default_scan_16x16, vp9_default_iscan_16x16, default_scan_16x16_neighbors} - }, { // TX_32X32 - {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, - {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, - {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, - {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, - } -}; diff --git a/thirdparty/libvpx/vp9/common/vp9_scan.h b/thirdparty/libvpx/vp9/common/vp9_scan.h deleted file mode 100644 index 4c1ee8107c..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_scan.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_COMMON_VP9_SCAN_H_ -#define VP9_COMMON_VP9_SCAN_H_ - -#include "vpx/vpx_integer.h" -#include "vpx_ports/mem.h" - -#include "vp9/common/vp9_enums.h" -#include "vp9/common/vp9_blockd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_NEIGHBORS 2 - -typedef struct { - const int16_t *scan; - const int16_t *iscan; - const int16_t *neighbors; -} scan_order; - -extern const scan_order vp9_default_scan_orders[TX_SIZES]; -extern const scan_order vp9_scan_orders[TX_SIZES][TX_TYPES]; - -static INLINE int get_coef_context(const int16_t *neighbors, - const uint8_t *token_cache, int c) { - return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] + - token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1; -} - -static INLINE const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size, - PLANE_TYPE type, int block_idx) { - const MODE_INFO *const mi = xd->mi[0]; - - if (is_inter_block(mi) || type != PLANE_TYPE_Y || xd->lossless) { - return &vp9_default_scan_orders[tx_size]; - } else { - const PREDICTION_MODE mode = get_y_mode(mi, block_idx); - return &vp9_scan_orders[tx_size][intra_mode_to_tx_type_lookup[mode]]; - } -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_SCAN_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_seg_common.c b/thirdparty/libvpx/vp9/common/vp9_seg_common.c deleted file mode 100644 index 7af61629a0..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_seg_common.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <assert.h> - -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_loopfilter.h" -#include "vp9/common/vp9_seg_common.h" -#include "vp9/common/vp9_quant_common.h" - -static const int seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 0, 0 }; - -static const int seg_feature_data_max[SEG_LVL_MAX] = { - MAXQ, MAX_LOOP_FILTER, 3, 0 }; - -// These functions provide access to new segment level features. -// Eventually these function may be "optimized out" but for the moment, -// the coding mechanism is still subject to change so these provide a -// convenient single point of change. - -void vp9_clearall_segfeatures(struct segmentation *seg) { - vp9_zero(seg->feature_data); - vp9_zero(seg->feature_mask); - seg->aq_av_offset = 0; -} - -void vp9_enable_segfeature(struct segmentation *seg, int segment_id, - SEG_LVL_FEATURES feature_id) { - seg->feature_mask[segment_id] |= 1 << feature_id; -} - -int vp9_seg_feature_data_max(SEG_LVL_FEATURES feature_id) { - return seg_feature_data_max[feature_id]; -} - -int vp9_is_segfeature_signed(SEG_LVL_FEATURES feature_id) { - return seg_feature_data_signed[feature_id]; -} - -void vp9_set_segdata(struct segmentation *seg, int segment_id, - SEG_LVL_FEATURES feature_id, int seg_data) { - assert(seg_data <= seg_feature_data_max[feature_id]); - if (seg_data < 0) { - assert(seg_feature_data_signed[feature_id]); - assert(-seg_data <= seg_feature_data_max[feature_id]); - } - - seg->feature_data[segment_id][feature_id] = seg_data; -} - -const vpx_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)] = { - 2, 4, 6, 8, 10, 12, - 0, -1, -2, -3, -4, -5, -6, -7 -}; - - -// TBD? 
Functions to read and write segment data with range / validity checking diff --git a/thirdparty/libvpx/vp9/common/vp9_seg_common.h b/thirdparty/libvpx/vp9/common/vp9_seg_common.h deleted file mode 100644 index 99a9440c17..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_seg_common.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_SEG_COMMON_H_ -#define VP9_COMMON_VP9_SEG_COMMON_H_ - -#include "vpx_dsp/prob.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define SEGMENT_DELTADATA 0 -#define SEGMENT_ABSDATA 1 - -#define MAX_SEGMENTS 8 -#define SEG_TREE_PROBS (MAX_SEGMENTS-1) - -#define PREDICTION_PROBS 3 - -// Segment level features. -typedef enum { - SEG_LVL_ALT_Q = 0, // Use alternate Quantizer .... - SEG_LVL_ALT_LF = 1, // Use alternate loop filter value... 
- SEG_LVL_REF_FRAME = 2, // Optional Segment reference frame - SEG_LVL_SKIP = 3, // Optional Segment (0,0) + skip mode - SEG_LVL_MAX = 4 // Number of features supported -} SEG_LVL_FEATURES; - - -struct segmentation { - uint8_t enabled; - uint8_t update_map; - uint8_t update_data; - uint8_t abs_delta; - uint8_t temporal_update; - - vpx_prob tree_probs[SEG_TREE_PROBS]; - vpx_prob pred_probs[PREDICTION_PROBS]; - - int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX]; - uint32_t feature_mask[MAX_SEGMENTS]; - int aq_av_offset; -}; - -static INLINE int segfeature_active(const struct segmentation *seg, - int segment_id, - SEG_LVL_FEATURES feature_id) { - return seg->enabled && - (seg->feature_mask[segment_id] & (1 << feature_id)); -} - -void vp9_clearall_segfeatures(struct segmentation *seg); - -void vp9_enable_segfeature(struct segmentation *seg, - int segment_id, - SEG_LVL_FEATURES feature_id); - -int vp9_seg_feature_data_max(SEG_LVL_FEATURES feature_id); - -int vp9_is_segfeature_signed(SEG_LVL_FEATURES feature_id); - -void vp9_set_segdata(struct segmentation *seg, - int segment_id, - SEG_LVL_FEATURES feature_id, - int seg_data); - -static INLINE int get_segdata(const struct segmentation *seg, int segment_id, - SEG_LVL_FEATURES feature_id) { - return seg->feature_data[segment_id][feature_id]; -} - -extern const vpx_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)]; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_SEG_COMMON_H_ - diff --git a/thirdparty/libvpx/vp9/common/vp9_thread_common.c b/thirdparty/libvpx/vp9/common/vp9_thread_common.c deleted file mode 100644 index db78d6be89..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_thread_common.c +++ /dev/null @@ -1,435 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_mem/vpx_mem.h" -#include "vp9/common/vp9_entropymode.h" -#include "vp9/common/vp9_thread_common.h" -#include "vp9/common/vp9_reconinter.h" -#include "vp9/common/vp9_loopfilter.h" - -#if CONFIG_MULTITHREAD -static INLINE void mutex_lock(pthread_mutex_t *const mutex) { - const int kMaxTryLocks = 4000; - int locked = 0; - int i; - - for (i = 0; i < kMaxTryLocks; ++i) { - if (!pthread_mutex_trylock(mutex)) { - locked = 1; - break; - } - } - - if (!locked) - pthread_mutex_lock(mutex); -} -#endif // CONFIG_MULTITHREAD - -static INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) { -#if CONFIG_MULTITHREAD - const int nsync = lf_sync->sync_range; - - if (r && !(c & (nsync - 1))) { - pthread_mutex_t *const mutex = &lf_sync->mutex_[r - 1]; - mutex_lock(mutex); - - while (c > lf_sync->cur_sb_col[r - 1] - nsync) { - pthread_cond_wait(&lf_sync->cond_[r - 1], mutex); - } - pthread_mutex_unlock(mutex); - } -#else - (void)lf_sync; - (void)r; - (void)c; -#endif // CONFIG_MULTITHREAD -} - -static INLINE void sync_write(VP9LfSync *const lf_sync, int r, int c, - const int sb_cols) { -#if CONFIG_MULTITHREAD - const int nsync = lf_sync->sync_range; - int cur; - // Only signal when there are enough filtered SB for next row to run. - int sig = 1; - - if (c < sb_cols - 1) { - cur = c; - if (c % nsync) - sig = 0; - } else { - cur = sb_cols + nsync; - } - - if (sig) { - mutex_lock(&lf_sync->mutex_[r]); - - lf_sync->cur_sb_col[r] = cur; - - pthread_cond_signal(&lf_sync->cond_[r]); - pthread_mutex_unlock(&lf_sync->mutex_[r]); - } -#else - (void)lf_sync; - (void)r; - (void)c; - (void)sb_cols; -#endif // CONFIG_MULTITHREAD -} - -// Implement row loopfiltering for each thread. 
-static INLINE -void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer, - VP9_COMMON *const cm, - struct macroblockd_plane planes[MAX_MB_PLANE], - int start, int stop, int y_only, - VP9LfSync *const lf_sync) { - const int num_planes = y_only ? 1 : MAX_MB_PLANE; - const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; - int mi_row, mi_col; - enum lf_path path; - if (y_only) - path = LF_PATH_444; - else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1) - path = LF_PATH_420; - else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0) - path = LF_PATH_444; - else - path = LF_PATH_SLOW; - - for (mi_row = start; mi_row < stop; - mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) { - MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride; - LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0); - - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE, ++lfm) { - const int r = mi_row >> MI_BLOCK_SIZE_LOG2; - const int c = mi_col >> MI_BLOCK_SIZE_LOG2; - int plane; - - sync_read(lf_sync, r, c); - - vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); - - vp9_adjust_mask(cm, mi_row, mi_col, lfm); - - vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm); - for (plane = 1; plane < num_planes; ++plane) { - switch (path) { - case LF_PATH_420: - vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, lfm); - break; - case LF_PATH_444: - vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, lfm); - break; - case LF_PATH_SLOW: - vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, - mi_row, mi_col); - break; - } - } - - sync_write(lf_sync, r, c, sb_cols); - } - } -} - -// Row-based multi-threaded loopfilter hook -static int loop_filter_row_worker(VP9LfSync *const lf_sync, - LFWorkerData *const lf_data) { - thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, - lf_data->start, lf_data->stop, lf_data->y_only, - lf_sync); - return 1; -} - -static void 
loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, - VP9_COMMON *cm, - struct macroblockd_plane planes[MAX_MB_PLANE], - int start, int stop, int y_only, - VPxWorker *workers, int nworkers, - VP9LfSync *lf_sync) { - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - // Number of superblock rows and cols - const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; - // Decoder may allocate more threads than number of tiles based on user's - // input. - const int tile_cols = 1 << cm->log2_tile_cols; - const int num_workers = VPXMIN(nworkers, tile_cols); - int i; - - if (!lf_sync->sync_range || sb_rows != lf_sync->rows || - num_workers > lf_sync->num_workers) { - vp9_loop_filter_dealloc(lf_sync); - vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers); - } - - // Initialize cur_sb_col to -1 for all SB rows. - memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); - - // Set up loopfilter thread data. - // The decoder is capping num_workers because it has been observed that using - // more threads on the loopfilter than there are cores will hurt performance - // on Android. This is because the system will only schedule the tile decode - // workers on cores equal to the number of tile columns. Then if the decoder - // tries to use more threads for the loopfilter, it will hurt performance - // because of contention. If the multithreading code changes in the future - // then the number of workers used by the loopfilter should be revisited. 
- for (i = 0; i < num_workers; ++i) { - VPxWorker *const worker = &workers[i]; - LFWorkerData *const lf_data = &lf_sync->lfdata[i]; - - worker->hook = (VPxWorkerHook)loop_filter_row_worker; - worker->data1 = lf_sync; - worker->data2 = lf_data; - - // Loopfilter data - vp9_loop_filter_data_reset(lf_data, frame, cm, planes); - lf_data->start = start + i * MI_BLOCK_SIZE; - lf_data->stop = stop; - lf_data->y_only = y_only; - - // Start loopfiltering - if (i == num_workers - 1) { - winterface->execute(worker); - } else { - winterface->launch(worker); - } - } - - // Wait till all rows are finished - for (i = 0; i < num_workers; ++i) { - winterface->sync(&workers[i]); - } -} - -void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, - VP9_COMMON *cm, - struct macroblockd_plane planes[MAX_MB_PLANE], - int frame_filter_level, - int y_only, int partial_frame, - VPxWorker *workers, int num_workers, - VP9LfSync *lf_sync) { - int start_mi_row, end_mi_row, mi_rows_to_filter; - - if (!frame_filter_level) return; - - start_mi_row = 0; - mi_rows_to_filter = cm->mi_rows; - if (partial_frame && cm->mi_rows > 8) { - start_mi_row = cm->mi_rows >> 1; - start_mi_row &= 0xfffffff8; - mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); - } - end_mi_row = start_mi_row + mi_rows_to_filter; - vp9_loop_filter_frame_init(cm, frame_filter_level); - - loop_filter_rows_mt(frame, cm, planes, start_mi_row, end_mi_row, - y_only, workers, num_workers, lf_sync); -} - -// Set up nsync by width. -static INLINE int get_sync_range(int width) { - // nsync numbers are picked by testing. For example, for 4k - // video, using 4 gives best performance. 
- if (width < 640) - return 1; - else if (width <= 1280) - return 2; - else if (width <= 4096) - return 4; - else - return 8; -} - -// Allocate memory for lf row synchronization -void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, - int width, int num_workers) { - lf_sync->rows = rows; -#if CONFIG_MULTITHREAD - { - int i; - - CHECK_MEM_ERROR(cm, lf_sync->mutex_, - vpx_malloc(sizeof(*lf_sync->mutex_) * rows)); - if (lf_sync->mutex_) { - for (i = 0; i < rows; ++i) { - pthread_mutex_init(&lf_sync->mutex_[i], NULL); - } - } - - CHECK_MEM_ERROR(cm, lf_sync->cond_, - vpx_malloc(sizeof(*lf_sync->cond_) * rows)); - if (lf_sync->cond_) { - for (i = 0; i < rows; ++i) { - pthread_cond_init(&lf_sync->cond_[i], NULL); - } - } - } -#endif // CONFIG_MULTITHREAD - - CHECK_MEM_ERROR(cm, lf_sync->lfdata, - vpx_malloc(num_workers * sizeof(*lf_sync->lfdata))); - lf_sync->num_workers = num_workers; - - CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col, - vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows)); - - // Set up nsync. - lf_sync->sync_range = get_sync_range(width); -} - -// Deallocate lf synchronization related mutex and data -void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) { - if (lf_sync != NULL) { -#if CONFIG_MULTITHREAD - int i; - - if (lf_sync->mutex_ != NULL) { - for (i = 0; i < lf_sync->rows; ++i) { - pthread_mutex_destroy(&lf_sync->mutex_[i]); - } - vpx_free(lf_sync->mutex_); - } - if (lf_sync->cond_ != NULL) { - for (i = 0; i < lf_sync->rows; ++i) { - pthread_cond_destroy(&lf_sync->cond_[i]); - } - vpx_free(lf_sync->cond_); - } -#endif // CONFIG_MULTITHREAD - vpx_free(lf_sync->lfdata); - vpx_free(lf_sync->cur_sb_col); - // clear the structure as the source of this call may be a resize in which - // case this call will be followed by an _alloc() which may fail. - vp9_zero(*lf_sync); - } -} - -// Accumulate frame counts. 
-void vp9_accumulate_frame_counts(FRAME_COUNTS *accum, - const FRAME_COUNTS *counts, int is_dec) { - int i, j, k, l, m; - - for (i = 0; i < BLOCK_SIZE_GROUPS; i++) - for (j = 0; j < INTRA_MODES; j++) - accum->y_mode[i][j] += counts->y_mode[i][j]; - - for (i = 0; i < INTRA_MODES; i++) - for (j = 0; j < INTRA_MODES; j++) - accum->uv_mode[i][j] += counts->uv_mode[i][j]; - - for (i = 0; i < PARTITION_CONTEXTS; i++) - for (j = 0; j < PARTITION_TYPES; j++) - accum->partition[i][j] += counts->partition[i][j]; - - if (is_dec) { - int n; - for (i = 0; i < TX_SIZES; i++) - for (j = 0; j < PLANE_TYPES; j++) - for (k = 0; k < REF_TYPES; k++) - for (l = 0; l < COEF_BANDS; l++) - for (m = 0; m < COEFF_CONTEXTS; m++) { - accum->eob_branch[i][j][k][l][m] += - counts->eob_branch[i][j][k][l][m]; - for (n = 0; n < UNCONSTRAINED_NODES + 1; n++) - accum->coef[i][j][k][l][m][n] += - counts->coef[i][j][k][l][m][n]; - } - } else { - for (i = 0; i < TX_SIZES; i++) - for (j = 0; j < PLANE_TYPES; j++) - for (k = 0; k < REF_TYPES; k++) - for (l = 0; l < COEF_BANDS; l++) - for (m = 0; m < COEFF_CONTEXTS; m++) - accum->eob_branch[i][j][k][l][m] += - counts->eob_branch[i][j][k][l][m]; - // In the encoder, coef is only updated at frame - // level, so not need to accumulate it here. 
- // for (n = 0; n < UNCONSTRAINED_NODES + 1; n++) - // accum->coef[i][j][k][l][m][n] += - // counts->coef[i][j][k][l][m][n]; - } - - for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) - for (j = 0; j < SWITCHABLE_FILTERS; j++) - accum->switchable_interp[i][j] += counts->switchable_interp[i][j]; - - for (i = 0; i < INTER_MODE_CONTEXTS; i++) - for (j = 0; j < INTER_MODES; j++) - accum->inter_mode[i][j] += counts->inter_mode[i][j]; - - for (i = 0; i < INTRA_INTER_CONTEXTS; i++) - for (j = 0; j < 2; j++) - accum->intra_inter[i][j] += counts->intra_inter[i][j]; - - for (i = 0; i < COMP_INTER_CONTEXTS; i++) - for (j = 0; j < 2; j++) - accum->comp_inter[i][j] += counts->comp_inter[i][j]; - - for (i = 0; i < REF_CONTEXTS; i++) - for (j = 0; j < 2; j++) - for (k = 0; k < 2; k++) - accum->single_ref[i][j][k] += counts->single_ref[i][j][k]; - - for (i = 0; i < REF_CONTEXTS; i++) - for (j = 0; j < 2; j++) - accum->comp_ref[i][j] += counts->comp_ref[i][j]; - - for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - for (j = 0; j < TX_SIZES; j++) - accum->tx.p32x32[i][j] += counts->tx.p32x32[i][j]; - - for (j = 0; j < TX_SIZES - 1; j++) - accum->tx.p16x16[i][j] += counts->tx.p16x16[i][j]; - - for (j = 0; j < TX_SIZES - 2; j++) - accum->tx.p8x8[i][j] += counts->tx.p8x8[i][j]; - } - - for (i = 0; i < TX_SIZES; i++) - accum->tx.tx_totals[i] += counts->tx.tx_totals[i]; - - for (i = 0; i < SKIP_CONTEXTS; i++) - for (j = 0; j < 2; j++) - accum->skip[i][j] += counts->skip[i][j]; - - for (i = 0; i < MV_JOINTS; i++) - accum->mv.joints[i] += counts->mv.joints[i]; - - for (k = 0; k < 2; k++) { - nmv_component_counts *const comps = &accum->mv.comps[k]; - const nmv_component_counts *const comps_t = &counts->mv.comps[k]; - - for (i = 0; i < 2; i++) { - comps->sign[i] += comps_t->sign[i]; - comps->class0_hp[i] += comps_t->class0_hp[i]; - comps->hp[i] += comps_t->hp[i]; - } - - for (i = 0; i < MV_CLASSES; i++) - comps->classes[i] += comps_t->classes[i]; - - for (i = 0; i < CLASS0_SIZE; i++) { - 
comps->class0[i] += comps_t->class0[i]; - for (j = 0; j < MV_FP_SIZE; j++) - comps->class0_fp[i][j] += comps_t->class0_fp[i][j]; - } - - for (i = 0; i < MV_OFFSET_BITS; i++) - for (j = 0; j < 2; j++) - comps->bits[i][j] += comps_t->bits[i][j]; - - for (i = 0; i < MV_FP_SIZE; i++) - comps->fp[i] += comps_t->fp[i]; - } -} diff --git a/thirdparty/libvpx/vp9/common/vp9_thread_common.h b/thirdparty/libvpx/vp9/common/vp9_thread_common.h deleted file mode 100644 index b3b60c253f..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_thread_common.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_THREAD_COMMON_H_ -#define VP9_COMMON_VP9_THREAD_COMMON_H_ -#include "./vpx_config.h" -#include "vp9/common/vp9_loopfilter.h" -#include "vpx_util/vpx_thread.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct VP9Common; -struct FRAME_COUNTS; - -// Loopfilter row synchronization -typedef struct VP9LfSyncData { -#if CONFIG_MULTITHREAD - pthread_mutex_t *mutex_; - pthread_cond_t *cond_; -#endif - // Allocate memory to store the loop-filtered superblock index in each row. - int *cur_sb_col; - // The optimal sync_range for different resolution and platform should be - // determined by testing. Currently, it is chosen to be a power-of-2 number. - int sync_range; - int rows; - - // Row-based parallel loopfilter data - LFWorkerData *lfdata; - int num_workers; -} VP9LfSync; - -// Allocate memory for loopfilter row synchronization. 
-void vp9_loop_filter_alloc(VP9LfSync *lf_sync, struct VP9Common *cm, int rows, - int width, int num_workers); - -// Deallocate loopfilter synchronization related mutex and data. -void vp9_loop_filter_dealloc(VP9LfSync *lf_sync); - -// Multi-threaded loopfilter that uses the tile threads. -void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, - struct VP9Common *cm, - struct macroblockd_plane planes[MAX_MB_PLANE], - int frame_filter_level, - int y_only, int partial_frame, - VPxWorker *workers, int num_workers, - VP9LfSync *lf_sync); - -void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum, - const struct FRAME_COUNTS *counts, int is_dec); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_THREAD_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_tile_common.c b/thirdparty/libvpx/vp9/common/vp9_tile_common.c deleted file mode 100644 index 9fcb97c854..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_tile_common.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "vp9/common/vp9_tile_common.h" -#include "vp9/common/vp9_onyxc_int.h" -#include "vpx_dsp/vpx_dsp_common.h" - -#define MIN_TILE_WIDTH_B64 4 -#define MAX_TILE_WIDTH_B64 64 - -static int get_tile_offset(int idx, int mis, int log2) { - const int sb_cols = mi_cols_aligned_to_sb(mis) >> MI_BLOCK_SIZE_LOG2; - const int offset = ((idx * sb_cols) >> log2) << MI_BLOCK_SIZE_LOG2; - return VPXMIN(offset, mis); -} - -void vp9_tile_set_row(TileInfo *tile, const VP9_COMMON *cm, int row) { - tile->mi_row_start = get_tile_offset(row, cm->mi_rows, cm->log2_tile_rows); - tile->mi_row_end = get_tile_offset(row + 1, cm->mi_rows, cm->log2_tile_rows); -} - -void vp9_tile_set_col(TileInfo *tile, const VP9_COMMON *cm, int col) { - tile->mi_col_start = get_tile_offset(col, cm->mi_cols, cm->log2_tile_cols); - tile->mi_col_end = get_tile_offset(col + 1, cm->mi_cols, cm->log2_tile_cols); -} - -void vp9_tile_init(TileInfo *tile, const VP9_COMMON *cm, int row, int col) { - vp9_tile_set_row(tile, cm, row); - vp9_tile_set_col(tile, cm, col); -} - -static int get_min_log2_tile_cols(const int sb64_cols) { - int min_log2 = 0; - while ((MAX_TILE_WIDTH_B64 << min_log2) < sb64_cols) - ++min_log2; - return min_log2; -} - -static int get_max_log2_tile_cols(const int sb64_cols) { - int max_log2 = 1; - while ((sb64_cols >> max_log2) >= MIN_TILE_WIDTH_B64) - ++max_log2; - return max_log2 - 1; -} - -void vp9_get_tile_n_bits(int mi_cols, - int *min_log2_tile_cols, int *max_log2_tile_cols) { - const int sb64_cols = mi_cols_aligned_to_sb(mi_cols) >> MI_BLOCK_SIZE_LOG2; - *min_log2_tile_cols = get_min_log2_tile_cols(sb64_cols); - *max_log2_tile_cols = get_max_log2_tile_cols(sb64_cols); - assert(*min_log2_tile_cols <= *max_log2_tile_cols); -} diff --git a/thirdparty/libvpx/vp9/common/vp9_tile_common.h b/thirdparty/libvpx/vp9/common/vp9_tile_common.h deleted file mode 100644 index ae58805de1..0000000000 --- a/thirdparty/libvpx/vp9/common/vp9_tile_common.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * 
Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_TILE_COMMON_H_ -#define VP9_COMMON_VP9_TILE_COMMON_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -struct VP9Common; - -typedef struct TileInfo { - int mi_row_start, mi_row_end; - int mi_col_start, mi_col_end; -} TileInfo; - -// initializes 'tile->mi_(row|col)_(start|end)' for (row, col) based on -// 'cm->log2_tile_(rows|cols)' & 'cm->mi_(rows|cols)' -void vp9_tile_init(TileInfo *tile, const struct VP9Common *cm, - int row, int col); - -void vp9_tile_set_row(TileInfo *tile, const struct VP9Common *cm, int row); -void vp9_tile_set_col(TileInfo *tile, const struct VP9Common *cm, int col); - -void vp9_get_tile_n_bits(int mi_cols, - int *min_log2_tile_cols, int *max_log2_tile_cols); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_TILE_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/thirdparty/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c deleted file mode 100644 index 1c77b57ff1..0000000000 --- a/thirdparty/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "./vp9_rtcd.h" -#include "vpx_dsp/x86/inv_txfm_sse2.h" -#include "vpx_dsp/x86/txfm_common_sse2.h" -#include "vpx_ports/mem.h" - -void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, - int tx_type) { - __m128i in[2]; - const __m128i zero = _mm_setzero_si128(); - const __m128i eight = _mm_set1_epi16(8); - - in[0] = load_input_data(input); - in[1] = load_input_data(input + 8); - - switch (tx_type) { - case 0: // DCT_DCT - idct4_sse2(in); - idct4_sse2(in); - break; - case 1: // ADST_DCT - idct4_sse2(in); - iadst4_sse2(in); - break; - case 2: // DCT_ADST - iadst4_sse2(in); - idct4_sse2(in); - break; - case 3: // ADST_ADST - iadst4_sse2(in); - iadst4_sse2(in); - break; - default: - assert(0); - break; - } - - // Final round and shift - in[0] = _mm_add_epi16(in[0], eight); - in[1] = _mm_add_epi16(in[1], eight); - - in[0] = _mm_srai_epi16(in[0], 4); - in[1] = _mm_srai_epi16(in[1], 4); - - // Reconstruction and Store - { - __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); - __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2)); - d0 = _mm_unpacklo_epi32(d0, - _mm_cvtsi32_si128(*(const int *)(dest + stride))); - d2 = _mm_unpacklo_epi32( - d2, _mm_cvtsi32_si128(*(const int *)(dest + stride * 3))); - d0 = _mm_unpacklo_epi8(d0, zero); - d2 = _mm_unpacklo_epi8(d2, zero); - d0 = _mm_add_epi16(d0, in[0]); - d2 = _mm_add_epi16(d2, in[1]); - d0 = _mm_packus_epi16(d0, d2); - // store result[0] - *(int *)dest = _mm_cvtsi128_si32(d0); - // store result[1] - d0 = _mm_srli_si128(d0, 4); - *(int *)(dest + stride) = _mm_cvtsi128_si32(d0); - // store result[2] - d0 = _mm_srli_si128(d0, 4); - *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0); - // store result[3] - d0 = _mm_srli_si128(d0, 4); - *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0); - } -} - -void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, - int tx_type) { - __m128i in[8]; - const __m128i zero = _mm_setzero_si128(); - const __m128i 
final_rounding = _mm_set1_epi16(1 << 4); - - // load input data - in[0] = load_input_data(input); - in[1] = load_input_data(input + 8 * 1); - in[2] = load_input_data(input + 8 * 2); - in[3] = load_input_data(input + 8 * 3); - in[4] = load_input_data(input + 8 * 4); - in[5] = load_input_data(input + 8 * 5); - in[6] = load_input_data(input + 8 * 6); - in[7] = load_input_data(input + 8 * 7); - - switch (tx_type) { - case 0: // DCT_DCT - idct8_sse2(in); - idct8_sse2(in); - break; - case 1: // ADST_DCT - idct8_sse2(in); - iadst8_sse2(in); - break; - case 2: // DCT_ADST - iadst8_sse2(in); - idct8_sse2(in); - break; - case 3: // ADST_ADST - iadst8_sse2(in); - iadst8_sse2(in); - break; - default: - assert(0); - break; - } - - // Final rounding and shift - in[0] = _mm_adds_epi16(in[0], final_rounding); - in[1] = _mm_adds_epi16(in[1], final_rounding); - in[2] = _mm_adds_epi16(in[2], final_rounding); - in[3] = _mm_adds_epi16(in[3], final_rounding); - in[4] = _mm_adds_epi16(in[4], final_rounding); - in[5] = _mm_adds_epi16(in[5], final_rounding); - in[6] = _mm_adds_epi16(in[6], final_rounding); - in[7] = _mm_adds_epi16(in[7], final_rounding); - - in[0] = _mm_srai_epi16(in[0], 5); - in[1] = _mm_srai_epi16(in[1], 5); - in[2] = _mm_srai_epi16(in[2], 5); - in[3] = _mm_srai_epi16(in[3], 5); - in[4] = _mm_srai_epi16(in[4], 5); - in[5] = _mm_srai_epi16(in[5], 5); - in[6] = _mm_srai_epi16(in[6], 5); - in[7] = _mm_srai_epi16(in[7], 5); - - RECON_AND_STORE(dest + 0 * stride, in[0]); - RECON_AND_STORE(dest + 1 * stride, in[1]); - RECON_AND_STORE(dest + 2 * stride, in[2]); - RECON_AND_STORE(dest + 3 * stride, in[3]); - RECON_AND_STORE(dest + 4 * stride, in[4]); - RECON_AND_STORE(dest + 5 * stride, in[5]); - RECON_AND_STORE(dest + 6 * stride, in[6]); - RECON_AND_STORE(dest + 7 * stride, in[7]); -} - -void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride, int tx_type) { - __m128i in0[16], in1[16]; - - load_buffer_8x16(input, in0); - input += 8; - 
load_buffer_8x16(input, in1); - - switch (tx_type) { - case 0: // DCT_DCT - idct16_sse2(in0, in1); - idct16_sse2(in0, in1); - break; - case 1: // ADST_DCT - idct16_sse2(in0, in1); - iadst16_sse2(in0, in1); - break; - case 2: // DCT_ADST - iadst16_sse2(in0, in1); - idct16_sse2(in0, in1); - break; - case 3: // ADST_ADST - iadst16_sse2(in0, in1); - iadst16_sse2(in0, in1); - break; - default: - assert(0); - break; - } - - write_buffer_8x16(dest, in0, stride); - dest += 8; - write_buffer_8x16(dest, in1, stride); -} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.c b/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.c deleted file mode 100644 index d63912932c..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.c +++ /dev/null @@ -1,2271 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <assert.h> -#include <stdlib.h> // qsort() - -#include "./vp9_rtcd.h" -#include "./vpx_dsp_rtcd.h" -#include "./vpx_scale_rtcd.h" - -#include "vpx_dsp/bitreader_buffer.h" -#include "vpx_dsp/bitreader.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/mem.h" -#include "vpx_ports/mem_ops.h" -#include "vpx_scale/vpx_scale.h" -#include "vpx_util/vpx_thread.h" - -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_entropy.h" -#include "vp9/common/vp9_entropymode.h" -#include "vp9/common/vp9_idct.h" -#include "vp9/common/vp9_thread_common.h" -#include "vp9/common/vp9_pred_common.h" -#include "vp9/common/vp9_quant_common.h" -#include "vp9/common/vp9_reconintra.h" -#include "vp9/common/vp9_reconinter.h" -#include "vp9/common/vp9_seg_common.h" -#include "vp9/common/vp9_tile_common.h" - -#include "vp9/decoder/vp9_decodeframe.h" -#include "vp9/decoder/vp9_detokenize.h" -#include "vp9/decoder/vp9_decodemv.h" -#include "vp9/decoder/vp9_decoder.h" -#include "vp9/decoder/vp9_dsubexp.h" - -#define MAX_VP9_HEADER_SIZE 80 - -static int is_compound_reference_allowed(const VP9_COMMON *cm) { - int i; - for (i = 1; i < REFS_PER_FRAME; ++i) - if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]) - return 1; - - return 0; -} - -static void setup_compound_reference_mode(VP9_COMMON *cm) { - if (cm->ref_frame_sign_bias[LAST_FRAME] == - cm->ref_frame_sign_bias[GOLDEN_FRAME]) { - cm->comp_fixed_ref = ALTREF_FRAME; - cm->comp_var_ref[0] = LAST_FRAME; - cm->comp_var_ref[1] = GOLDEN_FRAME; - } else if (cm->ref_frame_sign_bias[LAST_FRAME] == - cm->ref_frame_sign_bias[ALTREF_FRAME]) { - cm->comp_fixed_ref = GOLDEN_FRAME; - cm->comp_var_ref[0] = LAST_FRAME; - cm->comp_var_ref[1] = ALTREF_FRAME; - } else { - cm->comp_fixed_ref = LAST_FRAME; - cm->comp_var_ref[0] = GOLDEN_FRAME; - cm->comp_var_ref[1] = ALTREF_FRAME; - } -} - -static int read_is_valid(const uint8_t *start, size_t len, 
const uint8_t *end) { - return len != 0 && len <= (size_t)(end - start); -} - -static int decode_unsigned_max(struct vpx_read_bit_buffer *rb, int max) { - const int data = vpx_rb_read_literal(rb, get_unsigned_bits(max)); - return data > max ? max : data; -} - -static TX_MODE read_tx_mode(vpx_reader *r) { - TX_MODE tx_mode = vpx_read_literal(r, 2); - if (tx_mode == ALLOW_32X32) - tx_mode += vpx_read_bit(r); - return tx_mode; -} - -static void read_tx_mode_probs(struct tx_probs *tx_probs, vpx_reader *r) { - int i, j; - - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - for (j = 0; j < TX_SIZES - 3; ++j) - vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]); - - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - for (j = 0; j < TX_SIZES - 2; ++j) - vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]); - - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - for (j = 0; j < TX_SIZES - 1; ++j) - vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]); -} - -static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vpx_reader *r) { - int i, j; - for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) - for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i) - vp9_diff_update_prob(r, &fc->switchable_interp_prob[j][i]); -} - -static void read_inter_mode_probs(FRAME_CONTEXT *fc, vpx_reader *r) { - int i, j; - for (i = 0; i < INTER_MODE_CONTEXTS; ++i) - for (j = 0; j < INTER_MODES - 1; ++j) - vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]); -} - -static REFERENCE_MODE read_frame_reference_mode(const VP9_COMMON *cm, - vpx_reader *r) { - if (is_compound_reference_allowed(cm)) { - return vpx_read_bit(r) ? (vpx_read_bit(r) ? 
REFERENCE_MODE_SELECT - : COMPOUND_REFERENCE) - : SINGLE_REFERENCE; - } else { - return SINGLE_REFERENCE; - } -} - -static void read_frame_reference_mode_probs(VP9_COMMON *cm, vpx_reader *r) { - FRAME_CONTEXT *const fc = cm->fc; - int i; - - if (cm->reference_mode == REFERENCE_MODE_SELECT) - for (i = 0; i < COMP_INTER_CONTEXTS; ++i) - vp9_diff_update_prob(r, &fc->comp_inter_prob[i]); - - if (cm->reference_mode != COMPOUND_REFERENCE) - for (i = 0; i < REF_CONTEXTS; ++i) { - vp9_diff_update_prob(r, &fc->single_ref_prob[i][0]); - vp9_diff_update_prob(r, &fc->single_ref_prob[i][1]); - } - - if (cm->reference_mode != SINGLE_REFERENCE) - for (i = 0; i < REF_CONTEXTS; ++i) - vp9_diff_update_prob(r, &fc->comp_ref_prob[i]); -} - -static void update_mv_probs(vpx_prob *p, int n, vpx_reader *r) { - int i; - for (i = 0; i < n; ++i) - if (vpx_read(r, MV_UPDATE_PROB)) - p[i] = (vpx_read_literal(r, 7) << 1) | 1; -} - -static void read_mv_probs(nmv_context *ctx, int allow_hp, vpx_reader *r) { - int i, j; - - update_mv_probs(ctx->joints, MV_JOINTS - 1, r); - - for (i = 0; i < 2; ++i) { - nmv_component *const comp_ctx = &ctx->comps[i]; - update_mv_probs(&comp_ctx->sign, 1, r); - update_mv_probs(comp_ctx->classes, MV_CLASSES - 1, r); - update_mv_probs(comp_ctx->class0, CLASS0_SIZE - 1, r); - update_mv_probs(comp_ctx->bits, MV_OFFSET_BITS, r); - } - - for (i = 0; i < 2; ++i) { - nmv_component *const comp_ctx = &ctx->comps[i]; - for (j = 0; j < CLASS0_SIZE; ++j) - update_mv_probs(comp_ctx->class0_fp[j], MV_FP_SIZE - 1, r); - update_mv_probs(comp_ctx->fp, 3, r); - } - - if (allow_hp) { - for (i = 0; i < 2; ++i) { - nmv_component *const comp_ctx = &ctx->comps[i]; - update_mv_probs(&comp_ctx->class0_hp, 1, r); - update_mv_probs(&comp_ctx->hp, 1, r); - } - } -} - -static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane, - const TX_SIZE tx_size, - uint8_t *dst, int stride, - int eob) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - tran_low_t *const dqcoeff = 
pd->dqcoeff; - assert(eob > 0); -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - if (xd->lossless) { - vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); - } else { - switch (tx_size) { - case TX_4X4: - vp9_highbd_idct4x4_add(dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_8X8: - vp9_highbd_idct8x8_add(dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_16X16: - vp9_highbd_idct16x16_add(dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_32X32: - vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); - break; - default: - assert(0 && "Invalid transform size"); - } - } - } else { - if (xd->lossless) { - vp9_iwht4x4_add(dqcoeff, dst, stride, eob); - } else { - switch (tx_size) { - case TX_4X4: - vp9_idct4x4_add(dqcoeff, dst, stride, eob); - break; - case TX_8X8: - vp9_idct8x8_add(dqcoeff, dst, stride, eob); - break; - case TX_16X16: - vp9_idct16x16_add(dqcoeff, dst, stride, eob); - break; - case TX_32X32: - vp9_idct32x32_add(dqcoeff, dst, stride, eob); - break; - default: - assert(0 && "Invalid transform size"); - return; - } - } - } -#else - if (xd->lossless) { - vp9_iwht4x4_add(dqcoeff, dst, stride, eob); - } else { - switch (tx_size) { - case TX_4X4: - vp9_idct4x4_add(dqcoeff, dst, stride, eob); - break; - case TX_8X8: - vp9_idct8x8_add(dqcoeff, dst, stride, eob); - break; - case TX_16X16: - vp9_idct16x16_add(dqcoeff, dst, stride, eob); - break; - case TX_32X32: - vp9_idct32x32_add(dqcoeff, dst, stride, eob); - break; - default: - assert(0 && "Invalid transform size"); - return; - } - } -#endif // CONFIG_VP9_HIGHBITDEPTH - - if (eob == 1) { - dqcoeff[0] = 0; - } else { - if (tx_size <= TX_16X16 && eob <= 10) - memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); - else if (tx_size == TX_32X32 && eob <= 34) - memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); - else - memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); - } -} - -static void inverse_transform_block_intra(MACROBLOCKD* xd, 
int plane, - const TX_TYPE tx_type, - const TX_SIZE tx_size, - uint8_t *dst, int stride, - int eob) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - tran_low_t *const dqcoeff = pd->dqcoeff; - assert(eob > 0); -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - if (xd->lossless) { - vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); - } else { - switch (tx_size) { - case TX_4X4: - vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_8X8: - vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_16X16: - vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_32X32: - vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); - break; - default: - assert(0 && "Invalid transform size"); - } - } - } else { - if (xd->lossless) { - vp9_iwht4x4_add(dqcoeff, dst, stride, eob); - } else { - switch (tx_size) { - case TX_4X4: - vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_8X8: - vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_16X16: - vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_32X32: - vp9_idct32x32_add(dqcoeff, dst, stride, eob); - break; - default: - assert(0 && "Invalid transform size"); - return; - } - } - } -#else - if (xd->lossless) { - vp9_iwht4x4_add(dqcoeff, dst, stride, eob); - } else { - switch (tx_size) { - case TX_4X4: - vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_8X8: - vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_16X16: - vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_32X32: - vp9_idct32x32_add(dqcoeff, dst, stride, eob); - break; - default: - assert(0 && "Invalid transform size"); - return; - } - } -#endif // CONFIG_VP9_HIGHBITDEPTH - - if (eob == 1) { - dqcoeff[0] = 0; - } else { - if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) - 
memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); - else if (tx_size == TX_32X32 && eob <= 34) - memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); - else - memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); - } -} - -static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd, - vpx_reader *r, - MODE_INFO *const mi, - int plane, - int row, int col, - TX_SIZE tx_size) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - PREDICTION_MODE mode = (plane == 0) ? mi->mode : mi->uv_mode; - uint8_t *dst; - dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; - - if (mi->sb_type < BLOCK_8X8) - if (plane == 0) - mode = xd->mi[0]->bmi[(row << 1) + col].as_mode; - - vp9_predict_intra_block(xd, pd->n4_wl, tx_size, mode, - dst, pd->dst.stride, dst, pd->dst.stride, - col, row, plane); - - if (!mi->skip) { - const TX_TYPE tx_type = (plane || xd->lossless) ? - DCT_DCT : intra_mode_to_tx_type_lookup[mode]; - const scan_order *sc = (plane || xd->lossless) ? - &vp9_default_scan_orders[tx_size] : &vp9_scan_orders[tx_size][tx_type]; - const int eob = vp9_decode_block_tokens(xd, plane, sc, col, row, tx_size, - r, mi->segment_id); - if (eob > 0) { - inverse_transform_block_intra(xd, plane, tx_type, tx_size, - dst, pd->dst.stride, eob); - } - } -} - -static int reconstruct_inter_block(MACROBLOCKD *const xd, vpx_reader *r, - MODE_INFO *const mi, int plane, - int row, int col, TX_SIZE tx_size) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - const scan_order *sc = &vp9_default_scan_orders[tx_size]; - const int eob = vp9_decode_block_tokens(xd, plane, sc, col, row, tx_size, r, - mi->segment_id); - - if (eob > 0) { - inverse_transform_block_inter( - xd, plane, tx_size, &pd->dst.buf[4 * row * pd->dst.stride + 4 * col], - pd->dst.stride, eob); - } - return eob; -} - -static void build_mc_border(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - int x, int y, int b_w, int b_h, int w, int h) { - // Get a pointer to the start 
of the real data for this row. - const uint8_t *ref_row = src - x - y * src_stride; - - if (y >= h) - ref_row += (h - 1) * src_stride; - else if (y > 0) - ref_row += y * src_stride; - - do { - int right = 0, copy; - int left = x < 0 ? -x : 0; - - if (left > b_w) - left = b_w; - - if (x + b_w > w) - right = x + b_w - w; - - if (right > b_w) - right = b_w; - - copy = b_w - left - right; - - if (left) - memset(dst, ref_row[0], left); - - if (copy) - memcpy(dst + left, ref_row + x + left, copy); - - if (right) - memset(dst + left + copy, ref_row[w - 1], right); - - dst += dst_stride; - ++y; - - if (y > 0 && y < h) - ref_row += src_stride; - } while (--b_h); -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void high_build_mc_border(const uint8_t *src8, int src_stride, - uint16_t *dst, int dst_stride, - int x, int y, int b_w, int b_h, - int w, int h) { - // Get a pointer to the start of the real data for this row. - const uint16_t *src = CONVERT_TO_SHORTPTR(src8); - const uint16_t *ref_row = src - x - y * src_stride; - - if (y >= h) - ref_row += (h - 1) * src_stride; - else if (y > 0) - ref_row += y * src_stride; - - do { - int right = 0, copy; - int left = x < 0 ? 
-x : 0; - - if (left > b_w) - left = b_w; - - if (x + b_w > w) - right = x + b_w - w; - - if (right > b_w) - right = b_w; - - copy = b_w - left - right; - - if (left) - vpx_memset16(dst, ref_row[0], left); - - if (copy) - memcpy(dst + left, ref_row + x + left, copy * sizeof(uint16_t)); - - if (right) - vpx_memset16(dst + left + copy, ref_row[w - 1], right); - - dst += dst_stride; - ++y; - - if (y > 0 && y < h) - ref_row += src_stride; - } while (--b_h); -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -#if CONFIG_VP9_HIGHBITDEPTH -static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, - int x0, int y0, int b_w, int b_h, - int frame_width, int frame_height, - int border_offset, - uint8_t *const dst, int dst_buf_stride, - int subpel_x, int subpel_y, - const InterpKernel *kernel, - const struct scale_factors *sf, - MACROBLOCKD *xd, - int w, int h, int ref, int xs, int ys) { - DECLARE_ALIGNED(16, uint16_t, mc_buf_high[80 * 2 * 80 * 2]); - const uint8_t *buf_ptr; - - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_build_mc_border(buf_ptr1, pre_buf_stride, mc_buf_high, b_w, - x0, y0, b_w, b_h, frame_width, frame_height); - buf_ptr = CONVERT_TO_BYTEPTR(mc_buf_high) + border_offset; - } else { - build_mc_border(buf_ptr1, pre_buf_stride, (uint8_t *)mc_buf_high, b_w, - x0, y0, b_w, b_h, frame_width, frame_height); - buf_ptr = ((uint8_t *)mc_buf_high) + border_offset; - } - - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - highbd_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); - } else { - inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys); - } -} -#else -static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, - int x0, int y0, int b_w, int b_h, - int frame_width, int frame_height, - int border_offset, - uint8_t *const dst, int dst_buf_stride, - int subpel_x, int subpel_y, - const InterpKernel *kernel, - const 
struct scale_factors *sf, - int w, int h, int ref, int xs, int ys) { - DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]); - const uint8_t *buf_ptr; - - build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w, - x0, y0, b_w, b_h, frame_width, frame_height); - buf_ptr = mc_buf + border_offset; - - inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys); -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -static void dec_build_inter_predictors(VPxWorker *const worker, MACROBLOCKD *xd, - int plane, int bw, int bh, int x, - int y, int w, int h, int mi_x, int mi_y, - const InterpKernel *kernel, - const struct scale_factors *sf, - struct buf_2d *pre_buf, - struct buf_2d *dst_buf, const MV* mv, - RefCntBuffer *ref_frame_buf, - int is_scaled, int ref) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; - MV32 scaled_mv; - int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height, - buf_stride, subpel_x, subpel_y; - uint8_t *ref_frame, *buf_ptr; - - // Get reference frame pointer, width and height. - if (plane == 0) { - frame_width = ref_frame_buf->buf.y_crop_width; - frame_height = ref_frame_buf->buf.y_crop_height; - ref_frame = ref_frame_buf->buf.y_buffer; - } else { - frame_width = ref_frame_buf->buf.uv_crop_width; - frame_height = ref_frame_buf->buf.uv_crop_height; - ref_frame = plane == 1 ? ref_frame_buf->buf.u_buffer - : ref_frame_buf->buf.v_buffer; - } - - if (is_scaled) { - const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, mv, bw, bh, - pd->subsampling_x, - pd->subsampling_y); - // Co-ordinate of containing block to pixel precision. 
- int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); - int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); -#if CONFIG_BETTER_HW_COMPATIBILITY - assert(xd->mi[0]->sb_type != BLOCK_4X8 && - xd->mi[0]->sb_type != BLOCK_8X4); - assert(mv_q4.row == mv->row * (1 << (1 - pd->subsampling_y)) && - mv_q4.col == mv->col * (1 << (1 - pd->subsampling_x))); -#endif - // Co-ordinate of the block to 1/16th pixel precision. - x0_16 = (x_start + x) << SUBPEL_BITS; - y0_16 = (y_start + y) << SUBPEL_BITS; - - // Co-ordinate of current block in reference frame - // to 1/16th pixel precision. - x0_16 = sf->scale_value_x(x0_16, sf); - y0_16 = sf->scale_value_y(y0_16, sf); - - // Map the top left corner of the block into the reference frame. - x0 = sf->scale_value_x(x_start + x, sf); - y0 = sf->scale_value_y(y_start + y, sf); - - // Scale the MV and incorporate the sub-pixel offset of the block - // in the reference frame. - scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); - xs = sf->x_step_q4; - ys = sf->y_step_q4; - } else { - // Co-ordinate of containing block to pixel precision. - x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; - y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; - - // Co-ordinate of the block to 1/16th pixel precision. - x0_16 = x0 << SUBPEL_BITS; - y0_16 = y0 << SUBPEL_BITS; - - scaled_mv.row = mv->row * (1 << (1 - pd->subsampling_y)); - scaled_mv.col = mv->col * (1 << (1 - pd->subsampling_x)); - xs = ys = 16; - } - subpel_x = scaled_mv.col & SUBPEL_MASK; - subpel_y = scaled_mv.row & SUBPEL_MASK; - - // Calculate the top left corner of the best matching block in the - // reference frame. - x0 += scaled_mv.col >> SUBPEL_BITS; - y0 += scaled_mv.row >> SUBPEL_BITS; - x0_16 += scaled_mv.col; - y0_16 += scaled_mv.row; - - // Get reference block pointer. 
- buf_ptr = ref_frame + y0 * pre_buf->stride + x0; - buf_stride = pre_buf->stride; - - // Do border extension if there is motion or the - // width/height is not a multiple of 8 pixels. - if (is_scaled || scaled_mv.col || scaled_mv.row || - (frame_width & 0x7) || (frame_height & 0x7)) { - int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1; - - // Get reference block bottom right horizontal coordinate. - int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1; - int x_pad = 0, y_pad = 0; - - if (subpel_x || (sf->x_step_q4 != SUBPEL_SHIFTS)) { - x0 -= VP9_INTERP_EXTEND - 1; - x1 += VP9_INTERP_EXTEND; - x_pad = 1; - } - - if (subpel_y || (sf->y_step_q4 != SUBPEL_SHIFTS)) { - y0 -= VP9_INTERP_EXTEND - 1; - y1 += VP9_INTERP_EXTEND; - y_pad = 1; - } - - // Wait until reference block is ready. Pad 7 more pixels as last 7 - // pixels of each superblock row can be changed by next superblock row. - if (worker != NULL) - vp9_frameworker_wait(worker, ref_frame_buf, - VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1)); - - // Skip border extension if block is inside the frame. - if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 || - y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) { - // Extend the border. - const uint8_t *const buf_ptr1 = ref_frame + y0 * buf_stride + x0; - const int b_w = x1 - x0 + 1; - const int b_h = y1 - y0 + 1; - const int border_offset = y_pad * 3 * b_w + x_pad * 3; - - extend_and_predict(buf_ptr1, buf_stride, x0, y0, b_w, b_h, - frame_width, frame_height, border_offset, - dst, dst_buf->stride, - subpel_x, subpel_y, - kernel, sf, -#if CONFIG_VP9_HIGHBITDEPTH - xd, -#endif - w, h, ref, xs, ys); - return; - } - } else { - // Wait until reference block is ready. Pad 7 more pixels as last 7 - // pixels of each superblock row can be changed by next superblock row. - if (worker != NULL) { - const int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS; - vp9_frameworker_wait(worker, ref_frame_buf, - VPXMAX(0, (y1 + 7)) << (plane == 0 ? 
0 : 1)); - } - } -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - highbd_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); - } else { - inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys); - } -#else - inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, kernel, xs, ys); -#endif // CONFIG_VP9_HIGHBITDEPTH -} - -static void dec_build_inter_predictors_sb(VP9Decoder *const pbi, - MACROBLOCKD *xd, - int mi_row, int mi_col) { - int plane; - const int mi_x = mi_col * MI_SIZE; - const int mi_y = mi_row * MI_SIZE; - const MODE_INFO *mi = xd->mi[0]; - const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; - const BLOCK_SIZE sb_type = mi->sb_type; - const int is_compound = has_second_ref(mi); - int ref; - int is_scaled; - VPxWorker *const fwo = pbi->frame_parallel_decode ? - pbi->frame_worker_owner : NULL; - - for (ref = 0; ref < 1 + is_compound; ++ref) { - const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; - RefBuffer *ref_buf = &pbi->common.frame_refs[frame - LAST_FRAME]; - const struct scale_factors *const sf = &ref_buf->sf; - const int idx = ref_buf->idx; - BufferPool *const pool = pbi->common.buffer_pool; - RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx]; - - if (!vp9_is_valid_scale(sf)) - vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM, - "Reference frame has invalid dimensions"); - - is_scaled = vp9_is_scaled(sf); - vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col, - is_scaled ? 
sf : NULL); - xd->block_refs[ref] = ref_buf; - - if (sb_type < BLOCK_8X8) { - for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - struct buf_2d *const dst_buf = &pd->dst; - const int num_4x4_w = pd->n4_w; - const int num_4x4_h = pd->n4_h; - const int n4w_x4 = 4 * num_4x4_w; - const int n4h_x4 = 4 * num_4x4_h; - struct buf_2d *const pre_buf = &pd->pre[ref]; - int i = 0, x, y; - for (y = 0; y < num_4x4_h; ++y) { - for (x = 0; x < num_4x4_w; ++x) { - const MV mv = average_split_mvs(pd, mi, ref, i++); - dec_build_inter_predictors(fwo, xd, plane, n4w_x4, n4h_x4, - 4 * x, 4 * y, 4, 4, mi_x, mi_y, kernel, - sf, pre_buf, dst_buf, &mv, - ref_frame_buf, is_scaled, ref); - } - } - } - } else { - const MV mv = mi->mv[ref].as_mv; - for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - struct buf_2d *const dst_buf = &pd->dst; - const int num_4x4_w = pd->n4_w; - const int num_4x4_h = pd->n4_h; - const int n4w_x4 = 4 * num_4x4_w; - const int n4h_x4 = 4 * num_4x4_h; - struct buf_2d *const pre_buf = &pd->pre[ref]; - dec_build_inter_predictors(fwo, xd, plane, n4w_x4, n4h_x4, - 0, 0, n4w_x4, n4h_x4, mi_x, mi_y, kernel, - sf, pre_buf, dst_buf, &mv, - ref_frame_buf, is_scaled, ref); - } - } - } -} - -static INLINE TX_SIZE dec_get_uv_tx_size(const MODE_INFO *mi, - int n4_wl, int n4_hl) { - // get minimum log2 num4x4s dimension - const int x = VPXMIN(n4_wl, n4_hl); - return VPXMIN(mi->tx_size, x); -} - -static INLINE void dec_reset_skip_context(MACROBLOCKD *xd) { - int i; - for (i = 0; i < MAX_MB_PLANE; i++) { - struct macroblockd_plane *const pd = &xd->plane[i]; - memset(pd->above_context, 0, sizeof(ENTROPY_CONTEXT) * pd->n4_w); - memset(pd->left_context, 0, sizeof(ENTROPY_CONTEXT) * pd->n4_h); - } -} - -static void set_plane_n4(MACROBLOCKD *const xd, int bw, int bh, int bwl, - int bhl) { - int i; - for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].n4_w = (bw << 1) >> 
xd->plane[i].subsampling_x; - xd->plane[i].n4_h = (bh << 1) >> xd->plane[i].subsampling_y; - xd->plane[i].n4_wl = bwl - xd->plane[i].subsampling_x; - xd->plane[i].n4_hl = bhl - xd->plane[i].subsampling_y; - } -} - -static MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, - BLOCK_SIZE bsize, int mi_row, int mi_col, - int bw, int bh, int x_mis, int y_mis, - int bwl, int bhl) { - const int offset = mi_row * cm->mi_stride + mi_col; - int x, y; - const TileInfo *const tile = &xd->tile; - - xd->mi = cm->mi_grid_visible + offset; - xd->mi[0] = &cm->mi[offset]; - // TODO(slavarnway): Generate sb_type based on bwl and bhl, instead of - // passing bsize from decode_partition(). - xd->mi[0]->sb_type = bsize; - for (y = 0; y < y_mis; ++y) - for (x = !y; x < x_mis; ++x) { - xd->mi[y * cm->mi_stride + x] = xd->mi[0]; - } - - set_plane_n4(xd, bw, bh, bwl, bhl); - - set_skip_context(xd, mi_row, mi_col); - - // Distance of Mb to the various image edges. These are specified to 8th pel - // as they are always compared to values that are in 1/8th pel units - set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); - - vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); - return xd->mi[0]; -} - -static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, - int mi_row, int mi_col, - vpx_reader *r, BLOCK_SIZE bsize, - int bwl, int bhl) { - VP9_COMMON *const cm = &pbi->common; - const int less8x8 = bsize < BLOCK_8X8; - const int bw = 1 << (bwl - 1); - const int bh = 1 << (bhl - 1); - const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); - const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); - - MODE_INFO *mi = set_offsets(cm, xd, bsize, mi_row, mi_col, - bw, bh, x_mis, y_mis, bwl, bhl); - - if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) { - const BLOCK_SIZE uv_subsize = - ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y]; - if (uv_subsize == BLOCK_INVALID) - 
vpx_internal_error(xd->error_info, - VPX_CODEC_CORRUPT_FRAME, "Invalid block size."); - } - - vp9_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis); - - if (mi->skip) { - dec_reset_skip_context(xd); - } - - if (!is_inter_block(mi)) { - int plane; - for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const TX_SIZE tx_size = - plane ? dec_get_uv_tx_size(mi, pd->n4_wl, pd->n4_hl) - : mi->tx_size; - const int num_4x4_w = pd->n4_w; - const int num_4x4_h = pd->n4_h; - const int step = (1 << tx_size); - int row, col; - const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? - 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); - const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? - 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); - - xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide; - xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high; - - for (row = 0; row < max_blocks_high; row += step) - for (col = 0; col < max_blocks_wide; col += step) - predict_and_reconstruct_intra_block(xd, r, mi, plane, - row, col, tx_size); - } - } else { - // Prediction - dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col); - - // Reconstruction - if (!mi->skip) { - int eobtotal = 0; - int plane; - - for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const TX_SIZE tx_size = - plane ? dec_get_uv_tx_size(mi, pd->n4_wl, pd->n4_hl) - : mi->tx_size; - const int num_4x4_w = pd->n4_w; - const int num_4x4_h = pd->n4_h; - const int step = (1 << tx_size); - int row, col; - const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? - 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); - const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? - 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); - - xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 
0 : max_blocks_wide; - xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high; - - for (row = 0; row < max_blocks_high; row += step) - for (col = 0; col < max_blocks_wide; col += step) - eobtotal += reconstruct_inter_block(xd, r, mi, plane, row, col, - tx_size); - } - - if (!less8x8 && eobtotal == 0) - mi->skip = 1; // skip loopfilter - } - } - - xd->corrupted |= vpx_reader_has_error(r); - - if (cm->lf.filter_level) { - vp9_build_mask(cm, mi, mi_row, mi_col, bw, bh); - } -} - -static INLINE int dec_partition_plane_context(const MACROBLOCKD *xd, - int mi_row, int mi_col, - int bsl) { - const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col; - const PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK); - int above = (*above_ctx >> bsl) & 1 , left = (*left_ctx >> bsl) & 1; - -// assert(bsl >= 0); - - return (left * 2 + above) + bsl * PARTITION_PLOFFSET; -} - -static INLINE void dec_update_partition_context(MACROBLOCKD *xd, - int mi_row, int mi_col, - BLOCK_SIZE subsize, - int bw) { - PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col; - PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK); - - // update the partition context at the end notes. set partition bits - // of block sizes larger than the current one to be one, and partition - // bits of smaller block sizes to be zero. 
- memset(above_ctx, partition_context_lookup[subsize].above, bw); - memset(left_ctx, partition_context_lookup[subsize].left, bw); -} - -static PARTITION_TYPE read_partition(MACROBLOCKD *xd, int mi_row, int mi_col, - vpx_reader *r, - int has_rows, int has_cols, int bsl) { - const int ctx = dec_partition_plane_context(xd, mi_row, mi_col, bsl); - const vpx_prob *const probs = get_partition_probs(xd, ctx); - FRAME_COUNTS *counts = xd->counts; - PARTITION_TYPE p; - - if (has_rows && has_cols) - p = (PARTITION_TYPE)vpx_read_tree(r, vp9_partition_tree, probs); - else if (!has_rows && has_cols) - p = vpx_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ; - else if (has_rows && !has_cols) - p = vpx_read(r, probs[2]) ? PARTITION_SPLIT : PARTITION_VERT; - else - p = PARTITION_SPLIT; - - if (counts) - ++counts->partition[ctx][p]; - - return p; -} - -// TODO(slavarnway): eliminate bsize and subsize in future commits -static void decode_partition(VP9Decoder *const pbi, MACROBLOCKD *const xd, - int mi_row, int mi_col, - vpx_reader* r, BLOCK_SIZE bsize, int n4x4_l2) { - VP9_COMMON *const cm = &pbi->common; - const int n8x8_l2 = n4x4_l2 - 1; - const int num_8x8_wh = 1 << n8x8_l2; - const int hbs = num_8x8_wh >> 1; - PARTITION_TYPE partition; - BLOCK_SIZE subsize; - const int has_rows = (mi_row + hbs) < cm->mi_rows; - const int has_cols = (mi_col + hbs) < cm->mi_cols; - - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) - return; - - partition = read_partition(xd, mi_row, mi_col, r, has_rows, has_cols, - n8x8_l2); - subsize = subsize_lookup[partition][bsize]; // get_subsize(bsize, partition); - if (!hbs) { - // calculate bmode block dimensions (log 2) - xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT); - xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ); - decode_block(pbi, xd, mi_row, mi_col, r, subsize, 1, 1); - } else { - switch (partition) { - case PARTITION_NONE: - decode_block(pbi, xd, mi_row, mi_col, r, subsize, n4x4_l2, n4x4_l2); - break; - case 
PARTITION_HORZ: - decode_block(pbi, xd, mi_row, mi_col, r, subsize, n4x4_l2, n8x8_l2); - if (has_rows) - decode_block(pbi, xd, mi_row + hbs, mi_col, r, subsize, n4x4_l2, - n8x8_l2); - break; - case PARTITION_VERT: - decode_block(pbi, xd, mi_row, mi_col, r, subsize, n8x8_l2, n4x4_l2); - if (has_cols) - decode_block(pbi, xd, mi_row, mi_col + hbs, r, subsize, n8x8_l2, - n4x4_l2); - break; - case PARTITION_SPLIT: - decode_partition(pbi, xd, mi_row, mi_col, r, subsize, n8x8_l2); - decode_partition(pbi, xd, mi_row, mi_col + hbs, r, subsize, n8x8_l2); - decode_partition(pbi, xd, mi_row + hbs, mi_col, r, subsize, n8x8_l2); - decode_partition(pbi, xd, mi_row + hbs, mi_col + hbs, r, subsize, - n8x8_l2); - break; - default: - assert(0 && "Invalid partition type"); - } - } - - // update partition context - if (bsize >= BLOCK_8X8 && - (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) - dec_update_partition_context(xd, mi_row, mi_col, subsize, num_8x8_wh); -} - -static void setup_token_decoder(const uint8_t *data, - const uint8_t *data_end, - size_t read_size, - struct vpx_internal_error_info *error_info, - vpx_reader *r, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) { - // Validate the calculated partition length. If the buffer - // described by the partition can't be fully read, then restrict - // it to the portion that can be (for EC mode) or throw an error. 
- if (!read_is_valid(data, read_size, data_end)) - vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt tile length"); - - if (vpx_reader_init(r, data, read_size, decrypt_cb, decrypt_state)) - vpx_internal_error(error_info, VPX_CODEC_MEM_ERROR, - "Failed to allocate bool decoder %d", 1); -} - -static void read_coef_probs_common(vp9_coeff_probs_model *coef_probs, - vpx_reader *r) { - int i, j, k, l, m; - - if (vpx_read_bit(r)) - for (i = 0; i < PLANE_TYPES; ++i) - for (j = 0; j < REF_TYPES; ++j) - for (k = 0; k < COEF_BANDS; ++k) - for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) - for (m = 0; m < UNCONSTRAINED_NODES; ++m) - vp9_diff_update_prob(r, &coef_probs[i][j][k][l][m]); -} - -static void read_coef_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode, - vpx_reader *r) { - const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; - TX_SIZE tx_size; - for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) - read_coef_probs_common(fc->coef_probs[tx_size], r); -} - -static void setup_segmentation(struct segmentation *seg, - struct vpx_read_bit_buffer *rb) { - int i, j; - - seg->update_map = 0; - seg->update_data = 0; - - seg->enabled = vpx_rb_read_bit(rb); - if (!seg->enabled) - return; - - // Segmentation map update - seg->update_map = vpx_rb_read_bit(rb); - if (seg->update_map) { - for (i = 0; i < SEG_TREE_PROBS; i++) - seg->tree_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8) - : MAX_PROB; - - seg->temporal_update = vpx_rb_read_bit(rb); - if (seg->temporal_update) { - for (i = 0; i < PREDICTION_PROBS; i++) - seg->pred_probs[i] = vpx_rb_read_bit(rb) ? 
vpx_rb_read_literal(rb, 8) - : MAX_PROB; - } else { - for (i = 0; i < PREDICTION_PROBS; i++) - seg->pred_probs[i] = MAX_PROB; - } - } - - // Segmentation data update - seg->update_data = vpx_rb_read_bit(rb); - if (seg->update_data) { - seg->abs_delta = vpx_rb_read_bit(rb); - - vp9_clearall_segfeatures(seg); - - for (i = 0; i < MAX_SEGMENTS; i++) { - for (j = 0; j < SEG_LVL_MAX; j++) { - int data = 0; - const int feature_enabled = vpx_rb_read_bit(rb); - if (feature_enabled) { - vp9_enable_segfeature(seg, i, j); - data = decode_unsigned_max(rb, vp9_seg_feature_data_max(j)); - if (vp9_is_segfeature_signed(j)) - data = vpx_rb_read_bit(rb) ? -data : data; - } - vp9_set_segdata(seg, i, j, data); - } - } - } -} - -static void setup_loopfilter(struct loopfilter *lf, - struct vpx_read_bit_buffer *rb) { - lf->filter_level = vpx_rb_read_literal(rb, 6); - lf->sharpness_level = vpx_rb_read_literal(rb, 3); - - // Read in loop filter deltas applied at the MB level based on mode or ref - // frame. - lf->mode_ref_delta_update = 0; - - lf->mode_ref_delta_enabled = vpx_rb_read_bit(rb); - if (lf->mode_ref_delta_enabled) { - lf->mode_ref_delta_update = vpx_rb_read_bit(rb); - if (lf->mode_ref_delta_update) { - int i; - - for (i = 0; i < MAX_REF_LF_DELTAS; i++) - if (vpx_rb_read_bit(rb)) - lf->ref_deltas[i] = vpx_rb_read_signed_literal(rb, 6); - - for (i = 0; i < MAX_MODE_LF_DELTAS; i++) - if (vpx_rb_read_bit(rb)) - lf->mode_deltas[i] = vpx_rb_read_signed_literal(rb, 6); - } - } -} - -static INLINE int read_delta_q(struct vpx_read_bit_buffer *rb) { - return vpx_rb_read_bit(rb) ? 
vpx_rb_read_signed_literal(rb, 4) : 0; -} - -static void setup_quantization(VP9_COMMON *const cm, MACROBLOCKD *const xd, - struct vpx_read_bit_buffer *rb) { - cm->base_qindex = vpx_rb_read_literal(rb, QINDEX_BITS); - cm->y_dc_delta_q = read_delta_q(rb); - cm->uv_dc_delta_q = read_delta_q(rb); - cm->uv_ac_delta_q = read_delta_q(rb); - cm->dequant_bit_depth = cm->bit_depth; - xd->lossless = cm->base_qindex == 0 && - cm->y_dc_delta_q == 0 && - cm->uv_dc_delta_q == 0 && - cm->uv_ac_delta_q == 0; - -#if CONFIG_VP9_HIGHBITDEPTH - xd->bd = (int)cm->bit_depth; -#endif -} - -static void setup_segmentation_dequant(VP9_COMMON *const cm) { - // Build y/uv dequant values based on segmentation. - if (cm->seg.enabled) { - int i; - for (i = 0; i < MAX_SEGMENTS; ++i) { - const int qindex = vp9_get_qindex(&cm->seg, i, cm->base_qindex); - cm->y_dequant[i][0] = vp9_dc_quant(qindex, cm->y_dc_delta_q, - cm->bit_depth); - cm->y_dequant[i][1] = vp9_ac_quant(qindex, 0, cm->bit_depth); - cm->uv_dequant[i][0] = vp9_dc_quant(qindex, cm->uv_dc_delta_q, - cm->bit_depth); - cm->uv_dequant[i][1] = vp9_ac_quant(qindex, cm->uv_ac_delta_q, - cm->bit_depth); - } - } else { - const int qindex = cm->base_qindex; - // When segmentation is disabled, only the first value is used. The - // remaining are don't cares. - cm->y_dequant[0][0] = vp9_dc_quant(qindex, cm->y_dc_delta_q, cm->bit_depth); - cm->y_dequant[0][1] = vp9_ac_quant(qindex, 0, cm->bit_depth); - cm->uv_dequant[0][0] = vp9_dc_quant(qindex, cm->uv_dc_delta_q, - cm->bit_depth); - cm->uv_dequant[0][1] = vp9_ac_quant(qindex, cm->uv_ac_delta_q, - cm->bit_depth); - } -} - -static INTERP_FILTER read_interp_filter(struct vpx_read_bit_buffer *rb) { - const INTERP_FILTER literal_to_filter[] = { EIGHTTAP_SMOOTH, - EIGHTTAP, - EIGHTTAP_SHARP, - BILINEAR }; - return vpx_rb_read_bit(rb) ? 
SWITCHABLE - : literal_to_filter[vpx_rb_read_literal(rb, 2)]; -} - -static void setup_render_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { - cm->render_width = cm->width; - cm->render_height = cm->height; - if (vpx_rb_read_bit(rb)) - vp9_read_frame_size(rb, &cm->render_width, &cm->render_height); -} - -static void resize_mv_buffer(VP9_COMMON *cm) { - vpx_free(cm->cur_frame->mvs); - cm->cur_frame->mi_rows = cm->mi_rows; - cm->cur_frame->mi_cols = cm->mi_cols; - CHECK_MEM_ERROR(cm, cm->cur_frame->mvs, - (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols, - sizeof(*cm->cur_frame->mvs))); -} - -static void resize_context_buffers(VP9_COMMON *cm, int width, int height) { -#if CONFIG_SIZE_LIMIT - if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Dimensions of %dx%d beyond allowed size of %dx%d.", - width, height, DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT); -#endif - if (cm->width != width || cm->height != height) { - const int new_mi_rows = - ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2) >> MI_SIZE_LOG2; - const int new_mi_cols = - ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2) >> MI_SIZE_LOG2; - - // Allocations in vp9_alloc_context_buffers() depend on individual - // dimensions as well as the overall size. 
- if (new_mi_cols > cm->mi_cols || new_mi_rows > cm->mi_rows) { - if (vp9_alloc_context_buffers(cm, width, height)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate context buffers"); - } else { - vp9_set_mb_mi(cm, width, height); - } - vp9_init_context_buffers(cm); - cm->width = width; - cm->height = height; - } - if (cm->cur_frame->mvs == NULL || cm->mi_rows > cm->cur_frame->mi_rows || - cm->mi_cols > cm->cur_frame->mi_cols) { - resize_mv_buffer(cm); - } -} - -static void setup_frame_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { - int width, height; - BufferPool *const pool = cm->buffer_pool; - vp9_read_frame_size(rb, &width, &height); - resize_context_buffers(cm, width, height); - setup_render_size(cm, rb); - - lock_buffer_pool(pool); - if (vpx_realloc_frame_buffer( - get_frame_new_buffer(cm), cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_DEC_BORDER_IN_PIXELS, - cm->byte_alignment, - &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb, - pool->cb_priv)) { - unlock_buffer_pool(pool); - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate frame buffer"); - } - unlock_buffer_pool(pool); - - pool->frame_bufs[cm->new_fb_idx].buf.subsampling_x = cm->subsampling_x; - pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y; - pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; - pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space; - pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range; - pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; - pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; -} - -static INLINE int valid_ref_frame_img_fmt(vpx_bit_depth_t ref_bit_depth, - int ref_xss, int ref_yss, - vpx_bit_depth_t this_bit_depth, - int this_xss, int this_yss) { - return ref_bit_depth == this_bit_depth && 
ref_xss == this_xss && - ref_yss == this_yss; -} - -static void setup_frame_size_with_refs(VP9_COMMON *cm, - struct vpx_read_bit_buffer *rb) { - int width, height; - int found = 0, i; - int has_valid_ref_frame = 0; - BufferPool *const pool = cm->buffer_pool; - for (i = 0; i < REFS_PER_FRAME; ++i) { - if (vpx_rb_read_bit(rb)) { - if (cm->frame_refs[i].idx != INVALID_IDX) { - YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf; - width = buf->y_crop_width; - height = buf->y_crop_height; - found = 1; - break; - } else { - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Failed to decode frame size"); - } - } - } - - if (!found) - vp9_read_frame_size(rb, &width, &height); - - if (width <= 0 || height <= 0) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Invalid frame size"); - - // Check to make sure at least one of frames that this frame references - // has valid dimensions. - for (i = 0; i < REFS_PER_FRAME; ++i) { - RefBuffer *const ref_frame = &cm->frame_refs[i]; - has_valid_ref_frame |= (ref_frame->idx != INVALID_IDX && - valid_ref_frame_size(ref_frame->buf->y_crop_width, - ref_frame->buf->y_crop_height, - width, height)); - } - if (!has_valid_ref_frame) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Referenced frame has invalid size"); - for (i = 0; i < REFS_PER_FRAME; ++i) { - RefBuffer *const ref_frame = &cm->frame_refs[i]; - if (ref_frame->idx == INVALID_IDX || - !valid_ref_frame_img_fmt(ref_frame->buf->bit_depth, - ref_frame->buf->subsampling_x, - ref_frame->buf->subsampling_y, - cm->bit_depth, - cm->subsampling_x, - cm->subsampling_y)) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Referenced frame has incompatible color format"); - } - - resize_context_buffers(cm, width, height); - setup_render_size(cm, rb); - - lock_buffer_pool(pool); - if (vpx_realloc_frame_buffer( - get_frame_new_buffer(cm), cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - 
cm->use_highbitdepth, -#endif - VP9_DEC_BORDER_IN_PIXELS, - cm->byte_alignment, - &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb, - pool->cb_priv)) { - unlock_buffer_pool(pool); - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate frame buffer"); - } - unlock_buffer_pool(pool); - - pool->frame_bufs[cm->new_fb_idx].buf.subsampling_x = cm->subsampling_x; - pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y; - pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; - pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space; - pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range; - pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; - pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; -} - -static void setup_tile_info(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { - int min_log2_tile_cols, max_log2_tile_cols, max_ones; - vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); - - // columns - max_ones = max_log2_tile_cols - min_log2_tile_cols; - cm->log2_tile_cols = min_log2_tile_cols; - while (max_ones-- && vpx_rb_read_bit(rb)) - cm->log2_tile_cols++; - - if (cm->log2_tile_cols > 6) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Invalid number of tile columns"); - - // rows - cm->log2_tile_rows = vpx_rb_read_bit(rb); - if (cm->log2_tile_rows) - cm->log2_tile_rows += vpx_rb_read_bit(rb); -} - -// Reads the next tile returning its size and adjusting '*data' accordingly -// based on 'is_last'. 
-static void get_tile_buffer(const uint8_t *const data_end, - int is_last, - struct vpx_internal_error_info *error_info, - const uint8_t **data, - vpx_decrypt_cb decrypt_cb, void *decrypt_state, - TileBuffer *buf) { - size_t size; - - if (!is_last) { - if (!read_is_valid(*data, 4, data_end)) - vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt tile length"); - - if (decrypt_cb) { - uint8_t be_data[4]; - decrypt_cb(decrypt_state, *data, be_data, 4); - size = mem_get_be32(be_data); - } else { - size = mem_get_be32(*data); - } - *data += 4; - - if (size > (size_t)(data_end - *data)) - vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt tile size"); - } else { - size = data_end - *data; - } - - buf->data = *data; - buf->size = size; - - *data += size; -} - -static void get_tile_buffers(VP9Decoder *pbi, - const uint8_t *data, const uint8_t *data_end, - int tile_cols, int tile_rows, - TileBuffer (*tile_buffers)[1 << 6]) { - int r, c; - - for (r = 0; r < tile_rows; ++r) { - for (c = 0; c < tile_cols; ++c) { - const int is_last = (r == tile_rows - 1) && (c == tile_cols - 1); - TileBuffer *const buf = &tile_buffers[r][c]; - buf->col = c; - get_tile_buffer(data_end, is_last, &pbi->common.error, &data, - pbi->decrypt_cb, pbi->decrypt_state, buf); - } - } -} - -static const uint8_t *decode_tiles(VP9Decoder *pbi, - const uint8_t *data, - const uint8_t *data_end) { - VP9_COMMON *const cm = &pbi->common; - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); - const int tile_cols = 1 << cm->log2_tile_cols; - const int tile_rows = 1 << cm->log2_tile_rows; - TileBuffer tile_buffers[4][1 << 6]; - int tile_row, tile_col; - int mi_row, mi_col; - TileWorkerData *tile_data = NULL; - - if (cm->lf.filter_level && !cm->skip_loop_filter && - pbi->lf_worker.data1 == NULL) { - CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, - vpx_memalign(32, 
sizeof(LFWorkerData))); - pbi->lf_worker.hook = (VPxWorkerHook)vp9_loop_filter_worker; - if (pbi->max_threads > 1 && !winterface->reset(&pbi->lf_worker)) { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Loop filter thread creation failed"); - } - } - - if (cm->lf.filter_level && !cm->skip_loop_filter) { - LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; - // Be sure to sync as we might be resuming after a failed frame decode. - winterface->sync(&pbi->lf_worker); - vp9_loop_filter_data_reset(lf_data, get_frame_new_buffer(cm), cm, - pbi->mb.plane); - } - - assert(tile_rows <= 4); - assert(tile_cols <= (1 << 6)); - - // Note: this memset assumes above_context[0], [1] and [2] - // are allocated as part of the same buffer. - memset(cm->above_context, 0, - sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols); - - memset(cm->above_seg_context, 0, - sizeof(*cm->above_seg_context) * aligned_cols); - - vp9_reset_lfm(cm); - - get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); - - // Load all tile information into tile_data. - for (tile_row = 0; tile_row < tile_rows; ++tile_row) { - for (tile_col = 0; tile_col < tile_cols; ++tile_col) { - const TileBuffer *const buf = &tile_buffers[tile_row][tile_col]; - tile_data = pbi->tile_worker_data + tile_cols * tile_row + tile_col; - tile_data->xd = pbi->mb; - tile_data->xd.corrupted = 0; - tile_data->xd.counts = - cm->frame_parallel_decoding_mode ? 
NULL : &cm->counts; - vp9_zero(tile_data->dqcoeff); - vp9_tile_init(&tile_data->xd.tile, cm, tile_row, tile_col); - setup_token_decoder(buf->data, data_end, buf->size, &cm->error, - &tile_data->bit_reader, pbi->decrypt_cb, - pbi->decrypt_state); - vp9_init_macroblockd(cm, &tile_data->xd, tile_data->dqcoeff); - } - } - - for (tile_row = 0; tile_row < tile_rows; ++tile_row) { - TileInfo tile; - vp9_tile_set_row(&tile, cm, tile_row); - for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end; - mi_row += MI_BLOCK_SIZE) { - for (tile_col = 0; tile_col < tile_cols; ++tile_col) { - const int col = pbi->inv_tile_order ? - tile_cols - tile_col - 1 : tile_col; - tile_data = pbi->tile_worker_data + tile_cols * tile_row + col; - vp9_tile_set_col(&tile, cm, col); - vp9_zero(tile_data->xd.left_context); - vp9_zero(tile_data->xd.left_seg_context); - for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; - mi_col += MI_BLOCK_SIZE) { - decode_partition(pbi, &tile_data->xd, mi_row, - mi_col, &tile_data->bit_reader, BLOCK_64X64, 4); - } - pbi->mb.corrupted |= tile_data->xd.corrupted; - if (pbi->mb.corrupted) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Failed to decode tile data"); - } - // Loopfilter one row. - if (cm->lf.filter_level && !cm->skip_loop_filter) { - const int lf_start = mi_row - MI_BLOCK_SIZE; - LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; - - // delay the loopfilter by 1 macroblock row. - if (lf_start < 0) continue; - - // decoding has completed: finish up the loop filter in this thread. - if (mi_row + MI_BLOCK_SIZE >= cm->mi_rows) continue; - - winterface->sync(&pbi->lf_worker); - lf_data->start = lf_start; - lf_data->stop = mi_row; - if (pbi->max_threads > 1) { - winterface->launch(&pbi->lf_worker); - } else { - winterface->execute(&pbi->lf_worker); - } - } - // After loopfiltering, the last 7 row pixels in each superblock row may - // still be changed by the longest loopfilter of the next superblock - // row. 
- if (pbi->frame_parallel_decode) - vp9_frameworker_broadcast(pbi->cur_buf, - mi_row << MI_BLOCK_SIZE_LOG2); - } - } - - // Loopfilter remaining rows in the frame. - if (cm->lf.filter_level && !cm->skip_loop_filter) { - LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; - winterface->sync(&pbi->lf_worker); - lf_data->start = lf_data->stop; - lf_data->stop = cm->mi_rows; - winterface->execute(&pbi->lf_worker); - } - - // Get last tile data. - tile_data = pbi->tile_worker_data + tile_cols * tile_rows - 1; - - if (pbi->frame_parallel_decode) - vp9_frameworker_broadcast(pbi->cur_buf, INT_MAX); - return vpx_reader_find_end(&tile_data->bit_reader); -} - -// On entry 'tile_data->data_end' points to the end of the input frame, on exit -// it is updated to reflect the bitreader position of the final tile column if -// present in the tile buffer group or NULL otherwise. -static int tile_worker_hook(TileWorkerData *const tile_data, - VP9Decoder *const pbi) { - TileInfo *volatile tile = &tile_data->xd.tile; - const int final_col = (1 << pbi->common.log2_tile_cols) - 1; - const uint8_t *volatile bit_reader_end = NULL; - volatile int n = tile_data->buf_start; - tile_data->error_info.setjmp = 1; - - if (setjmp(tile_data->error_info.jmp)) { - tile_data->error_info.setjmp = 0; - tile_data->xd.corrupted = 1; - tile_data->data_end = NULL; - return 0; - } - - tile_data->xd.error_info = &tile_data->error_info; - tile_data->xd.corrupted = 0; - - do { - int mi_row, mi_col; - const TileBuffer *const buf = pbi->tile_buffers + n; - vp9_zero(tile_data->dqcoeff); - vp9_tile_init(tile, &pbi->common, 0, buf->col); - setup_token_decoder(buf->data, tile_data->data_end, buf->size, - &tile_data->error_info, &tile_data->bit_reader, - pbi->decrypt_cb, pbi->decrypt_state); - vp9_init_macroblockd(&pbi->common, &tile_data->xd, tile_data->dqcoeff); - - for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; - mi_row += MI_BLOCK_SIZE) { - vp9_zero(tile_data->xd.left_context); - 
vp9_zero(tile_data->xd.left_seg_context); - for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; - mi_col += MI_BLOCK_SIZE) { - decode_partition(pbi, &tile_data->xd, mi_row, mi_col, - &tile_data->bit_reader, BLOCK_64X64, 4); - } - } - - if (buf->col == final_col) { - bit_reader_end = vpx_reader_find_end(&tile_data->bit_reader); - } - } while (!tile_data->xd.corrupted && ++n <= tile_data->buf_end); - - tile_data->data_end = bit_reader_end; - return !tile_data->xd.corrupted; -} - -// sorts in descending order -static int compare_tile_buffers(const void *a, const void *b) { - const TileBuffer *const buf1 = (const TileBuffer*)a; - const TileBuffer *const buf2 = (const TileBuffer*)b; - return (int)(buf2->size - buf1->size); -} - -static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, - const uint8_t *data, - const uint8_t *data_end) { - VP9_COMMON *const cm = &pbi->common; - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - const uint8_t *bit_reader_end = NULL; - const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); - const int tile_cols = 1 << cm->log2_tile_cols; - const int tile_rows = 1 << cm->log2_tile_rows; - const int num_workers = VPXMIN(pbi->max_threads, tile_cols); - int n; - - assert(tile_cols <= (1 << 6)); - assert(tile_rows == 1); - (void)tile_rows; - - if (pbi->num_tile_workers == 0) { - const int num_threads = pbi->max_threads; - CHECK_MEM_ERROR(cm, pbi->tile_workers, - vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); - for (n = 0; n < num_threads; ++n) { - VPxWorker *const worker = &pbi->tile_workers[n]; - ++pbi->num_tile_workers; - - winterface->init(worker); - if (n < num_threads - 1 && !winterface->reset(worker)) { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Tile decoder thread creation failed"); - } - } - } - - // Reset tile decoding hook - for (n = 0; n < num_workers; ++n) { - VPxWorker *const worker = &pbi->tile_workers[n]; - TileWorkerData *const tile_data = - &pbi->tile_worker_data[n + 
pbi->total_tiles]; - winterface->sync(worker); - tile_data->xd = pbi->mb; - tile_data->xd.counts = - cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts; - worker->hook = (VPxWorkerHook)tile_worker_hook; - worker->data1 = tile_data; - worker->data2 = pbi; - } - - // Note: this memset assumes above_context[0], [1] and [2] - // are allocated as part of the same buffer. - memset(cm->above_context, 0, - sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); - memset(cm->above_seg_context, 0, - sizeof(*cm->above_seg_context) * aligned_mi_cols); - - vp9_reset_lfm(cm); - - // Load tile data into tile_buffers - get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, - &pbi->tile_buffers); - - // Sort the buffers based on size in descending order. - qsort(pbi->tile_buffers, tile_cols, sizeof(pbi->tile_buffers[0]), - compare_tile_buffers); - - if (num_workers == tile_cols) { - // Rearrange the tile buffers such that the largest, and - // presumably the most difficult, tile will be decoded in the main thread. - // This should help minimize the number of instances where the main thread - // is waiting for a worker to complete. - const TileBuffer largest = pbi->tile_buffers[0]; - memmove(pbi->tile_buffers, pbi->tile_buffers + 1, - (tile_cols - 1) * sizeof(pbi->tile_buffers[0])); - pbi->tile_buffers[tile_cols - 1] = largest; - } else { - int start = 0, end = tile_cols - 2; - TileBuffer tmp; - - // Interleave the tiles to distribute the load between threads, assuming a - // larger tile implies it is more difficult to decode. - while (start < end) { - tmp = pbi->tile_buffers[start]; - pbi->tile_buffers[start] = pbi->tile_buffers[end]; - pbi->tile_buffers[end] = tmp; - start += 2; - end -= 2; - } - } - - // Initialize thread frame counts. 
- if (!cm->frame_parallel_decoding_mode) { - for (n = 0; n < num_workers; ++n) { - TileWorkerData *const tile_data = - (TileWorkerData*)pbi->tile_workers[n].data1; - vp9_zero(tile_data->counts); - } - } - - { - const int base = tile_cols / num_workers; - const int remain = tile_cols % num_workers; - int buf_start = 0; - - for (n = 0; n < num_workers; ++n) { - const int count = base + (remain + n) / num_workers; - VPxWorker *const worker = &pbi->tile_workers[n]; - TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; - - tile_data->buf_start = buf_start; - tile_data->buf_end = buf_start + count - 1; - tile_data->data_end = data_end; - buf_start += count; - - worker->had_error = 0; - if (n == num_workers - 1) { - assert(tile_data->buf_end == tile_cols - 1); - winterface->execute(worker); - } else { - winterface->launch(worker); - } - } - - for (; n > 0; --n) { - VPxWorker *const worker = &pbi->tile_workers[n - 1]; - TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; - // TODO(jzern): The tile may have specific error data associated with - // its vpx_internal_error_info which could be propagated to the main info - // in cm. Additionally once the threads have been synced and an error is - // detected, there's no point in continuing to decode tiles. - pbi->mb.corrupted |= !winterface->sync(worker); - if (!bit_reader_end) bit_reader_end = tile_data->data_end; - } - } - - // Accumulate thread frame counts. 
- if (!cm->frame_parallel_decoding_mode) { - for (n = 0; n < num_workers; ++n) { - TileWorkerData *const tile_data = - (TileWorkerData*)pbi->tile_workers[n].data1; - vp9_accumulate_frame_counts(&cm->counts, &tile_data->counts, 1); - } - } - - assert(bit_reader_end || pbi->mb.corrupted); - return bit_reader_end; -} - -static void error_handler(void *data) { - VP9_COMMON *const cm = (VP9_COMMON *)data; - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); -} - -static void read_bitdepth_colorspace_sampling( - VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { - if (cm->profile >= PROFILE_2) { - cm->bit_depth = vpx_rb_read_bit(rb) ? VPX_BITS_12 : VPX_BITS_10; -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth = 1; -#endif - } else { - cm->bit_depth = VPX_BITS_8; -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth = 0; -#endif - } - cm->color_space = vpx_rb_read_literal(rb, 3); - if (cm->color_space != VPX_CS_SRGB) { - cm->color_range = (vpx_color_range_t)vpx_rb_read_bit(rb); - if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { - cm->subsampling_x = vpx_rb_read_bit(rb); - cm->subsampling_y = vpx_rb_read_bit(rb); - if (cm->subsampling_x == 1 && cm->subsampling_y == 1) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "4:2:0 color not supported in profile 1 or 3"); - if (vpx_rb_read_bit(rb)) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Reserved bit set"); - } else { - cm->subsampling_y = cm->subsampling_x = 1; - } - } else { - cm->color_range = VPX_CR_FULL_RANGE; - if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { - // Note if colorspace is SRGB then 4:4:4 chroma sampling is assumed. - // 4:2:2 or 4:4:0 chroma sampling is not allowed. 
- cm->subsampling_y = cm->subsampling_x = 0; - if (vpx_rb_read_bit(rb)) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Reserved bit set"); - } else { - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "4:4:4 color not supported in profile 0 or 2"); - } - } -} - -static size_t read_uncompressed_header(VP9Decoder *pbi, - struct vpx_read_bit_buffer *rb) { - VP9_COMMON *const cm = &pbi->common; - BufferPool *const pool = cm->buffer_pool; - RefCntBuffer *const frame_bufs = pool->frame_bufs; - int i, mask, ref_index = 0; - size_t sz; - - cm->last_frame_type = cm->frame_type; - cm->last_intra_only = cm->intra_only; - - if (vpx_rb_read_literal(rb, 2) != VP9_FRAME_MARKER) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Invalid frame marker"); - - cm->profile = vp9_read_profile(rb); -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->profile >= MAX_PROFILES) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Unsupported bitstream profile"); -#else - if (cm->profile >= PROFILE_2) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Unsupported bitstream profile"); -#endif - - cm->show_existing_frame = vpx_rb_read_bit(rb); - if (cm->show_existing_frame) { - // Show an existing frame directly. 
- const int frame_to_show = cm->ref_frame_map[vpx_rb_read_literal(rb, 3)]; - lock_buffer_pool(pool); - if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1) { - unlock_buffer_pool(pool); - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Buffer %d does not contain a decoded frame", - frame_to_show); - } - - ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show); - unlock_buffer_pool(pool); - pbi->refresh_frame_flags = 0; - cm->lf.filter_level = 0; - cm->show_frame = 1; - - if (pbi->frame_parallel_decode) { - for (i = 0; i < REF_FRAMES; ++i) - cm->next_ref_frame_map[i] = cm->ref_frame_map[i]; - } - return 0; - } - - cm->frame_type = (FRAME_TYPE) vpx_rb_read_bit(rb); - cm->show_frame = vpx_rb_read_bit(rb); - cm->error_resilient_mode = vpx_rb_read_bit(rb); - - if (cm->frame_type == KEY_FRAME) { - if (!vp9_read_sync_code(rb)) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Invalid frame sync code"); - - read_bitdepth_colorspace_sampling(cm, rb); - pbi->refresh_frame_flags = (1 << REF_FRAMES) - 1; - - for (i = 0; i < REFS_PER_FRAME; ++i) { - cm->frame_refs[i].idx = INVALID_IDX; - cm->frame_refs[i].buf = NULL; - } - - setup_frame_size(cm, rb); - if (pbi->need_resync) { - memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); - pbi->need_resync = 0; - } - } else { - cm->intra_only = cm->show_frame ? 0 : vpx_rb_read_bit(rb); - - cm->reset_frame_context = cm->error_resilient_mode ? - 0 : vpx_rb_read_literal(rb, 2); - - if (cm->intra_only) { - if (!vp9_read_sync_code(rb)) - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, - "Invalid frame sync code"); - if (cm->profile > PROFILE_0) { - read_bitdepth_colorspace_sampling(cm, rb); - } else { - // NOTE: The intra-only frame header does not include the specification - // of either the color format or color sub-sampling in profile 0. VP9 - // specifies that the default color format should be YUV 4:2:0 in this - // case (normative). 
- cm->color_space = VPX_CS_BT_601; - cm->color_range = VPX_CR_STUDIO_RANGE; - cm->subsampling_y = cm->subsampling_x = 1; - cm->bit_depth = VPX_BITS_8; -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth = 0; -#endif - } - - pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES); - setup_frame_size(cm, rb); - if (pbi->need_resync) { - memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); - pbi->need_resync = 0; - } - } else if (pbi->need_resync != 1) { /* Skip if need resync */ - pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES); - for (i = 0; i < REFS_PER_FRAME; ++i) { - const int ref = vpx_rb_read_literal(rb, REF_FRAMES_LOG2); - const int idx = cm->ref_frame_map[ref]; - RefBuffer *const ref_frame = &cm->frame_refs[i]; - ref_frame->idx = idx; - ref_frame->buf = &frame_bufs[idx].buf; - cm->ref_frame_sign_bias[LAST_FRAME + i] = vpx_rb_read_bit(rb); - } - - setup_frame_size_with_refs(cm, rb); - - cm->allow_high_precision_mv = vpx_rb_read_bit(rb); - cm->interp_filter = read_interp_filter(rb); - - for (i = 0; i < REFS_PER_FRAME; ++i) { - RefBuffer *const ref_buf = &cm->frame_refs[i]; -#if CONFIG_VP9_HIGHBITDEPTH - vp9_setup_scale_factors_for_frame(&ref_buf->sf, - ref_buf->buf->y_crop_width, - ref_buf->buf->y_crop_height, - cm->width, cm->height, - cm->use_highbitdepth); -#else - vp9_setup_scale_factors_for_frame(&ref_buf->sf, - ref_buf->buf->y_crop_width, - ref_buf->buf->y_crop_height, - cm->width, cm->height); -#endif - } - } - } -#if CONFIG_VP9_HIGHBITDEPTH - get_frame_new_buffer(cm)->bit_depth = cm->bit_depth; -#endif - get_frame_new_buffer(cm)->color_space = cm->color_space; - get_frame_new_buffer(cm)->color_range = cm->color_range; - get_frame_new_buffer(cm)->render_width = cm->render_width; - get_frame_new_buffer(cm)->render_height = cm->render_height; - - if (pbi->need_resync) { - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Keyframe / intra-only frame required to reset decoder" - " state"); - } - - if 
(!cm->error_resilient_mode) { - cm->refresh_frame_context = vpx_rb_read_bit(rb); - cm->frame_parallel_decoding_mode = vpx_rb_read_bit(rb); - if (!cm->frame_parallel_decoding_mode) - vp9_zero(cm->counts); - } else { - cm->refresh_frame_context = 0; - cm->frame_parallel_decoding_mode = 1; - } - - // This flag will be overridden by the call to vp9_setup_past_independence - // below, forcing the use of context 0 for those frame types. - cm->frame_context_idx = vpx_rb_read_literal(rb, FRAME_CONTEXTS_LOG2); - - // Generate next_ref_frame_map. - lock_buffer_pool(pool); - for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { - if (mask & 1) { - cm->next_ref_frame_map[ref_index] = cm->new_fb_idx; - ++frame_bufs[cm->new_fb_idx].ref_count; - } else { - cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index]; - } - // Current thread holds the reference frame. - if (cm->ref_frame_map[ref_index] >= 0) - ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count; - ++ref_index; - } - - for (; ref_index < REF_FRAMES; ++ref_index) { - cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index]; - // Current thread holds the reference frame. 
- if (cm->ref_frame_map[ref_index] >= 0) - ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count; - } - unlock_buffer_pool(pool); - pbi->hold_ref_buf = 1; - - if (frame_is_intra_only(cm) || cm->error_resilient_mode) - vp9_setup_past_independence(cm); - - setup_loopfilter(&cm->lf, rb); - setup_quantization(cm, &pbi->mb, rb); - setup_segmentation(&cm->seg, rb); - setup_segmentation_dequant(cm); - - setup_tile_info(cm, rb); - sz = vpx_rb_read_literal(rb, 16); - - if (sz == 0) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Invalid header size"); - - return sz; -} - -static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data, - size_t partition_size) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - FRAME_CONTEXT *const fc = cm->fc; - vpx_reader r; - int k; - - if (vpx_reader_init(&r, data, partition_size, pbi->decrypt_cb, - pbi->decrypt_state)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate bool decoder 0"); - - cm->tx_mode = xd->lossless ? 
ONLY_4X4 : read_tx_mode(&r); - if (cm->tx_mode == TX_MODE_SELECT) - read_tx_mode_probs(&fc->tx_probs, &r); - read_coef_probs(fc, cm->tx_mode, &r); - - for (k = 0; k < SKIP_CONTEXTS; ++k) - vp9_diff_update_prob(&r, &fc->skip_probs[k]); - - if (!frame_is_intra_only(cm)) { - nmv_context *const nmvc = &fc->nmvc; - int i, j; - - read_inter_mode_probs(fc, &r); - - if (cm->interp_filter == SWITCHABLE) - read_switchable_interp_probs(fc, &r); - - for (i = 0; i < INTRA_INTER_CONTEXTS; i++) - vp9_diff_update_prob(&r, &fc->intra_inter_prob[i]); - - cm->reference_mode = read_frame_reference_mode(cm, &r); - if (cm->reference_mode != SINGLE_REFERENCE) - setup_compound_reference_mode(cm); - read_frame_reference_mode_probs(cm, &r); - - for (j = 0; j < BLOCK_SIZE_GROUPS; j++) - for (i = 0; i < INTRA_MODES - 1; ++i) - vp9_diff_update_prob(&r, &fc->y_mode_prob[j][i]); - - for (j = 0; j < PARTITION_CONTEXTS; ++j) - for (i = 0; i < PARTITION_TYPES - 1; ++i) - vp9_diff_update_prob(&r, &fc->partition_prob[j][i]); - - read_mv_probs(nmvc, cm->allow_high_precision_mv, &r); - } - - return vpx_reader_has_error(&r); -} - -static struct vpx_read_bit_buffer *init_read_bit_buffer( - VP9Decoder *pbi, - struct vpx_read_bit_buffer *rb, - const uint8_t *data, - const uint8_t *data_end, - uint8_t clear_data[MAX_VP9_HEADER_SIZE]) { - rb->bit_offset = 0; - rb->error_handler = error_handler; - rb->error_handler_data = &pbi->common; - if (pbi->decrypt_cb) { - const int n = (int)VPXMIN(MAX_VP9_HEADER_SIZE, data_end - data); - pbi->decrypt_cb(pbi->decrypt_state, data, clear_data, n); - rb->bit_buffer = clear_data; - rb->bit_buffer_end = clear_data + n; - } else { - rb->bit_buffer = data; - rb->bit_buffer_end = data_end; - } - return rb; -} - -//------------------------------------------------------------------------------ - -int vp9_read_sync_code(struct vpx_read_bit_buffer *const rb) { - return vpx_rb_read_literal(rb, 8) == VP9_SYNC_CODE_0 && - vpx_rb_read_literal(rb, 8) == VP9_SYNC_CODE_1 && - 
vpx_rb_read_literal(rb, 8) == VP9_SYNC_CODE_2; -} - -void vp9_read_frame_size(struct vpx_read_bit_buffer *rb, - int *width, int *height) { - *width = vpx_rb_read_literal(rb, 16) + 1; - *height = vpx_rb_read_literal(rb, 16) + 1; -} - -BITSTREAM_PROFILE vp9_read_profile(struct vpx_read_bit_buffer *rb) { - int profile = vpx_rb_read_bit(rb); - profile |= vpx_rb_read_bit(rb) << 1; - if (profile > 2) - profile += vpx_rb_read_bit(rb); - return (BITSTREAM_PROFILE) profile; -} - -void vp9_decode_frame(VP9Decoder *pbi, - const uint8_t *data, const uint8_t *data_end, - const uint8_t **p_data_end) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - struct vpx_read_bit_buffer rb; - int context_updated = 0; - uint8_t clear_data[MAX_VP9_HEADER_SIZE]; - const size_t first_partition_size = read_uncompressed_header(pbi, - init_read_bit_buffer(pbi, &rb, data, data_end, clear_data)); - const int tile_rows = 1 << cm->log2_tile_rows; - const int tile_cols = 1 << cm->log2_tile_cols; - YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); - xd->cur_buf = new_fb; - - if (!first_partition_size) { - // showing a frame directly - *p_data_end = data + (cm->profile <= PROFILE_2 ? 
1 : 2); - return; - } - - data += vpx_rb_bytes_read(&rb); - if (!read_is_valid(data, first_partition_size, data_end)) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt header length"); - - cm->use_prev_frame_mvs = !cm->error_resilient_mode && - cm->width == cm->last_width && - cm->height == cm->last_height && - !cm->last_intra_only && - cm->last_show_frame && - (cm->last_frame_type != KEY_FRAME); - - vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y); - - *cm->fc = cm->frame_contexts[cm->frame_context_idx]; - if (!cm->fc->initialized) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Uninitialized entropy context."); - - xd->corrupted = 0; - new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); - if (new_fb->corrupted) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Decode failed. Frame data header is corrupted."); - - if (cm->lf.filter_level && !cm->skip_loop_filter) { - vp9_loop_filter_frame_init(cm, cm->lf.filter_level); - } - - // If encoded in frame parallel mode, frame context is ready after decoding - // the frame header. - if (pbi->frame_parallel_decode && cm->frame_parallel_decoding_mode) { - VPxWorker *const worker = pbi->frame_worker_owner; - FrameWorkerData *const frame_worker_data = worker->data1; - if (cm->refresh_frame_context) { - context_updated = 1; - cm->frame_contexts[cm->frame_context_idx] = *cm->fc; - } - vp9_frameworker_lock_stats(worker); - pbi->cur_buf->row = -1; - pbi->cur_buf->col = -1; - frame_worker_data->frame_context_ready = 1; - // Signal the main thread that context is ready. - vp9_frameworker_signal_stats(worker); - vp9_frameworker_unlock_stats(worker); - } - - if (pbi->tile_worker_data == NULL || - (tile_cols * tile_rows) != pbi->total_tiles) { - const int num_tile_workers = tile_cols * tile_rows + - ((pbi->max_threads > 1) ? 
pbi->max_threads : 0); - const size_t twd_size = num_tile_workers * sizeof(*pbi->tile_worker_data); - // Ensure tile data offsets will be properly aligned. This may fail on - // platforms without DECLARE_ALIGNED(). - assert((sizeof(*pbi->tile_worker_data) % 16) == 0); - vpx_free(pbi->tile_worker_data); - CHECK_MEM_ERROR(cm, pbi->tile_worker_data, vpx_memalign(32, twd_size)); - pbi->total_tiles = tile_rows * tile_cols; - } - - if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) { - // Multi-threaded tile decoder - *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); - if (!xd->corrupted) { - if (!cm->skip_loop_filter) { - // If multiple threads are used to decode tiles, then we use those - // threads to do parallel loopfiltering. - vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, - cm->lf.filter_level, 0, 0, pbi->tile_workers, - pbi->num_tile_workers, &pbi->lf_row_sync); - } - } else { - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Decode failed. Frame data is corrupted."); - } - } else { - *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end); - } - - if (!xd->corrupted) { - if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) { - vp9_adapt_coef_probs(cm); - - if (!frame_is_intra_only(cm)) { - vp9_adapt_mode_probs(cm); - vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); - } - } - } else { - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Decode failed. Frame data is corrupted."); - } - - // Non frame parallel update frame context here. - if (cm->refresh_frame_context && !context_updated) - cm->frame_contexts[cm->frame_context_idx] = *cm->fc; -} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.h b/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.h deleted file mode 100644 index ce33cbdbd9..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_DECODER_VP9_DECODEFRAME_H_ -#define VP9_DECODER_VP9_DECODEFRAME_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "vp9/common/vp9_enums.h" - -struct VP9Decoder; -struct vpx_read_bit_buffer; - -int vp9_read_sync_code(struct vpx_read_bit_buffer *const rb); -void vp9_read_frame_size(struct vpx_read_bit_buffer *rb, - int *width, int *height); -BITSTREAM_PROFILE vp9_read_profile(struct vpx_read_bit_buffer *rb); - -void vp9_decode_frame(struct VP9Decoder *pbi, - const uint8_t *data, const uint8_t *data_end, - const uint8_t **p_data_end); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_DECODEFRAME_H_ diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decodemv.c b/thirdparty/libvpx/vp9/decoder/vp9_decodemv.c deleted file mode 100644 index ffc6839ad1..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_decodemv.c +++ /dev/null @@ -1,911 +0,0 @@ -/* - Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <assert.h> - -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_entropy.h" -#include "vp9/common/vp9_entropymode.h" -#include "vp9/common/vp9_entropymv.h" -#include "vp9/common/vp9_mvref_common.h" -#include "vp9/common/vp9_pred_common.h" -#include "vp9/common/vp9_reconinter.h" -#include "vp9/common/vp9_seg_common.h" - -#include "vp9/decoder/vp9_decodemv.h" -#include "vp9/decoder/vp9_decodeframe.h" - -#include "vpx_dsp/vpx_dsp_common.h" - -static PREDICTION_MODE read_intra_mode(vpx_reader *r, const vpx_prob *p) { - return (PREDICTION_MODE)vpx_read_tree(r, vp9_intra_mode_tree, p); -} - -static PREDICTION_MODE read_intra_mode_y(VP9_COMMON *cm, MACROBLOCKD *xd, - vpx_reader *r, int size_group) { - const PREDICTION_MODE y_mode = - read_intra_mode(r, cm->fc->y_mode_prob[size_group]); - FRAME_COUNTS *counts = xd->counts; - if (counts) - ++counts->y_mode[size_group][y_mode]; - return y_mode; -} - -static PREDICTION_MODE read_intra_mode_uv(VP9_COMMON *cm, MACROBLOCKD *xd, - vpx_reader *r, - PREDICTION_MODE y_mode) { - const PREDICTION_MODE uv_mode = read_intra_mode(r, - cm->fc->uv_mode_prob[y_mode]); - FRAME_COUNTS *counts = xd->counts; - if (counts) - ++counts->uv_mode[y_mode][uv_mode]; - return uv_mode; -} - -static PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, MACROBLOCKD *xd, - vpx_reader *r, int ctx) { - const int mode = vpx_read_tree(r, vp9_inter_mode_tree, - cm->fc->inter_mode_probs[ctx]); - FRAME_COUNTS *counts = xd->counts; - if (counts) - ++counts->inter_mode[ctx][mode]; - - return NEARESTMV + mode; -} - -static int read_segment_id(vpx_reader *r, const struct segmentation *seg) { - return vpx_read_tree(r, vp9_segment_tree, seg->tree_probs); -} - -static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, - TX_SIZE max_tx_size, vpx_reader *r) { - FRAME_COUNTS *counts = xd->counts; - const int ctx = get_tx_size_context(xd); - const vpx_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc->tx_probs); - int tx_size = 
vpx_read(r, tx_probs[0]); - if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) { - tx_size += vpx_read(r, tx_probs[1]); - if (tx_size != TX_8X8 && max_tx_size >= TX_32X32) - tx_size += vpx_read(r, tx_probs[2]); - } - - if (counts) - ++get_tx_counts(max_tx_size, ctx, &counts->tx)[tx_size]; - return (TX_SIZE)tx_size; -} - -static INLINE TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, - int allow_select, vpx_reader *r) { - TX_MODE tx_mode = cm->tx_mode; - BLOCK_SIZE bsize = xd->mi[0]->sb_type; - const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; - if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8) - return read_selected_tx_size(cm, xd, max_tx_size, r); - else - return VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]); -} - -static int dec_get_segment_id(const VP9_COMMON *cm, const uint8_t *segment_ids, - int mi_offset, int x_mis, int y_mis) { - int x, y, segment_id = INT_MAX; - - for (y = 0; y < y_mis; y++) - for (x = 0; x < x_mis; x++) - segment_id = - VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]); - - assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); - return segment_id; -} - -static void set_segment_id(VP9_COMMON *cm, int mi_offset, - int x_mis, int y_mis, int segment_id) { - int x, y; - - assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); - - for (y = 0; y < y_mis; y++) - for (x = 0; x < x_mis; x++) - cm->current_frame_seg_map[mi_offset + y * cm->mi_cols + x] = segment_id; -} - -static void copy_segment_id(const VP9_COMMON *cm, - const uint8_t *last_segment_ids, - uint8_t *current_segment_ids, - int mi_offset, int x_mis, int y_mis) { - int x, y; - - for (y = 0; y < y_mis; y++) - for (x = 0; x < x_mis; x++) - current_segment_ids[mi_offset + y * cm->mi_cols + x] = last_segment_ids ? 
- last_segment_ids[mi_offset + y * cm->mi_cols + x] : 0; -} - -static int read_intra_segment_id(VP9_COMMON *const cm, int mi_offset, - int x_mis, int y_mis, - vpx_reader *r) { - struct segmentation *const seg = &cm->seg; - int segment_id; - - if (!seg->enabled) - return 0; // Default for disabled segmentation - - if (!seg->update_map) { - copy_segment_id(cm, cm->last_frame_seg_map, cm->current_frame_seg_map, - mi_offset, x_mis, y_mis); - return 0; - } - - segment_id = read_segment_id(r, seg); - set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id); - return segment_id; -} - -static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, - int mi_row, int mi_col, vpx_reader *r, - int x_mis, int y_mis) { - struct segmentation *const seg = &cm->seg; - MODE_INFO *const mi = xd->mi[0]; - int predicted_segment_id, segment_id; - const int mi_offset = mi_row * cm->mi_cols + mi_col; - - if (!seg->enabled) - return 0; // Default for disabled segmentation - - predicted_segment_id = cm->last_frame_seg_map ? - dec_get_segment_id(cm, cm->last_frame_seg_map, mi_offset, x_mis, y_mis) : - 0; - - if (!seg->update_map) { - copy_segment_id(cm, cm->last_frame_seg_map, cm->current_frame_seg_map, - mi_offset, x_mis, y_mis); - return predicted_segment_id; - } - - if (seg->temporal_update) { - const vpx_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd); - mi->seg_id_predicted = vpx_read(r, pred_prob); - segment_id = mi->seg_id_predicted ? 
predicted_segment_id - : read_segment_id(r, seg); - } else { - segment_id = read_segment_id(r, seg); - } - set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id); - return segment_id; -} - -static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd, - int segment_id, vpx_reader *r) { - if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { - return 1; - } else { - const int ctx = vp9_get_skip_context(xd); - const int skip = vpx_read(r, cm->fc->skip_probs[ctx]); - FRAME_COUNTS *counts = xd->counts; - if (counts) - ++counts->skip[ctx][skip]; - return skip; - } -} - -static void read_intra_frame_mode_info(VP9_COMMON *const cm, - MACROBLOCKD *const xd, - int mi_row, int mi_col, vpx_reader *r, - int x_mis, int y_mis) { - MODE_INFO *const mi = xd->mi[0]; - const MODE_INFO *above_mi = xd->above_mi; - const MODE_INFO *left_mi = xd->left_mi; - const BLOCK_SIZE bsize = mi->sb_type; - int i; - const int mi_offset = mi_row * cm->mi_cols + mi_col; - - mi->segment_id = read_intra_segment_id(cm, mi_offset, x_mis, y_mis, r); - mi->skip = read_skip(cm, xd, mi->segment_id, r); - mi->tx_size = read_tx_size(cm, xd, 1, r); - mi->ref_frame[0] = INTRA_FRAME; - mi->ref_frame[1] = NONE; - - switch (bsize) { - case BLOCK_4X4: - for (i = 0; i < 4; ++i) - mi->bmi[i].as_mode = - read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, i)); - mi->mode = mi->bmi[3].as_mode; - break; - case BLOCK_4X8: - mi->bmi[0].as_mode = mi->bmi[2].as_mode = - read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); - mi->bmi[1].as_mode = mi->bmi[3].as_mode = mi->mode = - read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 1)); - break; - case BLOCK_8X4: - mi->bmi[0].as_mode = mi->bmi[1].as_mode = - read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); - mi->bmi[2].as_mode = mi->bmi[3].as_mode = mi->mode = - read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 2)); - break; - default: - mi->mode = read_intra_mode(r, - get_y_mode_probs(mi, above_mi, left_mi, 0)); - } - - 
mi->uv_mode = read_intra_mode(r, vp9_kf_uv_mode_prob[mi->mode]); -} - -static int read_mv_component(vpx_reader *r, - const nmv_component *mvcomp, int usehp) { - int mag, d, fr, hp; - const int sign = vpx_read(r, mvcomp->sign); - const int mv_class = vpx_read_tree(r, vp9_mv_class_tree, mvcomp->classes); - const int class0 = mv_class == MV_CLASS_0; - - // Integer part - if (class0) { - d = vpx_read_tree(r, vp9_mv_class0_tree, mvcomp->class0); - mag = 0; - } else { - int i; - const int n = mv_class + CLASS0_BITS - 1; // number of bits - - d = 0; - for (i = 0; i < n; ++i) - d |= vpx_read(r, mvcomp->bits[i]) << i; - mag = CLASS0_SIZE << (mv_class + 2); - } - - // Fractional part - fr = vpx_read_tree(r, vp9_mv_fp_tree, class0 ? mvcomp->class0_fp[d] - : mvcomp->fp); - - // High precision part (if hp is not used, the default value of the hp is 1) - hp = usehp ? vpx_read(r, class0 ? mvcomp->class0_hp : mvcomp->hp) - : 1; - - // Result - mag += ((d << 3) | (fr << 1) | hp) + 1; - return sign ? -mag : mag; -} - -static INLINE void read_mv(vpx_reader *r, MV *mv, const MV *ref, - const nmv_context *ctx, - nmv_context_counts *counts, int allow_hp) { - const MV_JOINT_TYPE joint_type = - (MV_JOINT_TYPE)vpx_read_tree(r, vp9_mv_joint_tree, ctx->joints); - const int use_hp = allow_hp && use_mv_hp(ref); - MV diff = {0, 0}; - - if (mv_joint_vertical(joint_type)) - diff.row = read_mv_component(r, &ctx->comps[0], use_hp); - - if (mv_joint_horizontal(joint_type)) - diff.col = read_mv_component(r, &ctx->comps[1], use_hp); - - vp9_inc_mv(&diff, counts); - - mv->row = ref->row + diff.row; - mv->col = ref->col + diff.col; -} - -static REFERENCE_MODE read_block_reference_mode(VP9_COMMON *cm, - const MACROBLOCKD *xd, - vpx_reader *r) { - if (cm->reference_mode == REFERENCE_MODE_SELECT) { - const int ctx = vp9_get_reference_mode_context(cm, xd); - const REFERENCE_MODE mode = - (REFERENCE_MODE)vpx_read(r, cm->fc->comp_inter_prob[ctx]); - FRAME_COUNTS *counts = xd->counts; - if (counts) - 
++counts->comp_inter[ctx][mode]; - return mode; // SINGLE_REFERENCE or COMPOUND_REFERENCE - } else { - return cm->reference_mode; - } -} - -// Read the referncence frame -static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, - vpx_reader *r, - int segment_id, MV_REFERENCE_FRAME ref_frame[2]) { - FRAME_CONTEXT *const fc = cm->fc; - FRAME_COUNTS *counts = xd->counts; - - if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { - ref_frame[0] = (MV_REFERENCE_FRAME)get_segdata(&cm->seg, segment_id, - SEG_LVL_REF_FRAME); - ref_frame[1] = NONE; - } else { - const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r); - // FIXME(rbultje) I'm pretty sure this breaks segmentation ref frame coding - if (mode == COMPOUND_REFERENCE) { - const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; - const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd); - const int bit = vpx_read(r, fc->comp_ref_prob[ctx]); - if (counts) - ++counts->comp_ref[ctx][bit]; - ref_frame[idx] = cm->comp_fixed_ref; - ref_frame[!idx] = cm->comp_var_ref[bit]; - } else if (mode == SINGLE_REFERENCE) { - const int ctx0 = vp9_get_pred_context_single_ref_p1(xd); - const int bit0 = vpx_read(r, fc->single_ref_prob[ctx0][0]); - if (counts) - ++counts->single_ref[ctx0][0][bit0]; - if (bit0) { - const int ctx1 = vp9_get_pred_context_single_ref_p2(xd); - const int bit1 = vpx_read(r, fc->single_ref_prob[ctx1][1]); - if (counts) - ++counts->single_ref[ctx1][1][bit1]; - ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME; - } else { - ref_frame[0] = LAST_FRAME; - } - - ref_frame[1] = NONE; - } else { - assert(0 && "Invalid prediction mode."); - } - } -} - -// TODO(slavarnway): Move this decoder version of -// vp9_get_pred_context_switchable_interp() to vp9_pred_common.h and update the -// encoder. 
-// -// Returns a context number for the given MB prediction signal -static int dec_get_pred_context_switchable_interp(const MACROBLOCKD *xd) { - // Note: - // The mode info data structure has a one element border above and to the - // left of the entries corresponding to real macroblocks. - // The prediction flags in these dummy entries are initialized to 0. - const MODE_INFO *const left_mi = xd->left_mi; - const int left_type = left_mi ? left_mi->interp_filter : SWITCHABLE_FILTERS; - const MODE_INFO *const above_mi = xd->above_mi; - const int above_type = above_mi ? above_mi->interp_filter - : SWITCHABLE_FILTERS; - - if (left_type == above_type) - return left_type; - else if (left_type == SWITCHABLE_FILTERS) - return above_type; - else if (above_type == SWITCHABLE_FILTERS) - return left_type; - else - return SWITCHABLE_FILTERS; -} - -static INLINE INTERP_FILTER read_switchable_interp_filter( - VP9_COMMON *const cm, MACROBLOCKD *const xd, - vpx_reader *r) { - const int ctx = dec_get_pred_context_switchable_interp(xd); - const INTERP_FILTER type = - (INTERP_FILTER)vpx_read_tree(r, vp9_switchable_interp_tree, - cm->fc->switchable_interp_prob[ctx]); - FRAME_COUNTS *counts = xd->counts; - if (counts) - ++counts->switchable_interp[ctx][type]; - return type; -} - -static void read_intra_block_mode_info(VP9_COMMON *const cm, - MACROBLOCKD *const xd, MODE_INFO *mi, - vpx_reader *r) { - const BLOCK_SIZE bsize = mi->sb_type; - int i; - - switch (bsize) { - case BLOCK_4X4: - for (i = 0; i < 4; ++i) - mi->bmi[i].as_mode = read_intra_mode_y(cm, xd, r, 0); - mi->mode = mi->bmi[3].as_mode; - break; - case BLOCK_4X8: - mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode_y(cm, xd, - r, 0); - mi->bmi[1].as_mode = mi->bmi[3].as_mode = mi->mode = - read_intra_mode_y(cm, xd, r, 0); - break; - case BLOCK_8X4: - mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode_y(cm, xd, - r, 0); - mi->bmi[2].as_mode = mi->bmi[3].as_mode = mi->mode = - read_intra_mode_y(cm, xd, r, 0); - 
break; - default: - mi->mode = read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]); - } - - mi->uv_mode = read_intra_mode_uv(cm, xd, r, mi->mode); - - // Initialize interp_filter here so we do not have to check for inter block - // modes in dec_get_pred_context_switchable_interp() - mi->interp_filter = SWITCHABLE_FILTERS; - - mi->ref_frame[0] = INTRA_FRAME; - mi->ref_frame[1] = NONE; -} - -static INLINE int is_mv_valid(const MV *mv) { - return mv->row > MV_LOW && mv->row < MV_UPP && - mv->col > MV_LOW && mv->col < MV_UPP; -} - -static INLINE void copy_mv_pair(int_mv *dst, const int_mv *src) { - memcpy(dst, src, sizeof(*dst) * 2); -} - -static INLINE void zero_mv_pair(int_mv *dst) { - memset(dst, 0, sizeof(*dst) * 2); -} - -static INLINE int assign_mv(VP9_COMMON *cm, MACROBLOCKD *xd, - PREDICTION_MODE mode, - int_mv mv[2], int_mv ref_mv[2], - int_mv near_nearest_mv[2], - int is_compound, int allow_hp, vpx_reader *r) { - int i; - int ret = 1; - - switch (mode) { - case NEWMV: { - FRAME_COUNTS *counts = xd->counts; - nmv_context_counts *const mv_counts = counts ? 
&counts->mv : NULL; - for (i = 0; i < 1 + is_compound; ++i) { - read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc->nmvc, mv_counts, - allow_hp); - ret = ret && is_mv_valid(&mv[i].as_mv); - } - break; - } - case NEARMV: - case NEARESTMV: { - copy_mv_pair(mv, near_nearest_mv); - break; - } - case ZEROMV: { - zero_mv_pair(mv); - break; - } - default: { - return 0; - } - } - return ret; -} - -static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, - int segment_id, vpx_reader *r) { - if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { - return get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != INTRA_FRAME; - } else { - const int ctx = get_intra_inter_context(xd); - const int is_inter = vpx_read(r, cm->fc->intra_inter_prob[ctx]); - FRAME_COUNTS *counts = xd->counts; - if (counts) - ++counts->intra_inter[ctx][is_inter]; - return is_inter; - } -} - -static void dec_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *best_mv, - int refmv_count) { - int i; - - // Make sure all the candidates are properly clamped etc - for (i = 0; i < refmv_count; ++i) { - lower_mv_precision(&mvlist[i].as_mv, allow_hp); - *best_mv = mvlist[i]; - } -} - -static void fpm_sync(void *const data, int mi_row) { - VP9Decoder *const pbi = (VP9Decoder *)data; - vp9_frameworker_wait(pbi->frame_worker_owner, pbi->common.prev_frame, - mi_row << MI_BLOCK_SIZE_LOG2); -} - -// This macro is used to add a motion vector mv_ref list if it isn't -// already in the list. If it's the second motion vector or early_break -// it will also skip all additional processing and jump to Done! 
-#define ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done) \ - do { \ - if (refmv_count) { \ - if ((mv).as_int != (mv_ref_list)[0].as_int) { \ - (mv_ref_list)[(refmv_count)] = (mv); \ - refmv_count++; \ - goto Done; \ - } \ - } else { \ - (mv_ref_list)[(refmv_count)++] = (mv); \ - if (early_break) \ - goto Done; \ - } \ - } while (0) - -// If either reference frame is different, not INTRA, and they -// are different from each other scale and add the mv to our list. -#define IF_DIFF_REF_FRAME_ADD_MV_EB(mbmi, ref_frame, ref_sign_bias, \ - refmv_count, mv_ref_list, Done) \ - do { \ - if (is_inter_block(mbmi)) { \ - if ((mbmi)->ref_frame[0] != ref_frame) \ - ADD_MV_REF_LIST_EB(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \ - refmv_count, mv_ref_list, Done); \ - if (has_second_ref(mbmi) && \ - (mbmi)->ref_frame[1] != ref_frame && \ - (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \ - ADD_MV_REF_LIST_EB(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \ - refmv_count, mv_ref_list, Done); \ - } \ - } while (0) - -// This function searches the neighborhood of a given MB/SB -// to try and find candidate reference vectors. -static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, - PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame, - const POSITION *const mv_ref_search, - int_mv *mv_ref_list, - int mi_row, int mi_col, int block, int is_sub8x8, - find_mv_refs_sync sync, void *const data) { - const int *ref_sign_bias = cm->ref_frame_sign_bias; - int i, refmv_count = 0; - int different_ref_found = 0; - const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ? - cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL; - const TileInfo *const tile = &xd->tile; - // If mode is nearestmv or newmv (uses nearestmv as a reference) then stop - // searching after the first mv is found. 
- const int early_break = (mode != NEARMV); - - // Blank the reference vector list - memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); - - i = 0; - if (is_sub8x8) { - // If the size < 8x8 we get the mv from the bmi substructure for the - // nearest two blocks. - for (i = 0; i < 2; ++i) { - const POSITION *const mv_ref = &mv_ref_search[i]; - if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *const candidate_mi = - xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; - different_ref_found = 1; - - if (candidate_mi->ref_frame[0] == ref_frame) - ADD_MV_REF_LIST_EB( - get_sub_block_mv(candidate_mi, 0, mv_ref->col, block), - refmv_count, mv_ref_list, Done); - else if (candidate_mi->ref_frame[1] == ref_frame) - ADD_MV_REF_LIST_EB( - get_sub_block_mv(candidate_mi, 1, mv_ref->col, block), - refmv_count, mv_ref_list, Done); - } - } - } - - // Check the rest of the neighbors in much the same way - // as before except we don't need to keep track of sub blocks or - // mode counts. - for (; i < MVREF_NEIGHBOURS; ++i) { - const POSITION *const mv_ref = &mv_ref_search[i]; - if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *const candidate = - xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; - different_ref_found = 1; - - if (candidate->ref_frame[0] == ref_frame) - ADD_MV_REF_LIST_EB(candidate->mv[0], refmv_count, mv_ref_list, Done); - else if (candidate->ref_frame[1] == ref_frame) - ADD_MV_REF_LIST_EB(candidate->mv[1], refmv_count, mv_ref_list, Done); - } - } - - // TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast - // on windows platform. The sync here is unnecessary if use_prev_frame_mvs - // is 0. But after removing it, there will be hang in the unit test on windows - // due to several threads waiting for a thread's signal. 
-#if defined(_WIN32) && !HAVE_PTHREAD_H - if (cm->frame_parallel_decode && sync != NULL) { - sync(data, mi_row); - } -#endif - - // Check the last frame's mode and mv info. - if (prev_frame_mvs) { - // Synchronize here for frame parallel decode if sync function is provided. - if (cm->frame_parallel_decode && sync != NULL) { - sync(data, mi_row); - } - - if (prev_frame_mvs->ref_frame[0] == ref_frame) { - ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done); - } else if (prev_frame_mvs->ref_frame[1] == ref_frame) { - ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done); - } - } - - // Since we couldn't find 2 mvs from the same reference frame - // go back through the neighbors and find motion vectors from - // different reference frames. - if (different_ref_found) { - for (i = 0; i < MVREF_NEIGHBOURS; ++i) { - const POSITION *mv_ref = &mv_ref_search[i]; - if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *const candidate = - xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; - - // If the candidate is INTRA we don't want to consider its mv. - IF_DIFF_REF_FRAME_ADD_MV_EB(candidate, ref_frame, ref_sign_bias, - refmv_count, mv_ref_list, Done); - } - } - } - - // Since we still don't have a candidate we'll try the last frame. 
- if (prev_frame_mvs) { - if (prev_frame_mvs->ref_frame[0] != ref_frame && - prev_frame_mvs->ref_frame[0] > INTRA_FRAME) { - int_mv mv = prev_frame_mvs->mv[0]; - if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] != - ref_sign_bias[ref_frame]) { - mv.as_mv.row *= -1; - mv.as_mv.col *= -1; - } - ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done); - } - - if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME && - prev_frame_mvs->ref_frame[1] != ref_frame && - prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) { - int_mv mv = prev_frame_mvs->mv[1]; - if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] != - ref_sign_bias[ref_frame]) { - mv.as_mv.row *= -1; - mv.as_mv.col *= -1; - } - ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done); - } - } - - if (mode == NEARMV) - refmv_count = MAX_MV_REF_CANDIDATES; - else - // we only care about the nearestmv for the remaining modes - refmv_count = 1; - - Done: - // Clamp vectors - for (i = 0; i < refmv_count; ++i) - clamp_mv_ref(&mv_ref_list[i].as_mv, xd); - - return refmv_count; -} - -static void append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, - const POSITION *const mv_ref_search, - PREDICTION_MODE b_mode, int block, - int ref, int mi_row, int mi_col, - int_mv *best_sub8x8) { - int_mv mv_list[MAX_MV_REF_CANDIDATES]; - MODE_INFO *const mi = xd->mi[0]; - b_mode_info *bmi = mi->bmi; - int n; - int refmv_count; - - assert(MAX_MV_REF_CANDIDATES == 2); - - refmv_count = dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], - mv_ref_search, mv_list, mi_row, mi_col, block, - 1, NULL, NULL); - - switch (block) { - case 0: - best_sub8x8->as_int = mv_list[refmv_count - 1].as_int; - break; - case 1: - case 2: - if (b_mode == NEARESTMV) { - best_sub8x8->as_int = bmi[0].as_mv[ref].as_int; - } else { - best_sub8x8->as_int = 0; - for (n = 0; n < refmv_count; ++n) - if (bmi[0].as_mv[ref].as_int != mv_list[n].as_int) { - best_sub8x8->as_int = mv_list[n].as_int; - break; - } - } - break; - case 3: - if (b_mode == NEARESTMV) { - 
best_sub8x8->as_int = bmi[2].as_mv[ref].as_int; - } else { - int_mv candidates[2 + MAX_MV_REF_CANDIDATES]; - candidates[0] = bmi[1].as_mv[ref]; - candidates[1] = bmi[0].as_mv[ref]; - candidates[2] = mv_list[0]; - candidates[3] = mv_list[1]; - best_sub8x8->as_int = 0; - for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n) - if (bmi[2].as_mv[ref].as_int != candidates[n].as_int) { - best_sub8x8->as_int = candidates[n].as_int; - break; - } - } - break; - default: - assert(0 && "Invalid block index."); - } -} - -static uint8_t get_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd, - const POSITION *const mv_ref_search, - int mi_row, int mi_col) { - int i; - int context_counter = 0; - const TileInfo *const tile = &xd->tile; - - // Get mode count from nearest 2 blocks - for (i = 0; i < 2; ++i) { - const POSITION *const mv_ref = &mv_ref_search[i]; - if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *const candidate = - xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; - // Keep counts for entropy encoding. 
- context_counter += mode_2_counter[candidate->mode]; - } - } - - return counter_to_context[context_counter]; -} - -static void read_inter_block_mode_info(VP9Decoder *const pbi, - MACROBLOCKD *const xd, - MODE_INFO *const mi, - int mi_row, int mi_col, vpx_reader *r) { - VP9_COMMON *const cm = &pbi->common; - const BLOCK_SIZE bsize = mi->sb_type; - const int allow_hp = cm->allow_high_precision_mv; - int_mv best_ref_mvs[2]; - int ref, is_compound; - uint8_t inter_mode_ctx; - const POSITION *const mv_ref_search = mv_ref_blocks[bsize]; - - read_ref_frames(cm, xd, r, mi->segment_id, mi->ref_frame); - is_compound = has_second_ref(mi); - inter_mode_ctx = get_mode_context(cm, xd, mv_ref_search, mi_row, mi_col); - - if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) { - mi->mode = ZEROMV; - if (bsize < BLOCK_8X8) { - vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM, - "Invalid usage of segement feature on small blocks"); - return; - } - } else { - if (bsize >= BLOCK_8X8) - mi->mode = read_inter_mode(cm, xd, r, inter_mode_ctx); - else - // Sub 8x8 blocks use the nearestmv as a ref_mv if the b_mode is NEWMV. - // Setting mode to NEARESTMV forces the search to stop after the nearestmv - // has been found. After b_modes have been read, mode will be overwritten - // by the last b_mode. - mi->mode = NEARESTMV; - - if (mi->mode != ZEROMV) { - for (ref = 0; ref < 1 + is_compound; ++ref) { - int_mv tmp_mvs[MAX_MV_REF_CANDIDATES]; - const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; - int refmv_count; - - refmv_count = dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search, - tmp_mvs, mi_row, mi_col, -1, 0, - fpm_sync, (void *)pbi); - - dec_find_best_ref_mvs(allow_hp, tmp_mvs, &best_ref_mvs[ref], - refmv_count); - } - } - } - - mi->interp_filter = (cm->interp_filter == SWITCHABLE) - ? 
read_switchable_interp_filter(cm, xd, r) - : cm->interp_filter; - - if (bsize < BLOCK_8X8) { - const int num_4x4_w = 1 << xd->bmode_blocks_wl; - const int num_4x4_h = 1 << xd->bmode_blocks_hl; - int idx, idy; - PREDICTION_MODE b_mode; - int_mv best_sub8x8[2]; - for (idy = 0; idy < 2; idy += num_4x4_h) { - for (idx = 0; idx < 2; idx += num_4x4_w) { - const int j = idy * 2 + idx; - b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx); - - if (b_mode == NEARESTMV || b_mode == NEARMV) { - for (ref = 0; ref < 1 + is_compound; ++ref) - append_sub8x8_mvs_for_idx(cm, xd, mv_ref_search, b_mode, j, ref, - mi_row, mi_col, &best_sub8x8[ref]); - } - - if (!assign_mv(cm, xd, b_mode, mi->bmi[j].as_mv, best_ref_mvs, - best_sub8x8, is_compound, allow_hp, r)) { - xd->corrupted |= 1; - break; - } - - if (num_4x4_h == 2) - mi->bmi[j + 2] = mi->bmi[j]; - if (num_4x4_w == 2) - mi->bmi[j + 1] = mi->bmi[j]; - } - } - - mi->mode = b_mode; - - copy_mv_pair(mi->mv, mi->bmi[3].as_mv); - } else { - xd->corrupted |= !assign_mv(cm, xd, mi->mode, mi->mv, best_ref_mvs, - best_ref_mvs, is_compound, allow_hp, r); - } -} - -static void read_inter_frame_mode_info(VP9Decoder *const pbi, - MACROBLOCKD *const xd, - int mi_row, int mi_col, vpx_reader *r, - int x_mis, int y_mis) { - VP9_COMMON *const cm = &pbi->common; - MODE_INFO *const mi = xd->mi[0]; - int inter_block; - - mi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r, x_mis, - y_mis); - mi->skip = read_skip(cm, xd, mi->segment_id, r); - inter_block = read_is_inter_block(cm, xd, mi->segment_id, r); - mi->tx_size = read_tx_size(cm, xd, !mi->skip || !inter_block, r); - - if (inter_block) - read_inter_block_mode_info(pbi, xd, mi, mi_row, mi_col, r); - else - read_intra_block_mode_info(cm, xd, mi, r); -} - -static INLINE void copy_ref_frame_pair(MV_REFERENCE_FRAME *dst, - const MV_REFERENCE_FRAME *src) { - memcpy(dst, src, sizeof(*dst) * 2); -} - -void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, - int mi_row, int mi_col, 
vpx_reader *r, - int x_mis, int y_mis) { - VP9_COMMON *const cm = &pbi->common; - MODE_INFO *const mi = xd->mi[0]; - MV_REF* frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; - int w, h; - - if (frame_is_intra_only(cm)) { - read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r, x_mis, y_mis); - } else { - read_inter_frame_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis); - - for (h = 0; h < y_mis; ++h) { - for (w = 0; w < x_mis; ++w) { - MV_REF *const mv = frame_mvs + w; - copy_ref_frame_pair(mv->ref_frame, mi->ref_frame); - copy_mv_pair(mv->mv, mi->mv); - } - frame_mvs += cm->mi_cols; - } - } -#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH - if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && - (xd->above_mi == NULL || xd->left_mi == NULL) && - !is_inter_block(mi) && need_top_left[mi->uv_mode]) - assert(0); -#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH -} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decodemv.h b/thirdparty/libvpx/vp9/decoder/vp9_decodemv.h deleted file mode 100644 index 45569ec81f..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_decodemv.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VP9_DECODER_VP9_DECODEMV_H_ -#define VP9_DECODER_VP9_DECODEMV_H_ - -#include "vpx_dsp/bitreader.h" - -#include "vp9/decoder/vp9_decoder.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, - int mi_row, int mi_col, vpx_reader *r, - int x_mis, int y_mis); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_DECODEMV_H_ diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decoder.c b/thirdparty/libvpx/vp9/decoder/vp9_decoder.c deleted file mode 100644 index 935c04f3aa..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_decoder.c +++ /dev/null @@ -1,518 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <assert.h> -#include <limits.h> -#include <stdio.h> - -#include "./vp9_rtcd.h" -#include "./vpx_dsp_rtcd.h" -#include "./vpx_scale_rtcd.h" - -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/system_state.h" -#include "vpx_ports/vpx_once.h" -#include "vpx_ports/vpx_timer.h" -#include "vpx_scale/vpx_scale.h" -#include "vpx_util/vpx_thread.h" - -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_loopfilter.h" -#include "vp9/common/vp9_onyxc_int.h" -#if CONFIG_VP9_POSTPROC -#include "vp9/common/vp9_postproc.h" -#endif -#include "vp9/common/vp9_quant_common.h" -#include "vp9/common/vp9_reconintra.h" - -#include "vp9/decoder/vp9_decodeframe.h" -#include "vp9/decoder/vp9_decoder.h" -#include "vp9/decoder/vp9_detokenize.h" - -static void initialize_dec(void) { - static volatile int init_done = 0; - - if (!init_done) { - vp9_rtcd(); - vpx_dsp_rtcd(); - vpx_scale_rtcd(); - vp9_init_intra_predictors(); - init_done = 1; - } -} - -static void vp9_dec_setup_mi(VP9_COMMON *cm) { - cm->mi = cm->mip + cm->mi_stride + 1; - cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1; - memset(cm->mi_grid_base, 0, - cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base)); -} - -static int vp9_dec_alloc_mi(VP9_COMMON *cm, int mi_size) { - cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip)); - if (!cm->mip) - return 1; - cm->mi_alloc_size = mi_size; - cm->mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO*)); - if (!cm->mi_grid_base) - return 1; - return 0; -} - -static void vp9_dec_free_mi(VP9_COMMON *cm) { - vpx_free(cm->mip); - cm->mip = NULL; - vpx_free(cm->mi_grid_base); - cm->mi_grid_base = NULL; -} - -VP9Decoder *vp9_decoder_create(BufferPool *const pool) { - VP9Decoder *volatile const pbi = vpx_memalign(32, sizeof(*pbi)); - VP9_COMMON *volatile const cm = pbi ? 
&pbi->common : NULL; - - if (!cm) - return NULL; - - vp9_zero(*pbi); - - if (setjmp(cm->error.jmp)) { - cm->error.setjmp = 0; - vp9_decoder_remove(pbi); - return NULL; - } - - cm->error.setjmp = 1; - - CHECK_MEM_ERROR(cm, cm->fc, - (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc))); - CHECK_MEM_ERROR(cm, cm->frame_contexts, - (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, - sizeof(*cm->frame_contexts))); - - pbi->need_resync = 1; - once(initialize_dec); - - // Initialize the references to not point to any frame buffers. - memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); - memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map)); - - cm->current_video_frame = 0; - pbi->ready_for_new_data = 1; - pbi->common.buffer_pool = pool; - - cm->bit_depth = VPX_BITS_8; - cm->dequant_bit_depth = VPX_BITS_8; - - cm->alloc_mi = vp9_dec_alloc_mi; - cm->free_mi = vp9_dec_free_mi; - cm->setup_mi = vp9_dec_setup_mi; - - vp9_loop_filter_init(cm); - - cm->error.setjmp = 0; - - vpx_get_worker_interface()->init(&pbi->lf_worker); - - return pbi; -} - -void vp9_decoder_remove(VP9Decoder *pbi) { - int i; - - if (!pbi) - return; - - vpx_get_worker_interface()->end(&pbi->lf_worker); - vpx_free(pbi->lf_worker.data1); - - for (i = 0; i < pbi->num_tile_workers; ++i) { - VPxWorker *const worker = &pbi->tile_workers[i]; - vpx_get_worker_interface()->end(worker); - } - - vpx_free(pbi->tile_worker_data); - vpx_free(pbi->tile_workers); - - if (pbi->num_tile_workers > 0) { - vp9_loop_filter_dealloc(&pbi->lf_row_sync); - } - - vpx_free(pbi); -} - -static int equal_dimensions(const YV12_BUFFER_CONFIG *a, - const YV12_BUFFER_CONFIG *b) { - return a->y_height == b->y_height && a->y_width == b->y_width && - a->uv_height == b->uv_height && a->uv_width == b->uv_width; -} - -vpx_codec_err_t vp9_copy_reference_dec(VP9Decoder *pbi, - VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd) { - VP9_COMMON *cm = &pbi->common; - - /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what 
the - * encoder is using the frame buffers for. This is just a stub to keep the - * vpxenc --test-decode functionality working, and will be replaced in a - * later commit that adds VP9-specific controls for this functionality. - */ - if (ref_frame_flag == VP9_LAST_FLAG) { - const YV12_BUFFER_CONFIG *const cfg = get_ref_frame(cm, 0); - if (cfg == NULL) { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "No 'last' reference frame"); - return VPX_CODEC_ERROR; - } - if (!equal_dimensions(cfg, sd)) - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Incorrect buffer dimensions"); - else - vp8_yv12_copy_frame(cfg, sd); - } else { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Invalid reference frame"); - } - - return cm->error.error_code; -} - - -vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, - VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd) { - RefBuffer *ref_buf = NULL; - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; - - // TODO(jkoleszar): The decoder doesn't have any real knowledge of what the - // encoder is using the frame buffers for. This is just a stub to keep the - // vpxenc --test-decode functionality working, and will be replaced in a - // later commit that adds VP9-specific controls for this functionality. - if (ref_frame_flag == VP9_LAST_FLAG) { - ref_buf = &cm->frame_refs[0]; - } else if (ref_frame_flag == VP9_GOLD_FLAG) { - ref_buf = &cm->frame_refs[1]; - } else if (ref_frame_flag == VP9_ALT_FLAG) { - ref_buf = &cm->frame_refs[2]; - } else { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Invalid reference frame"); - return cm->error.error_code; - } - - if (!equal_dimensions(ref_buf->buf, sd)) { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Incorrect buffer dimensions"); - } else { - int *ref_fb_ptr = &ref_buf->idx; - - // Find an empty frame buffer. 
- const int free_fb = get_free_fb(cm); - if (cm->new_fb_idx == INVALID_IDX) { - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Unable to find free frame buffer"); - return cm->error.error_code; - } - - // Decrease ref_count since it will be increased again in - // ref_cnt_fb() below. - --frame_bufs[free_fb].ref_count; - - // Manage the reference counters and copy image. - ref_cnt_fb(frame_bufs, ref_fb_ptr, free_fb); - ref_buf->buf = &frame_bufs[*ref_fb_ptr].buf; - vp8_yv12_copy_frame(sd, ref_buf->buf); - } - - return cm->error.error_code; -} - -/* If any buffer updating is signaled it should be done here. */ -static void swap_frame_buffers(VP9Decoder *pbi) { - int ref_index = 0, mask; - VP9_COMMON *const cm = &pbi->common; - BufferPool *const pool = cm->buffer_pool; - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; - - lock_buffer_pool(pool); - for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { - const int old_idx = cm->ref_frame_map[ref_index]; - // Current thread releases the holding of reference frame. - decrease_ref_count(old_idx, frame_bufs, pool); - - // Release the reference frame in reference map. - if (mask & 1) { - decrease_ref_count(old_idx, frame_bufs, pool); - } - cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index]; - ++ref_index; - } - - // Current thread releases the holding of reference frame. - for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) { - const int old_idx = cm->ref_frame_map[ref_index]; - decrease_ref_count(old_idx, frame_bufs, pool); - cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index]; - } - unlock_buffer_pool(pool); - pbi->hold_ref_buf = 0; - cm->frame_to_show = get_frame_new_buffer(cm); - - if (!pbi->frame_parallel_decode || !cm->show_frame) { - lock_buffer_pool(pool); - --frame_bufs[cm->new_fb_idx].ref_count; - unlock_buffer_pool(pool); - } - - // Invalidate these references until the next frame starts. 
- for (ref_index = 0; ref_index < 3; ref_index++) - cm->frame_refs[ref_index].idx = -1; -} - -int vp9_receive_compressed_data(VP9Decoder *pbi, - size_t size, const uint8_t **psource) { - VP9_COMMON *volatile const cm = &pbi->common; - BufferPool *volatile const pool = cm->buffer_pool; - RefCntBuffer *volatile const frame_bufs = cm->buffer_pool->frame_bufs; - const uint8_t *source = *psource; - int retcode = 0; - cm->error.error_code = VPX_CODEC_OK; - - if (size == 0) { - // This is used to signal that we are missing frames. - // We do not know if the missing frame(s) was supposed to update - // any of the reference buffers, but we act conservative and - // mark only the last buffer as corrupted. - // - // TODO(jkoleszar): Error concealment is undefined and non-normative - // at this point, but if it becomes so, [0] may not always be the correct - // thing to do here. - if (cm->frame_refs[0].idx > 0) { - assert(cm->frame_refs[0].buf != NULL); - cm->frame_refs[0].buf->corrupted = 1; - } - } - - pbi->ready_for_new_data = 0; - - // Check if the previous frame was a frame without any references to it. - // Release frame buffer if not decoding in frame parallel mode. - if (!pbi->frame_parallel_decode && cm->new_fb_idx >= 0 - && frame_bufs[cm->new_fb_idx].ref_count == 0) - pool->release_fb_cb(pool->cb_priv, - &frame_bufs[cm->new_fb_idx].raw_frame_buffer); - // Find a free frame buffer. Return error if can not find any. - cm->new_fb_idx = get_free_fb(cm); - if (cm->new_fb_idx == INVALID_IDX) { - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Unable to find free frame buffer"); - return cm->error.error_code; - } - - // Assign a MV array to the frame buffer. - cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx]; - - pbi->hold_ref_buf = 0; - if (pbi->frame_parallel_decode) { - VPxWorker *const worker = pbi->frame_worker_owner; - vp9_frameworker_lock_stats(worker); - frame_bufs[cm->new_fb_idx].frame_worker_owner = worker; - // Reset decoding progress. 
- pbi->cur_buf = &frame_bufs[cm->new_fb_idx]; - pbi->cur_buf->row = -1; - pbi->cur_buf->col = -1; - vp9_frameworker_unlock_stats(worker); - } else { - pbi->cur_buf = &frame_bufs[cm->new_fb_idx]; - } - - - if (setjmp(cm->error.jmp)) { - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - int i; - - cm->error.setjmp = 0; - pbi->ready_for_new_data = 1; - - // Synchronize all threads immediately as a subsequent decode call may - // cause a resize invalidating some allocations. - winterface->sync(&pbi->lf_worker); - for (i = 0; i < pbi->num_tile_workers; ++i) { - winterface->sync(&pbi->tile_workers[i]); - } - - lock_buffer_pool(pool); - // Release all the reference buffers if worker thread is holding them. - if (pbi->hold_ref_buf == 1) { - int ref_index = 0, mask; - for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { - const int old_idx = cm->ref_frame_map[ref_index]; - // Current thread releases the holding of reference frame. - decrease_ref_count(old_idx, frame_bufs, pool); - - // Release the reference frame in reference map. - if (mask & 1) { - decrease_ref_count(old_idx, frame_bufs, pool); - } - ++ref_index; - } - - // Current thread releases the holding of reference frame. - for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) { - const int old_idx = cm->ref_frame_map[ref_index]; - decrease_ref_count(old_idx, frame_bufs, pool); - } - pbi->hold_ref_buf = 0; - } - // Release current frame. - decrease_ref_count(cm->new_fb_idx, frame_bufs, pool); - unlock_buffer_pool(pool); - - vpx_clear_system_state(); - return -1; - } - - cm->error.setjmp = 1; - vp9_decode_frame(pbi, source, source + size, psource); - - swap_frame_buffers(pbi); - - vpx_clear_system_state(); - - if (!cm->show_existing_frame) { - cm->last_show_frame = cm->show_frame; - cm->prev_frame = cm->cur_frame; - if (cm->seg.enabled && !pbi->frame_parallel_decode) - vp9_swap_current_and_last_seg_map(cm); - } - - // Update progress in frame parallel decode. 
- if (pbi->frame_parallel_decode) { - // Need to lock the mutex here as another thread may - // be accessing this buffer. - VPxWorker *const worker = pbi->frame_worker_owner; - FrameWorkerData *const frame_worker_data = worker->data1; - vp9_frameworker_lock_stats(worker); - - if (cm->show_frame) { - cm->current_video_frame++; - } - frame_worker_data->frame_decoded = 1; - frame_worker_data->frame_context_ready = 1; - vp9_frameworker_signal_stats(worker); - vp9_frameworker_unlock_stats(worker); - } else { - cm->last_width = cm->width; - cm->last_height = cm->height; - if (cm->show_frame) { - cm->current_video_frame++; - } - } - - cm->error.setjmp = 0; - return retcode; -} - -int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, - vp9_ppflags_t *flags) { - VP9_COMMON *const cm = &pbi->common; - int ret = -1; -#if !CONFIG_VP9_POSTPROC - (void)*flags; -#endif - - if (pbi->ready_for_new_data == 1) - return ret; - - pbi->ready_for_new_data = 1; - - /* no raw frame to show!!! */ - if (!cm->show_frame) - return ret; - - pbi->ready_for_new_data = 1; - -#if CONFIG_VP9_POSTPROC - if (!cm->show_existing_frame) { - ret = vp9_post_proc_frame(cm, sd, flags); - } else { - *sd = *cm->frame_to_show; - ret = 0; - } -#else - *sd = *cm->frame_to_show; - ret = 0; -#endif /*!CONFIG_POSTPROC*/ - vpx_clear_system_state(); - return ret; -} - -vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data, - size_t data_sz, - uint32_t sizes[8], int *count, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) { - // A chunk ending with a byte matching 0xc0 is an invalid chunk unless - // it is a super frame index. If the last byte of real video compression - // data is 0xc0 the encoder must add a 0 byte. If we have the marker but - // not the associated matching marker byte at the front of the index we have - // an invalid bitstream and need to return an error. 
- - uint8_t marker; - - assert(data_sz); - marker = read_marker(decrypt_cb, decrypt_state, data + data_sz - 1); - *count = 0; - - if ((marker & 0xe0) == 0xc0) { - const uint32_t frames = (marker & 0x7) + 1; - const uint32_t mag = ((marker >> 3) & 0x3) + 1; - const size_t index_sz = 2 + mag * frames; - - // This chunk is marked as having a superframe index but doesn't have - // enough data for it, thus it's an invalid superframe index. - if (data_sz < index_sz) - return VPX_CODEC_CORRUPT_FRAME; - - { - const uint8_t marker2 = read_marker(decrypt_cb, decrypt_state, - data + data_sz - index_sz); - - // This chunk is marked as having a superframe index but doesn't have - // the matching marker byte at the front of the index therefore it's an - // invalid chunk. - if (marker != marker2) - return VPX_CODEC_CORRUPT_FRAME; - } - - { - // Found a valid superframe index. - uint32_t i, j; - const uint8_t *x = &data[data_sz - index_sz + 1]; - - // Frames has a maximum of 8 and mag has a maximum of 4. - uint8_t clear_buffer[32]; - assert(sizeof(clear_buffer) >= frames * mag); - if (decrypt_cb) { - decrypt_cb(decrypt_state, x, clear_buffer, frames * mag); - x = clear_buffer; - } - - for (i = 0; i < frames; ++i) { - uint32_t this_sz = 0; - - for (j = 0; j < mag; ++j) - this_sz |= ((uint32_t)(*x++)) << (j * 8); - sizes[i] = this_sz; - } - *count = frames; - } - } - return VPX_CODEC_OK; -} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decoder.h b/thirdparty/libvpx/vp9/decoder/vp9_decoder.h deleted file mode 100644 index 7111a36d37..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_decoder.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_DECODER_VP9_DECODER_H_ -#define VP9_DECODER_VP9_DECODER_H_ - -#include "./vpx_config.h" - -#include "vpx/vpx_codec.h" -#include "vpx_dsp/bitreader.h" -#include "vpx_scale/yv12config.h" -#include "vpx_util/vpx_thread.h" - -#include "vp9/common/vp9_thread_common.h" -#include "vp9/common/vp9_onyxc_int.h" -#include "vp9/common/vp9_ppflags.h" -#include "vp9/decoder/vp9_dthread.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct TileBuffer { - const uint8_t *data; - size_t size; - int col; // only used with multi-threaded decoding -} TileBuffer; - -typedef struct TileWorkerData { - const uint8_t *data_end; - int buf_start, buf_end; // pbi->tile_buffers to decode, inclusive - vpx_reader bit_reader; - FRAME_COUNTS counts; - DECLARE_ALIGNED(16, MACROBLOCKD, xd); - /* dqcoeff are shared by all the planes. So planes must be decoded serially */ - DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); - struct vpx_internal_error_info error_info; -} TileWorkerData; - -typedef struct VP9Decoder { - DECLARE_ALIGNED(16, MACROBLOCKD, mb); - - DECLARE_ALIGNED(16, VP9_COMMON, common); - - int ready_for_new_data; - - int refresh_frame_flags; - - int frame_parallel_decode; // frame-based threading. - - // TODO(hkuang): Combine this with cur_buf in macroblockd as they are - // the same. - RefCntBuffer *cur_buf; // Current decoding frame buffer. - - VPxWorker *frame_worker_owner; // frame_worker that owns this pbi. - VPxWorker lf_worker; - VPxWorker *tile_workers; - TileWorkerData *tile_worker_data; - TileBuffer tile_buffers[64]; - int num_tile_workers; - int total_tiles; - - VP9LfSync lf_row_sync; - - vpx_decrypt_cb decrypt_cb; - void *decrypt_state; - - int max_threads; - int inv_tile_order; - int need_resync; // wait for key/intra-only frame. - int hold_ref_buf; // hold the reference buffer. 
-} VP9Decoder; - -int vp9_receive_compressed_data(struct VP9Decoder *pbi, - size_t size, const uint8_t **dest); - -int vp9_get_raw_frame(struct VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, - vp9_ppflags_t *flags); - -vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decoder *pbi, - VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd); - -vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, - VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd); - -static INLINE uint8_t read_marker(vpx_decrypt_cb decrypt_cb, - void *decrypt_state, - const uint8_t *data) { - if (decrypt_cb) { - uint8_t marker; - decrypt_cb(decrypt_state, data, &marker, 1); - return marker; - } - return *data; -} - -// This function is exposed for use in tests, as well as the inlined function -// "read_marker". -vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data, - size_t data_sz, - uint32_t sizes[8], int *count, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state); - -struct VP9Decoder *vp9_decoder_create(BufferPool *const pool); - -void vp9_decoder_remove(struct VP9Decoder *pbi); - -static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs, - BufferPool *const pool) { - if (idx >= 0 && frame_bufs[idx].ref_count > 0) { - --frame_bufs[idx].ref_count; - // A worker may only get a free framebuffer index when calling get_free_fb. - // But the private buffer is not set up until finish decoding header. - // So any error happens during decoding header, the frame_bufs will not - // have valid priv buffer. 
- if (frame_bufs[idx].ref_count == 0 && - frame_bufs[idx].raw_frame_buffer.priv) { - pool->release_fb_cb(pool->cb_priv, &frame_bufs[idx].raw_frame_buffer); - } - } -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_DECODER_H_ diff --git a/thirdparty/libvpx/vp9/decoder/vp9_detokenize.c b/thirdparty/libvpx/vp9/decoder/vp9_detokenize.c deleted file mode 100644 index 47dc107fe2..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_detokenize.c +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/mem.h" - -#include "vp9/common/vp9_blockd.h" -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_entropy.h" -#if CONFIG_COEFFICIENT_RANGE_CHECKING -#include "vp9/common/vp9_idct.h" -#endif - -#include "vp9/decoder/vp9_detokenize.h" - -#define EOB_CONTEXT_NODE 0 -#define ZERO_CONTEXT_NODE 1 -#define ONE_CONTEXT_NODE 2 - -#define INCREMENT_COUNT(token) \ - do { \ - if (counts) \ - ++coef_counts[band][ctx][token]; \ - } while (0) - -static INLINE int read_coeff(const vpx_prob *probs, int n, vpx_reader *r) { - int i, val = 0; - for (i = 0; i < n; ++i) - val = (val << 1) | vpx_read(r, probs[i]); - return val; -} - -static int decode_coefs(const MACROBLOCKD *xd, - PLANE_TYPE type, - tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq, - int ctx, const int16_t *scan, const int16_t *nb, - vpx_reader *r) { - FRAME_COUNTS *counts = xd->counts; - const int max_eob = 16 << (tx_size << 1); - const FRAME_CONTEXT *const fc = xd->fc; - const int ref = is_inter_block(xd->mi[0]); - int band, c = 0; - const vpx_prob 
(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = - fc->coef_probs[tx_size][type][ref]; - const vpx_prob *prob; - unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1]; - unsigned int (*eob_branch_count)[COEFF_CONTEXTS]; - uint8_t token_cache[32 * 32]; - const uint8_t *band_translate = get_band_translate(tx_size); - const int dq_shift = (tx_size == TX_32X32); - int v, token; - int16_t dqv = dq[0]; - const uint8_t *const cat6_prob = -#if CONFIG_VP9_HIGHBITDEPTH - (xd->bd == VPX_BITS_12) ? vp9_cat6_prob_high12 : - (xd->bd == VPX_BITS_10) ? vp9_cat6_prob_high12 + 2 : -#endif // CONFIG_VP9_HIGHBITDEPTH - vp9_cat6_prob; - const int cat6_bits = -#if CONFIG_VP9_HIGHBITDEPTH - (xd->bd == VPX_BITS_12) ? 18 : - (xd->bd == VPX_BITS_10) ? 16 : -#endif // CONFIG_VP9_HIGHBITDEPTH - 14; - - if (counts) { - coef_counts = counts->coef[tx_size][type][ref]; - eob_branch_count = counts->eob_branch[tx_size][type][ref]; - } - - while (c < max_eob) { - int val = -1; - band = *band_translate++; - prob = coef_probs[band][ctx]; - if (counts) - ++eob_branch_count[band][ctx]; - if (!vpx_read(r, prob[EOB_CONTEXT_NODE])) { - INCREMENT_COUNT(EOB_MODEL_TOKEN); - break; - } - - while (!vpx_read(r, prob[ZERO_CONTEXT_NODE])) { - INCREMENT_COUNT(ZERO_TOKEN); - dqv = dq[1]; - token_cache[scan[c]] = 0; - ++c; - if (c >= max_eob) - return c; // zero tokens at the end (no eob token) - ctx = get_coef_context(nb, token_cache, c); - band = *band_translate++; - prob = coef_probs[band][ctx]; - } - - if (!vpx_read(r, prob[ONE_CONTEXT_NODE])) { - INCREMENT_COUNT(ONE_TOKEN); - token = ONE_TOKEN; - val = 1; - } else { - INCREMENT_COUNT(TWO_TOKEN); - token = vpx_read_tree(r, vp9_coef_con_tree, - vp9_pareto8_full[prob[PIVOT_NODE] - 1]); - switch (token) { - case TWO_TOKEN: - case THREE_TOKEN: - case FOUR_TOKEN: - val = token; - break; - case CATEGORY1_TOKEN: - val = CAT1_MIN_VAL + read_coeff(vp9_cat1_prob, 1, r); - break; - case CATEGORY2_TOKEN: - val = CAT2_MIN_VAL + read_coeff(vp9_cat2_prob, 2, 
r); - break; - case CATEGORY3_TOKEN: - val = CAT3_MIN_VAL + read_coeff(vp9_cat3_prob, 3, r); - break; - case CATEGORY4_TOKEN: - val = CAT4_MIN_VAL + read_coeff(vp9_cat4_prob, 4, r); - break; - case CATEGORY5_TOKEN: - val = CAT5_MIN_VAL + read_coeff(vp9_cat5_prob, 5, r); - break; - case CATEGORY6_TOKEN: - val = CAT6_MIN_VAL + read_coeff(cat6_prob, cat6_bits, r); - break; - } - } - v = (val * dqv) >> dq_shift; -#if CONFIG_COEFFICIENT_RANGE_CHECKING -#if CONFIG_VP9_HIGHBITDEPTH - dqcoeff[scan[c]] = highbd_check_range((vpx_read_bit(r) ? -v : v), - xd->bd); -#else - dqcoeff[scan[c]] = check_range(vpx_read_bit(r) ? -v : v); -#endif // CONFIG_VP9_HIGHBITDEPTH -#else - dqcoeff[scan[c]] = vpx_read_bit(r) ? -v : v; -#endif // CONFIG_COEFFICIENT_RANGE_CHECKING - token_cache[scan[c]] = vp9_pt_energy_class[token]; - ++c; - ctx = get_coef_context(nb, token_cache, c); - dqv = dq[1]; - } - - return c; -} - -static void get_ctx_shift(MACROBLOCKD *xd, int *ctx_shift_a, int *ctx_shift_l, - int x, int y, unsigned int tx_size_in_blocks) { - if (xd->max_blocks_wide) { - if (tx_size_in_blocks + x > xd->max_blocks_wide) - *ctx_shift_a = (tx_size_in_blocks - (xd->max_blocks_wide - x)) * 8; - } - if (xd->max_blocks_high) { - if (tx_size_in_blocks + y > xd->max_blocks_high) - *ctx_shift_l = (tx_size_in_blocks - (xd->max_blocks_high - y)) * 8; - } -} - -int vp9_decode_block_tokens(MACROBLOCKD *xd, int plane, const scan_order *sc, - int x, int y, TX_SIZE tx_size, vpx_reader *r, - int seg_id) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - const int16_t *const dequant = pd->seg_dequant[seg_id]; - int eob; - ENTROPY_CONTEXT *a = pd->above_context + x; - ENTROPY_CONTEXT *l = pd->left_context + y; - int ctx; - int ctx_shift_a = 0; - int ctx_shift_l = 0; - - switch (tx_size) { - case TX_4X4: - ctx = a[0] != 0; - ctx += l[0] != 0; - eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, - dequant, ctx, sc->scan, sc->neighbors, r); - a[0] = l[0] = (eob > 0); - break; - case 
TX_8X8: - get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_8X8); - ctx = !!*(const uint16_t *)a; - ctx += !!*(const uint16_t *)l; - eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, - dequant, ctx, sc->scan, sc->neighbors, r); - *(uint16_t *)a = ((eob > 0) * 0x0101) >> ctx_shift_a; - *(uint16_t *)l = ((eob > 0) * 0x0101) >> ctx_shift_l; - break; - case TX_16X16: - get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_16X16); - ctx = !!*(const uint32_t *)a; - ctx += !!*(const uint32_t *)l; - eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, - dequant, ctx, sc->scan, sc->neighbors, r); - *(uint32_t *)a = ((eob > 0) * 0x01010101) >> ctx_shift_a; - *(uint32_t *)l = ((eob > 0) * 0x01010101) >> ctx_shift_l; - break; - case TX_32X32: - get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_32X32); - // NOTE: casting to uint64_t here is safe because the default memory - // alignment is at least 8 bytes and the TX_32X32 is aligned on 8 byte - // boundaries. - ctx = !!*(const uint64_t *)a; - ctx += !!*(const uint64_t *)l; - eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, - dequant, ctx, sc->scan, sc->neighbors, r); - *(uint64_t *)a = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_a; - *(uint64_t *)l = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_l; - break; - default: - assert(0 && "Invalid transform size."); - eob = 0; - break; - } - - return eob; -} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_detokenize.h b/thirdparty/libvpx/vp9/decoder/vp9_detokenize.h deleted file mode 100644 index d242d4466e..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_detokenize.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_DECODER_VP9_DETOKENIZE_H_ -#define VP9_DECODER_VP9_DETOKENIZE_H_ - -#include "vpx_dsp/bitreader.h" -#include "vp9/decoder/vp9_decoder.h" -#include "vp9/common/vp9_scan.h" - -#ifdef __cplusplus -extern "C" { -#endif - -int vp9_decode_block_tokens(MACROBLOCKD *xd, - int plane, const scan_order *sc, - int x, int y, - TX_SIZE tx_size, vpx_reader *r, - int seg_id); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_DETOKENIZE_H_ diff --git a/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.c b/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.c deleted file mode 100644 index 05b38538ae..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <assert.h> - -#include "vp9/common/vp9_entropy.h" - -#include "vp9/decoder/vp9_dsubexp.h" - -static int inv_recenter_nonneg(int v, int m) { - if (v > 2 * m) - return v; - - return (v & 1) ? m - ((v + 1) >> 1) : m + (v >> 1); -} - -static int decode_uniform(vpx_reader *r) { - const int l = 8; - const int m = (1 << l) - 191; - const int v = vpx_read_literal(r, l - 1); - return v < m ? 
v : (v << 1) - m + vpx_read_bit(r); -} - -static int inv_remap_prob(int v, int m) { - static uint8_t inv_map_table[MAX_PROB] = { - 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176, 189, - 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, - 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, - 44, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 60, - 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 73, 74, 75, 76, - 77, 78, 79, 80, 81, 82, 83, 84, 86, 87, 88, 89, 90, 91, 92, - 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, - 109, 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 125, - 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, - 142, 143, 144, 145, 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, - 158, 159, 160, 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, - 174, 175, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, - 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206, - 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221, 222, - 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, - 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 253 - }; - assert(v < (int)(sizeof(inv_map_table) / sizeof(inv_map_table[0]))); - v = inv_map_table[v]; - m--; - if ((m << 1) <= MAX_PROB) { - return 1 + inv_recenter_nonneg(v, m); - } else { - return MAX_PROB - inv_recenter_nonneg(v, MAX_PROB - 1 - m); - } -} - -static int decode_term_subexp(vpx_reader *r) { - if (!vpx_read_bit(r)) - return vpx_read_literal(r, 4); - if (!vpx_read_bit(r)) - return vpx_read_literal(r, 4) + 16; - if (!vpx_read_bit(r)) - return vpx_read_literal(r, 5) + 32; - return decode_uniform(r) + 64; -} - -void vp9_diff_update_prob(vpx_reader *r, vpx_prob* p) { - if (vpx_read(r, DIFF_UPDATE_PROB)) { - const int delp = decode_term_subexp(r); - *p = 
(vpx_prob)inv_remap_prob(delp, *p); - } -} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.h b/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.h deleted file mode 100644 index a8bcc70be9..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_DECODER_VP9_DSUBEXP_H_ -#define VP9_DECODER_VP9_DSUBEXP_H_ - -#include "vpx_dsp/bitreader.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void vp9_diff_update_prob(vpx_reader *r, vpx_prob* p); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_DSUBEXP_H_ diff --git a/thirdparty/libvpx/vp9/decoder/vp9_dthread.c b/thirdparty/libvpx/vp9/decoder/vp9_dthread.c deleted file mode 100644 index 14a71448fe..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_dthread.c +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" -#include "vpx_mem/vpx_mem.h" -#include "vp9/common/vp9_reconinter.h" -#include "vp9/decoder/vp9_dthread.h" -#include "vp9/decoder/vp9_decoder.h" - -// #define DEBUG_THREAD - -// TODO(hkuang): Clean up all the #ifdef in this file. 
-void vp9_frameworker_lock_stats(VPxWorker *const worker) { -#if CONFIG_MULTITHREAD - FrameWorkerData *const worker_data = worker->data1; - pthread_mutex_lock(&worker_data->stats_mutex); -#else - (void)worker; -#endif -} - -void vp9_frameworker_unlock_stats(VPxWorker *const worker) { -#if CONFIG_MULTITHREAD - FrameWorkerData *const worker_data = worker->data1; - pthread_mutex_unlock(&worker_data->stats_mutex); -#else - (void)worker; -#endif -} - -void vp9_frameworker_signal_stats(VPxWorker *const worker) { -#if CONFIG_MULTITHREAD - FrameWorkerData *const worker_data = worker->data1; - -// TODO(hkuang): Fix the pthread_cond_broadcast in windows wrapper. -#if defined(_WIN32) && !HAVE_PTHREAD_H - pthread_cond_signal(&worker_data->stats_cond); -#else - pthread_cond_broadcast(&worker_data->stats_cond); -#endif - -#else - (void)worker; -#endif -} - -// This macro prevents thread_sanitizer from reporting known concurrent writes. -#if defined(__has_feature) -#if __has_feature(thread_sanitizer) -#define BUILDING_WITH_TSAN -#endif -#endif - -// TODO(hkuang): Remove worker parameter as it is only used in debug code. -void vp9_frameworker_wait(VPxWorker *const worker, RefCntBuffer *const ref_buf, - int row) { -#if CONFIG_MULTITHREAD - if (!ref_buf) - return; - -#ifndef BUILDING_WITH_TSAN - // The following line of code will get harmless tsan error but it is the key - // to get best performance. - if (ref_buf->row >= row && ref_buf->buf.corrupted != 1) return; -#endif - - { - // Find the worker thread that owns the reference frame. If the reference - // frame has been fully decoded, it may not have owner. 
- VPxWorker *const ref_worker = ref_buf->frame_worker_owner; - FrameWorkerData *const ref_worker_data = - (FrameWorkerData *)ref_worker->data1; - const VP9Decoder *const pbi = ref_worker_data->pbi; - -#ifdef DEBUG_THREAD - { - FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; - printf("%d %p worker is waiting for %d %p worker (%d) ref %d \r\n", - worker_data->worker_id, worker, ref_worker_data->worker_id, - ref_buf->frame_worker_owner, row, ref_buf->row); - } -#endif - - vp9_frameworker_lock_stats(ref_worker); - while (ref_buf->row < row && pbi->cur_buf == ref_buf && - ref_buf->buf.corrupted != 1) { - pthread_cond_wait(&ref_worker_data->stats_cond, - &ref_worker_data->stats_mutex); - } - - if (ref_buf->buf.corrupted == 1) { - FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; - vp9_frameworker_unlock_stats(ref_worker); - vpx_internal_error(&worker_data->pbi->common.error, - VPX_CODEC_CORRUPT_FRAME, - "Worker %p failed to decode frame", worker); - } - vp9_frameworker_unlock_stats(ref_worker); - } -#else - (void)worker; - (void)ref_buf; - (void)row; - (void)ref_buf; -#endif // CONFIG_MULTITHREAD -} - -void vp9_frameworker_broadcast(RefCntBuffer *const buf, int row) { -#if CONFIG_MULTITHREAD - VPxWorker *worker = buf->frame_worker_owner; - -#ifdef DEBUG_THREAD - { - FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1; - printf("%d %p worker decode to (%d) \r\n", worker_data->worker_id, - buf->frame_worker_owner, row); - } -#endif - - vp9_frameworker_lock_stats(worker); - buf->row = row; - vp9_frameworker_signal_stats(worker); - vp9_frameworker_unlock_stats(worker); -#else - (void)buf; - (void)row; -#endif // CONFIG_MULTITHREAD -} - -void vp9_frameworker_copy_context(VPxWorker *const dst_worker, - VPxWorker *const src_worker) { -#if CONFIG_MULTITHREAD - FrameWorkerData *const src_worker_data = (FrameWorkerData *)src_worker->data1; - FrameWorkerData *const dst_worker_data = (FrameWorkerData *)dst_worker->data1; 
- VP9_COMMON *const src_cm = &src_worker_data->pbi->common; - VP9_COMMON *const dst_cm = &dst_worker_data->pbi->common; - int i; - - // Wait until source frame's context is ready. - vp9_frameworker_lock_stats(src_worker); - while (!src_worker_data->frame_context_ready) { - pthread_cond_wait(&src_worker_data->stats_cond, - &src_worker_data->stats_mutex); - } - - dst_cm->last_frame_seg_map = src_cm->seg.enabled ? - src_cm->current_frame_seg_map : src_cm->last_frame_seg_map; - dst_worker_data->pbi->need_resync = src_worker_data->pbi->need_resync; - vp9_frameworker_unlock_stats(src_worker); - - dst_cm->bit_depth = src_cm->bit_depth; -#if CONFIG_VP9_HIGHBITDEPTH - dst_cm->use_highbitdepth = src_cm->use_highbitdepth; -#endif - dst_cm->prev_frame = src_cm->show_existing_frame ? - src_cm->prev_frame : src_cm->cur_frame; - dst_cm->last_width = !src_cm->show_existing_frame ? - src_cm->width : src_cm->last_width; - dst_cm->last_height = !src_cm->show_existing_frame ? - src_cm->height : src_cm->last_height; - dst_cm->subsampling_x = src_cm->subsampling_x; - dst_cm->subsampling_y = src_cm->subsampling_y; - dst_cm->frame_type = src_cm->frame_type; - dst_cm->last_show_frame = !src_cm->show_existing_frame ? 
- src_cm->show_frame : src_cm->last_show_frame; - for (i = 0; i < REF_FRAMES; ++i) - dst_cm->ref_frame_map[i] = src_cm->next_ref_frame_map[i]; - - memcpy(dst_cm->lf_info.lfthr, src_cm->lf_info.lfthr, - (MAX_LOOP_FILTER + 1) * sizeof(loop_filter_thresh)); - dst_cm->lf.last_sharpness_level = src_cm->lf.sharpness_level; - dst_cm->lf.filter_level = src_cm->lf.filter_level; - memcpy(dst_cm->lf.ref_deltas, src_cm->lf.ref_deltas, MAX_REF_LF_DELTAS); - memcpy(dst_cm->lf.mode_deltas, src_cm->lf.mode_deltas, MAX_MODE_LF_DELTAS); - dst_cm->seg = src_cm->seg; - memcpy(dst_cm->frame_contexts, src_cm->frame_contexts, - FRAME_CONTEXTS * sizeof(dst_cm->frame_contexts[0])); -#else - (void) dst_worker; - (void) src_worker; -#endif // CONFIG_MULTITHREAD -} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_dthread.h b/thirdparty/libvpx/vp9/decoder/vp9_dthread.h deleted file mode 100644 index ba7c38a511..0000000000 --- a/thirdparty/libvpx/vp9/decoder/vp9_dthread.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_DECODER_VP9_DTHREAD_H_ -#define VP9_DECODER_VP9_DTHREAD_H_ - -#include "./vpx_config.h" -#include "vpx_util/vpx_thread.h" -#include "vpx/internal/vpx_codec_internal.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct VP9Common; -struct VP9Decoder; - -// WorkerData for the FrameWorker thread. It contains all the information of -// the worker and decode structures for decoding a frame. 
-typedef struct FrameWorkerData { - struct VP9Decoder *pbi; - const uint8_t *data; - const uint8_t *data_end; - size_t data_size; - void *user_priv; - int result; - int worker_id; - int received_frame; - - // scratch_buffer is used in frame parallel mode only. - // It is used to make a copy of the compressed data. - uint8_t *scratch_buffer; - size_t scratch_buffer_size; - -#if CONFIG_MULTITHREAD - pthread_mutex_t stats_mutex; - pthread_cond_t stats_cond; -#endif - - int frame_context_ready; // Current frame's context is ready to read. - int frame_decoded; // Finished decoding current frame. -} FrameWorkerData; - -void vp9_frameworker_lock_stats(VPxWorker *const worker); -void vp9_frameworker_unlock_stats(VPxWorker *const worker); -void vp9_frameworker_signal_stats(VPxWorker *const worker); - -// Wait until ref_buf has been decoded to row in real pixel unit. -// Note: worker may already finish decoding ref_buf and release it in order to -// start decoding next frame. So need to check whether worker is still decoding -// ref_buf. -void vp9_frameworker_wait(VPxWorker *const worker, RefCntBuffer *const ref_buf, - int row); - -// FrameWorker broadcasts its decoding progress so other workers that are -// waiting on it can resume decoding. -void vp9_frameworker_broadcast(RefCntBuffer *const buf, int row); - -// Copy necessary decoding context from src worker to dst worker. -void vp9_frameworker_copy_context(VPxWorker *const dst_worker, - VPxWorker *const src_worker); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_DTHREAD_H_ diff --git a/thirdparty/libvpx/vp9/vp9_dx_iface.c b/thirdparty/libvpx/vp9/vp9_dx_iface.c deleted file mode 100644 index 6531e2c618..0000000000 --- a/thirdparty/libvpx/vp9/vp9_dx_iface.c +++ /dev/null @@ -1,1093 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <stdlib.h> -#include <string.h> - -#include "./vpx_config.h" -#include "./vpx_version.h" - -#include "vpx/internal/vpx_codec_internal.h" -#include "vpx/vp8dx.h" -#include "vpx/vpx_decoder.h" -#include "vpx_dsp/bitreader_buffer.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_util/vpx_thread.h" - -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_frame_buffers.h" - -#include "vp9/decoder/vp9_decodeframe.h" - -#include "vp9/vp9_dx_iface.h" -#include "vp9/vp9_iface_common.h" - -#define VP9_CAP_POSTPROC (CONFIG_VP9_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0) - -static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx, - vpx_codec_priv_enc_mr_cfg_t *data) { - // This function only allocates space for the vpx_codec_alg_priv_t - // structure. More memory may be required at the time the stream - // information becomes known. - (void)data; - - if (!ctx->priv) { - vpx_codec_alg_priv_t *const priv = - (vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv)); - if (priv == NULL) - return VPX_CODEC_MEM_ERROR; - - ctx->priv = (vpx_codec_priv_t *)priv; - ctx->priv->init_flags = ctx->init_flags; - priv->si.sz = sizeof(priv->si); - priv->flushed = 0; - // Only do frame parallel decode when threads > 1. - priv->frame_parallel_decode = - (ctx->config.dec && (ctx->config.dec->threads > 1) && - (ctx->init_flags & VPX_CODEC_USE_FRAME_THREADING)) ? 
1 : 0; - if (ctx->config.dec) { - priv->cfg = *ctx->config.dec; - ctx->config.dec = &priv->cfg; - } - } - - return VPX_CODEC_OK; -} - -static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) { - if (ctx->frame_workers != NULL) { - int i; - for (i = 0; i < ctx->num_frame_workers; ++i) { - VPxWorker *const worker = &ctx->frame_workers[i]; - FrameWorkerData *const frame_worker_data = - (FrameWorkerData *)worker->data1; - vpx_get_worker_interface()->end(worker); - vp9_remove_common(&frame_worker_data->pbi->common); -#if CONFIG_VP9_POSTPROC - vp9_free_postproc_buffers(&frame_worker_data->pbi->common); -#endif - vp9_decoder_remove(frame_worker_data->pbi); - vpx_free(frame_worker_data->scratch_buffer); -#if CONFIG_MULTITHREAD - pthread_mutex_destroy(&frame_worker_data->stats_mutex); - pthread_cond_destroy(&frame_worker_data->stats_cond); -#endif - vpx_free(frame_worker_data); - } -#if CONFIG_MULTITHREAD - pthread_mutex_destroy(&ctx->buffer_pool->pool_mutex); -#endif - } - - if (ctx->buffer_pool) { - vp9_free_ref_frame_buffers(ctx->buffer_pool); - vp9_free_internal_frame_buffers(&ctx->buffer_pool->int_frame_buffers); - } - - vpx_free(ctx->frame_workers); - vpx_free(ctx->buffer_pool); - vpx_free(ctx); - return VPX_CODEC_OK; -} - -static int parse_bitdepth_colorspace_sampling( - BITSTREAM_PROFILE profile, struct vpx_read_bit_buffer *rb) { - vpx_color_space_t color_space; - if (profile >= PROFILE_2) - rb->bit_offset += 1; // Bit-depth 10 or 12. - color_space = (vpx_color_space_t)vpx_rb_read_literal(rb, 3); - if (color_space != VPX_CS_SRGB) { - rb->bit_offset += 1; // [16,235] (including xvycc) vs [0,255] range. - if (profile == PROFILE_1 || profile == PROFILE_3) { - rb->bit_offset += 2; // subsampling x/y. - rb->bit_offset += 1; // unused. - } - } else { - if (profile == PROFILE_1 || profile == PROFILE_3) { - rb->bit_offset += 1; // unused - } else { - // RGB is only available in version 1. 
- return 0; - } - } - return 1; -} - -static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data, - unsigned int data_sz, - vpx_codec_stream_info_t *si, - int *is_intra_only, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) { - int intra_only_flag = 0; - uint8_t clear_buffer[10]; - - if (data + data_sz <= data) - return VPX_CODEC_INVALID_PARAM; - - si->is_kf = 0; - si->w = si->h = 0; - - if (decrypt_cb) { - data_sz = VPXMIN(sizeof(clear_buffer), data_sz); - decrypt_cb(decrypt_state, data, clear_buffer, data_sz); - data = clear_buffer; - } - - // A maximum of 6 bits are needed to read the frame marker, profile and - // show_existing_frame. - if (data_sz < 1) - return VPX_CODEC_UNSUP_BITSTREAM; - - { - int show_frame; - int error_resilient; - struct vpx_read_bit_buffer rb = { data, data + data_sz, 0, NULL, NULL }; - const int frame_marker = vpx_rb_read_literal(&rb, 2); - const BITSTREAM_PROFILE profile = vp9_read_profile(&rb); - - if (frame_marker != VP9_FRAME_MARKER) - return VPX_CODEC_UNSUP_BITSTREAM; - - if (profile >= MAX_PROFILES) - return VPX_CODEC_UNSUP_BITSTREAM; - - if (vpx_rb_read_bit(&rb)) { // show an existing frame - // If profile is > 2 and show_existing_frame is true, then at least 1 more - // byte (6+3=9 bits) is needed. - if (profile > 2 && data_sz < 2) - return VPX_CODEC_UNSUP_BITSTREAM; - vpx_rb_read_literal(&rb, 3); // Frame buffer to show. - return VPX_CODEC_OK; - } - - // For the rest of the function, a maximum of 9 more bytes are needed - // (computed by taking the maximum possible bits needed in each case). Note - // that this has to be updated if we read any more bits in this function. 
- if (data_sz < 10) - return VPX_CODEC_UNSUP_BITSTREAM; - - si->is_kf = !vpx_rb_read_bit(&rb); - show_frame = vpx_rb_read_bit(&rb); - error_resilient = vpx_rb_read_bit(&rb); - - if (si->is_kf) { - if (!vp9_read_sync_code(&rb)) - return VPX_CODEC_UNSUP_BITSTREAM; - - if (!parse_bitdepth_colorspace_sampling(profile, &rb)) - return VPX_CODEC_UNSUP_BITSTREAM; - vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h); - } else { - intra_only_flag = show_frame ? 0 : vpx_rb_read_bit(&rb); - - rb.bit_offset += error_resilient ? 0 : 2; // reset_frame_context - - if (intra_only_flag) { - if (!vp9_read_sync_code(&rb)) - return VPX_CODEC_UNSUP_BITSTREAM; - if (profile > PROFILE_0) { - if (!parse_bitdepth_colorspace_sampling(profile, &rb)) - return VPX_CODEC_UNSUP_BITSTREAM; - } - rb.bit_offset += REF_FRAMES; // refresh_frame_flags - vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h); - } - } - } - if (is_intra_only != NULL) - *is_intra_only = intra_only_flag; - return VPX_CODEC_OK; -} - -static vpx_codec_err_t decoder_peek_si(const uint8_t *data, - unsigned int data_sz, - vpx_codec_stream_info_t *si) { - return decoder_peek_si_internal(data, data_sz, si, NULL, NULL, NULL); -} - -static vpx_codec_err_t decoder_get_si(vpx_codec_alg_priv_t *ctx, - vpx_codec_stream_info_t *si) { - const size_t sz = (si->sz >= sizeof(vp9_stream_info_t)) - ? sizeof(vp9_stream_info_t) - : sizeof(vpx_codec_stream_info_t); - memcpy(si, &ctx->si, sz); - si->sz = (unsigned int)sz; - - return VPX_CODEC_OK; -} - -static void set_error_detail(vpx_codec_alg_priv_t *ctx, - const char *const error) { - ctx->base.err_detail = error; -} - -static vpx_codec_err_t update_error_state(vpx_codec_alg_priv_t *ctx, - const struct vpx_internal_error_info *error) { - if (error->error_code) - set_error_detail(ctx, error->has_detail ? 
error->detail : NULL); - - return error->error_code; -} - -static void init_buffer_callbacks(vpx_codec_alg_priv_t *ctx) { - int i; - - for (i = 0; i < ctx->num_frame_workers; ++i) { - VPxWorker *const worker = &ctx->frame_workers[i]; - FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; - VP9_COMMON *const cm = &frame_worker_data->pbi->common; - BufferPool *const pool = cm->buffer_pool; - - cm->new_fb_idx = INVALID_IDX; - cm->byte_alignment = ctx->byte_alignment; - cm->skip_loop_filter = ctx->skip_loop_filter; - - if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { - pool->get_fb_cb = ctx->get_ext_fb_cb; - pool->release_fb_cb = ctx->release_ext_fb_cb; - pool->cb_priv = ctx->ext_priv; - } else { - pool->get_fb_cb = vp9_get_frame_buffer; - pool->release_fb_cb = vp9_release_frame_buffer; - - if (vp9_alloc_internal_frame_buffers(&pool->int_frame_buffers)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to initialize internal frame buffers"); - - pool->cb_priv = &pool->int_frame_buffers; - } - } -} - -static void set_default_ppflags(vp8_postproc_cfg_t *cfg) { - cfg->post_proc_flag = VP8_DEBLOCK | VP8_DEMACROBLOCK; - cfg->deblocking_level = 4; - cfg->noise_level = 0; -} - -static void set_ppflags(const vpx_codec_alg_priv_t *ctx, - vp9_ppflags_t *flags) { - flags->post_proc_flag = - ctx->postproc_cfg.post_proc_flag; - - flags->deblocking_level = ctx->postproc_cfg.deblocking_level; - flags->noise_level = ctx->postproc_cfg.noise_level; -} - -static int frame_worker_hook(void *arg1, void *arg2) { - FrameWorkerData *const frame_worker_data = (FrameWorkerData *)arg1; - const uint8_t *data = frame_worker_data->data; - (void)arg2; - - frame_worker_data->result = - vp9_receive_compressed_data(frame_worker_data->pbi, - frame_worker_data->data_size, - &data); - frame_worker_data->data_end = data; - - if (frame_worker_data->pbi->frame_parallel_decode) { - // In frame parallel decoding, a worker thread must successfully decode 
all - // the compressed data. - if (frame_worker_data->result != 0 || - frame_worker_data->data + frame_worker_data->data_size - 1 > data) { - VPxWorker *const worker = frame_worker_data->pbi->frame_worker_owner; - BufferPool *const pool = frame_worker_data->pbi->common.buffer_pool; - // Signal all the other threads that are waiting for this frame. - vp9_frameworker_lock_stats(worker); - frame_worker_data->frame_context_ready = 1; - lock_buffer_pool(pool); - frame_worker_data->pbi->cur_buf->buf.corrupted = 1; - unlock_buffer_pool(pool); - frame_worker_data->pbi->need_resync = 1; - vp9_frameworker_signal_stats(worker); - vp9_frameworker_unlock_stats(worker); - return 0; - } - } else if (frame_worker_data->result != 0) { - // Check decode result in serial decode. - frame_worker_data->pbi->cur_buf->buf.corrupted = 1; - frame_worker_data->pbi->need_resync = 1; - } - return !frame_worker_data->result; -} - -static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) { - int i; - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - - ctx->last_show_frame = -1; - ctx->next_submit_worker_id = 0; - ctx->last_submit_worker_id = 0; - ctx->next_output_worker_id = 0; - ctx->frame_cache_read = 0; - ctx->frame_cache_write = 0; - ctx->num_cache_frames = 0; - ctx->need_resync = 1; - ctx->num_frame_workers = - (ctx->frame_parallel_decode == 1) ? 
ctx->cfg.threads: 1; - if (ctx->num_frame_workers > MAX_DECODE_THREADS) - ctx->num_frame_workers = MAX_DECODE_THREADS; - ctx->available_threads = ctx->num_frame_workers; - ctx->flushed = 0; - - ctx->buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(BufferPool)); - if (ctx->buffer_pool == NULL) - return VPX_CODEC_MEM_ERROR; - -#if CONFIG_MULTITHREAD - if (pthread_mutex_init(&ctx->buffer_pool->pool_mutex, NULL)) { - set_error_detail(ctx, "Failed to allocate buffer pool mutex"); - return VPX_CODEC_MEM_ERROR; - } -#endif - - ctx->frame_workers = (VPxWorker *) - vpx_malloc(ctx->num_frame_workers * sizeof(*ctx->frame_workers)); - if (ctx->frame_workers == NULL) { - set_error_detail(ctx, "Failed to allocate frame_workers"); - return VPX_CODEC_MEM_ERROR; - } - - for (i = 0; i < ctx->num_frame_workers; ++i) { - VPxWorker *const worker = &ctx->frame_workers[i]; - FrameWorkerData *frame_worker_data = NULL; - winterface->init(worker); - worker->data1 = vpx_memalign(32, sizeof(FrameWorkerData)); - if (worker->data1 == NULL) { - set_error_detail(ctx, "Failed to allocate frame_worker_data"); - return VPX_CODEC_MEM_ERROR; - } - frame_worker_data = (FrameWorkerData *)worker->data1; - frame_worker_data->pbi = vp9_decoder_create(ctx->buffer_pool); - if (frame_worker_data->pbi == NULL) { - set_error_detail(ctx, "Failed to allocate frame_worker_data"); - return VPX_CODEC_MEM_ERROR; - } - frame_worker_data->pbi->frame_worker_owner = worker; - frame_worker_data->worker_id = i; - frame_worker_data->scratch_buffer = NULL; - frame_worker_data->scratch_buffer_size = 0; - frame_worker_data->frame_context_ready = 0; - frame_worker_data->received_frame = 0; -#if CONFIG_MULTITHREAD - if (pthread_mutex_init(&frame_worker_data->stats_mutex, NULL)) { - set_error_detail(ctx, "Failed to allocate frame_worker_data mutex"); - return VPX_CODEC_MEM_ERROR; - } - - if (pthread_cond_init(&frame_worker_data->stats_cond, NULL)) { - set_error_detail(ctx, "Failed to allocate frame_worker_data cond"); - return 
VPX_CODEC_MEM_ERROR; - } -#endif - // If decoding in serial mode, FrameWorker thread could create tile worker - // thread or loopfilter thread. - frame_worker_data->pbi->max_threads = - (ctx->frame_parallel_decode == 0) ? ctx->cfg.threads : 0; - - frame_worker_data->pbi->inv_tile_order = ctx->invert_tile_order; - frame_worker_data->pbi->frame_parallel_decode = ctx->frame_parallel_decode; - frame_worker_data->pbi->common.frame_parallel_decode = - ctx->frame_parallel_decode; - worker->hook = (VPxWorkerHook)frame_worker_hook; - if (!winterface->reset(worker)) { - set_error_detail(ctx, "Frame Worker thread creation failed"); - return VPX_CODEC_MEM_ERROR; - } - } - - // If postprocessing was enabled by the application and a - // configuration has not been provided, default it. - if (!ctx->postproc_cfg_set && - (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) - set_default_ppflags(&ctx->postproc_cfg); - - init_buffer_callbacks(ctx); - - return VPX_CODEC_OK; -} - -static INLINE void check_resync(vpx_codec_alg_priv_t *const ctx, - const VP9Decoder *const pbi) { - // Clear resync flag if worker got a key frame or intra only frame. - if (ctx->need_resync == 1 && pbi->need_resync == 0 && - (pbi->common.intra_only || pbi->common.frame_type == KEY_FRAME)) - ctx->need_resync = 0; -} - -static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, - const uint8_t **data, unsigned int data_sz, - void *user_priv, int64_t deadline) { - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - (void)deadline; - - // Determine the stream parameters. Note that we rely on peek_si to - // validate that we have a buffer that does not wrap around the top - // of the heap. 
- if (!ctx->si.h) { - int is_intra_only = 0; - const vpx_codec_err_t res = - decoder_peek_si_internal(*data, data_sz, &ctx->si, &is_intra_only, - ctx->decrypt_cb, ctx->decrypt_state); - if (res != VPX_CODEC_OK) - return res; - - if (!ctx->si.is_kf && !is_intra_only) - return VPX_CODEC_ERROR; - } - - if (!ctx->frame_parallel_decode) { - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; - frame_worker_data->data = *data; - frame_worker_data->data_size = data_sz; - frame_worker_data->user_priv = user_priv; - frame_worker_data->received_frame = 1; - - // Set these even if already initialized. The caller may have changed the - // decrypt config between frames. - frame_worker_data->pbi->decrypt_cb = ctx->decrypt_cb; - frame_worker_data->pbi->decrypt_state = ctx->decrypt_state; - - worker->had_error = 0; - winterface->execute(worker); - - // Update data pointer after decode. - *data = frame_worker_data->data_end; - - if (worker->had_error) - return update_error_state(ctx, &frame_worker_data->pbi->common.error); - - check_resync(ctx, frame_worker_data->pbi); - } else { - VPxWorker *const worker = &ctx->frame_workers[ctx->next_submit_worker_id]; - FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; - // Copy context from last worker thread to next worker thread. - if (ctx->next_submit_worker_id != ctx->last_submit_worker_id) - vp9_frameworker_copy_context( - &ctx->frame_workers[ctx->next_submit_worker_id], - &ctx->frame_workers[ctx->last_submit_worker_id]); - - frame_worker_data->pbi->ready_for_new_data = 0; - // Copy the compressed data into worker's internal buffer. - // TODO(hkuang): Will all the workers allocate the same size - // as the size of the first intra frame be better? This will - // avoid too many deallocate and allocate. 
- if (frame_worker_data->scratch_buffer_size < data_sz) { - frame_worker_data->scratch_buffer = - (uint8_t *)vpx_realloc(frame_worker_data->scratch_buffer, data_sz); - if (frame_worker_data->scratch_buffer == NULL) { - set_error_detail(ctx, "Failed to reallocate scratch buffer"); - return VPX_CODEC_MEM_ERROR; - } - frame_worker_data->scratch_buffer_size = data_sz; - } - frame_worker_data->data_size = data_sz; - memcpy(frame_worker_data->scratch_buffer, *data, data_sz); - - frame_worker_data->frame_decoded = 0; - frame_worker_data->frame_context_ready = 0; - frame_worker_data->received_frame = 1; - frame_worker_data->data = frame_worker_data->scratch_buffer; - frame_worker_data->user_priv = user_priv; - - if (ctx->next_submit_worker_id != ctx->last_submit_worker_id) - ctx->last_submit_worker_id = - (ctx->last_submit_worker_id + 1) % ctx->num_frame_workers; - - ctx->next_submit_worker_id = - (ctx->next_submit_worker_id + 1) % ctx->num_frame_workers; - --ctx->available_threads; - worker->had_error = 0; - winterface->launch(worker); - } - - return VPX_CODEC_OK; -} - -static void wait_worker_and_cache_frame(vpx_codec_alg_priv_t *ctx) { - YV12_BUFFER_CONFIG sd; - vp9_ppflags_t flags = {0, 0, 0}; - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id]; - FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; - ctx->next_output_worker_id = - (ctx->next_output_worker_id + 1) % ctx->num_frame_workers; - // TODO(hkuang): Add worker error handling here. 
- winterface->sync(worker); - frame_worker_data->received_frame = 0; - ++ctx->available_threads; - - check_resync(ctx, frame_worker_data->pbi); - - if (vp9_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) { - VP9_COMMON *const cm = &frame_worker_data->pbi->common; - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; - ctx->frame_cache[ctx->frame_cache_write].fb_idx = cm->new_fb_idx; - yuvconfig2image(&ctx->frame_cache[ctx->frame_cache_write].img, &sd, - frame_worker_data->user_priv); - ctx->frame_cache[ctx->frame_cache_write].img.fb_priv = - frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; - ctx->frame_cache_write = - (ctx->frame_cache_write + 1) % FRAME_CACHE_SIZE; - ++ctx->num_cache_frames; - } -} - -static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, - const uint8_t *data, unsigned int data_sz, - void *user_priv, long deadline) { - const uint8_t *data_start = data; - const uint8_t * const data_end = data + data_sz; - vpx_codec_err_t res; - uint32_t frame_sizes[8]; - int frame_count; - - if (data == NULL && data_sz == 0) { - ctx->flushed = 1; - return VPX_CODEC_OK; - } - - // Reset flushed when receiving a valid frame. - ctx->flushed = 0; - - // Initialize the decoder workers on the first frame. - if (ctx->frame_workers == NULL) { - const vpx_codec_err_t res = init_decoder(ctx); - if (res != VPX_CODEC_OK) - return res; - } - - res = vp9_parse_superframe_index(data, data_sz, frame_sizes, &frame_count, - ctx->decrypt_cb, ctx->decrypt_state); - if (res != VPX_CODEC_OK) - return res; - - if (ctx->frame_parallel_decode) { - // Decode in frame parallel mode. When decoding in this mode, the frame - // passed to the decoder must be either a normal frame or a superframe with - // superframe index so the decoder could get each frame's start position - // in the superframe. 
- if (frame_count > 0) { - int i; - - for (i = 0; i < frame_count; ++i) { - const uint8_t *data_start_copy = data_start; - const uint32_t frame_size = frame_sizes[i]; - if (data_start < data - || frame_size > (uint32_t) (data_end - data_start)) { - set_error_detail(ctx, "Invalid frame size in index"); - return VPX_CODEC_CORRUPT_FRAME; - } - - if (ctx->available_threads == 0) { - // No more threads for decoding. Wait until the next output worker - // finishes decoding. Then copy the decoded frame into cache. - if (ctx->num_cache_frames < FRAME_CACHE_SIZE) { - wait_worker_and_cache_frame(ctx); - } else { - // TODO(hkuang): Add unit test to test this path. - set_error_detail(ctx, "Frame output cache is full."); - return VPX_CODEC_ERROR; - } - } - - res = decode_one(ctx, &data_start_copy, frame_size, user_priv, - deadline); - if (res != VPX_CODEC_OK) - return res; - data_start += frame_size; - } - } else { - if (ctx->available_threads == 0) { - // No more threads for decoding. Wait until the next output worker - // finishes decoding. Then copy the decoded frame into cache. - if (ctx->num_cache_frames < FRAME_CACHE_SIZE) { - wait_worker_and_cache_frame(ctx); - } else { - // TODO(hkuang): Add unit test to test this path. - set_error_detail(ctx, "Frame output cache is full."); - return VPX_CODEC_ERROR; - } - } - - res = decode_one(ctx, &data, data_sz, user_priv, deadline); - if (res != VPX_CODEC_OK) - return res; - } - } else { - // Decode in serial mode. 
- if (frame_count > 0) { - int i; - - for (i = 0; i < frame_count; ++i) { - const uint8_t *data_start_copy = data_start; - const uint32_t frame_size = frame_sizes[i]; - vpx_codec_err_t res; - if (data_start < data - || frame_size > (uint32_t) (data_end - data_start)) { - set_error_detail(ctx, "Invalid frame size in index"); - return VPX_CODEC_CORRUPT_FRAME; - } - - res = decode_one(ctx, &data_start_copy, frame_size, user_priv, - deadline); - if (res != VPX_CODEC_OK) - return res; - - data_start += frame_size; - } - } else { - while (data_start < data_end) { - const uint32_t frame_size = (uint32_t) (data_end - data_start); - const vpx_codec_err_t res = decode_one(ctx, &data_start, frame_size, - user_priv, deadline); - if (res != VPX_CODEC_OK) - return res; - - // Account for suboptimal termination by the encoder. - while (data_start < data_end) { - const uint8_t marker = read_marker(ctx->decrypt_cb, - ctx->decrypt_state, data_start); - if (marker) - break; - ++data_start; - } - } - } - } - - return res; -} - -static void release_last_output_frame(vpx_codec_alg_priv_t *ctx) { - RefCntBuffer *const frame_bufs = ctx->buffer_pool->frame_bufs; - // Decrease reference count of last output frame in frame parallel mode. - if (ctx->frame_parallel_decode && ctx->last_show_frame >= 0) { - BufferPool *const pool = ctx->buffer_pool; - lock_buffer_pool(pool); - decrease_ref_count(ctx->last_show_frame, frame_bufs, pool); - unlock_buffer_pool(pool); - } -} - -static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, - vpx_codec_iter_t *iter) { - vpx_image_t *img = NULL; - - // Only return frame when all the cpu are busy or - // application fluhsed the decoder in frame parallel decode. - if (ctx->frame_parallel_decode && ctx->available_threads > 0 && - !ctx->flushed) { - return NULL; - } - - // Output the frames in the cache first. 
- if (ctx->num_cache_frames > 0) { - release_last_output_frame(ctx); - ctx->last_show_frame = ctx->frame_cache[ctx->frame_cache_read].fb_idx; - if (ctx->need_resync) - return NULL; - img = &ctx->frame_cache[ctx->frame_cache_read].img; - ctx->frame_cache_read = (ctx->frame_cache_read + 1) % FRAME_CACHE_SIZE; - --ctx->num_cache_frames; - return img; - } - - // iter acts as a flip flop, so an image is only returned on the first - // call to get_frame. - if (*iter == NULL && ctx->frame_workers != NULL) { - do { - YV12_BUFFER_CONFIG sd; - vp9_ppflags_t flags = {0, 0, 0}; - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - VPxWorker *const worker = - &ctx->frame_workers[ctx->next_output_worker_id]; - FrameWorkerData *const frame_worker_data = - (FrameWorkerData *)worker->data1; - ctx->next_output_worker_id = - (ctx->next_output_worker_id + 1) % ctx->num_frame_workers; - if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) - set_ppflags(ctx, &flags); - // Wait for the frame from worker thread. - if (winterface->sync(worker)) { - // Check if worker has received any frames. - if (frame_worker_data->received_frame == 1) { - ++ctx->available_threads; - frame_worker_data->received_frame = 0; - check_resync(ctx, frame_worker_data->pbi); - } - if (vp9_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) { - VP9_COMMON *const cm = &frame_worker_data->pbi->common; - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; - release_last_output_frame(ctx); - ctx->last_show_frame = frame_worker_data->pbi->common.new_fb_idx; - if (ctx->need_resync) - return NULL; - yuvconfig2image(&ctx->img, &sd, frame_worker_data->user_priv); - ctx->img.fb_priv = frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; - img = &ctx->img; - return img; - } - } else { - // Decoding failed. Release the worker thread. 
- frame_worker_data->received_frame = 0; - ++ctx->available_threads; - ctx->need_resync = 1; - if (ctx->flushed != 1) - return NULL; - } - } while (ctx->next_output_worker_id != ctx->next_submit_worker_id); - } - return NULL; -} - -static vpx_codec_err_t decoder_set_fb_fn( - vpx_codec_alg_priv_t *ctx, - vpx_get_frame_buffer_cb_fn_t cb_get, - vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) { - if (cb_get == NULL || cb_release == NULL) { - return VPX_CODEC_INVALID_PARAM; - } else if (ctx->frame_workers == NULL) { - // If the decoder has already been initialized, do not accept changes to - // the frame buffer functions. - ctx->get_ext_fb_cb = cb_get; - ctx->release_ext_fb_cb = cb_release; - ctx->ext_priv = cb_priv; - return VPX_CODEC_OK; - } - - return VPX_CODEC_ERROR; -} - -static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, - va_list args) { - vpx_ref_frame_t *const data = va_arg(args, vpx_ref_frame_t *); - - // Only support this function in serial decode. - if (ctx->frame_parallel_decode) { - set_error_detail(ctx, "Not supported in frame parallel decode"); - return VPX_CODEC_INCAPABLE; - } - - if (data) { - vpx_ref_frame_t *const frame = (vpx_ref_frame_t *)data; - YV12_BUFFER_CONFIG sd; - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; - image2yuvconfig(&frame->img, &sd); - return vp9_set_reference_dec(&frame_worker_data->pbi->common, - (VP9_REFFRAME)frame->frame_type, &sd); - } else { - return VPX_CODEC_INVALID_PARAM; - } -} - -static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, - va_list args) { - vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); - - // Only support this function in serial decode. 
- if (ctx->frame_parallel_decode) { - set_error_detail(ctx, "Not supported in frame parallel decode"); - return VPX_CODEC_INCAPABLE; - } - - if (data) { - vpx_ref_frame_t *frame = (vpx_ref_frame_t *) data; - YV12_BUFFER_CONFIG sd; - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; - image2yuvconfig(&frame->img, &sd); - return vp9_copy_reference_dec(frame_worker_data->pbi, - (VP9_REFFRAME)frame->frame_type, &sd); - } else { - return VPX_CODEC_INVALID_PARAM; - } -} - -static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, - va_list args) { - vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *); - - // Only support this function in serial decode. - if (ctx->frame_parallel_decode) { - set_error_detail(ctx, "Not supported in frame parallel decode"); - return VPX_CODEC_INCAPABLE; - } - - if (data) { - YV12_BUFFER_CONFIG* fb; - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; - fb = get_ref_frame(&frame_worker_data->pbi->common, data->idx); - if (fb == NULL) return VPX_CODEC_ERROR; - yuvconfig2image(&data->img, fb, NULL); - return VPX_CODEC_OK; - } else { - return VPX_CODEC_INVALID_PARAM; - } -} - -static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx, - va_list args) { -#if CONFIG_VP9_POSTPROC - vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); - - if (data) { - ctx->postproc_cfg_set = 1; - ctx->postproc_cfg = *((vp8_postproc_cfg_t *)data); - return VPX_CODEC_OK; - } else { - return VPX_CODEC_INVALID_PARAM; - } -#else - (void)ctx; - (void)args; - return VPX_CODEC_INCAPABLE; -#endif -} - -static vpx_codec_err_t ctrl_set_dbg_options(vpx_codec_alg_priv_t *ctx, - va_list args) { - (void)ctx; - (void)args; - return VPX_CODEC_INCAPABLE; -} - -static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, - va_list args) { - int *const update_info = va_arg(args, int *); - - 
// Only support this function in serial decode. - if (ctx->frame_parallel_decode) { - set_error_detail(ctx, "Not supported in frame parallel decode"); - return VPX_CODEC_INCAPABLE; - } - - if (update_info) { - if (ctx->frame_workers) { - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = - (FrameWorkerData *)worker->data1; - *update_info = frame_worker_data->pbi->refresh_frame_flags; - return VPX_CODEC_OK; - } else { - return VPX_CODEC_ERROR; - } - } - - return VPX_CODEC_INVALID_PARAM; -} - -static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, - va_list args) { - int *corrupted = va_arg(args, int *); - - if (corrupted) { - if (ctx->frame_workers) { - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = - (FrameWorkerData *)worker->data1; - RefCntBuffer *const frame_bufs = - frame_worker_data->pbi->common.buffer_pool->frame_bufs; - if (frame_worker_data->pbi->common.frame_to_show == NULL) - return VPX_CODEC_ERROR; - if (ctx->last_show_frame >= 0) - *corrupted = frame_bufs[ctx->last_show_frame].buf.corrupted; - return VPX_CODEC_OK; - } else { - return VPX_CODEC_ERROR; - } - } - - return VPX_CODEC_INVALID_PARAM; -} - -static vpx_codec_err_t ctrl_get_frame_size(vpx_codec_alg_priv_t *ctx, - va_list args) { - int *const frame_size = va_arg(args, int *); - - // Only support this function in serial decode. 
- if (ctx->frame_parallel_decode) { - set_error_detail(ctx, "Not supported in frame parallel decode"); - return VPX_CODEC_INCAPABLE; - } - - if (frame_size) { - if (ctx->frame_workers) { - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = - (FrameWorkerData *)worker->data1; - const VP9_COMMON *const cm = &frame_worker_data->pbi->common; - frame_size[0] = cm->width; - frame_size[1] = cm->height; - return VPX_CODEC_OK; - } else { - return VPX_CODEC_ERROR; - } - } - - return VPX_CODEC_INVALID_PARAM; -} - -static vpx_codec_err_t ctrl_get_render_size(vpx_codec_alg_priv_t *ctx, - va_list args) { - int *const render_size = va_arg(args, int *); - - // Only support this function in serial decode. - if (ctx->frame_parallel_decode) { - set_error_detail(ctx, "Not supported in frame parallel decode"); - return VPX_CODEC_INCAPABLE; - } - - if (render_size) { - if (ctx->frame_workers) { - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = - (FrameWorkerData *)worker->data1; - const VP9_COMMON *const cm = &frame_worker_data->pbi->common; - render_size[0] = cm->render_width; - render_size[1] = cm->render_height; - return VPX_CODEC_OK; - } else { - return VPX_CODEC_ERROR; - } - } - - return VPX_CODEC_INVALID_PARAM; -} - -static vpx_codec_err_t ctrl_get_bit_depth(vpx_codec_alg_priv_t *ctx, - va_list args) { - unsigned int *const bit_depth = va_arg(args, unsigned int *); - VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id]; - - if (bit_depth) { - if (worker) { - FrameWorkerData *const frame_worker_data = - (FrameWorkerData *)worker->data1; - const VP9_COMMON *const cm = &frame_worker_data->pbi->common; - *bit_depth = cm->bit_depth; - return VPX_CODEC_OK; - } else { - return VPX_CODEC_ERROR; - } - } - - return VPX_CODEC_INVALID_PARAM; -} - -static vpx_codec_err_t ctrl_set_invert_tile_order(vpx_codec_alg_priv_t *ctx, - va_list args) { - ctx->invert_tile_order = va_arg(args, int); - 
return VPX_CODEC_OK; -} - -static vpx_codec_err_t ctrl_set_decryptor(vpx_codec_alg_priv_t *ctx, - va_list args) { - vpx_decrypt_init *init = va_arg(args, vpx_decrypt_init *); - ctx->decrypt_cb = init ? init->decrypt_cb : NULL; - ctx->decrypt_state = init ? init->decrypt_state : NULL; - return VPX_CODEC_OK; -} - -static vpx_codec_err_t ctrl_set_byte_alignment(vpx_codec_alg_priv_t *ctx, - va_list args) { - const int legacy_byte_alignment = 0; - const int min_byte_alignment = 32; - const int max_byte_alignment = 1024; - const int byte_alignment = va_arg(args, int); - - if (byte_alignment != legacy_byte_alignment && - (byte_alignment < min_byte_alignment || - byte_alignment > max_byte_alignment || - (byte_alignment & (byte_alignment - 1)) != 0)) - return VPX_CODEC_INVALID_PARAM; - - ctx->byte_alignment = byte_alignment; - if (ctx->frame_workers) { - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = - (FrameWorkerData *)worker->data1; - frame_worker_data->pbi->common.byte_alignment = byte_alignment; - } - return VPX_CODEC_OK; -} - -static vpx_codec_err_t ctrl_set_skip_loop_filter(vpx_codec_alg_priv_t *ctx, - va_list args) { - ctx->skip_loop_filter = va_arg(args, int); - - if (ctx->frame_workers) { - VPxWorker *const worker = ctx->frame_workers; - FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; - frame_worker_data->pbi->common.skip_loop_filter = ctx->skip_loop_filter; - } - - return VPX_CODEC_OK; -} - -static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { - {VP8_COPY_REFERENCE, ctrl_copy_reference}, - - // Setters - {VP8_SET_REFERENCE, ctrl_set_reference}, - {VP8_SET_POSTPROC, ctrl_set_postproc}, - {VP8_SET_DBG_COLOR_REF_FRAME, ctrl_set_dbg_options}, - {VP8_SET_DBG_COLOR_MB_MODES, ctrl_set_dbg_options}, - {VP8_SET_DBG_COLOR_B_MODES, ctrl_set_dbg_options}, - {VP8_SET_DBG_DISPLAY_MV, ctrl_set_dbg_options}, - {VP9_INVERT_TILE_DECODE_ORDER, ctrl_set_invert_tile_order}, - {VPXD_SET_DECRYPTOR, 
ctrl_set_decryptor}, - {VP9_SET_BYTE_ALIGNMENT, ctrl_set_byte_alignment}, - {VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter}, - - // Getters - {VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates}, - {VP8D_GET_FRAME_CORRUPTED, ctrl_get_frame_corrupted}, - {VP9_GET_REFERENCE, ctrl_get_reference}, - {VP9D_GET_DISPLAY_SIZE, ctrl_get_render_size}, - {VP9D_GET_BIT_DEPTH, ctrl_get_bit_depth}, - {VP9D_GET_FRAME_SIZE, ctrl_get_frame_size}, - - { -1, NULL}, -}; - -#ifndef VERSION_STRING -#define VERSION_STRING -#endif -CODEC_INTERFACE(vpx_codec_vp9_dx) = { - "WebM Project VP9 Decoder" VERSION_STRING, - VPX_CODEC_INTERNAL_ABI_VERSION, - VPX_CODEC_CAP_DECODER | VP9_CAP_POSTPROC | - VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER, // vpx_codec_caps_t - decoder_init, // vpx_codec_init_fn_t - decoder_destroy, // vpx_codec_destroy_fn_t - decoder_ctrl_maps, // vpx_codec_ctrl_fn_map_t - { // NOLINT - decoder_peek_si, // vpx_codec_peek_si_fn_t - decoder_get_si, // vpx_codec_get_si_fn_t - decoder_decode, // vpx_codec_decode_fn_t - decoder_get_frame, // vpx_codec_frame_get_fn_t - decoder_set_fb_fn, // vpx_codec_set_fb_fn_t - }, - { // NOLINT - 0, - NULL, // vpx_codec_enc_cfg_map_t - NULL, // vpx_codec_encode_fn_t - NULL, // vpx_codec_get_cx_data_fn_t - NULL, // vpx_codec_enc_config_set_fn_t - NULL, // vpx_codec_get_global_headers_fn_t - NULL, // vpx_codec_get_preview_frame_fn_t - NULL // vpx_codec_enc_mr_get_mem_loc_fn_t - } -}; diff --git a/thirdparty/libvpx/vp9/vp9_dx_iface.h b/thirdparty/libvpx/vp9/vp9_dx_iface.h deleted file mode 100644 index e0e948e16c..0000000000 --- a/thirdparty/libvpx/vp9/vp9_dx_iface.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_VP9_DX_IFACE_H_ -#define VP9_VP9_DX_IFACE_H_ - -#include "vp9/decoder/vp9_decoder.h" - -typedef vpx_codec_stream_info_t vp9_stream_info_t; - -// This limit is due to framebuffer numbers. -// TODO(hkuang): Remove this limit after implementing ondemand framebuffers. -#define FRAME_CACHE_SIZE 6 // Cache maximum 6 decoded frames. - -typedef struct cache_frame { - int fb_idx; - vpx_image_t img; -} cache_frame; - -struct vpx_codec_alg_priv { - vpx_codec_priv_t base; - vpx_codec_dec_cfg_t cfg; - vp9_stream_info_t si; - int postproc_cfg_set; - vp8_postproc_cfg_t postproc_cfg; - vpx_decrypt_cb decrypt_cb; - void *decrypt_state; - vpx_image_t img; - int img_avail; - int flushed; - int invert_tile_order; - int last_show_frame; // Index of last output frame. - int byte_alignment; - int skip_loop_filter; - - // Frame parallel related. - int frame_parallel_decode; // frame-based threading. - VPxWorker *frame_workers; - int num_frame_workers; - int next_submit_worker_id; - int last_submit_worker_id; - int next_output_worker_id; - int available_threads; - cache_frame frame_cache[FRAME_CACHE_SIZE]; - int frame_cache_write; - int frame_cache_read; - int num_cache_frames; - int need_resync; // wait for key/intra-only frame - // BufferPool that holds all reference frames. Shared by all the FrameWorkers. - BufferPool *buffer_pool; - - // External frame buffer info to save for VP9 common. - void *ext_priv; // Private data associated with the external frame buffers. 
- vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb; - vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb; -}; - -#endif // VP9_VP9_DX_IFACE_H_ diff --git a/thirdparty/libvpx/vp9/vp9_iface_common.h b/thirdparty/libvpx/vp9/vp9_iface_common.h deleted file mode 100644 index 938d4224ba..0000000000 --- a/thirdparty/libvpx/vp9/vp9_iface_common.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#ifndef VP9_VP9_IFACE_COMMON_H_ -#define VP9_VP9_IFACE_COMMON_H_ - -#include "vpx_ports/mem.h" - -static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, - void *user_priv) { - /** vpx_img_wrap() doesn't allow specifying independent strides for - * the Y, U, and V planes, nor other alignment adjustments that - * might be representable by a YV12_BUFFER_CONFIG, so we just - * initialize all the fields.*/ - int bps; - if (!yv12->subsampling_y) { - if (!yv12->subsampling_x) { - img->fmt = VPX_IMG_FMT_I444; - bps = 24; - } else { - img->fmt = VPX_IMG_FMT_I422; - bps = 16; - } - } else { - if (!yv12->subsampling_x) { - img->fmt = VPX_IMG_FMT_I440; - bps = 16; - } else { - img->fmt = VPX_IMG_FMT_I420; - bps = 12; - } - } - img->cs = yv12->color_space; - img->range = yv12->color_range; - img->bit_depth = 8; - img->w = yv12->y_stride; - img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3); - img->d_w = yv12->y_crop_width; - img->d_h = yv12->y_crop_height; - img->r_w = yv12->render_width; - img->r_h = yv12->render_height; - img->x_chroma_shift = yv12->subsampling_x; - img->y_chroma_shift = yv12->subsampling_y; - img->planes[VPX_PLANE_Y] = yv12->y_buffer; - 
img->planes[VPX_PLANE_U] = yv12->u_buffer; - img->planes[VPX_PLANE_V] = yv12->v_buffer; - img->planes[VPX_PLANE_ALPHA] = NULL; - img->stride[VPX_PLANE_Y] = yv12->y_stride; - img->stride[VPX_PLANE_U] = yv12->uv_stride; - img->stride[VPX_PLANE_V] = yv12->uv_stride; - img->stride[VPX_PLANE_ALPHA] = yv12->y_stride; -#if CONFIG_VP9_HIGHBITDEPTH - if (yv12->flags & YV12_FLAG_HIGHBITDEPTH) { - // vpx_image_t uses byte strides and a pointer to the first byte - // of the image. - img->fmt = (vpx_img_fmt_t)(img->fmt | VPX_IMG_FMT_HIGHBITDEPTH); - img->bit_depth = yv12->bit_depth; - img->planes[VPX_PLANE_Y] = (uint8_t*)CONVERT_TO_SHORTPTR(yv12->y_buffer); - img->planes[VPX_PLANE_U] = (uint8_t*)CONVERT_TO_SHORTPTR(yv12->u_buffer); - img->planes[VPX_PLANE_V] = (uint8_t*)CONVERT_TO_SHORTPTR(yv12->v_buffer); - img->planes[VPX_PLANE_ALPHA] = NULL; - img->stride[VPX_PLANE_Y] = 2 * yv12->y_stride; - img->stride[VPX_PLANE_U] = 2 * yv12->uv_stride; - img->stride[VPX_PLANE_V] = 2 * yv12->uv_stride; - img->stride[VPX_PLANE_ALPHA] = 2 * yv12->y_stride; - } -#endif // CONFIG_VP9_HIGHBITDEPTH - img->bps = bps; - img->user_priv = user_priv; - img->img_data = yv12->buffer_alloc; - img->img_data_owner = 0; - img->self_allocd = 0; -} - -static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, - YV12_BUFFER_CONFIG *yv12) { - yv12->y_buffer = img->planes[VPX_PLANE_Y]; - yv12->u_buffer = img->planes[VPX_PLANE_U]; - yv12->v_buffer = img->planes[VPX_PLANE_V]; - - yv12->y_crop_width = img->d_w; - yv12->y_crop_height = img->d_h; - yv12->render_width = img->r_w; - yv12->render_height = img->r_h; - yv12->y_width = img->d_w; - yv12->y_height = img->d_h; - - yv12->uv_width = img->x_chroma_shift == 1 ? (1 + yv12->y_width) / 2 - : yv12->y_width; - yv12->uv_height = img->y_chroma_shift == 1 ? 
(1 + yv12->y_height) / 2 - : yv12->y_height; - yv12->uv_crop_width = yv12->uv_width; - yv12->uv_crop_height = yv12->uv_height; - - yv12->y_stride = img->stride[VPX_PLANE_Y]; - yv12->uv_stride = img->stride[VPX_PLANE_U]; - yv12->color_space = img->cs; - yv12->color_range = img->range; - -#if CONFIG_VP9_HIGHBITDEPTH - if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) { - // In vpx_image_t - // planes point to uint8 address of start of data - // stride counts uint8s to reach next row - // In YV12_BUFFER_CONFIG - // y_buffer, u_buffer, v_buffer point to uint16 address of data - // stride and border counts in uint16s - // This means that all the address calculations in the main body of code - // should work correctly. - // However, before we do any pixel operations we need to cast the address - // to a uint16 ponter and double its value. - yv12->y_buffer = CONVERT_TO_BYTEPTR(yv12->y_buffer); - yv12->u_buffer = CONVERT_TO_BYTEPTR(yv12->u_buffer); - yv12->v_buffer = CONVERT_TO_BYTEPTR(yv12->v_buffer); - yv12->y_stride >>= 1; - yv12->uv_stride >>= 1; - yv12->flags = YV12_FLAG_HIGHBITDEPTH; - } else { - yv12->flags = 0; - } - yv12->border = (yv12->y_stride - img->w) / 2; -#else - yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2; -#endif // CONFIG_VP9_HIGHBITDEPTH - yv12->subsampling_x = img->x_chroma_shift; - yv12->subsampling_y = img->y_chroma_shift; - return VPX_CODEC_OK; -} - -#endif // VP9_VP9_IFACE_COMMON_H_ diff --git a/thirdparty/libvpx/vp9_rtcd.h b/thirdparty/libvpx/vp9_rtcd.h deleted file mode 100644 index cf2b463d63..0000000000 --- a/thirdparty/libvpx/vp9_rtcd.h +++ /dev/null @@ -1,9 +0,0 @@ -#include "vpx_config.h" - -#if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64) - #include "rtcd/vp9_rtcd_x86.h" -#elif defined(WEBM_ARMASM) && ARCH_ARM - #include "rtcd/vp9_rtcd_arm.h" -#else - #include "rtcd/vp9_rtcd_c.h" -#endif diff --git a/thirdparty/libvpx/vpx/internal/vpx_codec_internal.h b/thirdparty/libvpx/vpx/internal/vpx_codec_internal.h deleted file mode 100644 
index 7380fcc7e2..0000000000 --- a/thirdparty/libvpx/vpx/internal/vpx_codec_internal.h +++ /dev/null @@ -1,445 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/*!\file - * \brief Describes the decoder algorithm interface for algorithm - * implementations. - * - * This file defines the private structures and data types that are only - * relevant to implementing an algorithm, as opposed to using it. - * - * To create a decoder algorithm class, an interface structure is put - * into the global namespace: - * <pre> - * my_codec.c: - * vpx_codec_iface_t my_codec = { - * "My Codec v1.0", - * VPX_CODEC_ALG_ABI_VERSION, - * ... - * }; - * </pre> - * - * An application instantiates a specific decoder instance by using - * vpx_codec_init() and a pointer to the algorithm's interface structure: - * <pre> - * my_app.c: - * extern vpx_codec_iface_t my_codec; - * { - * vpx_codec_ctx_t algo; - * res = vpx_codec_init(&algo, &my_codec); - * } - * </pre> - * - * Once initialized, the instance is manged using other functions from - * the vpx_codec_* family. - */ -#ifndef VPX_INTERNAL_VPX_CODEC_INTERNAL_H_ -#define VPX_INTERNAL_VPX_CODEC_INTERNAL_H_ -#include "../vpx_decoder.h" -#include "../vpx_encoder.h" -#include <stdarg.h> - -#ifdef __cplusplus -extern "C" { -#endif - -/*!\brief Current ABI version number - * - * \internal - * If this file is altered in any way that changes the ABI, this value - * must be bumped. 
Examples include, but are not limited to, changing - * types, removing or reassigning enums, adding/removing/rearranging - * fields to structures - */ -#define VPX_CODEC_INTERNAL_ABI_VERSION (5) /**<\hideinitializer*/ - -typedef struct vpx_codec_alg_priv vpx_codec_alg_priv_t; -typedef struct vpx_codec_priv_enc_mr_cfg vpx_codec_priv_enc_mr_cfg_t; - -/*!\brief init function pointer prototype - * - * Performs algorithm-specific initialization of the decoder context. This - * function is called by the generic vpx_codec_init() wrapper function, so - * plugins implementing this interface may trust the input parameters to be - * properly initialized. - * - * \param[in] ctx Pointer to this instance's context - * \retval #VPX_CODEC_OK - * The input stream was recognized and decoder initialized. - * \retval #VPX_CODEC_MEM_ERROR - * Memory operation failed. - */ -typedef vpx_codec_err_t (*vpx_codec_init_fn_t)(vpx_codec_ctx_t *ctx, - vpx_codec_priv_enc_mr_cfg_t *data); - -/*!\brief destroy function pointer prototype - * - * Performs algorithm-specific destruction of the decoder context. This - * function is called by the generic vpx_codec_destroy() wrapper function, - * so plugins implementing this interface may trust the input parameters - * to be properly initialized. - * - * \param[in] ctx Pointer to this instance's context - * \retval #VPX_CODEC_OK - * The input stream was recognized and decoder initialized. - * \retval #VPX_CODEC_MEM_ERROR - * Memory operation failed. - */ -typedef vpx_codec_err_t (*vpx_codec_destroy_fn_t)(vpx_codec_alg_priv_t *ctx); - -/*!\brief parse stream info function pointer prototype - * - * Performs high level parsing of the bitstream. This function is called by the - * generic vpx_codec_peek_stream_info() wrapper function, so plugins - * implementing this interface may trust the input parameters to be properly - * initialized. 
- * - * \param[in] data Pointer to a block of data to parse - * \param[in] data_sz Size of the data buffer - * \param[in,out] si Pointer to stream info to update. The size member - * \ref MUST be properly initialized, but \ref MAY be - * clobbered by the algorithm. This parameter \ref MAY - * be NULL. - * - * \retval #VPX_CODEC_OK - * Bitstream is parsable and stream information updated - */ -typedef vpx_codec_err_t (*vpx_codec_peek_si_fn_t)(const uint8_t *data, - unsigned int data_sz, - vpx_codec_stream_info_t *si); - -/*!\brief Return information about the current stream. - * - * Returns information about the stream that has been parsed during decoding. - * - * \param[in] ctx Pointer to this instance's context - * \param[in,out] si Pointer to stream info to update. The size member - * \ref MUST be properly initialized, but \ref MAY be - * clobbered by the algorithm. This parameter \ref MAY - * be NULL. - * - * \retval #VPX_CODEC_OK - * Bitstream is parsable and stream information updated - */ -typedef vpx_codec_err_t (*vpx_codec_get_si_fn_t)(vpx_codec_alg_priv_t *ctx, - vpx_codec_stream_info_t *si); - -/*!\brief control function pointer prototype - * - * This function is used to exchange algorithm specific data with the decoder - * instance. This can be used to implement features specific to a particular - * algorithm. - * - * This function is called by the generic vpx_codec_control() wrapper - * function, so plugins implementing this interface may trust the input - * parameters to be properly initialized. However, this interface does not - * provide type safety for the exchanged data or assign meanings to the - * control codes. Those details should be specified in the algorithm's - * header file. In particular, the ctrl_id parameter is guaranteed to exist - * in the algorithm's control mapping table, and the data parameter may be NULL. 
- * - * - * \param[in] ctx Pointer to this instance's context - * \param[in] ctrl_id Algorithm specific control identifier - * \param[in,out] data Data to exchange with algorithm instance. - * - * \retval #VPX_CODEC_OK - * The internal state data was deserialized. - */ -typedef vpx_codec_err_t (*vpx_codec_control_fn_t)(vpx_codec_alg_priv_t *ctx, - va_list ap); - -/*!\brief control function pointer mapping - * - * This structure stores the mapping between control identifiers and - * implementing functions. Each algorithm provides a list of these - * mappings. This list is searched by the vpx_codec_control() wrapper - * function to determine which function to invoke. The special - * value {0, NULL} is used to indicate end-of-list, and must be - * present. The special value {0, <non-null>} can be used as a catch-all - * mapping. This implies that ctrl_id values chosen by the algorithm - * \ref MUST be non-zero. - */ -typedef const struct vpx_codec_ctrl_fn_map { - int ctrl_id; - vpx_codec_control_fn_t fn; -} vpx_codec_ctrl_fn_map_t; - -/*!\brief decode data function pointer prototype - * - * Processes a buffer of coded data. If the processing results in a new - * decoded frame becoming available, #VPX_CODEC_CB_PUT_SLICE and - * #VPX_CODEC_CB_PUT_FRAME events are generated as appropriate. This - * function is called by the generic vpx_codec_decode() wrapper function, - * so plugins implementing this interface may trust the input parameters - * to be properly initialized. - * - * \param[in] ctx Pointer to this instance's context - * \param[in] data Pointer to this block of new coded data. If - * NULL, a #VPX_CODEC_CB_PUT_FRAME event is posted - * for the previously decoded frame. - * \param[in] data_sz Size of the coded data, in bytes. - * - * \return Returns #VPX_CODEC_OK if the coded data was processed completely - * and future pictures can be decoded without error. 
Otherwise, - * see the descriptions of the other error codes in ::vpx_codec_err_t - * for recoverability capabilities. - */ -typedef vpx_codec_err_t (*vpx_codec_decode_fn_t)(vpx_codec_alg_priv_t *ctx, - const uint8_t *data, - unsigned int data_sz, - void *user_priv, - long deadline); - -/*!\brief Decoded frames iterator - * - * Iterates over a list of the frames available for display. The iterator - * storage should be initialized to NULL to start the iteration. Iteration is - * complete when this function returns NULL. - * - * The list of available frames becomes valid upon completion of the - * vpx_codec_decode call, and remains valid until the next call to vpx_codec_decode. - * - * \param[in] ctx Pointer to this instance's context - * \param[in out] iter Iterator storage, initialized to NULL - * - * \return Returns a pointer to an image, if one is ready for display. Frames - * produced will always be in PTS (presentation time stamp) order. - */ -typedef vpx_image_t *(*vpx_codec_get_frame_fn_t)(vpx_codec_alg_priv_t *ctx, - vpx_codec_iter_t *iter); - -/*!\brief Pass in external frame buffers for the decoder to use. - * - * Registers functions to be called when libvpx needs a frame buffer - * to decode the current frame and a function to be called when libvpx does - * not internally reference the frame buffer. This set function must - * be called before the first call to decode or libvpx will assume the - * default behavior of allocating frame buffers internally. - * - * \param[in] ctx Pointer to this instance's context - * \param[in] cb_get Pointer to the get callback function - * \param[in] cb_release Pointer to the release callback function - * \param[in] cb_priv Callback's private data - * - * \retval #VPX_CODEC_OK - * External frame buffers will be used by libvpx. - * \retval #VPX_CODEC_INVALID_PARAM - * One or more of the callbacks were NULL. 
- * \retval #VPX_CODEC_ERROR - * Decoder context not initialized, or algorithm not capable of - * using external frame buffers. - * - * \note - * When decoding VP9, the application may be required to pass in at least - * #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS external frame - * buffers. - */ -typedef vpx_codec_err_t (*vpx_codec_set_fb_fn_t)( - vpx_codec_alg_priv_t *ctx, - vpx_get_frame_buffer_cb_fn_t cb_get, - vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv); - - -typedef vpx_codec_err_t (*vpx_codec_encode_fn_t)(vpx_codec_alg_priv_t *ctx, - const vpx_image_t *img, - vpx_codec_pts_t pts, - unsigned long duration, - vpx_enc_frame_flags_t flags, - unsigned long deadline); -typedef const vpx_codec_cx_pkt_t *(*vpx_codec_get_cx_data_fn_t)(vpx_codec_alg_priv_t *ctx, - vpx_codec_iter_t *iter); - -typedef vpx_codec_err_t -(*vpx_codec_enc_config_set_fn_t)(vpx_codec_alg_priv_t *ctx, - const vpx_codec_enc_cfg_t *cfg); -typedef vpx_fixed_buf_t * -(*vpx_codec_get_global_headers_fn_t)(vpx_codec_alg_priv_t *ctx); - -typedef vpx_image_t * -(*vpx_codec_get_preview_frame_fn_t)(vpx_codec_alg_priv_t *ctx); - -typedef vpx_codec_err_t -(*vpx_codec_enc_mr_get_mem_loc_fn_t)(const vpx_codec_enc_cfg_t *cfg, - void **mem_loc); - -/*!\brief usage configuration mapping - * - * This structure stores the mapping between usage identifiers and - * configuration structures. Each algorithm provides a list of these - * mappings. This list is searched by the vpx_codec_enc_config_default() - * wrapper function to determine which config to return. The special value - * {-1, {0}} is used to indicate end-of-list, and must be present. At least - * one mapping must be present, in addition to the end-of-list. - * - */ -typedef const struct vpx_codec_enc_cfg_map { - int usage; - vpx_codec_enc_cfg_t cfg; -} vpx_codec_enc_cfg_map_t; - -/*!\brief Decoder algorithm interface interface - * - * All decoders \ref MUST expose a variable of this type. 
- */ -struct vpx_codec_iface { - const char *name; /**< Identification String */ - int abi_version; /**< Implemented ABI version */ - vpx_codec_caps_t caps; /**< Decoder capabilities */ - vpx_codec_init_fn_t init; /**< \copydoc ::vpx_codec_init_fn_t */ - vpx_codec_destroy_fn_t destroy; /**< \copydoc ::vpx_codec_destroy_fn_t */ - vpx_codec_ctrl_fn_map_t *ctrl_maps; /**< \copydoc ::vpx_codec_ctrl_fn_map_t */ - struct vpx_codec_dec_iface { - vpx_codec_peek_si_fn_t peek_si; /**< \copydoc ::vpx_codec_peek_si_fn_t */ - vpx_codec_get_si_fn_t get_si; /**< \copydoc ::vpx_codec_get_si_fn_t */ - vpx_codec_decode_fn_t decode; /**< \copydoc ::vpx_codec_decode_fn_t */ - vpx_codec_get_frame_fn_t get_frame; /**< \copydoc ::vpx_codec_get_frame_fn_t */ - vpx_codec_set_fb_fn_t set_fb_fn; /**< \copydoc ::vpx_codec_set_fb_fn_t */ - } dec; - struct vpx_codec_enc_iface { - int cfg_map_count; - vpx_codec_enc_cfg_map_t *cfg_maps; /**< \copydoc ::vpx_codec_enc_cfg_map_t */ - vpx_codec_encode_fn_t encode; /**< \copydoc ::vpx_codec_encode_fn_t */ - vpx_codec_get_cx_data_fn_t get_cx_data; /**< \copydoc ::vpx_codec_get_cx_data_fn_t */ - vpx_codec_enc_config_set_fn_t cfg_set; /**< \copydoc ::vpx_codec_enc_config_set_fn_t */ - vpx_codec_get_global_headers_fn_t get_glob_hdrs; /**< \copydoc ::vpx_codec_get_global_headers_fn_t */ - vpx_codec_get_preview_frame_fn_t get_preview; /**< \copydoc ::vpx_codec_get_preview_frame_fn_t */ - vpx_codec_enc_mr_get_mem_loc_fn_t mr_get_mem_loc; /**< \copydoc ::vpx_codec_enc_mr_get_mem_loc_fn_t */ - } enc; -}; - -/*!\brief Callback function pointer / user data pair storage */ -typedef struct vpx_codec_priv_cb_pair { - union { - vpx_codec_put_frame_cb_fn_t put_frame; - vpx_codec_put_slice_cb_fn_t put_slice; - } u; - void *user_priv; -} vpx_codec_priv_cb_pair_t; - - -/*!\brief Instance private storage - * - * This structure is allocated by the algorithm's init function. It can be - * extended in one of two ways. 
First, a second, algorithm specific structure - * can be allocated and the priv member pointed to it. Alternatively, this - * structure can be made the first member of the algorithm specific structure, - * and the pointer cast to the proper type. - */ -struct vpx_codec_priv { - const char *err_detail; - vpx_codec_flags_t init_flags; - struct { - vpx_codec_priv_cb_pair_t put_frame_cb; - vpx_codec_priv_cb_pair_t put_slice_cb; - } dec; - struct { - vpx_fixed_buf_t cx_data_dst_buf; - unsigned int cx_data_pad_before; - unsigned int cx_data_pad_after; - vpx_codec_cx_pkt_t cx_data_pkt; - unsigned int total_encoders; - } enc; -}; - -/* - * Multi-resolution encoding internal configuration - */ -struct vpx_codec_priv_enc_mr_cfg -{ - unsigned int mr_total_resolutions; - unsigned int mr_encoder_id; - struct vpx_rational mr_down_sampling_factor; - void* mr_low_res_mode_info; -}; - -#undef VPX_CTRL_USE_TYPE -#define VPX_CTRL_USE_TYPE(id, typ) \ - static VPX_INLINE typ id##__value(va_list args) {return va_arg(args, typ);} - -#undef VPX_CTRL_USE_TYPE_DEPRECATED -#define VPX_CTRL_USE_TYPE_DEPRECATED(id, typ) \ - static VPX_INLINE typ id##__value(va_list args) {return va_arg(args, typ);} - -#define CAST(id, arg) id##__value(arg) - -/* CODEC_INTERFACE convenience macro - * - * By convention, each codec interface is a struct with extern linkage, where - * the symbol is suffixed with _algo. A getter function is also defined to - * return a pointer to the struct, since in some cases it's easier to work - * with text symbols than data symbols (see issue #169). This function has - * the same name as the struct, less the _algo suffix. The CODEC_INTERFACE - * macro is provided to define this getter function automatically. 
- */ -#define CODEC_INTERFACE(id)\ - vpx_codec_iface_t* id(void) { return &id##_algo; }\ - vpx_codec_iface_t id##_algo - - -/* Internal Utility Functions - * - * The following functions are intended to be used inside algorithms as - * utilities for manipulating vpx_codec_* data structures. - */ -struct vpx_codec_pkt_list { - unsigned int cnt; - unsigned int max; - struct vpx_codec_cx_pkt pkts[1]; -}; - -#define vpx_codec_pkt_list_decl(n)\ - union {struct vpx_codec_pkt_list head;\ - struct {struct vpx_codec_pkt_list head;\ - struct vpx_codec_cx_pkt pkts[n];} alloc;} - -#define vpx_codec_pkt_list_init(m)\ - (m)->alloc.head.cnt = 0,\ - (m)->alloc.head.max = sizeof((m)->alloc.pkts) / sizeof((m)->alloc.pkts[0]) - -int -vpx_codec_pkt_list_add(struct vpx_codec_pkt_list *, - const struct vpx_codec_cx_pkt *); - -const vpx_codec_cx_pkt_t * -vpx_codec_pkt_list_get(struct vpx_codec_pkt_list *list, - vpx_codec_iter_t *iter); - - -#include <stdio.h> -#include <setjmp.h> - -struct vpx_internal_error_info { - vpx_codec_err_t error_code; - int has_detail; - char detail[80]; - int setjmp; - jmp_buf jmp; -}; - -#define CLANG_ANALYZER_NORETURN -#if defined(__has_feature) -#if __has_feature(attribute_analyzer_noreturn) -#undef CLANG_ANALYZER_NORETURN -#define CLANG_ANALYZER_NORETURN __attribute__((analyzer_noreturn)) -#endif -#endif - -void vpx_internal_error(struct vpx_internal_error_info *info, - vpx_codec_err_t error, - const char *fmt, - ...) CLANG_ANALYZER_NORETURN; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_INTERNAL_VPX_CODEC_INTERNAL_H_ diff --git a/thirdparty/libvpx/vpx/internal/vpx_psnr.h b/thirdparty/libvpx/vpx/internal/vpx_psnr.h deleted file mode 100644 index 07d81bb8d9..0000000000 --- a/thirdparty/libvpx/vpx/internal/vpx_psnr.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_INTERNAL_VPX_PSNR_H_ -#define VPX_INTERNAL_VPX_PSNR_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -// TODO(dkovalev) change vpx_sse_to_psnr signature: double -> int64_t - -/*!\brief Converts SSE to PSNR - * - * Converts sum of squared errros (SSE) to peak signal-to-noise ratio (PNSR). - * - * \param[in] samples Number of samples - * \param[in] peak Max sample value - * \param[in] sse Sum of squared errors - */ -double vpx_sse_to_psnr(double samples, double peak, double sse); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_INTERNAL_VPX_PSNR_H_ diff --git a/thirdparty/libvpx/vpx/src/vpx_codec.c b/thirdparty/libvpx/vpx/src/vpx_codec.c deleted file mode 100644 index 5a495ce814..0000000000 --- a/thirdparty/libvpx/vpx/src/vpx_codec.c +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/*!\file - * \brief Provides the high level interface to wrap decoder algorithms. 
- * - */ -#include <stdarg.h> -#include <stdlib.h> -#include "vpx/vpx_integer.h" -#include "vpx/internal/vpx_codec_internal.h" -#include "vpx_version.h" - -#define SAVE_STATUS(ctx,var) (ctx?(ctx->err = var):var) - -int vpx_codec_version(void) { - return VERSION_PACKED; -} - - -const char *vpx_codec_version_str(void) { - return VERSION_STRING_NOSP; -} - - -const char *vpx_codec_version_extra_str(void) { - return VERSION_EXTRA; -} - - -const char *vpx_codec_iface_name(vpx_codec_iface_t *iface) { - return iface ? iface->name : "<invalid interface>"; -} - -const char *vpx_codec_err_to_string(vpx_codec_err_t err) { - switch (err) { - case VPX_CODEC_OK: - return "Success"; - case VPX_CODEC_ERROR: - return "Unspecified internal error"; - case VPX_CODEC_MEM_ERROR: - return "Memory allocation error"; - case VPX_CODEC_ABI_MISMATCH: - return "ABI version mismatch"; - case VPX_CODEC_INCAPABLE: - return "Codec does not implement requested capability"; - case VPX_CODEC_UNSUP_BITSTREAM: - return "Bitstream not supported by this decoder"; - case VPX_CODEC_UNSUP_FEATURE: - return "Bitstream required feature not supported by this decoder"; - case VPX_CODEC_CORRUPT_FRAME: - return "Corrupt frame detected"; - case VPX_CODEC_INVALID_PARAM: - return "Invalid parameter"; - case VPX_CODEC_LIST_END: - return "End of iterated list"; - } - - return "Unrecognized error code"; -} - -const char *vpx_codec_error(vpx_codec_ctx_t *ctx) { - return (ctx) ? vpx_codec_err_to_string(ctx->err) - : vpx_codec_err_to_string(VPX_CODEC_INVALID_PARAM); -} - -const char *vpx_codec_error_detail(vpx_codec_ctx_t *ctx) { - if (ctx && ctx->err) - return ctx->priv ? 
ctx->priv->err_detail : ctx->err_detail; - - return NULL; -} - - -vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx) { - vpx_codec_err_t res; - - if (!ctx) - res = VPX_CODEC_INVALID_PARAM; - else if (!ctx->iface || !ctx->priv) - res = VPX_CODEC_ERROR; - else { - ctx->iface->destroy((vpx_codec_alg_priv_t *)ctx->priv); - - ctx->iface = NULL; - ctx->name = NULL; - ctx->priv = NULL; - res = VPX_CODEC_OK; - } - - return SAVE_STATUS(ctx, res); -} - - -vpx_codec_caps_t vpx_codec_get_caps(vpx_codec_iface_t *iface) { - return (iface) ? iface->caps : 0; -} - - -vpx_codec_err_t vpx_codec_control_(vpx_codec_ctx_t *ctx, - int ctrl_id, - ...) { - vpx_codec_err_t res; - - if (!ctx || !ctrl_id) - res = VPX_CODEC_INVALID_PARAM; - else if (!ctx->iface || !ctx->priv || !ctx->iface->ctrl_maps) - res = VPX_CODEC_ERROR; - else { - vpx_codec_ctrl_fn_map_t *entry; - - res = VPX_CODEC_ERROR; - - for (entry = ctx->iface->ctrl_maps; entry && entry->fn; entry++) { - if (!entry->ctrl_id || entry->ctrl_id == ctrl_id) { - va_list ap; - - va_start(ap, ctrl_id); - res = entry->fn((vpx_codec_alg_priv_t *)ctx->priv, ap); - va_end(ap); - break; - } - } - } - - return SAVE_STATUS(ctx, res); -} - -void vpx_internal_error(struct vpx_internal_error_info *info, - vpx_codec_err_t error, - const char *fmt, - ...) { - va_list ap; - - info->error_code = error; - info->has_detail = 0; - - if (fmt) { - size_t sz = sizeof(info->detail); - - info->has_detail = 1; - va_start(ap, fmt); - vsnprintf(info->detail, sz - 1, fmt, ap); - va_end(ap); - info->detail[sz - 1] = '\0'; - } - - if (info->setjmp) - longjmp(info->jmp, info->error_code); -} diff --git a/thirdparty/libvpx/vpx/src/vpx_decoder.c b/thirdparty/libvpx/vpx/src/vpx_decoder.c deleted file mode 100644 index 802d8edd8a..0000000000 --- a/thirdparty/libvpx/vpx/src/vpx_decoder.c +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/*!\file - * \brief Provides the high level interface to wrap decoder algorithms. - * - */ -#include <string.h> -#include "vpx/internal/vpx_codec_internal.h" - -#define SAVE_STATUS(ctx,var) (ctx?(ctx->err = var):var) - -static vpx_codec_alg_priv_t *get_alg_priv(vpx_codec_ctx_t *ctx) { - return (vpx_codec_alg_priv_t *)ctx->priv; -} - -vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t *ctx, - vpx_codec_iface_t *iface, - const vpx_codec_dec_cfg_t *cfg, - vpx_codec_flags_t flags, - int ver) { - vpx_codec_err_t res; - - if (ver != VPX_DECODER_ABI_VERSION) - res = VPX_CODEC_ABI_MISMATCH; - else if (!ctx || !iface) - res = VPX_CODEC_INVALID_PARAM; - else if (iface->abi_version != VPX_CODEC_INTERNAL_ABI_VERSION) - res = VPX_CODEC_ABI_MISMATCH; - else if ((flags & VPX_CODEC_USE_POSTPROC) && !(iface->caps & VPX_CODEC_CAP_POSTPROC)) - res = VPX_CODEC_INCAPABLE; - else if ((flags & VPX_CODEC_USE_ERROR_CONCEALMENT) && - !(iface->caps & VPX_CODEC_CAP_ERROR_CONCEALMENT)) - res = VPX_CODEC_INCAPABLE; - else if ((flags & VPX_CODEC_USE_INPUT_FRAGMENTS) && - !(iface->caps & VPX_CODEC_CAP_INPUT_FRAGMENTS)) - res = VPX_CODEC_INCAPABLE; - else if (!(iface->caps & VPX_CODEC_CAP_DECODER)) - res = VPX_CODEC_INCAPABLE; - else { - memset(ctx, 0, sizeof(*ctx)); - ctx->iface = iface; - ctx->name = iface->name; - ctx->priv = NULL; - ctx->init_flags = flags; - ctx->config.dec = cfg; - - res = ctx->iface->init(ctx, NULL); - if (res) { - ctx->err_detail = ctx->priv ? 
ctx->priv->err_detail : NULL; - vpx_codec_destroy(ctx); - } - } - - return SAVE_STATUS(ctx, res); -} - - -vpx_codec_err_t vpx_codec_peek_stream_info(vpx_codec_iface_t *iface, - const uint8_t *data, - unsigned int data_sz, - vpx_codec_stream_info_t *si) { - vpx_codec_err_t res; - - if (!iface || !data || !data_sz || !si - || si->sz < sizeof(vpx_codec_stream_info_t)) - res = VPX_CODEC_INVALID_PARAM; - else { - /* Set default/unknown values */ - si->w = 0; - si->h = 0; - - res = iface->dec.peek_si(data, data_sz, si); - } - - return res; -} - - -vpx_codec_err_t vpx_codec_get_stream_info(vpx_codec_ctx_t *ctx, - vpx_codec_stream_info_t *si) { - vpx_codec_err_t res; - - if (!ctx || !si || si->sz < sizeof(vpx_codec_stream_info_t)) - res = VPX_CODEC_INVALID_PARAM; - else if (!ctx->iface || !ctx->priv) - res = VPX_CODEC_ERROR; - else { - /* Set default/unknown values */ - si->w = 0; - si->h = 0; - - res = ctx->iface->dec.get_si(get_alg_priv(ctx), si); - } - - return SAVE_STATUS(ctx, res); -} - - -vpx_codec_err_t vpx_codec_decode(vpx_codec_ctx_t *ctx, - const uint8_t *data, - unsigned int data_sz, - void *user_priv, - long deadline) { - vpx_codec_err_t res; - - /* Sanity checks */ - /* NULL data ptr allowed if data_sz is 0 too */ - if (!ctx || (!data && data_sz) || (data && !data_sz)) - res = VPX_CODEC_INVALID_PARAM; - else if (!ctx->iface || !ctx->priv) - res = VPX_CODEC_ERROR; - else { - res = ctx->iface->dec.decode(get_alg_priv(ctx), data, data_sz, user_priv, - deadline); - } - - return SAVE_STATUS(ctx, res); -} - -vpx_image_t *vpx_codec_get_frame(vpx_codec_ctx_t *ctx, - vpx_codec_iter_t *iter) { - vpx_image_t *img; - - if (!ctx || !iter || !ctx->iface || !ctx->priv) - img = NULL; - else - img = ctx->iface->dec.get_frame(get_alg_priv(ctx), iter); - - return img; -} - - -vpx_codec_err_t vpx_codec_register_put_frame_cb(vpx_codec_ctx_t *ctx, - vpx_codec_put_frame_cb_fn_t cb, - void *user_priv) { - vpx_codec_err_t res; - - if (!ctx || !cb) - res = VPX_CODEC_INVALID_PARAM; - 
else if (!ctx->iface || !ctx->priv - || !(ctx->iface->caps & VPX_CODEC_CAP_PUT_FRAME)) - res = VPX_CODEC_ERROR; - else { - ctx->priv->dec.put_frame_cb.u.put_frame = cb; - ctx->priv->dec.put_frame_cb.user_priv = user_priv; - res = VPX_CODEC_OK; - } - - return SAVE_STATUS(ctx, res); -} - - -vpx_codec_err_t vpx_codec_register_put_slice_cb(vpx_codec_ctx_t *ctx, - vpx_codec_put_slice_cb_fn_t cb, - void *user_priv) { - vpx_codec_err_t res; - - if (!ctx || !cb) - res = VPX_CODEC_INVALID_PARAM; - else if (!ctx->iface || !ctx->priv - || !(ctx->iface->caps & VPX_CODEC_CAP_PUT_SLICE)) - res = VPX_CODEC_ERROR; - else { - ctx->priv->dec.put_slice_cb.u.put_slice = cb; - ctx->priv->dec.put_slice_cb.user_priv = user_priv; - res = VPX_CODEC_OK; - } - - return SAVE_STATUS(ctx, res); -} - -vpx_codec_err_t vpx_codec_set_frame_buffer_functions( - vpx_codec_ctx_t *ctx, vpx_get_frame_buffer_cb_fn_t cb_get, - vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) { - vpx_codec_err_t res; - - if (!ctx || !cb_get || !cb_release) { - res = VPX_CODEC_INVALID_PARAM; - } else if (!ctx->iface || !ctx->priv || - !(ctx->iface->caps & VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER)) { - res = VPX_CODEC_ERROR; - } else { - res = ctx->iface->dec.set_fb_fn(get_alg_priv(ctx), cb_get, cb_release, - cb_priv); - } - - return SAVE_STATUS(ctx, res); -} diff --git a/thirdparty/libvpx/vpx/src/vpx_image.c b/thirdparty/libvpx/vpx/src/vpx_image.c deleted file mode 100644 index 9aae12c794..0000000000 --- a/thirdparty/libvpx/vpx/src/vpx_image.c +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <stdlib.h> -#include <string.h> - -#include "vpx/vpx_image.h" -#include "vpx/vpx_integer.h" -#include "vpx_mem/vpx_mem.h" - -static vpx_image_t *img_alloc_helper(vpx_image_t *img, - vpx_img_fmt_t fmt, - unsigned int d_w, - unsigned int d_h, - unsigned int buf_align, - unsigned int stride_align, - unsigned char *img_data) { - unsigned int h, w, s, xcs, ycs, bps; - unsigned int stride_in_bytes; - int align; - - /* Treat align==0 like align==1 */ - if (!buf_align) - buf_align = 1; - - /* Validate alignment (must be power of 2) */ - if (buf_align & (buf_align - 1)) - goto fail; - - /* Treat align==0 like align==1 */ - if (!stride_align) - stride_align = 1; - - /* Validate alignment (must be power of 2) */ - if (stride_align & (stride_align - 1)) - goto fail; - - /* Get sample size for this format */ - switch (fmt) { - case VPX_IMG_FMT_RGB32: - case VPX_IMG_FMT_RGB32_LE: - case VPX_IMG_FMT_ARGB: - case VPX_IMG_FMT_ARGB_LE: - bps = 32; - break; - case VPX_IMG_FMT_RGB24: - case VPX_IMG_FMT_BGR24: - bps = 24; - break; - case VPX_IMG_FMT_RGB565: - case VPX_IMG_FMT_RGB565_LE: - case VPX_IMG_FMT_RGB555: - case VPX_IMG_FMT_RGB555_LE: - case VPX_IMG_FMT_UYVY: - case VPX_IMG_FMT_YUY2: - case VPX_IMG_FMT_YVYU: - bps = 16; - break; - case VPX_IMG_FMT_I420: - case VPX_IMG_FMT_YV12: - case VPX_IMG_FMT_VPXI420: - case VPX_IMG_FMT_VPXYV12: - bps = 12; - break; - case VPX_IMG_FMT_I422: - case VPX_IMG_FMT_I440: - bps = 16; - break; - case VPX_IMG_FMT_I444: - bps = 24; - break; - case VPX_IMG_FMT_I42016: - bps = 24; - break; - case VPX_IMG_FMT_I42216: - case VPX_IMG_FMT_I44016: - bps = 32; - break; - case VPX_IMG_FMT_I44416: - bps = 48; - break; - default: - bps = 16; - break; - } - - /* Get chroma shift values for this format */ - switch (fmt) { - case VPX_IMG_FMT_I420: - case VPX_IMG_FMT_YV12: - case VPX_IMG_FMT_VPXI420: - case VPX_IMG_FMT_VPXYV12: - case VPX_IMG_FMT_I422: - case VPX_IMG_FMT_I42016: - case VPX_IMG_FMT_I42216: - xcs = 1; - break; - default: - xcs = 0; - 
break; - } - - switch (fmt) { - case VPX_IMG_FMT_I420: - case VPX_IMG_FMT_I440: - case VPX_IMG_FMT_YV12: - case VPX_IMG_FMT_VPXI420: - case VPX_IMG_FMT_VPXYV12: - case VPX_IMG_FMT_I42016: - case VPX_IMG_FMT_I44016: - ycs = 1; - break; - default: - ycs = 0; - break; - } - - /* Calculate storage sizes given the chroma subsampling */ - align = (1 << xcs) - 1; - w = (d_w + align) & ~align; - align = (1 << ycs) - 1; - h = (d_h + align) & ~align; - s = (fmt & VPX_IMG_FMT_PLANAR) ? w : bps * w / 8; - s = (s + stride_align - 1) & ~(stride_align - 1); - stride_in_bytes = (fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? s * 2 : s; - - /* Allocate the new image */ - if (!img) { - img = (vpx_image_t *)calloc(1, sizeof(vpx_image_t)); - - if (!img) - goto fail; - - img->self_allocd = 1; - } else { - memset(img, 0, sizeof(vpx_image_t)); - } - - img->img_data = img_data; - - if (!img_data) { - const uint64_t alloc_size = (fmt & VPX_IMG_FMT_PLANAR) ? - (uint64_t)h * s * bps / 8 : (uint64_t)h * s; - - if (alloc_size != (size_t)alloc_size) - goto fail; - - img->img_data = (uint8_t *)vpx_memalign(buf_align, (size_t)alloc_size); - img->img_data_owner = 1; - } - - if (!img->img_data) - goto fail; - - img->fmt = fmt; - img->bit_depth = (fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 
16 : 8; - img->w = w; - img->h = h; - img->x_chroma_shift = xcs; - img->y_chroma_shift = ycs; - img->bps = bps; - - /* Calculate strides */ - img->stride[VPX_PLANE_Y] = img->stride[VPX_PLANE_ALPHA] = stride_in_bytes; - img->stride[VPX_PLANE_U] = img->stride[VPX_PLANE_V] = stride_in_bytes >> xcs; - - /* Default viewport to entire image */ - if (!vpx_img_set_rect(img, 0, 0, d_w, d_h)) - return img; - -fail: - vpx_img_free(img); - return NULL; -} - -vpx_image_t *vpx_img_alloc(vpx_image_t *img, - vpx_img_fmt_t fmt, - unsigned int d_w, - unsigned int d_h, - unsigned int align) { - return img_alloc_helper(img, fmt, d_w, d_h, align, align, NULL); -} - -vpx_image_t *vpx_img_wrap(vpx_image_t *img, - vpx_img_fmt_t fmt, - unsigned int d_w, - unsigned int d_h, - unsigned int stride_align, - unsigned char *img_data) { - /* By setting buf_align = 1, we don't change buffer alignment in this - * function. */ - return img_alloc_helper(img, fmt, d_w, d_h, 1, stride_align, img_data); -} - -int vpx_img_set_rect(vpx_image_t *img, - unsigned int x, - unsigned int y, - unsigned int w, - unsigned int h) { - unsigned char *data; - - if (x + w <= img->w && y + h <= img->h) { - img->d_w = w; - img->d_h = h; - - /* Calculate plane pointers */ - if (!(img->fmt & VPX_IMG_FMT_PLANAR)) { - img->planes[VPX_PLANE_PACKED] = - img->img_data + x * img->bps / 8 + y * img->stride[VPX_PLANE_PACKED]; - } else { - const int bytes_per_sample = - (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 
2 : 1; - data = img->img_data; - - if (img->fmt & VPX_IMG_FMT_HAS_ALPHA) { - img->planes[VPX_PLANE_ALPHA] = - data + x * bytes_per_sample + y * img->stride[VPX_PLANE_ALPHA]; - data += img->h * img->stride[VPX_PLANE_ALPHA]; - } - - img->planes[VPX_PLANE_Y] = data + x * bytes_per_sample + - y * img->stride[VPX_PLANE_Y]; - data += img->h * img->stride[VPX_PLANE_Y]; - - if (!(img->fmt & VPX_IMG_FMT_UV_FLIP)) { - img->planes[VPX_PLANE_U] = - data + (x >> img->x_chroma_shift) * bytes_per_sample + - (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U]; - data += (img->h >> img->y_chroma_shift) * img->stride[VPX_PLANE_U]; - img->planes[VPX_PLANE_V] = - data + (x >> img->x_chroma_shift) * bytes_per_sample + - (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_V]; - } else { - img->planes[VPX_PLANE_V] = - data + (x >> img->x_chroma_shift) * bytes_per_sample + - (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_V]; - data += (img->h >> img->y_chroma_shift) * img->stride[VPX_PLANE_V]; - img->planes[VPX_PLANE_U] = - data + (x >> img->x_chroma_shift) * bytes_per_sample + - (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U]; - } - } - return 0; - } - return -1; -} - -void vpx_img_flip(vpx_image_t *img) { - /* Note: In the calculation pointer adjustment calculation, we want the - * rhs to be promoted to a signed type. Section 6.3.1.8 of the ISO C99 - * standard indicates that if the adjustment parameter is unsigned, the - * stride parameter will be promoted to unsigned, causing errors when - * the lhs is a larger type than the rhs. 
- */ - img->planes[VPX_PLANE_Y] += (signed)(img->d_h - 1) * img->stride[VPX_PLANE_Y]; - img->stride[VPX_PLANE_Y] = -img->stride[VPX_PLANE_Y]; - - img->planes[VPX_PLANE_U] += (signed)((img->d_h >> img->y_chroma_shift) - 1) - * img->stride[VPX_PLANE_U]; - img->stride[VPX_PLANE_U] = -img->stride[VPX_PLANE_U]; - - img->planes[VPX_PLANE_V] += (signed)((img->d_h >> img->y_chroma_shift) - 1) - * img->stride[VPX_PLANE_V]; - img->stride[VPX_PLANE_V] = -img->stride[VPX_PLANE_V]; - - img->planes[VPX_PLANE_ALPHA] += (signed)(img->d_h - 1) * img->stride[VPX_PLANE_ALPHA]; - img->stride[VPX_PLANE_ALPHA] = -img->stride[VPX_PLANE_ALPHA]; -} - -void vpx_img_free(vpx_image_t *img) { - if (img) { - if (img->img_data && img->img_data_owner) - vpx_free(img->img_data); - - if (img->self_allocd) - free(img); - } -} diff --git a/thirdparty/libvpx/vpx/src/vpx_psnr.c b/thirdparty/libvpx/vpx/src/vpx_psnr.c deleted file mode 100644 index 05843acb61..0000000000 --- a/thirdparty/libvpx/vpx/src/vpx_psnr.c +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <math.h> - -#include "vpx/internal/vpx_psnr.h" - -#define MAX_PSNR 100.0 - -double vpx_sse_to_psnr(double samples, double peak, double sse) { - if (sse > 0.0) { - const double psnr = 10.0 * log10(samples * peak * peak / sse); - return psnr > MAX_PSNR ? MAX_PSNR : psnr; - } else { - return MAX_PSNR; - } -} diff --git a/thirdparty/libvpx/vpx/vp8.h b/thirdparty/libvpx/vpx/vp8.h deleted file mode 100644 index 8a035f9770..0000000000 --- a/thirdparty/libvpx/vpx/vp8.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. 
All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -/*!\defgroup vp8 VP8 - * \ingroup codecs - * VP8 is vpx's newest video compression algorithm that uses motion - * compensated prediction, Discrete Cosine Transform (DCT) coding of the - * prediction error signal and context dependent entropy coding techniques - * based on arithmetic principles. It features: - * - YUV 4:2:0 image format - * - Macro-block based coding (16x16 luma plus two 8x8 chroma) - * - 1/4 (1/8) pixel accuracy motion compensated prediction - * - 4x4 DCT transform - * - 128 level linear quantizer - * - In loop deblocking filter - * - Context-based entropy coding - * - * @{ - */ -/*!\file - * \brief Provides controls common to both the VP8 encoder and decoder. 
- */ -#ifndef VPX_VP8_H_ -#define VPX_VP8_H_ - -#include "./vpx_codec.h" -#include "./vpx_image.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/*!\brief Control functions - * - * The set of macros define the control functions of VP8 interface - */ -enum vp8_com_control_id { - VP8_SET_REFERENCE = 1, /**< pass in an external frame into decoder to be used as reference frame */ - VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */ - VP8_SET_POSTPROC = 3, /**< set the decoder's post processing settings */ - VP8_SET_DBG_COLOR_REF_FRAME = 4, /**< set the reference frames to color for each macroblock */ - VP8_SET_DBG_COLOR_MB_MODES = 5, /**< set which macro block modes to color */ - VP8_SET_DBG_COLOR_B_MODES = 6, /**< set which blocks modes to color */ - VP8_SET_DBG_DISPLAY_MV = 7, /**< set which motion vector modes to draw */ - - /* TODO(jkoleszar): The encoder incorrectly reuses some of these values (5+) - * for its control ids. These should be migrated to something like the - * VP8_DECODER_CTRL_ID_START range next time we're ready to break the ABI. - */ - VP9_GET_REFERENCE = 128, /**< get a pointer to a reference frame */ - VP8_COMMON_CTRL_ID_MAX, - VP8_DECODER_CTRL_ID_START = 256 -}; - -/*!\brief post process flags - * - * The set of macros define VP8 decoder post processing flags - */ -enum vp8_postproc_level { - VP8_NOFILTERING = 0, - VP8_DEBLOCK = 1 << 0, - VP8_DEMACROBLOCK = 1 << 1, - VP8_ADDNOISE = 1 << 2, - VP8_DEBUG_TXT_FRAME_INFO = 1 << 3, /**< print frame information */ - VP8_DEBUG_TXT_MBLK_MODES = 1 << 4, /**< print macro block modes over each macro block */ - VP8_DEBUG_TXT_DC_DIFF = 1 << 5, /**< print dc diff for each macro block */ - VP8_DEBUG_TXT_RATE_INFO = 1 << 6, /**< print video rate info (encoder only) */ - VP8_MFQE = 1 << 10 -}; - -/*!\brief post process flags - * - * This define a structure that describe the post processing settings. 
For - * the best objective measure (using the PSNR metric) set post_proc_flag - * to VP8_DEBLOCK and deblocking_level to 1. - */ - -typedef struct vp8_postproc_cfg { - int post_proc_flag; /**< the types of post processing to be done, should be combination of "vp8_postproc_level" */ - int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */ - int noise_level; /**< the strength of additive noise, valid range [0, 16] */ -} vp8_postproc_cfg_t; - -/*!\brief reference frame type - * - * The set of macros define the type of VP8 reference frames - */ -typedef enum vpx_ref_frame_type { - VP8_LAST_FRAME = 1, - VP8_GOLD_FRAME = 2, - VP8_ALTR_FRAME = 4 -} vpx_ref_frame_type_t; - -/*!\brief reference frame data struct - * - * Define the data struct to access vp8 reference frames. - */ -typedef struct vpx_ref_frame { - vpx_ref_frame_type_t frame_type; /**< which reference frame */ - vpx_image_t img; /**< reference frame data in image format */ -} vpx_ref_frame_t; - -/*!\brief VP9 specific reference frame data struct - * - * Define the data struct to access vp9 reference frames. 
- */ -typedef struct vp9_ref_frame { - int idx; /**< frame index to get (input) */ - vpx_image_t img; /**< img structure to populate (output) */ -} vp9_ref_frame_t; - -/*!\cond */ -/*!\brief vp8 decoder control function parameter type - * - * defines the data type for each of VP8 decoder control function requires - */ -VPX_CTRL_USE_TYPE(VP8_SET_REFERENCE, vpx_ref_frame_t *) -#define VPX_CTRL_VP8_SET_REFERENCE -VPX_CTRL_USE_TYPE(VP8_COPY_REFERENCE, vpx_ref_frame_t *) -#define VPX_CTRL_VP8_COPY_REFERENCE -VPX_CTRL_USE_TYPE(VP8_SET_POSTPROC, vp8_postproc_cfg_t *) -#define VPX_CTRL_VP8_SET_POSTPROC -VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_REF_FRAME, int) -#define VPX_CTRL_VP8_SET_DBG_COLOR_REF_FRAME -VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_MB_MODES, int) -#define VPX_CTRL_VP8_SET_DBG_COLOR_MB_MODES -VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_B_MODES, int) -#define VPX_CTRL_VP8_SET_DBG_COLOR_B_MODES -VPX_CTRL_USE_TYPE(VP8_SET_DBG_DISPLAY_MV, int) -#define VPX_CTRL_VP8_SET_DBG_DISPLAY_MV -VPX_CTRL_USE_TYPE(VP9_GET_REFERENCE, vp9_ref_frame_t *) -#define VPX_CTRL_VP9_GET_REFERENCE - -/*!\endcond */ -/*! @} - end defgroup vp8 */ - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_VP8_H_ diff --git a/thirdparty/libvpx/vpx/vp8dx.h b/thirdparty/libvpx/vpx/vp8dx.h deleted file mode 100644 index 67c97bb6c9..0000000000 --- a/thirdparty/libvpx/vpx/vp8dx.h +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/*!\defgroup vp8_decoder WebM VP8/VP9 Decoder - * \ingroup vp8 - * - * @{ - */ -/*!\file - * \brief Provides definitions for using VP8 or VP9 within the vpx Decoder - * interface. 
- */ -#ifndef VPX_VP8DX_H_ -#define VPX_VP8DX_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/* Include controls common to both the encoder and decoder */ -#include "./vp8.h" - -/*!\name Algorithm interface for VP8 - * - * This interface provides the capability to decode VP8 streams. - * @{ - */ -extern vpx_codec_iface_t vpx_codec_vp8_dx_algo; -extern vpx_codec_iface_t *vpx_codec_vp8_dx(void); -/*!@} - end algorithm interface member group*/ - -/*!\name Algorithm interface for VP9 - * - * This interface provides the capability to decode VP9 streams. - * @{ - */ -extern vpx_codec_iface_t vpx_codec_vp9_dx_algo; -extern vpx_codec_iface_t *vpx_codec_vp9_dx(void); -/*!@} - end algorithm interface member group*/ - -/*!\enum vp8_dec_control_id - * \brief VP8 decoder control functions - * - * This set of macros define the control functions available for the VP8 - * decoder interface. - * - * \sa #vpx_codec_control - */ -enum vp8_dec_control_id { - /** control function to get info on which reference frames were updated - * by the last decode - */ - VP8D_GET_LAST_REF_UPDATES = VP8_DECODER_CTRL_ID_START, - - /** check if the indicated frame is corrupted */ - VP8D_GET_FRAME_CORRUPTED, - - /** control function to get info on which reference frames were used - * by the last decode - */ - VP8D_GET_LAST_REF_USED, - - /** decryption function to decrypt encoded buffer data immediately - * before decoding. Takes a vpx_decrypt_init, which contains - * a callback function and opaque context pointer. - */ - VPXD_SET_DECRYPTOR, - VP8D_SET_DECRYPTOR = VPXD_SET_DECRYPTOR, - - /** control function to get the dimensions that the current frame is decoded - * at. This may be different to the intended display size for the frame as - * specified in the wrapper or frame header (see VP9D_GET_DISPLAY_SIZE). */ - VP9D_GET_FRAME_SIZE, - - /** control function to get the current frame's intended display dimensions - * (as specified in the wrapper or frame header). 
This may be different to - * the decoded dimensions of this frame (see VP9D_GET_FRAME_SIZE). */ - VP9D_GET_DISPLAY_SIZE, - - /** control function to get the bit depth of the stream. */ - VP9D_GET_BIT_DEPTH, - - /** control function to set the byte alignment of the planes in the reference - * buffers. Valid values are power of 2, from 32 to 1024. A value of 0 sets - * legacy alignment. I.e. Y plane is aligned to 32 bytes, U plane directly - * follows Y plane, and V plane directly follows U plane. Default value is 0. - */ - VP9_SET_BYTE_ALIGNMENT, - - /** control function to invert the decoding order to from right to left. The - * function is used in a test to confirm the decoding independence of tile - * columns. The function may be used in application where this order - * of decoding is desired. - * - * TODO(yaowu): Rework the unit test that uses this control, and in a future - * release, this test-only control shall be removed. - */ - VP9_INVERT_TILE_DECODE_ORDER, - - /** control function to set the skip loop filter flag. Valid values are - * integers. The decoder will skip the loop filter when its value is set to - * nonzero. If the loop filter is skipped the decoder may accumulate decode - * artifacts. The default value is 0. - */ - VP9_SET_SKIP_LOOP_FILTER, - - VP8_DECODER_CTRL_ID_MAX -}; - -/** Decrypt n bytes of data from input -> output, using the decrypt_state - * passed in VPXD_SET_DECRYPTOR. - */ -typedef void (*vpx_decrypt_cb)(void *decrypt_state, const unsigned char *input, - unsigned char *output, int count); - -/*!\brief Structure to hold decryption state - * - * Defines a structure to hold the decryption state and access function. - */ -typedef struct vpx_decrypt_init { - /*! Decrypt callback. */ - vpx_decrypt_cb decrypt_cb; - - /*! Decryption state. */ - void *decrypt_state; -} vpx_decrypt_init; - -/*!\brief A deprecated alias for vpx_decrypt_init. 
- */ -typedef vpx_decrypt_init vp8_decrypt_init; - - -/*!\cond */ -/*!\brief VP8 decoder control function parameter type - * - * Defines the data types that VP8D control functions take. Note that - * additional common controls are defined in vp8.h - * - */ - - -VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_UPDATES, int *) -#define VPX_CTRL_VP8D_GET_LAST_REF_UPDATES -VPX_CTRL_USE_TYPE(VP8D_GET_FRAME_CORRUPTED, int *) -#define VPX_CTRL_VP8D_GET_FRAME_CORRUPTED -VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_USED, int *) -#define VPX_CTRL_VP8D_GET_LAST_REF_USED -VPX_CTRL_USE_TYPE(VPXD_SET_DECRYPTOR, vpx_decrypt_init *) -#define VPX_CTRL_VPXD_SET_DECRYPTOR -VPX_CTRL_USE_TYPE(VP8D_SET_DECRYPTOR, vpx_decrypt_init *) -#define VPX_CTRL_VP8D_SET_DECRYPTOR -VPX_CTRL_USE_TYPE(VP9D_GET_DISPLAY_SIZE, int *) -#define VPX_CTRL_VP9D_GET_DISPLAY_SIZE -VPX_CTRL_USE_TYPE(VP9D_GET_BIT_DEPTH, unsigned int *) -#define VPX_CTRL_VP9D_GET_BIT_DEPTH -VPX_CTRL_USE_TYPE(VP9D_GET_FRAME_SIZE, int *) -#define VPX_CTRL_VP9D_GET_FRAME_SIZE -VPX_CTRL_USE_TYPE(VP9_INVERT_TILE_DECODE_ORDER, int) -#define VPX_CTRL_VP9_INVERT_TILE_DECODE_ORDER - -/*!\endcond */ -/*! @} - end defgroup vp8_decoder */ - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_VP8DX_H_ diff --git a/thirdparty/libvpx/vpx/vpx_codec.h b/thirdparty/libvpx/vpx/vpx_codec.h deleted file mode 100644 index b6037bb4d7..0000000000 --- a/thirdparty/libvpx/vpx/vpx_codec.h +++ /dev/null @@ -1,479 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -/*!\defgroup codec Common Algorithm Interface - * This abstraction allows applications to easily support multiple video - * formats with minimal code duplication. This section describes the interface - * common to all codecs (both encoders and decoders). - * @{ - */ - -/*!\file - * \brief Describes the codec algorithm interface to applications. - * - * This file describes the interface between an application and a - * video codec algorithm. - * - * An application instantiates a specific codec instance by using - * vpx_codec_init() and a pointer to the algorithm's interface structure: - * <pre> - * my_app.c: - * extern vpx_codec_iface_t my_codec; - * { - * vpx_codec_ctx_t algo; - * res = vpx_codec_init(&algo, &my_codec); - * } - * </pre> - * - * Once initialized, the instance is manged using other functions from - * the vpx_codec_* family. - */ -#ifndef VPX_VPX_CODEC_H_ -#define VPX_VPX_CODEC_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "./vpx_integer.h" -#include "./vpx_image.h" - - /*!\brief Decorator indicating a function is deprecated */ -#ifndef DEPRECATED -#if defined(__GNUC__) && __GNUC__ -#define DEPRECATED __attribute__ ((deprecated)) -#elif defined(_MSC_VER) -#define DEPRECATED -#else -#define DEPRECATED -#endif -#endif /* DEPRECATED */ - -#ifndef DECLSPEC_DEPRECATED -#if defined(__GNUC__) && __GNUC__ -#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ -#elif defined(_MSC_VER) -#define DECLSPEC_DEPRECATED __declspec(deprecated) /**< \copydoc #DEPRECATED */ -#else -#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */ -#endif -#endif /* DECLSPEC_DEPRECATED */ - - /*!\brief Decorator indicating a function is potentially unused */ -#ifdef UNUSED -#elif defined(__GNUC__) || defined(__clang__) -#define UNUSED __attribute__ ((unused)) -#else -#define UNUSED -#endif - - /*!\brief Current ABI version number - * - * \internal - * If this file is altered in any way that changes the ABI, this value - * must be bumped. 
Examples include, but are not limited to, changing - * types, removing or reassigning enums, adding/removing/rearranging - * fields to structures - */ -#define VPX_CODEC_ABI_VERSION (3 + VPX_IMAGE_ABI_VERSION) /**<\hideinitializer*/ - - /*!\brief Algorithm return codes */ - typedef enum { - /*!\brief Operation completed without error */ - VPX_CODEC_OK, - - /*!\brief Unspecified error */ - VPX_CODEC_ERROR, - - /*!\brief Memory operation failed */ - VPX_CODEC_MEM_ERROR, - - /*!\brief ABI version mismatch */ - VPX_CODEC_ABI_MISMATCH, - - /*!\brief Algorithm does not have required capability */ - VPX_CODEC_INCAPABLE, - - /*!\brief The given bitstream is not supported. - * - * The bitstream was unable to be parsed at the highest level. The decoder - * is unable to proceed. This error \ref SHOULD be treated as fatal to the - * stream. */ - VPX_CODEC_UNSUP_BITSTREAM, - - /*!\brief Encoded bitstream uses an unsupported feature - * - * The decoder does not implement a feature required by the encoder. This - * return code should only be used for features that prevent future - * pictures from being properly decoded. This error \ref MAY be treated as - * fatal to the stream or \ref MAY be treated as fatal to the current GOP. - */ - VPX_CODEC_UNSUP_FEATURE, - - /*!\brief The coded data for this stream is corrupt or incomplete - * - * There was a problem decoding the current frame. This return code - * should only be used for failures that prevent future pictures from - * being properly decoded. This error \ref MAY be treated as fatal to the - * stream or \ref MAY be treated as fatal to the current GOP. If decoding - * is continued for the current GOP, artifacts may be present. - */ - VPX_CODEC_CORRUPT_FRAME, - - /*!\brief An application-supplied parameter is not valid. - * - */ - VPX_CODEC_INVALID_PARAM, - - /*!\brief An iterator reached the end of list. - * - */ - VPX_CODEC_LIST_END - - } - vpx_codec_err_t; - - - /*! 
\brief Codec capabilities bitfield - * - * Each codec advertises the capabilities it supports as part of its - * ::vpx_codec_iface_t interface structure. Capabilities are extra interfaces - * or functionality, and are not required to be supported. - * - * The available flags are specified by VPX_CODEC_CAP_* defines. - */ - typedef long vpx_codec_caps_t; -#define VPX_CODEC_CAP_DECODER 0x1 /**< Is a decoder */ -#define VPX_CODEC_CAP_ENCODER 0x2 /**< Is an encoder */ - - - /*! \brief Initialization-time Feature Enabling - * - * Certain codec features must be known at initialization time, to allow for - * proper memory allocation. - * - * The available flags are specified by VPX_CODEC_USE_* defines. - */ - typedef long vpx_codec_flags_t; - - - /*!\brief Codec interface structure. - * - * Contains function pointers and other data private to the codec - * implementation. This structure is opaque to the application. - */ - typedef const struct vpx_codec_iface vpx_codec_iface_t; - - - /*!\brief Codec private data structure. - * - * Contains data private to the codec implementation. This structure is opaque - * to the application. - */ - typedef struct vpx_codec_priv vpx_codec_priv_t; - - - /*!\brief Iterator - * - * Opaque storage used for iterating over lists. - */ - typedef const void *vpx_codec_iter_t; - - - /*!\brief Codec context structure - * - * All codecs \ref MUST support this context structure fully. In general, - * this data should be considered private to the codec algorithm, and - * not be manipulated or examined by the calling application. Applications - * may reference the 'name' member to get a printable description of the - * algorithm. 
- */ - typedef struct vpx_codec_ctx { - const char *name; /**< Printable interface name */ - vpx_codec_iface_t *iface; /**< Interface pointers */ - vpx_codec_err_t err; /**< Last returned error */ - const char *err_detail; /**< Detailed info, if available */ - vpx_codec_flags_t init_flags; /**< Flags passed at init time */ - union { - /**< Decoder Configuration Pointer */ - const struct vpx_codec_dec_cfg *dec; - /**< Encoder Configuration Pointer */ - const struct vpx_codec_enc_cfg *enc; - const void *raw; - } config; /**< Configuration pointer aliasing union */ - vpx_codec_priv_t *priv; /**< Algorithm private storage */ - } vpx_codec_ctx_t; - - /*!\brief Bit depth for codec - * * - * This enumeration determines the bit depth of the codec. - */ - typedef enum vpx_bit_depth { - VPX_BITS_8 = 8, /**< 8 bits */ - VPX_BITS_10 = 10, /**< 10 bits */ - VPX_BITS_12 = 12, /**< 12 bits */ - } vpx_bit_depth_t; - - /* - * Library Version Number Interface - * - * For example, see the following sample return values: - * vpx_codec_version() (1<<16 | 2<<8 | 3) - * vpx_codec_version_str() "v1.2.3-rc1-16-gec6a1ba" - * vpx_codec_version_extra_str() "rc1-16-gec6a1ba" - */ - - /*!\brief Return the version information (as an integer) - * - * Returns a packed encoding of the library version number. This will only include - * the major.minor.patch component of the version number. Note that this encoded - * value should be accessed through the macros provided, as the encoding may change - * in the future. 
- * - */ - int vpx_codec_version(void); -#define VPX_VERSION_MAJOR(v) ((v>>16)&0xff) /**< extract major from packed version */ -#define VPX_VERSION_MINOR(v) ((v>>8)&0xff) /**< extract minor from packed version */ -#define VPX_VERSION_PATCH(v) ((v>>0)&0xff) /**< extract patch from packed version */ - - /*!\brief Return the version major number */ -#define vpx_codec_version_major() ((vpx_codec_version()>>16)&0xff) - - /*!\brief Return the version minor number */ -#define vpx_codec_version_minor() ((vpx_codec_version()>>8)&0xff) - - /*!\brief Return the version patch number */ -#define vpx_codec_version_patch() ((vpx_codec_version()>>0)&0xff) - - - /*!\brief Return the version information (as a string) - * - * Returns a printable string containing the full library version number. This may - * contain additional text following the three digit version number, as to indicate - * release candidates, prerelease versions, etc. - * - */ - const char *vpx_codec_version_str(void); - - - /*!\brief Return the version information (as a string) - * - * Returns a printable "extra string". This is the component of the string returned - * by vpx_codec_version_str() following the three digit version number. - * - */ - const char *vpx_codec_version_extra_str(void); - - - /*!\brief Return the build configuration - * - * Returns a printable string containing an encoded version of the build - * configuration. This may be useful to vpx support. - * - */ - const char *vpx_codec_build_config(void); - - - /*!\brief Return the name for a given interface - * - * Returns a human readable string for name of the given codec interface. - * - * \param[in] iface Interface pointer - * - */ - const char *vpx_codec_iface_name(vpx_codec_iface_t *iface); - - - /*!\brief Convert error number to printable string - * - * Returns a human readable string for the last error returned by the - * algorithm. The returned error will be one line and will not contain - * any newline characters. 
- * - * - * \param[in] err Error number. - * - */ - const char *vpx_codec_err_to_string(vpx_codec_err_t err); - - - /*!\brief Retrieve error synopsis for codec context - * - * Returns a human readable string for the last error returned by the - * algorithm. The returned error will be one line and will not contain - * any newline characters. - * - * - * \param[in] ctx Pointer to this instance's context. - * - */ - const char *vpx_codec_error(vpx_codec_ctx_t *ctx); - - - /*!\brief Retrieve detailed error information for codec context - * - * Returns a human readable string providing detailed information about - * the last error. - * - * \param[in] ctx Pointer to this instance's context. - * - * \retval NULL - * No detailed information is available. - */ - const char *vpx_codec_error_detail(vpx_codec_ctx_t *ctx); - - - /* REQUIRED FUNCTIONS - * - * The following functions are required to be implemented for all codecs. - * They represent the base case functionality expected of all codecs. - */ - - /*!\brief Destroy a codec instance - * - * Destroys a codec context, freeing any associated memory buffers. - * - * \param[in] ctx Pointer to this instance's context - * - * \retval #VPX_CODEC_OK - * The codec algorithm initialized. - * \retval #VPX_CODEC_MEM_ERROR - * Memory allocation failed. - */ - vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx); - - - /*!\brief Get the capabilities of an algorithm. - * - * Retrieves the capabilities bitfield from the algorithm's interface. - * - * \param[in] iface Pointer to the algorithm interface - * - */ - vpx_codec_caps_t vpx_codec_get_caps(vpx_codec_iface_t *iface); - - - /*!\brief Control algorithm - * - * This function is used to exchange algorithm specific data with the codec - * instance. This can be used to implement features specific to a particular - * algorithm. - * - * This wrapper function dispatches the request to the helper function - * associated with the given ctrl_id. 
It tries to call this function - * transparently, but will return #VPX_CODEC_ERROR if the request could not - * be dispatched. - * - * Note that this function should not be used directly. Call the - * #vpx_codec_control wrapper macro instead. - * - * \param[in] ctx Pointer to this instance's context - * \param[in] ctrl_id Algorithm specific control identifier - * - * \retval #VPX_CODEC_OK - * The control request was processed. - * \retval #VPX_CODEC_ERROR - * The control request was not processed. - * \retval #VPX_CODEC_INVALID_PARAM - * The data was not valid. - */ - vpx_codec_err_t vpx_codec_control_(vpx_codec_ctx_t *ctx, - int ctrl_id, - ...); -#if defined(VPX_DISABLE_CTRL_TYPECHECKS) && VPX_DISABLE_CTRL_TYPECHECKS -# define vpx_codec_control(ctx,id,data) vpx_codec_control_(ctx,id,data) -# define VPX_CTRL_USE_TYPE(id, typ) -# define VPX_CTRL_USE_TYPE_DEPRECATED(id, typ) -# define VPX_CTRL_VOID(id, typ) - -#else - /*!\brief vpx_codec_control wrapper macro - * - * This macro allows for type safe conversions across the variadic parameter - * to vpx_codec_control_(). - * - * \internal - * It works by dispatching the call to the control function through a wrapper - * function named with the id parameter. - */ -# define vpx_codec_control(ctx,id,data) vpx_codec_control_##id(ctx,id,data)\ - /**<\hideinitializer*/ - - - /*!\brief vpx_codec_control type definition macro - * - * This macro allows for type safe conversions across the variadic parameter - * to vpx_codec_control_(). It defines the type of the argument for a given - * control identifier. - * - * \internal - * It defines a static function with - * the correctly typed arguments as a wrapper to the type-unsafe internal - * function. 
- */ -# define VPX_CTRL_USE_TYPE(id, typ) \ - static vpx_codec_err_t \ - vpx_codec_control_##id(vpx_codec_ctx_t*, int, typ) UNUSED;\ - \ - static vpx_codec_err_t \ - vpx_codec_control_##id(vpx_codec_ctx_t *ctx, int ctrl_id, typ data) {\ - return vpx_codec_control_(ctx, ctrl_id, data);\ - } /**<\hideinitializer*/ - - - /*!\brief vpx_codec_control deprecated type definition macro - * - * Like #VPX_CTRL_USE_TYPE, but indicates that the specified control is - * deprecated and should not be used. Consult the documentation for your - * codec for more information. - * - * \internal - * It defines a static function with the correctly typed arguments as a - * wrapper to the type-unsafe internal function. - */ -# define VPX_CTRL_USE_TYPE_DEPRECATED(id, typ) \ - DECLSPEC_DEPRECATED static vpx_codec_err_t \ - vpx_codec_control_##id(vpx_codec_ctx_t*, int, typ) DEPRECATED UNUSED;\ - \ - DECLSPEC_DEPRECATED static vpx_codec_err_t \ - vpx_codec_control_##id(vpx_codec_ctx_t *ctx, int ctrl_id, typ data) {\ - return vpx_codec_control_(ctx, ctrl_id, data);\ - } /**<\hideinitializer*/ - - - /*!\brief vpx_codec_control void type definition macro - * - * This macro allows for type safe conversions across the variadic parameter - * to vpx_codec_control_(). It indicates that a given control identifier takes - * no argument. - * - * \internal - * It defines a static function without a data argument as a wrapper to the - * type-unsafe internal function. 
- */ -# define VPX_CTRL_VOID(id) \ - static vpx_codec_err_t \ - vpx_codec_control_##id(vpx_codec_ctx_t*, int) UNUSED;\ - \ - static vpx_codec_err_t \ - vpx_codec_control_##id(vpx_codec_ctx_t *ctx, int ctrl_id) {\ - return vpx_codec_control_(ctx, ctrl_id);\ - } /**<\hideinitializer*/ - - -#endif - - /*!@} - end defgroup codec*/ -#ifdef __cplusplus -} -#endif -#endif // VPX_VPX_CODEC_H_ - diff --git a/thirdparty/libvpx/vpx/vpx_decoder.h b/thirdparty/libvpx/vpx/vpx_decoder.h deleted file mode 100644 index 62fd919756..0000000000 --- a/thirdparty/libvpx/vpx/vpx_decoder.h +++ /dev/null @@ -1,378 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#ifndef VPX_VPX_DECODER_H_ -#define VPX_VPX_DECODER_H_ - -/*!\defgroup decoder Decoder Algorithm Interface - * \ingroup codec - * This abstraction allows applications using this decoder to easily support - * multiple video formats with minimal code duplication. This section describes - * the interface common to all decoders. - * @{ - */ - -/*!\file - * \brief Describes the decoder algorithm interface to applications. - * - * This file describes the interface between an application and a - * video decoder algorithm. - * - */ -#ifdef __cplusplus -extern "C" { -#endif - -#include "./vpx_codec.h" -#include "./vpx_frame_buffer.h" - - /*!\brief Current ABI version number - * - * \internal - * If this file is altered in any way that changes the ABI, this value - * must be bumped. 
Examples include, but are not limited to, changing - * types, removing or reassigning enums, adding/removing/rearranging - * fields to structures - */ -#define VPX_DECODER_ABI_VERSION (3 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ - - /*! \brief Decoder capabilities bitfield - * - * Each decoder advertises the capabilities it supports as part of its - * ::vpx_codec_iface_t interface structure. Capabilities are extra interfaces - * or functionality, and are not required to be supported by a decoder. - * - * The available flags are specified by VPX_CODEC_CAP_* defines. - */ -#define VPX_CODEC_CAP_PUT_SLICE 0x10000 /**< Will issue put_slice callbacks */ -#define VPX_CODEC_CAP_PUT_FRAME 0x20000 /**< Will issue put_frame callbacks */ -#define VPX_CODEC_CAP_POSTPROC 0x40000 /**< Can postprocess decoded frame */ -#define VPX_CODEC_CAP_ERROR_CONCEALMENT 0x80000 /**< Can conceal errors due to - packet loss */ -#define VPX_CODEC_CAP_INPUT_FRAGMENTS 0x100000 /**< Can receive encoded frames - one fragment at a time */ - - /*! \brief Initialization-time Feature Enabling - * - * Certain codec features must be known at initialization time, to allow for - * proper memory allocation. - * - * The available flags are specified by VPX_CODEC_USE_* defines. - */ -#define VPX_CODEC_CAP_FRAME_THREADING 0x200000 /**< Can support frame-based - multi-threading */ -#define VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER 0x400000 /**< Can support external - frame buffers */ - -#define VPX_CODEC_USE_POSTPROC 0x10000 /**< Postprocess decoded frame */ -#define VPX_CODEC_USE_ERROR_CONCEALMENT 0x20000 /**< Conceal errors in decoded - frames */ -#define VPX_CODEC_USE_INPUT_FRAGMENTS 0x40000 /**< The input frame should be - passed to the decoder one - fragment at a time */ -#define VPX_CODEC_USE_FRAME_THREADING 0x80000 /**< Enable frame-based - multi-threading */ - - /*!\brief Stream properties - * - * This structure is used to query or set properties of the decoded - * stream. 
Algorithms may extend this structure with data specific - * to their bitstream by setting the sz member appropriately. - */ - typedef struct vpx_codec_stream_info { - unsigned int sz; /**< Size of this structure */ - unsigned int w; /**< Width (or 0 for unknown/default) */ - unsigned int h; /**< Height (or 0 for unknown/default) */ - unsigned int is_kf; /**< Current frame is a keyframe */ - } vpx_codec_stream_info_t; - - /* REQUIRED FUNCTIONS - * - * The following functions are required to be implemented for all decoders. - * They represent the base case functionality expected of all decoders. - */ - - - /*!\brief Initialization Configurations - * - * This structure is used to pass init time configuration options to the - * decoder. - */ - typedef struct vpx_codec_dec_cfg { - unsigned int threads; /**< Maximum number of threads to use, default 1 */ - unsigned int w; /**< Width */ - unsigned int h; /**< Height */ - } vpx_codec_dec_cfg_t; /**< alias for struct vpx_codec_dec_cfg */ - - - /*!\brief Initialize a decoder instance - * - * Initializes a decoder context using the given interface. Applications - * should call the vpx_codec_dec_init convenience macro instead of this - * function directly, to ensure that the ABI version number parameter - * is properly initialized. - * - * If the library was configured with --disable-multithread, this call - * is not thread safe and should be guarded with a lock if being used - * in a multithreaded context. - * - * \param[in] ctx Pointer to this instance's context. - * \param[in] iface Pointer to the algorithm interface to use. - * \param[in] cfg Configuration to use, if known. May be NULL. - * \param[in] flags Bitfield of VPX_CODEC_USE_* flags - * \param[in] ver ABI version number. Must be set to - * VPX_DECODER_ABI_VERSION - * \retval #VPX_CODEC_OK - * The decoder algorithm initialized. - * \retval #VPX_CODEC_MEM_ERROR - * Memory allocation failed. 
- */ - vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t *ctx, - vpx_codec_iface_t *iface, - const vpx_codec_dec_cfg_t *cfg, - vpx_codec_flags_t flags, - int ver); - - /*!\brief Convenience macro for vpx_codec_dec_init_ver() - * - * Ensures the ABI version parameter is properly set. - */ -#define vpx_codec_dec_init(ctx, iface, cfg, flags) \ - vpx_codec_dec_init_ver(ctx, iface, cfg, flags, VPX_DECODER_ABI_VERSION) - - - /*!\brief Parse stream info from a buffer - * - * Performs high level parsing of the bitstream. Construction of a decoder - * context is not necessary. Can be used to determine if the bitstream is - * of the proper format, and to extract information from the stream. - * - * \param[in] iface Pointer to the algorithm interface - * \param[in] data Pointer to a block of data to parse - * \param[in] data_sz Size of the data buffer - * \param[in,out] si Pointer to stream info to update. The size member - * \ref MUST be properly initialized, but \ref MAY be - * clobbered by the algorithm. This parameter \ref MAY - * be NULL. - * - * \retval #VPX_CODEC_OK - * Bitstream is parsable and stream information updated - */ - vpx_codec_err_t vpx_codec_peek_stream_info(vpx_codec_iface_t *iface, - const uint8_t *data, - unsigned int data_sz, - vpx_codec_stream_info_t *si); - - - /*!\brief Return information about the current stream. - * - * Returns information about the stream that has been parsed during decoding. - * - * \param[in] ctx Pointer to this instance's context - * \param[in,out] si Pointer to stream info to update. The size member - * \ref MUST be properly initialized, but \ref MAY be - * clobbered by the algorithm. This parameter \ref MAY - * be NULL. - * - * \retval #VPX_CODEC_OK - * Bitstream is parsable and stream information updated - */ - vpx_codec_err_t vpx_codec_get_stream_info(vpx_codec_ctx_t *ctx, - vpx_codec_stream_info_t *si); - - - /*!\brief Decode data - * - * Processes a buffer of coded data. 
If the processing results in a new - * decoded frame becoming available, PUT_SLICE and PUT_FRAME events may be - * generated, as appropriate. Encoded data \ref MUST be passed in DTS (decode - * time stamp) order. Frames produced will always be in PTS (presentation - * time stamp) order. - * If the decoder is configured with VPX_CODEC_USE_INPUT_FRAGMENTS enabled, - * data and data_sz can contain a fragment of the encoded frame. Fragment - * \#n must contain at least partition \#n, but can also contain subsequent - * partitions (\#n+1 - \#n+i), and if so, fragments \#n+1, .., \#n+i must - * be empty. When no more data is available, this function should be called - * with NULL as data and 0 as data_sz. The memory passed to this function - * must be available until the frame has been decoded. - * - * \param[in] ctx Pointer to this instance's context - * \param[in] data Pointer to this block of new coded data. If - * NULL, a VPX_CODEC_CB_PUT_FRAME event is posted - * for the previously decoded frame. - * \param[in] data_sz Size of the coded data, in bytes. - * \param[in] user_priv Application specific data to associate with - * this frame. - * \param[in] deadline Soft deadline the decoder should attempt to meet, - * in us. Set to zero for unlimited. - * - * \return Returns #VPX_CODEC_OK if the coded data was processed completely - * and future pictures can be decoded without error. Otherwise, - * see the descriptions of the other error codes in ::vpx_codec_err_t - * for recoverability capabilities. - */ - vpx_codec_err_t vpx_codec_decode(vpx_codec_ctx_t *ctx, - const uint8_t *data, - unsigned int data_sz, - void *user_priv, - long deadline); - - - /*!\brief Decoded frames iterator - * - * Iterates over a list of the frames available for display. The iterator - * storage should be initialized to NULL to start the iteration. Iteration is - * complete when this function returns NULL. 
- * - * The list of available frames becomes valid upon completion of the - * vpx_codec_decode call, and remains valid until the next call to vpx_codec_decode. - * - * \param[in] ctx Pointer to this instance's context - * \param[in,out] iter Iterator storage, initialized to NULL - * - * \return Returns a pointer to an image, if one is ready for display. Frames - * produced will always be in PTS (presentation time stamp) order. - */ - vpx_image_t *vpx_codec_get_frame(vpx_codec_ctx_t *ctx, - vpx_codec_iter_t *iter); - - - /*!\defgroup cap_put_frame Frame-Based Decoding Functions - * - * The following functions are required to be implemented for all decoders - * that advertise the VPX_CODEC_CAP_PUT_FRAME capability. Calling these functions - * for codecs that don't advertise this capability will result in an error - * code being returned, usually VPX_CODEC_ERROR - * @{ - */ - - /*!\brief put frame callback prototype - * - * This callback is invoked by the decoder to notify the application of - * the availability of decoded image data. - */ - typedef void (*vpx_codec_put_frame_cb_fn_t)(void *user_priv, - const vpx_image_t *img); - - - /*!\brief Register for notification of frame completion. - * - * Registers a given function to be called when a decoded frame is - * available. - * - * \param[in] ctx Pointer to this instance's context - * \param[in] cb Pointer to the callback function - * \param[in] user_priv User's private data - * - * \retval #VPX_CODEC_OK - * Callback successfully registered. - * \retval #VPX_CODEC_ERROR - * Decoder context not initialized, or algorithm not capable of - * posting slice completion. 
- */ - vpx_codec_err_t vpx_codec_register_put_frame_cb(vpx_codec_ctx_t *ctx, - vpx_codec_put_frame_cb_fn_t cb, - void *user_priv); - - - /*!@} - end defgroup cap_put_frame */ - - /*!\defgroup cap_put_slice Slice-Based Decoding Functions - * - * The following functions are required to be implemented for all decoders - * that advertise the VPX_CODEC_CAP_PUT_SLICE capability. Calling these functions - * for codecs that don't advertise this capability will result in an error - * code being returned, usually VPX_CODEC_ERROR - * @{ - */ - - /*!\brief put slice callback prototype - * - * This callback is invoked by the decoder to notify the application of - * the availability of partially decoded image data. The - */ - typedef void (*vpx_codec_put_slice_cb_fn_t)(void *user_priv, - const vpx_image_t *img, - const vpx_image_rect_t *valid, - const vpx_image_rect_t *update); - - - /*!\brief Register for notification of slice completion. - * - * Registers a given function to be called when a decoded slice is - * available. - * - * \param[in] ctx Pointer to this instance's context - * \param[in] cb Pointer to the callback function - * \param[in] user_priv User's private data - * - * \retval #VPX_CODEC_OK - * Callback successfully registered. - * \retval #VPX_CODEC_ERROR - * Decoder context not initialized, or algorithm not capable of - * posting slice completion. - */ - vpx_codec_err_t vpx_codec_register_put_slice_cb(vpx_codec_ctx_t *ctx, - vpx_codec_put_slice_cb_fn_t cb, - void *user_priv); - - - /*!@} - end defgroup cap_put_slice*/ - - /*!\defgroup cap_external_frame_buffer External Frame Buffer Functions - * - * The following section is required to be implemented for all decoders - * that advertise the VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER capability. - * Calling this function for codecs that don't advertise this capability - * will result in an error code being returned, usually VPX_CODEC_ERROR. - * - * \note - * Currently this only works with VP9. 
- * @{ - */ - - /*!\brief Pass in external frame buffers for the decoder to use. - * - * Registers functions to be called when libvpx needs a frame buffer - * to decode the current frame and a function to be called when libvpx does - * not internally reference the frame buffer. This set function must - * be called before the first call to decode or libvpx will assume the - * default behavior of allocating frame buffers internally. - * - * \param[in] ctx Pointer to this instance's context - * \param[in] cb_get Pointer to the get callback function - * \param[in] cb_release Pointer to the release callback function - * \param[in] cb_priv Callback's private data - * - * \retval #VPX_CODEC_OK - * External frame buffers will be used by libvpx. - * \retval #VPX_CODEC_INVALID_PARAM - * One or more of the callbacks were NULL. - * \retval #VPX_CODEC_ERROR - * Decoder context not initialized, or algorithm not capable of - * using external frame buffers. - * - * \note - * When decoding VP9, the application may be required to pass in at least - * #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS external frame - * buffers. - */ - vpx_codec_err_t vpx_codec_set_frame_buffer_functions( - vpx_codec_ctx_t *ctx, - vpx_get_frame_buffer_cb_fn_t cb_get, - vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv); - - /*!@} - end defgroup cap_external_frame_buffer */ - - /*!@} - end defgroup decoder*/ -#ifdef __cplusplus -} -#endif -#endif // VPX_VPX_DECODER_H_ - diff --git a/thirdparty/libvpx/vpx/vpx_encoder.h b/thirdparty/libvpx/vpx/vpx_encoder.h deleted file mode 100644 index 955e873519..0000000000 --- a/thirdparty/libvpx/vpx/vpx_encoder.h +++ /dev/null @@ -1,1043 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#ifndef VPX_VPX_ENCODER_H_ -#define VPX_VPX_ENCODER_H_ - -/*!\defgroup encoder Encoder Algorithm Interface - * \ingroup codec - * This abstraction allows applications using this encoder to easily support - * multiple video formats with minimal code duplication. This section describes - * the interface common to all encoders. - * @{ - */ - -/*!\file - * \brief Describes the encoder algorithm interface to applications. - * - * This file describes the interface between an application and a - * video encoder algorithm. - * - */ -#ifdef __cplusplus -extern "C" { -#endif - -#include "./vpx_codec.h" - - /*! Temporal Scalability: Maximum length of the sequence defining frame - * layer membership - */ -#define VPX_TS_MAX_PERIODICITY 16 - - /*! Temporal Scalability: Maximum number of coding layers */ -#define VPX_TS_MAX_LAYERS 5 - - /*!\deprecated Use #VPX_TS_MAX_PERIODICITY instead. */ -#define MAX_PERIODICITY VPX_TS_MAX_PERIODICITY - -/*! Temporal+Spatial Scalability: Maximum number of coding layers */ -#define VPX_MAX_LAYERS 12 // 3 temporal + 4 spatial layers are allowed. - -/*!\deprecated Use #VPX_MAX_LAYERS instead. */ -#define MAX_LAYERS VPX_MAX_LAYERS // 3 temporal + 4 spatial layers allowed. - -/*! Spatial Scalability: Maximum number of coding layers */ -#define VPX_SS_MAX_LAYERS 5 - -/*! Spatial Scalability: Default number of coding layers */ -#define VPX_SS_DEFAULT_LAYERS 1 - - /*!\brief Current ABI version number - * - * \internal - * If this file is altered in any way that changes the ABI, this value - * must be bumped. Examples include, but are not limited to, changing - * types, removing or reassigning enums, adding/removing/rearranging - * fields to structures - */ -#define VPX_ENCODER_ABI_VERSION (5 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ - - - /*! 
\brief Encoder capabilities bitfield - * - * Each encoder advertises the capabilities it supports as part of its - * ::vpx_codec_iface_t interface structure. Capabilities are extra - * interfaces or functionality, and are not required to be supported - * by an encoder. - * - * The available flags are specified by VPX_CODEC_CAP_* defines. - */ -#define VPX_CODEC_CAP_PSNR 0x10000 /**< Can issue PSNR packets */ - - /*! Can output one partition at a time. Each partition is returned in its - * own VPX_CODEC_CX_FRAME_PKT, with the FRAME_IS_FRAGMENT flag set for - * every partition but the last. In this mode all frames are always - * returned partition by partition. - */ -#define VPX_CODEC_CAP_OUTPUT_PARTITION 0x20000 - -/*! Can support input images at greater than 8 bitdepth. - */ -#define VPX_CODEC_CAP_HIGHBITDEPTH 0x40000 - - /*! \brief Initialization-time Feature Enabling - * - * Certain codec features must be known at initialization time, to allow - * for proper memory allocation. - * - * The available flags are specified by VPX_CODEC_USE_* defines. - */ -#define VPX_CODEC_USE_PSNR 0x10000 /**< Calculate PSNR on each frame */ -#define VPX_CODEC_USE_OUTPUT_PARTITION 0x20000 /**< Make the encoder output one - partition at a time. */ -#define VPX_CODEC_USE_HIGHBITDEPTH 0x40000 /**< Use high bitdepth */ - - - /*!\brief Generic fixed size buffer structure - * - * This structure is able to hold a reference to any fixed size buffer. - */ - typedef struct vpx_fixed_buf { - void *buf; /**< Pointer to the data */ - size_t sz; /**< Length of the buffer, in chars */ - } vpx_fixed_buf_t; /**< alias for struct vpx_fixed_buf */ - - - /*!\brief Time Stamp Type - * - * An integer, which when multiplied by the stream's time base, provides - * the absolute time of a sample. - */ - typedef int64_t vpx_codec_pts_t; - - - /*!\brief Compressed Frame Flags - * - * This type represents a bitfield containing information about a compressed - * frame that may be useful to an application. 
The most significant 16 bits - * can be used by an algorithm to provide additional detail, for example to - * support frame types that are codec specific (MPEG-1 D-frames for example) - */ - typedef uint32_t vpx_codec_frame_flags_t; -#define VPX_FRAME_IS_KEY 0x1 /**< frame is the start of a GOP */ -#define VPX_FRAME_IS_DROPPABLE 0x2 /**< frame can be dropped without affecting - the stream (no future frame depends on - this one) */ -#define VPX_FRAME_IS_INVISIBLE 0x4 /**< frame should be decoded but will not - be shown */ -#define VPX_FRAME_IS_FRAGMENT 0x8 /**< this is a fragment of the encoded - frame */ - - /*!\brief Error Resilient flags - * - * These flags define which error resilient features to enable in the - * encoder. The flags are specified through the - * vpx_codec_enc_cfg::g_error_resilient variable. - */ - typedef uint32_t vpx_codec_er_flags_t; -#define VPX_ERROR_RESILIENT_DEFAULT 0x1 /**< Improve resiliency against - losses of whole frames */ -#define VPX_ERROR_RESILIENT_PARTITIONS 0x2 /**< The frame partitions are - independently decodable by the - bool decoder, meaning that - partitions can be decoded even - though earlier partitions have - been lost. Note that intra - prediction is still done over - the partition boundary. */ - - /*!\brief Encoder output packet variants - * - * This enumeration lists the different kinds of data packets that can be - * returned by calls to vpx_codec_get_cx_data(). Algorithms \ref MAY - * extend this list to provide additional functionality. - */ - enum vpx_codec_cx_pkt_kind { - VPX_CODEC_CX_FRAME_PKT, /**< Compressed video frame */ - VPX_CODEC_STATS_PKT, /**< Two-pass statistics for this frame */ - VPX_CODEC_FPMB_STATS_PKT, /**< first pass mb statistics for this frame */ - VPX_CODEC_PSNR_PKT, /**< PSNR statistics for this frame */ - // Spatial SVC is still experimental and may be removed before the next ABI - // bump. 
-#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION) - VPX_CODEC_SPATIAL_SVC_LAYER_SIZES, /**< Sizes for each layer in this frame*/ - VPX_CODEC_SPATIAL_SVC_LAYER_PSNR, /**< PSNR for each layer in this frame*/ -#endif - VPX_CODEC_CUSTOM_PKT = 256 /**< Algorithm extensions */ - }; - - - /*!\brief Encoder output packet - * - * This structure contains the different kinds of output data the encoder - * may produce while compressing a frame. - */ - typedef struct vpx_codec_cx_pkt { - enum vpx_codec_cx_pkt_kind kind; /**< packet variant */ - union { - struct { - void *buf; /**< compressed data buffer */ - size_t sz; /**< length of compressed data */ - vpx_codec_pts_t pts; /**< time stamp to show frame - (in timebase units) */ - unsigned long duration; /**< duration to show frame - (in timebase units) */ - vpx_codec_frame_flags_t flags; /**< flags for this frame */ - int partition_id; /**< the partition id - defines the decoding order - of the partitions. Only - applicable when "output partition" - mode is enabled. First partition - has id 0.*/ - - } frame; /**< data for compressed frame packet */ - vpx_fixed_buf_t twopass_stats; /**< data for two-pass packet */ - vpx_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */ - struct vpx_psnr_pkt { - unsigned int samples[4]; /**< Number of samples, total/y/u/v */ - uint64_t sse[4]; /**< sum squared error, total/y/u/v */ - double psnr[4]; /**< PSNR, total/y/u/v */ - } psnr; /**< data for PSNR packet */ - vpx_fixed_buf_t raw; /**< data for arbitrary packets */ - // Spatial SVC is still experimental and may be removed before the next - // ABI bump. 
-#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION) - size_t layer_sizes[VPX_SS_MAX_LAYERS]; - struct vpx_psnr_pkt layer_psnr[VPX_SS_MAX_LAYERS]; -#endif - - /* This packet size is fixed to allow codecs to extend this - * interface without having to manage storage for raw packets, - * i.e., if it's smaller than 128 bytes, you can store in the - * packet list directly. - */ - char pad[128 - sizeof(enum vpx_codec_cx_pkt_kind)]; /**< fixed sz */ - } data; /**< packet data */ - } vpx_codec_cx_pkt_t; /**< alias for struct vpx_codec_cx_pkt */ - - - /*!\brief Encoder return output buffer callback - * - * This callback function, when registered, returns with packets when each - * spatial layer is encoded. - */ - // putting the definitions here for now. (agrange: find if there - // is a better place for this) - typedef void (* vpx_codec_enc_output_cx_pkt_cb_fn_t)(vpx_codec_cx_pkt_t *pkt, - void *user_data); - - /*!\brief Callback function pointer / user data pair storage */ - typedef struct vpx_codec_enc_output_cx_cb_pair { - vpx_codec_enc_output_cx_pkt_cb_fn_t output_cx_pkt; /**< Callback function */ - void *user_priv; /**< Pointer to private data */ - } vpx_codec_priv_output_cx_pkt_cb_pair_t; - - /*!\brief Rational Number - * - * This structure holds a fractional value. - */ - typedef struct vpx_rational { - int num; /**< fraction numerator */ - int den; /**< fraction denominator */ - } vpx_rational_t; /**< alias for struct vpx_rational */ - - - /*!\brief Multi-pass Encoding Pass */ - enum vpx_enc_pass { - VPX_RC_ONE_PASS, /**< Single pass mode */ - VPX_RC_FIRST_PASS, /**< First pass of multi-pass mode */ - VPX_RC_LAST_PASS /**< Final pass of multi-pass mode */ - }; - - - /*!\brief Rate control mode */ - enum vpx_rc_mode { - VPX_VBR, /**< Variable Bit Rate (VBR) mode */ - VPX_CBR, /**< Constant Bit Rate (CBR) mode */ - VPX_CQ, /**< Constrained Quality (CQ) mode */ - VPX_Q, /**< Constant Quality (Q) mode */ - }; - - - /*!\brief Keyframe placement mode. 
- * - * This enumeration determines whether keyframes are placed automatically by - * the encoder or whether this behavior is disabled. Older releases of this - * SDK were implemented such that VPX_KF_FIXED meant keyframes were disabled. - * This name is confusing for this behavior, so the new symbols to be used - * are VPX_KF_AUTO and VPX_KF_DISABLED. - */ - enum vpx_kf_mode { - VPX_KF_FIXED, /**< deprecated, implies VPX_KF_DISABLED */ - VPX_KF_AUTO, /**< Encoder determines optimal placement automatically */ - VPX_KF_DISABLED = 0 /**< Encoder does not place keyframes. */ - }; - - - /*!\brief Encoded Frame Flags - * - * This type indicates a bitfield to be passed to vpx_codec_encode(), defining - * per-frame boolean values. By convention, bits common to all codecs will be - * named VPX_EFLAG_*, and bits specific to an algorithm will be named - * /algo/_eflag_*. The lower order 16 bits are reserved for common use. - */ - typedef long vpx_enc_frame_flags_t; -#define VPX_EFLAG_FORCE_KF (1<<0) /**< Force this frame to be a keyframe */ - - - /*!\brief Encoder configuration structure - * - * This structure contains the encoder settings that have common representations - * across all codecs. This doesn't imply that all codecs support all features, - * however. - */ - typedef struct vpx_codec_enc_cfg { - /* - * generic settings (g) - */ - - /*!\brief Algorithm specific "usage" value - * - * Algorithms may define multiple values for usage, which may convey the - * intent of how the application intends to use the stream. If this value - * is non-zero, consult the documentation for the codec to determine its - * meaning. - */ - unsigned int g_usage; - - - /*!\brief Maximum number of threads to use - * - * For multi-threaded implementations, use no more than this number of - * threads. The codec may use fewer threads than allowed. The value - * 0 is equivalent to the value 1. 
- */ - unsigned int g_threads; - - - /*!\brief Bitstream profile to use - * - * Some codecs support a notion of multiple bitstream profiles. Typically - * this maps to a set of features that are turned on or off. Often the - * profile to use is determined by the features of the intended decoder. - * Consult the documentation for the codec to determine the valid values - * for this parameter, or set to zero for a sane default. - */ - unsigned int g_profile; /**< profile of bitstream to use */ - - - - /*!\brief Width of the frame - * - * This value identifies the presentation resolution of the frame, - * in pixels. Note that the frames passed as input to the encoder must - * have this resolution. Frames will be presented by the decoder in this - * resolution, independent of any spatial resampling the encoder may do. - */ - unsigned int g_w; - - - /*!\brief Height of the frame - * - * This value identifies the presentation resolution of the frame, - * in pixels. Note that the frames passed as input to the encoder must - * have this resolution. Frames will be presented by the decoder in this - * resolution, independent of any spatial resampling the encoder may do. - */ - unsigned int g_h; - - /*!\brief Bit-depth of the codec - * - * This value identifies the bit_depth of the codec, - * Only certain bit-depths are supported as identified in the - * vpx_bit_depth_t enum. - */ - vpx_bit_depth_t g_bit_depth; - - /*!\brief Bit-depth of the input frames - * - * This value identifies the bit_depth of the input frames in bits. - * Note that the frames passed as input to the encoder must have - * this bit-depth. - */ - unsigned int g_input_bit_depth; - - /*!\brief Stream timebase units - * - * Indicates the smallest interval of time, in seconds, used by the stream. 
- * For fixed frame rate material, or variable frame rate material where - * frames are timed at a multiple of a given clock (ex: video capture), - * the \ref RECOMMENDED method is to set the timebase to the reciprocal - * of the frame rate (ex: 1001/30000 for 29.970 Hz NTSC). This allows the - * pts to correspond to the frame number, which can be handy. For - * re-encoding video from containers with absolute time timestamps, the - * \ref RECOMMENDED method is to set the timebase to that of the parent - * container or multimedia framework (ex: 1/1000 for ms, as in FLV). - */ - struct vpx_rational g_timebase; - - - /*!\brief Enable error resilient modes. - * - * The error resilient bitfield indicates to the encoder which features - * it should enable to take measures for streaming over lossy or noisy - * links. - */ - vpx_codec_er_flags_t g_error_resilient; - - - /*!\brief Multi-pass Encoding Mode - * - * This value should be set to the current phase for multi-pass encoding. - * For single pass, set to #VPX_RC_ONE_PASS. - */ - enum vpx_enc_pass g_pass; - - - /*!\brief Allow lagged encoding - * - * If set, this value allows the encoder to consume a number of input - * frames before producing output frames. This allows the encoder to - * base decisions for the current frame on future frames. This does - * increase the latency of the encoding pipeline, so it is not appropriate - * in all situations (ex: realtime encoding). - * - * Note that this is a maximum value -- the encoder may produce frames - * sooner than the given limit. Set this value to 0 to disable this - * feature. - */ - unsigned int g_lag_in_frames; - - - /* - * rate control settings (rc) - */ - - /*!\brief Temporal resampling configuration, if supported by the codec. - * - * Temporal resampling allows the codec to "drop" frames as a strategy to - * meet its target data rate. This can cause temporal discontinuities in - * the encoded video, which may appear as stuttering during playback. 
This - * trade-off is often acceptable, but for many applications is not. It can - * be disabled in these cases. - * - * Note that not all codecs support this feature. All vpx VPx codecs do. - * For other codecs, consult the documentation for that algorithm. - * - * This threshold is described as a percentage of the target data buffer. - * When the data buffer falls below this percentage of fullness, a - * dropped frame is indicated. Set the threshold to zero (0) to disable - * this feature. - */ - unsigned int rc_dropframe_thresh; - - - /*!\brief Enable/disable spatial resampling, if supported by the codec. - * - * Spatial resampling allows the codec to compress a lower resolution - * version of the frame, which is then upscaled by the encoder to the - * correct presentation resolution. This increases visual quality at - * low data rates, at the expense of CPU time on the encoder/decoder. - */ - unsigned int rc_resize_allowed; - - /*!\brief Internal coded frame width. - * - * If spatial resampling is enabled this specifies the width of the - * encoded frame. - */ - unsigned int rc_scaled_width; - - /*!\brief Internal coded frame height. - * - * If spatial resampling is enabled this specifies the height of the - * encoded frame. - */ - unsigned int rc_scaled_height; - - /*!\brief Spatial resampling up watermark. - * - * This threshold is described as a percentage of the target data buffer. - * When the data buffer rises above this percentage of fullness, the - * encoder will step up to a higher resolution version of the frame. - */ - unsigned int rc_resize_up_thresh; - - - /*!\brief Spatial resampling down watermark. - * - * This threshold is described as a percentage of the target data buffer. - * When the data buffer falls below this percentage of fullness, the - * encoder will step down to a lower resolution version of the frame. - */ - unsigned int rc_resize_down_thresh; - - - /*!\brief Rate control algorithm to use. 
- * - * Indicates whether the end usage of this stream is to be streamed over - * a bandwidth constrained link, indicating that Constant Bit Rate (CBR) - * mode should be used, or whether it will be played back on a high - * bandwidth link, as from a local disk, where higher variations in - * bitrate are acceptable. - */ - enum vpx_rc_mode rc_end_usage; - - - /*!\brief Two-pass stats buffer. - * - * A buffer containing all of the stats packets produced in the first - * pass, concatenated. - */ - vpx_fixed_buf_t rc_twopass_stats_in; - - /*!\brief first pass mb stats buffer. - * - * A buffer containing all of the first pass mb stats packets produced - * in the first pass, concatenated. - */ - vpx_fixed_buf_t rc_firstpass_mb_stats_in; - - /*!\brief Target data rate - * - * Target bandwidth to use for this stream, in kilobits per second. - */ - unsigned int rc_target_bitrate; - - - /* - * quantizer settings - */ - - - /*!\brief Minimum (Best Quality) Quantizer - * - * The quantizer is the most direct control over the quality of the - * encoded image. The range of valid values for the quantizer is codec - * specific. Consult the documentation for the codec to determine the - * values to use. To determine the range programmatically, call - * vpx_codec_enc_config_default() with a usage value of 0. - */ - unsigned int rc_min_quantizer; - - - /*!\brief Maximum (Worst Quality) Quantizer - * - * The quantizer is the most direct control over the quality of the - * encoded image. The range of valid values for the quantizer is codec - * specific. Consult the documentation for the codec to determine the - * values to use. To determine the range programmatically, call - * vpx_codec_enc_config_default() with a usage value of 0. 
- */ - unsigned int rc_max_quantizer; - - - /* - * bitrate tolerance - */ - - - /*!\brief Rate control adaptation undershoot control - * - * This value, expressed as a percentage of the target bitrate, - * controls the maximum allowed adaptation speed of the codec. - * This factor controls the maximum amount of bits that can - * be subtracted from the target bitrate in order to compensate - * for prior overshoot. - * - * Valid values in the range 0-1000. - */ - unsigned int rc_undershoot_pct; - - - /*!\brief Rate control adaptation overshoot control - * - * This value, expressed as a percentage of the target bitrate, - * controls the maximum allowed adaptation speed of the codec. - * This factor controls the maximum amount of bits that can - * be added to the target bitrate in order to compensate for - * prior undershoot. - * - * Valid values in the range 0-1000. - */ - unsigned int rc_overshoot_pct; - - - /* - * decoder buffer model parameters - */ - - - /*!\brief Decoder Buffer Size - * - * This value indicates the amount of data that may be buffered by the - * decoding application. Note that this value is expressed in units of - * time (milliseconds). For example, a value of 5000 indicates that the - * client will buffer (at least) 5000ms worth of encoded data. Use the - * target bitrate (#rc_target_bitrate) to convert to bits/bytes, if - * necessary. - */ - unsigned int rc_buf_sz; - - - /*!\brief Decoder Buffer Initial Size - * - * This value indicates the amount of data that will be buffered by the - * decoding application prior to beginning playback. This value is - * expressed in units of time (milliseconds). Use the target bitrate - * (#rc_target_bitrate) to convert to bits/bytes, if necessary. - */ - unsigned int rc_buf_initial_sz; - - - /*!\brief Decoder Buffer Optimal Size - * - * This value indicates the amount of data that the encoder should try - * to maintain in the decoder's buffer. This value is expressed in units - * of time (milliseconds). 
Use the target bitrate (#rc_target_bitrate) - * to convert to bits/bytes, if necessary. - */ - unsigned int rc_buf_optimal_sz; - - - /* - * 2 pass rate control parameters - */ - - - /*!\brief Two-pass mode CBR/VBR bias - * - * Bias, expressed on a scale of 0 to 100, for determining target size - * for the current frame. The value 0 indicates the optimal CBR mode - * value should be used. The value 100 indicates the optimal VBR mode - * value should be used. Values in between indicate which way the - * encoder should "lean." - */ - unsigned int rc_2pass_vbr_bias_pct; /**< RC mode bias between CBR and VBR(0-100: 0->CBR, 100->VBR) */ - - - /*!\brief Two-pass mode per-GOP minimum bitrate - * - * This value, expressed as a percentage of the target bitrate, indicates - * the minimum bitrate to be used for a single GOP (aka "section") - */ - unsigned int rc_2pass_vbr_minsection_pct; - - - /*!\brief Two-pass mode per-GOP maximum bitrate - * - * This value, expressed as a percentage of the target bitrate, indicates - * the maximum bitrate to be used for a single GOP (aka "section") - */ - unsigned int rc_2pass_vbr_maxsection_pct; - - - /* - * keyframing settings (kf) - */ - - /*!\brief Keyframe placement mode - * - * This value indicates whether the encoder should place keyframes at a - * fixed interval, or determine the optimal placement automatically - * (as governed by the #kf_min_dist and #kf_max_dist parameters) - */ - enum vpx_kf_mode kf_mode; - - - /*!\brief Keyframe minimum interval - * - * This value, expressed as a number of frames, prevents the encoder from - * placing a keyframe nearer than kf_min_dist to the previous keyframe. At - * least kf_min_dist frames non-keyframes will be coded before the next - * keyframe. Set kf_min_dist equal to kf_max_dist for a fixed interval. 
- */ - unsigned int kf_min_dist; - - - /*!\brief Keyframe maximum interval - * - * This value, expressed as a number of frames, forces the encoder to code - * a keyframe if one has not been coded in the last kf_max_dist frames. - * A value of 0 implies all frames will be keyframes. Set kf_min_dist - * equal to kf_max_dist for a fixed interval. - */ - unsigned int kf_max_dist; - - /* - * Spatial scalability settings (ss) - */ - - /*!\brief Number of spatial coding layers. - * - * This value specifies the number of spatial coding layers to be used. - */ - unsigned int ss_number_layers; - - /*!\brief Enable auto alt reference flags for each spatial layer. - * - * These values specify if auto alt reference frame is enabled for each - * spatial layer. - */ - int ss_enable_auto_alt_ref[VPX_SS_MAX_LAYERS]; - - /*!\brief Target bitrate for each spatial layer. - * - * These values specify the target coding bitrate to be used for each - * spatial layer. - */ - unsigned int ss_target_bitrate[VPX_SS_MAX_LAYERS]; - - /*!\brief Number of temporal coding layers. - * - * This value specifies the number of temporal layers to be used. - */ - unsigned int ts_number_layers; - - /*!\brief Target bitrate for each temporal layer. - * - * These values specify the target coding bitrate to be used for each - * temporal layer. - */ - unsigned int ts_target_bitrate[VPX_TS_MAX_LAYERS]; - - /*!\brief Frame rate decimation factor for each temporal layer. - * - * These values specify the frame rate decimation factors to apply - * to each temporal layer. - */ - unsigned int ts_rate_decimator[VPX_TS_MAX_LAYERS]; - - /*!\brief Length of the sequence defining frame temporal layer membership. - * - * This value specifies the length of the sequence that defines the - * membership of frames to temporal layers. For example, if the - * ts_periodicity = 8, then the frames are assigned to coding layers with a - * repeated sequence of length 8. 
- */ - unsigned int ts_periodicity; - - /*!\brief Template defining the membership of frames to temporal layers. - * - * This array defines the membership of frames to temporal coding layers. - * For a 2-layer encoding that assigns even numbered frames to one temporal - * layer (0) and odd numbered frames to a second temporal layer (1) with - * ts_periodicity=8, then ts_layer_id = (0,1,0,1,0,1,0,1). - */ - unsigned int ts_layer_id[VPX_TS_MAX_PERIODICITY]; - - /*!\brief Target bitrate for each spatial/temporal layer. - * - * These values specify the target coding bitrate to be used for each - * spatial/temporal layer. - * - */ - unsigned int layer_target_bitrate[VPX_MAX_LAYERS]; - - /*!\brief Temporal layering mode indicating which temporal layering scheme to use. - * - * The value (refer to VP9E_TEMPORAL_LAYERING_MODE) specifies the - * temporal layering mode to use. - * - */ - int temporal_layering_mode; - } vpx_codec_enc_cfg_t; /**< alias for struct vpx_codec_enc_cfg */ - - /*!\brief vp9 svc extra configure parameters - * - * This defines max/min quantizers and scale factors for each layer - * - */ - typedef struct vpx_svc_parameters { - int max_quantizers[VPX_MAX_LAYERS]; /**< Max Q for each layer */ - int min_quantizers[VPX_MAX_LAYERS]; /**< Min Q for each layer */ - int scaling_factor_num[VPX_MAX_LAYERS]; /**< Scaling factor-numerator */ - int scaling_factor_den[VPX_MAX_LAYERS]; /**< Scaling factor-denominator */ - int temporal_layering_mode; /**< Temporal layering mode */ - } vpx_svc_extra_cfg_t; - - - /*!\brief Initialize an encoder instance - * - * Initializes a encoder context using the given interface. Applications - * should call the vpx_codec_enc_init convenience macro instead of this - * function directly, to ensure that the ABI version number parameter - * is properly initialized. 
- * - * If the library was configured with --disable-multithread, this call - * is not thread safe and should be guarded with a lock if being used - * in a multithreaded context. - * - * \param[in] ctx Pointer to this instance's context. - * \param[in] iface Pointer to the algorithm interface to use. - * \param[in] cfg Configuration to use, if known. May be NULL. - * \param[in] flags Bitfield of VPX_CODEC_USE_* flags - * \param[in] ver ABI version number. Must be set to - * VPX_ENCODER_ABI_VERSION - * \retval #VPX_CODEC_OK - * The decoder algorithm initialized. - * \retval #VPX_CODEC_MEM_ERROR - * Memory allocation failed. - */ - vpx_codec_err_t vpx_codec_enc_init_ver(vpx_codec_ctx_t *ctx, - vpx_codec_iface_t *iface, - const vpx_codec_enc_cfg_t *cfg, - vpx_codec_flags_t flags, - int ver); - - - /*!\brief Convenience macro for vpx_codec_enc_init_ver() - * - * Ensures the ABI version parameter is properly set. - */ -#define vpx_codec_enc_init(ctx, iface, cfg, flags) \ - vpx_codec_enc_init_ver(ctx, iface, cfg, flags, VPX_ENCODER_ABI_VERSION) - - - /*!\brief Initialize multi-encoder instance - * - * Initializes multi-encoder context using the given interface. - * Applications should call the vpx_codec_enc_init_multi convenience macro - * instead of this function directly, to ensure that the ABI version number - * parameter is properly initialized. - * - * \param[in] ctx Pointer to this instance's context. - * \param[in] iface Pointer to the algorithm interface to use. - * \param[in] cfg Configuration to use, if known. May be NULL. - * \param[in] num_enc Total number of encoders. - * \param[in] flags Bitfield of VPX_CODEC_USE_* flags - * \param[in] dsf Pointer to down-sampling factors. - * \param[in] ver ABI version number. Must be set to - * VPX_ENCODER_ABI_VERSION - * \retval #VPX_CODEC_OK - * The decoder algorithm initialized. - * \retval #VPX_CODEC_MEM_ERROR - * Memory allocation failed. 
- */ - vpx_codec_err_t vpx_codec_enc_init_multi_ver(vpx_codec_ctx_t *ctx, - vpx_codec_iface_t *iface, - vpx_codec_enc_cfg_t *cfg, - int num_enc, - vpx_codec_flags_t flags, - vpx_rational_t *dsf, - int ver); - - - /*!\brief Convenience macro for vpx_codec_enc_init_multi_ver() - * - * Ensures the ABI version parameter is properly set. - */ -#define vpx_codec_enc_init_multi(ctx, iface, cfg, num_enc, flags, dsf) \ - vpx_codec_enc_init_multi_ver(ctx, iface, cfg, num_enc, flags, dsf, \ - VPX_ENCODER_ABI_VERSION) - - - /*!\brief Get a default configuration - * - * Initializes a encoder configuration structure with default values. Supports - * the notion of "usages" so that an algorithm may offer different default - * settings depending on the user's intended goal. This function \ref SHOULD - * be called by all applications to initialize the configuration structure - * before specializing the configuration with application specific values. - * - * \param[in] iface Pointer to the algorithm interface to use. - * \param[out] cfg Configuration buffer to populate. - * \param[in] reserved Must set to 0 for VP8 and VP9. - * - * \retval #VPX_CODEC_OK - * The configuration was populated. - * \retval #VPX_CODEC_INCAPABLE - * Interface is not an encoder interface. - * \retval #VPX_CODEC_INVALID_PARAM - * A parameter was NULL, or the usage value was not recognized. - */ - vpx_codec_err_t vpx_codec_enc_config_default(vpx_codec_iface_t *iface, - vpx_codec_enc_cfg_t *cfg, - unsigned int reserved); - - - /*!\brief Set or change configuration - * - * Reconfigures an encoder instance according to the given configuration. - * - * \param[in] ctx Pointer to this instance's context - * \param[in] cfg Configuration buffer to use - * - * \retval #VPX_CODEC_OK - * The configuration was populated. - * \retval #VPX_CODEC_INCAPABLE - * Interface is not an encoder interface. - * \retval #VPX_CODEC_INVALID_PARAM - * A parameter was NULL, or the usage value was not recognized. 
- */ - vpx_codec_err_t vpx_codec_enc_config_set(vpx_codec_ctx_t *ctx, - const vpx_codec_enc_cfg_t *cfg); - - - /*!\brief Get global stream headers - * - * Retrieves a stream level global header packet, if supported by the codec. - * - * \param[in] ctx Pointer to this instance's context - * - * \retval NULL - * Encoder does not support global header - * \retval Non-NULL - * Pointer to buffer containing global header packet - */ - vpx_fixed_buf_t *vpx_codec_get_global_headers(vpx_codec_ctx_t *ctx); - - -#define VPX_DL_REALTIME (1) /**< deadline parameter analogous to - * VPx REALTIME mode. */ -#define VPX_DL_GOOD_QUALITY (1000000) /**< deadline parameter analogous to - * VPx GOOD QUALITY mode. */ -#define VPX_DL_BEST_QUALITY (0) /**< deadline parameter analogous to - * VPx BEST QUALITY mode. */ - /*!\brief Encode a frame - * - * Encodes a video frame at the given "presentation time." The presentation - * time stamp (PTS) \ref MUST be strictly increasing. - * - * The encoder supports the notion of a soft real-time deadline. Given a - * non-zero value to the deadline parameter, the encoder will make a "best - * effort" guarantee to return before the given time slice expires. It is - * implicit that limiting the available time to encode will degrade the - * output quality. The encoder can be given an unlimited time to produce the - * best possible frame by specifying a deadline of '0'. This deadline - * supercedes the VPx notion of "best quality, good quality, realtime". - * Applications that wish to map these former settings to the new deadline - * based system can use the symbols #VPX_DL_REALTIME, #VPX_DL_GOOD_QUALITY, - * and #VPX_DL_BEST_QUALITY. - * - * When the last frame has been passed to the encoder, this function should - * continue to be called, with the img parameter set to NULL. This will - * signal the end-of-stream condition to the encoder and allow it to encode - * any held buffers. 
Encoding is complete when vpx_codec_encode() is called - * and vpx_codec_get_cx_data() returns no data. - * - * \param[in] ctx Pointer to this instance's context - * \param[in] img Image data to encode, NULL to flush. - * \param[in] pts Presentation time stamp, in timebase units. - * \param[in] duration Duration to show frame, in timebase units. - * \param[in] flags Flags to use for encoding this frame. - * \param[in] deadline Time to spend encoding, in microseconds. (0=infinite) - * - * \retval #VPX_CODEC_OK - * The configuration was populated. - * \retval #VPX_CODEC_INCAPABLE - * Interface is not an encoder interface. - * \retval #VPX_CODEC_INVALID_PARAM - * A parameter was NULL, the image format is unsupported, etc. - */ - vpx_codec_err_t vpx_codec_encode(vpx_codec_ctx_t *ctx, - const vpx_image_t *img, - vpx_codec_pts_t pts, - unsigned long duration, - vpx_enc_frame_flags_t flags, - unsigned long deadline); - - /*!\brief Set compressed data output buffer - * - * Sets the buffer that the codec should output the compressed data - * into. This call effectively sets the buffer pointer returned in the - * next VPX_CODEC_CX_FRAME_PKT packet. Subsequent packets will be - * appended into this buffer. The buffer is preserved across frames, - * so applications must periodically call this function after flushing - * the accumulated compressed data to disk or to the network to reset - * the pointer to the buffer's head. - * - * `pad_before` bytes will be skipped before writing the compressed - * data, and `pad_after` bytes will be appended to the packet. The size - * of the packet will be the sum of the size of the actual compressed - * data, pad_before, and pad_after. The padding bytes will be preserved - * (not overwritten). - * - * Note that calling this function does not guarantee that the returned - * compressed data will be placed into the specified buffer. 
In the - * event that the encoded data will not fit into the buffer provided, - * the returned packet \ref MAY point to an internal buffer, as it would - * if this call were never used. In this event, the output packet will - * NOT have any padding, and the application must free space and copy it - * to the proper place. This is of particular note in configurations - * that may output multiple packets for a single encoded frame (e.g., lagged - * encoding) or if the application does not reset the buffer periodically. - * - * Applications may restore the default behavior of the codec providing - * the compressed data buffer by calling this function with a NULL - * buffer. - * - * Applications \ref MUSTNOT call this function during iteration of - * vpx_codec_get_cx_data(). - * - * \param[in] ctx Pointer to this instance's context - * \param[in] buf Buffer to store compressed data into - * \param[in] pad_before Bytes to skip before writing compressed data - * \param[in] pad_after Bytes to skip after writing compressed data - * - * \retval #VPX_CODEC_OK - * The buffer was set successfully. - * \retval #VPX_CODEC_INVALID_PARAM - * A parameter was NULL, the image format is unsupported, etc. - */ - vpx_codec_err_t vpx_codec_set_cx_data_buf(vpx_codec_ctx_t *ctx, - const vpx_fixed_buf_t *buf, - unsigned int pad_before, - unsigned int pad_after); - - - /*!\brief Encoded data iterator - * - * Iterates over a list of data packets to be passed from the encoder to the - * application. The different kinds of packets available are enumerated in - * #vpx_codec_cx_pkt_kind. - * - * #VPX_CODEC_CX_FRAME_PKT packets should be passed to the application's - * muxer. Multiple compressed frames may be in the list. - * #VPX_CODEC_STATS_PKT packets should be appended to a global buffer. - * - * The application \ref MUST silently ignore any packet kinds that it does - * not recognize or support. 
- * - * The data buffers returned from this function are only guaranteed to be - * valid until the application makes another call to any vpx_codec_* function. - * - * \param[in] ctx Pointer to this instance's context - * \param[in,out] iter Iterator storage, initialized to NULL - * - * \return Returns a pointer to an output data packet (compressed frame data, - * two-pass statistics, etc.) or NULL to signal end-of-list. - * - */ - const vpx_codec_cx_pkt_t *vpx_codec_get_cx_data(vpx_codec_ctx_t *ctx, - vpx_codec_iter_t *iter); - - - /*!\brief Get Preview Frame - * - * Returns an image that can be used as a preview. Shows the image as it would - * exist at the decompressor. The application \ref MUST NOT write into this - * image buffer. - * - * \param[in] ctx Pointer to this instance's context - * - * \return Returns a pointer to a preview image, or NULL if no image is - * available. - * - */ - const vpx_image_t *vpx_codec_get_preview_frame(vpx_codec_ctx_t *ctx); - - - /*!@} - end defgroup encoder*/ -#ifdef __cplusplus -} -#endif -#endif // VPX_VPX_ENCODER_H_ - diff --git a/thirdparty/libvpx/vpx/vpx_frame_buffer.h b/thirdparty/libvpx/vpx/vpx_frame_buffer.h deleted file mode 100644 index 9036459af0..0000000000 --- a/thirdparty/libvpx/vpx/vpx_frame_buffer.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_VPX_FRAME_BUFFER_H_ -#define VPX_VPX_FRAME_BUFFER_H_ - -/*!\file - * \brief Describes the decoder external frame buffer interface. 
- */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "./vpx_integer.h" - -/*!\brief The maximum number of work buffers used by libvpx. - * Support maximum 4 threads to decode video in parallel. - * Each thread will use one work buffer. - * TODO(hkuang): Add support to set number of worker threads dynamically. - */ -#define VPX_MAXIMUM_WORK_BUFFERS 8 - -/*!\brief The maximum number of reference buffers that a VP9 encoder may use. - */ -#define VP9_MAXIMUM_REF_BUFFERS 8 - -/*!\brief External frame buffer - * - * This structure holds allocated frame buffers used by the decoder. - */ -typedef struct vpx_codec_frame_buffer { - uint8_t *data; /**< Pointer to the data buffer */ - size_t size; /**< Size of data in bytes */ - void *priv; /**< Frame's private data */ -} vpx_codec_frame_buffer_t; - -/*!\brief get frame buffer callback prototype - * - * This callback is invoked by the decoder to retrieve data for the frame - * buffer in order for the decode call to complete. The callback must - * allocate at least min_size in bytes and assign it to fb->data. The callback - * must zero out all the data allocated. Then the callback must set fb->size - * to the allocated size. The application does not need to align the allocated - * data. The callback is triggered when the decoder needs a frame buffer to - * decode a compressed image into. This function may be called more than once - * for every call to vpx_codec_decode. The application may set fb->priv to - * some data which will be passed back in the ximage and the release function - * call. |fb| is guaranteed to not be NULL. On success the callback must - * return 0. Any failure the callback must return a value less than 0. 
- * - * \param[in] priv Callback's private data - * \param[in] new_size Size in bytes needed by the buffer - * \param[in,out] fb Pointer to vpx_codec_frame_buffer_t - */ -typedef int (*vpx_get_frame_buffer_cb_fn_t)( - void *priv, size_t min_size, vpx_codec_frame_buffer_t *fb); - -/*!\brief release frame buffer callback prototype - * - * This callback is invoked by the decoder when the frame buffer is not - * referenced by any other buffers. |fb| is guaranteed to not be NULL. On - * success the callback must return 0. Any failure the callback must return - * a value less than 0. - * - * \param[in] priv Callback's private data - * \param[in] fb Pointer to vpx_codec_frame_buffer_t - */ -typedef int (*vpx_release_frame_buffer_cb_fn_t)( - void *priv, vpx_codec_frame_buffer_t *fb); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_VPX_FRAME_BUFFER_H_ diff --git a/thirdparty/libvpx/vpx/vpx_image.h b/thirdparty/libvpx/vpx/vpx_image.h deleted file mode 100644 index 7958c69806..0000000000 --- a/thirdparty/libvpx/vpx/vpx_image.h +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -/*!\file - * \brief Describes the vpx image descriptor and associated operations - * - */ -#ifndef VPX_VPX_IMAGE_H_ -#define VPX_VPX_IMAGE_H_ - -#ifdef __cplusplus -extern "C" { -#endif - - /*!\brief Current ABI version number - * - * \internal - * If this file is altered in any way that changes the ABI, this value - * must be bumped. 
Examples include, but are not limited to, changing - * types, removing or reassigning enums, adding/removing/rearranging - * fields to structures - */ -#define VPX_IMAGE_ABI_VERSION (4) /**<\hideinitializer*/ - - -#define VPX_IMG_FMT_PLANAR 0x100 /**< Image is a planar format. */ -#define VPX_IMG_FMT_UV_FLIP 0x200 /**< V plane precedes U in memory. */ -#define VPX_IMG_FMT_HAS_ALPHA 0x400 /**< Image has an alpha channel. */ -#define VPX_IMG_FMT_HIGHBITDEPTH 0x800 /**< Image uses 16bit framebuffer. */ - - /*!\brief List of supported image formats */ - typedef enum vpx_img_fmt { - VPX_IMG_FMT_NONE, - VPX_IMG_FMT_RGB24, /**< 24 bit per pixel packed RGB */ - VPX_IMG_FMT_RGB32, /**< 32 bit per pixel packed 0RGB */ - VPX_IMG_FMT_RGB565, /**< 16 bit per pixel, 565 */ - VPX_IMG_FMT_RGB555, /**< 16 bit per pixel, 555 */ - VPX_IMG_FMT_UYVY, /**< UYVY packed YUV */ - VPX_IMG_FMT_YUY2, /**< YUYV packed YUV */ - VPX_IMG_FMT_YVYU, /**< YVYU packed YUV */ - VPX_IMG_FMT_BGR24, /**< 24 bit per pixel packed BGR */ - VPX_IMG_FMT_RGB32_LE, /**< 32 bit packed BGR0 */ - VPX_IMG_FMT_ARGB, /**< 32 bit packed ARGB, alpha=255 */ - VPX_IMG_FMT_ARGB_LE, /**< 32 bit packed BGRA, alpha=255 */ - VPX_IMG_FMT_RGB565_LE, /**< 16 bit per pixel, gggbbbbb rrrrrggg */ - VPX_IMG_FMT_RGB555_LE, /**< 16 bit per pixel, gggbbbbb 0rrrrrgg */ - VPX_IMG_FMT_YV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 1, /**< planar YVU */ - VPX_IMG_FMT_I420 = VPX_IMG_FMT_PLANAR | 2, - VPX_IMG_FMT_VPXYV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 3, /** < planar 4:2:0 format with vpx color space */ - VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4, - VPX_IMG_FMT_I422 = VPX_IMG_FMT_PLANAR | 5, - VPX_IMG_FMT_I444 = VPX_IMG_FMT_PLANAR | 6, - VPX_IMG_FMT_I440 = VPX_IMG_FMT_PLANAR | 7, - VPX_IMG_FMT_444A = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_HAS_ALPHA | 6, - VPX_IMG_FMT_I42016 = VPX_IMG_FMT_I420 | VPX_IMG_FMT_HIGHBITDEPTH, - VPX_IMG_FMT_I42216 = VPX_IMG_FMT_I422 | VPX_IMG_FMT_HIGHBITDEPTH, - VPX_IMG_FMT_I44416 = VPX_IMG_FMT_I444 | 
VPX_IMG_FMT_HIGHBITDEPTH, - VPX_IMG_FMT_I44016 = VPX_IMG_FMT_I440 | VPX_IMG_FMT_HIGHBITDEPTH - } vpx_img_fmt_t; /**< alias for enum vpx_img_fmt */ - - /*!\brief List of supported color spaces */ - typedef enum vpx_color_space { - VPX_CS_UNKNOWN = 0, /**< Unknown */ - VPX_CS_BT_601 = 1, /**< BT.601 */ - VPX_CS_BT_709 = 2, /**< BT.709 */ - VPX_CS_SMPTE_170 = 3, /**< SMPTE.170 */ - VPX_CS_SMPTE_240 = 4, /**< SMPTE.240 */ - VPX_CS_BT_2020 = 5, /**< BT.2020 */ - VPX_CS_RESERVED = 6, /**< Reserved */ - VPX_CS_SRGB = 7 /**< sRGB */ - } vpx_color_space_t; /**< alias for enum vpx_color_space */ - - /*!\brief List of supported color range */ - typedef enum vpx_color_range { - VPX_CR_STUDIO_RANGE = 0, /**< Y [16..235], UV [16..240] */ - VPX_CR_FULL_RANGE = 1 /**< YUV/RGB [0..255] */ - } vpx_color_range_t; /**< alias for enum vpx_color_range */ - - /**\brief Image Descriptor */ - typedef struct vpx_image { - vpx_img_fmt_t fmt; /**< Image Format */ - vpx_color_space_t cs; /**< Color Space */ - vpx_color_range_t range; /**< Color Range */ - - /* Image storage dimensions */ - unsigned int w; /**< Stored image width */ - unsigned int h; /**< Stored image height */ - unsigned int bit_depth; /**< Stored image bit-depth */ - - /* Image display dimensions */ - unsigned int d_w; /**< Displayed image width */ - unsigned int d_h; /**< Displayed image height */ - - /* Image intended rendering dimensions */ - unsigned int r_w; /**< Intended rendering image width */ - unsigned int r_h; /**< Intended rendering image height */ - - /* Chroma subsampling info */ - unsigned int x_chroma_shift; /**< subsampling order, X */ - unsigned int y_chroma_shift; /**< subsampling order, Y */ - - /* Image data pointers. 
*/ -#define VPX_PLANE_PACKED 0 /**< To be used for all packed formats */ -#define VPX_PLANE_Y 0 /**< Y (Luminance) plane */ -#define VPX_PLANE_U 1 /**< U (Chroma) plane */ -#define VPX_PLANE_V 2 /**< V (Chroma) plane */ -#define VPX_PLANE_ALPHA 3 /**< A (Transparency) plane */ - unsigned char *planes[4]; /**< pointer to the top left pixel for each plane */ - int stride[4]; /**< stride between rows for each plane */ - - int bps; /**< bits per sample (for packed formats) */ - - /* The following member may be set by the application to associate data - * with this image. - */ - void *user_priv; /**< may be set by the application to associate data - * with this image. */ - - /* The following members should be treated as private. */ - unsigned char *img_data; /**< private */ - int img_data_owner; /**< private */ - int self_allocd; /**< private */ - - void *fb_priv; /**< Frame buffer data associated with the image. */ - } vpx_image_t; /**< alias for struct vpx_image */ - - /**\brief Representation of a rectangle on a surface */ - typedef struct vpx_image_rect { - unsigned int x; /**< leftmost column */ - unsigned int y; /**< topmost row */ - unsigned int w; /**< width */ - unsigned int h; /**< height */ - } vpx_image_rect_t; /**< alias for struct vpx_image_rect */ - - /*!\brief Open a descriptor, allocating storage for the underlying image - * - * Returns a descriptor for storing an image of the given format. The - * storage for the descriptor is allocated on the heap. - * - * \param[in] img Pointer to storage for descriptor. If this parameter - * is NULL, the storage for the descriptor will be - * allocated on the heap. - * \param[in] fmt Format for the image - * \param[in] d_w Width of the image - * \param[in] d_h Height of the image - * \param[in] align Alignment, in bytes, of the image buffer and - * each row in the image(stride). - * - * \return Returns a pointer to the initialized image descriptor. 
If the img - * parameter is non-null, the value of the img parameter will be - * returned. - */ - vpx_image_t *vpx_img_alloc(vpx_image_t *img, - vpx_img_fmt_t fmt, - unsigned int d_w, - unsigned int d_h, - unsigned int align); - - /*!\brief Open a descriptor, using existing storage for the underlying image - * - * Returns a descriptor for storing an image of the given format. The - * storage for descriptor has been allocated elsewhere, and a descriptor is - * desired to "wrap" that storage. - * - * \param[in] img Pointer to storage for descriptor. If this parameter - * is NULL, the storage for the descriptor will be - * allocated on the heap. - * \param[in] fmt Format for the image - * \param[in] d_w Width of the image - * \param[in] d_h Height of the image - * \param[in] align Alignment, in bytes, of each row in the image. - * \param[in] img_data Storage to use for the image - * - * \return Returns a pointer to the initialized image descriptor. If the img - * parameter is non-null, the value of the img parameter will be - * returned. - */ - vpx_image_t *vpx_img_wrap(vpx_image_t *img, - vpx_img_fmt_t fmt, - unsigned int d_w, - unsigned int d_h, - unsigned int align, - unsigned char *img_data); - - - /*!\brief Set the rectangle identifying the displayed portion of the image - * - * Updates the displayed rectangle (aka viewport) on the image surface to - * match the specified coordinates and size. - * - * \param[in] img Image descriptor - * \param[in] x leftmost column - * \param[in] y topmost row - * \param[in] w width - * \param[in] h height - * - * \return 0 if the requested rectangle is valid, nonzero otherwise. - */ - int vpx_img_set_rect(vpx_image_t *img, - unsigned int x, - unsigned int y, - unsigned int w, - unsigned int h); - - - /*!\brief Flip the image vertically (top for bottom) - * - * Adjusts the image descriptor's pointers and strides to make the image - * be referenced upside-down. 
- * - * \param[in] img Image descriptor - */ - void vpx_img_flip(vpx_image_t *img); - - /*!\brief Close an image descriptor - * - * Frees all allocated storage associated with an image descriptor. - * - * \param[in] img Image descriptor - */ - void vpx_img_free(vpx_image_t *img); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_VPX_IMAGE_H_ diff --git a/thirdparty/libvpx/vpx/vpx_integer.h b/thirdparty/libvpx/vpx/vpx_integer.h deleted file mode 100644 index 829c9d132c..0000000000 --- a/thirdparty/libvpx/vpx/vpx_integer.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VPX_VPX_INTEGER_H_ -#define VPX_VPX_INTEGER_H_ - -/* get ptrdiff_t, size_t, wchar_t, NULL */ -#include <stddef.h> - -#if defined(_MSC_VER) -#define VPX_FORCE_INLINE __forceinline -#define VPX_INLINE __inline -#else -#define VPX_FORCE_INLINE __inline__ __attribute__(always_inline) -// TODO(jbb): Allow a way to force inline off for older compilers. 
-#define VPX_INLINE inline -#endif - -#if (defined(_MSC_VER) && (_MSC_VER < 1600)) || defined(VPX_EMULATE_INTTYPES) -typedef signed char int8_t; -typedef signed short int16_t; -typedef signed int int32_t; - -typedef unsigned char uint8_t; -typedef unsigned short uint16_t; -typedef unsigned int uint32_t; - -#if (defined(_MSC_VER) && (_MSC_VER < 1600)) -typedef signed __int64 int64_t; -typedef unsigned __int64 uint64_t; -#define INT64_MAX _I64_MAX -#define INT32_MAX _I32_MAX -#define INT32_MIN _I32_MIN -#define INT16_MAX _I16_MAX -#define INT16_MIN _I16_MIN -#endif - -#ifndef _UINTPTR_T_DEFINED -typedef size_t uintptr_t; -#endif - -#else - -/* Most platforms have the C99 standard integer types. */ - -#if defined(__cplusplus) -# if !defined(__STDC_FORMAT_MACROS) -# define __STDC_FORMAT_MACROS -# endif -# if !defined(__STDC_LIMIT_MACROS) -# define __STDC_LIMIT_MACROS -# endif -#endif // __cplusplus - -#include <stdint.h> - -#endif - -/* VS2010 defines stdint.h, but not inttypes.h */ -#if defined(_MSC_VER) && _MSC_VER < 1800 -#define PRId64 "I64d" -#else -#include <inttypes.h> -#endif - -#endif // VPX_VPX_INTEGER_H_ diff --git a/thirdparty/libvpx/vpx_config.asm b/thirdparty/libvpx/vpx_config.asm deleted file mode 100644 index 8f2119e7bc..0000000000 --- a/thirdparty/libvpx/vpx_config.asm +++ /dev/null @@ -1,65 +0,0 @@ -%ifdef X86_32 - ARCH_X86 equ 1 - ARCH_X86_64 equ 0 -%elifdef X86_64 - ARCH_X86 equ 0 - ARCH_X86_64 equ 1 -%endif - -HAVE_VPX_PORTS equ 1 -CONFIG_DEPENDENCY_TRACKING equ 0 -CONFIG_EXTERNAL_BUILD equ 0 -CONFIG_INSTALL_DOCS equ 0 -CONFIG_INSTALL_BINS equ 0 -CONFIG_INSTALL_LIBS equ 0 -CONFIG_INSTALL_SRCS equ 0 -CONFIG_USE_X86INC equ 1 -CONFIG_DEBUG equ 0 -CONFIG_GPROF equ 0 -CONFIG_GCOV equ 0 -CONFIG_RVCT equ 0 -CONFIG_PIC equ 1 ;TODO: autodetect -CONFIG_CODEC_SRCS equ 0 -CONFIG_DEBUG_LIBS equ 0 -CONFIG_DEQUANT_TOKENS equ 0 -CONFIG_DC_RECON equ 0 -CONFIG_RUNTIME_CPU_DETECT equ 1 -CONFIG_POSTPROC equ 0 -CONFIG_VP9_POSTPROC equ 0 -CONFIG_MULTITHREAD equ 1 
-CONFIG_INTERNAL_STATS equ 0 -CONFIG_VP8_ENCODER equ 0 -CONFIG_VP8_DECODER equ 1 -CONFIG_VP9_ENCODER equ 0 -CONFIG_VP9_DECODER equ 1 -CONFIG_VP8 equ 1 -CONFIG_VP9 equ 1 -CONFIG_ENCODERS equ 0 -CONFIG_DECODERS equ 1 -CONFIG_STATIC_MSVCRT equ 0 -CONFIG_SPATIAL_RESAMPLING equ 0 -CONFIG_REALTIME_ONLY equ 0 -CONFIG_ONTHEFLY_BITPACKING equ 0 -CONFIG_ERROR_CONCEALMENT equ 0 -CONFIG_SHARED equ 0 -CONFIG_STATIC equ 0 -CONFIG_SMALL equ 0 -CONFIG_POSTPROC_VISUALIZER equ 0 -CONFIG_OS_SUPPORT equ 1 -CONFIG_UNIT_TESTS equ 0 -CONFIG_WEBM_IO equ 0 -CONFIG_LIBYUV equ 0 -CONFIG_DECODE_PERF_TESTS equ 0 -CONFIG_ENCODE_PERF_TESTS equ 0 -CONFIG_MULTI_RES_ENCODING equ 0 -CONFIG_TEMPORAL_DENOISING equ 1 -CONFIG_VP9_TEMPORAL_DENOISING equ 0 -CONFIG_COEFFICIENT_RANGE_CHECKING equ 0 -CONFIG_VP9_HIGHBITDEPTH equ 0 -CONFIG_BETTER_HW_COMPATIBILITY equ 0 -CONFIG_EXPERIMENTAL equ 0 -CONFIG_SIZE_LIMIT equ 0 -CONFIG_SPATIAL_SVC equ 0 -CONFIG_FP_MB_STATS equ 0 -CONFIG_EMULATE_HARDWARE equ 0 -CONFIG_MISC_FIXES equ 0 diff --git a/thirdparty/libvpx/vpx_config.h b/thirdparty/libvpx/vpx_config.h deleted file mode 100644 index e8e91fa6ef..0000000000 --- a/thirdparty/libvpx/vpx_config.h +++ /dev/null @@ -1,140 +0,0 @@ -/* Copyright (c) 2011 The WebM project authors. All Rights Reserved. */ -/* */ -/* Use of this source code is governed by a BSD-style license */ -/* that can be found in the LICENSE file in the root of the source */ -/* tree. An additional intellectual property rights grant can be found */ -/* in the file PATENTS. All contributing project authors may */ -/* be found in the AUTHORS file in the root of the source tree. */ -/* This file automatically generated by configure. Do not edit! 
*/ -#ifndef VPX_CONFIG_H -#define VPX_CONFIG_H -#define RESTRICT -#if defined(_MSC_VER) && (_MSC_VER < 1900) - #define INLINE __inline -#else - #define INLINE inline -#endif - -#define HAVE_MIPS32 0 -#define HAVE_MEDIA 0 - -#if defined(__i386) || defined(__i386__) || defined(_M_IX86) - #define ARCH_X86 1 - #define ARCH_X86_64 0 - - #define ARCH_ARM 0 - #define HAVE_NEON 0 - #define HAVE_NEON_ASM 0 - - #define HAVE_MMX 1 - #define HAVE_SSE2 1 - #define HAVE_SSSE3 1 - #define HAVE_AVX2 0 -#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64) - #define ARCH_X86 0 - #define ARCH_X86_64 1 - - #define ARCH_ARM 0 - #define HAVE_NEON 0 - #define HAVE_NEON_ASM 0 - - #define HAVE_MMX 1 - #define HAVE_SSE2 1 - #define HAVE_SSSE3 1 - #define HAVE_AVX2 0 -#elif defined(__arm__) || defined(__TARGET_ARCH_ARM) || defined(_M_ARM) - #define ARCH_X86 0 - #define ARCH_X86_64 0 - - #define ARCH_ARM 1 - #define HAVE_NEON 1 - #define HAVE_NEON_ASM 1 -#elif defined(__aarch64__) - #define ARCH_X86 0 - #define ARCH_X86_64 0 - - #define ARCH_ARM 1 - #define HAVE_NEON 0 - #define HAVE_NEON_ASM 0 -#else - #define ARCH_X86 0 - #define ARCH_X86_64 0 - - #define ARCH_ARM 0 - #define HAVE_NEON 0 - #define HAVE_NEON_ASM 0 -#endif - -#define CONFIG_BIG_ENDIAN 0 //TODO: Autodetect - -#ifdef __EMSCRIPTEN__ -#define CONFIG_MULTITHREAD 0 -#else -#define CONFIG_MULTITHREAD 1 -#endif - -#ifdef _WIN32 - #define HAVE_PTHREAD_H 0 - #define HAVE_UNISTD_H 0 -#else - #define HAVE_PTHREAD_H 1 - #define HAVE_UNISTD_H 1 -#endif - -/**/ - -#define HAVE_VPX_PORTS 1 -#define CONFIG_DEPENDENCY_TRACKING 0 -#define CONFIG_EXTERNAL_BUILD 0 -#define CONFIG_INSTALL_DOCS 0 -#define CONFIG_INSTALL_BINS 0 -#define CONFIG_INSTALL_LIBS 0 -#define CONFIG_INSTALL_SRCS 0 -#define CONFIG_DEBUG 0 -#define CONFIG_GPROF 0 -#define CONFIG_GCOV 0 -#define CONFIG_RVCT 0 -#define CONFIG_CODEC_SRCS 0 -#define CONFIG_DEBUG_LIBS 0 -#define CONFIG_DEQUANT_TOKENS 0 -#define CONFIG_DC_RECON 0 -#define 
CONFIG_RUNTIME_CPU_DETECT 1 -#define CONFIG_POSTPROC 0 -#define CONFIG_VP9_POSTPROC 0 -#define CONFIG_INTERNAL_STATS 0 -#define CONFIG_VP8_ENCODER 0 -#define CONFIG_VP8_DECODER 1 -#define CONFIG_VP9_ENCODER 0 -#define CONFIG_VP9_DECODER 1 -#define CONFIG_VP8 1 -#define CONFIG_VP9 1 -#define CONFIG_ENCODERS 0 -#define CONFIG_DECODERS 1 -#define CONFIG_STATIC_MSVCRT 0 -#define CONFIG_SPATIAL_RESAMPLING 0 -#define CONFIG_REALTIME_ONLY 0 -#define CONFIG_ONTHEFLY_BITPACKING 0 -#define CONFIG_ERROR_CONCEALMENT 0 -#define CONFIG_SHARED 0 -#define CONFIG_STATIC 0 -#define CONFIG_SMALL 0 -#define CONFIG_POSTPROC_VISUALIZER 0 -#define CONFIG_OS_SUPPORT 1 -#define CONFIG_UNIT_TESTS 0 -#define CONFIG_WEBM_IO 0 -#define CONFIG_LIBYUV 0 -#define CONFIG_DECODE_PERF_TESTS 0 -#define CONFIG_ENCODE_PERF_TESTS 0 -#define CONFIG_MULTI_RES_ENCODING 0 -#define CONFIG_TEMPORAL_DENOISING 0 -#define CONFIG_VP9_TEMPORAL_DENOISING 0 -#define CONFIG_COEFFICIENT_RANGE_CHECKING 0 -#define CONFIG_VP9_HIGHBITDEPTH 0 -#define CONFIG_BETTER_HW_COMPATIBILITY 0 -#define CONFIG_EXPERIMENTAL 0 -#define CONFIG_SIZE_LIMIT 0 -#define CONFIG_SPATIAL_SVC 0 -#define CONFIG_FP_MB_STATS 0 -#define CONFIG_EMULATE_HARDWARE 0 -#define CONFIG_MISC_FIXES 0 -#endif /* VPX_CONFIG_H */ diff --git a/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm b/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm deleted file mode 100644 index b2846c410b..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm +++ /dev/null @@ -1,643 +0,0 @@ -; This file was created from a .asm file -; using the ads2armasm_ms.pl script. -; -; Copyright (c) 2014 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. 
All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - EXPORT |vpx_v_predictor_4x4_neon| - EXPORT |vpx_v_predictor_8x8_neon| - EXPORT |vpx_v_predictor_16x16_neon| - EXPORT |vpx_v_predictor_32x32_neon| - EXPORT |vpx_h_predictor_4x4_neon| - EXPORT |vpx_h_predictor_8x8_neon| - EXPORT |vpx_h_predictor_16x16_neon| - EXPORT |vpx_h_predictor_32x32_neon| - EXPORT |vpx_tm_predictor_4x4_neon| - EXPORT |vpx_tm_predictor_8x8_neon| - EXPORT |vpx_tm_predictor_16x16_neon| - EXPORT |vpx_tm_predictor_32x32_neon| - - - - AREA |.text|, CODE, READONLY, ALIGN=2 - -;void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_v_predictor_4x4_neon| PROC - vld1.32 {d0[0]}, [r2] - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - bx lr - ENDP ; |vpx_v_predictor_4x4_neon| - ALIGN 4 - -;void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_v_predictor_8x8_neon| PROC - vld1.8 {d0}, [r2] - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - bx lr - ENDP ; |vpx_v_predictor_8x8_neon| - ALIGN 4 - -;void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_v_predictor_16x16_neon| PROC - vld1.8 {q0}, [r2] - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - 
vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - bx lr - ENDP ; |vpx_v_predictor_16x16_neon| - ALIGN 4 - -;void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_v_predictor_32x32_neon| PROC - vld1.8 {q0, q1}, [r2] - mov r2, #2 -loop_v - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - subs r2, r2, #1 - bgt loop_v - bx lr - ENDP ; |vpx_v_predictor_32x32_neon| - ALIGN 4 - -;void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_h_predictor_4x4_neon| PROC - vld1.32 {d1[0]}, [r3] - vdup.8 d0, d1[0] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[1] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[2] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[3] - vst1.32 {d0[0]}, [r0], r1 - bx lr - ENDP ; |vpx_h_predictor_4x4_neon| - ALIGN 4 - -;void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_h_predictor_8x8_neon| PROC - vld1.64 {d1}, [r3] - vdup.8 d0, d1[0] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[1] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[2] - 
vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[3] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[4] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[5] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[6] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[7] - vst1.64 {d0}, [r0], r1 - bx lr - ENDP ; |vpx_h_predictor_8x8_neon| - ALIGN 4 - -;void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_h_predictor_16x16_neon| PROC - vld1.8 {q1}, [r3] - vdup.8 q0, d2[0] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[1] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[2] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[3] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[4] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[5] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[6] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[7] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[0] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[1] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[2] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[3] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[4] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[5] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[6] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[7] - vst1.8 {q0}, [r0], r1 - bx lr - ENDP ; |vpx_h_predictor_16x16_neon| - ALIGN 4 - -;void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_h_predictor_32x32_neon| PROC - sub r1, r1, #16 - mov r2, #2 -loop_h - vld1.8 {q1}, [r3]! - vdup.8 q0, d2[0] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[1] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[2] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[3] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[4] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[5] - vst1.8 {q0}, [r0]! 
- vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[6] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[7] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[0] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[1] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[2] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[3] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[4] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[5] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[6] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[7] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - subs r2, r2, #1 - bgt loop_h - bx lr - ENDP ; |vpx_h_predictor_32x32_neon| - ALIGN 4 - -;void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_tm_predictor_4x4_neon| PROC - ; Load ytop_left = above[-1]; - sub r12, r2, #1 - vld1.u8 {d0[]}, [r12] - - ; Load above 4 pixels - vld1.32 {d2[0]}, [r2] - - ; Compute above - ytop_left - vsubl.u8 q3, d2, d0 - - ; Load left row by row and compute left + (above - ytop_left) - ; 1st row and 2nd row - vld1.u8 {d2[]}, [r3]! - vld1.u8 {d4[]}, [r3]! - vmovl.u8 q1, d2 - vmovl.u8 q2, d4 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqmovun.s16 d0, q1 - vqmovun.s16 d1, q2 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d1[0]}, [r0], r1 - - ; 3rd row and 4th row - vld1.u8 {d2[]}, [r3]! 
- vld1.u8 {d4[]}, [r3] - vmovl.u8 q1, d2 - vmovl.u8 q2, d4 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqmovun.s16 d0, q1 - vqmovun.s16 d1, q2 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d1[0]}, [r0], r1 - bx lr - ENDP ; |vpx_tm_predictor_4x4_neon| - ALIGN 4 - -;void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_tm_predictor_8x8_neon| PROC - ; Load ytop_left = above[-1]; - sub r12, r2, #1 - vld1.8 {d0[]}, [r12] - - ; preload 8 left - vld1.8 {d30}, [r3] - - ; Load above 8 pixels - vld1.64 {d2}, [r2] - - vmovl.u8 q10, d30 - - ; Compute above - ytop_left - vsubl.u8 q3, d2, d0 - - ; Load left row by row and compute left + (above - ytop_left) - ; 1st row and 2nd row - vdup.16 q0, d20[0] - vdup.16 q1, d20[1] - vadd.s16 q0, q3, q0 - vadd.s16 q1, q3, q1 - - ; 3rd row and 4th row - vdup.16 q8, d20[2] - vdup.16 q9, d20[3] - vadd.s16 q8, q3, q8 - vadd.s16 q9, q3, q9 - - vqmovun.s16 d0, q0 - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q8 - vqmovun.s16 d3, q9 - - vst1.64 {d0}, [r0], r1 - vst1.64 {d1}, [r0], r1 - vst1.64 {d2}, [r0], r1 - vst1.64 {d3}, [r0], r1 - - ; 5th row and 6th row - vdup.16 q0, d21[0] - vdup.16 q1, d21[1] - vadd.s16 q0, q3, q0 - vadd.s16 q1, q3, q1 - - ; 7th row and 8th row - vdup.16 q8, d21[2] - vdup.16 q9, d21[3] - vadd.s16 q8, q3, q8 - vadd.s16 q9, q3, q9 - - vqmovun.s16 d0, q0 - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q8 - vqmovun.s16 d3, q9 - - vst1.64 {d0}, [r0], r1 - vst1.64 {d1}, [r0], r1 - vst1.64 {d2}, [r0], r1 - vst1.64 {d3}, [r0], r1 - - bx lr - ENDP ; |vpx_tm_predictor_8x8_neon| - ALIGN 4 - -;void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_tm_predictor_16x16_neon| PROC - ; Load ytop_left = above[-1]; - sub r12, r2, #1 - 
vld1.8 {d0[]}, [r12] - - ; Load above 8 pixels - vld1.8 {q1}, [r2] - - ; preload 8 left into r12 - vld1.8 {d18}, [r3]! - - ; Compute above - ytop_left - vsubl.u8 q2, d2, d0 - vsubl.u8 q3, d3, d0 - - vmovl.u8 q10, d18 - - ; Load left row by row and compute left + (above - ytop_left) - ; Process 8 rows in each single loop and loop 2 times to process 16 rows. - mov r2, #2 - -loop_16x16_neon - ; Process two rows. - vdup.16 q0, d20[0] - vdup.16 q8, d20[1] - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d20[2] ; proload next 2 rows data - vdup.16 q8, d20[3] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - ; Process two rows. - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d21[0] ; proload next 2 rows data - vdup.16 q8, d21[1] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d21[2] ; proload next 2 rows data - vdup.16 q8, d21[3] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vld1.8 {d18}, [r3]! 
; preload 8 left into r12 - vmovl.u8 q10, d18 - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - subs r2, r2, #1 - bgt loop_16x16_neon - - bx lr - ENDP ; |vpx_tm_predictor_16x16_neon| - ALIGN 4 - -;void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride, -; const uint8_t *above, -; const uint8_t *left) -; r0 uint8_t *dst -; r1 ptrdiff_t y_stride -; r2 const uint8_t *above -; r3 const uint8_t *left - -|vpx_tm_predictor_32x32_neon| PROC - ; Load ytop_left = above[-1]; - sub r12, r2, #1 - vld1.8 {d0[]}, [r12] - - ; Load above 32 pixels - vld1.8 {q1}, [r2]! - vld1.8 {q2}, [r2] - - ; preload 8 left pixels - vld1.8 {d26}, [r3]! - - ; Compute above - ytop_left - vsubl.u8 q8, d2, d0 - vsubl.u8 q9, d3, d0 - vsubl.u8 q10, d4, d0 - vsubl.u8 q11, d5, d0 - - vmovl.u8 q3, d26 - - ; Load left row by row and compute left + (above - ytop_left) - ; Process 8 rows in each single loop and loop 4 times to process 32 rows. - mov r2, #4 - -loop_32x32_neon - ; Process two rows. - vdup.16 q0, d6[0] - vdup.16 q2, d6[1] - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q1, d6[2] - vdup.16 q2, d6[3] - vst1.64 {d24-d27}, [r0], r1 - - ; Process two rows. 
- vadd.s16 q12, q1, q8 - vadd.s16 q13, q1, q9 - vadd.s16 q14, q1, q10 - vadd.s16 q15, q1, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q0, d7[0] - vdup.16 q2, d7[1] - vst1.64 {d24-d27}, [r0], r1 - - ; Process two rows. - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q0, d7[2] - vdup.16 q2, d7[3] - vst1.64 {d24-d27}, [r0], r1 - - ; Process two rows. - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vld1.8 {d0}, [r3]! ; preload 8 left pixels - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vmovl.u8 q3, d0 - vst1.64 {d24-d27}, [r0], r1 - - subs r2, r2, #1 - bgt loop_32x32_neon - - bx lr - ENDP ; |vpx_tm_predictor_32x32_neon| - ALIGN 4 - - END diff --git a/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm b/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm deleted file mode 100644 index 9c3736faf8..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm +++ /dev/null @@ -1,641 +0,0 @@ -; This file was created from a .asm file -; using the ads2armasm_ms.pl script. 
-; -; Copyright (c) 2013 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - EXPORT |vpx_lpf_horizontal_edge_8_neon| - EXPORT |vpx_lpf_horizontal_edge_16_neon| - EXPORT |vpx_lpf_vertical_16_neon| - - AREA |.text|, CODE, READONLY, ALIGN=2 - -; void mb_lpf_horizontal_edge(uint8_t *s, int p, -; const uint8_t *blimit, -; const uint8_t *limit, -; const uint8_t *thresh, -; int count) -; r0 uint8_t *s, -; r1 int p, /* pitch */ -; r2 const uint8_t *blimit, -; r3 const uint8_t *limit, -; sp const uint8_t *thresh, -; r12 int count -|mb_lpf_horizontal_edge| PROC - push {r4-r8, lr} - vpush {d8-d15} - ldr r4, [sp, #88] ; load thresh - -h_count - vld1.8 {d16[]}, [r2] ; load *blimit - vld1.8 {d17[]}, [r3] ; load *limit - vld1.8 {d18[]}, [r4] ; load *thresh - - sub r8, r0, r1, lsl #3 ; move src pointer down by 8 lines - - vld1.u8 {d0}, [r8@64], r1 ; p7 - vld1.u8 {d1}, [r8@64], r1 ; p6 - vld1.u8 {d2}, [r8@64], r1 ; p5 - vld1.u8 {d3}, [r8@64], r1 ; p4 - vld1.u8 {d4}, [r8@64], r1 ; p3 - vld1.u8 {d5}, [r8@64], r1 ; p2 - vld1.u8 {d6}, [r8@64], r1 ; p1 - vld1.u8 {d7}, [r8@64], r1 ; p0 - vld1.u8 {d8}, [r8@64], r1 ; q0 - vld1.u8 {d9}, [r8@64], r1 ; q1 - vld1.u8 {d10}, [r8@64], r1 ; q2 - vld1.u8 {d11}, [r8@64], r1 ; q3 - vld1.u8 {d12}, [r8@64], r1 ; q4 - vld1.u8 {d13}, [r8@64], r1 ; q5 - vld1.u8 {d14}, [r8@64], r1 ; q6 - vld1.u8 {d15}, [r8@64], r1 ; q7 - - bl vpx_wide_mbfilter_neon - - tst r7, #1 - beq h_mbfilter - - ; flat && mask were not set for any of the channels. Just store the values - ; from filter. 
- sub r8, r0, r1, lsl #1 - - vst1.u8 {d25}, [r8@64], r1 ; store op1 - vst1.u8 {d24}, [r8@64], r1 ; store op0 - vst1.u8 {d23}, [r8@64], r1 ; store oq0 - vst1.u8 {d26}, [r8@64], r1 ; store oq1 - - b h_next - -h_mbfilter - tst r7, #2 - beq h_wide_mbfilter - - ; flat2 was not set for any of the channels. Just store the values from - ; mbfilter. - sub r8, r0, r1, lsl #1 - sub r8, r8, r1 - - vst1.u8 {d18}, [r8@64], r1 ; store op2 - vst1.u8 {d19}, [r8@64], r1 ; store op1 - vst1.u8 {d20}, [r8@64], r1 ; store op0 - vst1.u8 {d21}, [r8@64], r1 ; store oq0 - vst1.u8 {d22}, [r8@64], r1 ; store oq1 - vst1.u8 {d23}, [r8@64], r1 ; store oq2 - - b h_next - -h_wide_mbfilter - sub r8, r0, r1, lsl #3 - add r8, r8, r1 - - vst1.u8 {d16}, [r8@64], r1 ; store op6 - vst1.u8 {d24}, [r8@64], r1 ; store op5 - vst1.u8 {d25}, [r8@64], r1 ; store op4 - vst1.u8 {d26}, [r8@64], r1 ; store op3 - vst1.u8 {d27}, [r8@64], r1 ; store op2 - vst1.u8 {d18}, [r8@64], r1 ; store op1 - vst1.u8 {d19}, [r8@64], r1 ; store op0 - vst1.u8 {d20}, [r8@64], r1 ; store oq0 - vst1.u8 {d21}, [r8@64], r1 ; store oq1 - vst1.u8 {d22}, [r8@64], r1 ; store oq2 - vst1.u8 {d23}, [r8@64], r1 ; store oq3 - vst1.u8 {d1}, [r8@64], r1 ; store oq4 - vst1.u8 {d2}, [r8@64], r1 ; store oq5 - vst1.u8 {d3}, [r8@64], r1 ; store oq6 - -h_next - add r0, r0, #8 - subs r12, r12, #1 - bne h_count - - vpop {d8-d15} - pop {r4-r8, pc} - - ENDP ; |mb_lpf_horizontal_edge| - ALIGN 4 - -; void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, -; const uint8_t *blimit, -; const uint8_t *limit, -; const uint8_t *thresh) -; r0 uint8_t *s, -; r1 int pitch, -; r2 const uint8_t *blimit, -; r3 const uint8_t *limit, -; sp const uint8_t *thresh -|vpx_lpf_horizontal_edge_8_neon| PROC - mov r12, #1 - b mb_lpf_horizontal_edge - ENDP ; |vpx_lpf_horizontal_edge_8_neon| - ALIGN 4 - -; void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, -; const uint8_t *blimit, -; const uint8_t *limit, -; const uint8_t *thresh) -; r0 uint8_t *s, -; r1 int pitch, -; r2 
const uint8_t *blimit, -; r3 const uint8_t *limit, -; sp const uint8_t *thresh -|vpx_lpf_horizontal_edge_16_neon| PROC - mov r12, #2 - b mb_lpf_horizontal_edge - ENDP ; |vpx_lpf_horizontal_edge_16_neon| - ALIGN 4 - -; void vpx_lpf_vertical_16_neon(uint8_t *s, int p, -; const uint8_t *blimit, -; const uint8_t *limit, -; const uint8_t *thresh) -; r0 uint8_t *s, -; r1 int p, /* pitch */ -; r2 const uint8_t *blimit, -; r3 const uint8_t *limit, -; sp const uint8_t *thresh, -|vpx_lpf_vertical_16_neon| PROC - push {r4-r8, lr} - vpush {d8-d15} - ldr r4, [sp, #88] ; load thresh - - vld1.8 {d16[]}, [r2] ; load *blimit - vld1.8 {d17[]}, [r3] ; load *limit - vld1.8 {d18[]}, [r4] ; load *thresh - - sub r8, r0, #8 - - vld1.8 {d0}, [r8@64], r1 - vld1.8 {d8}, [r0@64], r1 - vld1.8 {d1}, [r8@64], r1 - vld1.8 {d9}, [r0@64], r1 - vld1.8 {d2}, [r8@64], r1 - vld1.8 {d10}, [r0@64], r1 - vld1.8 {d3}, [r8@64], r1 - vld1.8 {d11}, [r0@64], r1 - vld1.8 {d4}, [r8@64], r1 - vld1.8 {d12}, [r0@64], r1 - vld1.8 {d5}, [r8@64], r1 - vld1.8 {d13}, [r0@64], r1 - vld1.8 {d6}, [r8@64], r1 - vld1.8 {d14}, [r0@64], r1 - vld1.8 {d7}, [r8@64], r1 - vld1.8 {d15}, [r0@64], r1 - - sub r0, r0, r1, lsl #3 - - vtrn.32 q0, q2 - vtrn.32 q1, q3 - vtrn.32 q4, q6 - vtrn.32 q5, q7 - - vtrn.16 q0, q1 - vtrn.16 q2, q3 - vtrn.16 q4, q5 - vtrn.16 q6, q7 - - vtrn.8 d0, d1 - vtrn.8 d2, d3 - vtrn.8 d4, d5 - vtrn.8 d6, d7 - - vtrn.8 d8, d9 - vtrn.8 d10, d11 - vtrn.8 d12, d13 - vtrn.8 d14, d15 - - bl vpx_wide_mbfilter_neon - - tst r7, #1 - beq v_mbfilter - - ; flat && mask were not set for any of the channels. Just store the values - ; from filter. 
- sub r8, r0, #2 - - vswp d23, d25 - - vst4.8 {d23[0], d24[0], d25[0], d26[0]}, [r8], r1 - vst4.8 {d23[1], d24[1], d25[1], d26[1]}, [r8], r1 - vst4.8 {d23[2], d24[2], d25[2], d26[2]}, [r8], r1 - vst4.8 {d23[3], d24[3], d25[3], d26[3]}, [r8], r1 - vst4.8 {d23[4], d24[4], d25[4], d26[4]}, [r8], r1 - vst4.8 {d23[5], d24[5], d25[5], d26[5]}, [r8], r1 - vst4.8 {d23[6], d24[6], d25[6], d26[6]}, [r8], r1 - vst4.8 {d23[7], d24[7], d25[7], d26[7]}, [r8], r1 - - b v_end - -v_mbfilter - tst r7, #2 - beq v_wide_mbfilter - - ; flat2 was not set for any of the channels. Just store the values from - ; mbfilter. - sub r8, r0, #3 - - vst3.8 {d18[0], d19[0], d20[0]}, [r8], r1 - vst3.8 {d21[0], d22[0], d23[0]}, [r0], r1 - vst3.8 {d18[1], d19[1], d20[1]}, [r8], r1 - vst3.8 {d21[1], d22[1], d23[1]}, [r0], r1 - vst3.8 {d18[2], d19[2], d20[2]}, [r8], r1 - vst3.8 {d21[2], d22[2], d23[2]}, [r0], r1 - vst3.8 {d18[3], d19[3], d20[3]}, [r8], r1 - vst3.8 {d21[3], d22[3], d23[3]}, [r0], r1 - vst3.8 {d18[4], d19[4], d20[4]}, [r8], r1 - vst3.8 {d21[4], d22[4], d23[4]}, [r0], r1 - vst3.8 {d18[5], d19[5], d20[5]}, [r8], r1 - vst3.8 {d21[5], d22[5], d23[5]}, [r0], r1 - vst3.8 {d18[6], d19[6], d20[6]}, [r8], r1 - vst3.8 {d21[6], d22[6], d23[6]}, [r0], r1 - vst3.8 {d18[7], d19[7], d20[7]}, [r8], r1 - vst3.8 {d21[7], d22[7], d23[7]}, [r0], r1 - - b v_end - -v_wide_mbfilter - sub r8, r0, #8 - - vtrn.32 d0, d26 - vtrn.32 d16, d27 - vtrn.32 d24, d18 - vtrn.32 d25, d19 - - vtrn.16 d0, d24 - vtrn.16 d16, d25 - vtrn.16 d26, d18 - vtrn.16 d27, d19 - - vtrn.8 d0, d16 - vtrn.8 d24, d25 - vtrn.8 d26, d27 - vtrn.8 d18, d19 - - vtrn.32 d20, d1 - vtrn.32 d21, d2 - vtrn.32 d22, d3 - vtrn.32 d23, d15 - - vtrn.16 d20, d22 - vtrn.16 d21, d23 - vtrn.16 d1, d3 - vtrn.16 d2, d15 - - vtrn.8 d20, d21 - vtrn.8 d22, d23 - vtrn.8 d1, d2 - vtrn.8 d3, d15 - - vst1.8 {d0}, [r8@64], r1 - vst1.8 {d20}, [r0@64], r1 - vst1.8 {d16}, [r8@64], r1 - vst1.8 {d21}, [r0@64], r1 - vst1.8 {d24}, [r8@64], r1 - vst1.8 {d22}, [r0@64], r1 - 
vst1.8 {d25}, [r8@64], r1 - vst1.8 {d23}, [r0@64], r1 - vst1.8 {d26}, [r8@64], r1 - vst1.8 {d1}, [r0@64], r1 - vst1.8 {d27}, [r8@64], r1 - vst1.8 {d2}, [r0@64], r1 - vst1.8 {d18}, [r8@64], r1 - vst1.8 {d3}, [r0@64], r1 - vst1.8 {d19}, [r8@64], r1 - vst1.8 {d15}, [r0@64], r1 - -v_end - vpop {d8-d15} - pop {r4-r8, pc} - - ENDP ; |vpx_lpf_vertical_16_neon| - ALIGN 4 - -; void vpx_wide_mbfilter_neon(); -; This is a helper function for the loopfilters. The invidual functions do the -; necessary load, transpose (if necessary) and store. -; -; r0-r3 PRESERVE -; d16 blimit -; d17 limit -; d18 thresh -; d0 p7 -; d1 p6 -; d2 p5 -; d3 p4 -; d4 p3 -; d5 p2 -; d6 p1 -; d7 p0 -; d8 q0 -; d9 q1 -; d10 q2 -; d11 q3 -; d12 q4 -; d13 q5 -; d14 q6 -; d15 q7 -|vpx_wide_mbfilter_neon| PROC - mov r7, #0 - - ; filter_mask - vabd.u8 d19, d4, d5 ; abs(p3 - p2) - vabd.u8 d20, d5, d6 ; abs(p2 - p1) - vabd.u8 d21, d6, d7 ; abs(p1 - p0) - vabd.u8 d22, d9, d8 ; abs(q1 - q0) - vabd.u8 d23, d10, d9 ; abs(q2 - q1) - vabd.u8 d24, d11, d10 ; abs(q3 - q2) - - ; only compare the largest value to limit - vmax.u8 d19, d19, d20 ; max(abs(p3 - p2), abs(p2 - p1)) - vmax.u8 d20, d21, d22 ; max(abs(p1 - p0), abs(q1 - q0)) - vmax.u8 d23, d23, d24 ; max(abs(q2 - q1), abs(q3 - q2)) - vmax.u8 d19, d19, d20 - - vabd.u8 d24, d7, d8 ; abs(p0 - q0) - - vmax.u8 d19, d19, d23 - - vabd.u8 d23, d6, d9 ; a = abs(p1 - q1) - vqadd.u8 d24, d24, d24 ; b = abs(p0 - q0) * 2 - - ; abs () > limit - vcge.u8 d19, d17, d19 - - ; flatmask4 - vabd.u8 d25, d7, d5 ; abs(p0 - p2) - vabd.u8 d26, d8, d10 ; abs(q0 - q2) - vabd.u8 d27, d4, d7 ; abs(p3 - p0) - vabd.u8 d28, d11, d8 ; abs(q3 - q0) - - ; only compare the largest value to thresh - vmax.u8 d25, d25, d26 ; max(abs(p0 - p2), abs(q0 - q2)) - vmax.u8 d26, d27, d28 ; max(abs(p3 - p0), abs(q3 - q0)) - vmax.u8 d25, d25, d26 - vmax.u8 d20, d20, d25 - - vshr.u8 d23, d23, #1 ; a = a / 2 - vqadd.u8 d24, d24, d23 ; a = b + a - - vmov.u8 d30, #1 - vcge.u8 d24, d16, d24 ; (a > blimit * 2 + 
limit) * -1 - - vcge.u8 d20, d30, d20 ; flat - - vand d19, d19, d24 ; mask - - ; hevmask - vcgt.u8 d21, d21, d18 ; (abs(p1 - p0) > thresh)*-1 - vcgt.u8 d22, d22, d18 ; (abs(q1 - q0) > thresh)*-1 - vorr d21, d21, d22 ; hev - - vand d16, d20, d19 ; flat && mask - vmov r5, r6, d16 - - ; flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7) - vabd.u8 d22, d3, d7 ; abs(p4 - p0) - vabd.u8 d23, d12, d8 ; abs(q4 - q0) - vabd.u8 d24, d7, d2 ; abs(p0 - p5) - vabd.u8 d25, d8, d13 ; abs(q0 - q5) - vabd.u8 d26, d1, d7 ; abs(p6 - p0) - vabd.u8 d27, d14, d8 ; abs(q6 - q0) - vabd.u8 d28, d0, d7 ; abs(p7 - p0) - vabd.u8 d29, d15, d8 ; abs(q7 - q0) - - ; only compare the largest value to thresh - vmax.u8 d22, d22, d23 ; max(abs(p4 - p0), abs(q4 - q0)) - vmax.u8 d23, d24, d25 ; max(abs(p0 - p5), abs(q0 - q5)) - vmax.u8 d24, d26, d27 ; max(abs(p6 - p0), abs(q6 - q0)) - vmax.u8 d25, d28, d29 ; max(abs(p7 - p0), abs(q7 - q0)) - - vmax.u8 d26, d22, d23 - vmax.u8 d27, d24, d25 - vmax.u8 d23, d26, d27 - - vcge.u8 d18, d30, d23 ; flat2 - - vmov.u8 d22, #0x80 - - orrs r5, r5, r6 ; Check for 0 - orreq r7, r7, #1 ; Only do filter branch - - vand d17, d18, d16 ; flat2 && flat && mask - vmov r5, r6, d17 - - ; mbfilter() function - - ; filter() function - ; convert to signed - veor d23, d8, d22 ; qs0 - veor d24, d7, d22 ; ps0 - veor d25, d6, d22 ; ps1 - veor d26, d9, d22 ; qs1 - - vmov.u8 d27, #3 - - vsub.s8 d28, d23, d24 ; ( qs0 - ps0) - vqsub.s8 d29, d25, d26 ; filter = clamp(ps1-qs1) - vmull.s8 q15, d28, d27 ; 3 * ( qs0 - ps0) - vand d29, d29, d21 ; filter &= hev - vaddw.s8 q15, q15, d29 ; filter + 3 * (qs0 - ps0) - vmov.u8 d29, #4 - - ; filter = clamp(filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d28, q15 - - vand d28, d28, d19 ; filter &= mask - - vqadd.s8 d30, d28, d27 ; filter2 = clamp(filter+3) - vqadd.s8 d29, d28, d29 ; filter1 = clamp(filter+4) - vshr.s8 d30, d30, #3 ; filter2 >>= 3 - vshr.s8 d29, d29, #3 ; filter1 >>= 3 - - - vqadd.s8 d24, d24, d30 ; op0 = clamp(ps0 + filter2) - vqsub.s8 d23, 
d23, d29 ; oq0 = clamp(qs0 - filter1) - - ; outer tap adjustments: ++filter1 >> 1 - vrshr.s8 d29, d29, #1 - vbic d29, d29, d21 ; filter &= ~hev - - vqadd.s8 d25, d25, d29 ; op1 = clamp(ps1 + filter) - vqsub.s8 d26, d26, d29 ; oq1 = clamp(qs1 - filter) - - veor d24, d24, d22 ; *f_op0 = u^0x80 - veor d23, d23, d22 ; *f_oq0 = u^0x80 - veor d25, d25, d22 ; *f_op1 = u^0x80 - veor d26, d26, d22 ; *f_oq1 = u^0x80 - - tst r7, #1 - bxne lr - - orrs r5, r5, r6 ; Check for 0 - orreq r7, r7, #2 ; Only do mbfilter branch - - ; mbfilter flat && mask branch - ; TODO(fgalligan): Can I decrease the cycles shifting to consective d's - ; and using vibt on the q's? - vmov.u8 d29, #2 - vaddl.u8 q15, d7, d8 ; op2 = p0 + q0 - vmlal.u8 q15, d4, d27 ; op2 = p0 + q0 + p3 * 3 - vmlal.u8 q15, d5, d29 ; op2 = p0 + q0 + p3 * 3 + p2 * 2 - vaddl.u8 q10, d4, d5 - vaddw.u8 q15, d6 ; op2=p1 + p0 + q0 + p3 * 3 + p2 *2 - vaddl.u8 q14, d6, d9 - vqrshrn.u16 d18, q15, #3 ; r_op2 - - vsub.i16 q15, q10 - vaddl.u8 q10, d4, d6 - vadd.i16 q15, q14 - vaddl.u8 q14, d7, d10 - vqrshrn.u16 d19, q15, #3 ; r_op1 - - vsub.i16 q15, q10 - vadd.i16 q15, q14 - vaddl.u8 q14, d8, d11 - vqrshrn.u16 d20, q15, #3 ; r_op0 - - vsubw.u8 q15, d4 ; oq0 = op0 - p3 - vsubw.u8 q15, d7 ; oq0 -= p0 - vadd.i16 q15, q14 - vaddl.u8 q14, d9, d11 - vqrshrn.u16 d21, q15, #3 ; r_oq0 - - vsubw.u8 q15, d5 ; oq1 = oq0 - p2 - vsubw.u8 q15, d8 ; oq1 -= q0 - vadd.i16 q15, q14 - vaddl.u8 q14, d10, d11 - vqrshrn.u16 d22, q15, #3 ; r_oq1 - - vsubw.u8 q15, d6 ; oq2 = oq0 - p1 - vsubw.u8 q15, d9 ; oq2 -= q1 - vadd.i16 q15, q14 - vqrshrn.u16 d27, q15, #3 ; r_oq2 - - ; Filter does not set op2 or oq2, so use p2 and q2. 
- vbif d18, d5, d16 ; t_op2 |= p2 & ~(flat & mask) - vbif d19, d25, d16 ; t_op1 |= f_op1 & ~(flat & mask) - vbif d20, d24, d16 ; t_op0 |= f_op0 & ~(flat & mask) - vbif d21, d23, d16 ; t_oq0 |= f_oq0 & ~(flat & mask) - vbif d22, d26, d16 ; t_oq1 |= f_oq1 & ~(flat & mask) - - vbit d23, d27, d16 ; t_oq2 |= r_oq2 & (flat & mask) - vbif d23, d10, d16 ; t_oq2 |= q2 & ~(flat & mask) - - tst r7, #2 - bxne lr - - ; wide_mbfilter flat2 && flat && mask branch - vmov.u8 d16, #7 - vaddl.u8 q15, d7, d8 ; op6 = p0 + q0 - vaddl.u8 q12, d2, d3 - vaddl.u8 q13, d4, d5 - vaddl.u8 q14, d1, d6 - vmlal.u8 q15, d0, d16 ; op6 += p7 * 3 - vadd.i16 q12, q13 - vadd.i16 q15, q14 - vaddl.u8 q14, d2, d9 - vadd.i16 q15, q12 - vaddl.u8 q12, d0, d1 - vaddw.u8 q15, d1 - vaddl.u8 q13, d0, d2 - vadd.i16 q14, q15, q14 - vqrshrn.u16 d16, q15, #4 ; w_op6 - - vsub.i16 q15, q14, q12 - vaddl.u8 q14, d3, d10 - vqrshrn.u16 d24, q15, #4 ; w_op5 - - vsub.i16 q15, q13 - vaddl.u8 q13, d0, d3 - vadd.i16 q15, q14 - vaddl.u8 q14, d4, d11 - vqrshrn.u16 d25, q15, #4 ; w_op4 - - vadd.i16 q15, q14 - vaddl.u8 q14, d0, d4 - vsub.i16 q15, q13 - vsub.i16 q14, q15, q14 - vqrshrn.u16 d26, q15, #4 ; w_op3 - - vaddw.u8 q15, q14, d5 ; op2 += p2 - vaddl.u8 q14, d0, d5 - vaddw.u8 q15, d12 ; op2 += q4 - vbif d26, d4, d17 ; op3 |= p3 & ~(f2 & f & m) - vqrshrn.u16 d27, q15, #4 ; w_op2 - - vsub.i16 q15, q14 - vaddl.u8 q14, d0, d6 - vaddw.u8 q15, d6 ; op1 += p1 - vaddw.u8 q15, d13 ; op1 += q5 - vbif d27, d18, d17 ; op2 |= t_op2 & ~(f2 & f & m) - vqrshrn.u16 d18, q15, #4 ; w_op1 - - vsub.i16 q15, q14 - vaddl.u8 q14, d0, d7 - vaddw.u8 q15, d7 ; op0 += p0 - vaddw.u8 q15, d14 ; op0 += q6 - vbif d18, d19, d17 ; op1 |= t_op1 & ~(f2 & f & m) - vqrshrn.u16 d19, q15, #4 ; w_op0 - - vsub.i16 q15, q14 - vaddl.u8 q14, d1, d8 - vaddw.u8 q15, d8 ; oq0 += q0 - vaddw.u8 q15, d15 ; oq0 += q7 - vbif d19, d20, d17 ; op0 |= t_op0 & ~(f2 & f & m) - vqrshrn.u16 d20, q15, #4 ; w_oq0 - - vsub.i16 q15, q14 - vaddl.u8 q14, d2, d9 - vaddw.u8 q15, d9 ; oq1 += q1 
- vaddl.u8 q4, d10, d15 - vaddw.u8 q15, d15 ; oq1 += q7 - vbif d20, d21, d17 ; oq0 |= t_oq0 & ~(f2 & f & m) - vqrshrn.u16 d21, q15, #4 ; w_oq1 - - vsub.i16 q15, q14 - vaddl.u8 q14, d3, d10 - vadd.i16 q15, q4 - vaddl.u8 q4, d11, d15 - vbif d21, d22, d17 ; oq1 |= t_oq1 & ~(f2 & f & m) - vqrshrn.u16 d22, q15, #4 ; w_oq2 - - vsub.i16 q15, q14 - vaddl.u8 q14, d4, d11 - vadd.i16 q15, q4 - vaddl.u8 q4, d12, d15 - vbif d22, d23, d17 ; oq2 |= t_oq2 & ~(f2 & f & m) - vqrshrn.u16 d23, q15, #4 ; w_oq3 - - vsub.i16 q15, q14 - vaddl.u8 q14, d5, d12 - vadd.i16 q15, q4 - vaddl.u8 q4, d13, d15 - vbif d16, d1, d17 ; op6 |= p6 & ~(f2 & f & m) - vqrshrn.u16 d1, q15, #4 ; w_oq4 - - vsub.i16 q15, q14 - vaddl.u8 q14, d6, d13 - vadd.i16 q15, q4 - vaddl.u8 q4, d14, d15 - vbif d24, d2, d17 ; op5 |= p5 & ~(f2 & f & m) - vqrshrn.u16 d2, q15, #4 ; w_oq5 - - vsub.i16 q15, q14 - vbif d25, d3, d17 ; op4 |= p4 & ~(f2 & f & m) - vadd.i16 q15, q4 - vbif d23, d11, d17 ; oq3 |= q3 & ~(f2 & f & m) - vqrshrn.u16 d3, q15, #4 ; w_oq6 - vbif d1, d12, d17 ; oq4 |= q4 & ~(f2 & f & m) - vbif d2, d13, d17 ; oq5 |= q5 & ~(f2 & f & m) - vbif d3, d14, d17 ; oq6 |= q6 & ~(f2 & f & m) - - bx lr - ENDP ; |vpx_wide_mbfilter_neon| - ALIGN 4 - - END diff --git a/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/save_reg_neon.asm b/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/save_reg_neon.asm deleted file mode 100644 index 4cf9988e65..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/save_reg_neon.asm +++ /dev/null @@ -1,39 +0,0 @@ -; This file was created from a .asm file -; using the ads2armasm_ms.pl script. -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - - EXPORT |vpx_push_neon| - EXPORT |vpx_pop_neon| - - - - - AREA |.text|, CODE, READONLY, ALIGN=2 - -|vpx_push_neon| PROC - vst1.i64 {d8, d9, d10, d11}, [r0]! - vst1.i64 {d12, d13, d14, d15}, [r0]! - bx lr - - ENDP - ALIGN 4 - -|vpx_pop_neon| PROC - vld1.i64 {d8, d9, d10, d11}, [r0]! - vld1.i64 {d12, d13, d14, d15}, [r0]! - bx lr - - ENDP - ALIGN 4 - - END - diff --git a/thirdparty/libvpx/vpx_dsp/arm/gas/intrapred_neon_asm.s b/thirdparty/libvpx/vpx_dsp/arm/gas/intrapred_neon_asm.s deleted file mode 100644 index 3932227fc5..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/gas/intrapred_neon_asm.s +++ /dev/null @@ -1,658 +0,0 @@ -@ This file was created from a .asm file -@ using the ads2gas.pl script. - .equ DO1STROUNDING, 0 -@ -@ Copyright (c) 2014 The WebM project authors. All Rights Reserved. -@ -@ Use of this source code is governed by a BSD-style license -@ that can be found in the LICENSE file in the root of the source -@ tree. An additional intellectual property rights grant can be found -@ in the file PATENTS. All contributing project authors may -@ be found in the AUTHORS file in the root of the source tree. 
-@ - - .global vpx_v_predictor_4x4_neon - .type vpx_v_predictor_4x4_neon, function - .global vpx_v_predictor_8x8_neon - .type vpx_v_predictor_8x8_neon, function - .global vpx_v_predictor_16x16_neon - .type vpx_v_predictor_16x16_neon, function - .global vpx_v_predictor_32x32_neon - .type vpx_v_predictor_32x32_neon, function - .global vpx_h_predictor_4x4_neon - .type vpx_h_predictor_4x4_neon, function - .global vpx_h_predictor_8x8_neon - .type vpx_h_predictor_8x8_neon, function - .global vpx_h_predictor_16x16_neon - .type vpx_h_predictor_16x16_neon, function - .global vpx_h_predictor_32x32_neon - .type vpx_h_predictor_32x32_neon, function - .global vpx_tm_predictor_4x4_neon - .type vpx_tm_predictor_4x4_neon, function - .global vpx_tm_predictor_8x8_neon - .type vpx_tm_predictor_8x8_neon, function - .global vpx_tm_predictor_16x16_neon - .type vpx_tm_predictor_16x16_neon, function - .global vpx_tm_predictor_32x32_neon - .type vpx_tm_predictor_32x32_neon, function - .arm - .eabi_attribute 24, 1 @Tag_ABI_align_needed - .eabi_attribute 25, 1 @Tag_ABI_align_preserved - -.text -.p2align 2 - -@void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_v_predictor_4x4_neon: - vpx_v_predictor_4x4_neon: @ PROC - vld1.32 {d0[0]}, [r2] - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - bx lr - .size vpx_v_predictor_4x4_neon, .-vpx_v_predictor_4x4_neon @ ENDP @ |vpx_v_predictor_4x4_neon| - -@void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_v_predictor_8x8_neon: - vpx_v_predictor_8x8_neon: @ PROC - vld1.8 {d0}, [r2] - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 
{d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - bx lr - .size vpx_v_predictor_8x8_neon, .-vpx_v_predictor_8x8_neon @ ENDP @ |vpx_v_predictor_8x8_neon| - -@void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_v_predictor_16x16_neon: - vpx_v_predictor_16x16_neon: @ PROC - vld1.8 {q0}, [r2] - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - bx lr - .size vpx_v_predictor_16x16_neon, .-vpx_v_predictor_16x16_neon @ ENDP @ |vpx_v_predictor_16x16_neon| - -@void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_v_predictor_32x32_neon: - vpx_v_predictor_32x32_neon: @ PROC - vld1.8 {q0, q1}, [r2] - mov r2, #2 -loop_v: - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - subs r2, r2, #1 - bgt loop_v - bx lr - .size vpx_v_predictor_32x32_neon, .-vpx_v_predictor_32x32_neon @ ENDP @ |vpx_v_predictor_32x32_neon| - -@void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t 
y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_h_predictor_4x4_neon: - vpx_h_predictor_4x4_neon: @ PROC - vld1.32 {d1[0]}, [r3] - vdup.8 d0, d1[0] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[1] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[2] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[3] - vst1.32 {d0[0]}, [r0], r1 - bx lr - .size vpx_h_predictor_4x4_neon, .-vpx_h_predictor_4x4_neon @ ENDP @ |vpx_h_predictor_4x4_neon| - -@void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_h_predictor_8x8_neon: - vpx_h_predictor_8x8_neon: @ PROC - vld1.64 {d1}, [r3] - vdup.8 d0, d1[0] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[1] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[2] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[3] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[4] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[5] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[6] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[7] - vst1.64 {d0}, [r0], r1 - bx lr - .size vpx_h_predictor_8x8_neon, .-vpx_h_predictor_8x8_neon @ ENDP @ |vpx_h_predictor_8x8_neon| - -@void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_h_predictor_16x16_neon: - vpx_h_predictor_16x16_neon: @ PROC - vld1.8 {q1}, [r3] - vdup.8 q0, d2[0] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[1] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[2] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[3] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[4] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[5] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[6] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[7] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[0] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, 
d3[1] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[2] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[3] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[4] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[5] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[6] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[7] - vst1.8 {q0}, [r0], r1 - bx lr - .size vpx_h_predictor_16x16_neon, .-vpx_h_predictor_16x16_neon @ ENDP @ |vpx_h_predictor_16x16_neon| - -@void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_h_predictor_32x32_neon: - vpx_h_predictor_32x32_neon: @ PROC - sub r1, r1, #16 - mov r2, #2 -loop_h: - vld1.8 {q1}, [r3]! - vdup.8 q0, d2[0] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[1] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[2] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[3] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[4] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[5] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[6] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[7] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[0] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[1] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[2] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[3] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[4] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[5] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[6] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[7] - vst1.8 {q0}, [r0]! 
- vst1.8 {q0}, [r0], r1 - subs r2, r2, #1 - bgt loop_h - bx lr - .size vpx_h_predictor_32x32_neon, .-vpx_h_predictor_32x32_neon @ ENDP @ |vpx_h_predictor_32x32_neon| - -@void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_tm_predictor_4x4_neon: - vpx_tm_predictor_4x4_neon: @ PROC - @ Load ytop_left = above[-1]; - sub r12, r2, #1 - vld1.u8 {d0[]}, [r12] - - @ Load above 4 pixels - vld1.32 {d2[0]}, [r2] - - @ Compute above - ytop_left - vsubl.u8 q3, d2, d0 - - @ Load left row by row and compute left + (above - ytop_left) - @ 1st row and 2nd row - vld1.u8 {d2[]}, [r3]! - vld1.u8 {d4[]}, [r3]! - vmovl.u8 q1, d2 - vmovl.u8 q2, d4 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqmovun.s16 d0, q1 - vqmovun.s16 d1, q2 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d1[0]}, [r0], r1 - - @ 3rd row and 4th row - vld1.u8 {d2[]}, [r3]! - vld1.u8 {d4[]}, [r3] - vmovl.u8 q1, d2 - vmovl.u8 q2, d4 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqmovun.s16 d0, q1 - vqmovun.s16 d1, q2 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d1[0]}, [r0], r1 - bx lr - .size vpx_tm_predictor_4x4_neon, .-vpx_tm_predictor_4x4_neon @ ENDP @ |vpx_tm_predictor_4x4_neon| - -@void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_tm_predictor_8x8_neon: - vpx_tm_predictor_8x8_neon: @ PROC - @ Load ytop_left = above[-1]; - sub r12, r2, #1 - vld1.8 {d0[]}, [r12] - - @ preload 8 left - vld1.8 {d30}, [r3] - - @ Load above 8 pixels - vld1.64 {d2}, [r2] - - vmovl.u8 q10, d30 - - @ Compute above - ytop_left - vsubl.u8 q3, d2, d0 - - @ Load left row by row and compute left + (above - ytop_left) - @ 1st row and 2nd row - vdup.16 q0, d20[0] - vdup.16 q1, d20[1] - vadd.s16 q0, q3, q0 - vadd.s16 q1, q3, q1 
- - @ 3rd row and 4th row - vdup.16 q8, d20[2] - vdup.16 q9, d20[3] - vadd.s16 q8, q3, q8 - vadd.s16 q9, q3, q9 - - vqmovun.s16 d0, q0 - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q8 - vqmovun.s16 d3, q9 - - vst1.64 {d0}, [r0], r1 - vst1.64 {d1}, [r0], r1 - vst1.64 {d2}, [r0], r1 - vst1.64 {d3}, [r0], r1 - - @ 5th row and 6th row - vdup.16 q0, d21[0] - vdup.16 q1, d21[1] - vadd.s16 q0, q3, q0 - vadd.s16 q1, q3, q1 - - @ 7th row and 8th row - vdup.16 q8, d21[2] - vdup.16 q9, d21[3] - vadd.s16 q8, q3, q8 - vadd.s16 q9, q3, q9 - - vqmovun.s16 d0, q0 - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q8 - vqmovun.s16 d3, q9 - - vst1.64 {d0}, [r0], r1 - vst1.64 {d1}, [r0], r1 - vst1.64 {d2}, [r0], r1 - vst1.64 {d3}, [r0], r1 - - bx lr - .size vpx_tm_predictor_8x8_neon, .-vpx_tm_predictor_8x8_neon @ ENDP @ |vpx_tm_predictor_8x8_neon| - -@void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_tm_predictor_16x16_neon: - vpx_tm_predictor_16x16_neon: @ PROC - @ Load ytop_left = above[-1]; - sub r12, r2, #1 - vld1.8 {d0[]}, [r12] - - @ Load above 8 pixels - vld1.8 {q1}, [r2] - - @ preload 8 left into r12 - vld1.8 {d18}, [r3]! - - @ Compute above - ytop_left - vsubl.u8 q2, d2, d0 - vsubl.u8 q3, d3, d0 - - vmovl.u8 q10, d18 - - @ Load left row by row and compute left + (above - ytop_left) - @ Process 8 rows in each single loop and loop 2 times to process 16 rows. - mov r2, #2 - -loop_16x16_neon: - @ Process two rows. - vdup.16 q0, d20[0] - vdup.16 q8, d20[1] - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d20[2] @ proload next 2 rows data - vdup.16 q8, d20[3] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - @ Process two rows. 
- vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d21[0] @ proload next 2 rows data - vdup.16 q8, d21[1] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d21[2] @ proload next 2 rows data - vdup.16 q8, d21[3] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vld1.8 {d18}, [r3]! @ preload 8 left into r12 - vmovl.u8 q10, d18 - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - subs r2, r2, #1 - bgt loop_16x16_neon - - bx lr - .size vpx_tm_predictor_16x16_neon, .-vpx_tm_predictor_16x16_neon @ ENDP @ |vpx_tm_predictor_16x16_neon| - -@void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride, -@ const uint8_t *above, -@ const uint8_t *left) -@ r0 uint8_t *dst -@ r1 ptrdiff_t y_stride -@ r2 const uint8_t *above -@ r3 const uint8_t *left - -_vpx_tm_predictor_32x32_neon: - vpx_tm_predictor_32x32_neon: @ PROC - @ Load ytop_left = above[-1]; - sub r12, r2, #1 - vld1.8 {d0[]}, [r12] - - @ Load above 32 pixels - vld1.8 {q1}, [r2]! - vld1.8 {q2}, [r2] - - @ preload 8 left pixels - vld1.8 {d26}, [r3]! - - @ Compute above - ytop_left - vsubl.u8 q8, d2, d0 - vsubl.u8 q9, d3, d0 - vsubl.u8 q10, d4, d0 - vsubl.u8 q11, d5, d0 - - vmovl.u8 q3, d26 - - @ Load left row by row and compute left + (above - ytop_left) - @ Process 8 rows in each single loop and loop 4 times to process 32 rows. - mov r2, #4 - -loop_32x32_neon: - @ Process two rows. 
- vdup.16 q0, d6[0] - vdup.16 q2, d6[1] - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q1, d6[2] - vdup.16 q2, d6[3] - vst1.64 {d24-d27}, [r0], r1 - - @ Process two rows. - vadd.s16 q12, q1, q8 - vadd.s16 q13, q1, q9 - vadd.s16 q14, q1, q10 - vadd.s16 q15, q1, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q0, d7[0] - vdup.16 q2, d7[1] - vst1.64 {d24-d27}, [r0], r1 - - @ Process two rows. - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q0, d7[2] - vdup.16 q2, d7[3] - vst1.64 {d24-d27}, [r0], r1 - - @ Process two rows. - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vld1.8 {d0}, [r3]! 
@ preload 8 left pixels - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vmovl.u8 q3, d0 - vst1.64 {d24-d27}, [r0], r1 - - subs r2, r2, #1 - bgt loop_32x32_neon - - bx lr - .size vpx_tm_predictor_32x32_neon, .-vpx_tm_predictor_32x32_neon @ ENDP @ |vpx_tm_predictor_32x32_neon| - - .section .note.GNU-stack,"",%progbits diff --git a/thirdparty/libvpx/vpx_dsp/arm/gas/loopfilter_mb_neon.s b/thirdparty/libvpx/vpx_dsp/arm/gas/loopfilter_mb_neon.s deleted file mode 100644 index f6b05406fb..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/gas/loopfilter_mb_neon.s +++ /dev/null @@ -1,647 +0,0 @@ -@ This file was created from a .asm file -@ using the ads2gas.pl script. - .equ DO1STROUNDING, 0 -@ -@ Copyright (c) 2013 The WebM project authors. All Rights Reserved. -@ -@ Use of this source code is governed by a BSD-style license -@ that can be found in the LICENSE file in the root of the source -@ tree. An additional intellectual property rights grant can be found -@ in the file PATENTS. All contributing project authors may -@ be found in the AUTHORS file in the root of the source tree. 
-@ - - .global vpx_lpf_horizontal_edge_8_neon - .type vpx_lpf_horizontal_edge_8_neon, function - .global vpx_lpf_horizontal_edge_16_neon - .type vpx_lpf_horizontal_edge_16_neon, function - .global vpx_lpf_vertical_16_neon - .type vpx_lpf_vertical_16_neon, function - .arm - -.text -.p2align 2 - -@ void mb_lpf_horizontal_edge(uint8_t *s, int p, -@ const uint8_t *blimit, -@ const uint8_t *limit, -@ const uint8_t *thresh, -@ int count) -@ r0 uint8_t *s, -@ r1 int p, /* pitch */ -@ r2 const uint8_t *blimit, -@ r3 const uint8_t *limit, -@ sp const uint8_t *thresh, -@ r12 int count -_mb_lpf_horizontal_edge: - mb_lpf_horizontal_edge: @ PROC - push {r4-r8, lr} - vpush {d8-d15} - ldr r4, [sp, #88] @ load thresh - -h_count: - vld1.8 {d16[]}, [r2] @ load *blimit - vld1.8 {d17[]}, [r3] @ load *limit - vld1.8 {d18[]}, [r4] @ load *thresh - - sub r8, r0, r1, lsl #3 @ move src pointer down by 8 lines - - vld1.u8 {d0}, [r8,:64], r1 @ p7 - vld1.u8 {d1}, [r8,:64], r1 @ p6 - vld1.u8 {d2}, [r8,:64], r1 @ p5 - vld1.u8 {d3}, [r8,:64], r1 @ p4 - vld1.u8 {d4}, [r8,:64], r1 @ p3 - vld1.u8 {d5}, [r8,:64], r1 @ p2 - vld1.u8 {d6}, [r8,:64], r1 @ p1 - vld1.u8 {d7}, [r8,:64], r1 @ p0 - vld1.u8 {d8}, [r8,:64], r1 @ q0 - vld1.u8 {d9}, [r8,:64], r1 @ q1 - vld1.u8 {d10}, [r8,:64], r1 @ q2 - vld1.u8 {d11}, [r8,:64], r1 @ q3 - vld1.u8 {d12}, [r8,:64], r1 @ q4 - vld1.u8 {d13}, [r8,:64], r1 @ q5 - vld1.u8 {d14}, [r8,:64], r1 @ q6 - vld1.u8 {d15}, [r8,:64], r1 @ q7 - - bl vpx_wide_mbfilter_neon - - tst r7, #1 - beq h_mbfilter - - @ flat && mask were not set for any of the channels. Just store the values - @ from filter. - sub r8, r0, r1, lsl #1 - - vst1.u8 {d25}, [r8,:64], r1 @ store op1 - vst1.u8 {d24}, [r8,:64], r1 @ store op0 - vst1.u8 {d23}, [r8,:64], r1 @ store oq0 - vst1.u8 {d26}, [r8,:64], r1 @ store oq1 - - b h_next - -h_mbfilter: - tst r7, #2 - beq h_wide_mbfilter - - @ flat2 was not set for any of the channels. Just store the values from - @ mbfilter. 
- sub r8, r0, r1, lsl #1 - sub r8, r8, r1 - - vst1.u8 {d18}, [r8,:64], r1 @ store op2 - vst1.u8 {d19}, [r8,:64], r1 @ store op1 - vst1.u8 {d20}, [r8,:64], r1 @ store op0 - vst1.u8 {d21}, [r8,:64], r1 @ store oq0 - vst1.u8 {d22}, [r8,:64], r1 @ store oq1 - vst1.u8 {d23}, [r8,:64], r1 @ store oq2 - - b h_next - -h_wide_mbfilter: - sub r8, r0, r1, lsl #3 - add r8, r8, r1 - - vst1.u8 {d16}, [r8,:64], r1 @ store op6 - vst1.u8 {d24}, [r8,:64], r1 @ store op5 - vst1.u8 {d25}, [r8,:64], r1 @ store op4 - vst1.u8 {d26}, [r8,:64], r1 @ store op3 - vst1.u8 {d27}, [r8,:64], r1 @ store op2 - vst1.u8 {d18}, [r8,:64], r1 @ store op1 - vst1.u8 {d19}, [r8,:64], r1 @ store op0 - vst1.u8 {d20}, [r8,:64], r1 @ store oq0 - vst1.u8 {d21}, [r8,:64], r1 @ store oq1 - vst1.u8 {d22}, [r8,:64], r1 @ store oq2 - vst1.u8 {d23}, [r8,:64], r1 @ store oq3 - vst1.u8 {d1}, [r8,:64], r1 @ store oq4 - vst1.u8 {d2}, [r8,:64], r1 @ store oq5 - vst1.u8 {d3}, [r8,:64], r1 @ store oq6 - -h_next: - add r0, r0, #8 - subs r12, r12, #1 - bne h_count - - vpop {d8-d15} - pop {r4-r8, pc} - - .size mb_lpf_horizontal_edge, .-mb_lpf_horizontal_edge @ ENDP @ |mb_lpf_horizontal_edge| - -@ void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, -@ const uint8_t *blimit, -@ const uint8_t *limit, -@ const uint8_t *thresh) -@ r0 uint8_t *s, -@ r1 int pitch, -@ r2 const uint8_t *blimit, -@ r3 const uint8_t *limit, -@ sp const uint8_t *thresh -_vpx_lpf_horizontal_edge_8_neon: - vpx_lpf_horizontal_edge_8_neon: @ PROC - mov r12, #1 - b mb_lpf_horizontal_edge - .size vpx_lpf_horizontal_edge_8_neon, .-vpx_lpf_horizontal_edge_8_neon @ ENDP @ |vpx_lpf_horizontal_edge_8_neon| - -@ void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, -@ const uint8_t *blimit, -@ const uint8_t *limit, -@ const uint8_t *thresh) -@ r0 uint8_t *s, -@ r1 int pitch, -@ r2 const uint8_t *blimit, -@ r3 const uint8_t *limit, -@ sp const uint8_t *thresh -_vpx_lpf_horizontal_edge_16_neon: - vpx_lpf_horizontal_edge_16_neon: @ PROC - mov r12, #2 - b 
mb_lpf_horizontal_edge - .size vpx_lpf_horizontal_edge_16_neon, .-vpx_lpf_horizontal_edge_16_neon @ ENDP @ |vpx_lpf_horizontal_edge_16_neon| - -@ void vpx_lpf_vertical_16_neon(uint8_t *s, int p, -@ const uint8_t *blimit, -@ const uint8_t *limit, -@ const uint8_t *thresh) -@ r0 uint8_t *s, -@ r1 int p, /* pitch */ -@ r2 const uint8_t *blimit, -@ r3 const uint8_t *limit, -@ sp const uint8_t *thresh, -_vpx_lpf_vertical_16_neon: - vpx_lpf_vertical_16_neon: @ PROC - push {r4-r8, lr} - vpush {d8-d15} - ldr r4, [sp, #88] @ load thresh - - vld1.8 {d16[]}, [r2] @ load *blimit - vld1.8 {d17[]}, [r3] @ load *limit - vld1.8 {d18[]}, [r4] @ load *thresh - - sub r8, r0, #8 - - vld1.8 {d0}, [r8,:64], r1 - vld1.8 {d8}, [r0,:64], r1 - vld1.8 {d1}, [r8,:64], r1 - vld1.8 {d9}, [r0,:64], r1 - vld1.8 {d2}, [r8,:64], r1 - vld1.8 {d10}, [r0,:64], r1 - vld1.8 {d3}, [r8,:64], r1 - vld1.8 {d11}, [r0,:64], r1 - vld1.8 {d4}, [r8,:64], r1 - vld1.8 {d12}, [r0,:64], r1 - vld1.8 {d5}, [r8,:64], r1 - vld1.8 {d13}, [r0,:64], r1 - vld1.8 {d6}, [r8,:64], r1 - vld1.8 {d14}, [r0,:64], r1 - vld1.8 {d7}, [r8,:64], r1 - vld1.8 {d15}, [r0,:64], r1 - - sub r0, r0, r1, lsl #3 - - vtrn.32 q0, q2 - vtrn.32 q1, q3 - vtrn.32 q4, q6 - vtrn.32 q5, q7 - - vtrn.16 q0, q1 - vtrn.16 q2, q3 - vtrn.16 q4, q5 - vtrn.16 q6, q7 - - vtrn.8 d0, d1 - vtrn.8 d2, d3 - vtrn.8 d4, d5 - vtrn.8 d6, d7 - - vtrn.8 d8, d9 - vtrn.8 d10, d11 - vtrn.8 d12, d13 - vtrn.8 d14, d15 - - bl vpx_wide_mbfilter_neon - - tst r7, #1 - beq v_mbfilter - - @ flat && mask were not set for any of the channels. Just store the values - @ from filter. 
- sub r8, r0, #2 - - vswp d23, d25 - - vst4.8 {d23[0], d24[0], d25[0], d26[0]}, [r8], r1 - vst4.8 {d23[1], d24[1], d25[1], d26[1]}, [r8], r1 - vst4.8 {d23[2], d24[2], d25[2], d26[2]}, [r8], r1 - vst4.8 {d23[3], d24[3], d25[3], d26[3]}, [r8], r1 - vst4.8 {d23[4], d24[4], d25[4], d26[4]}, [r8], r1 - vst4.8 {d23[5], d24[5], d25[5], d26[5]}, [r8], r1 - vst4.8 {d23[6], d24[6], d25[6], d26[6]}, [r8], r1 - vst4.8 {d23[7], d24[7], d25[7], d26[7]}, [r8], r1 - - b v_end - -v_mbfilter: - tst r7, #2 - beq v_wide_mbfilter - - @ flat2 was not set for any of the channels. Just store the values from - @ mbfilter. - sub r8, r0, #3 - - vst3.8 {d18[0], d19[0], d20[0]}, [r8], r1 - vst3.8 {d21[0], d22[0], d23[0]}, [r0], r1 - vst3.8 {d18[1], d19[1], d20[1]}, [r8], r1 - vst3.8 {d21[1], d22[1], d23[1]}, [r0], r1 - vst3.8 {d18[2], d19[2], d20[2]}, [r8], r1 - vst3.8 {d21[2], d22[2], d23[2]}, [r0], r1 - vst3.8 {d18[3], d19[3], d20[3]}, [r8], r1 - vst3.8 {d21[3], d22[3], d23[3]}, [r0], r1 - vst3.8 {d18[4], d19[4], d20[4]}, [r8], r1 - vst3.8 {d21[4], d22[4], d23[4]}, [r0], r1 - vst3.8 {d18[5], d19[5], d20[5]}, [r8], r1 - vst3.8 {d21[5], d22[5], d23[5]}, [r0], r1 - vst3.8 {d18[6], d19[6], d20[6]}, [r8], r1 - vst3.8 {d21[6], d22[6], d23[6]}, [r0], r1 - vst3.8 {d18[7], d19[7], d20[7]}, [r8], r1 - vst3.8 {d21[7], d22[7], d23[7]}, [r0], r1 - - b v_end - -v_wide_mbfilter: - sub r8, r0, #8 - - vtrn.32 d0, d26 - vtrn.32 d16, d27 - vtrn.32 d24, d18 - vtrn.32 d25, d19 - - vtrn.16 d0, d24 - vtrn.16 d16, d25 - vtrn.16 d26, d18 - vtrn.16 d27, d19 - - vtrn.8 d0, d16 - vtrn.8 d24, d25 - vtrn.8 d26, d27 - vtrn.8 d18, d19 - - vtrn.32 d20, d1 - vtrn.32 d21, d2 - vtrn.32 d22, d3 - vtrn.32 d23, d15 - - vtrn.16 d20, d22 - vtrn.16 d21, d23 - vtrn.16 d1, d3 - vtrn.16 d2, d15 - - vtrn.8 d20, d21 - vtrn.8 d22, d23 - vtrn.8 d1, d2 - vtrn.8 d3, d15 - - vst1.8 {d0}, [r8,:64], r1 - vst1.8 {d20}, [r0,:64], r1 - vst1.8 {d16}, [r8,:64], r1 - vst1.8 {d21}, [r0,:64], r1 - vst1.8 {d24}, [r8,:64], r1 - vst1.8 {d22}, [r0,:64], r1 
- vst1.8 {d25}, [r8,:64], r1 - vst1.8 {d23}, [r0,:64], r1 - vst1.8 {d26}, [r8,:64], r1 - vst1.8 {d1}, [r0,:64], r1 - vst1.8 {d27}, [r8,:64], r1 - vst1.8 {d2}, [r0,:64], r1 - vst1.8 {d18}, [r8,:64], r1 - vst1.8 {d3}, [r0,:64], r1 - vst1.8 {d19}, [r8,:64], r1 - vst1.8 {d15}, [r0,:64], r1 - -v_end: - vpop {d8-d15} - pop {r4-r8, pc} - - .size vpx_lpf_vertical_16_neon, .-vpx_lpf_vertical_16_neon @ ENDP @ |vpx_lpf_vertical_16_neon| - -@ void vpx_wide_mbfilter_neon(); -@ This is a helper function for the loopfilters. The invidual functions do the -@ necessary load, transpose (if necessary) and store. -@ -@ r0-r3 PRESERVE -@ d16 blimit -@ d17 limit -@ d18 thresh -@ d0 p7 -@ d1 p6 -@ d2 p5 -@ d3 p4 -@ d4 p3 -@ d5 p2 -@ d6 p1 -@ d7 p0 -@ d8 q0 -@ d9 q1 -@ d10 q2 -@ d11 q3 -@ d12 q4 -@ d13 q5 -@ d14 q6 -@ d15 q7 -_vpx_wide_mbfilter_neon: - vpx_wide_mbfilter_neon: @ PROC - mov r7, #0 - - @ filter_mask - vabd.u8 d19, d4, d5 @ abs(p3 - p2) - vabd.u8 d20, d5, d6 @ abs(p2 - p1) - vabd.u8 d21, d6, d7 @ abs(p1 - p0) - vabd.u8 d22, d9, d8 @ abs(q1 - q0) - vabd.u8 d23, d10, d9 @ abs(q2 - q1) - vabd.u8 d24, d11, d10 @ abs(q3 - q2) - - @ only compare the largest value to limit - vmax.u8 d19, d19, d20 @ max(abs(p3 - p2), abs(p2 - p1)) - vmax.u8 d20, d21, d22 @ max(abs(p1 - p0), abs(q1 - q0)) - vmax.u8 d23, d23, d24 @ max(abs(q2 - q1), abs(q3 - q2)) - vmax.u8 d19, d19, d20 - - vabd.u8 d24, d7, d8 @ abs(p0 - q0) - - vmax.u8 d19, d19, d23 - - vabd.u8 d23, d6, d9 @ a = abs(p1 - q1) - vqadd.u8 d24, d24, d24 @ b = abs(p0 - q0) * 2 - - @ abs () > limit - vcge.u8 d19, d17, d19 - - @ flatmask4 - vabd.u8 d25, d7, d5 @ abs(p0 - p2) - vabd.u8 d26, d8, d10 @ abs(q0 - q2) - vabd.u8 d27, d4, d7 @ abs(p3 - p0) - vabd.u8 d28, d11, d8 @ abs(q3 - q0) - - @ only compare the largest value to thresh - vmax.u8 d25, d25, d26 @ max(abs(p0 - p2), abs(q0 - q2)) - vmax.u8 d26, d27, d28 @ max(abs(p3 - p0), abs(q3 - q0)) - vmax.u8 d25, d25, d26 - vmax.u8 d20, d20, d25 - - vshr.u8 d23, d23, #1 @ a = a / 2 - vqadd.u8 
d24, d24, d23 @ a = b + a - - vmov.u8 d30, #1 - vcge.u8 d24, d16, d24 @ (a > blimit * 2 + limit) * -1 - - vcge.u8 d20, d30, d20 @ flat - - vand d19, d19, d24 @ mask - - @ hevmask - vcgt.u8 d21, d21, d18 @ (abs(p1 - p0) > thresh)*-1 - vcgt.u8 d22, d22, d18 @ (abs(q1 - q0) > thresh)*-1 - vorr d21, d21, d22 @ hev - - vand d16, d20, d19 @ flat && mask - vmov r5, r6, d16 - - @ flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7) - vabd.u8 d22, d3, d7 @ abs(p4 - p0) - vabd.u8 d23, d12, d8 @ abs(q4 - q0) - vabd.u8 d24, d7, d2 @ abs(p0 - p5) - vabd.u8 d25, d8, d13 @ abs(q0 - q5) - vabd.u8 d26, d1, d7 @ abs(p6 - p0) - vabd.u8 d27, d14, d8 @ abs(q6 - q0) - vabd.u8 d28, d0, d7 @ abs(p7 - p0) - vabd.u8 d29, d15, d8 @ abs(q7 - q0) - - @ only compare the largest value to thresh - vmax.u8 d22, d22, d23 @ max(abs(p4 - p0), abs(q4 - q0)) - vmax.u8 d23, d24, d25 @ max(abs(p0 - p5), abs(q0 - q5)) - vmax.u8 d24, d26, d27 @ max(abs(p6 - p0), abs(q6 - q0)) - vmax.u8 d25, d28, d29 @ max(abs(p7 - p0), abs(q7 - q0)) - - vmax.u8 d26, d22, d23 - vmax.u8 d27, d24, d25 - vmax.u8 d23, d26, d27 - - vcge.u8 d18, d30, d23 @ flat2 - - vmov.u8 d22, #0x80 - - orrs r5, r5, r6 @ Check for 0 - orreq r7, r7, #1 @ Only do filter branch - - vand d17, d18, d16 @ flat2 && flat && mask - vmov r5, r6, d17 - - @ mbfilter() function - - @ filter() function - @ convert to signed - veor d23, d8, d22 @ qs0 - veor d24, d7, d22 @ ps0 - veor d25, d6, d22 @ ps1 - veor d26, d9, d22 @ qs1 - - vmov.u8 d27, #3 - - vsub.s8 d28, d23, d24 @ ( qs0 - ps0) - vqsub.s8 d29, d25, d26 @ filter = clamp(ps1-qs1) - vmull.s8 q15, d28, d27 @ 3 * ( qs0 - ps0) - vand d29, d29, d21 @ filter &= hev - vaddw.s8 q15, q15, d29 @ filter + 3 * (qs0 - ps0) - vmov.u8 d29, #4 - - @ filter = clamp(filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d28, q15 - - vand d28, d28, d19 @ filter &= mask - - vqadd.s8 d30, d28, d27 @ filter2 = clamp(filter+3) - vqadd.s8 d29, d28, d29 @ filter1 = clamp(filter+4) - vshr.s8 d30, d30, #3 @ filter2 >>= 3 - vshr.s8 d29, d29, #3 
@ filter1 >>= 3 - - - vqadd.s8 d24, d24, d30 @ op0 = clamp(ps0 + filter2) - vqsub.s8 d23, d23, d29 @ oq0 = clamp(qs0 - filter1) - - @ outer tap adjustments: ++filter1 >> 1 - vrshr.s8 d29, d29, #1 - vbic d29, d29, d21 @ filter &= ~hev - - vqadd.s8 d25, d25, d29 @ op1 = clamp(ps1 + filter) - vqsub.s8 d26, d26, d29 @ oq1 = clamp(qs1 - filter) - - veor d24, d24, d22 @ *f_op0 = u^0x80 - veor d23, d23, d22 @ *f_oq0 = u^0x80 - veor d25, d25, d22 @ *f_op1 = u^0x80 - veor d26, d26, d22 @ *f_oq1 = u^0x80 - - tst r7, #1 - bxne lr - - orrs r5, r5, r6 @ Check for 0 - orreq r7, r7, #2 @ Only do mbfilter branch - - @ mbfilter flat && mask branch - @ TODO(fgalligan): Can I decrease the cycles shifting to consective d's - @ and using vibt on the q's? - vmov.u8 d29, #2 - vaddl.u8 q15, d7, d8 @ op2 = p0 + q0 - vmlal.u8 q15, d4, d27 @ op2 = p0 + q0 + p3 * 3 - vmlal.u8 q15, d5, d29 @ op2 = p0 + q0 + p3 * 3 + p2 * 2 - vaddl.u8 q10, d4, d5 - vaddw.u8 q15, d6 @ op2=p1 + p0 + q0 + p3 * 3 + p2 *2 - vaddl.u8 q14, d6, d9 - vqrshrn.u16 d18, q15, #3 @ r_op2 - - vsub.i16 q15, q10 - vaddl.u8 q10, d4, d6 - vadd.i16 q15, q14 - vaddl.u8 q14, d7, d10 - vqrshrn.u16 d19, q15, #3 @ r_op1 - - vsub.i16 q15, q10 - vadd.i16 q15, q14 - vaddl.u8 q14, d8, d11 - vqrshrn.u16 d20, q15, #3 @ r_op0 - - vsubw.u8 q15, d4 @ oq0 = op0 - p3 - vsubw.u8 q15, d7 @ oq0 -= p0 - vadd.i16 q15, q14 - vaddl.u8 q14, d9, d11 - vqrshrn.u16 d21, q15, #3 @ r_oq0 - - vsubw.u8 q15, d5 @ oq1 = oq0 - p2 - vsubw.u8 q15, d8 @ oq1 -= q0 - vadd.i16 q15, q14 - vaddl.u8 q14, d10, d11 - vqrshrn.u16 d22, q15, #3 @ r_oq1 - - vsubw.u8 q15, d6 @ oq2 = oq0 - p1 - vsubw.u8 q15, d9 @ oq2 -= q1 - vadd.i16 q15, q14 - vqrshrn.u16 d27, q15, #3 @ r_oq2 - - @ Filter does not set op2 or oq2, so use p2 and q2. 
- vbif d18, d5, d16 @ t_op2 |= p2 & ~(flat & mask) - vbif d19, d25, d16 @ t_op1 |= f_op1 & ~(flat & mask) - vbif d20, d24, d16 @ t_op0 |= f_op0 & ~(flat & mask) - vbif d21, d23, d16 @ t_oq0 |= f_oq0 & ~(flat & mask) - vbif d22, d26, d16 @ t_oq1 |= f_oq1 & ~(flat & mask) - - vbit d23, d27, d16 @ t_oq2 |= r_oq2 & (flat & mask) - vbif d23, d10, d16 @ t_oq2 |= q2 & ~(flat & mask) - - tst r7, #2 - bxne lr - - @ wide_mbfilter flat2 && flat && mask branch - vmov.u8 d16, #7 - vaddl.u8 q15, d7, d8 @ op6 = p0 + q0 - vaddl.u8 q12, d2, d3 - vaddl.u8 q13, d4, d5 - vaddl.u8 q14, d1, d6 - vmlal.u8 q15, d0, d16 @ op6 += p7 * 3 - vadd.i16 q12, q13 - vadd.i16 q15, q14 - vaddl.u8 q14, d2, d9 - vadd.i16 q15, q12 - vaddl.u8 q12, d0, d1 - vaddw.u8 q15, d1 - vaddl.u8 q13, d0, d2 - vadd.i16 q14, q15, q14 - vqrshrn.u16 d16, q15, #4 @ w_op6 - - vsub.i16 q15, q14, q12 - vaddl.u8 q14, d3, d10 - vqrshrn.u16 d24, q15, #4 @ w_op5 - - vsub.i16 q15, q13 - vaddl.u8 q13, d0, d3 - vadd.i16 q15, q14 - vaddl.u8 q14, d4, d11 - vqrshrn.u16 d25, q15, #4 @ w_op4 - - vadd.i16 q15, q14 - vaddl.u8 q14, d0, d4 - vsub.i16 q15, q13 - vsub.i16 q14, q15, q14 - vqrshrn.u16 d26, q15, #4 @ w_op3 - - vaddw.u8 q15, q14, d5 @ op2 += p2 - vaddl.u8 q14, d0, d5 - vaddw.u8 q15, d12 @ op2 += q4 - vbif d26, d4, d17 @ op3 |= p3 & ~(f2 & f & m) - vqrshrn.u16 d27, q15, #4 @ w_op2 - - vsub.i16 q15, q14 - vaddl.u8 q14, d0, d6 - vaddw.u8 q15, d6 @ op1 += p1 - vaddw.u8 q15, d13 @ op1 += q5 - vbif d27, d18, d17 @ op2 |= t_op2 & ~(f2 & f & m) - vqrshrn.u16 d18, q15, #4 @ w_op1 - - vsub.i16 q15, q14 - vaddl.u8 q14, d0, d7 - vaddw.u8 q15, d7 @ op0 += p0 - vaddw.u8 q15, d14 @ op0 += q6 - vbif d18, d19, d17 @ op1 |= t_op1 & ~(f2 & f & m) - vqrshrn.u16 d19, q15, #4 @ w_op0 - - vsub.i16 q15, q14 - vaddl.u8 q14, d1, d8 - vaddw.u8 q15, d8 @ oq0 += q0 - vaddw.u8 q15, d15 @ oq0 += q7 - vbif d19, d20, d17 @ op0 |= t_op0 & ~(f2 & f & m) - vqrshrn.u16 d20, q15, #4 @ w_oq0 - - vsub.i16 q15, q14 - vaddl.u8 q14, d2, d9 - vaddw.u8 q15, d9 @ oq1 += q1 
- vaddl.u8 q4, d10, d15 - vaddw.u8 q15, d15 @ oq1 += q7 - vbif d20, d21, d17 @ oq0 |= t_oq0 & ~(f2 & f & m) - vqrshrn.u16 d21, q15, #4 @ w_oq1 - - vsub.i16 q15, q14 - vaddl.u8 q14, d3, d10 - vadd.i16 q15, q4 - vaddl.u8 q4, d11, d15 - vbif d21, d22, d17 @ oq1 |= t_oq1 & ~(f2 & f & m) - vqrshrn.u16 d22, q15, #4 @ w_oq2 - - vsub.i16 q15, q14 - vaddl.u8 q14, d4, d11 - vadd.i16 q15, q4 - vaddl.u8 q4, d12, d15 - vbif d22, d23, d17 @ oq2 |= t_oq2 & ~(f2 & f & m) - vqrshrn.u16 d23, q15, #4 @ w_oq3 - - vsub.i16 q15, q14 - vaddl.u8 q14, d5, d12 - vadd.i16 q15, q4 - vaddl.u8 q4, d13, d15 - vbif d16, d1, d17 @ op6 |= p6 & ~(f2 & f & m) - vqrshrn.u16 d1, q15, #4 @ w_oq4 - - vsub.i16 q15, q14 - vaddl.u8 q14, d6, d13 - vadd.i16 q15, q4 - vaddl.u8 q4, d14, d15 - vbif d24, d2, d17 @ op5 |= p5 & ~(f2 & f & m) - vqrshrn.u16 d2, q15, #4 @ w_oq5 - - vsub.i16 q15, q14 - vbif d25, d3, d17 @ op4 |= p4 & ~(f2 & f & m) - vadd.i16 q15, q4 - vbif d23, d11, d17 @ oq3 |= q3 & ~(f2 & f & m) - vqrshrn.u16 d3, q15, #4 @ w_oq6 - vbif d1, d12, d17 @ oq4 |= q4 & ~(f2 & f & m) - vbif d2, d13, d17 @ oq5 |= q5 & ~(f2 & f & m) - vbif d3, d14, d17 @ oq6 |= q6 & ~(f2 & f & m) - - bx lr - .size vpx_wide_mbfilter_neon, .-vpx_wide_mbfilter_neon @ ENDP @ |vpx_wide_mbfilter_neon| - - .section .note.GNU-stack,"",%progbits diff --git a/thirdparty/libvpx/vpx_dsp/arm/gas/save_reg_neon.s b/thirdparty/libvpx/vpx_dsp/arm/gas/save_reg_neon.s deleted file mode 100644 index e8852fa0d0..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/gas/save_reg_neon.s +++ /dev/null @@ -1,44 +0,0 @@ -@ This file was created from a .asm file -@ using the ads2gas.pl script. - .equ DO1STROUNDING, 0 -@ -@ Copyright (c) 2010 The WebM project authors. All Rights Reserved. -@ -@ Use of this source code is governed by a BSD-style license -@ that can be found in the LICENSE file in the root of the source -@ tree. An additional intellectual property rights grant can be found -@ in the file PATENTS. 
All contributing project authors may -@ be found in the AUTHORS file in the root of the source tree. -@ - - - .global vpx_push_neon - .type vpx_push_neon, function - .global vpx_pop_neon - .type vpx_pop_neon, function - - .arm - .eabi_attribute 24, 1 @Tag_ABI_align_needed - .eabi_attribute 25, 1 @Tag_ABI_align_preserved - -.text -.p2align 2 - -_vpx_push_neon: - vpx_push_neon: @ PROC - vst1.i64 {d8, d9, d10, d11}, [r0]! - vst1.i64 {d12, d13, d14, d15}, [r0]! - bx lr - - .size vpx_push_neon, .-vpx_push_neon @ ENDP - -_vpx_pop_neon: - vpx_pop_neon: @ PROC - vld1.i64 {d8, d9, d10, d11}, [r0]! - vld1.i64 {d12, d13, d14, d15}, [r0]! - bx lr - - .size vpx_pop_neon, .-vpx_pop_neon @ ENDP - - - .section .note.GNU-stack,"",%progbits diff --git a/thirdparty/libvpx/vpx_dsp/arm/gas_apple/intrapred_neon_asm.s b/thirdparty/libvpx/vpx_dsp/arm/gas_apple/intrapred_neon_asm.s deleted file mode 100644 index 1c527afcff..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/gas_apple/intrapred_neon_asm.s +++ /dev/null @@ -1,660 +0,0 @@ -@ This file was created from a .asm file -@ using the ads2gas_apple.pl script. - - .set WIDE_REFERENCE, 0 - .set ARCHITECTURE, 5 - .set DO1STROUNDING, 0 - @ - @ Copyright (c) 2014 The WebM project authors. All Rights Reserved. - @ - @ Use of this source code is governed by a BSD-style license - @ that can be found in the LICENSE file in the root of the source - @ tree. An additional intellectual property rights grant can be found - @ in the file PATENTS. All contributing project authors may - @ be found in the AUTHORS file in the root of the source tree. 
- @ - - .globl _vpx_v_predictor_4x4_neon - .globl vpx_v_predictor_4x4_neon - .globl _vpx_v_predictor_8x8_neon - .globl vpx_v_predictor_8x8_neon - .globl _vpx_v_predictor_16x16_neon - .globl vpx_v_predictor_16x16_neon - .globl _vpx_v_predictor_32x32_neon - .globl vpx_v_predictor_32x32_neon - .globl _vpx_h_predictor_4x4_neon - .globl vpx_h_predictor_4x4_neon - .globl _vpx_h_predictor_8x8_neon - .globl vpx_h_predictor_8x8_neon - .globl _vpx_h_predictor_16x16_neon - .globl vpx_h_predictor_16x16_neon - .globl _vpx_h_predictor_32x32_neon - .globl vpx_h_predictor_32x32_neon - .globl _vpx_tm_predictor_4x4_neon - .globl vpx_tm_predictor_4x4_neon - .globl _vpx_tm_predictor_8x8_neon - .globl vpx_tm_predictor_8x8_neon - .globl _vpx_tm_predictor_16x16_neon - .globl vpx_tm_predictor_16x16_neon - .globl _vpx_tm_predictor_32x32_neon - .globl vpx_tm_predictor_32x32_neon - @ ARM - @ - @ PRESERVE8 - -.text -.p2align 2 - - @void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_v_predictor_4x4_neon: - vpx_v_predictor_4x4_neon: @ - vld1.32 {d0[0]}, [r2] - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d0[0]}, [r0], r1 - bx lr - @ @ |vpx_v_predictor_4x4_neon| - - @void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_v_predictor_8x8_neon: - vpx_v_predictor_8x8_neon: @ - vld1.8 {d0}, [r2] - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - vst1.8 {d0}, [r0], r1 - bx lr - @ @ |vpx_v_predictor_8x8_neon| - - @void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t 
*above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_v_predictor_16x16_neon: - vpx_v_predictor_16x16_neon: @ - vld1.8 {q0}, [r2] - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - vst1.8 {q0}, [r0], r1 - bx lr - @ @ |vpx_v_predictor_16x16_neon| - - @void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_v_predictor_32x32_neon: - vpx_v_predictor_32x32_neon: @ - vld1.8 {q0, q1}, [r2] - mov r2, #2 -loop_v: - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - vst1.8 {q0, q1}, [r0], r1 - subs r2, r2, #1 - bgt loop_v - bx lr - @ @ |vpx_v_predictor_32x32_neon| - - @void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_h_predictor_4x4_neon: - vpx_h_predictor_4x4_neon: @ - vld1.32 {d1[0]}, [r3] - vdup.8 d0, d1[0] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[1] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[2] - vst1.32 {d0[0]}, [r0], r1 - vdup.8 d0, d1[3] - vst1.32 {d0[0]}, [r0], r1 - bx 
lr - @ @ |vpx_h_predictor_4x4_neon| - - @void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_h_predictor_8x8_neon: - vpx_h_predictor_8x8_neon: @ - vld1.64 {d1}, [r3] - vdup.8 d0, d1[0] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[1] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[2] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[3] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[4] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[5] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[6] - vst1.64 {d0}, [r0], r1 - vdup.8 d0, d1[7] - vst1.64 {d0}, [r0], r1 - bx lr - @ @ |vpx_h_predictor_8x8_neon| - - @void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_h_predictor_16x16_neon: - vpx_h_predictor_16x16_neon: @ - vld1.8 {q1}, [r3] - vdup.8 q0, d2[0] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[1] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[2] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[3] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[4] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[5] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[6] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[7] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[0] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[1] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[2] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[3] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[4] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[5] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[6] - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[7] - vst1.8 {q0}, [r0], r1 - bx lr - @ @ |vpx_h_predictor_16x16_neon| - - @void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - 
-_vpx_h_predictor_32x32_neon: - vpx_h_predictor_32x32_neon: @ - sub r1, r1, #16 - mov r2, #2 -loop_h: - vld1.8 {q1}, [r3]! - vdup.8 q0, d2[0] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[1] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[2] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[3] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[4] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[5] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[6] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d2[7] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[0] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[1] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[2] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[3] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[4] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[5] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[6] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - vdup.8 q0, d3[7] - vst1.8 {q0}, [r0]! - vst1.8 {q0}, [r0], r1 - subs r2, r2, #1 - bgt loop_h - bx lr - @ @ |vpx_h_predictor_32x32_neon| - - @void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_tm_predictor_4x4_neon: - vpx_tm_predictor_4x4_neon: @ - @ Load ytop_left = above[-1] @ - sub r12, r2, #1 - vld1.u8 {d0[]}, [r12] - - @ Load above 4 pixels - vld1.32 {d2[0]}, [r2] - - @ Compute above - ytop_left - vsubl.u8 q3, d2, d0 - - @ Load left row by row and compute left + (above - ytop_left) - @ 1st row and 2nd row - vld1.u8 {d2[]}, [r3]! - vld1.u8 {d4[]}, [r3]! 
- vmovl.u8 q1, d2 - vmovl.u8 q2, d4 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqmovun.s16 d0, q1 - vqmovun.s16 d1, q2 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d1[0]}, [r0], r1 - - @ 3rd row and 4th row - vld1.u8 {d2[]}, [r3]! - vld1.u8 {d4[]}, [r3] - vmovl.u8 q1, d2 - vmovl.u8 q2, d4 - vadd.s16 q1, q1, q3 - vadd.s16 q2, q2, q3 - vqmovun.s16 d0, q1 - vqmovun.s16 d1, q2 - vst1.32 {d0[0]}, [r0], r1 - vst1.32 {d1[0]}, [r0], r1 - bx lr - @ @ |vpx_tm_predictor_4x4_neon| - - @void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_tm_predictor_8x8_neon: - vpx_tm_predictor_8x8_neon: @ - @ Load ytop_left = above[-1] @ - sub r12, r2, #1 - vld1.8 {d0[]}, [r12] - - @ preload 8 left - vld1.8 {d30}, [r3] - - @ Load above 8 pixels - vld1.64 {d2}, [r2] - - vmovl.u8 q10, d30 - - @ Compute above - ytop_left - vsubl.u8 q3, d2, d0 - - @ Load left row by row and compute left + (above - ytop_left) - @ 1st row and 2nd row - vdup.16 q0, d20[0] - vdup.16 q1, d20[1] - vadd.s16 q0, q3, q0 - vadd.s16 q1, q3, q1 - - @ 3rd row and 4th row - vdup.16 q8, d20[2] - vdup.16 q9, d20[3] - vadd.s16 q8, q3, q8 - vadd.s16 q9, q3, q9 - - vqmovun.s16 d0, q0 - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q8 - vqmovun.s16 d3, q9 - - vst1.64 {d0}, [r0], r1 - vst1.64 {d1}, [r0], r1 - vst1.64 {d2}, [r0], r1 - vst1.64 {d3}, [r0], r1 - - @ 5th row and 6th row - vdup.16 q0, d21[0] - vdup.16 q1, d21[1] - vadd.s16 q0, q3, q0 - vadd.s16 q1, q3, q1 - - @ 7th row and 8th row - vdup.16 q8, d21[2] - vdup.16 q9, d21[3] - vadd.s16 q8, q3, q8 - vadd.s16 q9, q3, q9 - - vqmovun.s16 d0, q0 - vqmovun.s16 d1, q1 - vqmovun.s16 d2, q8 - vqmovun.s16 d3, q9 - - vst1.64 {d0}, [r0], r1 - vst1.64 {d1}, [r0], r1 - vst1.64 {d2}, [r0], r1 - vst1.64 {d3}, [r0], r1 - - bx lr - @ @ |vpx_tm_predictor_8x8_neon| - - @void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride, 
- @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_tm_predictor_16x16_neon: - vpx_tm_predictor_16x16_neon: @ - @ Load ytop_left = above[-1] @ - sub r12, r2, #1 - vld1.8 {d0[]}, [r12] - - @ Load above 8 pixels - vld1.8 {q1}, [r2] - - @ preload 8 left into r12 - vld1.8 {d18}, [r3]! - - @ Compute above - ytop_left - vsubl.u8 q2, d2, d0 - vsubl.u8 q3, d3, d0 - - vmovl.u8 q10, d18 - - @ Load left row by row and compute left + (above - ytop_left) - @ Process 8 rows in each single loop and loop 2 times to process 16 rows. - mov r2, #2 - -loop_16x16_neon: - @ Process two rows. - vdup.16 q0, d20[0] - vdup.16 q8, d20[1] - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d20[2] @ proload next 2 rows data - vdup.16 q8, d20[3] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - @ Process two rows. - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d21[0] @ proload next 2 rows data - vdup.16 q8, d21[1] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vdup.16 q0, d21[2] @ proload next 2 rows data - vdup.16 q8, d21[3] - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - - vadd.s16 q1, q0, q2 - vadd.s16 q0, q0, q3 - vadd.s16 q11, q8, q2 - vadd.s16 q8, q8, q3 - vqmovun.s16 d2, q1 - vqmovun.s16 d3, q0 - vqmovun.s16 d22, q11 - vqmovun.s16 d23, q8 - vld1.8 {d18}, [r3]! 
@ preload 8 left into r12 - vmovl.u8 q10, d18 - vst1.64 {d2,d3}, [r0], r1 - vst1.64 {d22,d23}, [r0], r1 - - subs r2, r2, #1 - bgt loop_16x16_neon - - bx lr - @ @ |vpx_tm_predictor_16x16_neon| - - @void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride, - @ const uint8_t *above, - @ const uint8_t *left) - @ r0 uint8_t *dst - @ r1 ptrdiff_t y_stride - @ r2 const uint8_t *above - @ r3 const uint8_t *left - -_vpx_tm_predictor_32x32_neon: - vpx_tm_predictor_32x32_neon: @ - @ Load ytop_left = above[-1] @ - sub r12, r2, #1 - vld1.8 {d0[]}, [r12] - - @ Load above 32 pixels - vld1.8 {q1}, [r2]! - vld1.8 {q2}, [r2] - - @ preload 8 left pixels - vld1.8 {d26}, [r3]! - - @ Compute above - ytop_left - vsubl.u8 q8, d2, d0 - vsubl.u8 q9, d3, d0 - vsubl.u8 q10, d4, d0 - vsubl.u8 q11, d5, d0 - - vmovl.u8 q3, d26 - - @ Load left row by row and compute left + (above - ytop_left) - @ Process 8 rows in each single loop and loop 4 times to process 32 rows. - mov r2, #4 - -loop_32x32_neon: - @ Process two rows. - vdup.16 q0, d6[0] - vdup.16 q2, d6[1] - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q1, d6[2] - vdup.16 q2, d6[3] - vst1.64 {d24-d27}, [r0], r1 - - @ Process two rows. 
- vadd.s16 q12, q1, q8 - vadd.s16 q13, q1, q9 - vadd.s16 q14, q1, q10 - vadd.s16 q15, q1, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q0, d7[0] - vdup.16 q2, d7[1] - vst1.64 {d24-d27}, [r0], r1 - - @ Process two rows. - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vdup.16 q0, d7[2] - vdup.16 q2, d7[3] - vst1.64 {d24-d27}, [r0], r1 - - @ Process two rows. - vadd.s16 q12, q0, q8 - vadd.s16 q13, q0, q9 - vadd.s16 q14, q0, q10 - vadd.s16 q15, q0, q11 - vqmovun.s16 d0, q12 - vqmovun.s16 d1, q13 - vadd.s16 q12, q2, q8 - vadd.s16 q13, q2, q9 - vqmovun.s16 d2, q14 - vqmovun.s16 d3, q15 - vadd.s16 q14, q2, q10 - vadd.s16 q15, q2, q11 - vst1.64 {d0-d3}, [r0], r1 - vqmovun.s16 d24, q12 - vqmovun.s16 d25, q13 - vld1.8 {d0}, [r3]! @ preload 8 left pixels - vqmovun.s16 d26, q14 - vqmovun.s16 d27, q15 - vmovl.u8 q3, d0 - vst1.64 {d24-d27}, [r0], r1 - - subs r2, r2, #1 - bgt loop_32x32_neon - - bx lr - @ @ |vpx_tm_predictor_32x32_neon| - diff --git a/thirdparty/libvpx/vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s b/thirdparty/libvpx/vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s deleted file mode 100644 index 69f7e5207e..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s +++ /dev/null @@ -1,649 +0,0 @@ -@ This file was created from a .asm file -@ using the ads2gas_apple.pl script. 
- - .set WIDE_REFERENCE, 0 - .set ARCHITECTURE, 5 - .set DO1STROUNDING, 0 - @ - @ Copyright (c) 2013 The WebM project authors. All Rights Reserved. - @ - @ Use of this source code is governed by a BSD-style license - @ that can be found in the LICENSE file in the root of the source - @ tree. An additional intellectual property rights grant can be found - @ in the file PATENTS. All contributing project authors may - @ be found in the AUTHORS file in the root of the source tree. - @ - - .globl _vpx_lpf_horizontal_edge_8_neon - .globl vpx_lpf_horizontal_edge_8_neon - .globl _vpx_lpf_horizontal_edge_16_neon - .globl vpx_lpf_horizontal_edge_16_neon - .globl _vpx_lpf_vertical_16_neon - .globl vpx_lpf_vertical_16_neon - @ ARM - -.text -.p2align 2 - - @ void mb_lpf_horizontal_edge(uint8_t *s, int p, - @ const uint8_t *blimit, - @ const uint8_t *limit, - @ const uint8_t *thresh, - @ int count) - @ r0 uint8_t *s, - @ r1 int p, /* pitch */ - @ r2 const uint8_t *blimit, - @ r3 const uint8_t *limit, - @ sp const uint8_t *thresh, - @ r12 int count -_mb_lpf_horizontal_edge: - mb_lpf_horizontal_edge: @ - push {r4-r8, lr} - vpush {d8-d15} - ldr r4, [sp, #88] @ load thresh - -h_count: - vld1.8 {d16[]}, [r2] @ load *blimit - vld1.8 {d17[]}, [r3] @ load *limit - vld1.8 {d18[]}, [r4] @ load *thresh - - sub r8, r0, r1, lsl #3 @ move src pointer down by 8 lines - - vld1.u8 {d0}, [r8,:64], r1 @ p7 - vld1.u8 {d1}, [r8,:64], r1 @ p6 - vld1.u8 {d2}, [r8,:64], r1 @ p5 - vld1.u8 {d3}, [r8,:64], r1 @ p4 - vld1.u8 {d4}, [r8,:64], r1 @ p3 - vld1.u8 {d5}, [r8,:64], r1 @ p2 - vld1.u8 {d6}, [r8,:64], r1 @ p1 - vld1.u8 {d7}, [r8,:64], r1 @ p0 - vld1.u8 {d8}, [r8,:64], r1 @ q0 - vld1.u8 {d9}, [r8,:64], r1 @ q1 - vld1.u8 {d10}, [r8,:64], r1 @ q2 - vld1.u8 {d11}, [r8,:64], r1 @ q3 - vld1.u8 {d12}, [r8,:64], r1 @ q4 - vld1.u8 {d13}, [r8,:64], r1 @ q5 - vld1.u8 {d14}, [r8,:64], r1 @ q6 - vld1.u8 {d15}, [r8,:64], r1 @ q7 - - bl vpx_wide_mbfilter_neon - - tst r7, #1 - beq h_mbfilter - - @ flat && mask were 
not set for any of the channels. Just store the values - @ from filter. - sub r8, r0, r1, lsl #1 - - vst1.u8 {d25}, [r8,:64], r1 @ store op1 - vst1.u8 {d24}, [r8,:64], r1 @ store op0 - vst1.u8 {d23}, [r8,:64], r1 @ store oq0 - vst1.u8 {d26}, [r8,:64], r1 @ store oq1 - - b h_next - -h_mbfilter: - tst r7, #2 - beq h_wide_mbfilter - - @ flat2 was not set for any of the channels. Just store the values from - @ mbfilter. - sub r8, r0, r1, lsl #1 - sub r8, r8, r1 - - vst1.u8 {d18}, [r8,:64], r1 @ store op2 - vst1.u8 {d19}, [r8,:64], r1 @ store op1 - vst1.u8 {d20}, [r8,:64], r1 @ store op0 - vst1.u8 {d21}, [r8,:64], r1 @ store oq0 - vst1.u8 {d22}, [r8,:64], r1 @ store oq1 - vst1.u8 {d23}, [r8,:64], r1 @ store oq2 - - b h_next - -h_wide_mbfilter: - sub r8, r0, r1, lsl #3 - add r8, r8, r1 - - vst1.u8 {d16}, [r8,:64], r1 @ store op6 - vst1.u8 {d24}, [r8,:64], r1 @ store op5 - vst1.u8 {d25}, [r8,:64], r1 @ store op4 - vst1.u8 {d26}, [r8,:64], r1 @ store op3 - vst1.u8 {d27}, [r8,:64], r1 @ store op2 - vst1.u8 {d18}, [r8,:64], r1 @ store op1 - vst1.u8 {d19}, [r8,:64], r1 @ store op0 - vst1.u8 {d20}, [r8,:64], r1 @ store oq0 - vst1.u8 {d21}, [r8,:64], r1 @ store oq1 - vst1.u8 {d22}, [r8,:64], r1 @ store oq2 - vst1.u8 {d23}, [r8,:64], r1 @ store oq3 - vst1.u8 {d1}, [r8,:64], r1 @ store oq4 - vst1.u8 {d2}, [r8,:64], r1 @ store oq5 - vst1.u8 {d3}, [r8,:64], r1 @ store oq6 - -h_next: - add r0, r0, #8 - subs r12, r12, #1 - bne h_count - - vpop {d8-d15} - pop {r4-r8, pc} - - @ @ |mb_lpf_horizontal_edge| - - @ void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, - @ const uint8_t *blimit, - @ const uint8_t *limit, - @ const uint8_t *thresh) - @ r0 uint8_t *s, - @ r1 int pitch, - @ r2 const uint8_t *blimit, - @ r3 const uint8_t *limit, - @ sp const uint8_t *thresh -_vpx_lpf_horizontal_edge_8_neon: - vpx_lpf_horizontal_edge_8_neon: @ - mov r12, #1 - b mb_lpf_horizontal_edge - @ @ |vpx_lpf_horizontal_edge_8_neon| - - @ void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, - @ 
const uint8_t *blimit, - @ const uint8_t *limit, - @ const uint8_t *thresh) - @ r0 uint8_t *s, - @ r1 int pitch, - @ r2 const uint8_t *blimit, - @ r3 const uint8_t *limit, - @ sp const uint8_t *thresh -_vpx_lpf_horizontal_edge_16_neon: - vpx_lpf_horizontal_edge_16_neon: @ - mov r12, #2 - b mb_lpf_horizontal_edge - @ @ |vpx_lpf_horizontal_edge_16_neon| - - @ void vpx_lpf_vertical_16_neon(uint8_t *s, int p, - @ const uint8_t *blimit, - @ const uint8_t *limit, - @ const uint8_t *thresh) - @ r0 uint8_t *s, - @ r1 int p, /* pitch */ - @ r2 const uint8_t *blimit, - @ r3 const uint8_t *limit, - @ sp const uint8_t *thresh, -_vpx_lpf_vertical_16_neon: - vpx_lpf_vertical_16_neon: @ - push {r4-r8, lr} - vpush {d8-d15} - ldr r4, [sp, #88] @ load thresh - - vld1.8 {d16[]}, [r2] @ load *blimit - vld1.8 {d17[]}, [r3] @ load *limit - vld1.8 {d18[]}, [r4] @ load *thresh - - sub r8, r0, #8 - - vld1.8 {d0}, [r8,:64], r1 - vld1.8 {d8}, [r0,:64], r1 - vld1.8 {d1}, [r8,:64], r1 - vld1.8 {d9}, [r0,:64], r1 - vld1.8 {d2}, [r8,:64], r1 - vld1.8 {d10}, [r0,:64], r1 - vld1.8 {d3}, [r8,:64], r1 - vld1.8 {d11}, [r0,:64], r1 - vld1.8 {d4}, [r8,:64], r1 - vld1.8 {d12}, [r0,:64], r1 - vld1.8 {d5}, [r8,:64], r1 - vld1.8 {d13}, [r0,:64], r1 - vld1.8 {d6}, [r8,:64], r1 - vld1.8 {d14}, [r0,:64], r1 - vld1.8 {d7}, [r8,:64], r1 - vld1.8 {d15}, [r0,:64], r1 - - sub r0, r0, r1, lsl #3 - - vtrn.32 q0, q2 - vtrn.32 q1, q3 - vtrn.32 q4, q6 - vtrn.32 q5, q7 - - vtrn.16 q0, q1 - vtrn.16 q2, q3 - vtrn.16 q4, q5 - vtrn.16 q6, q7 - - vtrn.8 d0, d1 - vtrn.8 d2, d3 - vtrn.8 d4, d5 - vtrn.8 d6, d7 - - vtrn.8 d8, d9 - vtrn.8 d10, d11 - vtrn.8 d12, d13 - vtrn.8 d14, d15 - - bl vpx_wide_mbfilter_neon - - tst r7, #1 - beq v_mbfilter - - @ flat && mask were not set for any of the channels. Just store the values - @ from filter. 
- sub r8, r0, #2 - - vswp d23, d25 - - vst4.8 {d23[0], d24[0], d25[0], d26[0]}, [r8], r1 - vst4.8 {d23[1], d24[1], d25[1], d26[1]}, [r8], r1 - vst4.8 {d23[2], d24[2], d25[2], d26[2]}, [r8], r1 - vst4.8 {d23[3], d24[3], d25[3], d26[3]}, [r8], r1 - vst4.8 {d23[4], d24[4], d25[4], d26[4]}, [r8], r1 - vst4.8 {d23[5], d24[5], d25[5], d26[5]}, [r8], r1 - vst4.8 {d23[6], d24[6], d25[6], d26[6]}, [r8], r1 - vst4.8 {d23[7], d24[7], d25[7], d26[7]}, [r8], r1 - - b v_end - -v_mbfilter: - tst r7, #2 - beq v_wide_mbfilter - - @ flat2 was not set for any of the channels. Just store the values from - @ mbfilter. - sub r8, r0, #3 - - vst3.8 {d18[0], d19[0], d20[0]}, [r8], r1 - vst3.8 {d21[0], d22[0], d23[0]}, [r0], r1 - vst3.8 {d18[1], d19[1], d20[1]}, [r8], r1 - vst3.8 {d21[1], d22[1], d23[1]}, [r0], r1 - vst3.8 {d18[2], d19[2], d20[2]}, [r8], r1 - vst3.8 {d21[2], d22[2], d23[2]}, [r0], r1 - vst3.8 {d18[3], d19[3], d20[3]}, [r8], r1 - vst3.8 {d21[3], d22[3], d23[3]}, [r0], r1 - vst3.8 {d18[4], d19[4], d20[4]}, [r8], r1 - vst3.8 {d21[4], d22[4], d23[4]}, [r0], r1 - vst3.8 {d18[5], d19[5], d20[5]}, [r8], r1 - vst3.8 {d21[5], d22[5], d23[5]}, [r0], r1 - vst3.8 {d18[6], d19[6], d20[6]}, [r8], r1 - vst3.8 {d21[6], d22[6], d23[6]}, [r0], r1 - vst3.8 {d18[7], d19[7], d20[7]}, [r8], r1 - vst3.8 {d21[7], d22[7], d23[7]}, [r0], r1 - - b v_end - -v_wide_mbfilter: - sub r8, r0, #8 - - vtrn.32 d0, d26 - vtrn.32 d16, d27 - vtrn.32 d24, d18 - vtrn.32 d25, d19 - - vtrn.16 d0, d24 - vtrn.16 d16, d25 - vtrn.16 d26, d18 - vtrn.16 d27, d19 - - vtrn.8 d0, d16 - vtrn.8 d24, d25 - vtrn.8 d26, d27 - vtrn.8 d18, d19 - - vtrn.32 d20, d1 - vtrn.32 d21, d2 - vtrn.32 d22, d3 - vtrn.32 d23, d15 - - vtrn.16 d20, d22 - vtrn.16 d21, d23 - vtrn.16 d1, d3 - vtrn.16 d2, d15 - - vtrn.8 d20, d21 - vtrn.8 d22, d23 - vtrn.8 d1, d2 - vtrn.8 d3, d15 - - vst1.8 {d0}, [r8,:64], r1 - vst1.8 {d20}, [r0,:64], r1 - vst1.8 {d16}, [r8,:64], r1 - vst1.8 {d21}, [r0,:64], r1 - vst1.8 {d24}, [r8,:64], r1 - vst1.8 {d22}, [r0,:64], r1 
- vst1.8 {d25}, [r8,:64], r1 - vst1.8 {d23}, [r0,:64], r1 - vst1.8 {d26}, [r8,:64], r1 - vst1.8 {d1}, [r0,:64], r1 - vst1.8 {d27}, [r8,:64], r1 - vst1.8 {d2}, [r0,:64], r1 - vst1.8 {d18}, [r8,:64], r1 - vst1.8 {d3}, [r0,:64], r1 - vst1.8 {d19}, [r8,:64], r1 - vst1.8 {d15}, [r0,:64], r1 - -v_end: - vpop {d8-d15} - pop {r4-r8, pc} - - @ @ |vpx_lpf_vertical_16_neon| - - @ void vpx_wide_mbfilter_neon() @ - @ This is a helper function for the loopfilters. The invidual functions do the - @ necessary load, transpose (if necessary) and store. - @ - @ r0-r3 PRESERVE - @ d16 blimit - @ d17 limit - @ d18 thresh - @ d0 p7 - @ d1 p6 - @ d2 p5 - @ d3 p4 - @ d4 p3 - @ d5 p2 - @ d6 p1 - @ d7 p0 - @ d8 q0 - @ d9 q1 - @ d10 q2 - @ d11 q3 - @ d12 q4 - @ d13 q5 - @ d14 q6 - @ d15 q7 -_vpx_wide_mbfilter_neon: - vpx_wide_mbfilter_neon: @ - mov r7, #0 - - @ filter_mask - vabd.u8 d19, d4, d5 @ abs(p3 - p2) - vabd.u8 d20, d5, d6 @ abs(p2 - p1) - vabd.u8 d21, d6, d7 @ abs(p1 - p0) - vabd.u8 d22, d9, d8 @ abs(q1 - q0) - vabd.u8 d23, d10, d9 @ abs(q2 - q1) - vabd.u8 d24, d11, d10 @ abs(q3 - q2) - - @ only compare the largest value to limit - vmax.u8 d19, d19, d20 @ max(abs(p3 - p2), abs(p2 - p1)) - vmax.u8 d20, d21, d22 @ max(abs(p1 - p0), abs(q1 - q0)) - vmax.u8 d23, d23, d24 @ max(abs(q2 - q1), abs(q3 - q2)) - vmax.u8 d19, d19, d20 - - vabd.u8 d24, d7, d8 @ abs(p0 - q0) - - vmax.u8 d19, d19, d23 - - vabd.u8 d23, d6, d9 @ a = abs(p1 - q1) - vqadd.u8 d24, d24, d24 @ b = abs(p0 - q0) * 2 - - @ abs () > limit - vcge.u8 d19, d17, d19 - - @ flatmask4 - vabd.u8 d25, d7, d5 @ abs(p0 - p2) - vabd.u8 d26, d8, d10 @ abs(q0 - q2) - vabd.u8 d27, d4, d7 @ abs(p3 - p0) - vabd.u8 d28, d11, d8 @ abs(q3 - q0) - - @ only compare the largest value to thresh - vmax.u8 d25, d25, d26 @ max(abs(p0 - p2), abs(q0 - q2)) - vmax.u8 d26, d27, d28 @ max(abs(p3 - p0), abs(q3 - q0)) - vmax.u8 d25, d25, d26 - vmax.u8 d20, d20, d25 - - vshr.u8 d23, d23, #1 @ a = a / 2 - vqadd.u8 d24, d24, d23 @ a = b + a - - vmov.u8 d30, #1 
- vcge.u8 d24, d16, d24 @ (a > blimit * 2 + limit) * -1 - - vcge.u8 d20, d30, d20 @ flat - - vand d19, d19, d24 @ mask - - @ hevmask - vcgt.u8 d21, d21, d18 @ (abs(p1 - p0) > thresh)*-1 - vcgt.u8 d22, d22, d18 @ (abs(q1 - q0) > thresh)*-1 - vorr d21, d21, d22 @ hev - - vand d16, d20, d19 @ flat && mask - vmov r5, r6, d16 - - @ flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7) - vabd.u8 d22, d3, d7 @ abs(p4 - p0) - vabd.u8 d23, d12, d8 @ abs(q4 - q0) - vabd.u8 d24, d7, d2 @ abs(p0 - p5) - vabd.u8 d25, d8, d13 @ abs(q0 - q5) - vabd.u8 d26, d1, d7 @ abs(p6 - p0) - vabd.u8 d27, d14, d8 @ abs(q6 - q0) - vabd.u8 d28, d0, d7 @ abs(p7 - p0) - vabd.u8 d29, d15, d8 @ abs(q7 - q0) - - @ only compare the largest value to thresh - vmax.u8 d22, d22, d23 @ max(abs(p4 - p0), abs(q4 - q0)) - vmax.u8 d23, d24, d25 @ max(abs(p0 - p5), abs(q0 - q5)) - vmax.u8 d24, d26, d27 @ max(abs(p6 - p0), abs(q6 - q0)) - vmax.u8 d25, d28, d29 @ max(abs(p7 - p0), abs(q7 - q0)) - - vmax.u8 d26, d22, d23 - vmax.u8 d27, d24, d25 - vmax.u8 d23, d26, d27 - - vcge.u8 d18, d30, d23 @ flat2 - - vmov.u8 d22, #0x80 - - orrs r5, r5, r6 @ Check for 0 - orreq r7, r7, #1 @ Only do filter branch - - vand d17, d18, d16 @ flat2 && flat && mask - vmov r5, r6, d17 - - @ mbfilter() function - - @ filter() function - @ convert to signed - veor d23, d8, d22 @ qs0 - veor d24, d7, d22 @ ps0 - veor d25, d6, d22 @ ps1 - veor d26, d9, d22 @ qs1 - - vmov.u8 d27, #3 - - vsub.s8 d28, d23, d24 @ ( qs0 - ps0) - vqsub.s8 d29, d25, d26 @ filter = clamp(ps1-qs1) - vmull.s8 q15, d28, d27 @ 3 * ( qs0 - ps0) - vand d29, d29, d21 @ filter &= hev - vaddw.s8 q15, q15, d29 @ filter + 3 * (qs0 - ps0) - vmov.u8 d29, #4 - - @ filter = clamp(filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d28, q15 - - vand d28, d28, d19 @ filter &= mask - - vqadd.s8 d30, d28, d27 @ filter2 = clamp(filter+3) - vqadd.s8 d29, d28, d29 @ filter1 = clamp(filter+4) - vshr.s8 d30, d30, #3 @ filter2 >>= 3 - vshr.s8 d29, d29, #3 @ filter1 >>= 3 - - - vqadd.s8 d24, d24, d30 
@ op0 = clamp(ps0 + filter2) - vqsub.s8 d23, d23, d29 @ oq0 = clamp(qs0 - filter1) - - @ outer tap adjustments: ++filter1 >> 1 - vrshr.s8 d29, d29, #1 - vbic d29, d29, d21 @ filter &= ~hev - - vqadd.s8 d25, d25, d29 @ op1 = clamp(ps1 + filter) - vqsub.s8 d26, d26, d29 @ oq1 = clamp(qs1 - filter) - - veor d24, d24, d22 @ *f_op0 = u^0x80 - veor d23, d23, d22 @ *f_oq0 = u^0x80 - veor d25, d25, d22 @ *f_op1 = u^0x80 - veor d26, d26, d22 @ *f_oq1 = u^0x80 - - tst r7, #1 - bxne lr - - orrs r5, r5, r6 @ Check for 0 - orreq r7, r7, #2 @ Only do mbfilter branch - - @ mbfilter flat && mask branch - @ TODO(fgalligan): Can I decrease the cycles shifting to consective d's - @ and using vibt on the q's? - vmov.u8 d29, #2 - vaddl.u8 q15, d7, d8 @ op2 = p0 + q0 - vmlal.u8 q15, d4, d27 @ op2 = p0 + q0 + p3 * 3 - vmlal.u8 q15, d5, d29 @ op2 = p0 + q0 + p3 * 3 + p2 * 2 - vaddl.u8 q10, d4, d5 - vaddw.u8 q15, d6 @ op2=p1 + p0 + q0 + p3 * 3 + p2 *2 - vaddl.u8 q14, d6, d9 - vqrshrn.u16 d18, q15, #3 @ r_op2 - - vsub.i16 q15, q10 - vaddl.u8 q10, d4, d6 - vadd.i16 q15, q14 - vaddl.u8 q14, d7, d10 - vqrshrn.u16 d19, q15, #3 @ r_op1 - - vsub.i16 q15, q10 - vadd.i16 q15, q14 - vaddl.u8 q14, d8, d11 - vqrshrn.u16 d20, q15, #3 @ r_op0 - - vsubw.u8 q15, d4 @ oq0 = op0 - p3 - vsubw.u8 q15, d7 @ oq0 -= p0 - vadd.i16 q15, q14 - vaddl.u8 q14, d9, d11 - vqrshrn.u16 d21, q15, #3 @ r_oq0 - - vsubw.u8 q15, d5 @ oq1 = oq0 - p2 - vsubw.u8 q15, d8 @ oq1 -= q0 - vadd.i16 q15, q14 - vaddl.u8 q14, d10, d11 - vqrshrn.u16 d22, q15, #3 @ r_oq1 - - vsubw.u8 q15, d6 @ oq2 = oq0 - p1 - vsubw.u8 q15, d9 @ oq2 -= q1 - vadd.i16 q15, q14 - vqrshrn.u16 d27, q15, #3 @ r_oq2 - - @ Filter does not set op2 or oq2, so use p2 and q2. 
- vbif d18, d5, d16 @ t_op2 |= p2 & ~(flat & mask) - vbif d19, d25, d16 @ t_op1 |= f_op1 & ~(flat & mask) - vbif d20, d24, d16 @ t_op0 |= f_op0 & ~(flat & mask) - vbif d21, d23, d16 @ t_oq0 |= f_oq0 & ~(flat & mask) - vbif d22, d26, d16 @ t_oq1 |= f_oq1 & ~(flat & mask) - - vbit d23, d27, d16 @ t_oq2 |= r_oq2 & (flat & mask) - vbif d23, d10, d16 @ t_oq2 |= q2 & ~(flat & mask) - - tst r7, #2 - bxne lr - - @ wide_mbfilter flat2 && flat && mask branch - vmov.u8 d16, #7 - vaddl.u8 q15, d7, d8 @ op6 = p0 + q0 - vaddl.u8 q12, d2, d3 - vaddl.u8 q13, d4, d5 - vaddl.u8 q14, d1, d6 - vmlal.u8 q15, d0, d16 @ op6 += p7 * 3 - vadd.i16 q12, q13 - vadd.i16 q15, q14 - vaddl.u8 q14, d2, d9 - vadd.i16 q15, q12 - vaddl.u8 q12, d0, d1 - vaddw.u8 q15, d1 - vaddl.u8 q13, d0, d2 - vadd.i16 q14, q15, q14 - vqrshrn.u16 d16, q15, #4 @ w_op6 - - vsub.i16 q15, q14, q12 - vaddl.u8 q14, d3, d10 - vqrshrn.u16 d24, q15, #4 @ w_op5 - - vsub.i16 q15, q13 - vaddl.u8 q13, d0, d3 - vadd.i16 q15, q14 - vaddl.u8 q14, d4, d11 - vqrshrn.u16 d25, q15, #4 @ w_op4 - - vadd.i16 q15, q14 - vaddl.u8 q14, d0, d4 - vsub.i16 q15, q13 - vsub.i16 q14, q15, q14 - vqrshrn.u16 d26, q15, #4 @ w_op3 - - vaddw.u8 q15, q14, d5 @ op2 += p2 - vaddl.u8 q14, d0, d5 - vaddw.u8 q15, d12 @ op2 += q4 - vbif d26, d4, d17 @ op3 |= p3 & ~(f2 & f & m) - vqrshrn.u16 d27, q15, #4 @ w_op2 - - vsub.i16 q15, q14 - vaddl.u8 q14, d0, d6 - vaddw.u8 q15, d6 @ op1 += p1 - vaddw.u8 q15, d13 @ op1 += q5 - vbif d27, d18, d17 @ op2 |= t_op2 & ~(f2 & f & m) - vqrshrn.u16 d18, q15, #4 @ w_op1 - - vsub.i16 q15, q14 - vaddl.u8 q14, d0, d7 - vaddw.u8 q15, d7 @ op0 += p0 - vaddw.u8 q15, d14 @ op0 += q6 - vbif d18, d19, d17 @ op1 |= t_op1 & ~(f2 & f & m) - vqrshrn.u16 d19, q15, #4 @ w_op0 - - vsub.i16 q15, q14 - vaddl.u8 q14, d1, d8 - vaddw.u8 q15, d8 @ oq0 += q0 - vaddw.u8 q15, d15 @ oq0 += q7 - vbif d19, d20, d17 @ op0 |= t_op0 & ~(f2 & f & m) - vqrshrn.u16 d20, q15, #4 @ w_oq0 - - vsub.i16 q15, q14 - vaddl.u8 q14, d2, d9 - vaddw.u8 q15, d9 @ oq1 += q1 
- vaddl.u8 q4, d10, d15 - vaddw.u8 q15, d15 @ oq1 += q7 - vbif d20, d21, d17 @ oq0 |= t_oq0 & ~(f2 & f & m) - vqrshrn.u16 d21, q15, #4 @ w_oq1 - - vsub.i16 q15, q14 - vaddl.u8 q14, d3, d10 - vadd.i16 q15, q4 - vaddl.u8 q4, d11, d15 - vbif d21, d22, d17 @ oq1 |= t_oq1 & ~(f2 & f & m) - vqrshrn.u16 d22, q15, #4 @ w_oq2 - - vsub.i16 q15, q14 - vaddl.u8 q14, d4, d11 - vadd.i16 q15, q4 - vaddl.u8 q4, d12, d15 - vbif d22, d23, d17 @ oq2 |= t_oq2 & ~(f2 & f & m) - vqrshrn.u16 d23, q15, #4 @ w_oq3 - - vsub.i16 q15, q14 - vaddl.u8 q14, d5, d12 - vadd.i16 q15, q4 - vaddl.u8 q4, d13, d15 - vbif d16, d1, d17 @ op6 |= p6 & ~(f2 & f & m) - vqrshrn.u16 d1, q15, #4 @ w_oq4 - - vsub.i16 q15, q14 - vaddl.u8 q14, d6, d13 - vadd.i16 q15, q4 - vaddl.u8 q4, d14, d15 - vbif d24, d2, d17 @ op5 |= p5 & ~(f2 & f & m) - vqrshrn.u16 d2, q15, #4 @ w_oq5 - - vsub.i16 q15, q14 - vbif d25, d3, d17 @ op4 |= p4 & ~(f2 & f & m) - vadd.i16 q15, q4 - vbif d23, d11, d17 @ oq3 |= q3 & ~(f2 & f & m) - vqrshrn.u16 d3, q15, #4 @ w_oq6 - vbif d1, d12, d17 @ oq4 |= q4 & ~(f2 & f & m) - vbif d2, d13, d17 @ oq5 |= q5 & ~(f2 & f & m) - vbif d3, d14, d17 @ oq6 |= q6 & ~(f2 & f & m) - - bx lr - @ @ |vpx_wide_mbfilter_neon| - diff --git a/thirdparty/libvpx/vpx_dsp/arm/gas_apple/save_reg_neon.s b/thirdparty/libvpx/vpx_dsp/arm/gas_apple/save_reg_neon.s deleted file mode 100644 index f322b698b4..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/gas_apple/save_reg_neon.s +++ /dev/null @@ -1,46 +0,0 @@ -@ This file was created from a .asm file -@ using the ads2gas_apple.pl script. - - .set WIDE_REFERENCE, 0 - .set ARCHITECTURE, 5 - .set DO1STROUNDING, 0 - @ - @ Copyright (c) 2010 The WebM project authors. All Rights Reserved. - @ - @ Use of this source code is governed by a BSD-style license - @ that can be found in the LICENSE file in the root of the source - @ tree. An additional intellectual property rights grant can be found - @ in the file PATENTS. 
All contributing project authors may - @ be found in the AUTHORS file in the root of the source tree. - @ - - - .globl _vpx_push_neon - .globl vpx_push_neon - .globl _vpx_pop_neon - .globl vpx_pop_neon - - @ ARM - @ - @ PRESERVE8 - -.text -.p2align 2 - -_vpx_push_neon: - vpx_push_neon: @ - vst1.i64 {d8, d9, d10, d11}, [r0]! - vst1.i64 {d12, d13, d14, d15}, [r0]! - bx lr - - @ - -_vpx_pop_neon: - vpx_pop_neon: @ - vld1.i64 {d8, d9, d10, d11}, [r0]! - vld1.i64 {d12, d13, d14, d15}, [r0]! - bx lr - - @ - - diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.c deleted file mode 100644 index f734e48027..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <arm_neon.h> - -#include "vpx_dsp/inv_txfm.h" -#include "vpx_ports/mem.h" - -void vpx_idct16x16_1_add_neon( - int16_t *input, - uint8_t *dest, - int dest_stride) { - uint8x8_t d2u8, d3u8, d30u8, d31u8; - uint64x1_t d2u64, d3u64, d4u64, d5u64; - uint16x8_t q0u16, q9u16, q10u16, q11u16, q12u16; - int16x8_t q0s16; - uint8_t *d1, *d2; - int16_t i, j, a1, cospi_16_64 = 11585; - int16_t out = dct_const_round_shift(input[0] * cospi_16_64); - out = dct_const_round_shift(out * cospi_16_64); - a1 = ROUND_POWER_OF_TWO(out, 6); - - q0s16 = vdupq_n_s16(a1); - q0u16 = vreinterpretq_u16_s16(q0s16); - - for (d1 = d2 = dest, i = 0; i < 4; i++) { - for (j = 0; j < 2; j++) { - d2u64 = vld1_u64((const uint64_t *)d1); - d3u64 = vld1_u64((const uint64_t *)(d1 + 8)); - d1 += dest_stride; - d4u64 = vld1_u64((const uint64_t *)d1); - d5u64 = vld1_u64((const uint64_t *)(d1 + 8)); - d1 += dest_stride; - - q9u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d2u64)); - q10u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d3u64)); - q11u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d4u64)); - q12u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d5u64)); - - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); - d30u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); - d31u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); - - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); - vst1_u64((uint64_t *)(d2 + 8), vreinterpret_u64_u8(d3u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d30u8)); - vst1_u64((uint64_t *)(d2 + 8), vreinterpret_u64_u8(d31u8)); - d2 += dest_stride; - } - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct16x16_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct16x16_add_neon.c deleted file mode 100644 index 651ebb21f9..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/idct16x16_add_neon.c +++ /dev/null @@ -1,1317 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> - -#include "./vpx_config.h" -#include "vpx_dsp/txfm_common.h" - -static INLINE void TRANSPOSE8X8( - int16x8_t *q8s16, - int16x8_t *q9s16, - int16x8_t *q10s16, - int16x8_t *q11s16, - int16x8_t *q12s16, - int16x8_t *q13s16, - int16x8_t *q14s16, - int16x8_t *q15s16) { - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32; - int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16; - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - d20s16 = vget_low_s16(*q10s16); - d21s16 = vget_high_s16(*q10s16); - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - d30s16 = vget_low_s16(*q15s16); - d31s16 = vget_high_s16(*q15s16); - - *q8s16 = vcombine_s16(d16s16, d24s16); // vswp d17, d24 - *q9s16 = vcombine_s16(d18s16, d26s16); // vswp d19, d26 - *q10s16 = vcombine_s16(d20s16, d28s16); // vswp d21, d28 - *q11s16 = vcombine_s16(d22s16, d30s16); // vswp d23, d30 - *q12s16 = vcombine_s16(d17s16, d25s16); - *q13s16 = vcombine_s16(d19s16, d27s16); - *q14s16 = vcombine_s16(d21s16, d29s16); - *q15s16 = vcombine_s16(d23s16, d31s16); - - q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q8s16), - vreinterpretq_s32_s16(*q10s16)); - q1x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q9s16), - 
vreinterpretq_s32_s16(*q11s16)); - q2x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q12s16), - vreinterpretq_s32_s16(*q14s16)); - q3x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q13s16), - vreinterpretq_s32_s16(*q15s16)); - - q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]), // q8 - vreinterpretq_s16_s32(q1x2s32.val[0])); // q9 - q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]), // q10 - vreinterpretq_s16_s32(q1x2s32.val[1])); // q11 - q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]), // q12 - vreinterpretq_s16_s32(q3x2s32.val[0])); // q13 - q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]), // q14 - vreinterpretq_s16_s32(q3x2s32.val[1])); // q15 - - *q8s16 = q0x2s16.val[0]; - *q9s16 = q0x2s16.val[1]; - *q10s16 = q1x2s16.val[0]; - *q11s16 = q1x2s16.val[1]; - *q12s16 = q2x2s16.val[0]; - *q13s16 = q2x2s16.val[1]; - *q14s16 = q3x2s16.val[0]; - *q15s16 = q3x2s16.val[1]; - return; -} - -void vpx_idct16x16_256_add_neon_pass1( - int16_t *in, - int16_t *out, - int output_stride) { - int16x4_t d0s16, d1s16, d2s16, d3s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - uint64x1_t d16u64, d17u64, d18u64, d19u64, d20u64, d21u64, d22u64, d23u64; - uint64x1_t d24u64, d25u64, d26u64, d27u64, d28u64, d29u64, d30u64, d31u64; - int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - int32x4_t q0s32, q1s32, q2s32, q3s32, q5s32, q6s32, q9s32; - int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32; - int16x8x2_t q0x2s16; - - q0x2s16 = vld2q_s16(in); - q8s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q9s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q10s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q11s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = 
vld2q_s16(in); - q12s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q13s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q14s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q15s16 = q0x2s16.val[0]; - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - d16s16 = vget_low_s16(q8s16); - d17s16 = vget_high_s16(q8s16); - d18s16 = vget_low_s16(q9s16); - d19s16 = vget_high_s16(q9s16); - d20s16 = vget_low_s16(q10s16); - d21s16 = vget_high_s16(q10s16); - d22s16 = vget_low_s16(q11s16); - d23s16 = vget_high_s16(q11s16); - d24s16 = vget_low_s16(q12s16); - d25s16 = vget_high_s16(q12s16); - d26s16 = vget_low_s16(q13s16); - d27s16 = vget_high_s16(q13s16); - d28s16 = vget_low_s16(q14s16); - d29s16 = vget_high_s16(q14s16); - d30s16 = vget_low_s16(q15s16); - d31s16 = vget_high_s16(q15s16); - - // stage 3 - d0s16 = vdup_n_s16(cospi_28_64); - d1s16 = vdup_n_s16(cospi_4_64); - - q2s32 = vmull_s16(d18s16, d0s16); - q3s32 = vmull_s16(d19s16, d0s16); - q5s32 = vmull_s16(d18s16, d1s16); - q6s32 = vmull_s16(d19s16, d1s16); - - q2s32 = vmlsl_s16(q2s32, d30s16, d1s16); - q3s32 = vmlsl_s16(q3s32, d31s16, d1s16); - q5s32 = vmlal_s16(q5s32, d30s16, d0s16); - q6s32 = vmlal_s16(q6s32, d31s16, d0s16); - - d2s16 = vdup_n_s16(cospi_12_64); - d3s16 = vdup_n_s16(cospi_20_64); - - d8s16 = vqrshrn_n_s32(q2s32, 14); - d9s16 = vqrshrn_n_s32(q3s32, 14); - d14s16 = vqrshrn_n_s32(q5s32, 14); - d15s16 = vqrshrn_n_s32(q6s32, 14); - q4s16 = vcombine_s16(d8s16, d9s16); - q7s16 = vcombine_s16(d14s16, d15s16); - - q2s32 = vmull_s16(d26s16, d2s16); - q3s32 = vmull_s16(d27s16, d2s16); - q9s32 = vmull_s16(d26s16, d3s16); - q15s32 = vmull_s16(d27s16, d3s16); - - q2s32 = vmlsl_s16(q2s32, d22s16, d3s16); - q3s32 = vmlsl_s16(q3s32, d23s16, d3s16); - q9s32 = vmlal_s16(q9s32, d22s16, d2s16); - q15s32 = vmlal_s16(q15s32, d23s16, d2s16); - - d10s16 = vqrshrn_n_s32(q2s32, 14); - d11s16 = vqrshrn_n_s32(q3s32, 14); - d12s16 = vqrshrn_n_s32(q9s32, 
14); - d13s16 = vqrshrn_n_s32(q15s32, 14); - q5s16 = vcombine_s16(d10s16, d11s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - // stage 4 - d30s16 = vdup_n_s16(cospi_16_64); - - q2s32 = vmull_s16(d16s16, d30s16); - q11s32 = vmull_s16(d17s16, d30s16); - q0s32 = vmull_s16(d24s16, d30s16); - q1s32 = vmull_s16(d25s16, d30s16); - - d30s16 = vdup_n_s16(cospi_24_64); - d31s16 = vdup_n_s16(cospi_8_64); - - q3s32 = vaddq_s32(q2s32, q0s32); - q12s32 = vaddq_s32(q11s32, q1s32); - q13s32 = vsubq_s32(q2s32, q0s32); - q1s32 = vsubq_s32(q11s32, q1s32); - - d16s16 = vqrshrn_n_s32(q3s32, 14); - d17s16 = vqrshrn_n_s32(q12s32, 14); - d18s16 = vqrshrn_n_s32(q13s32, 14); - d19s16 = vqrshrn_n_s32(q1s32, 14); - q8s16 = vcombine_s16(d16s16, d17s16); - q9s16 = vcombine_s16(d18s16, d19s16); - - q0s32 = vmull_s16(d20s16, d31s16); - q1s32 = vmull_s16(d21s16, d31s16); - q12s32 = vmull_s16(d20s16, d30s16); - q13s32 = vmull_s16(d21s16, d30s16); - - q0s32 = vmlal_s16(q0s32, d28s16, d30s16); - q1s32 = vmlal_s16(q1s32, d29s16, d30s16); - q12s32 = vmlsl_s16(q12s32, d28s16, d31s16); - q13s32 = vmlsl_s16(q13s32, d29s16, d31s16); - - d22s16 = vqrshrn_n_s32(q0s32, 14); - d23s16 = vqrshrn_n_s32(q1s32, 14); - d20s16 = vqrshrn_n_s32(q12s32, 14); - d21s16 = vqrshrn_n_s32(q13s32, 14); - q10s16 = vcombine_s16(d20s16, d21s16); - q11s16 = vcombine_s16(d22s16, d23s16); - - q13s16 = vsubq_s16(q4s16, q5s16); - q4s16 = vaddq_s16(q4s16, q5s16); - q14s16 = vsubq_s16(q7s16, q6s16); - q15s16 = vaddq_s16(q6s16, q7s16); - d26s16 = vget_low_s16(q13s16); - d27s16 = vget_high_s16(q13s16); - d28s16 = vget_low_s16(q14s16); - d29s16 = vget_high_s16(q14s16); - - // stage 5 - q0s16 = vaddq_s16(q8s16, q11s16); - q1s16 = vaddq_s16(q9s16, q10s16); - q2s16 = vsubq_s16(q9s16, q10s16); - q3s16 = vsubq_s16(q8s16, q11s16); - - d16s16 = vdup_n_s16(cospi_16_64); - - q11s32 = vmull_s16(d26s16, d16s16); - q12s32 = vmull_s16(d27s16, d16s16); - q9s32 = vmull_s16(d28s16, d16s16); - q10s32 = vmull_s16(d29s16, d16s16); - - q6s32 = 
vsubq_s32(q9s32, q11s32); - q13s32 = vsubq_s32(q10s32, q12s32); - q9s32 = vaddq_s32(q9s32, q11s32); - q10s32 = vaddq_s32(q10s32, q12s32); - - d10s16 = vqrshrn_n_s32(q6s32, 14); - d11s16 = vqrshrn_n_s32(q13s32, 14); - d12s16 = vqrshrn_n_s32(q9s32, 14); - d13s16 = vqrshrn_n_s32(q10s32, 14); - q5s16 = vcombine_s16(d10s16, d11s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - // stage 6 - q8s16 = vaddq_s16(q0s16, q15s16); - q9s16 = vaddq_s16(q1s16, q6s16); - q10s16 = vaddq_s16(q2s16, q5s16); - q11s16 = vaddq_s16(q3s16, q4s16); - q12s16 = vsubq_s16(q3s16, q4s16); - q13s16 = vsubq_s16(q2s16, q5s16); - q14s16 = vsubq_s16(q1s16, q6s16); - q15s16 = vsubq_s16(q0s16, q15s16); - - d16u64 = vreinterpret_u64_s16(vget_low_s16(q8s16)); - d17u64 = vreinterpret_u64_s16(vget_high_s16(q8s16)); - d18u64 = vreinterpret_u64_s16(vget_low_s16(q9s16)); - d19u64 = vreinterpret_u64_s16(vget_high_s16(q9s16)); - d20u64 = vreinterpret_u64_s16(vget_low_s16(q10s16)); - d21u64 = vreinterpret_u64_s16(vget_high_s16(q10s16)); - d22u64 = vreinterpret_u64_s16(vget_low_s16(q11s16)); - d23u64 = vreinterpret_u64_s16(vget_high_s16(q11s16)); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - d28u64 = vreinterpret_u64_s16(vget_low_s16(q14s16)); - d29u64 = vreinterpret_u64_s16(vget_high_s16(q14s16)); - d30u64 = vreinterpret_u64_s16(vget_low_s16(q15s16)); - d31u64 = vreinterpret_u64_s16(vget_high_s16(q15s16)); - - // store the data - output_stride >>= 1; // output_stride / 2, out is int16_t - vst1_u64((uint64_t *)out, d16u64); - out += output_stride; - vst1_u64((uint64_t *)out, d17u64); - out += output_stride; - vst1_u64((uint64_t *)out, d18u64); - out += output_stride; - vst1_u64((uint64_t *)out, d19u64); - out += output_stride; - vst1_u64((uint64_t *)out, d20u64); - out += output_stride; - vst1_u64((uint64_t *)out, d21u64); - out += 
output_stride; - vst1_u64((uint64_t *)out, d22u64); - out += output_stride; - vst1_u64((uint64_t *)out, d23u64); - out += output_stride; - vst1_u64((uint64_t *)out, d24u64); - out += output_stride; - vst1_u64((uint64_t *)out, d25u64); - out += output_stride; - vst1_u64((uint64_t *)out, d26u64); - out += output_stride; - vst1_u64((uint64_t *)out, d27u64); - out += output_stride; - vst1_u64((uint64_t *)out, d28u64); - out += output_stride; - vst1_u64((uint64_t *)out, d29u64); - out += output_stride; - vst1_u64((uint64_t *)out, d30u64); - out += output_stride; - vst1_u64((uint64_t *)out, d31u64); - return; -} - -void vpx_idct16x16_256_add_neon_pass2( - int16_t *src, - int16_t *out, - int16_t *pass1Output, - int16_t skip_adding, - uint8_t *dest, - int dest_stride) { - uint8_t *d; - uint8x8_t d12u8, d13u8; - int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - uint64x1_t d24u64, d25u64, d26u64, d27u64; - int64x1_t d12s64, d13s64; - uint16x8_t q2u16, q3u16, q4u16, q5u16, q8u16; - uint16x8_t q9u16, q12u16, q13u16, q14u16, q15u16; - int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q8s32, q9s32; - int32x4_t q10s32, q11s32, q12s32, q13s32; - int16x8x2_t q0x2s16; - - q0x2s16 = vld2q_s16(src); - q8s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q9s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q10s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q11s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q12s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q13s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); 
- q14s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q15s16 = q0x2s16.val[0]; - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - d16s16 = vget_low_s16(q8s16); - d17s16 = vget_high_s16(q8s16); - d18s16 = vget_low_s16(q9s16); - d19s16 = vget_high_s16(q9s16); - d20s16 = vget_low_s16(q10s16); - d21s16 = vget_high_s16(q10s16); - d22s16 = vget_low_s16(q11s16); - d23s16 = vget_high_s16(q11s16); - d24s16 = vget_low_s16(q12s16); - d25s16 = vget_high_s16(q12s16); - d26s16 = vget_low_s16(q13s16); - d27s16 = vget_high_s16(q13s16); - d28s16 = vget_low_s16(q14s16); - d29s16 = vget_high_s16(q14s16); - d30s16 = vget_low_s16(q15s16); - d31s16 = vget_high_s16(q15s16); - - // stage 3 - d12s16 = vdup_n_s16(cospi_30_64); - d13s16 = vdup_n_s16(cospi_2_64); - - q2s32 = vmull_s16(d16s16, d12s16); - q3s32 = vmull_s16(d17s16, d12s16); - q1s32 = vmull_s16(d16s16, d13s16); - q4s32 = vmull_s16(d17s16, d13s16); - - q2s32 = vmlsl_s16(q2s32, d30s16, d13s16); - q3s32 = vmlsl_s16(q3s32, d31s16, d13s16); - q1s32 = vmlal_s16(q1s32, d30s16, d12s16); - q4s32 = vmlal_s16(q4s32, d31s16, d12s16); - - d0s16 = vqrshrn_n_s32(q2s32, 14); - d1s16 = vqrshrn_n_s32(q3s32, 14); - d14s16 = vqrshrn_n_s32(q1s32, 14); - d15s16 = vqrshrn_n_s32(q4s32, 14); - q0s16 = vcombine_s16(d0s16, d1s16); - q7s16 = vcombine_s16(d14s16, d15s16); - - d30s16 = vdup_n_s16(cospi_14_64); - d31s16 = vdup_n_s16(cospi_18_64); - - q2s32 = vmull_s16(d24s16, d30s16); - q3s32 = vmull_s16(d25s16, d30s16); - q4s32 = vmull_s16(d24s16, d31s16); - q5s32 = vmull_s16(d25s16, d31s16); - - q2s32 = vmlsl_s16(q2s32, d22s16, d31s16); - q3s32 = vmlsl_s16(q3s32, d23s16, d31s16); - q4s32 = vmlal_s16(q4s32, d22s16, d30s16); - q5s32 = vmlal_s16(q5s32, d23s16, d30s16); - - d2s16 = vqrshrn_n_s32(q2s32, 14); - d3s16 = vqrshrn_n_s32(q3s32, 14); - d12s16 = vqrshrn_n_s32(q4s32, 14); - d13s16 = vqrshrn_n_s32(q5s32, 14); - q1s16 = vcombine_s16(d2s16, d3s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - d30s16 = 
vdup_n_s16(cospi_22_64); - d31s16 = vdup_n_s16(cospi_10_64); - - q11s32 = vmull_s16(d20s16, d30s16); - q12s32 = vmull_s16(d21s16, d30s16); - q4s32 = vmull_s16(d20s16, d31s16); - q5s32 = vmull_s16(d21s16, d31s16); - - q11s32 = vmlsl_s16(q11s32, d26s16, d31s16); - q12s32 = vmlsl_s16(q12s32, d27s16, d31s16); - q4s32 = vmlal_s16(q4s32, d26s16, d30s16); - q5s32 = vmlal_s16(q5s32, d27s16, d30s16); - - d4s16 = vqrshrn_n_s32(q11s32, 14); - d5s16 = vqrshrn_n_s32(q12s32, 14); - d11s16 = vqrshrn_n_s32(q5s32, 14); - d10s16 = vqrshrn_n_s32(q4s32, 14); - q2s16 = vcombine_s16(d4s16, d5s16); - q5s16 = vcombine_s16(d10s16, d11s16); - - d30s16 = vdup_n_s16(cospi_6_64); - d31s16 = vdup_n_s16(cospi_26_64); - - q10s32 = vmull_s16(d28s16, d30s16); - q11s32 = vmull_s16(d29s16, d30s16); - q12s32 = vmull_s16(d28s16, d31s16); - q13s32 = vmull_s16(d29s16, d31s16); - - q10s32 = vmlsl_s16(q10s32, d18s16, d31s16); - q11s32 = vmlsl_s16(q11s32, d19s16, d31s16); - q12s32 = vmlal_s16(q12s32, d18s16, d30s16); - q13s32 = vmlal_s16(q13s32, d19s16, d30s16); - - d6s16 = vqrshrn_n_s32(q10s32, 14); - d7s16 = vqrshrn_n_s32(q11s32, 14); - d8s16 = vqrshrn_n_s32(q12s32, 14); - d9s16 = vqrshrn_n_s32(q13s32, 14); - q3s16 = vcombine_s16(d6s16, d7s16); - q4s16 = vcombine_s16(d8s16, d9s16); - - // stage 3 - q9s16 = vsubq_s16(q0s16, q1s16); - q0s16 = vaddq_s16(q0s16, q1s16); - q10s16 = vsubq_s16(q3s16, q2s16); - q11s16 = vaddq_s16(q2s16, q3s16); - q12s16 = vaddq_s16(q4s16, q5s16); - q13s16 = vsubq_s16(q4s16, q5s16); - q14s16 = vsubq_s16(q7s16, q6s16); - q7s16 = vaddq_s16(q6s16, q7s16); - - // stage 4 - d18s16 = vget_low_s16(q9s16); - d19s16 = vget_high_s16(q9s16); - d20s16 = vget_low_s16(q10s16); - d21s16 = vget_high_s16(q10s16); - d26s16 = vget_low_s16(q13s16); - d27s16 = vget_high_s16(q13s16); - d28s16 = vget_low_s16(q14s16); - d29s16 = vget_high_s16(q14s16); - - d30s16 = vdup_n_s16(cospi_8_64); - d31s16 = vdup_n_s16(cospi_24_64); - - q2s32 = vmull_s16(d18s16, d31s16); - q3s32 = vmull_s16(d19s16, d31s16); - q4s32 
= vmull_s16(d28s16, d31s16); - q5s32 = vmull_s16(d29s16, d31s16); - - q2s32 = vmlal_s16(q2s32, d28s16, d30s16); - q3s32 = vmlal_s16(q3s32, d29s16, d30s16); - q4s32 = vmlsl_s16(q4s32, d18s16, d30s16); - q5s32 = vmlsl_s16(q5s32, d19s16, d30s16); - - d12s16 = vqrshrn_n_s32(q2s32, 14); - d13s16 = vqrshrn_n_s32(q3s32, 14); - d2s16 = vqrshrn_n_s32(q4s32, 14); - d3s16 = vqrshrn_n_s32(q5s32, 14); - q1s16 = vcombine_s16(d2s16, d3s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - q3s16 = q11s16; - q4s16 = q12s16; - - d30s16 = vdup_n_s16(-cospi_8_64); - q11s32 = vmull_s16(d26s16, d30s16); - q12s32 = vmull_s16(d27s16, d30s16); - q8s32 = vmull_s16(d20s16, d30s16); - q9s32 = vmull_s16(d21s16, d30s16); - - q11s32 = vmlsl_s16(q11s32, d20s16, d31s16); - q12s32 = vmlsl_s16(q12s32, d21s16, d31s16); - q8s32 = vmlal_s16(q8s32, d26s16, d31s16); - q9s32 = vmlal_s16(q9s32, d27s16, d31s16); - - d4s16 = vqrshrn_n_s32(q11s32, 14); - d5s16 = vqrshrn_n_s32(q12s32, 14); - d10s16 = vqrshrn_n_s32(q8s32, 14); - d11s16 = vqrshrn_n_s32(q9s32, 14); - q2s16 = vcombine_s16(d4s16, d5s16); - q5s16 = vcombine_s16(d10s16, d11s16); - - // stage 5 - q8s16 = vaddq_s16(q0s16, q3s16); - q9s16 = vaddq_s16(q1s16, q2s16); - q10s16 = vsubq_s16(q1s16, q2s16); - q11s16 = vsubq_s16(q0s16, q3s16); - q12s16 = vsubq_s16(q7s16, q4s16); - q13s16 = vsubq_s16(q6s16, q5s16); - q14s16 = vaddq_s16(q6s16, q5s16); - q15s16 = vaddq_s16(q7s16, q4s16); - - // stage 6 - d20s16 = vget_low_s16(q10s16); - d21s16 = vget_high_s16(q10s16); - d22s16 = vget_low_s16(q11s16); - d23s16 = vget_high_s16(q11s16); - d24s16 = vget_low_s16(q12s16); - d25s16 = vget_high_s16(q12s16); - d26s16 = vget_low_s16(q13s16); - d27s16 = vget_high_s16(q13s16); - - d14s16 = vdup_n_s16(cospi_16_64); - - q3s32 = vmull_s16(d26s16, d14s16); - q4s32 = vmull_s16(d27s16, d14s16); - q0s32 = vmull_s16(d20s16, d14s16); - q1s32 = vmull_s16(d21s16, d14s16); - - q5s32 = vsubq_s32(q3s32, q0s32); - q6s32 = vsubq_s32(q4s32, q1s32); - q10s32 = vaddq_s32(q3s32, q0s32); - q4s32 = 
vaddq_s32(q4s32, q1s32); - - d4s16 = vqrshrn_n_s32(q5s32, 14); - d5s16 = vqrshrn_n_s32(q6s32, 14); - d10s16 = vqrshrn_n_s32(q10s32, 14); - d11s16 = vqrshrn_n_s32(q4s32, 14); - q2s16 = vcombine_s16(d4s16, d5s16); - q5s16 = vcombine_s16(d10s16, d11s16); - - q0s32 = vmull_s16(d22s16, d14s16); - q1s32 = vmull_s16(d23s16, d14s16); - q13s32 = vmull_s16(d24s16, d14s16); - q6s32 = vmull_s16(d25s16, d14s16); - - q10s32 = vsubq_s32(q13s32, q0s32); - q4s32 = vsubq_s32(q6s32, q1s32); - q13s32 = vaddq_s32(q13s32, q0s32); - q6s32 = vaddq_s32(q6s32, q1s32); - - d6s16 = vqrshrn_n_s32(q10s32, 14); - d7s16 = vqrshrn_n_s32(q4s32, 14); - d8s16 = vqrshrn_n_s32(q13s32, 14); - d9s16 = vqrshrn_n_s32(q6s32, 14); - q3s16 = vcombine_s16(d6s16, d7s16); - q4s16 = vcombine_s16(d8s16, d9s16); - - // stage 7 - if (skip_adding != 0) { - d = dest; - // load the data in pass1 - q0s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q1s16 = vld1q_s16(pass1Output); - pass1Output += 8; - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - d13s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - - q12s16 = vaddq_s16(q0s16, q15s16); - q13s16 = vaddq_s16(q1s16, q14s16); - q12s16 = vrshrq_n_s16(q12s16, 6); - q13s16 = vrshrq_n_s16(q13s16, 6); - q12u16 = vaddw_u8(vreinterpretq_u16_s16(q12s16), - vreinterpret_u8_s64(d12s64)); - q13u16 = vaddw_u8(vreinterpretq_u16_s16(q13s16), - vreinterpret_u8_s64(d13s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); - d13u8 = vqmovun_s16(vreinterpretq_s16_u16(q13u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d13u8)); - d += dest_stride; - q14s16 = vsubq_s16(q1s16, q14s16); - q15s16 = vsubq_s16(q0s16, q15s16); - - q10s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q11s16 = vld1q_s16(pass1Output); - pass1Output += 8; - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - d13s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q12s16 = vaddq_s16(q10s16, 
q5s16); - q13s16 = vaddq_s16(q11s16, q4s16); - q12s16 = vrshrq_n_s16(q12s16, 6); - q13s16 = vrshrq_n_s16(q13s16, 6); - q12u16 = vaddw_u8(vreinterpretq_u16_s16(q12s16), - vreinterpret_u8_s64(d12s64)); - q13u16 = vaddw_u8(vreinterpretq_u16_s16(q13s16), - vreinterpret_u8_s64(d13s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); - d13u8 = vqmovun_s16(vreinterpretq_s16_u16(q13u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d13u8)); - d += dest_stride; - q4s16 = vsubq_s16(q11s16, q4s16); - q5s16 = vsubq_s16(q10s16, q5s16); - - q0s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q1s16 = vld1q_s16(pass1Output); - pass1Output += 8; - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - d13s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q12s16 = vaddq_s16(q0s16, q3s16); - q13s16 = vaddq_s16(q1s16, q2s16); - q12s16 = vrshrq_n_s16(q12s16, 6); - q13s16 = vrshrq_n_s16(q13s16, 6); - q12u16 = vaddw_u8(vreinterpretq_u16_s16(q12s16), - vreinterpret_u8_s64(d12s64)); - q13u16 = vaddw_u8(vreinterpretq_u16_s16(q13s16), - vreinterpret_u8_s64(d13s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); - d13u8 = vqmovun_s16(vreinterpretq_s16_u16(q13u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d13u8)); - d += dest_stride; - q2s16 = vsubq_s16(q1s16, q2s16); - q3s16 = vsubq_s16(q0s16, q3s16); - - q10s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q11s16 = vld1q_s16(pass1Output); - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - d13s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q12s16 = vaddq_s16(q10s16, q9s16); - q13s16 = vaddq_s16(q11s16, q8s16); - q12s16 = vrshrq_n_s16(q12s16, 6); - q13s16 = vrshrq_n_s16(q13s16, 6); - q12u16 = vaddw_u8(vreinterpretq_u16_s16(q12s16), - vreinterpret_u8_s64(d12s64)); - q13u16 = vaddw_u8(vreinterpretq_u16_s16(q13s16), - 
vreinterpret_u8_s64(d13s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); - d13u8 = vqmovun_s16(vreinterpretq_s16_u16(q13u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d13u8)); - d += dest_stride; - q8s16 = vsubq_s16(q11s16, q8s16); - q9s16 = vsubq_s16(q10s16, q9s16); - - // store the data out 8,9,10,11,12,13,14,15 - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q8s16 = vrshrq_n_s16(q8s16, 6); - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), - vreinterpret_u8_s64(d12s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q9s16 = vrshrq_n_s16(q9s16, 6); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), - vreinterpret_u8_s64(d12s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q2s16 = vrshrq_n_s16(q2s16, 6); - q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2s16), - vreinterpret_u8_s64(d12s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q3s16 = vrshrq_n_s16(q3s16, 6); - q3u16 = vaddw_u8(vreinterpretq_u16_s16(q3s16), - vreinterpret_u8_s64(d12s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q3u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q4s16 = vrshrq_n_s16(q4s16, 6); - q4u16 = vaddw_u8(vreinterpretq_u16_s16(q4s16), - vreinterpret_u8_s64(d12s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q4u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; 
- q5s16 = vrshrq_n_s16(q5s16, 6); - q5u16 = vaddw_u8(vreinterpretq_u16_s16(q5s16), - vreinterpret_u8_s64(d12s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q5u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - - d12s64 = vld1_s64((int64_t *)dest); - dest += dest_stride; - q14s16 = vrshrq_n_s16(q14s16, 6); - q14u16 = vaddw_u8(vreinterpretq_u16_s16(q14s16), - vreinterpret_u8_s64(d12s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q14u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - d += dest_stride; - - d12s64 = vld1_s64((int64_t *)dest); - q15s16 = vrshrq_n_s16(q15s16, 6); - q15u16 = vaddw_u8(vreinterpretq_u16_s16(q15s16), - vreinterpret_u8_s64(d12s64)); - d12u8 = vqmovun_s16(vreinterpretq_s16_u16(q15u16)); - vst1_u64((uint64_t *)d, vreinterpret_u64_u8(d12u8)); - } else { // skip_adding_dest - q0s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q1s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q12s16 = vaddq_s16(q0s16, q15s16); - q13s16 = vaddq_s16(q1s16, q14s16); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - vst1_u64((uint64_t *)out, d24u64); - out += 4; - vst1_u64((uint64_t *)out, d25u64); - out += 12; - vst1_u64((uint64_t *)out, d26u64); - out += 4; - vst1_u64((uint64_t *)out, d27u64); - out += 12; - q14s16 = vsubq_s16(q1s16, q14s16); - q15s16 = vsubq_s16(q0s16, q15s16); - - q10s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q11s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q12s16 = vaddq_s16(q10s16, q5s16); - q13s16 = vaddq_s16(q11s16, q4s16); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - vst1_u64((uint64_t *)out, d24u64); - out += 4; - 
vst1_u64((uint64_t *)out, d25u64); - out += 12; - vst1_u64((uint64_t *)out, d26u64); - out += 4; - vst1_u64((uint64_t *)out, d27u64); - out += 12; - q4s16 = vsubq_s16(q11s16, q4s16); - q5s16 = vsubq_s16(q10s16, q5s16); - - q0s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q1s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q12s16 = vaddq_s16(q0s16, q3s16); - q13s16 = vaddq_s16(q1s16, q2s16); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - vst1_u64((uint64_t *)out, d24u64); - out += 4; - vst1_u64((uint64_t *)out, d25u64); - out += 12; - vst1_u64((uint64_t *)out, d26u64); - out += 4; - vst1_u64((uint64_t *)out, d27u64); - out += 12; - q2s16 = vsubq_s16(q1s16, q2s16); - q3s16 = vsubq_s16(q0s16, q3s16); - - q10s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q11s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q12s16 = vaddq_s16(q10s16, q9s16); - q13s16 = vaddq_s16(q11s16, q8s16); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - vst1_u64((uint64_t *)out, d24u64); - out += 4; - vst1_u64((uint64_t *)out, d25u64); - out += 12; - vst1_u64((uint64_t *)out, d26u64); - out += 4; - vst1_u64((uint64_t *)out, d27u64); - out += 12; - q8s16 = vsubq_s16(q11s16, q8s16); - q9s16 = vsubq_s16(q10s16, q9s16); - - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q8s16))); - out += 4; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q8s16))); - out += 12; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q9s16))); - out += 4; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q9s16))); - out += 12; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q2s16))); - out += 4; - 
vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q2s16))); - out += 12; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q3s16))); - out += 4; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q3s16))); - out += 12; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q4s16))); - out += 4; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q4s16))); - out += 12; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q5s16))); - out += 4; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q5s16))); - out += 12; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q14s16))); - out += 4; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q14s16))); - out += 12; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_low_s16(q15s16))); - out += 4; - vst1_u64((uint64_t *)out, vreinterpret_u64_s16(vget_high_s16(q15s16))); - } - return; -} - -void vpx_idct16x16_10_add_neon_pass1( - int16_t *in, - int16_t *out, - int output_stride) { - int16x4_t d4s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - uint64x1_t d4u64, d5u64, d18u64, d19u64, d20u64, d21u64, d22u64, d23u64; - uint64x1_t d24u64, d25u64, d26u64, d27u64, d28u64, d29u64, d30u64, d31u64; - int16x8_t q0s16, q1s16, q2s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - int32x4_t q6s32, q9s32; - int32x4_t q10s32, q11s32, q12s32, q15s32; - int16x8x2_t q0x2s16; - - q0x2s16 = vld2q_s16(in); - q8s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q9s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q10s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q11s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q12s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q13s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); - q14s16 = q0x2s16.val[0]; - in += 16; - q0x2s16 = vld2q_s16(in); 
- q15s16 = q0x2s16.val[0]; - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // stage 3 - q0s16 = vdupq_n_s16(cospi_28_64 * 2); - q1s16 = vdupq_n_s16(cospi_4_64 * 2); - - q4s16 = vqrdmulhq_s16(q9s16, q0s16); - q7s16 = vqrdmulhq_s16(q9s16, q1s16); - - // stage 4 - q1s16 = vdupq_n_s16(cospi_16_64 * 2); - d4s16 = vdup_n_s16(cospi_16_64); - - q8s16 = vqrdmulhq_s16(q8s16, q1s16); - - d8s16 = vget_low_s16(q4s16); - d9s16 = vget_high_s16(q4s16); - d14s16 = vget_low_s16(q7s16); - d15s16 = vget_high_s16(q7s16); - q9s32 = vmull_s16(d14s16, d4s16); - q10s32 = vmull_s16(d15s16, d4s16); - q12s32 = vmull_s16(d9s16, d4s16); - q11s32 = vmull_s16(d8s16, d4s16); - - q15s32 = vsubq_s32(q10s32, q12s32); - q6s32 = vsubq_s32(q9s32, q11s32); - q9s32 = vaddq_s32(q9s32, q11s32); - q10s32 = vaddq_s32(q10s32, q12s32); - - d11s16 = vqrshrn_n_s32(q15s32, 14); - d10s16 = vqrshrn_n_s32(q6s32, 14); - d12s16 = vqrshrn_n_s32(q9s32, 14); - d13s16 = vqrshrn_n_s32(q10s32, 14); - q5s16 = vcombine_s16(d10s16, d11s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - // stage 6 - q2s16 = vaddq_s16(q8s16, q7s16); - q9s16 = vaddq_s16(q8s16, q6s16); - q10s16 = vaddq_s16(q8s16, q5s16); - q11s16 = vaddq_s16(q8s16, q4s16); - q12s16 = vsubq_s16(q8s16, q4s16); - q13s16 = vsubq_s16(q8s16, q5s16); - q14s16 = vsubq_s16(q8s16, q6s16); - q15s16 = vsubq_s16(q8s16, q7s16); - - d4u64 = vreinterpret_u64_s16(vget_low_s16(q2s16)); - d5u64 = vreinterpret_u64_s16(vget_high_s16(q2s16)); - d18u64 = vreinterpret_u64_s16(vget_low_s16(q9s16)); - d19u64 = vreinterpret_u64_s16(vget_high_s16(q9s16)); - d20u64 = vreinterpret_u64_s16(vget_low_s16(q10s16)); - d21u64 = vreinterpret_u64_s16(vget_high_s16(q10s16)); - d22u64 = vreinterpret_u64_s16(vget_low_s16(q11s16)); - d23u64 = vreinterpret_u64_s16(vget_high_s16(q11s16)); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = 
vreinterpret_u64_s16(vget_high_s16(q13s16)); - d28u64 = vreinterpret_u64_s16(vget_low_s16(q14s16)); - d29u64 = vreinterpret_u64_s16(vget_high_s16(q14s16)); - d30u64 = vreinterpret_u64_s16(vget_low_s16(q15s16)); - d31u64 = vreinterpret_u64_s16(vget_high_s16(q15s16)); - - // store the data - output_stride >>= 1; // output_stride / 2, out is int16_t - vst1_u64((uint64_t *)out, d4u64); - out += output_stride; - vst1_u64((uint64_t *)out, d5u64); - out += output_stride; - vst1_u64((uint64_t *)out, d18u64); - out += output_stride; - vst1_u64((uint64_t *)out, d19u64); - out += output_stride; - vst1_u64((uint64_t *)out, d20u64); - out += output_stride; - vst1_u64((uint64_t *)out, d21u64); - out += output_stride; - vst1_u64((uint64_t *)out, d22u64); - out += output_stride; - vst1_u64((uint64_t *)out, d23u64); - out += output_stride; - vst1_u64((uint64_t *)out, d24u64); - out += output_stride; - vst1_u64((uint64_t *)out, d25u64); - out += output_stride; - vst1_u64((uint64_t *)out, d26u64); - out += output_stride; - vst1_u64((uint64_t *)out, d27u64); - out += output_stride; - vst1_u64((uint64_t *)out, d28u64); - out += output_stride; - vst1_u64((uint64_t *)out, d29u64); - out += output_stride; - vst1_u64((uint64_t *)out, d30u64); - out += output_stride; - vst1_u64((uint64_t *)out, d31u64); - return; -} - -void vpx_idct16x16_10_add_neon_pass2( - int16_t *src, - int16_t *out, - int16_t *pass1Output, - int16_t skip_adding, - uint8_t *dest, - int dest_stride) { - int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - int16x4_t d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d30s16, d31s16; - uint64x1_t d4u64, d5u64, d6u64, d7u64, d8u64, d9u64, d10u64, d11u64; - uint64x1_t d16u64, d17u64, d18u64, d19u64; - uint64x1_t d24u64, d25u64, d26u64, d27u64, d28u64, d29u64, d30u64, d31u64; - int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, 
q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q8s32, q9s32; - int32x4_t q10s32, q11s32, q12s32, q13s32; - int16x8x2_t q0x2s16; - (void)skip_adding; - (void)dest; - (void)dest_stride; - - q0x2s16 = vld2q_s16(src); - q8s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q9s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q10s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q11s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q12s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q13s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q14s16 = q0x2s16.val[0]; - src += 16; - q0x2s16 = vld2q_s16(src); - q15s16 = q0x2s16.val[0]; - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // stage 3 - q6s16 = vdupq_n_s16(cospi_30_64 * 2); - q0s16 = vqrdmulhq_s16(q8s16, q6s16); - q6s16 = vdupq_n_s16(cospi_2_64 * 2); - q7s16 = vqrdmulhq_s16(q8s16, q6s16); - - q15s16 = vdupq_n_s16(-cospi_26_64 * 2); - q14s16 = vdupq_n_s16(cospi_6_64 * 2); - q3s16 = vqrdmulhq_s16(q9s16, q15s16); - q4s16 = vqrdmulhq_s16(q9s16, q14s16); - - // stage 4 - d0s16 = vget_low_s16(q0s16); - d1s16 = vget_high_s16(q0s16); - d6s16 = vget_low_s16(q3s16); - d7s16 = vget_high_s16(q3s16); - d8s16 = vget_low_s16(q4s16); - d9s16 = vget_high_s16(q4s16); - d14s16 = vget_low_s16(q7s16); - d15s16 = vget_high_s16(q7s16); - - d30s16 = vdup_n_s16(cospi_8_64); - d31s16 = vdup_n_s16(cospi_24_64); - - q12s32 = vmull_s16(d14s16, d31s16); - q5s32 = vmull_s16(d15s16, d31s16); - q2s32 = vmull_s16(d0s16, d31s16); - q11s32 = vmull_s16(d1s16, d31s16); - - q12s32 = vmlsl_s16(q12s32, d0s16, d30s16); - q5s32 = vmlsl_s16(q5s32, d1s16, d30s16); - q2s32 = vmlal_s16(q2s32, d14s16, d30s16); - q11s32 = vmlal_s16(q11s32, d15s16, d30s16); - - d2s16 = vqrshrn_n_s32(q12s32, 14); - d3s16 = vqrshrn_n_s32(q5s32, 14); - d12s16 = vqrshrn_n_s32(q2s32, 14); - 
d13s16 = vqrshrn_n_s32(q11s32, 14); - q1s16 = vcombine_s16(d2s16, d3s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - d30s16 = vdup_n_s16(-cospi_8_64); - q10s32 = vmull_s16(d8s16, d30s16); - q13s32 = vmull_s16(d9s16, d30s16); - q8s32 = vmull_s16(d6s16, d30s16); - q9s32 = vmull_s16(d7s16, d30s16); - - q10s32 = vmlsl_s16(q10s32, d6s16, d31s16); - q13s32 = vmlsl_s16(q13s32, d7s16, d31s16); - q8s32 = vmlal_s16(q8s32, d8s16, d31s16); - q9s32 = vmlal_s16(q9s32, d9s16, d31s16); - - d4s16 = vqrshrn_n_s32(q10s32, 14); - d5s16 = vqrshrn_n_s32(q13s32, 14); - d10s16 = vqrshrn_n_s32(q8s32, 14); - d11s16 = vqrshrn_n_s32(q9s32, 14); - q2s16 = vcombine_s16(d4s16, d5s16); - q5s16 = vcombine_s16(d10s16, d11s16); - - // stage 5 - q8s16 = vaddq_s16(q0s16, q3s16); - q9s16 = vaddq_s16(q1s16, q2s16); - q10s16 = vsubq_s16(q1s16, q2s16); - q11s16 = vsubq_s16(q0s16, q3s16); - q12s16 = vsubq_s16(q7s16, q4s16); - q13s16 = vsubq_s16(q6s16, q5s16); - q14s16 = vaddq_s16(q6s16, q5s16); - q15s16 = vaddq_s16(q7s16, q4s16); - - // stage 6 - d20s16 = vget_low_s16(q10s16); - d21s16 = vget_high_s16(q10s16); - d22s16 = vget_low_s16(q11s16); - d23s16 = vget_high_s16(q11s16); - d24s16 = vget_low_s16(q12s16); - d25s16 = vget_high_s16(q12s16); - d26s16 = vget_low_s16(q13s16); - d27s16 = vget_high_s16(q13s16); - - d14s16 = vdup_n_s16(cospi_16_64); - q3s32 = vmull_s16(d26s16, d14s16); - q4s32 = vmull_s16(d27s16, d14s16); - q0s32 = vmull_s16(d20s16, d14s16); - q1s32 = vmull_s16(d21s16, d14s16); - - q5s32 = vsubq_s32(q3s32, q0s32); - q6s32 = vsubq_s32(q4s32, q1s32); - q0s32 = vaddq_s32(q3s32, q0s32); - q4s32 = vaddq_s32(q4s32, q1s32); - - d4s16 = vqrshrn_n_s32(q5s32, 14); - d5s16 = vqrshrn_n_s32(q6s32, 14); - d10s16 = vqrshrn_n_s32(q0s32, 14); - d11s16 = vqrshrn_n_s32(q4s32, 14); - q2s16 = vcombine_s16(d4s16, d5s16); - q5s16 = vcombine_s16(d10s16, d11s16); - - q0s32 = vmull_s16(d22s16, d14s16); - q1s32 = vmull_s16(d23s16, d14s16); - q13s32 = vmull_s16(d24s16, d14s16); - q6s32 = vmull_s16(d25s16, d14s16); - - 
q10s32 = vsubq_s32(q13s32, q0s32); - q4s32 = vsubq_s32(q6s32, q1s32); - q13s32 = vaddq_s32(q13s32, q0s32); - q6s32 = vaddq_s32(q6s32, q1s32); - - d6s16 = vqrshrn_n_s32(q10s32, 14); - d7s16 = vqrshrn_n_s32(q4s32, 14); - d8s16 = vqrshrn_n_s32(q13s32, 14); - d9s16 = vqrshrn_n_s32(q6s32, 14); - q3s16 = vcombine_s16(d6s16, d7s16); - q4s16 = vcombine_s16(d8s16, d9s16); - - // stage 7 - q0s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q1s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q12s16 = vaddq_s16(q0s16, q15s16); - q13s16 = vaddq_s16(q1s16, q14s16); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - vst1_u64((uint64_t *)out, d24u64); - out += 4; - vst1_u64((uint64_t *)out, d25u64); - out += 12; - vst1_u64((uint64_t *)out, d26u64); - out += 4; - vst1_u64((uint64_t *)out, d27u64); - out += 12; - q14s16 = vsubq_s16(q1s16, q14s16); - q15s16 = vsubq_s16(q0s16, q15s16); - - q10s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q11s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q12s16 = vaddq_s16(q10s16, q5s16); - q13s16 = vaddq_s16(q11s16, q4s16); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - vst1_u64((uint64_t *)out, d24u64); - out += 4; - vst1_u64((uint64_t *)out, d25u64); - out += 12; - vst1_u64((uint64_t *)out, d26u64); - out += 4; - vst1_u64((uint64_t *)out, d27u64); - out += 12; - q4s16 = vsubq_s16(q11s16, q4s16); - q5s16 = vsubq_s16(q10s16, q5s16); - - q0s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q1s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q12s16 = vaddq_s16(q0s16, q3s16); - q13s16 = vaddq_s16(q1s16, q2s16); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = 
vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - vst1_u64((uint64_t *)out, d24u64); - out += 4; - vst1_u64((uint64_t *)out, d25u64); - out += 12; - vst1_u64((uint64_t *)out, d26u64); - out += 4; - vst1_u64((uint64_t *)out, d27u64); - out += 12; - q2s16 = vsubq_s16(q1s16, q2s16); - q3s16 = vsubq_s16(q0s16, q3s16); - - q10s16 = vld1q_s16(pass1Output); - pass1Output += 8; - q11s16 = vld1q_s16(pass1Output); - q12s16 = vaddq_s16(q10s16, q9s16); - q13s16 = vaddq_s16(q11s16, q8s16); - d24u64 = vreinterpret_u64_s16(vget_low_s16(q12s16)); - d25u64 = vreinterpret_u64_s16(vget_high_s16(q12s16)); - d26u64 = vreinterpret_u64_s16(vget_low_s16(q13s16)); - d27u64 = vreinterpret_u64_s16(vget_high_s16(q13s16)); - vst1_u64((uint64_t *)out, d24u64); - out += 4; - vst1_u64((uint64_t *)out, d25u64); - out += 12; - vst1_u64((uint64_t *)out, d26u64); - out += 4; - vst1_u64((uint64_t *)out, d27u64); - out += 12; - q8s16 = vsubq_s16(q11s16, q8s16); - q9s16 = vsubq_s16(q10s16, q9s16); - - d4u64 = vreinterpret_u64_s16(vget_low_s16(q2s16)); - d5u64 = vreinterpret_u64_s16(vget_high_s16(q2s16)); - d6u64 = vreinterpret_u64_s16(vget_low_s16(q3s16)); - d7u64 = vreinterpret_u64_s16(vget_high_s16(q3s16)); - d8u64 = vreinterpret_u64_s16(vget_low_s16(q4s16)); - d9u64 = vreinterpret_u64_s16(vget_high_s16(q4s16)); - d10u64 = vreinterpret_u64_s16(vget_low_s16(q5s16)); - d11u64 = vreinterpret_u64_s16(vget_high_s16(q5s16)); - d16u64 = vreinterpret_u64_s16(vget_low_s16(q8s16)); - d17u64 = vreinterpret_u64_s16(vget_high_s16(q8s16)); - d18u64 = vreinterpret_u64_s16(vget_low_s16(q9s16)); - d19u64 = vreinterpret_u64_s16(vget_high_s16(q9s16)); - d28u64 = vreinterpret_u64_s16(vget_low_s16(q14s16)); - d29u64 = vreinterpret_u64_s16(vget_high_s16(q14s16)); - d30u64 = vreinterpret_u64_s16(vget_low_s16(q15s16)); - d31u64 = vreinterpret_u64_s16(vget_high_s16(q15s16)); - - vst1_u64((uint64_t *)out, d16u64); - out 
+= 4; - vst1_u64((uint64_t *)out, d17u64); - out += 12; - vst1_u64((uint64_t *)out, d18u64); - out += 4; - vst1_u64((uint64_t *)out, d19u64); - out += 12; - vst1_u64((uint64_t *)out, d4u64); - out += 4; - vst1_u64((uint64_t *)out, d5u64); - out += 12; - vst1_u64((uint64_t *)out, d6u64); - out += 4; - vst1_u64((uint64_t *)out, d7u64); - out += 12; - vst1_u64((uint64_t *)out, d8u64); - out += 4; - vst1_u64((uint64_t *)out, d9u64); - out += 12; - vst1_u64((uint64_t *)out, d10u64); - out += 4; - vst1_u64((uint64_t *)out, d11u64); - out += 12; - vst1_u64((uint64_t *)out, d28u64); - out += 4; - vst1_u64((uint64_t *)out, d29u64); - out += 12; - vst1_u64((uint64_t *)out, d30u64); - out += 4; - vst1_u64((uint64_t *)out, d31u64); - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct16x16_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct16x16_neon.c deleted file mode 100644 index 352979aa16..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/idct16x16_neon.c +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "vpx_dsp/vpx_dsp_common.h" - -void vpx_idct16x16_256_add_neon_pass1(const int16_t *input, - int16_t *output, - int output_stride); -void vpx_idct16x16_256_add_neon_pass2(const int16_t *src, - int16_t *output, - int16_t *pass1Output, - int16_t skip_adding, - uint8_t *dest, - int dest_stride); -void vpx_idct16x16_10_add_neon_pass1(const int16_t *input, - int16_t *output, - int output_stride); -void vpx_idct16x16_10_add_neon_pass2(const int16_t *src, - int16_t *output, - int16_t *pass1Output, - int16_t skip_adding, - uint8_t *dest, - int dest_stride); - -#if HAVE_NEON_ASM -/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */ -extern void vpx_push_neon(int64_t *store); -extern void vpx_pop_neon(int64_t *store); -#endif // HAVE_NEON_ASM - -void vpx_idct16x16_256_add_neon(const int16_t *input, - uint8_t *dest, int dest_stride) { -#if HAVE_NEON_ASM - int64_t store_reg[8]; -#endif - int16_t pass1_output[16*16] = {0}; - int16_t row_idct_output[16*16] = {0}; - -#if HAVE_NEON_ASM - // save d8-d15 register values. - vpx_push_neon(store_reg); -#endif - - /* Parallel idct on the upper 8 rows */ - // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the - // stage 6 result in pass1_output. - vpx_idct16x16_256_add_neon_pass1(input, pass1_output, 8); - - // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines - // with result in pass1(pass1_output) to calculate final result in stage 7 - // which will be saved into row_idct_output. - vpx_idct16x16_256_add_neon_pass2(input+1, - row_idct_output, - pass1_output, - 0, - dest, - dest_stride); - - /* Parallel idct on the lower 8 rows */ - // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the - // stage 6 result in pass1_output. 
- vpx_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8); - - // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines - // with result in pass1(pass1_output) to calculate final result in stage 7 - // which will be saved into row_idct_output. - vpx_idct16x16_256_add_neon_pass2(input+8*16+1, - row_idct_output+8, - pass1_output, - 0, - dest, - dest_stride); - - /* Parallel idct on the left 8 columns */ - // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the - // stage 6 result in pass1_output. - vpx_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8); - - // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines - // with result in pass1(pass1_output) to calculate final result in stage 7. - // Then add the result to the destination data. - vpx_idct16x16_256_add_neon_pass2(row_idct_output+1, - row_idct_output, - pass1_output, - 1, - dest, - dest_stride); - - /* Parallel idct on the right 8 columns */ - // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the - // stage 6 result in pass1_output. - vpx_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); - - // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines - // with result in pass1(pass1_output) to calculate final result in stage 7. - // Then add the result to the destination data. - vpx_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1, - row_idct_output+8, - pass1_output, - 1, - dest+8, - dest_stride); - -#if HAVE_NEON_ASM - // restore d8-d15 register values. - vpx_pop_neon(store_reg); -#endif - - return; -} - -void vpx_idct16x16_10_add_neon(const int16_t *input, - uint8_t *dest, int dest_stride) { -#if HAVE_NEON_ASM - int64_t store_reg[8]; -#endif - int16_t pass1_output[16*16] = {0}; - int16_t row_idct_output[16*16] = {0}; - -#if HAVE_NEON_ASM - // save d8-d15 register values. 
- vpx_push_neon(store_reg); -#endif - - /* Parallel idct on the upper 8 rows */ - // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the - // stage 6 result in pass1_output. - vpx_idct16x16_10_add_neon_pass1(input, pass1_output, 8); - - // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines - // with result in pass1(pass1_output) to calculate final result in stage 7 - // which will be saved into row_idct_output. - vpx_idct16x16_10_add_neon_pass2(input+1, - row_idct_output, - pass1_output, - 0, - dest, - dest_stride); - - /* Skip Parallel idct on the lower 8 rows as they are all 0s */ - - /* Parallel idct on the left 8 columns */ - // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the - // stage 6 result in pass1_output. - vpx_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8); - - // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines - // with result in pass1(pass1_output) to calculate final result in stage 7. - // Then add the result to the destination data. - vpx_idct16x16_256_add_neon_pass2(row_idct_output+1, - row_idct_output, - pass1_output, - 1, - dest, - dest_stride); - - /* Parallel idct on the right 8 columns */ - // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the - // stage 6 result in pass1_output. - vpx_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8); - - // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines - // with result in pass1(pass1_output) to calculate final result in stage 7. - // Then add the result to the destination data. - vpx_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1, - row_idct_output+8, - pass1_output, - 1, - dest+8, - dest_stride); - -#if HAVE_NEON_ASM - // restore d8-d15 register values. 
- vpx_pop_neon(store_reg); -#endif - - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c deleted file mode 100644 index c25c0c4a5c..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> - -#include "./vpx_config.h" - -#include "vpx_dsp/inv_txfm.h" -#include "vpx_ports/mem.h" - -static INLINE void LD_16x8( - uint8_t *d, - int d_stride, - uint8x16_t *q8u8, - uint8x16_t *q9u8, - uint8x16_t *q10u8, - uint8x16_t *q11u8, - uint8x16_t *q12u8, - uint8x16_t *q13u8, - uint8x16_t *q14u8, - uint8x16_t *q15u8) { - *q8u8 = vld1q_u8(d); - d += d_stride; - *q9u8 = vld1q_u8(d); - d += d_stride; - *q10u8 = vld1q_u8(d); - d += d_stride; - *q11u8 = vld1q_u8(d); - d += d_stride; - *q12u8 = vld1q_u8(d); - d += d_stride; - *q13u8 = vld1q_u8(d); - d += d_stride; - *q14u8 = vld1q_u8(d); - d += d_stride; - *q15u8 = vld1q_u8(d); - return; -} - -static INLINE void ADD_DIFF_16x8( - uint8x16_t qdiffu8, - uint8x16_t *q8u8, - uint8x16_t *q9u8, - uint8x16_t *q10u8, - uint8x16_t *q11u8, - uint8x16_t *q12u8, - uint8x16_t *q13u8, - uint8x16_t *q14u8, - uint8x16_t *q15u8) { - *q8u8 = vqaddq_u8(*q8u8, qdiffu8); - *q9u8 = vqaddq_u8(*q9u8, qdiffu8); - *q10u8 = vqaddq_u8(*q10u8, qdiffu8); - *q11u8 = vqaddq_u8(*q11u8, qdiffu8); - *q12u8 = vqaddq_u8(*q12u8, qdiffu8); - *q13u8 = vqaddq_u8(*q13u8, qdiffu8); - *q14u8 = vqaddq_u8(*q14u8, qdiffu8); - *q15u8 = vqaddq_u8(*q15u8, qdiffu8); - return; -} - -static INLINE void SUB_DIFF_16x8( - uint8x16_t 
qdiffu8, - uint8x16_t *q8u8, - uint8x16_t *q9u8, - uint8x16_t *q10u8, - uint8x16_t *q11u8, - uint8x16_t *q12u8, - uint8x16_t *q13u8, - uint8x16_t *q14u8, - uint8x16_t *q15u8) { - *q8u8 = vqsubq_u8(*q8u8, qdiffu8); - *q9u8 = vqsubq_u8(*q9u8, qdiffu8); - *q10u8 = vqsubq_u8(*q10u8, qdiffu8); - *q11u8 = vqsubq_u8(*q11u8, qdiffu8); - *q12u8 = vqsubq_u8(*q12u8, qdiffu8); - *q13u8 = vqsubq_u8(*q13u8, qdiffu8); - *q14u8 = vqsubq_u8(*q14u8, qdiffu8); - *q15u8 = vqsubq_u8(*q15u8, qdiffu8); - return; -} - -static INLINE void ST_16x8( - uint8_t *d, - int d_stride, - uint8x16_t *q8u8, - uint8x16_t *q9u8, - uint8x16_t *q10u8, - uint8x16_t *q11u8, - uint8x16_t *q12u8, - uint8x16_t *q13u8, - uint8x16_t *q14u8, - uint8x16_t *q15u8) { - vst1q_u8(d, *q8u8); - d += d_stride; - vst1q_u8(d, *q9u8); - d += d_stride; - vst1q_u8(d, *q10u8); - d += d_stride; - vst1q_u8(d, *q11u8); - d += d_stride; - vst1q_u8(d, *q12u8); - d += d_stride; - vst1q_u8(d, *q13u8); - d += d_stride; - vst1q_u8(d, *q14u8); - d += d_stride; - vst1q_u8(d, *q15u8); - return; -} - -void vpx_idct32x32_1_add_neon( - int16_t *input, - uint8_t *dest, - int dest_stride) { - uint8x16_t q0u8, q8u8, q9u8, q10u8, q11u8, q12u8, q13u8, q14u8, q15u8; - int i, j, dest_stride8; - uint8_t *d; - int16_t a1, cospi_16_64 = 11585; - int16_t out = dct_const_round_shift(input[0] * cospi_16_64); - - out = dct_const_round_shift(out * cospi_16_64); - a1 = ROUND_POWER_OF_TWO(out, 6); - - dest_stride8 = dest_stride * 8; - if (a1 >= 0) { // diff_positive_32_32 - a1 = a1 < 0 ? 0 : a1 > 255 ? 
255 : a1; - q0u8 = vdupq_n_u8(a1); - for (i = 0; i < 2; i++, dest += 16) { // diff_positive_32_32_loop - d = dest; - for (j = 0; j < 4; j++) { - LD_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, - &q12u8, &q13u8, &q14u8, &q15u8); - ADD_DIFF_16x8(q0u8, &q8u8, &q9u8, &q10u8, &q11u8, - &q12u8, &q13u8, &q14u8, &q15u8); - ST_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, - &q12u8, &q13u8, &q14u8, &q15u8); - d += dest_stride8; - } - } - } else { // diff_negative_32_32 - a1 = -a1; - a1 = a1 < 0 ? 0 : a1 > 255 ? 255 : a1; - q0u8 = vdupq_n_u8(a1); - for (i = 0; i < 2; i++, dest += 16) { // diff_negative_32_32_loop - d = dest; - for (j = 0; j < 4; j++) { - LD_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, - &q12u8, &q13u8, &q14u8, &q15u8); - SUB_DIFF_16x8(q0u8, &q8u8, &q9u8, &q10u8, &q11u8, - &q12u8, &q13u8, &q14u8, &q15u8); - ST_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8, - &q12u8, &q13u8, &q14u8, &q15u8); - d += dest_stride8; - } - } - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct32x32_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct32x32_add_neon.c deleted file mode 100644 index 025437eb96..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/idct32x32_add_neon.c +++ /dev/null @@ -1,719 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <arm_neon.h> - -#include "./vpx_config.h" -#include "vpx_dsp/txfm_common.h" - -#define LOAD_FROM_TRANSPOSED(prev, first, second) \ - q14s16 = vld1q_s16(trans_buf + first * 8); \ - q13s16 = vld1q_s16(trans_buf + second * 8); - -#define LOAD_FROM_OUTPUT(prev, first, second, qA, qB) \ - qA = vld1q_s16(out + first * 32); \ - qB = vld1q_s16(out + second * 32); - -#define STORE_IN_OUTPUT(prev, first, second, qA, qB) \ - vst1q_s16(out + first * 32, qA); \ - vst1q_s16(out + second * 32, qB); - -#define STORE_COMBINE_CENTER_RESULTS(r10, r9) \ - __STORE_COMBINE_CENTER_RESULTS(r10, r9, stride, \ - q6s16, q7s16, q8s16, q9s16); -static INLINE void __STORE_COMBINE_CENTER_RESULTS( - uint8_t *p1, - uint8_t *p2, - int stride, - int16x8_t q6s16, - int16x8_t q7s16, - int16x8_t q8s16, - int16x8_t q9s16) { - int16x4_t d8s16, d9s16, d10s16, d11s16; - - d8s16 = vld1_s16((int16_t *)p1); - p1 += stride; - d11s16 = vld1_s16((int16_t *)p2); - p2 -= stride; - d9s16 = vld1_s16((int16_t *)p1); - d10s16 = vld1_s16((int16_t *)p2); - - q7s16 = vrshrq_n_s16(q7s16, 6); - q8s16 = vrshrq_n_s16(q8s16, 6); - q9s16 = vrshrq_n_s16(q9s16, 6); - q6s16 = vrshrq_n_s16(q6s16, 6); - - q7s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q7s16), - vreinterpret_u8_s16(d9s16))); - q8s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q8s16), - vreinterpret_u8_s16(d10s16))); - q9s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q9s16), - vreinterpret_u8_s16(d11s16))); - q6s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q6s16), - vreinterpret_u8_s16(d8s16))); - - d9s16 = vreinterpret_s16_u8(vqmovun_s16(q7s16)); - d10s16 = vreinterpret_s16_u8(vqmovun_s16(q8s16)); - d11s16 = vreinterpret_s16_u8(vqmovun_s16(q9s16)); - d8s16 = vreinterpret_s16_u8(vqmovun_s16(q6s16)); - - vst1_s16((int16_t *)p1, d9s16); - p1 -= stride; - vst1_s16((int16_t *)p2, d10s16); - p2 += stride; - vst1_s16((int16_t *)p1, d8s16); - vst1_s16((int16_t *)p2, d11s16); - return; -} - -#define 
STORE_COMBINE_EXTREME_RESULTS(r7, r6); \ - __STORE_COMBINE_EXTREME_RESULTS(r7, r6, stride, \ - q4s16, q5s16, q6s16, q7s16); -static INLINE void __STORE_COMBINE_EXTREME_RESULTS( - uint8_t *p1, - uint8_t *p2, - int stride, - int16x8_t q4s16, - int16x8_t q5s16, - int16x8_t q6s16, - int16x8_t q7s16) { - int16x4_t d4s16, d5s16, d6s16, d7s16; - - d4s16 = vld1_s16((int16_t *)p1); - p1 += stride; - d7s16 = vld1_s16((int16_t *)p2); - p2 -= stride; - d5s16 = vld1_s16((int16_t *)p1); - d6s16 = vld1_s16((int16_t *)p2); - - q5s16 = vrshrq_n_s16(q5s16, 6); - q6s16 = vrshrq_n_s16(q6s16, 6); - q7s16 = vrshrq_n_s16(q7s16, 6); - q4s16 = vrshrq_n_s16(q4s16, 6); - - q5s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q5s16), - vreinterpret_u8_s16(d5s16))); - q6s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q6s16), - vreinterpret_u8_s16(d6s16))); - q7s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q7s16), - vreinterpret_u8_s16(d7s16))); - q4s16 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q4s16), - vreinterpret_u8_s16(d4s16))); - - d5s16 = vreinterpret_s16_u8(vqmovun_s16(q5s16)); - d6s16 = vreinterpret_s16_u8(vqmovun_s16(q6s16)); - d7s16 = vreinterpret_s16_u8(vqmovun_s16(q7s16)); - d4s16 = vreinterpret_s16_u8(vqmovun_s16(q4s16)); - - vst1_s16((int16_t *)p1, d5s16); - p1 -= stride; - vst1_s16((int16_t *)p2, d6s16); - p2 += stride; - vst1_s16((int16_t *)p2, d7s16); - vst1_s16((int16_t *)p1, d4s16); - return; -} - -#define DO_BUTTERFLY_STD(const_1, const_2, qA, qB) \ - DO_BUTTERFLY(q14s16, q13s16, const_1, const_2, qA, qB); -static INLINE void DO_BUTTERFLY( - int16x8_t q14s16, - int16x8_t q13s16, - int16_t first_const, - int16_t second_const, - int16x8_t *qAs16, - int16x8_t *qBs16) { - int16x4_t d30s16, d31s16; - int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q15s32; - int16x4_t dCs16, dDs16, dAs16, dBs16; - - dCs16 = vget_low_s16(q14s16); - dDs16 = vget_high_s16(q14s16); - dAs16 = vget_low_s16(q13s16); - dBs16 = vget_high_s16(q13s16); - - d30s16 
= vdup_n_s16(first_const); - d31s16 = vdup_n_s16(second_const); - - q8s32 = vmull_s16(dCs16, d30s16); - q10s32 = vmull_s16(dAs16, d31s16); - q9s32 = vmull_s16(dDs16, d30s16); - q11s32 = vmull_s16(dBs16, d31s16); - q12s32 = vmull_s16(dCs16, d31s16); - - q8s32 = vsubq_s32(q8s32, q10s32); - q9s32 = vsubq_s32(q9s32, q11s32); - - q10s32 = vmull_s16(dDs16, d31s16); - q11s32 = vmull_s16(dAs16, d30s16); - q15s32 = vmull_s16(dBs16, d30s16); - - q11s32 = vaddq_s32(q12s32, q11s32); - q10s32 = vaddq_s32(q10s32, q15s32); - - *qAs16 = vcombine_s16(vqrshrn_n_s32(q8s32, 14), - vqrshrn_n_s32(q9s32, 14)); - *qBs16 = vcombine_s16(vqrshrn_n_s32(q11s32, 14), - vqrshrn_n_s32(q10s32, 14)); - return; -} - -static INLINE void idct32_transpose_pair( - int16_t *input, - int16_t *t_buf) { - int16_t *in; - int i; - const int stride = 32; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32; - int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16; - - for (i = 0; i < 4; i++, input += 8) { - in = input; - q8s16 = vld1q_s16(in); - in += stride; - q9s16 = vld1q_s16(in); - in += stride; - q10s16 = vld1q_s16(in); - in += stride; - q11s16 = vld1q_s16(in); - in += stride; - q12s16 = vld1q_s16(in); - in += stride; - q13s16 = vld1q_s16(in); - in += stride; - q14s16 = vld1q_s16(in); - in += stride; - q15s16 = vld1q_s16(in); - - d16s16 = vget_low_s16(q8s16); - d17s16 = vget_high_s16(q8s16); - d18s16 = vget_low_s16(q9s16); - d19s16 = vget_high_s16(q9s16); - d20s16 = vget_low_s16(q10s16); - d21s16 = vget_high_s16(q10s16); - d22s16 = vget_low_s16(q11s16); - d23s16 = vget_high_s16(q11s16); - d24s16 = vget_low_s16(q12s16); - d25s16 = vget_high_s16(q12s16); - d26s16 = vget_low_s16(q13s16); - d27s16 = vget_high_s16(q13s16); - d28s16 = vget_low_s16(q14s16); - d29s16 = vget_high_s16(q14s16); - d30s16 = 
vget_low_s16(q15s16); - d31s16 = vget_high_s16(q15s16); - - q8s16 = vcombine_s16(d16s16, d24s16); // vswp d17, d24 - q9s16 = vcombine_s16(d18s16, d26s16); // vswp d19, d26 - q10s16 = vcombine_s16(d20s16, d28s16); // vswp d21, d28 - q11s16 = vcombine_s16(d22s16, d30s16); // vswp d23, d30 - q12s16 = vcombine_s16(d17s16, d25s16); - q13s16 = vcombine_s16(d19s16, d27s16); - q14s16 = vcombine_s16(d21s16, d29s16); - q15s16 = vcombine_s16(d23s16, d31s16); - - q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(q8s16), - vreinterpretq_s32_s16(q10s16)); - q1x2s32 = vtrnq_s32(vreinterpretq_s32_s16(q9s16), - vreinterpretq_s32_s16(q11s16)); - q2x2s32 = vtrnq_s32(vreinterpretq_s32_s16(q12s16), - vreinterpretq_s32_s16(q14s16)); - q3x2s32 = vtrnq_s32(vreinterpretq_s32_s16(q13s16), - vreinterpretq_s32_s16(q15s16)); - - q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]), // q8 - vreinterpretq_s16_s32(q1x2s32.val[0])); // q9 - q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]), // q10 - vreinterpretq_s16_s32(q1x2s32.val[1])); // q11 - q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]), // q12 - vreinterpretq_s16_s32(q3x2s32.val[0])); // q13 - q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]), // q14 - vreinterpretq_s16_s32(q3x2s32.val[1])); // q15 - - vst1q_s16(t_buf, q0x2s16.val[0]); - t_buf += 8; - vst1q_s16(t_buf, q0x2s16.val[1]); - t_buf += 8; - vst1q_s16(t_buf, q1x2s16.val[0]); - t_buf += 8; - vst1q_s16(t_buf, q1x2s16.val[1]); - t_buf += 8; - vst1q_s16(t_buf, q2x2s16.val[0]); - t_buf += 8; - vst1q_s16(t_buf, q2x2s16.val[1]); - t_buf += 8; - vst1q_s16(t_buf, q3x2s16.val[0]); - t_buf += 8; - vst1q_s16(t_buf, q3x2s16.val[1]); - t_buf += 8; - } - return; -} - -static INLINE void idct32_bands_end_1st_pass( - int16_t *out, - int16x8_t q2s16, - int16x8_t q3s16, - int16x8_t q6s16, - int16x8_t q7s16, - int16x8_t q8s16, - int16x8_t q9s16, - int16x8_t q10s16, - int16x8_t q11s16, - int16x8_t q12s16, - int16x8_t q13s16, - int16x8_t q14s16, - int16x8_t q15s16) { - int16x8_t 
q0s16, q1s16, q4s16, q5s16; - - STORE_IN_OUTPUT(17, 16, 17, q6s16, q7s16); - STORE_IN_OUTPUT(17, 14, 15, q8s16, q9s16); - - LOAD_FROM_OUTPUT(15, 30, 31, q0s16, q1s16); - q4s16 = vaddq_s16(q2s16, q1s16); - q5s16 = vaddq_s16(q3s16, q0s16); - q6s16 = vsubq_s16(q3s16, q0s16); - q7s16 = vsubq_s16(q2s16, q1s16); - STORE_IN_OUTPUT(31, 30, 31, q6s16, q7s16); - STORE_IN_OUTPUT(31, 0, 1, q4s16, q5s16); - - LOAD_FROM_OUTPUT(1, 12, 13, q0s16, q1s16); - q2s16 = vaddq_s16(q10s16, q1s16); - q3s16 = vaddq_s16(q11s16, q0s16); - q4s16 = vsubq_s16(q11s16, q0s16); - q5s16 = vsubq_s16(q10s16, q1s16); - - LOAD_FROM_OUTPUT(13, 18, 19, q0s16, q1s16); - q8s16 = vaddq_s16(q4s16, q1s16); - q9s16 = vaddq_s16(q5s16, q0s16); - q6s16 = vsubq_s16(q5s16, q0s16); - q7s16 = vsubq_s16(q4s16, q1s16); - STORE_IN_OUTPUT(19, 18, 19, q6s16, q7s16); - STORE_IN_OUTPUT(19, 12, 13, q8s16, q9s16); - - LOAD_FROM_OUTPUT(13, 28, 29, q0s16, q1s16); - q4s16 = vaddq_s16(q2s16, q1s16); - q5s16 = vaddq_s16(q3s16, q0s16); - q6s16 = vsubq_s16(q3s16, q0s16); - q7s16 = vsubq_s16(q2s16, q1s16); - STORE_IN_OUTPUT(29, 28, 29, q6s16, q7s16); - STORE_IN_OUTPUT(29, 2, 3, q4s16, q5s16); - - LOAD_FROM_OUTPUT(3, 10, 11, q0s16, q1s16); - q2s16 = vaddq_s16(q12s16, q1s16); - q3s16 = vaddq_s16(q13s16, q0s16); - q4s16 = vsubq_s16(q13s16, q0s16); - q5s16 = vsubq_s16(q12s16, q1s16); - - LOAD_FROM_OUTPUT(11, 20, 21, q0s16, q1s16); - q8s16 = vaddq_s16(q4s16, q1s16); - q9s16 = vaddq_s16(q5s16, q0s16); - q6s16 = vsubq_s16(q5s16, q0s16); - q7s16 = vsubq_s16(q4s16, q1s16); - STORE_IN_OUTPUT(21, 20, 21, q6s16, q7s16); - STORE_IN_OUTPUT(21, 10, 11, q8s16, q9s16); - - LOAD_FROM_OUTPUT(11, 26, 27, q0s16, q1s16); - q4s16 = vaddq_s16(q2s16, q1s16); - q5s16 = vaddq_s16(q3s16, q0s16); - q6s16 = vsubq_s16(q3s16, q0s16); - q7s16 = vsubq_s16(q2s16, q1s16); - STORE_IN_OUTPUT(27, 26, 27, q6s16, q7s16); - STORE_IN_OUTPUT(27, 4, 5, q4s16, q5s16); - - LOAD_FROM_OUTPUT(5, 8, 9, q0s16, q1s16); - q2s16 = vaddq_s16(q14s16, q1s16); - q3s16 = vaddq_s16(q15s16, 
q0s16); - q4s16 = vsubq_s16(q15s16, q0s16); - q5s16 = vsubq_s16(q14s16, q1s16); - - LOAD_FROM_OUTPUT(9, 22, 23, q0s16, q1s16); - q8s16 = vaddq_s16(q4s16, q1s16); - q9s16 = vaddq_s16(q5s16, q0s16); - q6s16 = vsubq_s16(q5s16, q0s16); - q7s16 = vsubq_s16(q4s16, q1s16); - STORE_IN_OUTPUT(23, 22, 23, q6s16, q7s16); - STORE_IN_OUTPUT(23, 8, 9, q8s16, q9s16); - - LOAD_FROM_OUTPUT(9, 24, 25, q0s16, q1s16); - q4s16 = vaddq_s16(q2s16, q1s16); - q5s16 = vaddq_s16(q3s16, q0s16); - q6s16 = vsubq_s16(q3s16, q0s16); - q7s16 = vsubq_s16(q2s16, q1s16); - STORE_IN_OUTPUT(25, 24, 25, q6s16, q7s16); - STORE_IN_OUTPUT(25, 6, 7, q4s16, q5s16); - return; -} - -static INLINE void idct32_bands_end_2nd_pass( - int16_t *out, - uint8_t *dest, - int stride, - int16x8_t q2s16, - int16x8_t q3s16, - int16x8_t q6s16, - int16x8_t q7s16, - int16x8_t q8s16, - int16x8_t q9s16, - int16x8_t q10s16, - int16x8_t q11s16, - int16x8_t q12s16, - int16x8_t q13s16, - int16x8_t q14s16, - int16x8_t q15s16) { - uint8_t *r6 = dest + 31 * stride; - uint8_t *r7 = dest/* + 0 * stride*/; - uint8_t *r9 = dest + 15 * stride; - uint8_t *r10 = dest + 16 * stride; - int str2 = stride << 1; - int16x8_t q0s16, q1s16, q4s16, q5s16; - - STORE_COMBINE_CENTER_RESULTS(r10, r9); - r10 += str2; r9 -= str2; - - LOAD_FROM_OUTPUT(17, 30, 31, q0s16, q1s16) - q4s16 = vaddq_s16(q2s16, q1s16); - q5s16 = vaddq_s16(q3s16, q0s16); - q6s16 = vsubq_s16(q3s16, q0s16); - q7s16 = vsubq_s16(q2s16, q1s16); - STORE_COMBINE_EXTREME_RESULTS(r7, r6); - r7 += str2; r6 -= str2; - - LOAD_FROM_OUTPUT(31, 12, 13, q0s16, q1s16) - q2s16 = vaddq_s16(q10s16, q1s16); - q3s16 = vaddq_s16(q11s16, q0s16); - q4s16 = vsubq_s16(q11s16, q0s16); - q5s16 = vsubq_s16(q10s16, q1s16); - - LOAD_FROM_OUTPUT(13, 18, 19, q0s16, q1s16) - q8s16 = vaddq_s16(q4s16, q1s16); - q9s16 = vaddq_s16(q5s16, q0s16); - q6s16 = vsubq_s16(q5s16, q0s16); - q7s16 = vsubq_s16(q4s16, q1s16); - STORE_COMBINE_CENTER_RESULTS(r10, r9); - r10 += str2; r9 -= str2; - - LOAD_FROM_OUTPUT(19, 28, 29, q0s16, 
q1s16) - q4s16 = vaddq_s16(q2s16, q1s16); - q5s16 = vaddq_s16(q3s16, q0s16); - q6s16 = vsubq_s16(q3s16, q0s16); - q7s16 = vsubq_s16(q2s16, q1s16); - STORE_COMBINE_EXTREME_RESULTS(r7, r6); - r7 += str2; r6 -= str2; - - LOAD_FROM_OUTPUT(29, 10, 11, q0s16, q1s16) - q2s16 = vaddq_s16(q12s16, q1s16); - q3s16 = vaddq_s16(q13s16, q0s16); - q4s16 = vsubq_s16(q13s16, q0s16); - q5s16 = vsubq_s16(q12s16, q1s16); - - LOAD_FROM_OUTPUT(11, 20, 21, q0s16, q1s16) - q8s16 = vaddq_s16(q4s16, q1s16); - q9s16 = vaddq_s16(q5s16, q0s16); - q6s16 = vsubq_s16(q5s16, q0s16); - q7s16 = vsubq_s16(q4s16, q1s16); - STORE_COMBINE_CENTER_RESULTS(r10, r9); - r10 += str2; r9 -= str2; - - LOAD_FROM_OUTPUT(21, 26, 27, q0s16, q1s16) - q4s16 = vaddq_s16(q2s16, q1s16); - q5s16 = vaddq_s16(q3s16, q0s16); - q6s16 = vsubq_s16(q3s16, q0s16); - q7s16 = vsubq_s16(q2s16, q1s16); - STORE_COMBINE_EXTREME_RESULTS(r7, r6); - r7 += str2; r6 -= str2; - - LOAD_FROM_OUTPUT(27, 8, 9, q0s16, q1s16) - q2s16 = vaddq_s16(q14s16, q1s16); - q3s16 = vaddq_s16(q15s16, q0s16); - q4s16 = vsubq_s16(q15s16, q0s16); - q5s16 = vsubq_s16(q14s16, q1s16); - - LOAD_FROM_OUTPUT(9, 22, 23, q0s16, q1s16) - q8s16 = vaddq_s16(q4s16, q1s16); - q9s16 = vaddq_s16(q5s16, q0s16); - q6s16 = vsubq_s16(q5s16, q0s16); - q7s16 = vsubq_s16(q4s16, q1s16); - STORE_COMBINE_CENTER_RESULTS(r10, r9); - - LOAD_FROM_OUTPUT(23, 24, 25, q0s16, q1s16) - q4s16 = vaddq_s16(q2s16, q1s16); - q5s16 = vaddq_s16(q3s16, q0s16); - q6s16 = vsubq_s16(q3s16, q0s16); - q7s16 = vsubq_s16(q2s16, q1s16); - STORE_COMBINE_EXTREME_RESULTS(r7, r6); - return; -} - -void vpx_idct32x32_1024_add_neon( - int16_t *input, - uint8_t *dest, - int stride) { - int i, idct32_pass_loop; - int16_t trans_buf[32 * 8]; - int16_t pass1[32 * 32]; - int16_t pass2[32 * 32]; - int16_t *out; - int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - - for (idct32_pass_loop = 0, out = pass1; - idct32_pass_loop < 2; - 
idct32_pass_loop++, - input = pass1, // the input of pass2 is the result of pass1 - out = pass2) { - for (i = 0; - i < 4; i++, - input += 32 * 8, out += 8) { // idct32_bands_loop - idct32_transpose_pair(input, trans_buf); - - // ----------------------------------------- - // BLOCK A: 16-19,28-31 - // ----------------------------------------- - // generate 16,17,30,31 - // part of stage 1 - LOAD_FROM_TRANSPOSED(0, 1, 31) - DO_BUTTERFLY_STD(cospi_31_64, cospi_1_64, &q0s16, &q2s16) - LOAD_FROM_TRANSPOSED(31, 17, 15) - DO_BUTTERFLY_STD(cospi_15_64, cospi_17_64, &q1s16, &q3s16) - // part of stage 2 - q4s16 = vaddq_s16(q0s16, q1s16); - q13s16 = vsubq_s16(q0s16, q1s16); - q6s16 = vaddq_s16(q2s16, q3s16); - q14s16 = vsubq_s16(q2s16, q3s16); - // part of stage 3 - DO_BUTTERFLY_STD(cospi_28_64, cospi_4_64, &q5s16, &q7s16) - - // generate 18,19,28,29 - // part of stage 1 - LOAD_FROM_TRANSPOSED(15, 9, 23) - DO_BUTTERFLY_STD(cospi_23_64, cospi_9_64, &q0s16, &q2s16) - LOAD_FROM_TRANSPOSED(23, 25, 7) - DO_BUTTERFLY_STD(cospi_7_64, cospi_25_64, &q1s16, &q3s16) - // part of stage 2 - q13s16 = vsubq_s16(q3s16, q2s16); - q3s16 = vaddq_s16(q3s16, q2s16); - q14s16 = vsubq_s16(q1s16, q0s16); - q2s16 = vaddq_s16(q1s16, q0s16); - // part of stage 3 - DO_BUTTERFLY_STD(-cospi_4_64, -cospi_28_64, &q1s16, &q0s16) - // part of stage 4 - q8s16 = vaddq_s16(q4s16, q2s16); - q9s16 = vaddq_s16(q5s16, q0s16); - q10s16 = vaddq_s16(q7s16, q1s16); - q15s16 = vaddq_s16(q6s16, q3s16); - q13s16 = vsubq_s16(q5s16, q0s16); - q14s16 = vsubq_s16(q7s16, q1s16); - STORE_IN_OUTPUT(0, 16, 31, q8s16, q15s16) - STORE_IN_OUTPUT(31, 17, 30, q9s16, q10s16) - // part of stage 5 - DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q0s16, &q1s16) - STORE_IN_OUTPUT(30, 29, 18, q1s16, q0s16) - // part of stage 4 - q13s16 = vsubq_s16(q4s16, q2s16); - q14s16 = vsubq_s16(q6s16, q3s16); - // part of stage 5 - DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q4s16, &q6s16) - STORE_IN_OUTPUT(18, 19, 28, q4s16, q6s16) - - // 
----------------------------------------- - // BLOCK B: 20-23,24-27 - // ----------------------------------------- - // generate 20,21,26,27 - // part of stage 1 - LOAD_FROM_TRANSPOSED(7, 5, 27) - DO_BUTTERFLY_STD(cospi_27_64, cospi_5_64, &q0s16, &q2s16) - LOAD_FROM_TRANSPOSED(27, 21, 11) - DO_BUTTERFLY_STD(cospi_11_64, cospi_21_64, &q1s16, &q3s16) - // part of stage 2 - q13s16 = vsubq_s16(q0s16, q1s16); - q0s16 = vaddq_s16(q0s16, q1s16); - q14s16 = vsubq_s16(q2s16, q3s16); - q2s16 = vaddq_s16(q2s16, q3s16); - // part of stage 3 - DO_BUTTERFLY_STD(cospi_12_64, cospi_20_64, &q1s16, &q3s16) - - // generate 22,23,24,25 - // part of stage 1 - LOAD_FROM_TRANSPOSED(11, 13, 19) - DO_BUTTERFLY_STD(cospi_19_64, cospi_13_64, &q5s16, &q7s16) - LOAD_FROM_TRANSPOSED(19, 29, 3) - DO_BUTTERFLY_STD(cospi_3_64, cospi_29_64, &q4s16, &q6s16) - // part of stage 2 - q14s16 = vsubq_s16(q4s16, q5s16); - q5s16 = vaddq_s16(q4s16, q5s16); - q13s16 = vsubq_s16(q6s16, q7s16); - q6s16 = vaddq_s16(q6s16, q7s16); - // part of stage 3 - DO_BUTTERFLY_STD(-cospi_20_64, -cospi_12_64, &q4s16, &q7s16) - // part of stage 4 - q10s16 = vaddq_s16(q7s16, q1s16); - q11s16 = vaddq_s16(q5s16, q0s16); - q12s16 = vaddq_s16(q6s16, q2s16); - q15s16 = vaddq_s16(q4s16, q3s16); - // part of stage 6 - LOAD_FROM_OUTPUT(28, 16, 17, q14s16, q13s16) - q8s16 = vaddq_s16(q14s16, q11s16); - q9s16 = vaddq_s16(q13s16, q10s16); - q13s16 = vsubq_s16(q13s16, q10s16); - q11s16 = vsubq_s16(q14s16, q11s16); - STORE_IN_OUTPUT(17, 17, 16, q9s16, q8s16) - LOAD_FROM_OUTPUT(16, 30, 31, q14s16, q9s16) - q8s16 = vsubq_s16(q9s16, q12s16); - q10s16 = vaddq_s16(q14s16, q15s16); - q14s16 = vsubq_s16(q14s16, q15s16); - q12s16 = vaddq_s16(q9s16, q12s16); - STORE_IN_OUTPUT(31, 30, 31, q10s16, q12s16) - // part of stage 7 - DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q13s16, &q14s16) - STORE_IN_OUTPUT(31, 25, 22, q14s16, q13s16) - q13s16 = q11s16; - q14s16 = q8s16; - DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q13s16, &q14s16) - 
STORE_IN_OUTPUT(22, 24, 23, q14s16, q13s16) - // part of stage 4 - q14s16 = vsubq_s16(q5s16, q0s16); - q13s16 = vsubq_s16(q6s16, q2s16); - DO_BUTTERFLY_STD(-cospi_8_64, -cospi_24_64, &q5s16, &q6s16); - q14s16 = vsubq_s16(q7s16, q1s16); - q13s16 = vsubq_s16(q4s16, q3s16); - DO_BUTTERFLY_STD(-cospi_8_64, -cospi_24_64, &q0s16, &q1s16); - // part of stage 6 - LOAD_FROM_OUTPUT(23, 18, 19, q14s16, q13s16) - q8s16 = vaddq_s16(q14s16, q1s16); - q9s16 = vaddq_s16(q13s16, q6s16); - q13s16 = vsubq_s16(q13s16, q6s16); - q1s16 = vsubq_s16(q14s16, q1s16); - STORE_IN_OUTPUT(19, 18, 19, q8s16, q9s16) - LOAD_FROM_OUTPUT(19, 28, 29, q8s16, q9s16) - q14s16 = vsubq_s16(q8s16, q5s16); - q10s16 = vaddq_s16(q8s16, q5s16); - q11s16 = vaddq_s16(q9s16, q0s16); - q0s16 = vsubq_s16(q9s16, q0s16); - STORE_IN_OUTPUT(29, 28, 29, q10s16, q11s16) - // part of stage 7 - DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q13s16, &q14s16) - STORE_IN_OUTPUT(29, 20, 27, q13s16, q14s16) - DO_BUTTERFLY(q0s16, q1s16, cospi_16_64, cospi_16_64, - &q1s16, &q0s16); - STORE_IN_OUTPUT(27, 21, 26, q1s16, q0s16) - - // ----------------------------------------- - // BLOCK C: 8-10,11-15 - // ----------------------------------------- - // generate 8,9,14,15 - // part of stage 2 - LOAD_FROM_TRANSPOSED(3, 2, 30) - DO_BUTTERFLY_STD(cospi_30_64, cospi_2_64, &q0s16, &q2s16) - LOAD_FROM_TRANSPOSED(30, 18, 14) - DO_BUTTERFLY_STD(cospi_14_64, cospi_18_64, &q1s16, &q3s16) - // part of stage 3 - q13s16 = vsubq_s16(q0s16, q1s16); - q0s16 = vaddq_s16(q0s16, q1s16); - q14s16 = vsubq_s16(q2s16, q3s16); - q2s16 = vaddq_s16(q2s16, q3s16); - // part of stage 4 - DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q1s16, &q3s16) - - // generate 10,11,12,13 - // part of stage 2 - LOAD_FROM_TRANSPOSED(14, 10, 22) - DO_BUTTERFLY_STD(cospi_22_64, cospi_10_64, &q5s16, &q7s16) - LOAD_FROM_TRANSPOSED(22, 26, 6) - DO_BUTTERFLY_STD(cospi_6_64, cospi_26_64, &q4s16, &q6s16) - // part of stage 3 - q14s16 = vsubq_s16(q4s16, q5s16); - q5s16 = vaddq_s16(q4s16, 
q5s16); - q13s16 = vsubq_s16(q6s16, q7s16); - q6s16 = vaddq_s16(q6s16, q7s16); - // part of stage 4 - DO_BUTTERFLY_STD(-cospi_8_64, -cospi_24_64, &q4s16, &q7s16) - // part of stage 5 - q8s16 = vaddq_s16(q0s16, q5s16); - q9s16 = vaddq_s16(q1s16, q7s16); - q13s16 = vsubq_s16(q1s16, q7s16); - q14s16 = vsubq_s16(q3s16, q4s16); - q10s16 = vaddq_s16(q3s16, q4s16); - q15s16 = vaddq_s16(q2s16, q6s16); - STORE_IN_OUTPUT(26, 8, 15, q8s16, q15s16) - STORE_IN_OUTPUT(15, 9, 14, q9s16, q10s16) - // part of stage 6 - DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q1s16, &q3s16) - STORE_IN_OUTPUT(14, 13, 10, q3s16, q1s16) - q13s16 = vsubq_s16(q0s16, q5s16); - q14s16 = vsubq_s16(q2s16, q6s16); - DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q1s16, &q3s16) - STORE_IN_OUTPUT(10, 11, 12, q1s16, q3s16) - - // ----------------------------------------- - // BLOCK D: 0-3,4-7 - // ----------------------------------------- - // generate 4,5,6,7 - // part of stage 3 - LOAD_FROM_TRANSPOSED(6, 4, 28) - DO_BUTTERFLY_STD(cospi_28_64, cospi_4_64, &q0s16, &q2s16) - LOAD_FROM_TRANSPOSED(28, 20, 12) - DO_BUTTERFLY_STD(cospi_12_64, cospi_20_64, &q1s16, &q3s16) - // part of stage 4 - q13s16 = vsubq_s16(q0s16, q1s16); - q0s16 = vaddq_s16(q0s16, q1s16); - q14s16 = vsubq_s16(q2s16, q3s16); - q2s16 = vaddq_s16(q2s16, q3s16); - // part of stage 5 - DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q1s16, &q3s16) - - // generate 0,1,2,3 - // part of stage 4 - LOAD_FROM_TRANSPOSED(12, 0, 16) - DO_BUTTERFLY_STD(cospi_16_64, cospi_16_64, &q5s16, &q7s16) - LOAD_FROM_TRANSPOSED(16, 8, 24) - DO_BUTTERFLY_STD(cospi_24_64, cospi_8_64, &q14s16, &q6s16) - // part of stage 5 - q4s16 = vaddq_s16(q7s16, q6s16); - q7s16 = vsubq_s16(q7s16, q6s16); - q6s16 = vsubq_s16(q5s16, q14s16); - q5s16 = vaddq_s16(q5s16, q14s16); - // part of stage 6 - q8s16 = vaddq_s16(q4s16, q2s16); - q9s16 = vaddq_s16(q5s16, q3s16); - q10s16 = vaddq_s16(q6s16, q1s16); - q11s16 = vaddq_s16(q7s16, q0s16); - q12s16 = vsubq_s16(q7s16, q0s16); - q13s16 = 
vsubq_s16(q6s16, q1s16); - q14s16 = vsubq_s16(q5s16, q3s16); - q15s16 = vsubq_s16(q4s16, q2s16); - // part of stage 7 - LOAD_FROM_OUTPUT(12, 14, 15, q0s16, q1s16) - q2s16 = vaddq_s16(q8s16, q1s16); - q3s16 = vaddq_s16(q9s16, q0s16); - q4s16 = vsubq_s16(q9s16, q0s16); - q5s16 = vsubq_s16(q8s16, q1s16); - LOAD_FROM_OUTPUT(15, 16, 17, q0s16, q1s16) - q8s16 = vaddq_s16(q4s16, q1s16); - q9s16 = vaddq_s16(q5s16, q0s16); - q6s16 = vsubq_s16(q5s16, q0s16); - q7s16 = vsubq_s16(q4s16, q1s16); - - if (idct32_pass_loop == 0) { - idct32_bands_end_1st_pass(out, - q2s16, q3s16, q6s16, q7s16, q8s16, q9s16, - q10s16, q11s16, q12s16, q13s16, q14s16, q15s16); - } else { - idct32_bands_end_2nd_pass(out, dest, stride, - q2s16, q3s16, q6s16, q7s16, q8s16, q9s16, - q10s16, q11s16, q12s16, q13s16, q14s16, q15s16); - dest += 8; - } - } - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.c deleted file mode 100644 index ea618700c9..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <arm_neon.h> - -#include "vpx_dsp/inv_txfm.h" -#include "vpx_ports/mem.h" - -void vpx_idct4x4_1_add_neon( - int16_t *input, - uint8_t *dest, - int dest_stride) { - uint8x8_t d6u8; - uint32x2_t d2u32 = vdup_n_u32(0); - uint16x8_t q8u16; - int16x8_t q0s16; - uint8_t *d1, *d2; - int16_t i, a1, cospi_16_64 = 11585; - int16_t out = dct_const_round_shift(input[0] * cospi_16_64); - out = dct_const_round_shift(out * cospi_16_64); - a1 = ROUND_POWER_OF_TWO(out, 4); - - q0s16 = vdupq_n_s16(a1); - - // dc_only_idct_add - d1 = d2 = dest; - for (i = 0; i < 2; i++) { - d2u32 = vld1_lane_u32((const uint32_t *)d1, d2u32, 0); - d1 += dest_stride; - d2u32 = vld1_lane_u32((const uint32_t *)d1, d2u32, 1); - d1 += dest_stride; - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q0s16), - vreinterpret_u8_u32(d2u32)); - d6u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - - vst1_lane_u32((uint32_t *)d2, vreinterpret_u32_u8(d6u8), 0); - d2 += dest_stride; - vst1_lane_u32((uint32_t *)d2, vreinterpret_u32_u8(d6u8), 1); - d2 += dest_stride; - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct4x4_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct4x4_add_neon.c deleted file mode 100644 index 3c975c99b7..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/idct4x4_add_neon.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <arm_neon.h> - -void vpx_idct4x4_16_add_neon( - int16_t *input, - uint8_t *dest, - int dest_stride) { - uint8x8_t d26u8, d27u8; - uint32x2_t d26u32, d27u32; - uint16x8_t q8u16, q9u16; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16; - int16x4_t d22s16, d23s16, d24s16, d26s16, d27s16, d28s16, d29s16; - int16x8_t q8s16, q9s16, q13s16, q14s16; - int32x4_t q1s32, q13s32, q14s32, q15s32; - int16x4x2_t d0x2s16, d1x2s16; - int32x4x2_t q0x2s32; - uint8_t *d; - int16_t cospi_8_64 = 15137; - int16_t cospi_16_64 = 11585; - int16_t cospi_24_64 = 6270; - - d26u32 = d27u32 = vdup_n_u32(0); - - q8s16 = vld1q_s16(input); - q9s16 = vld1q_s16(input + 8); - - d16s16 = vget_low_s16(q8s16); - d17s16 = vget_high_s16(q8s16); - d18s16 = vget_low_s16(q9s16); - d19s16 = vget_high_s16(q9s16); - - d0x2s16 = vtrn_s16(d16s16, d17s16); - d1x2s16 = vtrn_s16(d18s16, d19s16); - q8s16 = vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]); - q9s16 = vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]); - - d20s16 = vdup_n_s16(cospi_8_64); - d21s16 = vdup_n_s16(cospi_16_64); - - q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(q8s16), - vreinterpretq_s32_s16(q9s16)); - d16s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[0])); - d17s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[0])); - d18s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[1])); - d19s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[1])); - - d22s16 = vdup_n_s16(cospi_24_64); - - // stage 1 - d23s16 = vadd_s16(d16s16, d18s16); - d24s16 = vsub_s16(d16s16, d18s16); - - q15s32 = vmull_s16(d17s16, d22s16); - q1s32 = vmull_s16(d17s16, d20s16); - q13s32 = vmull_s16(d23s16, d21s16); - q14s32 = vmull_s16(d24s16, d21s16); - - q15s32 = vmlsl_s16(q15s32, d19s16, d20s16); - q1s32 = vmlal_s16(q1s32, d19s16, d22s16); - - d26s16 = vqrshrn_n_s32(q13s32, 14); - d27s16 = vqrshrn_n_s32(q14s32, 14); - d29s16 = vqrshrn_n_s32(q15s32, 14); - d28s16 = vqrshrn_n_s32(q1s32, 14); - q13s16 = vcombine_s16(d26s16, d27s16); - q14s16 = 
vcombine_s16(d28s16, d29s16); - - // stage 2 - q8s16 = vaddq_s16(q13s16, q14s16); - q9s16 = vsubq_s16(q13s16, q14s16); - - d16s16 = vget_low_s16(q8s16); - d17s16 = vget_high_s16(q8s16); - d18s16 = vget_high_s16(q9s16); // vswp d18 d19 - d19s16 = vget_low_s16(q9s16); - - d0x2s16 = vtrn_s16(d16s16, d17s16); - d1x2s16 = vtrn_s16(d18s16, d19s16); - q8s16 = vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]); - q9s16 = vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]); - - q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(q8s16), - vreinterpretq_s32_s16(q9s16)); - d16s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[0])); - d17s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[0])); - d18s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[1])); - d19s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[1])); - - // do the transform on columns - // stage 1 - d23s16 = vadd_s16(d16s16, d18s16); - d24s16 = vsub_s16(d16s16, d18s16); - - q15s32 = vmull_s16(d17s16, d22s16); - q1s32 = vmull_s16(d17s16, d20s16); - q13s32 = vmull_s16(d23s16, d21s16); - q14s32 = vmull_s16(d24s16, d21s16); - - q15s32 = vmlsl_s16(q15s32, d19s16, d20s16); - q1s32 = vmlal_s16(q1s32, d19s16, d22s16); - - d26s16 = vqrshrn_n_s32(q13s32, 14); - d27s16 = vqrshrn_n_s32(q14s32, 14); - d29s16 = vqrshrn_n_s32(q15s32, 14); - d28s16 = vqrshrn_n_s32(q1s32, 14); - q13s16 = vcombine_s16(d26s16, d27s16); - q14s16 = vcombine_s16(d28s16, d29s16); - - // stage 2 - q8s16 = vaddq_s16(q13s16, q14s16); - q9s16 = vsubq_s16(q13s16, q14s16); - - q8s16 = vrshrq_n_s16(q8s16, 4); - q9s16 = vrshrq_n_s16(q9s16, 4); - - d = dest; - d26u32 = vld1_lane_u32((const uint32_t *)d, d26u32, 0); - d += dest_stride; - d26u32 = vld1_lane_u32((const uint32_t *)d, d26u32, 1); - d += dest_stride; - d27u32 = vld1_lane_u32((const uint32_t *)d, d27u32, 1); - d += dest_stride; - d27u32 = vld1_lane_u32((const uint32_t *)d, d27u32, 0); - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), - vreinterpret_u8_u32(d26u32)); - q9u16 = 
vaddw_u8(vreinterpretq_u16_s16(q9s16), - vreinterpret_u8_u32(d27u32)); - - d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - - d = dest; - vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d26u8), 0); - d += dest_stride; - vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d26u8), 1); - d += dest_stride; - vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d27u8), 1); - d += dest_stride; - vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d27u8), 0); - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c deleted file mode 100644 index c1b801fad5..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <arm_neon.h> - -#include "vpx_dsp/inv_txfm.h" -#include "vpx_ports/mem.h" - -void vpx_idct8x8_1_add_neon( - int16_t *input, - uint8_t *dest, - int dest_stride) { - uint8x8_t d2u8, d3u8, d30u8, d31u8; - uint64x1_t d2u64, d3u64, d4u64, d5u64; - uint16x8_t q0u16, q9u16, q10u16, q11u16, q12u16; - int16x8_t q0s16; - uint8_t *d1, *d2; - int16_t i, a1, cospi_16_64 = 11585; - int16_t out = dct_const_round_shift(input[0] * cospi_16_64); - out = dct_const_round_shift(out * cospi_16_64); - a1 = ROUND_POWER_OF_TWO(out, 5); - - q0s16 = vdupq_n_s16(a1); - q0u16 = vreinterpretq_u16_s16(q0s16); - - d1 = d2 = dest; - for (i = 0; i < 2; i++) { - d2u64 = vld1_u64((const uint64_t *)d1); - d1 += dest_stride; - d3u64 = vld1_u64((const uint64_t *)d1); - d1 += dest_stride; - d4u64 = vld1_u64((const uint64_t *)d1); - d1 += dest_stride; - d5u64 = vld1_u64((const uint64_t *)d1); - d1 += dest_stride; - - q9u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d2u64)); - q10u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d3u64)); - q11u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d4u64)); - q12u16 = vaddw_u8(q0u16, vreinterpret_u8_u64(d5u64)); - - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); - d30u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); - d31u8 = vqmovun_s16(vreinterpretq_s16_u16(q12u16)); - - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d30u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d31u8)); - d2 += dest_stride; - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/idct8x8_add_neon.c b/thirdparty/libvpx/vpx_dsp/arm/idct8x8_add_neon.c deleted file mode 100644 index 4b2c2a6f83..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/idct8x8_add_neon.c +++ /dev/null @@ -1,540 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> - -#include "./vpx_config.h" -#include "vpx_dsp/txfm_common.h" - -static INLINE void TRANSPOSE8X8( - int16x8_t *q8s16, - int16x8_t *q9s16, - int16x8_t *q10s16, - int16x8_t *q11s16, - int16x8_t *q12s16, - int16x8_t *q13s16, - int16x8_t *q14s16, - int16x8_t *q15s16) { - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32; - int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16; - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - d20s16 = vget_low_s16(*q10s16); - d21s16 = vget_high_s16(*q10s16); - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - d24s16 = vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - d30s16 = vget_low_s16(*q15s16); - d31s16 = vget_high_s16(*q15s16); - - *q8s16 = vcombine_s16(d16s16, d24s16); // vswp d17, d24 - *q9s16 = vcombine_s16(d18s16, d26s16); // vswp d19, d26 - *q10s16 = vcombine_s16(d20s16, d28s16); // vswp d21, d28 - *q11s16 = vcombine_s16(d22s16, d30s16); // vswp d23, d30 - *q12s16 = vcombine_s16(d17s16, d25s16); - *q13s16 = vcombine_s16(d19s16, d27s16); - *q14s16 = vcombine_s16(d21s16, d29s16); - *q15s16 = vcombine_s16(d23s16, d31s16); - - q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q8s16), - vreinterpretq_s32_s16(*q10s16)); - q1x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q9s16), - 
vreinterpretq_s32_s16(*q11s16)); - q2x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q12s16), - vreinterpretq_s32_s16(*q14s16)); - q3x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q13s16), - vreinterpretq_s32_s16(*q15s16)); - - q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]), // q8 - vreinterpretq_s16_s32(q1x2s32.val[0])); // q9 - q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]), // q10 - vreinterpretq_s16_s32(q1x2s32.val[1])); // q11 - q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]), // q12 - vreinterpretq_s16_s32(q3x2s32.val[0])); // q13 - q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]), // q14 - vreinterpretq_s16_s32(q3x2s32.val[1])); // q15 - - *q8s16 = q0x2s16.val[0]; - *q9s16 = q0x2s16.val[1]; - *q10s16 = q1x2s16.val[0]; - *q11s16 = q1x2s16.val[1]; - *q12s16 = q2x2s16.val[0]; - *q13s16 = q2x2s16.val[1]; - *q14s16 = q3x2s16.val[0]; - *q15s16 = q3x2s16.val[1]; - return; -} - -static INLINE void IDCT8x8_1D( - int16x8_t *q8s16, - int16x8_t *q9s16, - int16x8_t *q10s16, - int16x8_t *q11s16, - int16x8_t *q12s16, - int16x8_t *q13s16, - int16x8_t *q14s16, - int16x8_t *q15s16) { - int16x4_t d0s16, d1s16, d2s16, d3s16; - int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; - int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; - int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32; - int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32; - - d0s16 = vdup_n_s16(cospi_28_64); - d1s16 = vdup_n_s16(cospi_4_64); - d2s16 = vdup_n_s16(cospi_12_64); - d3s16 = vdup_n_s16(cospi_20_64); - - d16s16 = vget_low_s16(*q8s16); - d17s16 = vget_high_s16(*q8s16); - d18s16 = vget_low_s16(*q9s16); - d19s16 = vget_high_s16(*q9s16); - d20s16 = vget_low_s16(*q10s16); - d21s16 = vget_high_s16(*q10s16); - d22s16 = vget_low_s16(*q11s16); - d23s16 = vget_high_s16(*q11s16); - d24s16 = 
vget_low_s16(*q12s16); - d25s16 = vget_high_s16(*q12s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - d30s16 = vget_low_s16(*q15s16); - d31s16 = vget_high_s16(*q15s16); - - q2s32 = vmull_s16(d18s16, d0s16); - q3s32 = vmull_s16(d19s16, d0s16); - q5s32 = vmull_s16(d26s16, d2s16); - q6s32 = vmull_s16(d27s16, d2s16); - - q2s32 = vmlsl_s16(q2s32, d30s16, d1s16); - q3s32 = vmlsl_s16(q3s32, d31s16, d1s16); - q5s32 = vmlsl_s16(q5s32, d22s16, d3s16); - q6s32 = vmlsl_s16(q6s32, d23s16, d3s16); - - d8s16 = vqrshrn_n_s32(q2s32, 14); - d9s16 = vqrshrn_n_s32(q3s32, 14); - d10s16 = vqrshrn_n_s32(q5s32, 14); - d11s16 = vqrshrn_n_s32(q6s32, 14); - q4s16 = vcombine_s16(d8s16, d9s16); - q5s16 = vcombine_s16(d10s16, d11s16); - - q2s32 = vmull_s16(d18s16, d1s16); - q3s32 = vmull_s16(d19s16, d1s16); - q9s32 = vmull_s16(d26s16, d3s16); - q13s32 = vmull_s16(d27s16, d3s16); - - q2s32 = vmlal_s16(q2s32, d30s16, d0s16); - q3s32 = vmlal_s16(q3s32, d31s16, d0s16); - q9s32 = vmlal_s16(q9s32, d22s16, d2s16); - q13s32 = vmlal_s16(q13s32, d23s16, d2s16); - - d14s16 = vqrshrn_n_s32(q2s32, 14); - d15s16 = vqrshrn_n_s32(q3s32, 14); - d12s16 = vqrshrn_n_s32(q9s32, 14); - d13s16 = vqrshrn_n_s32(q13s32, 14); - q6s16 = vcombine_s16(d12s16, d13s16); - q7s16 = vcombine_s16(d14s16, d15s16); - - d0s16 = vdup_n_s16(cospi_16_64); - - q2s32 = vmull_s16(d16s16, d0s16); - q3s32 = vmull_s16(d17s16, d0s16); - q13s32 = vmull_s16(d16s16, d0s16); - q15s32 = vmull_s16(d17s16, d0s16); - - q2s32 = vmlal_s16(q2s32, d24s16, d0s16); - q3s32 = vmlal_s16(q3s32, d25s16, d0s16); - q13s32 = vmlsl_s16(q13s32, d24s16, d0s16); - q15s32 = vmlsl_s16(q15s32, d25s16, d0s16); - - d0s16 = vdup_n_s16(cospi_24_64); - d1s16 = vdup_n_s16(cospi_8_64); - - d18s16 = vqrshrn_n_s32(q2s32, 14); - d19s16 = vqrshrn_n_s32(q3s32, 14); - d22s16 = vqrshrn_n_s32(q13s32, 14); - d23s16 = vqrshrn_n_s32(q15s32, 14); - *q9s16 = vcombine_s16(d18s16, d19s16); - 
*q11s16 = vcombine_s16(d22s16, d23s16); - - q2s32 = vmull_s16(d20s16, d0s16); - q3s32 = vmull_s16(d21s16, d0s16); - q8s32 = vmull_s16(d20s16, d1s16); - q12s32 = vmull_s16(d21s16, d1s16); - - q2s32 = vmlsl_s16(q2s32, d28s16, d1s16); - q3s32 = vmlsl_s16(q3s32, d29s16, d1s16); - q8s32 = vmlal_s16(q8s32, d28s16, d0s16); - q12s32 = vmlal_s16(q12s32, d29s16, d0s16); - - d26s16 = vqrshrn_n_s32(q2s32, 14); - d27s16 = vqrshrn_n_s32(q3s32, 14); - d30s16 = vqrshrn_n_s32(q8s32, 14); - d31s16 = vqrshrn_n_s32(q12s32, 14); - *q13s16 = vcombine_s16(d26s16, d27s16); - *q15s16 = vcombine_s16(d30s16, d31s16); - - q0s16 = vaddq_s16(*q9s16, *q15s16); - q1s16 = vaddq_s16(*q11s16, *q13s16); - q2s16 = vsubq_s16(*q11s16, *q13s16); - q3s16 = vsubq_s16(*q9s16, *q15s16); - - *q13s16 = vsubq_s16(q4s16, q5s16); - q4s16 = vaddq_s16(q4s16, q5s16); - *q14s16 = vsubq_s16(q7s16, q6s16); - q7s16 = vaddq_s16(q7s16, q6s16); - d26s16 = vget_low_s16(*q13s16); - d27s16 = vget_high_s16(*q13s16); - d28s16 = vget_low_s16(*q14s16); - d29s16 = vget_high_s16(*q14s16); - - d16s16 = vdup_n_s16(cospi_16_64); - - q9s32 = vmull_s16(d28s16, d16s16); - q10s32 = vmull_s16(d29s16, d16s16); - q11s32 = vmull_s16(d28s16, d16s16); - q12s32 = vmull_s16(d29s16, d16s16); - - q9s32 = vmlsl_s16(q9s32, d26s16, d16s16); - q10s32 = vmlsl_s16(q10s32, d27s16, d16s16); - q11s32 = vmlal_s16(q11s32, d26s16, d16s16); - q12s32 = vmlal_s16(q12s32, d27s16, d16s16); - - d10s16 = vqrshrn_n_s32(q9s32, 14); - d11s16 = vqrshrn_n_s32(q10s32, 14); - d12s16 = vqrshrn_n_s32(q11s32, 14); - d13s16 = vqrshrn_n_s32(q12s32, 14); - q5s16 = vcombine_s16(d10s16, d11s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - *q8s16 = vaddq_s16(q0s16, q7s16); - *q9s16 = vaddq_s16(q1s16, q6s16); - *q10s16 = vaddq_s16(q2s16, q5s16); - *q11s16 = vaddq_s16(q3s16, q4s16); - *q12s16 = vsubq_s16(q3s16, q4s16); - *q13s16 = vsubq_s16(q2s16, q5s16); - *q14s16 = vsubq_s16(q1s16, q6s16); - *q15s16 = vsubq_s16(q0s16, q7s16); - return; -} - -void vpx_idct8x8_64_add_neon( - int16_t 
*input, - uint8_t *dest, - int dest_stride) { - uint8_t *d1, *d2; - uint8x8_t d0u8, d1u8, d2u8, d3u8; - uint64x1_t d0u64, d1u64, d2u64, d3u64; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - uint16x8_t q8u16, q9u16, q10u16, q11u16; - - q8s16 = vld1q_s16(input); - q9s16 = vld1q_s16(input + 8); - q10s16 = vld1q_s16(input + 16); - q11s16 = vld1q_s16(input + 24); - q12s16 = vld1q_s16(input + 32); - q13s16 = vld1q_s16(input + 40); - q14s16 = vld1q_s16(input + 48); - q15s16 = vld1q_s16(input + 56); - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - q8s16 = vrshrq_n_s16(q8s16, 5); - q9s16 = vrshrq_n_s16(q9s16, 5); - q10s16 = vrshrq_n_s16(q10s16, 5); - q11s16 = vrshrq_n_s16(q11s16, 5); - q12s16 = vrshrq_n_s16(q12s16, 5); - q13s16 = vrshrq_n_s16(q13s16, 5); - q14s16 = vrshrq_n_s16(q14s16, 5); - q15s16 = vrshrq_n_s16(q15s16, 5); - - d1 = d2 = dest; - - d0u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d1u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d2u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d3u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), - vreinterpret_u8_u64(d0u64)); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), - vreinterpret_u8_u64(d1u64)); - q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), - vreinterpret_u8_u64(d2u64)); - q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), - vreinterpret_u8_u64(d3u64)); - - d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); - d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); - - vst1_u64((uint64_t *)d2, 
vreinterpret_u64_u8(d0u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); - d2 += dest_stride; - - q8s16 = q12s16; - q9s16 = q13s16; - q10s16 = q14s16; - q11s16 = q15s16; - - d0u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d1u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d2u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d3u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), - vreinterpret_u8_u64(d0u64)); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), - vreinterpret_u8_u64(d1u64)); - q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), - vreinterpret_u8_u64(d2u64)); - q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), - vreinterpret_u8_u64(d3u64)); - - d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); - d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); - - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); - d2 += dest_stride; - return; -} - -void vpx_idct8x8_12_add_neon( - int16_t *input, - uint8_t *dest, - int dest_stride) { - uint8_t *d1, *d2; - uint8x8_t d0u8, d1u8, d2u8, d3u8; - int16x4_t d10s16, d11s16, d12s16, d13s16, d16s16; - int16x4_t d26s16, d27s16, d28s16, d29s16; - uint64x1_t d0u64, d1u64, d2u64, d3u64; - int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; - int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; - uint16x8_t q8u16, q9u16, q10u16, q11u16; - int32x4_t q9s32, q10s32, q11s32, q12s32; - - q8s16 = vld1q_s16(input); - q9s16 = 
vld1q_s16(input + 8); - q10s16 = vld1q_s16(input + 16); - q11s16 = vld1q_s16(input + 24); - q12s16 = vld1q_s16(input + 32); - q13s16 = vld1q_s16(input + 40); - q14s16 = vld1q_s16(input + 48); - q15s16 = vld1q_s16(input + 56); - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - // First transform rows - // stage 1 - q0s16 = vdupq_n_s16(cospi_28_64 * 2); - q1s16 = vdupq_n_s16(cospi_4_64 * 2); - - q4s16 = vqrdmulhq_s16(q9s16, q0s16); - - q0s16 = vdupq_n_s16(-cospi_20_64 * 2); - - q7s16 = vqrdmulhq_s16(q9s16, q1s16); - - q1s16 = vdupq_n_s16(cospi_12_64 * 2); - - q5s16 = vqrdmulhq_s16(q11s16, q0s16); - - q0s16 = vdupq_n_s16(cospi_16_64 * 2); - - q6s16 = vqrdmulhq_s16(q11s16, q1s16); - - // stage 2 & stage 3 - even half - q1s16 = vdupq_n_s16(cospi_24_64 * 2); - - q9s16 = vqrdmulhq_s16(q8s16, q0s16); - - q0s16 = vdupq_n_s16(cospi_8_64 * 2); - - q13s16 = vqrdmulhq_s16(q10s16, q1s16); - - q15s16 = vqrdmulhq_s16(q10s16, q0s16); - - // stage 3 -odd half - q0s16 = vaddq_s16(q9s16, q15s16); - q1s16 = vaddq_s16(q9s16, q13s16); - q2s16 = vsubq_s16(q9s16, q13s16); - q3s16 = vsubq_s16(q9s16, q15s16); - - // stage 2 - odd half - q13s16 = vsubq_s16(q4s16, q5s16); - q4s16 = vaddq_s16(q4s16, q5s16); - q14s16 = vsubq_s16(q7s16, q6s16); - q7s16 = vaddq_s16(q7s16, q6s16); - d26s16 = vget_low_s16(q13s16); - d27s16 = vget_high_s16(q13s16); - d28s16 = vget_low_s16(q14s16); - d29s16 = vget_high_s16(q14s16); - - d16s16 = vdup_n_s16(cospi_16_64); - q9s32 = vmull_s16(d28s16, d16s16); - q10s32 = vmull_s16(d29s16, d16s16); - q11s32 = vmull_s16(d28s16, d16s16); - q12s32 = vmull_s16(d29s16, d16s16); - - q9s32 = vmlsl_s16(q9s32, d26s16, d16s16); - q10s32 = vmlsl_s16(q10s32, d27s16, d16s16); - q11s32 = vmlal_s16(q11s32, d26s16, d16s16); - q12s32 = vmlal_s16(q12s32, d27s16, d16s16); - - d10s16 = vqrshrn_n_s32(q9s32, 14); - d11s16 = vqrshrn_n_s32(q10s32, 14); - d12s16 = vqrshrn_n_s32(q11s32, 14); - d13s16 = vqrshrn_n_s32(q12s32, 14); - q5s16 = 
vcombine_s16(d10s16, d11s16); - q6s16 = vcombine_s16(d12s16, d13s16); - - // stage 4 - q8s16 = vaddq_s16(q0s16, q7s16); - q9s16 = vaddq_s16(q1s16, q6s16); - q10s16 = vaddq_s16(q2s16, q5s16); - q11s16 = vaddq_s16(q3s16, q4s16); - q12s16 = vsubq_s16(q3s16, q4s16); - q13s16 = vsubq_s16(q2s16, q5s16); - q14s16 = vsubq_s16(q1s16, q6s16); - q15s16 = vsubq_s16(q0s16, q7s16); - - TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, - &q12s16, &q13s16, &q14s16, &q15s16); - - q8s16 = vrshrq_n_s16(q8s16, 5); - q9s16 = vrshrq_n_s16(q9s16, 5); - q10s16 = vrshrq_n_s16(q10s16, 5); - q11s16 = vrshrq_n_s16(q11s16, 5); - q12s16 = vrshrq_n_s16(q12s16, 5); - q13s16 = vrshrq_n_s16(q13s16, 5); - q14s16 = vrshrq_n_s16(q14s16, 5); - q15s16 = vrshrq_n_s16(q15s16, 5); - - d1 = d2 = dest; - - d0u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d1u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d2u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d3u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), - vreinterpret_u8_u64(d0u64)); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), - vreinterpret_u8_u64(d1u64)); - q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), - vreinterpret_u8_u64(d2u64)); - q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), - vreinterpret_u8_u64(d3u64)); - - d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); - d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); - - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); - d2 += dest_stride; - - q8s16 = q12s16; - q9s16 = q13s16; - q10s16 = q14s16; - q11s16 = 
q15s16; - - d0u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d1u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d2u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - d3u64 = vld1_u64((uint64_t *)d1); - d1 += dest_stride; - - q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), - vreinterpret_u8_u64(d0u64)); - q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), - vreinterpret_u8_u64(d1u64)); - q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), - vreinterpret_u8_u64(d2u64)); - q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), - vreinterpret_u8_u64(d3u64)); - - d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); - d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); - d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); - d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); - - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); - d2 += dest_stride; - vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); - d2 += dest_stride; - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/intrapred_neon.c b/thirdparty/libvpx/vpx_dsp/arm/intrapred_neon.c deleted file mode 100644 index 0a376104d2..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/intrapred_neon.c +++ /dev/null @@ -1,822 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <arm_neon.h> - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "vpx/vpx_integer.h" - -//------------------------------------------------------------------------------ -// DC 4x4 - -// 'do_above' and 'do_left' facilitate branch removal when inlined. -static INLINE void dc_4x4(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left, - int do_above, int do_left) { - uint16x8_t sum_top; - uint16x8_t sum_left; - uint8x8_t dc0; - - if (do_above) { - const uint8x8_t A = vld1_u8(above); // top row - const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top - const uint16x4_t p1 = vpadd_u16(p0, p0); - sum_top = vcombine_u16(p1, p1); - } - - if (do_left) { - const uint8x8_t L = vld1_u8(left); // left border - const uint16x4_t p0 = vpaddl_u8(L); // cascading summation of the left - const uint16x4_t p1 = vpadd_u16(p0, p0); - sum_left = vcombine_u16(p1, p1); - } - - if (do_above && do_left) { - const uint16x8_t sum = vaddq_u16(sum_left, sum_top); - dc0 = vrshrn_n_u16(sum, 3); - } else if (do_above) { - dc0 = vrshrn_n_u16(sum_top, 2); - } else if (do_left) { - dc0 = vrshrn_n_u16(sum_left, 2); - } else { - dc0 = vdup_n_u8(0x80); - } - - { - const uint8x8_t dc = vdup_lane_u8(dc0, 0); - int i; - for (i = 0; i < 4; ++i) { - vst1_lane_u32((uint32_t*)(dst + i * stride), vreinterpret_u32_u8(dc), 0); - } - } -} - -void vpx_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - dc_4x4(dst, stride, above, left, 1, 1); -} - -void vpx_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - (void)above; - dc_4x4(dst, stride, NULL, left, 0, 1); -} - -void vpx_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - (void)left; - dc_4x4(dst, stride, above, NULL, 1, 0); -} - -void vpx_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t 
*left) { - (void)above; - (void)left; - dc_4x4(dst, stride, NULL, NULL, 0, 0); -} - -//------------------------------------------------------------------------------ -// DC 8x8 - -// 'do_above' and 'do_left' facilitate branch removal when inlined. -static INLINE void dc_8x8(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left, - int do_above, int do_left) { - uint16x8_t sum_top; - uint16x8_t sum_left; - uint8x8_t dc0; - - if (do_above) { - const uint8x8_t A = vld1_u8(above); // top row - const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top - const uint16x4_t p1 = vpadd_u16(p0, p0); - const uint16x4_t p2 = vpadd_u16(p1, p1); - sum_top = vcombine_u16(p2, p2); - } - - if (do_left) { - const uint8x8_t L = vld1_u8(left); // left border - const uint16x4_t p0 = vpaddl_u8(L); // cascading summation of the left - const uint16x4_t p1 = vpadd_u16(p0, p0); - const uint16x4_t p2 = vpadd_u16(p1, p1); - sum_left = vcombine_u16(p2, p2); - } - - if (do_above && do_left) { - const uint16x8_t sum = vaddq_u16(sum_left, sum_top); - dc0 = vrshrn_n_u16(sum, 4); - } else if (do_above) { - dc0 = vrshrn_n_u16(sum_top, 3); - } else if (do_left) { - dc0 = vrshrn_n_u16(sum_left, 3); - } else { - dc0 = vdup_n_u8(0x80); - } - - { - const uint8x8_t dc = vdup_lane_u8(dc0, 0); - int i; - for (i = 0; i < 8; ++i) { - vst1_u32((uint32_t*)(dst + i * stride), vreinterpret_u32_u8(dc)); - } - } -} - -void vpx_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - dc_8x8(dst, stride, above, left, 1, 1); -} - -void vpx_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - (void)above; - dc_8x8(dst, stride, NULL, left, 0, 1); -} - -void vpx_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - (void)left; - dc_8x8(dst, stride, above, NULL, 1, 0); -} - -void vpx_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t 
stride, - const uint8_t *above, const uint8_t *left) { - (void)above; - (void)left; - dc_8x8(dst, stride, NULL, NULL, 0, 0); -} - -//------------------------------------------------------------------------------ -// DC 16x16 - -// 'do_above' and 'do_left' facilitate branch removal when inlined. -static INLINE void dc_16x16(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left, - int do_above, int do_left) { - uint16x8_t sum_top; - uint16x8_t sum_left; - uint8x8_t dc0; - - if (do_above) { - const uint8x16_t A = vld1q_u8(above); // top row - const uint16x8_t p0 = vpaddlq_u8(A); // cascading summation of the top - const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0)); - const uint16x4_t p2 = vpadd_u16(p1, p1); - const uint16x4_t p3 = vpadd_u16(p2, p2); - sum_top = vcombine_u16(p3, p3); - } - - if (do_left) { - const uint8x16_t L = vld1q_u8(left); // left row - const uint16x8_t p0 = vpaddlq_u8(L); // cascading summation of the left - const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0)); - const uint16x4_t p2 = vpadd_u16(p1, p1); - const uint16x4_t p3 = vpadd_u16(p2, p2); - sum_left = vcombine_u16(p3, p3); - } - - if (do_above && do_left) { - const uint16x8_t sum = vaddq_u16(sum_left, sum_top); - dc0 = vrshrn_n_u16(sum, 5); - } else if (do_above) { - dc0 = vrshrn_n_u16(sum_top, 4); - } else if (do_left) { - dc0 = vrshrn_n_u16(sum_left, 4); - } else { - dc0 = vdup_n_u8(0x80); - } - - { - const uint8x16_t dc = vdupq_lane_u8(dc0, 0); - int i; - for (i = 0; i < 16; ++i) { - vst1q_u8(dst + i * stride, dc); - } - } -} - -void vpx_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - dc_16x16(dst, stride, above, left, 1, 1); -} - -void vpx_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, - const uint8_t *left) { - (void)above; - dc_16x16(dst, stride, NULL, left, 0, 1); -} - -void vpx_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t 
stride, - const uint8_t *above, - const uint8_t *left) { - (void)left; - dc_16x16(dst, stride, above, NULL, 1, 0); -} - -void vpx_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, - const uint8_t *left) { - (void)above; - (void)left; - dc_16x16(dst, stride, NULL, NULL, 0, 0); -} - -//------------------------------------------------------------------------------ -// DC 32x32 - -// 'do_above' and 'do_left' facilitate branch removal when inlined. -static INLINE void dc_32x32(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left, - int do_above, int do_left) { - uint16x8_t sum_top; - uint16x8_t sum_left; - uint8x8_t dc0; - - if (do_above) { - const uint8x16_t A0 = vld1q_u8(above); // top row - const uint8x16_t A1 = vld1q_u8(above + 16); - const uint16x8_t p0 = vpaddlq_u8(A0); // cascading summation of the top - const uint16x8_t p1 = vpaddlq_u8(A1); - const uint16x8_t p2 = vaddq_u16(p0, p1); - const uint16x4_t p3 = vadd_u16(vget_low_u16(p2), vget_high_u16(p2)); - const uint16x4_t p4 = vpadd_u16(p3, p3); - const uint16x4_t p5 = vpadd_u16(p4, p4); - sum_top = vcombine_u16(p5, p5); - } - - if (do_left) { - const uint8x16_t L0 = vld1q_u8(left); // left row - const uint8x16_t L1 = vld1q_u8(left + 16); - const uint16x8_t p0 = vpaddlq_u8(L0); // cascading summation of the left - const uint16x8_t p1 = vpaddlq_u8(L1); - const uint16x8_t p2 = vaddq_u16(p0, p1); - const uint16x4_t p3 = vadd_u16(vget_low_u16(p2), vget_high_u16(p2)); - const uint16x4_t p4 = vpadd_u16(p3, p3); - const uint16x4_t p5 = vpadd_u16(p4, p4); - sum_left = vcombine_u16(p5, p5); - } - - if (do_above && do_left) { - const uint16x8_t sum = vaddq_u16(sum_left, sum_top); - dc0 = vrshrn_n_u16(sum, 6); - } else if (do_above) { - dc0 = vrshrn_n_u16(sum_top, 5); - } else if (do_left) { - dc0 = vrshrn_n_u16(sum_left, 5); - } else { - dc0 = vdup_n_u8(0x80); - } - - { - const uint8x16_t dc = vdupq_lane_u8(dc0, 0); - int i; - for (i = 0; i < 32; ++i) { - 
vst1q_u8(dst + i * stride, dc); - vst1q_u8(dst + i * stride + 16, dc); - } - } -} - -void vpx_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - dc_32x32(dst, stride, above, left, 1, 1); -} - -void vpx_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, - const uint8_t *left) { - (void)above; - dc_32x32(dst, stride, NULL, left, 0, 1); -} - -void vpx_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, - const uint8_t *left) { - (void)left; - dc_32x32(dst, stride, above, NULL, 1, 0); -} - -void vpx_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, - const uint8_t *left) { - (void)above; - (void)left; - dc_32x32(dst, stride, NULL, NULL, 0, 0); -} - -// ----------------------------------------------------------------------------- - -void vpx_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const uint64x1_t A0 = vreinterpret_u64_u8(vld1_u8(above)); // top row - const uint64x1_t A1 = vshr_n_u64(A0, 8); - const uint64x1_t A2 = vshr_n_u64(A0, 16); - const uint8x8_t ABCDEFGH = vreinterpret_u8_u64(A0); - const uint8x8_t BCDEFGH0 = vreinterpret_u8_u64(A1); - const uint8x8_t CDEFGH00 = vreinterpret_u8_u64(A2); - const uint8x8_t avg1 = vhadd_u8(ABCDEFGH, CDEFGH00); - const uint8x8_t avg2 = vrhadd_u8(avg1, BCDEFGH0); - const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2); - const uint32x2_t r0 = vreinterpret_u32_u8(avg2); - const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8)); - const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16)); - const uint32x2_t r3 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24)); - (void)left; - vst1_lane_u32((uint32_t *)(dst + 0 * stride), r0, 0); - vst1_lane_u32((uint32_t *)(dst + 1 * stride), r1, 0); - vst1_lane_u32((uint32_t *)(dst + 2 * stride), r2, 0); - vst1_lane_u32((uint32_t *)(dst + 3 * stride), r3, 0); - dst[3 
* stride + 3] = above[7]; -} - -void vpx_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - static const uint8_t shuffle1[8] = { 1, 2, 3, 4, 5, 6, 7, 7 }; - static const uint8_t shuffle2[8] = { 2, 3, 4, 5, 6, 7, 7, 7 }; - const uint8x8_t sh_12345677 = vld1_u8(shuffle1); - const uint8x8_t sh_23456777 = vld1_u8(shuffle2); - const uint8x8_t A0 = vld1_u8(above); // top row - const uint8x8_t A1 = vtbl1_u8(A0, sh_12345677); - const uint8x8_t A2 = vtbl1_u8(A0, sh_23456777); - const uint8x8_t avg1 = vhadd_u8(A0, A2); - uint8x8_t row = vrhadd_u8(avg1, A1); - int i; - (void)left; - for (i = 0; i < 7; ++i) { - vst1_u8(dst + i * stride, row); - row = vtbl1_u8(row, sh_12345677); - } - vst1_u8(dst + i * stride, row); -} - -void vpx_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const uint8x16_t A0 = vld1q_u8(above); // top row - const uint8x16_t above_right = vld1q_dup_u8(above + 15); - const uint8x16_t A1 = vextq_u8(A0, above_right, 1); - const uint8x16_t A2 = vextq_u8(A0, above_right, 2); - const uint8x16_t avg1 = vhaddq_u8(A0, A2); - uint8x16_t row = vrhaddq_u8(avg1, A1); - int i; - (void)left; - for (i = 0; i < 15; ++i) { - vst1q_u8(dst + i * stride, row); - row = vextq_u8(row, above_right, 1); - } - vst1q_u8(dst + i * stride, row); -} - -// ----------------------------------------------------------------------------- - -void vpx_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const uint8x8_t XABCD_u8 = vld1_u8(above - 1); - const uint64x1_t XABCD = vreinterpret_u64_u8(XABCD_u8); - const uint64x1_t ____XABC = vshl_n_u64(XABCD, 32); - const uint32x2_t zero = vdup_n_u32(0); - const uint32x2_t IJKL = vld1_lane_u32((const uint32_t *)left, zero, 0); - const uint8x8_t IJKL_u8 = vreinterpret_u8_u32(IJKL); - const uint64x1_t LKJI____ = vreinterpret_u64_u8(vrev32_u8(IJKL_u8)); - const uint64x1_t LKJIXABC = 
vorr_u64(LKJI____, ____XABC); - const uint8x8_t KJIXABC_ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 8)); - const uint8x8_t JIXABC__ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 16)); - const uint8_t D = vget_lane_u8(XABCD_u8, 4); - const uint8x8_t JIXABCD_ = vset_lane_u8(D, JIXABC__, 6); - const uint8x8_t LKJIXABC_u8 = vreinterpret_u8_u64(LKJIXABC); - const uint8x8_t avg1 = vhadd_u8(JIXABCD_, LKJIXABC_u8); - const uint8x8_t avg2 = vrhadd_u8(avg1, KJIXABC_); - const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2); - const uint32x2_t r3 = vreinterpret_u32_u8(avg2); - const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8)); - const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16)); - const uint32x2_t r0 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24)); - vst1_lane_u32((uint32_t *)(dst + 0 * stride), r0, 0); - vst1_lane_u32((uint32_t *)(dst + 1 * stride), r1, 0); - vst1_lane_u32((uint32_t *)(dst + 2 * stride), r2, 0); - vst1_lane_u32((uint32_t *)(dst + 3 * stride), r3, 0); -} - -#if !HAVE_NEON_ASM - -void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int i; - uint32x2_t d0u32 = vdup_n_u32(0); - (void)left; - - d0u32 = vld1_lane_u32((const uint32_t *)above, d0u32, 0); - for (i = 0; i < 4; i++, dst += stride) - vst1_lane_u32((uint32_t *)dst, d0u32, 0); -} - -void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int i; - uint8x8_t d0u8 = vdup_n_u8(0); - (void)left; - - d0u8 = vld1_u8(above); - for (i = 0; i < 8; i++, dst += stride) - vst1_u8(dst, d0u8); -} - -void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int i; - uint8x16_t q0u8 = vdupq_n_u8(0); - (void)left; - - q0u8 = vld1q_u8(above); - for (i = 0; i < 16; i++, dst += stride) - vst1q_u8(dst, q0u8); -} - -void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - 
int i; - uint8x16_t q0u8 = vdupq_n_u8(0); - uint8x16_t q1u8 = vdupq_n_u8(0); - (void)left; - - q0u8 = vld1q_u8(above); - q1u8 = vld1q_u8(above + 16); - for (i = 0; i < 32; i++, dst += stride) { - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q1u8); - } -} - -void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - uint8x8_t d0u8 = vdup_n_u8(0); - uint32x2_t d1u32 = vdup_n_u32(0); - (void)above; - - d1u32 = vld1_lane_u32((const uint32_t *)left, d1u32, 0); - - d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 0); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 1); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 2); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 3); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); -} - -void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - uint8x8_t d0u8 = vdup_n_u8(0); - uint64x1_t d1u64 = vdup_n_u64(0); - (void)above; - - d1u64 = vld1_u64((const uint64_t *)left); - - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 0); - vst1_u8(dst, d0u8); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 1); - vst1_u8(dst, d0u8); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 2); - vst1_u8(dst, d0u8); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 3); - vst1_u8(dst, d0u8); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 4); - vst1_u8(dst, d0u8); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 5); - vst1_u8(dst, d0u8); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 6); - vst1_u8(dst, d0u8); - dst += stride; - d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 7); - 
vst1_u8(dst, d0u8); -} - -void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int j; - uint8x8_t d2u8 = vdup_n_u8(0); - uint8x16_t q0u8 = vdupq_n_u8(0); - uint8x16_t q1u8 = vdupq_n_u8(0); - (void)above; - - q1u8 = vld1q_u8(left); - d2u8 = vget_low_u8(q1u8); - for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) { - q0u8 = vdupq_lane_u8(d2u8, 0); - vst1q_u8(dst, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 1); - vst1q_u8(dst, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 2); - vst1q_u8(dst, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 3); - vst1q_u8(dst, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 4); - vst1q_u8(dst, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 5); - vst1q_u8(dst, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 6); - vst1q_u8(dst, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 7); - vst1q_u8(dst, q0u8); - dst += stride; - } -} - -void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int j, k; - uint8x8_t d2u8 = vdup_n_u8(0); - uint8x16_t q0u8 = vdupq_n_u8(0); - uint8x16_t q1u8 = vdupq_n_u8(0); - (void)above; - - for (k = 0; k < 2; k++, left += 16) { - q1u8 = vld1q_u8(left); - d2u8 = vget_low_u8(q1u8); - for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) { - q0u8 = vdupq_lane_u8(d2u8, 0); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 1); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 2); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 3); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 4); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 5); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 6); - 
vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += stride; - q0u8 = vdupq_lane_u8(d2u8, 7); - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q0u8); - dst += stride; - } - } -} - -void vpx_tm_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int i; - uint16x8_t q1u16, q3u16; - int16x8_t q1s16; - uint8x8_t d0u8 = vdup_n_u8(0); - uint32x2_t d2u32 = vdup_n_u32(0); - - d0u8 = vld1_dup_u8(above - 1); - d2u32 = vld1_lane_u32((const uint32_t *)above, d2u32, 0); - q3u16 = vsubl_u8(vreinterpret_u8_u32(d2u32), d0u8); - for (i = 0; i < 4; i++, dst += stride) { - q1u16 = vdupq_n_u16((uint16_t)left[i]); - q1s16 = vaddq_s16(vreinterpretq_s16_u16(q1u16), - vreinterpretq_s16_u16(q3u16)); - d0u8 = vqmovun_s16(q1s16); - vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0); - } -} - -void vpx_tm_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int j; - uint16x8_t q0u16, q3u16, q10u16; - int16x8_t q0s16; - uint16x4_t d20u16; - uint8x8_t d0u8, d2u8, d30u8; - - d0u8 = vld1_dup_u8(above - 1); - d30u8 = vld1_u8(left); - d2u8 = vld1_u8(above); - q10u16 = vmovl_u8(d30u8); - q3u16 = vsubl_u8(d2u8, d0u8); - d20u16 = vget_low_u16(q10u16); - for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) { - q0u16 = vdupq_lane_u16(d20u16, 0); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), - vreinterpretq_s16_u16(q0u16)); - d0u8 = vqmovun_s16(q0s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); - dst += stride; - q0u16 = vdupq_lane_u16(d20u16, 1); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), - vreinterpretq_s16_u16(q0u16)); - d0u8 = vqmovun_s16(q0s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); - dst += stride; - q0u16 = vdupq_lane_u16(d20u16, 2); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), - vreinterpretq_s16_u16(q0u16)); - d0u8 = vqmovun_s16(q0s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); - dst += stride; - q0u16 = vdupq_lane_u16(d20u16, 
3); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16), - vreinterpretq_s16_u16(q0u16)); - d0u8 = vqmovun_s16(q0s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8)); - dst += stride; - } -} - -void vpx_tm_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int j, k; - uint16x8_t q0u16, q2u16, q3u16, q8u16, q10u16; - uint8x16_t q0u8, q1u8; - int16x8_t q0s16, q1s16, q8s16, q11s16; - uint16x4_t d20u16; - uint8x8_t d2u8, d3u8, d18u8, d22u8, d23u8; - - q0u8 = vld1q_dup_u8(above - 1); - q1u8 = vld1q_u8(above); - q2u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8)); - q3u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8)); - for (k = 0; k < 2; k++, left += 8) { - d18u8 = vld1_u8(left); - q10u16 = vmovl_u8(d18u8); - d20u16 = vget_low_u16(q10u16); - for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) { - q0u16 = vdupq_lane_u16(d20u16, 0); - q8u16 = vdupq_lane_u16(d20u16, 1); - q1s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q2u16)); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q3u16)); - q11s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), - vreinterpretq_s16_u16(q2u16)); - q8s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), - vreinterpretq_s16_u16(q3u16)); - d2u8 = vqmovun_s16(q1s16); - d3u8 = vqmovun_s16(q0s16); - d22u8 = vqmovun_s16(q11s16); - d23u8 = vqmovun_s16(q8s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8)); - vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8)); - dst += stride; - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8)); - vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8)); - dst += stride; - - q0u16 = vdupq_lane_u16(d20u16, 2); - q8u16 = vdupq_lane_u16(d20u16, 3); - q1s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q2u16)); - q0s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q3u16)); - q11s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), - vreinterpretq_s16_u16(q2u16)); - 
q8s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16), - vreinterpretq_s16_u16(q3u16)); - d2u8 = vqmovun_s16(q1s16); - d3u8 = vqmovun_s16(q0s16); - d22u8 = vqmovun_s16(q11s16); - d23u8 = vqmovun_s16(q8s16); - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8)); - vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8)); - dst += stride; - vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8)); - vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8)); - dst += stride; - } - } -} - -void vpx_tm_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - int j, k; - uint16x8_t q0u16, q3u16, q8u16, q9u16, q10u16, q11u16; - uint8x16_t q0u8, q1u8, q2u8; - int16x8_t q12s16, q13s16, q14s16, q15s16; - uint16x4_t d6u16; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d26u8; - - q0u8 = vld1q_dup_u8(above - 1); - q1u8 = vld1q_u8(above); - q2u8 = vld1q_u8(above + 16); - q8u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8)); - q9u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8)); - q10u16 = vsubl_u8(vget_low_u8(q2u8), vget_low_u8(q0u8)); - q11u16 = vsubl_u8(vget_high_u8(q2u8), vget_high_u8(q0u8)); - for (k = 0; k < 4; k++, left += 8) { - d26u8 = vld1_u8(left); - q3u16 = vmovl_u8(d26u8); - d6u16 = vget_low_u16(q3u16); - for (j = 0; j < 2; j++, d6u16 = vget_high_u16(q3u16)) { - q0u16 = vdupq_lane_u16(d6u16, 0); - q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q8u16)); - q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q9u16)); - q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q10u16)); - q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q11u16)); - d0u8 = vqmovun_s16(q12s16); - d1u8 = vqmovun_s16(q13s16); - d2u8 = vqmovun_s16(q14s16); - d3u8 = vqmovun_s16(q15s16); - q0u8 = vcombine_u8(d0u8, d1u8); - q1u8 = vcombine_u8(d2u8, d3u8); - vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); - vst1q_u64((uint64_t *)(dst + 16), 
vreinterpretq_u64_u8(q1u8)); - dst += stride; - - q0u16 = vdupq_lane_u16(d6u16, 1); - q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q8u16)); - q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q9u16)); - q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q10u16)); - q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q11u16)); - d0u8 = vqmovun_s16(q12s16); - d1u8 = vqmovun_s16(q13s16); - d2u8 = vqmovun_s16(q14s16); - d3u8 = vqmovun_s16(q15s16); - q0u8 = vcombine_u8(d0u8, d1u8); - q1u8 = vcombine_u8(d2u8, d3u8); - vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); - vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); - dst += stride; - - q0u16 = vdupq_lane_u16(d6u16, 2); - q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q8u16)); - q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q9u16)); - q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q10u16)); - q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q11u16)); - d0u8 = vqmovun_s16(q12s16); - d1u8 = vqmovun_s16(q13s16); - d2u8 = vqmovun_s16(q14s16); - d3u8 = vqmovun_s16(q15s16); - q0u8 = vcombine_u8(d0u8, d1u8); - q1u8 = vcombine_u8(d2u8, d3u8); - vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); - vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); - dst += stride; - - q0u16 = vdupq_lane_u16(d6u16, 3); - q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q8u16)); - q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q9u16)); - q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q10u16)); - q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16), - vreinterpretq_s16_u16(q11u16)); - d0u8 = vqmovun_s16(q12s16); - d1u8 = vqmovun_s16(q13s16); - d2u8 = vqmovun_s16(q14s16); - d3u8 = vqmovun_s16(q15s16); - q0u8 = vcombine_u8(d0u8, d1u8); - q1u8 = 
vcombine_u8(d2u8, d3u8); - vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8)); - vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8)); - dst += stride; - } - } -} -#endif // !HAVE_NEON_ASM diff --git a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_16_neon.c b/thirdparty/libvpx/vpx_dsp/arm/loopfilter_16_neon.c deleted file mode 100644 index d24e6adc8a..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_16_neon.c +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> - -#include "./vpx_dsp_rtcd.h" -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" - -static INLINE void loop_filter_neon_16( - uint8x16_t qblimit, // blimit - uint8x16_t qlimit, // limit - uint8x16_t qthresh, // thresh - uint8x16_t q3, // p3 - uint8x16_t q4, // p2 - uint8x16_t q5, // p1 - uint8x16_t q6, // p0 - uint8x16_t q7, // q0 - uint8x16_t q8, // q1 - uint8x16_t q9, // q2 - uint8x16_t q10, // q3 - uint8x16_t *q5r, // p1 - uint8x16_t *q6r, // p0 - uint8x16_t *q7r, // q0 - uint8x16_t *q8r) { // q1 - uint8x16_t q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8; - int16x8_t q2s16, q11s16; - uint16x8_t q4u16; - int8x16_t q0s8, q1s8, q2s8, q11s8, q12s8, q13s8; - int8x8_t d2s8, d3s8; - - q11u8 = vabdq_u8(q3, q4); - q12u8 = vabdq_u8(q4, q5); - q13u8 = vabdq_u8(q5, q6); - q14u8 = vabdq_u8(q8, q7); - q3 = vabdq_u8(q9, q8); - q4 = vabdq_u8(q10, q9); - - q11u8 = vmaxq_u8(q11u8, q12u8); - q12u8 = vmaxq_u8(q13u8, q14u8); - q3 = vmaxq_u8(q3, q4); - q15u8 = vmaxq_u8(q11u8, q12u8); - - q9 = vabdq_u8(q6, q7); - - // vp8_hevmask - q13u8 = vcgtq_u8(q13u8, qthresh); - q14u8 = vcgtq_u8(q14u8, 
qthresh); - q15u8 = vmaxq_u8(q15u8, q3); - - q2u8 = vabdq_u8(q5, q8); - q9 = vqaddq_u8(q9, q9); - - q15u8 = vcgeq_u8(qlimit, q15u8); - - // vp8_filter() function - // convert to signed - q10 = vdupq_n_u8(0x80); - q8 = veorq_u8(q8, q10); - q7 = veorq_u8(q7, q10); - q6 = veorq_u8(q6, q10); - q5 = veorq_u8(q5, q10); - - q2u8 = vshrq_n_u8(q2u8, 1); - q9 = vqaddq_u8(q9, q2u8); - - q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)), - vget_low_s8(vreinterpretq_s8_u8(q6))); - q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)), - vget_high_s8(vreinterpretq_s8_u8(q6))); - - q9 = vcgeq_u8(qblimit, q9); - - q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5), - vreinterpretq_s8_u8(q8)); - - q14u8 = vorrq_u8(q13u8, q14u8); - - q4u16 = vdupq_n_u16(3); - q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16)); - q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16)); - - q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8); - q15u8 = vandq_u8(q15u8, q9); - - q1s8 = vreinterpretq_s8_u8(q1u8); - q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8)); - q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8)); - - q4 = vdupq_n_u8(3); - q9 = vdupq_n_u8(4); - // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0)) - d2s8 = vqmovn_s16(q2s16); - d3s8 = vqmovn_s16(q11s16); - q1s8 = vcombine_s8(d2s8, d3s8); - q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8); - q1s8 = vreinterpretq_s8_u8(q1u8); - - q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q4)); - q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9)); - q2s8 = vshrq_n_s8(q2s8, 3); - q1s8 = vshrq_n_s8(q1s8, 3); - - q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8); - q0s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8); - - q1s8 = vrshrq_n_s8(q1s8, 1); - q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8)); - - q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8); - q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8); - - *q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q10); - *q7r = veorq_u8(vreinterpretq_u8_s8(q0s8), q10); - *q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q10); - *q5r = 
veorq_u8(vreinterpretq_u8_s8(q13s8), q10); - return; -} - -void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int p /* pitch */, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - uint8x8_t dblimit0, dlimit0, dthresh0, dblimit1, dlimit1, dthresh1; - uint8x16_t qblimit, qlimit, qthresh; - uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8; - - dblimit0 = vld1_u8(blimit0); - dlimit0 = vld1_u8(limit0); - dthresh0 = vld1_u8(thresh0); - dblimit1 = vld1_u8(blimit1); - dlimit1 = vld1_u8(limit1); - dthresh1 = vld1_u8(thresh1); - qblimit = vcombine_u8(dblimit0, dblimit1); - qlimit = vcombine_u8(dlimit0, dlimit1); - qthresh = vcombine_u8(dthresh0, dthresh1); - - s -= (p << 2); - - q3u8 = vld1q_u8(s); - s += p; - q4u8 = vld1q_u8(s); - s += p; - q5u8 = vld1q_u8(s); - s += p; - q6u8 = vld1q_u8(s); - s += p; - q7u8 = vld1q_u8(s); - s += p; - q8u8 = vld1q_u8(s); - s += p; - q9u8 = vld1q_u8(s); - s += p; - q10u8 = vld1q_u8(s); - - loop_filter_neon_16(qblimit, qlimit, qthresh, - q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, - &q5u8, &q6u8, &q7u8, &q8u8); - - s -= (p * 5); - vst1q_u8(s, q5u8); - s += p; - vst1q_u8(s, q6u8); - s += p; - vst1q_u8(s, q7u8); - s += p; - vst1q_u8(s, q8u8); - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_4_neon.c b/thirdparty/libvpx/vpx_dsp/arm/loopfilter_4_neon.c deleted file mode 100644 index 7f3ee70b94..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_4_neon.c +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <arm_neon.h> - -#include "./vpx_dsp_rtcd.h" - -static INLINE void loop_filter_neon( - uint8x8_t dblimit, // flimit - uint8x8_t dlimit, // limit - uint8x8_t dthresh, // thresh - uint8x8_t d3u8, // p3 - uint8x8_t d4u8, // p2 - uint8x8_t d5u8, // p1 - uint8x8_t d6u8, // p0 - uint8x8_t d7u8, // q0 - uint8x8_t d16u8, // q1 - uint8x8_t d17u8, // q2 - uint8x8_t d18u8, // q3 - uint8x8_t *d4ru8, // p1 - uint8x8_t *d5ru8, // p0 - uint8x8_t *d6ru8, // q0 - uint8x8_t *d7ru8) { // q1 - uint8x8_t d19u8, d20u8, d21u8, d22u8, d23u8, d27u8, d28u8; - int16x8_t q12s16; - int8x8_t d19s8, d20s8, d21s8, d26s8, d27s8, d28s8; - - d19u8 = vabd_u8(d3u8, d4u8); - d20u8 = vabd_u8(d4u8, d5u8); - d21u8 = vabd_u8(d5u8, d6u8); - d22u8 = vabd_u8(d16u8, d7u8); - d3u8 = vabd_u8(d17u8, d16u8); - d4u8 = vabd_u8(d18u8, d17u8); - - d19u8 = vmax_u8(d19u8, d20u8); - d20u8 = vmax_u8(d21u8, d22u8); - d3u8 = vmax_u8(d3u8, d4u8); - d23u8 = vmax_u8(d19u8, d20u8); - - d17u8 = vabd_u8(d6u8, d7u8); - - d21u8 = vcgt_u8(d21u8, dthresh); - d22u8 = vcgt_u8(d22u8, dthresh); - d23u8 = vmax_u8(d23u8, d3u8); - - d28u8 = vabd_u8(d5u8, d16u8); - d17u8 = vqadd_u8(d17u8, d17u8); - - d23u8 = vcge_u8(dlimit, d23u8); - - d18u8 = vdup_n_u8(0x80); - d5u8 = veor_u8(d5u8, d18u8); - d6u8 = veor_u8(d6u8, d18u8); - d7u8 = veor_u8(d7u8, d18u8); - d16u8 = veor_u8(d16u8, d18u8); - - d28u8 = vshr_n_u8(d28u8, 1); - d17u8 = vqadd_u8(d17u8, d28u8); - - d19u8 = vdup_n_u8(3); - - d28s8 = vsub_s8(vreinterpret_s8_u8(d7u8), - vreinterpret_s8_u8(d6u8)); - - d17u8 = vcge_u8(dblimit, d17u8); - - d27s8 = vqsub_s8(vreinterpret_s8_u8(d5u8), - vreinterpret_s8_u8(d16u8)); - - d22u8 = vorr_u8(d21u8, d22u8); - - q12s16 = vmull_s8(d28s8, vreinterpret_s8_u8(d19u8)); - - d27u8 = vand_u8(vreinterpret_u8_s8(d27s8), d22u8); - d23u8 = vand_u8(d23u8, d17u8); - - q12s16 = vaddw_s8(q12s16, vreinterpret_s8_u8(d27u8)); - - d17u8 = vdup_n_u8(4); - - d27s8 = vqmovn_s16(q12s16); - d27u8 = vand_u8(vreinterpret_u8_s8(d27s8), d23u8); - d27s8 = 
vreinterpret_s8_u8(d27u8); - - d28s8 = vqadd_s8(d27s8, vreinterpret_s8_u8(d19u8)); - d27s8 = vqadd_s8(d27s8, vreinterpret_s8_u8(d17u8)); - d28s8 = vshr_n_s8(d28s8, 3); - d27s8 = vshr_n_s8(d27s8, 3); - - d19s8 = vqadd_s8(vreinterpret_s8_u8(d6u8), d28s8); - d26s8 = vqsub_s8(vreinterpret_s8_u8(d7u8), d27s8); - - d27s8 = vrshr_n_s8(d27s8, 1); - d27s8 = vbic_s8(d27s8, vreinterpret_s8_u8(d22u8)); - - d21s8 = vqadd_s8(vreinterpret_s8_u8(d5u8), d27s8); - d20s8 = vqsub_s8(vreinterpret_s8_u8(d16u8), d27s8); - - *d4ru8 = veor_u8(vreinterpret_u8_s8(d21s8), d18u8); - *d5ru8 = veor_u8(vreinterpret_u8_s8(d19s8), d18u8); - *d6ru8 = veor_u8(vreinterpret_u8_s8(d26s8), d18u8); - *d7ru8 = veor_u8(vreinterpret_u8_s8(d20s8), d18u8); - return; -} - -void vpx_lpf_horizontal_4_neon( - uint8_t *src, - int pitch, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh) { - int i; - uint8_t *s, *psrc; - uint8x8_t dblimit, dlimit, dthresh; - uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8; - - dblimit = vld1_u8(blimit); - dlimit = vld1_u8(limit); - dthresh = vld1_u8(thresh); - - psrc = src - (pitch << 2); - for (i = 0; i < 1; i++) { - s = psrc + i * 8; - - d3u8 = vld1_u8(s); - s += pitch; - d4u8 = vld1_u8(s); - s += pitch; - d5u8 = vld1_u8(s); - s += pitch; - d6u8 = vld1_u8(s); - s += pitch; - d7u8 = vld1_u8(s); - s += pitch; - d16u8 = vld1_u8(s); - s += pitch; - d17u8 = vld1_u8(s); - s += pitch; - d18u8 = vld1_u8(s); - - loop_filter_neon(dblimit, dlimit, dthresh, - d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, - &d4u8, &d5u8, &d6u8, &d7u8); - - s -= (pitch * 5); - vst1_u8(s, d4u8); - s += pitch; - vst1_u8(s, d5u8); - s += pitch; - vst1_u8(s, d6u8); - s += pitch; - vst1_u8(s, d7u8); - } - return; -} - -void vpx_lpf_vertical_4_neon( - uint8_t *src, - int pitch, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh) { - int i, pitch8; - uint8_t *s; - uint8x8_t dblimit, dlimit, dthresh; - uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, 
d18u8; - uint32x2x2_t d2tmp0, d2tmp1, d2tmp2, d2tmp3; - uint16x4x2_t d2tmp4, d2tmp5, d2tmp6, d2tmp7; - uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11; - uint8x8x4_t d4Result; - - dblimit = vld1_u8(blimit); - dlimit = vld1_u8(limit); - dthresh = vld1_u8(thresh); - - pitch8 = pitch * 8; - for (i = 0; i < 1; i++, src += pitch8) { - s = src - (i + 1) * 4; - - d3u8 = vld1_u8(s); - s += pitch; - d4u8 = vld1_u8(s); - s += pitch; - d5u8 = vld1_u8(s); - s += pitch; - d6u8 = vld1_u8(s); - s += pitch; - d7u8 = vld1_u8(s); - s += pitch; - d16u8 = vld1_u8(s); - s += pitch; - d17u8 = vld1_u8(s); - s += pitch; - d18u8 = vld1_u8(s); - - d2tmp0 = vtrn_u32(vreinterpret_u32_u8(d3u8), - vreinterpret_u32_u8(d7u8)); - d2tmp1 = vtrn_u32(vreinterpret_u32_u8(d4u8), - vreinterpret_u32_u8(d16u8)); - d2tmp2 = vtrn_u32(vreinterpret_u32_u8(d5u8), - vreinterpret_u32_u8(d17u8)); - d2tmp3 = vtrn_u32(vreinterpret_u32_u8(d6u8), - vreinterpret_u32_u8(d18u8)); - - d2tmp4 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[0]), - vreinterpret_u16_u32(d2tmp2.val[0])); - d2tmp5 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[0]), - vreinterpret_u16_u32(d2tmp3.val[0])); - d2tmp6 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[1]), - vreinterpret_u16_u32(d2tmp2.val[1])); - d2tmp7 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[1]), - vreinterpret_u16_u32(d2tmp3.val[1])); - - d2tmp8 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[0]), - vreinterpret_u8_u16(d2tmp5.val[0])); - d2tmp9 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[1]), - vreinterpret_u8_u16(d2tmp5.val[1])); - d2tmp10 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[0]), - vreinterpret_u8_u16(d2tmp7.val[0])); - d2tmp11 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[1]), - vreinterpret_u8_u16(d2tmp7.val[1])); - - d3u8 = d2tmp8.val[0]; - d4u8 = d2tmp8.val[1]; - d5u8 = d2tmp9.val[0]; - d6u8 = d2tmp9.val[1]; - d7u8 = d2tmp10.val[0]; - d16u8 = d2tmp10.val[1]; - d17u8 = d2tmp11.val[0]; - d18u8 = d2tmp11.val[1]; - - loop_filter_neon(dblimit, dlimit, dthresh, - d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, 
d18u8, - &d4u8, &d5u8, &d6u8, &d7u8); - - d4Result.val[0] = d4u8; - d4Result.val[1] = d5u8; - d4Result.val[2] = d6u8; - d4Result.val[3] = d7u8; - - src -= 2; - vst4_lane_u8(src, d4Result, 0); - src += pitch; - vst4_lane_u8(src, d4Result, 1); - src += pitch; - vst4_lane_u8(src, d4Result, 2); - src += pitch; - vst4_lane_u8(src, d4Result, 3); - src += pitch; - vst4_lane_u8(src, d4Result, 4); - src += pitch; - vst4_lane_u8(src, d4Result, 5); - src += pitch; - vst4_lane_u8(src, d4Result, 6); - src += pitch; - vst4_lane_u8(src, d4Result, 7); - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_8_neon.c b/thirdparty/libvpx/vpx_dsp/arm/loopfilter_8_neon.c deleted file mode 100644 index ec3757380d..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_8_neon.c +++ /dev/null @@ -1,445 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <arm_neon.h> - -#include "./vpx_dsp_rtcd.h" - -static INLINE void mbloop_filter_neon( - uint8x8_t dblimit, // mblimit - uint8x8_t dlimit, // limit - uint8x8_t dthresh, // thresh - uint8x8_t d3u8, // p2 - uint8x8_t d4u8, // p2 - uint8x8_t d5u8, // p1 - uint8x8_t d6u8, // p0 - uint8x8_t d7u8, // q0 - uint8x8_t d16u8, // q1 - uint8x8_t d17u8, // q2 - uint8x8_t d18u8, // q3 - uint8x8_t *d0ru8, // p1 - uint8x8_t *d1ru8, // p1 - uint8x8_t *d2ru8, // p0 - uint8x8_t *d3ru8, // q0 - uint8x8_t *d4ru8, // q1 - uint8x8_t *d5ru8) { // q1 - uint32_t flat; - uint8x8_t d0u8, d1u8, d2u8, d19u8, d20u8, d21u8, d22u8, d23u8, d24u8; - uint8x8_t d25u8, d26u8, d27u8, d28u8, d29u8, d30u8, d31u8; - int16x8_t q15s16; - uint16x8_t q10u16, q14u16; - int8x8_t d21s8, d24s8, d25s8, d26s8, d28s8, d29s8, d30s8; - - d19u8 = vabd_u8(d3u8, d4u8); - d20u8 = vabd_u8(d4u8, d5u8); - d21u8 = vabd_u8(d5u8, d6u8); - d22u8 = vabd_u8(d16u8, d7u8); - d23u8 = vabd_u8(d17u8, d16u8); - d24u8 = vabd_u8(d18u8, d17u8); - - d19u8 = vmax_u8(d19u8, d20u8); - d20u8 = vmax_u8(d21u8, d22u8); - - d25u8 = vabd_u8(d6u8, d4u8); - - d23u8 = vmax_u8(d23u8, d24u8); - - d26u8 = vabd_u8(d7u8, d17u8); - - d19u8 = vmax_u8(d19u8, d20u8); - - d24u8 = vabd_u8(d6u8, d7u8); - d27u8 = vabd_u8(d3u8, d6u8); - d28u8 = vabd_u8(d18u8, d7u8); - - d19u8 = vmax_u8(d19u8, d23u8); - - d23u8 = vabd_u8(d5u8, d16u8); - d24u8 = vqadd_u8(d24u8, d24u8); - - - d19u8 = vcge_u8(dlimit, d19u8); - - - d25u8 = vmax_u8(d25u8, d26u8); - d26u8 = vmax_u8(d27u8, d28u8); - - d23u8 = vshr_n_u8(d23u8, 1); - - d25u8 = vmax_u8(d25u8, d26u8); - - d24u8 = vqadd_u8(d24u8, d23u8); - - d20u8 = vmax_u8(d20u8, d25u8); - - d23u8 = vdup_n_u8(1); - d24u8 = vcge_u8(dblimit, d24u8); - - d21u8 = vcgt_u8(d21u8, dthresh); - - d20u8 = vcge_u8(d23u8, d20u8); - - d19u8 = vand_u8(d19u8, d24u8); - - d23u8 = vcgt_u8(d22u8, dthresh); - - d20u8 = vand_u8(d20u8, d19u8); - - d22u8 = vdup_n_u8(0x80); - - d23u8 = vorr_u8(d21u8, d23u8); - - q10u16 = 
vcombine_u16(vreinterpret_u16_u8(d20u8), - vreinterpret_u16_u8(d21u8)); - - d30u8 = vshrn_n_u16(q10u16, 4); - flat = vget_lane_u32(vreinterpret_u32_u8(d30u8), 0); - - if (flat == 0xffffffff) { // Check for all 1's, power_branch_only - d27u8 = vdup_n_u8(3); - d21u8 = vdup_n_u8(2); - q14u16 = vaddl_u8(d6u8, d7u8); - q14u16 = vmlal_u8(q14u16, d3u8, d27u8); - q14u16 = vmlal_u8(q14u16, d4u8, d21u8); - q14u16 = vaddw_u8(q14u16, d5u8); - *d0ru8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d3u8); - q14u16 = vsubw_u8(q14u16, d4u8); - q14u16 = vaddw_u8(q14u16, d5u8); - q14u16 = vaddw_u8(q14u16, d16u8); - *d1ru8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d3u8); - q14u16 = vsubw_u8(q14u16, d5u8); - q14u16 = vaddw_u8(q14u16, d6u8); - q14u16 = vaddw_u8(q14u16, d17u8); - *d2ru8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d3u8); - q14u16 = vsubw_u8(q14u16, d6u8); - q14u16 = vaddw_u8(q14u16, d7u8); - q14u16 = vaddw_u8(q14u16, d18u8); - *d3ru8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d4u8); - q14u16 = vsubw_u8(q14u16, d7u8); - q14u16 = vaddw_u8(q14u16, d16u8); - q14u16 = vaddw_u8(q14u16, d18u8); - *d4ru8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d5u8); - q14u16 = vsubw_u8(q14u16, d16u8); - q14u16 = vaddw_u8(q14u16, d17u8); - q14u16 = vaddw_u8(q14u16, d18u8); - *d5ru8 = vqrshrn_n_u16(q14u16, 3); - } else { - d21u8 = veor_u8(d7u8, d22u8); - d24u8 = veor_u8(d6u8, d22u8); - d25u8 = veor_u8(d5u8, d22u8); - d26u8 = veor_u8(d16u8, d22u8); - - d27u8 = vdup_n_u8(3); - - d28s8 = vsub_s8(vreinterpret_s8_u8(d21u8), vreinterpret_s8_u8(d24u8)); - d29s8 = vqsub_s8(vreinterpret_s8_u8(d25u8), vreinterpret_s8_u8(d26u8)); - - q15s16 = vmull_s8(d28s8, vreinterpret_s8_u8(d27u8)); - - d29s8 = vand_s8(d29s8, vreinterpret_s8_u8(d23u8)); - - q15s16 = vaddw_s8(q15s16, d29s8); - - d29u8 = vdup_n_u8(4); - - d28s8 = vqmovn_s16(q15s16); - - d28s8 = vand_s8(d28s8, vreinterpret_s8_u8(d19u8)); - - d30s8 = vqadd_s8(d28s8, 
vreinterpret_s8_u8(d27u8)); - d29s8 = vqadd_s8(d28s8, vreinterpret_s8_u8(d29u8)); - d30s8 = vshr_n_s8(d30s8, 3); - d29s8 = vshr_n_s8(d29s8, 3); - - d24s8 = vqadd_s8(vreinterpret_s8_u8(d24u8), d30s8); - d21s8 = vqsub_s8(vreinterpret_s8_u8(d21u8), d29s8); - - d29s8 = vrshr_n_s8(d29s8, 1); - d29s8 = vbic_s8(d29s8, vreinterpret_s8_u8(d23u8)); - - d25s8 = vqadd_s8(vreinterpret_s8_u8(d25u8), d29s8); - d26s8 = vqsub_s8(vreinterpret_s8_u8(d26u8), d29s8); - - if (flat == 0) { // filter_branch_only - *d0ru8 = d4u8; - *d1ru8 = veor_u8(vreinterpret_u8_s8(d25s8), d22u8); - *d2ru8 = veor_u8(vreinterpret_u8_s8(d24s8), d22u8); - *d3ru8 = veor_u8(vreinterpret_u8_s8(d21s8), d22u8); - *d4ru8 = veor_u8(vreinterpret_u8_s8(d26s8), d22u8); - *d5ru8 = d17u8; - return; - } - - d21u8 = veor_u8(vreinterpret_u8_s8(d21s8), d22u8); - d24u8 = veor_u8(vreinterpret_u8_s8(d24s8), d22u8); - d25u8 = veor_u8(vreinterpret_u8_s8(d25s8), d22u8); - d26u8 = veor_u8(vreinterpret_u8_s8(d26s8), d22u8); - - d23u8 = vdup_n_u8(2); - q14u16 = vaddl_u8(d6u8, d7u8); - q14u16 = vmlal_u8(q14u16, d3u8, d27u8); - q14u16 = vmlal_u8(q14u16, d4u8, d23u8); - - d0u8 = vbsl_u8(d20u8, dblimit, d4u8); - - q14u16 = vaddw_u8(q14u16, d5u8); - - d1u8 = vbsl_u8(d20u8, dlimit, d25u8); - - d30u8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d3u8); - q14u16 = vsubw_u8(q14u16, d4u8); - q14u16 = vaddw_u8(q14u16, d5u8); - q14u16 = vaddw_u8(q14u16, d16u8); - - d2u8 = vbsl_u8(d20u8, dthresh, d24u8); - - d31u8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d3u8); - q14u16 = vsubw_u8(q14u16, d5u8); - q14u16 = vaddw_u8(q14u16, d6u8); - q14u16 = vaddw_u8(q14u16, d17u8); - - *d0ru8 = vbsl_u8(d20u8, d30u8, d0u8); - - d23u8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d3u8); - q14u16 = vsubw_u8(q14u16, d6u8); - q14u16 = vaddw_u8(q14u16, d7u8); - - *d1ru8 = vbsl_u8(d20u8, d31u8, d1u8); - - q14u16 = vaddw_u8(q14u16, d18u8); - - *d2ru8 = vbsl_u8(d20u8, d23u8, d2u8); - - d22u8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 
= vsubw_u8(q14u16, d4u8); - q14u16 = vsubw_u8(q14u16, d7u8); - q14u16 = vaddw_u8(q14u16, d16u8); - - d3u8 = vbsl_u8(d20u8, d3u8, d21u8); - - q14u16 = vaddw_u8(q14u16, d18u8); - - d4u8 = vbsl_u8(d20u8, d4u8, d26u8); - - d6u8 = vqrshrn_n_u16(q14u16, 3); - - q14u16 = vsubw_u8(q14u16, d5u8); - q14u16 = vsubw_u8(q14u16, d16u8); - q14u16 = vaddw_u8(q14u16, d17u8); - q14u16 = vaddw_u8(q14u16, d18u8); - - d5u8 = vbsl_u8(d20u8, d5u8, d17u8); - - d7u8 = vqrshrn_n_u16(q14u16, 3); - - *d3ru8 = vbsl_u8(d20u8, d22u8, d3u8); - *d4ru8 = vbsl_u8(d20u8, d6u8, d4u8); - *d5ru8 = vbsl_u8(d20u8, d7u8, d5u8); - } - return; -} - -void vpx_lpf_horizontal_8_neon( - uint8_t *src, - int pitch, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh) { - int i; - uint8_t *s, *psrc; - uint8x8_t dblimit, dlimit, dthresh; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; - uint8x8_t d16u8, d17u8, d18u8; - - dblimit = vld1_u8(blimit); - dlimit = vld1_u8(limit); - dthresh = vld1_u8(thresh); - - psrc = src - (pitch << 2); - for (i = 0; i < 1; i++) { - s = psrc + i * 8; - - d3u8 = vld1_u8(s); - s += pitch; - d4u8 = vld1_u8(s); - s += pitch; - d5u8 = vld1_u8(s); - s += pitch; - d6u8 = vld1_u8(s); - s += pitch; - d7u8 = vld1_u8(s); - s += pitch; - d16u8 = vld1_u8(s); - s += pitch; - d17u8 = vld1_u8(s); - s += pitch; - d18u8 = vld1_u8(s); - - mbloop_filter_neon(dblimit, dlimit, dthresh, - d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, - &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8); - - s -= (pitch * 6); - vst1_u8(s, d0u8); - s += pitch; - vst1_u8(s, d1u8); - s += pitch; - vst1_u8(s, d2u8); - s += pitch; - vst1_u8(s, d3u8); - s += pitch; - vst1_u8(s, d4u8); - s += pitch; - vst1_u8(s, d5u8); - } - return; -} - -void vpx_lpf_vertical_8_neon( - uint8_t *src, - int pitch, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh) { - int i; - uint8_t *s; - uint8x8_t dblimit, dlimit, dthresh; - uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; - uint8x8_t 
d16u8, d17u8, d18u8; - uint32x2x2_t d2tmp0, d2tmp1, d2tmp2, d2tmp3; - uint16x4x2_t d2tmp4, d2tmp5, d2tmp6, d2tmp7; - uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11; - uint8x8x4_t d4Result; - uint8x8x2_t d2Result; - - dblimit = vld1_u8(blimit); - dlimit = vld1_u8(limit); - dthresh = vld1_u8(thresh); - - for (i = 0; i < 1; i++) { - s = src + (i * (pitch << 3)) - 4; - - d3u8 = vld1_u8(s); - s += pitch; - d4u8 = vld1_u8(s); - s += pitch; - d5u8 = vld1_u8(s); - s += pitch; - d6u8 = vld1_u8(s); - s += pitch; - d7u8 = vld1_u8(s); - s += pitch; - d16u8 = vld1_u8(s); - s += pitch; - d17u8 = vld1_u8(s); - s += pitch; - d18u8 = vld1_u8(s); - - d2tmp0 = vtrn_u32(vreinterpret_u32_u8(d3u8), - vreinterpret_u32_u8(d7u8)); - d2tmp1 = vtrn_u32(vreinterpret_u32_u8(d4u8), - vreinterpret_u32_u8(d16u8)); - d2tmp2 = vtrn_u32(vreinterpret_u32_u8(d5u8), - vreinterpret_u32_u8(d17u8)); - d2tmp3 = vtrn_u32(vreinterpret_u32_u8(d6u8), - vreinterpret_u32_u8(d18u8)); - - d2tmp4 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[0]), - vreinterpret_u16_u32(d2tmp2.val[0])); - d2tmp5 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[0]), - vreinterpret_u16_u32(d2tmp3.val[0])); - d2tmp6 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[1]), - vreinterpret_u16_u32(d2tmp2.val[1])); - d2tmp7 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[1]), - vreinterpret_u16_u32(d2tmp3.val[1])); - - d2tmp8 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[0]), - vreinterpret_u8_u16(d2tmp5.val[0])); - d2tmp9 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[1]), - vreinterpret_u8_u16(d2tmp5.val[1])); - d2tmp10 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[0]), - vreinterpret_u8_u16(d2tmp7.val[0])); - d2tmp11 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[1]), - vreinterpret_u8_u16(d2tmp7.val[1])); - - d3u8 = d2tmp8.val[0]; - d4u8 = d2tmp8.val[1]; - d5u8 = d2tmp9.val[0]; - d6u8 = d2tmp9.val[1]; - d7u8 = d2tmp10.val[0]; - d16u8 = d2tmp10.val[1]; - d17u8 = d2tmp11.val[0]; - d18u8 = d2tmp11.val[1]; - - mbloop_filter_neon(dblimit, dlimit, dthresh, - d3u8, d4u8, d5u8, d6u8, d7u8, 
d16u8, d17u8, d18u8, - &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8); - - d4Result.val[0] = d0u8; - d4Result.val[1] = d1u8; - d4Result.val[2] = d2u8; - d4Result.val[3] = d3u8; - - d2Result.val[0] = d4u8; - d2Result.val[1] = d5u8; - - s = src - 3; - vst4_lane_u8(s, d4Result, 0); - s += pitch; - vst4_lane_u8(s, d4Result, 1); - s += pitch; - vst4_lane_u8(s, d4Result, 2); - s += pitch; - vst4_lane_u8(s, d4Result, 3); - s += pitch; - vst4_lane_u8(s, d4Result, 4); - s += pitch; - vst4_lane_u8(s, d4Result, 5); - s += pitch; - vst4_lane_u8(s, d4Result, 6); - s += pitch; - vst4_lane_u8(s, d4Result, 7); - - s = src + 1; - vst2_lane_u8(s, d2Result, 0); - s += pitch; - vst2_lane_u8(s, d2Result, 1); - s += pitch; - vst2_lane_u8(s, d2Result, 2); - s += pitch; - vst2_lane_u8(s, d2Result, 3); - s += pitch; - vst2_lane_u8(s, d2Result, 4); - s += pitch; - vst2_lane_u8(s, d2Result, 5); - s += pitch; - vst2_lane_u8(s, d2Result, 6); - s += pitch; - vst2_lane_u8(s, d2Result, 7); - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_neon.c b/thirdparty/libvpx/vpx_dsp/arm/loopfilter_neon.c deleted file mode 100644 index aa31f29358..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/loopfilter_neon.c +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <arm_neon.h> - -#include "./vpx_dsp_rtcd.h" -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" - -void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0); - vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1); -} - -#if HAVE_NEON_ASM -void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0); - vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1); -} - -void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0); - vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1); -} - -void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh) { - vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh); - vpx_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh); -} -#endif // HAVE_NEON_ASM diff --git a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon.c b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon.c deleted file mode 100644 index 8632250138..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_avg_neon.c +++ /dev/null @@ -1,373 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> -#include <assert.h> - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "vpx/vpx_integer.h" -#include "vpx_ports/mem.h" - -static INLINE int32x4_t MULTIPLY_BY_Q0( - int16x4_t dsrc0, - int16x4_t dsrc1, - int16x4_t dsrc2, - int16x4_t dsrc3, - int16x4_t dsrc4, - int16x4_t dsrc5, - int16x4_t dsrc6, - int16x4_t dsrc7, - int16x8_t q0s16) { - int32x4_t qdst; - int16x4_t d0s16, d1s16; - - d0s16 = vget_low_s16(q0s16); - d1s16 = vget_high_s16(q0s16); - - qdst = vmull_lane_s16(dsrc0, d0s16, 0); - qdst = vmlal_lane_s16(qdst, dsrc1, d0s16, 1); - qdst = vmlal_lane_s16(qdst, dsrc2, d0s16, 2); - qdst = vmlal_lane_s16(qdst, dsrc3, d0s16, 3); - qdst = vmlal_lane_s16(qdst, dsrc4, d1s16, 0); - qdst = vmlal_lane_s16(qdst, dsrc5, d1s16, 1); - qdst = vmlal_lane_s16(qdst, dsrc6, d1s16, 2); - qdst = vmlal_lane_s16(qdst, dsrc7, d1s16, 3); - return qdst; -} - -void vpx_convolve8_avg_horiz_neon( - const uint8_t *src, - ptrdiff_t src_stride, - uint8_t *dst, - ptrdiff_t dst_stride, - const int16_t *filter_x, - int x_step_q4, - const int16_t *filter_y, // unused - int y_step_q4, // unused - int w, - int h) { - int width; - const uint8_t *s; - uint8_t *d; - uint8x8_t d2u8, d3u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8; - uint32x2_t d2u32, d3u32, d6u32, d7u32, d28u32, d29u32, d30u32, d31u32; - uint8x16_t q1u8, q3u8, q12u8, q13u8, q14u8, q15u8; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16; - uint16x4_t d2u16, d3u16, d4u16, d5u16, d16u16, d17u16, d18u16, d19u16; - int16x8_t q0s16; - uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; - int32x4_t q1s32, q2s32, q14s32, q15s32; - uint16x8x2_t q0x2u16; - uint8x8x2_t d0x2u8, d1x2u8; - uint32x2x2_t d0x2u32; - uint16x4x2_t d0x2u16, d1x2u16; 
- uint32x4x2_t q0x2u32; - - assert(x_step_q4 == 16); - - q0s16 = vld1q_s16(filter_x); - - src -= 3; // adjust for taps - for (; h > 0; h -= 4) { // loop_horiz_v - s = src; - d24u8 = vld1_u8(s); - s += src_stride; - d25u8 = vld1_u8(s); - s += src_stride; - d26u8 = vld1_u8(s); - s += src_stride; - d27u8 = vld1_u8(s); - - q12u8 = vcombine_u8(d24u8, d25u8); - q13u8 = vcombine_u8(d26u8, d27u8); - - q0x2u16 = vtrnq_u16(vreinterpretq_u16_u8(q12u8), - vreinterpretq_u16_u8(q13u8)); - d24u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[0])); - d25u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[0])); - d26u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[1])); - d27u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[1])); - d0x2u8 = vtrn_u8(d24u8, d25u8); - d1x2u8 = vtrn_u8(d26u8, d27u8); - - __builtin_prefetch(src + src_stride * 4); - __builtin_prefetch(src + src_stride * 5); - - q8u16 = vmovl_u8(d0x2u8.val[0]); - q9u16 = vmovl_u8(d0x2u8.val[1]); - q10u16 = vmovl_u8(d1x2u8.val[0]); - q11u16 = vmovl_u8(d1x2u8.val[1]); - - src += 7; - d16u16 = vget_low_u16(q8u16); - d17u16 = vget_high_u16(q8u16); - d18u16 = vget_low_u16(q9u16); - d19u16 = vget_high_u16(q9u16); - q8u16 = vcombine_u16(d16u16, d18u16); // vswp 17 18 - q9u16 = vcombine_u16(d17u16, d19u16); - - d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q10u16)); // vmov 23 21 - for (width = w; - width > 0; - width -= 4, src += 4, dst += 4) { // loop_horiz - s = src; - d28u32 = vld1_dup_u32((const uint32_t *)s); - s += src_stride; - d29u32 = vld1_dup_u32((const uint32_t *)s); - s += src_stride; - d31u32 = vld1_dup_u32((const uint32_t *)s); - s += src_stride; - d30u32 = vld1_dup_u32((const uint32_t *)s); - - __builtin_prefetch(src + 64); - - d0x2u16 = vtrn_u16(vreinterpret_u16_u32(d28u32), - vreinterpret_u16_u32(d31u32)); - d1x2u16 = vtrn_u16(vreinterpret_u16_u32(d29u32), - vreinterpret_u16_u32(d30u32)); - d0x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[0]), // d28 - 
vreinterpret_u8_u16(d1x2u16.val[0])); // d29 - d1x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[1]), // d31 - vreinterpret_u8_u16(d1x2u16.val[1])); // d30 - - __builtin_prefetch(src + 64 + src_stride); - - q14u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]); - q15u8 = vcombine_u8(d1x2u8.val[1], d1x2u8.val[0]); - q0x2u32 = vtrnq_u32(vreinterpretq_u32_u8(q14u8), - vreinterpretq_u32_u8(q15u8)); - - d28u8 = vreinterpret_u8_u32(vget_low_u32(q0x2u32.val[0])); - d29u8 = vreinterpret_u8_u32(vget_high_u32(q0x2u32.val[0])); - q12u16 = vmovl_u8(d28u8); - q13u16 = vmovl_u8(d29u8); - - __builtin_prefetch(src + 64 + src_stride * 2); - - d = dst; - d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 0); - d += dst_stride; - d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 0); - d += dst_stride; - d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 1); - d += dst_stride; - d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 1); - - d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16)); - d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16)); - d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16)); - d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16)); - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - - q1s32 = MULTIPLY_BY_Q0(d16s16, d17s16, d20s16, d22s16, - d18s16, d19s16, d23s16, d24s16, q0s16); - q2s32 = MULTIPLY_BY_Q0(d17s16, d20s16, d22s16, d18s16, - d19s16, d23s16, d24s16, d26s16, q0s16); - q14s32 = MULTIPLY_BY_Q0(d20s16, d22s16, d18s16, d19s16, - d23s16, d24s16, d26s16, d27s16, q0s16); - q15s32 = MULTIPLY_BY_Q0(d22s16, d18s16, d19s16, d23s16, - d24s16, d26s16, d27s16, d25s16, q0s16); - - __builtin_prefetch(src + 64 + src_stride * 3); - - d2u16 = vqrshrun_n_s32(q1s32, 7); - d3u16 = vqrshrun_n_s32(q2s32, 7); - d4u16 = vqrshrun_n_s32(q14s32, 7); - d5u16 = 
vqrshrun_n_s32(q15s32, 7); - - q1u16 = vcombine_u16(d2u16, d3u16); - q2u16 = vcombine_u16(d4u16, d5u16); - - d2u8 = vqmovn_u16(q1u16); - d3u8 = vqmovn_u16(q2u16); - - d0x2u16 = vtrn_u16(vreinterpret_u16_u8(d2u8), - vreinterpret_u16_u8(d3u8)); - d0x2u32 = vtrn_u32(vreinterpret_u32_u16(d0x2u16.val[0]), - vreinterpret_u32_u16(d0x2u16.val[1])); - d0x2u8 = vtrn_u8(vreinterpret_u8_u32(d0x2u32.val[0]), - vreinterpret_u8_u32(d0x2u32.val[1])); - - q1u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]); - q3u8 = vreinterpretq_u8_u32(vcombine_u32(d6u32, d7u32)); - - q1u8 = vrhaddq_u8(q1u8, q3u8); - - d2u32 = vreinterpret_u32_u8(vget_low_u8(q1u8)); - d3u32 = vreinterpret_u32_u8(vget_high_u8(q1u8)); - - d = dst; - vst1_lane_u32((uint32_t *)d, d2u32, 0); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d3u32, 0); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d2u32, 1); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d3u32, 1); - - q8u16 = q9u16; - d20s16 = d23s16; - q11u16 = q12u16; - q9u16 = q13u16; - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - } - src += src_stride * 4 - w - 7; - dst += dst_stride * 4 - w; - } - return; -} - -void vpx_convolve8_avg_vert_neon( - const uint8_t *src, - ptrdiff_t src_stride, - uint8_t *dst, - ptrdiff_t dst_stride, - const int16_t *filter_x, // unused - int x_step_q4, // unused - const int16_t *filter_y, - int y_step_q4, - int w, - int h) { - int height; - const uint8_t *s; - uint8_t *d; - uint8x8_t d2u8, d3u8; - uint32x2_t d2u32, d3u32, d6u32, d7u32; - uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32; - uint8x16_t q1u8, q3u8; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16; - int16x4_t d24s16, d25s16, d26s16, d27s16; - uint16x4_t d2u16, d3u16, d4u16, d5u16; - int16x8_t q0s16; - uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; - int32x4_t q1s32, q2s32, q14s32, q15s32; - - assert(y_step_q4 == 16); - - src -= src_stride * 3; - q0s16 = vld1q_s16(filter_y); - for (; w > 0; w -= 4, src += 4, 
dst += 4) { // loop_vert_h - s = src; - d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0); - s += src_stride; - d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1); - s += src_stride; - d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0); - s += src_stride; - d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 1); - s += src_stride; - d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 0); - s += src_stride; - d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 1); - s += src_stride; - d22u32 = vld1_lane_u32((const uint32_t *)s, d22u32, 0); - s += src_stride; - - q8u16 = vmovl_u8(vreinterpret_u8_u32(d16u32)); - q9u16 = vmovl_u8(vreinterpret_u8_u32(d18u32)); - q10u16 = vmovl_u8(vreinterpret_u8_u32(d20u32)); - q11u16 = vmovl_u8(vreinterpret_u8_u32(d22u32)); - - d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16)); - d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16)); - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d = dst; - for (height = h; height > 0; height -= 4) { // loop_vert - d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 0); - s += src_stride; - d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 0); - s += src_stride; - d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 1); - s += src_stride; - d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 1); - s += src_stride; - - q12u16 = vmovl_u8(vreinterpret_u8_u32(d24u32)); - q13u16 = vmovl_u8(vreinterpret_u8_u32(d26u32)); - - d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 0); - d += dst_stride; - d6u32 = vld1_lane_u32((const uint32_t *)d, d6u32, 1); - d += dst_stride; - d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 0); - d += dst_stride; - d7u32 = vld1_lane_u32((const uint32_t *)d, d7u32, 1); - d -= dst_stride * 3; - - d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16)); - d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16)); - d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16)); - d21s16 = vreinterpret_s16_u16(vget_high_u16(q10u16)); - d24s16 = 
vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - - __builtin_prefetch(s); - __builtin_prefetch(s + src_stride); - q1s32 = MULTIPLY_BY_Q0(d16s16, d17s16, d18s16, d19s16, - d20s16, d21s16, d22s16, d24s16, q0s16); - __builtin_prefetch(s + src_stride * 2); - __builtin_prefetch(s + src_stride * 3); - q2s32 = MULTIPLY_BY_Q0(d17s16, d18s16, d19s16, d20s16, - d21s16, d22s16, d24s16, d26s16, q0s16); - __builtin_prefetch(d); - __builtin_prefetch(d + dst_stride); - q14s32 = MULTIPLY_BY_Q0(d18s16, d19s16, d20s16, d21s16, - d22s16, d24s16, d26s16, d27s16, q0s16); - __builtin_prefetch(d + dst_stride * 2); - __builtin_prefetch(d + dst_stride * 3); - q15s32 = MULTIPLY_BY_Q0(d19s16, d20s16, d21s16, d22s16, - d24s16, d26s16, d27s16, d25s16, q0s16); - - d2u16 = vqrshrun_n_s32(q1s32, 7); - d3u16 = vqrshrun_n_s32(q2s32, 7); - d4u16 = vqrshrun_n_s32(q14s32, 7); - d5u16 = vqrshrun_n_s32(q15s32, 7); - - q1u16 = vcombine_u16(d2u16, d3u16); - q2u16 = vcombine_u16(d4u16, d5u16); - - d2u8 = vqmovn_u16(q1u16); - d3u8 = vqmovn_u16(q2u16); - - q1u8 = vcombine_u8(d2u8, d3u8); - q3u8 = vreinterpretq_u8_u32(vcombine_u32(d6u32, d7u32)); - - q1u8 = vrhaddq_u8(q1u8, q3u8); - - d2u32 = vreinterpret_u32_u8(vget_low_u8(q1u8)); - d3u32 = vreinterpret_u32_u8(vget_high_u8(q1u8)); - - vst1_lane_u32((uint32_t *)d, d2u32, 0); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d2u32, 1); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d3u32, 0); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d3u32, 1); - d += dst_stride; - - q8u16 = q10u16; - d18s16 = d22s16; - d19s16 = d24s16; - q10u16 = q13u16; - d22s16 = d25s16; - } - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c deleted file mode 100644 index 9bd715e2c6..0000000000 --- 
a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c +++ /dev/null @@ -1,340 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <arm_neon.h> -#include <assert.h> - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "vpx/vpx_integer.h" -#include "vpx_ports/mem.h" - -static INLINE int32x4_t MULTIPLY_BY_Q0( - int16x4_t dsrc0, - int16x4_t dsrc1, - int16x4_t dsrc2, - int16x4_t dsrc3, - int16x4_t dsrc4, - int16x4_t dsrc5, - int16x4_t dsrc6, - int16x4_t dsrc7, - int16x8_t q0s16) { - int32x4_t qdst; - int16x4_t d0s16, d1s16; - - d0s16 = vget_low_s16(q0s16); - d1s16 = vget_high_s16(q0s16); - - qdst = vmull_lane_s16(dsrc0, d0s16, 0); - qdst = vmlal_lane_s16(qdst, dsrc1, d0s16, 1); - qdst = vmlal_lane_s16(qdst, dsrc2, d0s16, 2); - qdst = vmlal_lane_s16(qdst, dsrc3, d0s16, 3); - qdst = vmlal_lane_s16(qdst, dsrc4, d1s16, 0); - qdst = vmlal_lane_s16(qdst, dsrc5, d1s16, 1); - qdst = vmlal_lane_s16(qdst, dsrc6, d1s16, 2); - qdst = vmlal_lane_s16(qdst, dsrc7, d1s16, 3); - return qdst; -} - -void vpx_convolve8_horiz_neon( - const uint8_t *src, - ptrdiff_t src_stride, - uint8_t *dst, - ptrdiff_t dst_stride, - const int16_t *filter_x, - int x_step_q4, - const int16_t *filter_y, // unused - int y_step_q4, // unused - int w, - int h) { - int width; - const uint8_t *s, *psrc; - uint8_t *d, *pdst; - uint8x8_t d2u8, d3u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8; - uint32x2_t d2u32, d3u32, d28u32, d29u32, d30u32, d31u32; - uint8x16_t q12u8, q13u8, q14u8, q15u8; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d22s16, d23s16; - int16x4_t d24s16, d25s16, d26s16, d27s16; - uint16x4_t d2u16, d3u16, 
d4u16, d5u16, d16u16, d17u16, d18u16, d19u16; - int16x8_t q0s16; - uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; - int32x4_t q1s32, q2s32, q14s32, q15s32; - uint16x8x2_t q0x2u16; - uint8x8x2_t d0x2u8, d1x2u8; - uint32x2x2_t d0x2u32; - uint16x4x2_t d0x2u16, d1x2u16; - uint32x4x2_t q0x2u32; - - assert(x_step_q4 == 16); - - q0s16 = vld1q_s16(filter_x); - - src -= 3; // adjust for taps - for (; h > 0; h -= 4, - src += src_stride * 4, - dst += dst_stride * 4) { // loop_horiz_v - s = src; - d24u8 = vld1_u8(s); - s += src_stride; - d25u8 = vld1_u8(s); - s += src_stride; - d26u8 = vld1_u8(s); - s += src_stride; - d27u8 = vld1_u8(s); - - q12u8 = vcombine_u8(d24u8, d25u8); - q13u8 = vcombine_u8(d26u8, d27u8); - - q0x2u16 = vtrnq_u16(vreinterpretq_u16_u8(q12u8), - vreinterpretq_u16_u8(q13u8)); - d24u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[0])); - d25u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[0])); - d26u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[1])); - d27u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[1])); - d0x2u8 = vtrn_u8(d24u8, d25u8); - d1x2u8 = vtrn_u8(d26u8, d27u8); - - __builtin_prefetch(src + src_stride * 4); - __builtin_prefetch(src + src_stride * 5); - __builtin_prefetch(src + src_stride * 6); - - q8u16 = vmovl_u8(d0x2u8.val[0]); - q9u16 = vmovl_u8(d0x2u8.val[1]); - q10u16 = vmovl_u8(d1x2u8.val[0]); - q11u16 = vmovl_u8(d1x2u8.val[1]); - - d16u16 = vget_low_u16(q8u16); - d17u16 = vget_high_u16(q8u16); - d18u16 = vget_low_u16(q9u16); - d19u16 = vget_high_u16(q9u16); - q8u16 = vcombine_u16(d16u16, d18u16); // vswp 17 18 - q9u16 = vcombine_u16(d17u16, d19u16); - - d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16)); - d23s16 = vreinterpret_s16_u16(vget_high_u16(q10u16)); // vmov 23 21 - for (width = w, psrc = src + 7, pdst = dst; - width > 0; - width -= 4, psrc += 4, pdst += 4) { // loop_horiz - s = psrc; - d28u32 = vld1_dup_u32((const uint32_t *)s); - s += src_stride; - d29u32 = vld1_dup_u32((const uint32_t *)s); - 
s += src_stride; - d31u32 = vld1_dup_u32((const uint32_t *)s); - s += src_stride; - d30u32 = vld1_dup_u32((const uint32_t *)s); - - __builtin_prefetch(psrc + 64); - - d0x2u16 = vtrn_u16(vreinterpret_u16_u32(d28u32), - vreinterpret_u16_u32(d31u32)); - d1x2u16 = vtrn_u16(vreinterpret_u16_u32(d29u32), - vreinterpret_u16_u32(d30u32)); - d0x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[0]), // d28 - vreinterpret_u8_u16(d1x2u16.val[0])); // d29 - d1x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[1]), // d31 - vreinterpret_u8_u16(d1x2u16.val[1])); // d30 - - __builtin_prefetch(psrc + 64 + src_stride); - - q14u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]); - q15u8 = vcombine_u8(d1x2u8.val[1], d1x2u8.val[0]); - q0x2u32 = vtrnq_u32(vreinterpretq_u32_u8(q14u8), - vreinterpretq_u32_u8(q15u8)); - - d28u8 = vreinterpret_u8_u32(vget_low_u32(q0x2u32.val[0])); - d29u8 = vreinterpret_u8_u32(vget_high_u32(q0x2u32.val[0])); - q12u16 = vmovl_u8(d28u8); - q13u16 = vmovl_u8(d29u8); - - __builtin_prefetch(psrc + 64 + src_stride * 2); - - d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16)); - d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16)); - d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16)); - d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16)); - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - - q1s32 = MULTIPLY_BY_Q0(d16s16, d17s16, d20s16, d22s16, - d18s16, d19s16, d23s16, d24s16, q0s16); - q2s32 = MULTIPLY_BY_Q0(d17s16, d20s16, d22s16, d18s16, - d19s16, d23s16, d24s16, d26s16, q0s16); - q14s32 = MULTIPLY_BY_Q0(d20s16, d22s16, d18s16, d19s16, - d23s16, d24s16, d26s16, d27s16, q0s16); - q15s32 = MULTIPLY_BY_Q0(d22s16, d18s16, d19s16, d23s16, - d24s16, d26s16, d27s16, d25s16, q0s16); - - __builtin_prefetch(psrc + 60 + src_stride * 3); - - d2u16 = 
vqrshrun_n_s32(q1s32, 7); - d3u16 = vqrshrun_n_s32(q2s32, 7); - d4u16 = vqrshrun_n_s32(q14s32, 7); - d5u16 = vqrshrun_n_s32(q15s32, 7); - - q1u16 = vcombine_u16(d2u16, d3u16); - q2u16 = vcombine_u16(d4u16, d5u16); - - d2u8 = vqmovn_u16(q1u16); - d3u8 = vqmovn_u16(q2u16); - - d0x2u16 = vtrn_u16(vreinterpret_u16_u8(d2u8), - vreinterpret_u16_u8(d3u8)); - d0x2u32 = vtrn_u32(vreinterpret_u32_u16(d0x2u16.val[0]), - vreinterpret_u32_u16(d0x2u16.val[1])); - d0x2u8 = vtrn_u8(vreinterpret_u8_u32(d0x2u32.val[0]), - vreinterpret_u8_u32(d0x2u32.val[1])); - - d2u32 = vreinterpret_u32_u8(d0x2u8.val[0]); - d3u32 = vreinterpret_u32_u8(d0x2u8.val[1]); - - d = pdst; - vst1_lane_u32((uint32_t *)d, d2u32, 0); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d3u32, 0); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d2u32, 1); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d3u32, 1); - - q8u16 = q9u16; - d20s16 = d23s16; - q11u16 = q12u16; - q9u16 = q13u16; - d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); - } - } - return; -} - -void vpx_convolve8_vert_neon( - const uint8_t *src, - ptrdiff_t src_stride, - uint8_t *dst, - ptrdiff_t dst_stride, - const int16_t *filter_x, // unused - int x_step_q4, // unused - const int16_t *filter_y, - int y_step_q4, - int w, - int h) { - int height; - const uint8_t *s; - uint8_t *d; - uint32x2_t d2u32, d3u32; - uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32; - int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16; - int16x4_t d24s16, d25s16, d26s16, d27s16; - uint16x4_t d2u16, d3u16, d4u16, d5u16; - int16x8_t q0s16; - uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; - int32x4_t q1s32, q2s32, q14s32, q15s32; - - assert(y_step_q4 == 16); - - src -= src_stride * 3; - q0s16 = vld1q_s16(filter_y); - for (; w > 0; w -= 4, src += 4, dst += 4) { // loop_vert_h - s = src; - d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0); - s += src_stride; - d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1); - 
s += src_stride; - d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0); - s += src_stride; - d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 1); - s += src_stride; - d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 0); - s += src_stride; - d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 1); - s += src_stride; - d22u32 = vld1_lane_u32((const uint32_t *)s, d22u32, 0); - s += src_stride; - - q8u16 = vmovl_u8(vreinterpret_u8_u32(d16u32)); - q9u16 = vmovl_u8(vreinterpret_u8_u32(d18u32)); - q10u16 = vmovl_u8(vreinterpret_u8_u32(d20u32)); - q11u16 = vmovl_u8(vreinterpret_u8_u32(d22u32)); - - d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16)); - d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16)); - d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); - d = dst; - for (height = h; height > 0; height -= 4) { // loop_vert - d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 0); - s += src_stride; - d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 0); - s += src_stride; - d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 1); - s += src_stride; - d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 1); - s += src_stride; - - q12u16 = vmovl_u8(vreinterpret_u8_u32(d24u32)); - q13u16 = vmovl_u8(vreinterpret_u8_u32(d26u32)); - - d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16)); - d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16)); - d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16)); - d21s16 = vreinterpret_s16_u16(vget_high_u16(q10u16)); - d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); - d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); - d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); - d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); - - __builtin_prefetch(d); - __builtin_prefetch(d + dst_stride); - q1s32 = MULTIPLY_BY_Q0(d16s16, d17s16, d18s16, d19s16, - d20s16, d21s16, d22s16, d24s16, q0s16); - __builtin_prefetch(d + dst_stride * 2); - __builtin_prefetch(d + dst_stride * 3); - q2s32 = MULTIPLY_BY_Q0(d17s16, d18s16, d19s16, d20s16, - 
d21s16, d22s16, d24s16, d26s16, q0s16); - __builtin_prefetch(s); - __builtin_prefetch(s + src_stride); - q14s32 = MULTIPLY_BY_Q0(d18s16, d19s16, d20s16, d21s16, - d22s16, d24s16, d26s16, d27s16, q0s16); - __builtin_prefetch(s + src_stride * 2); - __builtin_prefetch(s + src_stride * 3); - q15s32 = MULTIPLY_BY_Q0(d19s16, d20s16, d21s16, d22s16, - d24s16, d26s16, d27s16, d25s16, q0s16); - - d2u16 = vqrshrun_n_s32(q1s32, 7); - d3u16 = vqrshrun_n_s32(q2s32, 7); - d4u16 = vqrshrun_n_s32(q14s32, 7); - d5u16 = vqrshrun_n_s32(q15s32, 7); - - q1u16 = vcombine_u16(d2u16, d3u16); - q2u16 = vcombine_u16(d4u16, d5u16); - - d2u32 = vreinterpret_u32_u8(vqmovn_u16(q1u16)); - d3u32 = vreinterpret_u32_u8(vqmovn_u16(q2u16)); - - vst1_lane_u32((uint32_t *)d, d2u32, 0); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d2u32, 1); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d3u32, 0); - d += dst_stride; - vst1_lane_u32((uint32_t *)d, d3u32, 1); - d += dst_stride; - - q8u16 = q10u16; - d18s16 = d22s16; - d19s16 = d24s16; - q10u16 = q13u16; - d22s16 = d25s16; - } - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_avg_neon.c b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_avg_neon.c deleted file mode 100644 index dc58a332f8..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_avg_neon.c +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <arm_neon.h> - -#include "./vpx_dsp_rtcd.h" -#include "vpx/vpx_integer.h" - -void vpx_convolve_avg_neon( - const uint8_t *src, // r0 - ptrdiff_t src_stride, // r1 - uint8_t *dst, // r2 - ptrdiff_t dst_stride, // r3 - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, - int h) { - uint8_t *d; - uint8x8_t d0u8, d1u8, d2u8, d3u8; - uint32x2_t d0u32, d2u32; - uint8x16_t q0u8, q1u8, q2u8, q3u8, q8u8, q9u8, q10u8, q11u8; - (void)filter_x; (void)filter_x_stride; - (void)filter_y; (void)filter_y_stride; - - d = dst; - if (w > 32) { // avg64 - for (; h > 0; h -= 1) { - q0u8 = vld1q_u8(src); - q1u8 = vld1q_u8(src + 16); - q2u8 = vld1q_u8(src + 32); - q3u8 = vld1q_u8(src + 48); - src += src_stride; - q8u8 = vld1q_u8(d); - q9u8 = vld1q_u8(d + 16); - q10u8 = vld1q_u8(d + 32); - q11u8 = vld1q_u8(d + 48); - d += dst_stride; - - q0u8 = vrhaddq_u8(q0u8, q8u8); - q1u8 = vrhaddq_u8(q1u8, q9u8); - q2u8 = vrhaddq_u8(q2u8, q10u8); - q3u8 = vrhaddq_u8(q3u8, q11u8); - - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q1u8); - vst1q_u8(dst + 32, q2u8); - vst1q_u8(dst + 48, q3u8); - dst += dst_stride; - } - } else if (w == 32) { // avg32 - for (; h > 0; h -= 2) { - q0u8 = vld1q_u8(src); - q1u8 = vld1q_u8(src + 16); - src += src_stride; - q2u8 = vld1q_u8(src); - q3u8 = vld1q_u8(src + 16); - src += src_stride; - q8u8 = vld1q_u8(d); - q9u8 = vld1q_u8(d + 16); - d += dst_stride; - q10u8 = vld1q_u8(d); - q11u8 = vld1q_u8(d + 16); - d += dst_stride; - - q0u8 = vrhaddq_u8(q0u8, q8u8); - q1u8 = vrhaddq_u8(q1u8, q9u8); - q2u8 = vrhaddq_u8(q2u8, q10u8); - q3u8 = vrhaddq_u8(q3u8, q11u8); - - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q1u8); - dst += dst_stride; - vst1q_u8(dst, q2u8); - vst1q_u8(dst + 16, q3u8); - dst += dst_stride; - } - } else if (w > 8) { // avg16 - for (; h > 0; h -= 2) { - q0u8 = vld1q_u8(src); - src += src_stride; - q1u8 = vld1q_u8(src); - src += src_stride; - q2u8 = vld1q_u8(d); - d += dst_stride; - q3u8 = 
vld1q_u8(d); - d += dst_stride; - - q0u8 = vrhaddq_u8(q0u8, q2u8); - q1u8 = vrhaddq_u8(q1u8, q3u8); - - vst1q_u8(dst, q0u8); - dst += dst_stride; - vst1q_u8(dst, q1u8); - dst += dst_stride; - } - } else if (w == 8) { // avg8 - for (; h > 0; h -= 2) { - d0u8 = vld1_u8(src); - src += src_stride; - d1u8 = vld1_u8(src); - src += src_stride; - d2u8 = vld1_u8(d); - d += dst_stride; - d3u8 = vld1_u8(d); - d += dst_stride; - - q0u8 = vcombine_u8(d0u8, d1u8); - q1u8 = vcombine_u8(d2u8, d3u8); - q0u8 = vrhaddq_u8(q0u8, q1u8); - - vst1_u8(dst, vget_low_u8(q0u8)); - dst += dst_stride; - vst1_u8(dst, vget_high_u8(q0u8)); - dst += dst_stride; - } - } else { // avg4 - for (; h > 0; h -= 2) { - d0u32 = vld1_lane_u32((const uint32_t *)src, d0u32, 0); - src += src_stride; - d0u32 = vld1_lane_u32((const uint32_t *)src, d0u32, 1); - src += src_stride; - d2u32 = vld1_lane_u32((const uint32_t *)d, d2u32, 0); - d += dst_stride; - d2u32 = vld1_lane_u32((const uint32_t *)d, d2u32, 1); - d += dst_stride; - - d0u8 = vrhadd_u8(vreinterpret_u8_u32(d0u32), - vreinterpret_u8_u32(d2u32)); - - d0u32 = vreinterpret_u32_u8(d0u8); - vst1_lane_u32((uint32_t *)dst, d0u32, 0); - dst += dst_stride; - vst1_lane_u32((uint32_t *)dst, d0u32, 1); - dst += dst_stride; - } - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_copy_neon.c b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_copy_neon.c deleted file mode 100644 index d8fb97a861..0000000000 --- a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_copy_neon.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <arm_neon.h> - -#include "./vpx_dsp_rtcd.h" -#include "vpx/vpx_integer.h" - -void vpx_convolve_copy_neon( - const uint8_t *src, // r0 - ptrdiff_t src_stride, // r1 - uint8_t *dst, // r2 - ptrdiff_t dst_stride, // r3 - const int16_t *filter_x, - int filter_x_stride, - const int16_t *filter_y, - int filter_y_stride, - int w, - int h) { - uint8x8_t d0u8, d2u8; - uint8x16_t q0u8, q1u8, q2u8, q3u8; - (void)filter_x; (void)filter_x_stride; - (void)filter_y; (void)filter_y_stride; - - if (w > 32) { // copy64 - for (; h > 0; h--) { - q0u8 = vld1q_u8(src); - q1u8 = vld1q_u8(src + 16); - q2u8 = vld1q_u8(src + 32); - q3u8 = vld1q_u8(src + 48); - src += src_stride; - - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q1u8); - vst1q_u8(dst + 32, q2u8); - vst1q_u8(dst + 48, q3u8); - dst += dst_stride; - } - } else if (w == 32) { // copy32 - for (; h > 0; h -= 2) { - q0u8 = vld1q_u8(src); - q1u8 = vld1q_u8(src + 16); - src += src_stride; - q2u8 = vld1q_u8(src); - q3u8 = vld1q_u8(src + 16); - src += src_stride; - - vst1q_u8(dst, q0u8); - vst1q_u8(dst + 16, q1u8); - dst += dst_stride; - vst1q_u8(dst, q2u8); - vst1q_u8(dst + 16, q3u8); - dst += dst_stride; - } - } else if (w > 8) { // copy16 - for (; h > 0; h -= 2) { - q0u8 = vld1q_u8(src); - src += src_stride; - q1u8 = vld1q_u8(src); - src += src_stride; - - vst1q_u8(dst, q0u8); - dst += dst_stride; - vst1q_u8(dst, q1u8); - dst += dst_stride; - } - } else if (w == 8) { // copy8 - for (; h > 0; h -= 2) { - d0u8 = vld1_u8(src); - src += src_stride; - d2u8 = vld1_u8(src); - src += src_stride; - - vst1_u8(dst, d0u8); - dst += dst_stride; - vst1_u8(dst, d2u8); - dst += dst_stride; - } - } else { // copy4 - for (; h > 0; h--) { - *(uint32_t *)dst = *(const uint32_t *)src; - src += src_stride; - dst += dst_stride; - } - } - return; -} diff --git a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_neon.c b/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_neon.c deleted file mode 100644 index 1506ce6203..0000000000 --- 
a/thirdparty/libvpx/vpx_dsp/arm/vpx_convolve_neon.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <assert.h> - -#include "./vpx_dsp_rtcd.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_ports/mem.h" - -void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - /* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the - * maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4). - */ - DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]); - - // Account for the vertical phase needing 3 lines prior and 4 lines post - int intermediate_height = h + 7; - - assert(y_step_q4 == 16); - assert(x_step_q4 == 16); - - /* Filter starting 3 lines back. The neon implementation will ignore the - * given height and filter a multiple of 4 lines. Since this goes in to - * the temp buffer which has lots of extra room and is subsequently discarded - * this is safe if somewhat less than ideal. 
- */ - vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride, - temp, 64, - filter_x, x_step_q4, filter_y, y_step_q4, - w, intermediate_height); - - /* Step into the temp buffer 3 lines to get the actual frame data */ - vpx_convolve8_vert_neon(temp + 64 * 3, 64, - dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h); -} - -void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]); - int intermediate_height = h + 7; - - assert(y_step_q4 == 16); - assert(x_step_q4 == 16); - - /* This implementation has the same issues as above. In addition, we only want - * to average the values after both passes. - */ - vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride, - temp, 64, - filter_x, x_step_q4, filter_y, y_step_q4, - w, intermediate_height); - vpx_convolve8_avg_vert_neon(temp + 64 * 3, - 64, dst, dst_stride, - filter_x, x_step_q4, filter_y, y_step_q4, - w, h); -} diff --git a/thirdparty/libvpx/vpx_dsp/bitreader.c b/thirdparty/libvpx/vpx_dsp/bitreader.c deleted file mode 100644 index 8140e78e70..0000000000 --- a/thirdparty/libvpx/vpx_dsp/bitreader.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ -#include <stdlib.h> - -#include "./vpx_config.h" - -#include "vpx_dsp/bitreader.h" -#include "vpx_dsp/prob.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_ports/mem.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_util/endian_inl.h" - -int vpx_reader_init(vpx_reader *r, - const uint8_t *buffer, - size_t size, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) { - if (size && !buffer) { - return 1; - } else { - r->buffer_end = buffer + size; - r->buffer = buffer; - r->value = 0; - r->count = -8; - r->range = 255; - r->decrypt_cb = decrypt_cb; - r->decrypt_state = decrypt_state; - vpx_reader_fill(r); - return vpx_read_bit(r) != 0; // marker bit - } -} - -void vpx_reader_fill(vpx_reader *r) { - const uint8_t *const buffer_end = r->buffer_end; - const uint8_t *buffer = r->buffer; - const uint8_t *buffer_start = buffer; - BD_VALUE value = r->value; - int count = r->count; - const size_t bytes_left = buffer_end - buffer; - const size_t bits_left = bytes_left * CHAR_BIT; - int shift = BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT); - - if (r->decrypt_cb) { - size_t n = VPXMIN(sizeof(r->clear_buffer), bytes_left); - r->decrypt_cb(r->decrypt_state, buffer, r->clear_buffer, (int)n); - buffer = r->clear_buffer; - buffer_start = r->clear_buffer; - } - if (bits_left > BD_VALUE_SIZE) { - const int bits = (shift & 0xfffffff8) + CHAR_BIT; - BD_VALUE nv; - BD_VALUE big_endian_values; - memcpy(&big_endian_values, buffer, sizeof(BD_VALUE)); -#if SIZE_MAX == 0xffffffffffffffffULL - big_endian_values = HToBE64(big_endian_values); -#else - big_endian_values = HToBE32(big_endian_values); -#endif - nv = big_endian_values >> (BD_VALUE_SIZE - bits); - count += bits; - buffer += (bits >> 3); - value = r->value | (nv << (shift & 0x7)); - } else { - const int bits_over = (int)(shift + CHAR_BIT - (int)bits_left); - int loop_end = 0; - if (bits_over >= 0) { - count += LOTS_OF_BITS; - loop_end = bits_over; - } - - if (bits_over < 0 || bits_left) { - while (shift >= loop_end) { - count 
+= CHAR_BIT; - value |= (BD_VALUE)*buffer++ << shift; - shift -= CHAR_BIT; - } - } - } - - // NOTE: Variable 'buffer' may not relate to 'r->buffer' after decryption, - // so we increase 'r->buffer' by the amount that 'buffer' moved, rather than - // assign 'buffer' to 'r->buffer'. - r->buffer += buffer - buffer_start; - r->value = value; - r->count = count; -} - -const uint8_t *vpx_reader_find_end(vpx_reader *r) { - // Find the end of the coded buffer - while (r->count > CHAR_BIT && r->count < BD_VALUE_SIZE) { - r->count -= CHAR_BIT; - r->buffer--; - } - return r->buffer; -} diff --git a/thirdparty/libvpx/vpx_dsp/bitreader.h b/thirdparty/libvpx/vpx_dsp/bitreader.h deleted file mode 100644 index 9a441b4107..0000000000 --- a/thirdparty/libvpx/vpx_dsp/bitreader.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_DSP_BITREADER_H_ -#define VPX_DSP_BITREADER_H_ - -#include <stddef.h> -#include <limits.h> - -#include "./vpx_config.h" -#include "vpx_ports/mem.h" -#include "vpx/vp8dx.h" -#include "vpx/vpx_integer.h" -#include "vpx_dsp/prob.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef size_t BD_VALUE; - -#define BD_VALUE_SIZE ((int)sizeof(BD_VALUE) * CHAR_BIT) - -// This is meant to be a large, positive constant that can still be efficiently -// loaded as an immediate (on platforms like ARM, for example). -// Even relatively modest values like 100 would work fine. -#define LOTS_OF_BITS 0x40000000 - -typedef struct { - // Be careful when reordering this struct, it may impact the cache negatively. 
- BD_VALUE value; - unsigned int range; - int count; - const uint8_t *buffer_end; - const uint8_t *buffer; - vpx_decrypt_cb decrypt_cb; - void *decrypt_state; - uint8_t clear_buffer[sizeof(BD_VALUE) + 1]; -} vpx_reader; - -int vpx_reader_init(vpx_reader *r, - const uint8_t *buffer, - size_t size, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state); - -void vpx_reader_fill(vpx_reader *r); - -const uint8_t *vpx_reader_find_end(vpx_reader *r); - -static INLINE int vpx_reader_has_error(vpx_reader *r) { - // Check if we have reached the end of the buffer. - // - // Variable 'count' stores the number of bits in the 'value' buffer, minus - // 8. The top byte is part of the algorithm, and the remainder is buffered - // to be shifted into it. So if count == 8, the top 16 bits of 'value' are - // occupied, 8 for the algorithm and 8 in the buffer. - // - // When reading a byte from the user's buffer, count is filled with 8 and - // one byte is filled into the value buffer. When we reach the end of the - // data, count is additionally filled with LOTS_OF_BITS. So when - // count == LOTS_OF_BITS - 1, the user's data has been exhausted. - // - // 1 if we have tried to decode bits after the end of stream was encountered. - // 0 No error. 
- return r->count > BD_VALUE_SIZE && r->count < LOTS_OF_BITS; -} - -static INLINE int vpx_read(vpx_reader *r, int prob) { - unsigned int bit = 0; - BD_VALUE value; - BD_VALUE bigsplit; - int count; - unsigned int range; - unsigned int split = (r->range * prob + (256 - prob)) >> CHAR_BIT; - - if (r->count < 0) - vpx_reader_fill(r); - - value = r->value; - count = r->count; - - bigsplit = (BD_VALUE)split << (BD_VALUE_SIZE - CHAR_BIT); - - range = split; - - if (value >= bigsplit) { - range = r->range - split; - value = value - bigsplit; - bit = 1; - } - - { - register int shift = vpx_norm[range]; - range <<= shift; - value <<= shift; - count -= shift; - } - r->value = value; - r->count = count; - r->range = range; - - return bit; -} - -static INLINE int vpx_read_bit(vpx_reader *r) { - return vpx_read(r, 128); // vpx_prob_half -} - -static INLINE int vpx_read_literal(vpx_reader *r, int bits) { - int literal = 0, bit; - - for (bit = bits - 1; bit >= 0; bit--) - literal |= vpx_read_bit(r) << bit; - - return literal; -} - -static INLINE int vpx_read_tree(vpx_reader *r, const vpx_tree_index *tree, - const vpx_prob *probs) { - vpx_tree_index i = 0; - - while ((i = tree[i + vpx_read(r, probs[i >> 1])]) > 0) - continue; - - return -i; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_DSP_BITREADER_H_ diff --git a/thirdparty/libvpx/vpx_dsp/bitreader_buffer.c b/thirdparty/libvpx/vpx_dsp/bitreader_buffer.c deleted file mode 100644 index d7b55cf9f4..0000000000 --- a/thirdparty/libvpx/vpx_dsp/bitreader_buffer.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ -#include "./vpx_config.h" -#include "./bitreader_buffer.h" - -size_t vpx_rb_bytes_read(struct vpx_read_bit_buffer *rb) { - return (rb->bit_offset + 7) >> 3; -} - -int vpx_rb_read_bit(struct vpx_read_bit_buffer *rb) { - const size_t off = rb->bit_offset; - const size_t p = off >> 3; - const int q = 7 - (int)(off & 0x7); - if (rb->bit_buffer + p < rb->bit_buffer_end) { - const int bit = (rb->bit_buffer[p] >> q) & 1; - rb->bit_offset = off + 1; - return bit; - } else { - rb->error_handler(rb->error_handler_data); - return 0; - } -} - -int vpx_rb_read_literal(struct vpx_read_bit_buffer *rb, int bits) { - int value = 0, bit; - for (bit = bits - 1; bit >= 0; bit--) - value |= vpx_rb_read_bit(rb) << bit; - return value; -} - -int vpx_rb_read_signed_literal(struct vpx_read_bit_buffer *rb, - int bits) { - const int value = vpx_rb_read_literal(rb, bits); - return vpx_rb_read_bit(rb) ? -value : value; -} - -int vpx_rb_read_inv_signed_literal(struct vpx_read_bit_buffer *rb, - int bits) { -#if CONFIG_MISC_FIXES - const int nbits = sizeof(unsigned) * 8 - bits - 1; - const unsigned value = (unsigned)vpx_rb_read_literal(rb, bits + 1) << nbits; - return ((int) value) >> nbits; -#else - return vpx_rb_read_signed_literal(rb, bits); -#endif -} diff --git a/thirdparty/libvpx/vpx_dsp/bitreader_buffer.h b/thirdparty/libvpx/vpx_dsp/bitreader_buffer.h deleted file mode 100644 index 8a48a95ed1..0000000000 --- a/thirdparty/libvpx/vpx_dsp/bitreader_buffer.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VPX_DSP_BITREADER_BUFFER_H_ -#define VPX_DSP_BITREADER_BUFFER_H_ - -#include <limits.h> - -#include "vpx/vpx_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef void (*vpx_rb_error_handler)(void *data); - -struct vpx_read_bit_buffer { - const uint8_t *bit_buffer; - const uint8_t *bit_buffer_end; - size_t bit_offset; - - void *error_handler_data; - vpx_rb_error_handler error_handler; -}; - -size_t vpx_rb_bytes_read(struct vpx_read_bit_buffer *rb); - -int vpx_rb_read_bit(struct vpx_read_bit_buffer *rb); - -int vpx_rb_read_literal(struct vpx_read_bit_buffer *rb, int bits); - -int vpx_rb_read_signed_literal(struct vpx_read_bit_buffer *rb, int bits); - -int vpx_rb_read_inv_signed_literal(struct vpx_read_bit_buffer *rb, int bits); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_DSP_BITREADER_BUFFER_H_ diff --git a/thirdparty/libvpx/vpx_dsp/intrapred.c b/thirdparty/libvpx/vpx_dsp/intrapred.c deleted file mode 100644 index cc4a74bd26..0000000000 --- a/thirdparty/libvpx/vpx_dsp/intrapred.c +++ /dev/null @@ -1,870 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" - -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_mem/vpx_mem.h" - -#define DST(x, y) dst[(x) + (y) * stride] -#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) -#define AVG2(a, b) (((a) + (b) + 1) >> 1) - -static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - (void) above; - // first column - for (r = 0; r < bs - 1; ++r) - dst[r * stride] = AVG2(left[r], left[r + 1]); - dst[(bs - 1) * stride] = left[bs - 1]; - dst++; - - // second column - for (r = 0; r < bs - 2; ++r) - dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]); - dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]); - dst[(bs - 1) * stride] = left[bs - 1]; - dst++; - - // rest of last row - for (c = 0; c < bs - 2; ++c) - dst[(bs - 1) * stride + c] = left[bs - 1]; - - for (r = bs - 2; r >= 0; --r) - for (c = 0; c < bs - 2; ++c) - dst[r * stride + c] = dst[(r + 1) * stride + c - 2]; -} - -#if CONFIG_MISC_FIXES -static INLINE void d207e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - (void) above; - - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) { - dst[c] = c & 1 ? 
AVG3(left[(c >> 1) + r], left[(c >> 1) + r + 1], - left[(c >> 1) + r + 2]) - : AVG2(left[(c >> 1) + r], left[(c >> 1) + r + 1]); - } - dst += stride; - } -} -#endif // CONFIG_MISC_FIXES - -static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - int size; - (void)left; - for (c = 0; c < bs; ++c) { - dst[c] = AVG2(above[c], above[c + 1]); - dst[stride + c] = AVG3(above[c], above[c + 1], above[c + 2]); - } - for (r = 2, size = bs - 2; r < bs; r += 2, --size) { - memcpy(dst + (r + 0) * stride, dst + (r >> 1), size); - memset(dst + (r + 0) * stride + size, above[bs - 1], bs - size); - memcpy(dst + (r + 1) * stride, dst + stride + (r >> 1), size); - memset(dst + (r + 1) * stride + size, above[bs - 1], bs - size); - } -} - -#if CONFIG_MISC_FIXES -static INLINE void d63e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - (void) left; - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) { - dst[c] = r & 1 ? 
AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1], - above[(r >> 1) + c + 2]) - : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]); - } - dst += stride; - } -} -#endif // CONFIG_MISC_FIXES - -static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - const uint8_t above_right = above[bs - 1]; - const uint8_t *const dst_row0 = dst; - int x, size; - (void)left; - - for (x = 0; x < bs - 1; ++x) { - dst[x] = AVG3(above[x], above[x + 1], above[x + 2]); - } - dst[bs - 1] = above_right; - dst += stride; - for (x = 1, size = bs - 2; x < bs; ++x, --size) { - memcpy(dst, dst_row0 + x, size); - memset(dst + size, above_right, x + 1); - dst += stride; - } -} - -#if CONFIG_MISC_FIXES -static INLINE void d45e_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - (void) left; - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) { - dst[c] = AVG3(above[r + c], above[r + c + 1], - above[r + c + 1 + (r + c + 2 < bs * 2)]); - } - dst += stride; - } -} -#endif // CONFIG_MISC_FIXES - -static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - - // first row - for (c = 0; c < bs; c++) - dst[c] = AVG2(above[c - 1], above[c]); - dst += stride; - - // second row - dst[0] = AVG3(left[0], above[-1], above[0]); - for (c = 1; c < bs; c++) - dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); - dst += stride; - - // the rest of first col - dst[0] = AVG3(above[-1], left[0], left[1]); - for (r = 3; r < bs; ++r) - dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); - - // the rest of the block - for (r = 2; r < bs; ++r) { - for (c = 1; c < bs; c++) - dst[c] = dst[-2 * stride + c - 1]; - dst += stride; - } -} - -static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int i; -#if defined(__GNUC__) && __GNUC__ == 4 && 
__GNUC_MINOR__ > 7 - // silence a spurious -Warray-bounds warning, possibly related to: - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273 - uint8_t border[69]; -#else - uint8_t border[32 + 32 - 1]; // outer border from bottom-left to top-right -#endif - - // dst(bs, bs - 2)[0], i.e., border starting at bottom-left - for (i = 0; i < bs - 2; ++i) { - border[i] = AVG3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]); - } - border[bs - 2] = AVG3(above[-1], left[0], left[1]); - border[bs - 1] = AVG3(left[0], above[-1], above[0]); - border[bs - 0] = AVG3(above[-1], above[0], above[1]); - // dst[0][2, size), i.e., remaining top border ascending - for (i = 0; i < bs - 2; ++i) { - border[bs + 1 + i] = AVG3(above[i], above[i + 1], above[i + 2]); - } - - for (i = 0; i < bs; ++i) { - memcpy(dst + i * stride, border + bs - 1 - i, bs); - } -} - -static INLINE void d153_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - dst[0] = AVG2(above[-1], left[0]); - for (r = 1; r < bs; r++) - dst[r * stride] = AVG2(left[r - 1], left[r]); - dst++; - - dst[0] = AVG3(left[0], above[-1], above[0]); - dst[stride] = AVG3(above[-1], left[0], left[1]); - for (r = 2; r < bs; r++) - dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); - dst++; - - for (c = 0; c < bs - 2; c++) - dst[c] = AVG3(above[c - 1], above[c], above[c + 1]); - dst += stride; - - for (r = 1; r < bs; ++r) { - for (c = 0; c < bs - 2; c++) - dst[c] = dst[-stride + c - 2]; - dst += stride; - } -} - -static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r; - (void) left; - - for (r = 0; r < bs; r++) { - memcpy(dst, above, bs); - dst += stride; - } -} - -static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r; - (void) above; - - for (r = 0; r < bs; r++) { - memset(dst, left[r], bs); - dst += stride; - } -} - -static 
INLINE void tm_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r, c; - int ytop_left = above[-1]; - - for (r = 0; r < bs; r++) { - for (c = 0; c < bs; c++) - dst[c] = clip_pixel(left[r] + above[c] - ytop_left); - dst += stride; - } -} - -static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int r; - (void) above; - (void) left; - - for (r = 0; r < bs; r++) { - memset(dst, 128, bs); - dst += stride; - } -} - -static INLINE void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, - const uint8_t *left) { - int i, r, expected_dc, sum = 0; - (void) above; - - for (i = 0; i < bs; i++) - sum += left[i]; - expected_dc = (sum + (bs >> 1)) / bs; - - for (r = 0; r < bs; r++) { - memset(dst, expected_dc, bs); - dst += stride; - } -} - -static INLINE void dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int i, r, expected_dc, sum = 0; - (void) left; - - for (i = 0; i < bs; i++) - sum += above[i]; - expected_dc = (sum + (bs >> 1)) / bs; - - for (r = 0; r < bs; r++) { - memset(dst, expected_dc, bs); - dst += stride; - } -} - -static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bs, - const uint8_t *above, const uint8_t *left) { - int i, r, expected_dc, sum = 0; - const int count = 2 * bs; - - for (i = 0; i < bs; i++) { - sum += above[i]; - sum += left[i]; - } - - expected_dc = (sum + (count >> 1)) / count; - - for (r = 0; r < bs; r++) { - memset(dst, expected_dc, bs); - dst += stride; - } -} - -void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int H = above[-1]; - const int I = left[0]; - const int J = left[1]; - const int K = left[2]; - const int L = left[3]; - - memset(dst + stride * 0, AVG3(H, I, J), 4); - memset(dst + stride * 1, AVG3(I, J, K), 4); - memset(dst + stride * 2, AVG3(J, K, 
L), 4); - memset(dst + stride * 3, AVG3(K, L, L), 4); -} - -void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int H = above[-1]; - const int I = above[0]; - const int J = above[1]; - const int K = above[2]; - const int L = above[3]; - const int M = above[4]; - (void)left; - - dst[0] = AVG3(H, I, J); - dst[1] = AVG3(I, J, K); - dst[2] = AVG3(J, K, L); - dst[3] = AVG3(K, L, M); - memcpy(dst + stride * 1, dst, 4); - memcpy(dst + stride * 2, dst, 4); - memcpy(dst + stride * 3, dst, 4); -} - -void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int I = left[0]; - const int J = left[1]; - const int K = left[2]; - const int L = left[3]; - (void)above; - DST(0, 0) = AVG2(I, J); - DST(2, 0) = DST(0, 1) = AVG2(J, K); - DST(2, 1) = DST(0, 2) = AVG2(K, L); - DST(1, 0) = AVG3(I, J, K); - DST(3, 0) = DST(1, 1) = AVG3(J, K, L); - DST(3, 1) = DST(1, 2) = AVG3(K, L, L); - DST(3, 2) = DST(2, 2) = - DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; -} - -void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - const int E = above[4]; - const int F = above[5]; - const int G = above[6]; - (void)left; - DST(0, 0) = AVG2(A, B); - DST(1, 0) = DST(0, 2) = AVG2(B, C); - DST(2, 0) = DST(1, 2) = AVG2(C, D); - DST(3, 0) = DST(2, 2) = AVG2(D, E); - DST(3, 2) = AVG2(E, F); // differs from vp8 - - DST(0, 1) = AVG3(A, B, C); - DST(1, 1) = DST(0, 3) = AVG3(B, C, D); - DST(2, 1) = DST(1, 3) = AVG3(C, D, E); - DST(3, 1) = DST(2, 3) = AVG3(D, E, F); - DST(3, 3) = AVG3(E, F, G); // differs from vp8 -} - -void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - const 
int E = above[4]; - const int F = above[5]; - const int G = above[6]; - const int H = above[7]; - (void)left; - DST(0, 0) = AVG2(A, B); - DST(1, 0) = DST(0, 2) = AVG2(B, C); - DST(2, 0) = DST(1, 2) = AVG2(C, D); - DST(3, 0) = DST(2, 2) = AVG2(D, E); - DST(3, 2) = AVG3(E, F, G); - - DST(0, 1) = AVG3(A, B, C); - DST(1, 1) = DST(0, 3) = AVG3(B, C, D); - DST(2, 1) = DST(1, 3) = AVG3(C, D, E); - DST(3, 1) = DST(2, 3) = AVG3(D, E, F); - DST(3, 3) = AVG3(F, G, H); -} - -void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - const int E = above[4]; - const int F = above[5]; - const int G = above[6]; - const int H = above[7]; - (void)stride; - (void)left; - DST(0, 0) = AVG3(A, B, C); - DST(1, 0) = DST(0, 1) = AVG3(B, C, D); - DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); - DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); - DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); - DST(3, 2) = DST(2, 3) = AVG3(F, G, H); - DST(3, 3) = H; // differs from vp8 -} - -void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - const int E = above[4]; - const int F = above[5]; - const int G = above[6]; - const int H = above[7]; - (void)stride; - (void)left; - DST(0, 0) = AVG3(A, B, C); - DST(1, 0) = DST(0, 1) = AVG3(B, C, D); - DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); - DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); - DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); - DST(3, 2) = DST(2, 3) = AVG3(F, G, H); - DST(3, 3) = AVG3(G, H, H); -} - -void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int I = left[0]; - const int J = left[1]; - const int K = left[2]; - const int 
X = above[-1]; - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - DST(0, 0) = DST(1, 2) = AVG2(X, A); - DST(1, 0) = DST(2, 2) = AVG2(A, B); - DST(2, 0) = DST(3, 2) = AVG2(B, C); - DST(3, 0) = AVG2(C, D); - - DST(0, 3) = AVG3(K, J, I); - DST(0, 2) = AVG3(J, I, X); - DST(0, 1) = DST(1, 3) = AVG3(I, X, A); - DST(1, 1) = DST(2, 3) = AVG3(X, A, B); - DST(2, 1) = DST(3, 3) = AVG3(A, B, C); - DST(3, 1) = AVG3(B, C, D); -} - -void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int I = left[0]; - const int J = left[1]; - const int K = left[2]; - const int L = left[3]; - const int X = above[-1]; - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - (void)stride; - DST(0, 3) = AVG3(J, K, L); - DST(1, 3) = DST(0, 2) = AVG3(I, J, K); - DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J); - DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I); - DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X); - DST(3, 1) = DST(2, 0) = AVG3(C, B, A); - DST(3, 0) = AVG3(D, C, B); -} - -void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int I = left[0]; - const int J = left[1]; - const int K = left[2]; - const int L = left[3]; - const int X = above[-1]; - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - - DST(0, 0) = DST(2, 1) = AVG2(I, X); - DST(0, 1) = DST(2, 2) = AVG2(J, I); - DST(0, 2) = DST(2, 3) = AVG2(K, J); - DST(0, 3) = AVG2(L, K); - - DST(3, 0) = AVG3(A, B, C); - DST(2, 0) = AVG3(X, A, B); - DST(1, 0) = DST(3, 1) = AVG3(I, X, A); - DST(1, 1) = DST(3, 2) = AVG3(J, I, X); - DST(1, 2) = DST(3, 3) = AVG3(K, J, I); - DST(1, 3) = AVG3(L, K, J); -} - -#if CONFIG_VP9_HIGHBITDEPTH -static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int 
r, c; - (void) above; - (void) bd; - - // First column. - for (r = 0; r < bs - 1; ++r) { - dst[r * stride] = AVG2(left[r], left[r + 1]); - } - dst[(bs - 1) * stride] = left[bs - 1]; - dst++; - - // Second column. - for (r = 0; r < bs - 2; ++r) { - dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]); - } - dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]); - dst[(bs - 1) * stride] = left[bs - 1]; - dst++; - - // Rest of last row. - for (c = 0; c < bs - 2; ++c) - dst[(bs - 1) * stride + c] = left[bs - 1]; - - for (r = bs - 2; r >= 0; --r) { - for (c = 0; c < bs - 2; ++c) - dst[r * stride + c] = dst[(r + 1) * stride + c - 2]; - } -} - -#if CONFIG_MISC_FIXES -static INLINE void highbd_d207e_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) above; - (void) bd; - - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) { - dst[c] = c & 1 ? AVG3(left[(c >> 1) + r], left[(c >> 1) + r + 1], - left[(c >> 1) + r + 2]) - : AVG2(left[(c >> 1) + r], left[(c >> 1) + r + 1]); - } - dst += stride; - } -} -#endif // CONFIG_MISC_FIXES - -static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) left; - (void) bd; - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) { - dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1], - above[(r >> 1) + c + 2]) - : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]); - } - dst += stride; - } -} - -#define highbd_d63e_predictor highbd_d63_predictor - -static INLINE void highbd_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs, - const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) left; - (void) bd; - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) { - dst[c] = r + c + 2 < bs * 2 ? 
AVG3(above[r + c], above[r + c + 1], - above[r + c + 2]) - : above[bs * 2 - 1]; - } - dst += stride; - } -} - -#if CONFIG_MISC_FIXES -static INLINE void highbd_d45e_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) left; - (void) bd; - for (r = 0; r < bs; ++r) { - for (c = 0; c < bs; ++c) { - dst[c] = AVG3(above[r + c], above[r + c + 1], - above[r + c + 1 + (r + c + 2 < bs * 2)]); - } - dst += stride; - } -} -#endif // CONFIG_MISC_FIXES - -static INLINE void highbd_d117_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) bd; - - // first row - for (c = 0; c < bs; c++) - dst[c] = AVG2(above[c - 1], above[c]); - dst += stride; - - // second row - dst[0] = AVG3(left[0], above[-1], above[0]); - for (c = 1; c < bs; c++) - dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); - dst += stride; - - // the rest of first col - dst[0] = AVG3(above[-1], left[0], left[1]); - for (r = 3; r < bs; ++r) - dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); - - // the rest of the block - for (r = 2; r < bs; ++r) { - for (c = 1; c < bs; c++) - dst[c] = dst[-2 * stride + c - 1]; - dst += stride; - } -} - -static INLINE void highbd_d135_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void) bd; - dst[0] = AVG3(left[0], above[-1], above[0]); - for (c = 1; c < bs; c++) - dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); - - dst[stride] = AVG3(above[-1], left[0], left[1]); - for (r = 2; r < bs; ++r) - dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); - - dst += stride; - for (r = 1; r < bs; ++r) { - for (c = 1; c < bs; c++) - dst[c] = dst[-stride + c - 1]; - dst += stride; - } -} - -static INLINE void highbd_d153_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, 
c; - (void) bd; - dst[0] = AVG2(above[-1], left[0]); - for (r = 1; r < bs; r++) - dst[r * stride] = AVG2(left[r - 1], left[r]); - dst++; - - dst[0] = AVG3(left[0], above[-1], above[0]); - dst[stride] = AVG3(above[-1], left[0], left[1]); - for (r = 2; r < bs; r++) - dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); - dst++; - - for (c = 0; c < bs - 2; c++) - dst[c] = AVG3(above[c - 1], above[c], above[c + 1]); - dst += stride; - - for (r = 1; r < bs; ++r) { - for (c = 0; c < bs - 2; c++) - dst[c] = dst[-stride + c - 2]; - dst += stride; - } -} - -static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r; - (void) left; - (void) bd; - for (r = 0; r < bs; r++) { - memcpy(dst, above, bs * sizeof(uint16_t)); - dst += stride; - } -} - -static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r; - (void) above; - (void) bd; - for (r = 0; r < bs; r++) { - vpx_memset16(dst, left[r], bs); - dst += stride; - } -} - -static INLINE void highbd_tm_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - int ytop_left = above[-1]; - (void) bd; - - for (r = 0; r < bs; r++) { - for (c = 0; c < bs; c++) - dst[c] = clip_pixel_highbd(left[r] + above[c] - ytop_left, bd); - dst += stride; - } -} - -static INLINE void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int r; - (void) above; - (void) left; - - for (r = 0; r < bs; r++) { - vpx_memset16(dst, 128 << (bd - 8), bs); - dst += stride; - } -} - -static INLINE void highbd_dc_left_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int i, r, expected_dc, sum = 0; - (void) above; - (void) bd; - - for (i = 0; i < bs; i++) - sum += left[i]; - expected_dc = 
(sum + (bs >> 1)) / bs; - - for (r = 0; r < bs; r++) { - vpx_memset16(dst, expected_dc, bs); - dst += stride; - } -} - -static INLINE void highbd_dc_top_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int i, r, expected_dc, sum = 0; - (void) left; - (void) bd; - - for (i = 0; i < bs; i++) - sum += above[i]; - expected_dc = (sum + (bs >> 1)) / bs; - - for (r = 0; r < bs; r++) { - vpx_memset16(dst, expected_dc, bs); - dst += stride; - } -} - -static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, - int bs, const uint16_t *above, - const uint16_t *left, int bd) { - int i, r, expected_dc, sum = 0; - const int count = 2 * bs; - (void) bd; - - for (i = 0; i < bs; i++) { - sum += above[i]; - sum += left[i]; - } - - expected_dc = (sum + (count >> 1)) / count; - - for (r = 0; r < bs; r++) { - vpx_memset16(dst, expected_dc, bs); - dst += stride; - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -// This serves as a wrapper function, so that all the prediction functions -// can be unified and accessed as a pointer array. Note that the boundary -// above and left are not necessarily used all the time. 
-#define intra_pred_sized(type, size) \ - void vpx_##type##_predictor_##size##x##size##_c(uint8_t *dst, \ - ptrdiff_t stride, \ - const uint8_t *above, \ - const uint8_t *left) { \ - type##_predictor(dst, stride, size, above, left); \ - } - -#if CONFIG_VP9_HIGHBITDEPTH -#define intra_pred_highbd_sized(type, size) \ - void vpx_highbd_##type##_predictor_##size##x##size##_c( \ - uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \ - const uint16_t *left, int bd) { \ - highbd_##type##_predictor(dst, stride, size, above, left, bd); \ - } - -#define intra_pred_allsizes(type) \ - intra_pred_sized(type, 4) \ - intra_pred_sized(type, 8) \ - intra_pred_sized(type, 16) \ - intra_pred_sized(type, 32) \ - intra_pred_highbd_sized(type, 4) \ - intra_pred_highbd_sized(type, 8) \ - intra_pred_highbd_sized(type, 16) \ - intra_pred_highbd_sized(type, 32) - -#define intra_pred_no_4x4(type) \ - intra_pred_sized(type, 8) \ - intra_pred_sized(type, 16) \ - intra_pred_sized(type, 32) \ - intra_pred_highbd_sized(type, 4) \ - intra_pred_highbd_sized(type, 8) \ - intra_pred_highbd_sized(type, 16) \ - intra_pred_highbd_sized(type, 32) - -#else -#define intra_pred_allsizes(type) \ - intra_pred_sized(type, 4) \ - intra_pred_sized(type, 8) \ - intra_pred_sized(type, 16) \ - intra_pred_sized(type, 32) - -#define intra_pred_no_4x4(type) \ - intra_pred_sized(type, 8) \ - intra_pred_sized(type, 16) \ - intra_pred_sized(type, 32) -#endif // CONFIG_VP9_HIGHBITDEPTH - -intra_pred_no_4x4(d207) -intra_pred_no_4x4(d63) -intra_pred_no_4x4(d45) -#if CONFIG_MISC_FIXES -intra_pred_allsizes(d207e) -intra_pred_allsizes(d63e) -intra_pred_no_4x4(d45e) -#endif -intra_pred_no_4x4(d117) -intra_pred_no_4x4(d135) -intra_pred_no_4x4(d153) -intra_pred_allsizes(v) -intra_pred_allsizes(h) -intra_pred_allsizes(tm) -intra_pred_allsizes(dc_128) -intra_pred_allsizes(dc_left) -intra_pred_allsizes(dc_top) -intra_pred_allsizes(dc) -#undef intra_pred_allsizes diff --git a/thirdparty/libvpx/vpx_dsp/inv_txfm.c 
b/thirdparty/libvpx/vpx_dsp/inv_txfm.c deleted file mode 100644 index e18d31d7aa..0000000000 --- a/thirdparty/libvpx/vpx_dsp/inv_txfm.c +++ /dev/null @@ -1,2518 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <math.h> -#include <string.h> - -#include "./vpx_dsp_rtcd.h" -#include "vpx_dsp/inv_txfm.h" - -void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { -/* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, - 0.5 shifts per pixel. */ - int i; - tran_low_t output[16]; - tran_high_t a1, b1, c1, d1, e1; - const tran_low_t *ip = input; - tran_low_t *op = output; - - for (i = 0; i < 4; i++) { - a1 = ip[0] >> UNIT_QUANT_SHIFT; - c1 = ip[1] >> UNIT_QUANT_SHIFT; - d1 = ip[2] >> UNIT_QUANT_SHIFT; - b1 = ip[3] >> UNIT_QUANT_SHIFT; - a1 += c1; - d1 -= b1; - e1 = (a1 - d1) >> 1; - b1 = e1 - b1; - c1 = e1 - c1; - a1 -= b1; - d1 += c1; - op[0] = WRAPLOW(a1); - op[1] = WRAPLOW(b1); - op[2] = WRAPLOW(c1); - op[3] = WRAPLOW(d1); - ip += 4; - op += 4; - } - - ip = output; - for (i = 0; i < 4; i++) { - a1 = ip[4 * 0]; - c1 = ip[4 * 1]; - d1 = ip[4 * 2]; - b1 = ip[4 * 3]; - a1 += c1; - d1 -= b1; - e1 = (a1 - d1) >> 1; - b1 = e1 - b1; - c1 = e1 - c1; - a1 -= b1; - d1 += c1; - dest[stride * 0] = clip_pixel_add(dest[stride * 0], WRAPLOW(a1)); - dest[stride * 1] = clip_pixel_add(dest[stride * 1], WRAPLOW(b1)); - dest[stride * 2] = clip_pixel_add(dest[stride * 2], WRAPLOW(c1)); - dest[stride * 3] = clip_pixel_add(dest[stride * 3], WRAPLOW(d1)); - - ip++; - dest++; - } -} - -void vpx_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { - int i; - 
tran_high_t a1, e1; - tran_low_t tmp[4]; - const tran_low_t *ip = in; - tran_low_t *op = tmp; - - a1 = ip[0] >> UNIT_QUANT_SHIFT; - e1 = a1 >> 1; - a1 -= e1; - op[0] = WRAPLOW(a1); - op[1] = op[2] = op[3] = WRAPLOW(e1); - - ip = tmp; - for (i = 0; i < 4; i++) { - e1 = ip[0] >> 1; - a1 = ip[0] - e1; - dest[dest_stride * 0] = clip_pixel_add(dest[dest_stride * 0], a1); - dest[dest_stride * 1] = clip_pixel_add(dest[dest_stride * 1], e1); - dest[dest_stride * 2] = clip_pixel_add(dest[dest_stride * 2], e1); - dest[dest_stride * 3] = clip_pixel_add(dest[dest_stride * 3], e1); - ip++; - dest++; - } -} - -void idct4_c(const tran_low_t *input, tran_low_t *output) { - tran_low_t step[4]; - tran_high_t temp1, temp2; - // stage 1 - temp1 = (input[0] + input[2]) * cospi_16_64; - temp2 = (input[0] - input[2]) * cospi_16_64; - step[0] = WRAPLOW(dct_const_round_shift(temp1)); - step[1] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; - temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; - step[2] = WRAPLOW(dct_const_round_shift(temp1)); - step[3] = WRAPLOW(dct_const_round_shift(temp2)); - - // stage 2 - output[0] = WRAPLOW(step[0] + step[3]); - output[1] = WRAPLOW(step[1] + step[2]); - output[2] = WRAPLOW(step[1] - step[2]); - output[3] = WRAPLOW(step[0] - step[3]); -} - -void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { - tran_low_t out[4 * 4]; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[4], temp_out[4]; - - // Rows - for (i = 0; i < 4; ++i) { - idct4_c(input, outptr); - input += 4; - outptr += 4; - } - - // Columns - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j * 4 + i]; - idct4_c(temp_in, temp_out); - for (j = 0; j < 4; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 4)); - } - } -} - -void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, - int dest_stride) { - int i; - tran_high_t a1; - 
tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); - a1 = ROUND_POWER_OF_TWO(out, 4); - - for (i = 0; i < 4; i++) { - dest[0] = clip_pixel_add(dest[0], a1); - dest[1] = clip_pixel_add(dest[1], a1); - dest[2] = clip_pixel_add(dest[2], a1); - dest[3] = clip_pixel_add(dest[3], a1); - dest += dest_stride; - } -} - -void idct8_c(const tran_low_t *input, tran_low_t *output) { - tran_low_t step1[8], step2[8]; - tran_high_t temp1, temp2; - // stage 1 - step1[0] = input[0]; - step1[2] = input[4]; - step1[1] = input[2]; - step1[3] = input[6]; - temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; - temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; - step1[4] = WRAPLOW(dct_const_round_shift(temp1)); - step1[7] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; - temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1)); - step1[6] = WRAPLOW(dct_const_round_shift(temp2)); - - // stage 2 - temp1 = (step1[0] + step1[2]) * cospi_16_64; - temp2 = (step1[0] - step1[2]) * cospi_16_64; - step2[0] = WRAPLOW(dct_const_round_shift(temp1)); - step2[1] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64; - temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = WRAPLOW(dct_const_round_shift(temp1)); - step2[3] = WRAPLOW(dct_const_round_shift(temp2)); - step2[4] = WRAPLOW(step1[4] + step1[5]); - step2[5] = WRAPLOW(step1[4] - step1[5]); - step2[6] = WRAPLOW(-step1[6] + step1[7]); - step2[7] = WRAPLOW(step1[6] + step1[7]); - - // stage 3 - step1[0] = WRAPLOW(step2[0] + step2[3]); - step1[1] = WRAPLOW(step2[1] + step2[2]); - step1[2] = WRAPLOW(step2[1] - step2[2]); - step1[3] = WRAPLOW(step2[0] - step2[3]); - step1[4] = step2[4]; - temp1 = (step2[6] - step2[5]) * cospi_16_64; - temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = 
WRAPLOW(dct_const_round_shift(temp1)); - step1[6] = WRAPLOW(dct_const_round_shift(temp2)); - step1[7] = step2[7]; - - // stage 4 - output[0] = WRAPLOW(step1[0] + step1[7]); - output[1] = WRAPLOW(step1[1] + step1[6]); - output[2] = WRAPLOW(step1[2] + step1[5]); - output[3] = WRAPLOW(step1[3] + step1[4]); - output[4] = WRAPLOW(step1[3] - step1[4]); - output[5] = WRAPLOW(step1[2] - step1[5]); - output[6] = WRAPLOW(step1[1] - step1[6]); - output[7] = WRAPLOW(step1[0] - step1[7]); -} - -void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { - tran_low_t out[8 * 8]; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[8], temp_out[8]; - - // First transform rows - for (i = 0; i < 8; ++i) { - idct8_c(input, outptr); - input += 8; - outptr += 8; - } - - // Then transform columns - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; - idct8_c(temp_in, temp_out); - for (j = 0; j < 8; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 5)); - } - } -} - -void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { - int i, j; - tran_high_t a1; - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); - a1 = ROUND_POWER_OF_TWO(out, 5); - for (j = 0; j < 8; ++j) { - for (i = 0; i < 8; ++i) - dest[i] = clip_pixel_add(dest[i], a1); - dest += stride; - } -} - -void iadst4_c(const tran_low_t *input, tran_low_t *output) { - tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; - - tran_low_t x0 = input[0]; - tran_low_t x1 = input[1]; - tran_low_t x2 = input[2]; - tran_low_t x3 = input[3]; - - if (!(x0 | x1 | x2 | x3)) { - output[0] = output[1] = output[2] = output[3] = 0; - return; - } - - s0 = sinpi_1_9 * x0; - s1 = sinpi_2_9 * x0; - s2 = sinpi_3_9 * x1; - s3 = sinpi_4_9 * x2; - s4 = sinpi_1_9 * x2; - s5 = sinpi_2_9 * x3; - s6 = sinpi_4_9 * x3; - s7 = WRAPLOW(x0 - x2 + x3); - - s0 = s0 
+ s3 + s5; - s1 = s1 - s4 - s6; - s3 = s2; - s2 = sinpi_3_9 * s7; - - // 1-D transform scaling factor is sqrt(2). - // The overall dynamic range is 14b (input) + 14b (multiplication scaling) - // + 1b (addition) = 29b. - // Hence the output bit depth is 15b. - output[0] = WRAPLOW(dct_const_round_shift(s0 + s3)); - output[1] = WRAPLOW(dct_const_round_shift(s1 + s3)); - output[2] = WRAPLOW(dct_const_round_shift(s2)); - output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3)); -} - -void iadst8_c(const tran_low_t *input, tran_low_t *output) { - int s0, s1, s2, s3, s4, s5, s6, s7; - - tran_high_t x0 = input[7]; - tran_high_t x1 = input[0]; - tran_high_t x2 = input[5]; - tran_high_t x3 = input[2]; - tran_high_t x4 = input[3]; - tran_high_t x5 = input[4]; - tran_high_t x6 = input[1]; - tran_high_t x7 = input[6]; - - if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { - output[0] = output[1] = output[2] = output[3] = output[4] - = output[5] = output[6] = output[7] = 0; - return; - } - - // stage 1 - s0 = (int)(cospi_2_64 * x0 + cospi_30_64 * x1); - s1 = (int)(cospi_30_64 * x0 - cospi_2_64 * x1); - s2 = (int)(cospi_10_64 * x2 + cospi_22_64 * x3); - s3 = (int)(cospi_22_64 * x2 - cospi_10_64 * x3); - s4 = (int)(cospi_18_64 * x4 + cospi_14_64 * x5); - s5 = (int)(cospi_14_64 * x4 - cospi_18_64 * x5); - s6 = (int)(cospi_26_64 * x6 + cospi_6_64 * x7); - s7 = (int)(cospi_6_64 * x6 - cospi_26_64 * x7); - - x0 = WRAPLOW(dct_const_round_shift(s0 + s4)); - x1 = WRAPLOW(dct_const_round_shift(s1 + s5)); - x2 = WRAPLOW(dct_const_round_shift(s2 + s6)); - x3 = WRAPLOW(dct_const_round_shift(s3 + s7)); - x4 = WRAPLOW(dct_const_round_shift(s0 - s4)); - x5 = WRAPLOW(dct_const_round_shift(s1 - s5)); - x6 = WRAPLOW(dct_const_round_shift(s2 - s6)); - x7 = WRAPLOW(dct_const_round_shift(s3 - s7)); - - // stage 2 - s0 = (int)x0; - s1 = (int)x1; - s2 = (int)x2; - s3 = (int)x3; - s4 = (int)(cospi_8_64 * x4 + cospi_24_64 * x5); - s5 = (int)(cospi_24_64 * x4 - cospi_8_64 * x5); - s6 = (int)(-cospi_24_64 * 
x6 + cospi_8_64 * x7); - s7 = (int)(cospi_8_64 * x6 + cospi_24_64 * x7); - - x0 = WRAPLOW(s0 + s2); - x1 = WRAPLOW(s1 + s3); - x2 = WRAPLOW(s0 - s2); - x3 = WRAPLOW(s1 - s3); - x4 = WRAPLOW(dct_const_round_shift(s4 + s6)); - x5 = WRAPLOW(dct_const_round_shift(s5 + s7)); - x6 = WRAPLOW(dct_const_round_shift(s4 - s6)); - x7 = WRAPLOW(dct_const_round_shift(s5 - s7)); - - // stage 3 - s2 = (int)(cospi_16_64 * (x2 + x3)); - s3 = (int)(cospi_16_64 * (x2 - x3)); - s6 = (int)(cospi_16_64 * (x6 + x7)); - s7 = (int)(cospi_16_64 * (x6 - x7)); - - x2 = WRAPLOW(dct_const_round_shift(s2)); - x3 = WRAPLOW(dct_const_round_shift(s3)); - x6 = WRAPLOW(dct_const_round_shift(s6)); - x7 = WRAPLOW(dct_const_round_shift(s7)); - - output[0] = WRAPLOW(x0); - output[1] = WRAPLOW(-x4); - output[2] = WRAPLOW(x6); - output[3] = WRAPLOW(-x2); - output[4] = WRAPLOW(x3); - output[5] = WRAPLOW(-x7); - output[6] = WRAPLOW(x5); - output[7] = WRAPLOW(-x1); -} - -void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { - tran_low_t out[8 * 8] = { 0 }; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[8], temp_out[8]; - - // First transform rows - // only first 4 row has non-zero coefs - for (i = 0; i < 4; ++i) { - idct8_c(input, outptr); - input += 8; - outptr += 8; - } - - // Then transform columns - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; - idct8_c(temp_in, temp_out); - for (j = 0; j < 8; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 5)); - } - } -} - -void idct16_c(const tran_low_t *input, tran_low_t *output) { - tran_low_t step1[16], step2[16]; - tran_high_t temp1, temp2; - - // stage 1 - step1[0] = input[0/2]; - step1[1] = input[16/2]; - step1[2] = input[8/2]; - step1[3] = input[24/2]; - step1[4] = input[4/2]; - step1[5] = input[20/2]; - step1[6] = input[12/2]; - step1[7] = input[28/2]; - step1[8] = input[2/2]; - step1[9] = input[18/2]; - step1[10] = input[10/2]; - 
step1[11] = input[26/2]; - step1[12] = input[6/2]; - step1[13] = input[22/2]; - step1[14] = input[14/2]; - step1[15] = input[30/2]; - - // stage 2 - step2[0] = step1[0]; - step2[1] = step1[1]; - step2[2] = step1[2]; - step2[3] = step1[3]; - step2[4] = step1[4]; - step2[5] = step1[5]; - step2[6] = step1[6]; - step2[7] = step1[7]; - - temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; - temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; - step2[8] = WRAPLOW(dct_const_round_shift(temp1)); - step2[15] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; - temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; - step2[9] = WRAPLOW(dct_const_round_shift(temp1)); - step2[14] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; - temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1)); - step2[13] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; - temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; - step2[11] = WRAPLOW(dct_const_round_shift(temp1)); - step2[12] = WRAPLOW(dct_const_round_shift(temp2)); - - // stage 3 - step1[0] = step2[0]; - step1[1] = step2[1]; - step1[2] = step2[2]; - step1[3] = step2[3]; - - temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; - temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; - step1[4] = WRAPLOW(dct_const_round_shift(temp1)); - step1[7] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; - temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1)); - step1[6] = WRAPLOW(dct_const_round_shift(temp2)); - - step1[8] = WRAPLOW(step2[8] + step2[9]); - step1[9] = WRAPLOW(step2[8] - step2[9]); - step1[10] = WRAPLOW(-step2[10] + step2[11]); - step1[11] = WRAPLOW(step2[10] + step2[11]); - step1[12] = 
WRAPLOW(step2[12] + step2[13]); - step1[13] = WRAPLOW(step2[12] - step2[13]); - step1[14] = WRAPLOW(-step2[14] + step2[15]); - step1[15] = WRAPLOW(step2[14] + step2[15]); - - // stage 4 - temp1 = (step1[0] + step1[1]) * cospi_16_64; - temp2 = (step1[0] - step1[1]) * cospi_16_64; - step2[0] = WRAPLOW(dct_const_round_shift(temp1)); - step2[1] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; - temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = WRAPLOW(dct_const_round_shift(temp1)); - step2[3] = WRAPLOW(dct_const_round_shift(temp2)); - step2[4] = WRAPLOW(step1[4] + step1[5]); - step2[5] = WRAPLOW(step1[4] - step1[5]); - step2[6] = WRAPLOW(-step1[6] + step1[7]); - step2[7] = WRAPLOW(step1[6] + step1[7]); - - step2[8] = step1[8]; - step2[15] = step1[15]; - temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; - temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; - step2[9] = WRAPLOW(dct_const_round_shift(temp1)); - step2[14] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; - temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1)); - step2[13] = WRAPLOW(dct_const_round_shift(temp2)); - step2[11] = step1[11]; - step2[12] = step1[12]; - - // stage 5 - step1[0] = WRAPLOW(step2[0] + step2[3]); - step1[1] = WRAPLOW(step2[1] + step2[2]); - step1[2] = WRAPLOW(step2[1] - step2[2]); - step1[3] = WRAPLOW(step2[0] - step2[3]); - step1[4] = step2[4]; - temp1 = (step2[6] - step2[5]) * cospi_16_64; - temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1)); - step1[6] = WRAPLOW(dct_const_round_shift(temp2)); - step1[7] = step2[7]; - - step1[8] = WRAPLOW(step2[8] + step2[11]); - step1[9] = WRAPLOW(step2[9] + step2[10]); - step1[10] = WRAPLOW(step2[9] - step2[10]); - step1[11] = WRAPLOW(step2[8] - step2[11]); - step1[12] = WRAPLOW(-step2[12] + step2[15]); - step1[13] 
= WRAPLOW(-step2[13] + step2[14]); - step1[14] = WRAPLOW(step2[13] + step2[14]); - step1[15] = WRAPLOW(step2[12] + step2[15]); - - // stage 6 - step2[0] = WRAPLOW(step1[0] + step1[7]); - step2[1] = WRAPLOW(step1[1] + step1[6]); - step2[2] = WRAPLOW(step1[2] + step1[5]); - step2[3] = WRAPLOW(step1[3] + step1[4]); - step2[4] = WRAPLOW(step1[3] - step1[4]); - step2[5] = WRAPLOW(step1[2] - step1[5]); - step2[6] = WRAPLOW(step1[1] - step1[6]); - step2[7] = WRAPLOW(step1[0] - step1[7]); - step2[8] = step1[8]; - step2[9] = step1[9]; - temp1 = (-step1[10] + step1[13]) * cospi_16_64; - temp2 = (step1[10] + step1[13]) * cospi_16_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1)); - step2[13] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = (-step1[11] + step1[12]) * cospi_16_64; - temp2 = (step1[11] + step1[12]) * cospi_16_64; - step2[11] = WRAPLOW(dct_const_round_shift(temp1)); - step2[12] = WRAPLOW(dct_const_round_shift(temp2)); - step2[14] = step1[14]; - step2[15] = step1[15]; - - // stage 7 - output[0] = WRAPLOW(step2[0] + step2[15]); - output[1] = WRAPLOW(step2[1] + step2[14]); - output[2] = WRAPLOW(step2[2] + step2[13]); - output[3] = WRAPLOW(step2[3] + step2[12]); - output[4] = WRAPLOW(step2[4] + step2[11]); - output[5] = WRAPLOW(step2[5] + step2[10]); - output[6] = WRAPLOW(step2[6] + step2[9]); - output[7] = WRAPLOW(step2[7] + step2[8]); - output[8] = WRAPLOW(step2[7] - step2[8]); - output[9] = WRAPLOW(step2[6] - step2[9]); - output[10] = WRAPLOW(step2[5] - step2[10]); - output[11] = WRAPLOW(step2[4] - step2[11]); - output[12] = WRAPLOW(step2[3] - step2[12]); - output[13] = WRAPLOW(step2[2] - step2[13]); - output[14] = WRAPLOW(step2[1] - step2[14]); - output[15] = WRAPLOW(step2[0] - step2[15]); -} - -void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, - int stride) { - tran_low_t out[16 * 16]; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[16], temp_out[16]; - - // First transform rows - for (i = 0; i < 16; ++i) { - 
idct16_c(input, outptr); - input += 16; - outptr += 16; - } - - // Then transform columns - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; - idct16_c(temp_in, temp_out); - for (j = 0; j < 16; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 6)); - } - } -} - -void iadst16_c(const tran_low_t *input, tran_low_t *output) { - tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; - tran_high_t s9, s10, s11, s12, s13, s14, s15; - - tran_high_t x0 = input[15]; - tran_high_t x1 = input[0]; - tran_high_t x2 = input[13]; - tran_high_t x3 = input[2]; - tran_high_t x4 = input[11]; - tran_high_t x5 = input[4]; - tran_high_t x6 = input[9]; - tran_high_t x7 = input[6]; - tran_high_t x8 = input[7]; - tran_high_t x9 = input[8]; - tran_high_t x10 = input[5]; - tran_high_t x11 = input[10]; - tran_high_t x12 = input[3]; - tran_high_t x13 = input[12]; - tran_high_t x14 = input[1]; - tran_high_t x15 = input[14]; - - if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 - | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { - output[0] = output[1] = output[2] = output[3] = output[4] - = output[5] = output[6] = output[7] = output[8] - = output[9] = output[10] = output[11] = output[12] - = output[13] = output[14] = output[15] = 0; - return; - } - - // stage 1 - s0 = x0 * cospi_1_64 + x1 * cospi_31_64; - s1 = x0 * cospi_31_64 - x1 * cospi_1_64; - s2 = x2 * cospi_5_64 + x3 * cospi_27_64; - s3 = x2 * cospi_27_64 - x3 * cospi_5_64; - s4 = x4 * cospi_9_64 + x5 * cospi_23_64; - s5 = x4 * cospi_23_64 - x5 * cospi_9_64; - s6 = x6 * cospi_13_64 + x7 * cospi_19_64; - s7 = x6 * cospi_19_64 - x7 * cospi_13_64; - s8 = x8 * cospi_17_64 + x9 * cospi_15_64; - s9 = x8 * cospi_15_64 - x9 * cospi_17_64; - s10 = x10 * cospi_21_64 + x11 * cospi_11_64; - s11 = x10 * cospi_11_64 - x11 * cospi_21_64; - s12 = x12 * cospi_25_64 + x13 * cospi_7_64; - s13 = x12 * cospi_7_64 - x13 * cospi_25_64; - s14 = x14 * cospi_29_64 + x15 * cospi_3_64; - 
s15 = x14 * cospi_3_64 - x15 * cospi_29_64; - - x0 = WRAPLOW(dct_const_round_shift(s0 + s8)); - x1 = WRAPLOW(dct_const_round_shift(s1 + s9)); - x2 = WRAPLOW(dct_const_round_shift(s2 + s10)); - x3 = WRAPLOW(dct_const_round_shift(s3 + s11)); - x4 = WRAPLOW(dct_const_round_shift(s4 + s12)); - x5 = WRAPLOW(dct_const_round_shift(s5 + s13)); - x6 = WRAPLOW(dct_const_round_shift(s6 + s14)); - x7 = WRAPLOW(dct_const_round_shift(s7 + s15)); - x8 = WRAPLOW(dct_const_round_shift(s0 - s8)); - x9 = WRAPLOW(dct_const_round_shift(s1 - s9)); - x10 = WRAPLOW(dct_const_round_shift(s2 - s10)); - x11 = WRAPLOW(dct_const_round_shift(s3 - s11)); - x12 = WRAPLOW(dct_const_round_shift(s4 - s12)); - x13 = WRAPLOW(dct_const_round_shift(s5 - s13)); - x14 = WRAPLOW(dct_const_round_shift(s6 - s14)); - x15 = WRAPLOW(dct_const_round_shift(s7 - s15)); - - // stage 2 - s0 = x0; - s1 = x1; - s2 = x2; - s3 = x3; - s4 = x4; - s5 = x5; - s6 = x6; - s7 = x7; - s8 = x8 * cospi_4_64 + x9 * cospi_28_64; - s9 = x8 * cospi_28_64 - x9 * cospi_4_64; - s10 = x10 * cospi_20_64 + x11 * cospi_12_64; - s11 = x10 * cospi_12_64 - x11 * cospi_20_64; - s12 = - x12 * cospi_28_64 + x13 * cospi_4_64; - s13 = x12 * cospi_4_64 + x13 * cospi_28_64; - s14 = - x14 * cospi_12_64 + x15 * cospi_20_64; - s15 = x14 * cospi_20_64 + x15 * cospi_12_64; - - x0 = WRAPLOW(s0 + s4); - x1 = WRAPLOW(s1 + s5); - x2 = WRAPLOW(s2 + s6); - x3 = WRAPLOW(s3 + s7); - x4 = WRAPLOW(s0 - s4); - x5 = WRAPLOW(s1 - s5); - x6 = WRAPLOW(s2 - s6); - x7 = WRAPLOW(s3 - s7); - x8 = WRAPLOW(dct_const_round_shift(s8 + s12)); - x9 = WRAPLOW(dct_const_round_shift(s9 + s13)); - x10 = WRAPLOW(dct_const_round_shift(s10 + s14)); - x11 = WRAPLOW(dct_const_round_shift(s11 + s15)); - x12 = WRAPLOW(dct_const_round_shift(s8 - s12)); - x13 = WRAPLOW(dct_const_round_shift(s9 - s13)); - x14 = WRAPLOW(dct_const_round_shift(s10 - s14)); - x15 = WRAPLOW(dct_const_round_shift(s11 - s15)); - - // stage 3 - s0 = x0; - s1 = x1; - s2 = x2; - s3 = x3; - s4 = x4 * cospi_8_64 + x5 * 
cospi_24_64; - s5 = x4 * cospi_24_64 - x5 * cospi_8_64; - s6 = - x6 * cospi_24_64 + x7 * cospi_8_64; - s7 = x6 * cospi_8_64 + x7 * cospi_24_64; - s8 = x8; - s9 = x9; - s10 = x10; - s11 = x11; - s12 = x12 * cospi_8_64 + x13 * cospi_24_64; - s13 = x12 * cospi_24_64 - x13 * cospi_8_64; - s14 = - x14 * cospi_24_64 + x15 * cospi_8_64; - s15 = x14 * cospi_8_64 + x15 * cospi_24_64; - - x0 = WRAPLOW(s0 + s2); - x1 = WRAPLOW(s1 + s3); - x2 = WRAPLOW(s0 - s2); - x3 = WRAPLOW(s1 - s3); - x4 = WRAPLOW(dct_const_round_shift(s4 + s6)); - x5 = WRAPLOW(dct_const_round_shift(s5 + s7)); - x6 = WRAPLOW(dct_const_round_shift(s4 - s6)); - x7 = WRAPLOW(dct_const_round_shift(s5 - s7)); - x8 = WRAPLOW(s8 + s10); - x9 = WRAPLOW(s9 + s11); - x10 = WRAPLOW(s8 - s10); - x11 = WRAPLOW(s9 - s11); - x12 = WRAPLOW(dct_const_round_shift(s12 + s14)); - x13 = WRAPLOW(dct_const_round_shift(s13 + s15)); - x14 = WRAPLOW(dct_const_round_shift(s12 - s14)); - x15 = WRAPLOW(dct_const_round_shift(s13 - s15)); - - // stage 4 - s2 = (- cospi_16_64) * (x2 + x3); - s3 = cospi_16_64 * (x2 - x3); - s6 = cospi_16_64 * (x6 + x7); - s7 = cospi_16_64 * (- x6 + x7); - s10 = cospi_16_64 * (x10 + x11); - s11 = cospi_16_64 * (- x10 + x11); - s14 = (- cospi_16_64) * (x14 + x15); - s15 = cospi_16_64 * (x14 - x15); - - x2 = WRAPLOW(dct_const_round_shift(s2)); - x3 = WRAPLOW(dct_const_round_shift(s3)); - x6 = WRAPLOW(dct_const_round_shift(s6)); - x7 = WRAPLOW(dct_const_round_shift(s7)); - x10 = WRAPLOW(dct_const_round_shift(s10)); - x11 = WRAPLOW(dct_const_round_shift(s11)); - x14 = WRAPLOW(dct_const_round_shift(s14)); - x15 = WRAPLOW(dct_const_round_shift(s15)); - - output[0] = WRAPLOW(x0); - output[1] = WRAPLOW(-x8); - output[2] = WRAPLOW(x12); - output[3] = WRAPLOW(-x4); - output[4] = WRAPLOW(x6); - output[5] = WRAPLOW(x14); - output[6] = WRAPLOW(x10); - output[7] = WRAPLOW(x2); - output[8] = WRAPLOW(x3); - output[9] = WRAPLOW(x11); - output[10] = WRAPLOW(x15); - output[11] = WRAPLOW(x7); - output[12] = WRAPLOW(x5); - 
output[13] = WRAPLOW(-x13); - output[14] = WRAPLOW(x9); - output[15] = WRAPLOW(-x1); -} - -void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, - int stride) { - tran_low_t out[16 * 16] = { 0 }; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[16], temp_out[16]; - - // First transform rows. Since all non-zero dct coefficients are in - // upper-left 4x4 area, we only need to calculate first 4 rows here. - for (i = 0; i < 4; ++i) { - idct16_c(input, outptr); - input += 16; - outptr += 16; - } - - // Then transform columns - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j*16 + i]; - idct16_c(temp_in, temp_out); - for (j = 0; j < 16; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 6)); - } - } -} - -void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { - int i, j; - tran_high_t a1; - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); - a1 = ROUND_POWER_OF_TWO(out, 6); - for (j = 0; j < 16; ++j) { - for (i = 0; i < 16; ++i) - dest[i] = clip_pixel_add(dest[i], a1); - dest += stride; - } -} - -void idct32_c(const tran_low_t *input, tran_low_t *output) { - tran_low_t step1[32], step2[32]; - tran_high_t temp1, temp2; - - // stage 1 - step1[0] = input[0]; - step1[1] = input[16]; - step1[2] = input[8]; - step1[3] = input[24]; - step1[4] = input[4]; - step1[5] = input[20]; - step1[6] = input[12]; - step1[7] = input[28]; - step1[8] = input[2]; - step1[9] = input[18]; - step1[10] = input[10]; - step1[11] = input[26]; - step1[12] = input[6]; - step1[13] = input[22]; - step1[14] = input[14]; - step1[15] = input[30]; - - temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; - temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; - step1[16] = WRAPLOW(dct_const_round_shift(temp1)); - step1[31] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = input[17] * 
cospi_15_64 - input[15] * cospi_17_64; - temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; - step1[17] = WRAPLOW(dct_const_round_shift(temp1)); - step1[30] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; - temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; - step1[18] = WRAPLOW(dct_const_round_shift(temp1)); - step1[29] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; - temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; - step1[19] = WRAPLOW(dct_const_round_shift(temp1)); - step1[28] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; - temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; - step1[20] = WRAPLOW(dct_const_round_shift(temp1)); - step1[27] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; - temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; - step1[21] = WRAPLOW(dct_const_round_shift(temp1)); - step1[26] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; - temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; - step1[22] = WRAPLOW(dct_const_round_shift(temp1)); - step1[25] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; - temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; - step1[23] = WRAPLOW(dct_const_round_shift(temp1)); - step1[24] = WRAPLOW(dct_const_round_shift(temp2)); - - // stage 2 - step2[0] = step1[0]; - step2[1] = step1[1]; - step2[2] = step1[2]; - step2[3] = step1[3]; - step2[4] = step1[4]; - step2[5] = step1[5]; - step2[6] = step1[6]; - step2[7] = step1[7]; - - temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; - temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; - step2[8] = WRAPLOW(dct_const_round_shift(temp1)); - step2[15] = WRAPLOW(dct_const_round_shift(temp2)); - - 
temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; - temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; - step2[9] = WRAPLOW(dct_const_round_shift(temp1)); - step2[14] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; - temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1)); - step2[13] = WRAPLOW(dct_const_round_shift(temp2)); - - temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; - temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; - step2[11] = WRAPLOW(dct_const_round_shift(temp1)); - step2[12] = WRAPLOW(dct_const_round_shift(temp2)); - - step2[16] = WRAPLOW(step1[16] + step1[17]); - step2[17] = WRAPLOW(step1[16] - step1[17]); - step2[18] = WRAPLOW(-step1[18] + step1[19]); - step2[19] = WRAPLOW(step1[18] + step1[19]); - step2[20] = WRAPLOW(step1[20] + step1[21]); - step2[21] = WRAPLOW(step1[20] - step1[21]); - step2[22] = WRAPLOW(-step1[22] + step1[23]); - step2[23] = WRAPLOW(step1[22] + step1[23]); - step2[24] = WRAPLOW(step1[24] + step1[25]); - step2[25] = WRAPLOW(step1[24] - step1[25]); - step2[26] = WRAPLOW(-step1[26] + step1[27]); - step2[27] = WRAPLOW(step1[26] + step1[27]); - step2[28] = WRAPLOW(step1[28] + step1[29]); - step2[29] = WRAPLOW(step1[28] - step1[29]); - step2[30] = WRAPLOW(-step1[30] + step1[31]); - step2[31] = WRAPLOW(step1[30] + step1[31]); - - // stage 3 - step1[0] = step2[0]; - step1[1] = step2[1]; - step1[2] = step2[2]; - step1[3] = step2[3]; - - temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; - temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; - step1[4] = WRAPLOW(dct_const_round_shift(temp1)); - step1[7] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; - temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1)); - step1[6] = WRAPLOW(dct_const_round_shift(temp2)); - - step1[8] = 
WRAPLOW(step2[8] + step2[9]); - step1[9] = WRAPLOW(step2[8] - step2[9]); - step1[10] = WRAPLOW(-step2[10] + step2[11]); - step1[11] = WRAPLOW(step2[10] + step2[11]); - step1[12] = WRAPLOW(step2[12] + step2[13]); - step1[13] = WRAPLOW(step2[12] - step2[13]); - step1[14] = WRAPLOW(-step2[14] + step2[15]); - step1[15] = WRAPLOW(step2[14] + step2[15]); - - step1[16] = step2[16]; - step1[31] = step2[31]; - temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; - temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; - step1[17] = WRAPLOW(dct_const_round_shift(temp1)); - step1[30] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; - temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; - step1[18] = WRAPLOW(dct_const_round_shift(temp1)); - step1[29] = WRAPLOW(dct_const_round_shift(temp2)); - step1[19] = step2[19]; - step1[20] = step2[20]; - temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; - temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; - step1[21] = WRAPLOW(dct_const_round_shift(temp1)); - step1[26] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; - temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; - step1[22] = WRAPLOW(dct_const_round_shift(temp1)); - step1[25] = WRAPLOW(dct_const_round_shift(temp2)); - step1[23] = step2[23]; - step1[24] = step2[24]; - step1[27] = step2[27]; - step1[28] = step2[28]; - - // stage 4 - temp1 = (step1[0] + step1[1]) * cospi_16_64; - temp2 = (step1[0] - step1[1]) * cospi_16_64; - step2[0] = WRAPLOW(dct_const_round_shift(temp1)); - step2[1] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; - temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = WRAPLOW(dct_const_round_shift(temp1)); - step2[3] = WRAPLOW(dct_const_round_shift(temp2)); - step2[4] = WRAPLOW(step1[4] + step1[5]); - step2[5] = WRAPLOW(step1[4] - step1[5]); - step2[6] = 
WRAPLOW(-step1[6] + step1[7]); - step2[7] = WRAPLOW(step1[6] + step1[7]); - - step2[8] = step1[8]; - step2[15] = step1[15]; - temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; - temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; - step2[9] = WRAPLOW(dct_const_round_shift(temp1)); - step2[14] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; - temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1)); - step2[13] = WRAPLOW(dct_const_round_shift(temp2)); - step2[11] = step1[11]; - step2[12] = step1[12]; - - step2[16] = WRAPLOW(step1[16] + step1[19]); - step2[17] = WRAPLOW(step1[17] + step1[18]); - step2[18] = WRAPLOW(step1[17] - step1[18]); - step2[19] = WRAPLOW(step1[16] - step1[19]); - step2[20] = WRAPLOW(-step1[20] + step1[23]); - step2[21] = WRAPLOW(-step1[21] + step1[22]); - step2[22] = WRAPLOW(step1[21] + step1[22]); - step2[23] = WRAPLOW(step1[20] + step1[23]); - - step2[24] = WRAPLOW(step1[24] + step1[27]); - step2[25] = WRAPLOW(step1[25] + step1[26]); - step2[26] = WRAPLOW(step1[25] - step1[26]); - step2[27] = WRAPLOW(step1[24] - step1[27]); - step2[28] = WRAPLOW(-step1[28] + step1[31]); - step2[29] = WRAPLOW(-step1[29] + step1[30]); - step2[30] = WRAPLOW(step1[29] + step1[30]); - step2[31] = WRAPLOW(step1[28] + step1[31]); - - // stage 5 - step1[0] = WRAPLOW(step2[0] + step2[3]); - step1[1] = WRAPLOW(step2[1] + step2[2]); - step1[2] = WRAPLOW(step2[1] - step2[2]); - step1[3] = WRAPLOW(step2[0] - step2[3]); - step1[4] = step2[4]; - temp1 = (step2[6] - step2[5]) * cospi_16_64; - temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = WRAPLOW(dct_const_round_shift(temp1)); - step1[6] = WRAPLOW(dct_const_round_shift(temp2)); - step1[7] = step2[7]; - - step1[8] = WRAPLOW(step2[8] + step2[11]); - step1[9] = WRAPLOW(step2[9] + step2[10]); - step1[10] = WRAPLOW(step2[9] - step2[10]); - step1[11] = WRAPLOW(step2[8] - step2[11]); - step1[12] = 
WRAPLOW(-step2[12] + step2[15]); - step1[13] = WRAPLOW(-step2[13] + step2[14]); - step1[14] = WRAPLOW(step2[13] + step2[14]); - step1[15] = WRAPLOW(step2[12] + step2[15]); - - step1[16] = step2[16]; - step1[17] = step2[17]; - temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; - temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; - step1[18] = WRAPLOW(dct_const_round_shift(temp1)); - step1[29] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; - temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; - step1[19] = WRAPLOW(dct_const_round_shift(temp1)); - step1[28] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; - temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; - step1[20] = WRAPLOW(dct_const_round_shift(temp1)); - step1[27] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; - temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; - step1[21] = WRAPLOW(dct_const_round_shift(temp1)); - step1[26] = WRAPLOW(dct_const_round_shift(temp2)); - step1[22] = step2[22]; - step1[23] = step2[23]; - step1[24] = step2[24]; - step1[25] = step2[25]; - step1[30] = step2[30]; - step1[31] = step2[31]; - - // stage 6 - step2[0] = WRAPLOW(step1[0] + step1[7]); - step2[1] = WRAPLOW(step1[1] + step1[6]); - step2[2] = WRAPLOW(step1[2] + step1[5]); - step2[3] = WRAPLOW(step1[3] + step1[4]); - step2[4] = WRAPLOW(step1[3] - step1[4]); - step2[5] = WRAPLOW(step1[2] - step1[5]); - step2[6] = WRAPLOW(step1[1] - step1[6]); - step2[7] = WRAPLOW(step1[0] - step1[7]); - step2[8] = step1[8]; - step2[9] = step1[9]; - temp1 = (-step1[10] + step1[13]) * cospi_16_64; - temp2 = (step1[10] + step1[13]) * cospi_16_64; - step2[10] = WRAPLOW(dct_const_round_shift(temp1)); - step2[13] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = (-step1[11] + step1[12]) * cospi_16_64; - temp2 = (step1[11] + step1[12]) * cospi_16_64; - 
step2[11] = WRAPLOW(dct_const_round_shift(temp1)); - step2[12] = WRAPLOW(dct_const_round_shift(temp2)); - step2[14] = step1[14]; - step2[15] = step1[15]; - - step2[16] = WRAPLOW(step1[16] + step1[23]); - step2[17] = WRAPLOW(step1[17] + step1[22]); - step2[18] = WRAPLOW(step1[18] + step1[21]); - step2[19] = WRAPLOW(step1[19] + step1[20]); - step2[20] = WRAPLOW(step1[19] - step1[20]); - step2[21] = WRAPLOW(step1[18] - step1[21]); - step2[22] = WRAPLOW(step1[17] - step1[22]); - step2[23] = WRAPLOW(step1[16] - step1[23]); - - step2[24] = WRAPLOW(-step1[24] + step1[31]); - step2[25] = WRAPLOW(-step1[25] + step1[30]); - step2[26] = WRAPLOW(-step1[26] + step1[29]); - step2[27] = WRAPLOW(-step1[27] + step1[28]); - step2[28] = WRAPLOW(step1[27] + step1[28]); - step2[29] = WRAPLOW(step1[26] + step1[29]); - step2[30] = WRAPLOW(step1[25] + step1[30]); - step2[31] = WRAPLOW(step1[24] + step1[31]); - - // stage 7 - step1[0] = WRAPLOW(step2[0] + step2[15]); - step1[1] = WRAPLOW(step2[1] + step2[14]); - step1[2] = WRAPLOW(step2[2] + step2[13]); - step1[3] = WRAPLOW(step2[3] + step2[12]); - step1[4] = WRAPLOW(step2[4] + step2[11]); - step1[5] = WRAPLOW(step2[5] + step2[10]); - step1[6] = WRAPLOW(step2[6] + step2[9]); - step1[7] = WRAPLOW(step2[7] + step2[8]); - step1[8] = WRAPLOW(step2[7] - step2[8]); - step1[9] = WRAPLOW(step2[6] - step2[9]); - step1[10] = WRAPLOW(step2[5] - step2[10]); - step1[11] = WRAPLOW(step2[4] - step2[11]); - step1[12] = WRAPLOW(step2[3] - step2[12]); - step1[13] = WRAPLOW(step2[2] - step2[13]); - step1[14] = WRAPLOW(step2[1] - step2[14]); - step1[15] = WRAPLOW(step2[0] - step2[15]); - - step1[16] = step2[16]; - step1[17] = step2[17]; - step1[18] = step2[18]; - step1[19] = step2[19]; - temp1 = (-step2[20] + step2[27]) * cospi_16_64; - temp2 = (step2[20] + step2[27]) * cospi_16_64; - step1[20] = WRAPLOW(dct_const_round_shift(temp1)); - step1[27] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = (-step2[21] + step2[26]) * cospi_16_64; - temp2 = (step2[21] + 
step2[26]) * cospi_16_64; - step1[21] = WRAPLOW(dct_const_round_shift(temp1)); - step1[26] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = (-step2[22] + step2[25]) * cospi_16_64; - temp2 = (step2[22] + step2[25]) * cospi_16_64; - step1[22] = WRAPLOW(dct_const_round_shift(temp1)); - step1[25] = WRAPLOW(dct_const_round_shift(temp2)); - temp1 = (-step2[23] + step2[24]) * cospi_16_64; - temp2 = (step2[23] + step2[24]) * cospi_16_64; - step1[23] = WRAPLOW(dct_const_round_shift(temp1)); - step1[24] = WRAPLOW(dct_const_round_shift(temp2)); - step1[28] = step2[28]; - step1[29] = step2[29]; - step1[30] = step2[30]; - step1[31] = step2[31]; - - // final stage - output[0] = WRAPLOW(step1[0] + step1[31]); - output[1] = WRAPLOW(step1[1] + step1[30]); - output[2] = WRAPLOW(step1[2] + step1[29]); - output[3] = WRAPLOW(step1[3] + step1[28]); - output[4] = WRAPLOW(step1[4] + step1[27]); - output[5] = WRAPLOW(step1[5] + step1[26]); - output[6] = WRAPLOW(step1[6] + step1[25]); - output[7] = WRAPLOW(step1[7] + step1[24]); - output[8] = WRAPLOW(step1[8] + step1[23]); - output[9] = WRAPLOW(step1[9] + step1[22]); - output[10] = WRAPLOW(step1[10] + step1[21]); - output[11] = WRAPLOW(step1[11] + step1[20]); - output[12] = WRAPLOW(step1[12] + step1[19]); - output[13] = WRAPLOW(step1[13] + step1[18]); - output[14] = WRAPLOW(step1[14] + step1[17]); - output[15] = WRAPLOW(step1[15] + step1[16]); - output[16] = WRAPLOW(step1[15] - step1[16]); - output[17] = WRAPLOW(step1[14] - step1[17]); - output[18] = WRAPLOW(step1[13] - step1[18]); - output[19] = WRAPLOW(step1[12] - step1[19]); - output[20] = WRAPLOW(step1[11] - step1[20]); - output[21] = WRAPLOW(step1[10] - step1[21]); - output[22] = WRAPLOW(step1[9] - step1[22]); - output[23] = WRAPLOW(step1[8] - step1[23]); - output[24] = WRAPLOW(step1[7] - step1[24]); - output[25] = WRAPLOW(step1[6] - step1[25]); - output[26] = WRAPLOW(step1[5] - step1[26]); - output[27] = WRAPLOW(step1[4] - step1[27]); - output[28] = WRAPLOW(step1[3] - step1[28]); - 
output[29] = WRAPLOW(step1[2] - step1[29]); - output[30] = WRAPLOW(step1[1] - step1[30]); - output[31] = WRAPLOW(step1[0] - step1[31]); -} - -void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, - int stride) { - tran_low_t out[32 * 32]; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[32], temp_out[32]; - - // Rows - for (i = 0; i < 32; ++i) { - int16_t zero_coeff[16]; - for (j = 0; j < 16; ++j) - zero_coeff[j] = input[2 * j] | input[2 * j + 1]; - for (j = 0; j < 8; ++j) - zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; - for (j = 0; j < 4; ++j) - zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; - for (j = 0; j < 2; ++j) - zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; - - if (zero_coeff[0] | zero_coeff[1]) - idct32_c(input, outptr); - else - memset(outptr, 0, sizeof(tran_low_t) * 32); - input += 32; - outptr += 32; - } - - // Columns - for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) - temp_in[j] = out[j * 32 + i]; - idct32_c(temp_in, temp_out); - for (j = 0; j < 32; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 6)); - } - } -} - -void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, - int stride) { - tran_low_t out[32 * 32] = {0}; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[32], temp_out[32]; - - // Rows - // only upper-left 16x16 has non-zero coeff - for (i = 0; i < 16; ++i) { - idct32_c(input, outptr); - input += 32; - outptr += 32; - } - - // Columns - for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) - temp_in[j] = out[j * 32 + i]; - idct32_c(temp_in, temp_out); - for (j = 0; j < 32; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 6)); - } - } -} - -void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, - int stride) { - tran_low_t out[32 * 32] = {0}; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[32], temp_out[32]; - - 
// Rows - // only upper-left 8x8 has non-zero coeff - for (i = 0; i < 8; ++i) { - idct32_c(input, outptr); - input += 32; - outptr += 32; - } - - // Columns - for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) - temp_in[j] = out[j * 32 + i]; - idct32_c(temp_in, temp_out); - for (j = 0; j < 32; ++j) { - dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], - ROUND_POWER_OF_TWO(temp_out[j], 6)); - } - } -} - -void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { - int i, j; - tran_high_t a1; - - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); - a1 = ROUND_POWER_OF_TWO(out, 6); - - for (j = 0; j < 32; ++j) { - for (i = 0; i < 32; ++i) - dest[i] = clip_pixel_add(dest[i], a1); - dest += stride; - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, - 0.5 shifts per pixel. 
*/ - int i; - tran_low_t output[16]; - tran_high_t a1, b1, c1, d1, e1; - const tran_low_t *ip = input; - tran_low_t *op = output; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - for (i = 0; i < 4; i++) { - a1 = ip[0] >> UNIT_QUANT_SHIFT; - c1 = ip[1] >> UNIT_QUANT_SHIFT; - d1 = ip[2] >> UNIT_QUANT_SHIFT; - b1 = ip[3] >> UNIT_QUANT_SHIFT; - a1 += c1; - d1 -= b1; - e1 = (a1 - d1) >> 1; - b1 = e1 - b1; - c1 = e1 - c1; - a1 -= b1; - d1 += c1; - op[0] = HIGHBD_WRAPLOW(a1, bd); - op[1] = HIGHBD_WRAPLOW(b1, bd); - op[2] = HIGHBD_WRAPLOW(c1, bd); - op[3] = HIGHBD_WRAPLOW(d1, bd); - ip += 4; - op += 4; - } - - ip = output; - for (i = 0; i < 4; i++) { - a1 = ip[4 * 0]; - c1 = ip[4 * 1]; - d1 = ip[4 * 2]; - b1 = ip[4 * 3]; - a1 += c1; - d1 -= b1; - e1 = (a1 - d1) >> 1; - b1 = e1 - b1; - c1 = e1 - c1; - a1 -= b1; - d1 += c1; - dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], - HIGHBD_WRAPLOW(a1, bd), bd); - dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], - HIGHBD_WRAPLOW(b1, bd), bd); - dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], - HIGHBD_WRAPLOW(c1, bd), bd); - dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], - HIGHBD_WRAPLOW(d1, bd), bd); - - ip++; - dest++; - } -} - -void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, - int dest_stride, int bd) { - int i; - tran_high_t a1, e1; - tran_low_t tmp[4]; - const tran_low_t *ip = in; - tran_low_t *op = tmp; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - (void) bd; - - a1 = ip[0] >> UNIT_QUANT_SHIFT; - e1 = a1 >> 1; - a1 -= e1; - op[0] = HIGHBD_WRAPLOW(a1, bd); - op[1] = op[2] = op[3] = HIGHBD_WRAPLOW(e1, bd); - - ip = tmp; - for (i = 0; i < 4; i++) { - e1 = ip[0] >> 1; - a1 = ip[0] - e1; - dest[dest_stride * 0] = highbd_clip_pixel_add( - dest[dest_stride * 0], a1, bd); - dest[dest_stride * 1] = highbd_clip_pixel_add( - dest[dest_stride * 1], e1, bd); - dest[dest_stride * 2] = highbd_clip_pixel_add( - dest[dest_stride * 2], e1, bd); - dest[dest_stride * 3] = 
highbd_clip_pixel_add( - dest[dest_stride * 3], e1, bd); - ip++; - dest++; - } -} - -void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) { - tran_low_t step[4]; - tran_high_t temp1, temp2; - (void) bd; - // stage 1 - temp1 = (input[0] + input[2]) * cospi_16_64; - temp2 = (input[0] - input[2]) * cospi_16_64; - step[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; - temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; - step[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - // stage 2 - output[0] = HIGHBD_WRAPLOW(step[0] + step[3], bd); - output[1] = HIGHBD_WRAPLOW(step[1] + step[2], bd); - output[2] = HIGHBD_WRAPLOW(step[1] - step[2], bd); - output[3] = HIGHBD_WRAPLOW(step[0] - step[3], bd); -} - -void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[4 * 4]; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[4], temp_out[4]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - // Rows - for (i = 0; i < 4; ++i) { - vpx_highbd_idct4_c(input, outptr, bd); - input += 4; - outptr += 4; - } - - // Columns - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j * 4 + i]; - vpx_highbd_idct4_c(temp_in, temp_out, bd); - for (j = 0; j < 4; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); - } - } -} - -void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, - int dest_stride, int bd) { - int i; - tran_high_t a1; - tran_low_t out = HIGHBD_WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); - a1 = 
ROUND_POWER_OF_TWO(out, 4); - - for (i = 0; i < 4; i++) { - dest[0] = highbd_clip_pixel_add(dest[0], a1, bd); - dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); - dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); - dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); - dest += dest_stride; - } -} - -void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) { - tran_low_t step1[8], step2[8]; - tran_high_t temp1, temp2; - // stage 1 - step1[0] = input[0]; - step1[2] = input[4]; - step1[1] = input[2]; - step1[3] = input[6]; - temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; - temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; - step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; - temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; - step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - // stage 2 & stage 3 - even half - vpx_highbd_idct4_c(step1, step1, bd); - - // stage 2 - odd half - step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); - step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); - step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); - step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); - - // stage 3 - odd half - step1[4] = step2[4]; - temp1 = (step2[6] - step2[5]) * cospi_16_64; - temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step1[7] = step2[7]; - - // stage 4 - output[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); - output[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); - output[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); - output[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); - output[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); - output[5] = 
HIGHBD_WRAPLOW(step1[2] - step1[5], bd); - output[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); - output[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); -} - -void vpx_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[8 * 8]; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[8], temp_out[8]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - // First transform rows. - for (i = 0; i < 8; ++i) { - vpx_highbd_idct8_c(input, outptr, bd); - input += 8; - outptr += 8; - } - - // Then transform columns. - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; - vpx_highbd_idct8_c(temp_in, temp_out, bd); - for (j = 0; j < 8; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); - } - } -} - -void vpx_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - int i, j; - tran_high_t a1; - tran_low_t out = HIGHBD_WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); - a1 = ROUND_POWER_OF_TWO(out, 5); - for (j = 0; j < 8; ++j) { - for (i = 0; i < 8; ++i) - dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); - dest += stride; - } -} - -void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { - tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; - - tran_low_t x0 = input[0]; - tran_low_t x1 = input[1]; - tran_low_t x2 = input[2]; - tran_low_t x3 = input[3]; - (void) bd; - - if (!(x0 | x1 | x2 | x3)) { - memset(output, 0, 4 * sizeof(*output)); - return; - } - - s0 = sinpi_1_9 * x0; - s1 = sinpi_2_9 * x0; - s2 = sinpi_3_9 * x1; - s3 = sinpi_4_9 * x2; - s4 = sinpi_1_9 * x2; - s5 = sinpi_2_9 * x3; - s6 = sinpi_4_9 * x3; - s7 = (tran_high_t)HIGHBD_WRAPLOW(x0 - x2 + x3, bd); - - s0 = s0 + s3 + s5; - s1 = s1 - s4 - s6; - s3 = s2; - s2 = sinpi_3_9 * 
s7; - - // 1-D transform scaling factor is sqrt(2). - // The overall dynamic range is 14b (input) + 14b (multiplication scaling) - // + 1b (addition) = 29b. - // Hence the output bit depth is 15b. - output[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s3), bd); - output[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s3), bd); - output[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd); - output[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3), bd); -} - -void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { - tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; - - tran_low_t x0 = input[7]; - tran_low_t x1 = input[0]; - tran_low_t x2 = input[5]; - tran_low_t x3 = input[2]; - tran_low_t x4 = input[3]; - tran_low_t x5 = input[4]; - tran_low_t x6 = input[1]; - tran_low_t x7 = input[6]; - (void) bd; - - if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { - memset(output, 0, 8 * sizeof(*output)); - return; - } - - // stage 1 - s0 = cospi_2_64 * x0 + cospi_30_64 * x1; - s1 = cospi_30_64 * x0 - cospi_2_64 * x1; - s2 = cospi_10_64 * x2 + cospi_22_64 * x3; - s3 = cospi_22_64 * x2 - cospi_10_64 * x3; - s4 = cospi_18_64 * x4 + cospi_14_64 * x5; - s5 = cospi_14_64 * x4 - cospi_18_64 * x5; - s6 = cospi_26_64 * x6 + cospi_6_64 * x7; - s7 = cospi_6_64 * x6 - cospi_26_64 * x7; - - x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s4), bd); - x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s5), bd); - x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 + s6), bd); - x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 + s7), bd); - x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s4), bd); - x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s5), bd); - x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s6), bd); - x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s7), bd); - - // stage 2 - s0 = x0; - s1 = x1; - s2 = x2; - s3 = x3; - s4 = cospi_8_64 * x4 + cospi_24_64 * x5; - s5 = cospi_24_64 * x4 - cospi_8_64 
* x5; - s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; - s7 = cospi_8_64 * x6 + cospi_24_64 * x7; - - x0 = HIGHBD_WRAPLOW(s0 + s2, bd); - x1 = HIGHBD_WRAPLOW(s1 + s3, bd); - x2 = HIGHBD_WRAPLOW(s0 - s2, bd); - x3 = HIGHBD_WRAPLOW(s1 - s3, bd); - x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s6), bd); - x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s7), bd); - x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s6), bd); - x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s7), bd); - - // stage 3 - s2 = cospi_16_64 * (x2 + x3); - s3 = cospi_16_64 * (x2 - x3); - s6 = cospi_16_64 * (x6 + x7); - s7 = cospi_16_64 * (x6 - x7); - - x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd); - x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3), bd); - x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6), bd); - x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7), bd); - - output[0] = HIGHBD_WRAPLOW(x0, bd); - output[1] = HIGHBD_WRAPLOW(-x4, bd); - output[2] = HIGHBD_WRAPLOW(x6, bd); - output[3] = HIGHBD_WRAPLOW(-x2, bd); - output[4] = HIGHBD_WRAPLOW(x3, bd); - output[5] = HIGHBD_WRAPLOW(-x7, bd); - output[6] = HIGHBD_WRAPLOW(x5, bd); - output[7] = HIGHBD_WRAPLOW(-x1, bd); -} - -void vpx_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[8 * 8] = { 0 }; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[8], temp_out[8]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - // First transform rows. - // Only first 4 row has non-zero coefs. - for (i = 0; i < 4; ++i) { - vpx_highbd_idct8_c(input, outptr, bd); - input += 8; - outptr += 8; - } - // Then transform columns. 
- for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; - vpx_highbd_idct8_c(temp_in, temp_out, bd); - for (j = 0; j < 8; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); - } - } -} - -void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { - tran_low_t step1[16], step2[16]; - tran_high_t temp1, temp2; - (void) bd; - - // stage 1 - step1[0] = input[0/2]; - step1[1] = input[16/2]; - step1[2] = input[8/2]; - step1[3] = input[24/2]; - step1[4] = input[4/2]; - step1[5] = input[20/2]; - step1[6] = input[12/2]; - step1[7] = input[28/2]; - step1[8] = input[2/2]; - step1[9] = input[18/2]; - step1[10] = input[10/2]; - step1[11] = input[26/2]; - step1[12] = input[6/2]; - step1[13] = input[22/2]; - step1[14] = input[14/2]; - step1[15] = input[30/2]; - - // stage 2 - step2[0] = step1[0]; - step2[1] = step1[1]; - step2[2] = step1[2]; - step2[3] = step1[3]; - step2[4] = step1[4]; - step2[5] = step1[5]; - step2[6] = step1[6]; - step2[7] = step1[7]; - - temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; - temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; - step2[8] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[15] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; - temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; - step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; - temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; - step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; - temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; - step2[11] = 
HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - // stage 3 - step1[0] = step2[0]; - step1[1] = step2[1]; - step1[2] = step2[2]; - step1[3] = step2[3]; - - temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; - temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; - step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; - temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; - step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd); - step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd); - step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd); - step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd); - step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd); - step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd); - step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd); - step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd); - - // stage 4 - temp1 = (step1[0] + step1[1]) * cospi_16_64; - temp2 = (step1[0] - step1[1]) * cospi_16_64; - step2[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; - temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); - step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); - step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); - step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); - - step2[8] = step1[8]; - step2[15] = step1[15]; - temp1 = -step1[9] * 
cospi_8_64 + step1[14] * cospi_24_64; - temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; - step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; - temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; - step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step2[11] = step1[11]; - step2[12] = step1[12]; - - // stage 5 - step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd); - step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd); - step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd); - step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd); - step1[4] = step2[4]; - temp1 = (step2[6] - step2[5]) * cospi_16_64; - temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step1[7] = step2[7]; - - step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd); - step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd); - step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd); - step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd); - step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd); - step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd); - step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd); - step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd); - - // stage 6 - step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); - step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); - step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); - step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); - step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); - step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); - step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); - step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); - step2[8] = step1[8]; - step2[9] = 
step1[9]; - temp1 = (-step1[10] + step1[13]) * cospi_16_64; - temp2 = (step1[10] + step1[13]) * cospi_16_64; - step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = (-step1[11] + step1[12]) * cospi_16_64; - temp2 = (step1[11] + step1[12]) * cospi_16_64; - step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step2[14] = step1[14]; - step2[15] = step1[15]; - - // stage 7 - output[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd); - output[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd); - output[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd); - output[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd); - output[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd); - output[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd); - output[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd); - output[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd); - output[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd); - output[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd); - output[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd); - output[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd); - output[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd); - output[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd); - output[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd); - output[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd); -} - -void vpx_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[16 * 16]; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[16], temp_out[16]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - // First transform rows. - for (i = 0; i < 16; ++i) { - vpx_highbd_idct16_c(input, outptr, bd); - input += 16; - outptr += 16; - } - - // Then transform columns. 
- for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; - vpx_highbd_idct16_c(temp_in, temp_out, bd); - for (j = 0; j < 16; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); - } - } -} - -void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { - tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; - tran_high_t s9, s10, s11, s12, s13, s14, s15; - - tran_low_t x0 = input[15]; - tran_low_t x1 = input[0]; - tran_low_t x2 = input[13]; - tran_low_t x3 = input[2]; - tran_low_t x4 = input[11]; - tran_low_t x5 = input[4]; - tran_low_t x6 = input[9]; - tran_low_t x7 = input[6]; - tran_low_t x8 = input[7]; - tran_low_t x9 = input[8]; - tran_low_t x10 = input[5]; - tran_low_t x11 = input[10]; - tran_low_t x12 = input[3]; - tran_low_t x13 = input[12]; - tran_low_t x14 = input[1]; - tran_low_t x15 = input[14]; - (void) bd; - - if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 - | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { - memset(output, 0, 16 * sizeof(*output)); - return; - } - - // stage 1 - s0 = x0 * cospi_1_64 + x1 * cospi_31_64; - s1 = x0 * cospi_31_64 - x1 * cospi_1_64; - s2 = x2 * cospi_5_64 + x3 * cospi_27_64; - s3 = x2 * cospi_27_64 - x3 * cospi_5_64; - s4 = x4 * cospi_9_64 + x5 * cospi_23_64; - s5 = x4 * cospi_23_64 - x5 * cospi_9_64; - s6 = x6 * cospi_13_64 + x7 * cospi_19_64; - s7 = x6 * cospi_19_64 - x7 * cospi_13_64; - s8 = x8 * cospi_17_64 + x9 * cospi_15_64; - s9 = x8 * cospi_15_64 - x9 * cospi_17_64; - s10 = x10 * cospi_21_64 + x11 * cospi_11_64; - s11 = x10 * cospi_11_64 - x11 * cospi_21_64; - s12 = x12 * cospi_25_64 + x13 * cospi_7_64; - s13 = x12 * cospi_7_64 - x13 * cospi_25_64; - s14 = x14 * cospi_29_64 + x15 * cospi_3_64; - s15 = x14 * cospi_3_64 - x15 * cospi_29_64; - - x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s8), bd); - x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s9), bd); - x2 = 
HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 + s10), bd); - x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 + s11), bd); - x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s12), bd); - x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s13), bd); - x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 + s14), bd); - x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 + s15), bd); - x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s8), bd); - x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s9), bd); - x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s10), bd); - x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s11), bd); - x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s12), bd); - x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s13), bd); - x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 - s14), bd); - x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 - s15), bd); - - // stage 2 - s0 = x0; - s1 = x1; - s2 = x2; - s3 = x3; - s4 = x4; - s5 = x5; - s6 = x6; - s7 = x7; - s8 = x8 * cospi_4_64 + x9 * cospi_28_64; - s9 = x8 * cospi_28_64 - x9 * cospi_4_64; - s10 = x10 * cospi_20_64 + x11 * cospi_12_64; - s11 = x10 * cospi_12_64 - x11 * cospi_20_64; - s12 = -x12 * cospi_28_64 + x13 * cospi_4_64; - s13 = x12 * cospi_4_64 + x13 * cospi_28_64; - s14 = -x14 * cospi_12_64 + x15 * cospi_20_64; - s15 = x14 * cospi_20_64 + x15 * cospi_12_64; - - x0 = HIGHBD_WRAPLOW(s0 + s4, bd); - x1 = HIGHBD_WRAPLOW(s1 + s5, bd); - x2 = HIGHBD_WRAPLOW(s2 + s6, bd); - x3 = HIGHBD_WRAPLOW(s3 + s7, bd); - x4 = HIGHBD_WRAPLOW(s0 - s4, bd); - x5 = HIGHBD_WRAPLOW(s1 - s5, bd); - x6 = HIGHBD_WRAPLOW(s2 - s6, bd); - x7 = HIGHBD_WRAPLOW(s3 - s7, bd); - x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s8 + s12), bd); - x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s9 + s13), bd); - x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10 + s14), bd); - x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11 + s15), bd); - x12 = 
HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s8 - s12), bd); - x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s9 - s13), bd); - x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10 - s14), bd); - x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11 - s15), bd); - - // stage 3 - s0 = x0; - s1 = x1; - s2 = x2; - s3 = x3; - s4 = x4 * cospi_8_64 + x5 * cospi_24_64; - s5 = x4 * cospi_24_64 - x5 * cospi_8_64; - s6 = -x6 * cospi_24_64 + x7 * cospi_8_64; - s7 = x6 * cospi_8_64 + x7 * cospi_24_64; - s8 = x8; - s9 = x9; - s10 = x10; - s11 = x11; - s12 = x12 * cospi_8_64 + x13 * cospi_24_64; - s13 = x12 * cospi_24_64 - x13 * cospi_8_64; - s14 = -x14 * cospi_24_64 + x15 * cospi_8_64; - s15 = x14 * cospi_8_64 + x15 * cospi_24_64; - - x0 = HIGHBD_WRAPLOW(s0 + s2, bd); - x1 = HIGHBD_WRAPLOW(s1 + s3, bd); - x2 = HIGHBD_WRAPLOW(s0 - s2, bd); - x3 = HIGHBD_WRAPLOW(s1 - s3, bd); - x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s6), bd); - x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s7), bd); - x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s6), bd); - x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s7), bd); - x8 = HIGHBD_WRAPLOW(s8 + s10, bd); - x9 = HIGHBD_WRAPLOW(s9 + s11, bd); - x10 = HIGHBD_WRAPLOW(s8 - s10, bd); - x11 = HIGHBD_WRAPLOW(s9 - s11, bd); - x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s12 + s14), bd); - x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 + s15), bd); - x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s12 - s14), bd); - x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 - s15), bd); - - // stage 4 - s2 = (- cospi_16_64) * (x2 + x3); - s3 = cospi_16_64 * (x2 - x3); - s6 = cospi_16_64 * (x6 + x7); - s7 = cospi_16_64 * (-x6 + x7); - s10 = cospi_16_64 * (x10 + x11); - s11 = cospi_16_64 * (-x10 + x11); - s14 = (- cospi_16_64) * (x14 + x15); - s15 = cospi_16_64 * (x14 - x15); - - x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd); - x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3), bd); - x6 = 
HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6), bd); - x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7), bd); - x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10), bd); - x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11), bd); - x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s14), bd); - x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s15), bd); - - output[0] = HIGHBD_WRAPLOW(x0, bd); - output[1] = HIGHBD_WRAPLOW(-x8, bd); - output[2] = HIGHBD_WRAPLOW(x12, bd); - output[3] = HIGHBD_WRAPLOW(-x4, bd); - output[4] = HIGHBD_WRAPLOW(x6, bd); - output[5] = HIGHBD_WRAPLOW(x14, bd); - output[6] = HIGHBD_WRAPLOW(x10, bd); - output[7] = HIGHBD_WRAPLOW(x2, bd); - output[8] = HIGHBD_WRAPLOW(x3, bd); - output[9] = HIGHBD_WRAPLOW(x11, bd); - output[10] = HIGHBD_WRAPLOW(x15, bd); - output[11] = HIGHBD_WRAPLOW(x7, bd); - output[12] = HIGHBD_WRAPLOW(x5, bd); - output[13] = HIGHBD_WRAPLOW(-x13, bd); - output[14] = HIGHBD_WRAPLOW(x9, bd); - output[15] = HIGHBD_WRAPLOW(-x1, bd); -} - -void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[16 * 16] = { 0 }; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[16], temp_out[16]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - // First transform rows. Since all non-zero dct coefficients are in - // upper-left 4x4 area, we only need to calculate first 4 rows here. - for (i = 0; i < 4; ++i) { - vpx_highbd_idct16_c(input, outptr, bd); - input += 16; - outptr += 16; - } - - // Then transform columns. 
- for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j*16 + i]; - vpx_highbd_idct16_c(temp_in, temp_out, bd); - for (j = 0; j < 16; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); - } - } -} - -void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - int i, j; - tran_high_t a1; - tran_low_t out = HIGHBD_WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); - a1 = ROUND_POWER_OF_TWO(out, 6); - for (j = 0; j < 16; ++j) { - for (i = 0; i < 16; ++i) - dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); - dest += stride; - } -} - -static void highbd_idct32_c(const tran_low_t *input, - tran_low_t *output, int bd) { - tran_low_t step1[32], step2[32]; - tran_high_t temp1, temp2; - (void) bd; - - // stage 1 - step1[0] = input[0]; - step1[1] = input[16]; - step1[2] = input[8]; - step1[3] = input[24]; - step1[4] = input[4]; - step1[5] = input[20]; - step1[6] = input[12]; - step1[7] = input[28]; - step1[8] = input[2]; - step1[9] = input[18]; - step1[10] = input[10]; - step1[11] = input[26]; - step1[12] = input[6]; - step1[13] = input[22]; - step1[14] = input[14]; - step1[15] = input[30]; - - temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; - temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; - step1[16] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[31] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; - temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; - step1[17] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[30] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; - temp2 = input[9] * cospi_9_64 + 
input[23] * cospi_23_64; - step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; - temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; - step1[19] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[28] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; - temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; - step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; - temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; - step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; - temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; - step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; - temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; - step1[23] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[24] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - // stage 2 - step2[0] = step1[0]; - step2[1] = step1[1]; - step2[2] = step1[2]; - step2[3] = step1[3]; - step2[4] = step1[4]; - step2[5] = step1[5]; - step2[6] = step1[6]; - step2[7] = step1[7]; - - temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; - temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; - step2[8] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[15] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = step1[9] * cospi_14_64 - step1[14] * 
cospi_18_64; - temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; - step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; - temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; - step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; - temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; - step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[17], bd); - step2[17] = HIGHBD_WRAPLOW(step1[16] - step1[17], bd); - step2[18] = HIGHBD_WRAPLOW(-step1[18] + step1[19], bd); - step2[19] = HIGHBD_WRAPLOW(step1[18] + step1[19], bd); - step2[20] = HIGHBD_WRAPLOW(step1[20] + step1[21], bd); - step2[21] = HIGHBD_WRAPLOW(step1[20] - step1[21], bd); - step2[22] = HIGHBD_WRAPLOW(-step1[22] + step1[23], bd); - step2[23] = HIGHBD_WRAPLOW(step1[22] + step1[23], bd); - step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[25], bd); - step2[25] = HIGHBD_WRAPLOW(step1[24] - step1[25], bd); - step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[27], bd); - step2[27] = HIGHBD_WRAPLOW(step1[26] + step1[27], bd); - step2[28] = HIGHBD_WRAPLOW(step1[28] + step1[29], bd); - step2[29] = HIGHBD_WRAPLOW(step1[28] - step1[29], bd); - step2[30] = HIGHBD_WRAPLOW(-step1[30] + step1[31], bd); - step2[31] = HIGHBD_WRAPLOW(step1[30] + step1[31], bd); - - // stage 3 - step1[0] = step2[0]; - step1[1] = step2[1]; - step1[2] = step2[2]; - step1[3] = step2[3]; - - temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; - temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; - step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[7] = 
HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; - temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; - step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - - step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd); - step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd); - step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd); - step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd); - step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd); - step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd); - step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd); - step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd); - - step1[16] = step2[16]; - step1[31] = step2[31]; - temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; - temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; - step1[17] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[30] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; - temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; - step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step1[19] = step2[19]; - step1[20] = step2[20]; - temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; - temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; - step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; - temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; - step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step1[23] = step2[23]; - step1[24] = step2[24]; - step1[27] = 
step2[27]; - step1[28] = step2[28]; - - // stage 4 - temp1 = (step1[0] + step1[1]) * cospi_16_64; - temp2 = (step1[0] - step1[1]) * cospi_16_64; - step2[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; - temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; - step2[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); - step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); - step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); - step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); - - step2[8] = step1[8]; - step2[15] = step1[15]; - temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; - temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; - step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; - temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; - step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step2[11] = step1[11]; - step2[12] = step1[12]; - - step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[19], bd); - step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[18], bd); - step2[18] = HIGHBD_WRAPLOW(step1[17] - step1[18], bd); - step2[19] = HIGHBD_WRAPLOW(step1[16] - step1[19], bd); - step2[20] = HIGHBD_WRAPLOW(-step1[20] + step1[23], bd); - step2[21] = HIGHBD_WRAPLOW(-step1[21] + step1[22], bd); - step2[22] = HIGHBD_WRAPLOW(step1[21] + step1[22], bd); - step2[23] = HIGHBD_WRAPLOW(step1[20] + step1[23], bd); - - step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[27], bd); - step2[25] = HIGHBD_WRAPLOW(step1[25] + step1[26], bd); - step2[26] = HIGHBD_WRAPLOW(step1[25] - 
step1[26], bd); - step2[27] = HIGHBD_WRAPLOW(step1[24] - step1[27], bd); - step2[28] = HIGHBD_WRAPLOW(-step1[28] + step1[31], bd); - step2[29] = HIGHBD_WRAPLOW(-step1[29] + step1[30], bd); - step2[30] = HIGHBD_WRAPLOW(step1[29] + step1[30], bd); - step2[31] = HIGHBD_WRAPLOW(step1[28] + step1[31], bd); - - // stage 5 - step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd); - step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd); - step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd); - step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd); - step1[4] = step2[4]; - temp1 = (step2[6] - step2[5]) * cospi_16_64; - temp2 = (step2[5] + step2[6]) * cospi_16_64; - step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step1[7] = step2[7]; - - step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd); - step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd); - step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd); - step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd); - step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd); - step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd); - step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd); - step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd); - - step1[16] = step2[16]; - step1[17] = step2[17]; - temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; - temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; - step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; - temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; - step1[19] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[28] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; - temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; - step1[20] = 
HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; - temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; - step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step1[22] = step2[22]; - step1[23] = step2[23]; - step1[24] = step2[24]; - step1[25] = step2[25]; - step1[30] = step2[30]; - step1[31] = step2[31]; - - // stage 6 - step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); - step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); - step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); - step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); - step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); - step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); - step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); - step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); - step2[8] = step1[8]; - step2[9] = step1[9]; - temp1 = (-step1[10] + step1[13]) * cospi_16_64; - temp2 = (step1[10] + step1[13]) * cospi_16_64; - step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = (-step1[11] + step1[12]) * cospi_16_64; - temp2 = (step1[11] + step1[12]) * cospi_16_64; - step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step2[14] = step1[14]; - step2[15] = step1[15]; - - step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[23], bd); - step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[22], bd); - step2[18] = HIGHBD_WRAPLOW(step1[18] + step1[21], bd); - step2[19] = HIGHBD_WRAPLOW(step1[19] + step1[20], bd); - step2[20] = HIGHBD_WRAPLOW(step1[19] - step1[20], bd); - step2[21] = HIGHBD_WRAPLOW(step1[18] - step1[21], bd); - step2[22] = HIGHBD_WRAPLOW(step1[17] - step1[22], bd); - step2[23] = 
HIGHBD_WRAPLOW(step1[16] - step1[23], bd); - - step2[24] = HIGHBD_WRAPLOW(-step1[24] + step1[31], bd); - step2[25] = HIGHBD_WRAPLOW(-step1[25] + step1[30], bd); - step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[29], bd); - step2[27] = HIGHBD_WRAPLOW(-step1[27] + step1[28], bd); - step2[28] = HIGHBD_WRAPLOW(step1[27] + step1[28], bd); - step2[29] = HIGHBD_WRAPLOW(step1[26] + step1[29], bd); - step2[30] = HIGHBD_WRAPLOW(step1[25] + step1[30], bd); - step2[31] = HIGHBD_WRAPLOW(step1[24] + step1[31], bd); - - // stage 7 - step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd); - step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd); - step1[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd); - step1[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd); - step1[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd); - step1[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd); - step1[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd); - step1[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd); - step1[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd); - step1[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd); - step1[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd); - step1[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd); - step1[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd); - step1[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd); - step1[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd); - step1[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd); - - step1[16] = step2[16]; - step1[17] = step2[17]; - step1[18] = step2[18]; - step1[19] = step2[19]; - temp1 = (-step2[20] + step2[27]) * cospi_16_64; - temp2 = (step2[20] + step2[27]) * cospi_16_64; - step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = (-step2[21] + step2[26]) * cospi_16_64; - temp2 = (step2[21] + step2[26]) * cospi_16_64; - step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = 
(-step2[22] + step2[25]) * cospi_16_64; - temp2 = (step2[22] + step2[25]) * cospi_16_64; - step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - temp1 = (-step2[23] + step2[24]) * cospi_16_64; - temp2 = (step2[23] + step2[24]) * cospi_16_64; - step1[23] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd); - step1[24] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd); - step1[28] = step2[28]; - step1[29] = step2[29]; - step1[30] = step2[30]; - step1[31] = step2[31]; - - // final stage - output[0] = HIGHBD_WRAPLOW(step1[0] + step1[31], bd); - output[1] = HIGHBD_WRAPLOW(step1[1] + step1[30], bd); - output[2] = HIGHBD_WRAPLOW(step1[2] + step1[29], bd); - output[3] = HIGHBD_WRAPLOW(step1[3] + step1[28], bd); - output[4] = HIGHBD_WRAPLOW(step1[4] + step1[27], bd); - output[5] = HIGHBD_WRAPLOW(step1[5] + step1[26], bd); - output[6] = HIGHBD_WRAPLOW(step1[6] + step1[25], bd); - output[7] = HIGHBD_WRAPLOW(step1[7] + step1[24], bd); - output[8] = HIGHBD_WRAPLOW(step1[8] + step1[23], bd); - output[9] = HIGHBD_WRAPLOW(step1[9] + step1[22], bd); - output[10] = HIGHBD_WRAPLOW(step1[10] + step1[21], bd); - output[11] = HIGHBD_WRAPLOW(step1[11] + step1[20], bd); - output[12] = HIGHBD_WRAPLOW(step1[12] + step1[19], bd); - output[13] = HIGHBD_WRAPLOW(step1[13] + step1[18], bd); - output[14] = HIGHBD_WRAPLOW(step1[14] + step1[17], bd); - output[15] = HIGHBD_WRAPLOW(step1[15] + step1[16], bd); - output[16] = HIGHBD_WRAPLOW(step1[15] - step1[16], bd); - output[17] = HIGHBD_WRAPLOW(step1[14] - step1[17], bd); - output[18] = HIGHBD_WRAPLOW(step1[13] - step1[18], bd); - output[19] = HIGHBD_WRAPLOW(step1[12] - step1[19], bd); - output[20] = HIGHBD_WRAPLOW(step1[11] - step1[20], bd); - output[21] = HIGHBD_WRAPLOW(step1[10] - step1[21], bd); - output[22] = HIGHBD_WRAPLOW(step1[9] - step1[22], bd); - output[23] = HIGHBD_WRAPLOW(step1[8] - step1[23], bd); - output[24] = HIGHBD_WRAPLOW(step1[7] 
- step1[24], bd); - output[25] = HIGHBD_WRAPLOW(step1[6] - step1[25], bd); - output[26] = HIGHBD_WRAPLOW(step1[5] - step1[26], bd); - output[27] = HIGHBD_WRAPLOW(step1[4] - step1[27], bd); - output[28] = HIGHBD_WRAPLOW(step1[3] - step1[28], bd); - output[29] = HIGHBD_WRAPLOW(step1[2] - step1[29], bd); - output[30] = HIGHBD_WRAPLOW(step1[1] - step1[30], bd); - output[31] = HIGHBD_WRAPLOW(step1[0] - step1[31], bd); -} - -void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[32 * 32]; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[32], temp_out[32]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - // Rows - for (i = 0; i < 32; ++i) { - tran_low_t zero_coeff[16]; - for (j = 0; j < 16; ++j) - zero_coeff[j] = input[2 * j] | input[2 * j + 1]; - for (j = 0; j < 8; ++j) - zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; - for (j = 0; j < 4; ++j) - zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; - for (j = 0; j < 2; ++j) - zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; - - if (zero_coeff[0] | zero_coeff[1]) - highbd_idct32_c(input, outptr, bd); - else - memset(outptr, 0, sizeof(tran_low_t) * 32); - input += 32; - outptr += 32; - } - - // Columns - for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) - temp_in[j] = out[j * 32 + i]; - highbd_idct32_c(temp_in, temp_out, bd); - for (j = 0; j < 32; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); - } - } -} - -void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[32 * 32] = {0}; - tran_low_t *outptr = out; - int i, j; - tran_low_t temp_in[32], temp_out[32]; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - // Rows - // Only upper-left 8x8 has non-zero coeff. 
- for (i = 0; i < 8; ++i) { - highbd_idct32_c(input, outptr, bd); - input += 32; - outptr += 32; - } - // Columns - for (i = 0; i < 32; ++i) { - for (j = 0; j < 32; ++j) - temp_in[j] = out[j * 32 + i]; - highbd_idct32_c(temp_in, temp_out, bd); - for (j = 0; j < 32; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); - } - } -} - -void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - int i, j; - int a1; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - - tran_low_t out = HIGHBD_WRAPLOW( - highbd_dct_const_round_shift(input[0] * cospi_16_64), bd); - out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd); - a1 = ROUND_POWER_OF_TWO(out, 6); - - for (j = 0; j < 32; ++j) { - for (i = 0; i < 32; ++i) - dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); - dest += stride; - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vpx_dsp/inv_txfm.h b/thirdparty/libvpx/vpx_dsp/inv_txfm.h deleted file mode 100644 index 9cfe1be3a7..0000000000 --- a/thirdparty/libvpx/vpx_dsp/inv_txfm.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VPX_DSP_INV_TXFM_H_ -#define VPX_DSP_INV_TXFM_H_ - -#include <assert.h> - -#include "./vpx_config.h" -#include "vpx_dsp/txfm_common.h" -#include "vpx_ports/mem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -static INLINE tran_high_t check_range(tran_high_t input) { -#if CONFIG_COEFFICIENT_RANGE_CHECKING - // For valid VP9 input streams, intermediate stage coefficients should always - // stay within the range of a signed 16 bit integer. Coefficients can go out - // of this range for invalid/corrupt VP9 streams. However, strictly checking - // this range for every intermediate coefficient can burdensome for a decoder, - // therefore the following assertion is only enabled when configured with - // --enable-coefficient-range-checking. - assert(INT16_MIN <= input); - assert(input <= INT16_MAX); -#endif // CONFIG_COEFFICIENT_RANGE_CHECKING - return input; -} - -static INLINE tran_high_t dct_const_round_shift(tran_high_t input) { - tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); - return (tran_high_t)rv; -} - -#if CONFIG_VP9_HIGHBITDEPTH -static INLINE tran_high_t highbd_check_range(tran_high_t input, - int bd) { -#if CONFIG_COEFFICIENT_RANGE_CHECKING - // For valid highbitdepth VP9 streams, intermediate stage coefficients will - // stay within the ranges: - // - 8 bit: signed 16 bit integer - // - 10 bit: signed 18 bit integer - // - 12 bit: signed 20 bit integer - const int32_t int_max = (1 << (7 + bd)) - 1; - const int32_t int_min = -int_max - 1; - assert(int_min <= input); - assert(input <= int_max); - (void) int_min; -#endif // CONFIG_COEFFICIENT_RANGE_CHECKING - (void) bd; - return input; -} - -static INLINE tran_high_t highbd_dct_const_round_shift(tran_high_t input) { - tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); - return (tran_high_t)rv; -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -#if CONFIG_EMULATE_HARDWARE -// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a -// non-normative method to handle overflows. 
A stream that causes -// overflows in the inverse transform is considered invalid in VP9, -// and a hardware implementer is free to choose any reasonable -// method to handle overflows. However to aid in hardware -// verification they can use a specific implementation of the -// WRAPLOW() macro below that is identical to their intended -// hardware implementation (and also use configure options to trigger -// the C-implementation of the transform). -// -// The particular WRAPLOW implementation below performs strict -// overflow wrapping to match common hardware implementations. -// bd of 8 uses trans_low with 16bits, need to remove 16bits -// bd of 10 uses trans_low with 18bits, need to remove 14bits -// bd of 12 uses trans_low with 20bits, need to remove 12bits -// bd of x uses trans_low with 8+x bits, need to remove 24-x bits - -#define WRAPLOW(x) ((((int32_t)check_range(x)) << 16) >> 16) -#if CONFIG_VP9_HIGHBITDEPTH -#define HIGHBD_WRAPLOW(x, bd) \ - ((((int32_t)highbd_check_range((x), bd)) << (24 - bd)) >> (24 - bd)) -#endif // CONFIG_VP9_HIGHBITDEPTH - -#else // CONFIG_EMULATE_HARDWARE - -#define WRAPLOW(x) ((int32_t)check_range(x)) -#if CONFIG_VP9_HIGHBITDEPTH -#define HIGHBD_WRAPLOW(x, bd) \ - ((int32_t)highbd_check_range((x), bd)) -#endif // CONFIG_VP9_HIGHBITDEPTH -#endif // CONFIG_EMULATE_HARDWARE - -void idct4_c(const tran_low_t *input, tran_low_t *output); -void idct8_c(const tran_low_t *input, tran_low_t *output); -void idct16_c(const tran_low_t *input, tran_low_t *output); -void idct32_c(const tran_low_t *input, tran_low_t *output); -void iadst4_c(const tran_low_t *input, tran_low_t *output); -void iadst8_c(const tran_low_t *input, tran_low_t *output); -void iadst16_c(const tran_low_t *input, tran_low_t *output); - -#if CONFIG_VP9_HIGHBITDEPTH -void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd); -void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd); -void vpx_highbd_idct16_c(const tran_low_t *input, 
tran_low_t *output, int bd); - -void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd); -void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd); -void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd); - -static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans, - int bd) { - trans = HIGHBD_WRAPLOW(trans, bd); - return clip_pixel_highbd(dest + (int)trans, bd); -} -#endif - -static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) { - trans = WRAPLOW(trans); - return clip_pixel(dest + (int)trans); -} -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_DSP_INV_TXFM_H_ diff --git a/thirdparty/libvpx/vpx_dsp/loopfilter.c b/thirdparty/libvpx/vpx_dsp/loopfilter.c deleted file mode 100644 index 645a1ab95e..0000000000 --- a/thirdparty/libvpx/vpx_dsp/loopfilter.c +++ /dev/null @@ -1,767 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <stdlib.h> - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_ports/mem.h" - -static INLINE int8_t signed_char_clamp(int t) { - return (int8_t)clamp(t, -128, 127); -} - -#if CONFIG_VP9_HIGHBITDEPTH -static INLINE int16_t signed_char_clamp_high(int t, int bd) { - switch (bd) { - case 10: - return (int16_t)clamp(t, -128*4, 128*4-1); - case 12: - return (int16_t)clamp(t, -128*16, 128*16-1); - case 8: - default: - return (int16_t)clamp(t, -128, 128-1); - } -} -#endif - -// should we apply any filter at all: 11111111 yes, 00000000 no -static INLINE int8_t filter_mask(uint8_t limit, uint8_t blimit, - uint8_t p3, uint8_t p2, - uint8_t p1, uint8_t p0, - uint8_t q0, uint8_t q1, - uint8_t q2, uint8_t q3) { - int8_t mask = 0; - mask |= (abs(p3 - p2) > limit) * -1; - mask |= (abs(p2 - p1) > limit) * -1; - mask |= (abs(p1 - p0) > limit) * -1; - mask |= (abs(q1 - q0) > limit) * -1; - mask |= (abs(q2 - q1) > limit) * -1; - mask |= (abs(q3 - q2) > limit) * -1; - mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; - return ~mask; -} - -static INLINE int8_t flat_mask4(uint8_t thresh, - uint8_t p3, uint8_t p2, - uint8_t p1, uint8_t p0, - uint8_t q0, uint8_t q1, - uint8_t q2, uint8_t q3) { - int8_t mask = 0; - mask |= (abs(p1 - p0) > thresh) * -1; - mask |= (abs(q1 - q0) > thresh) * -1; - mask |= (abs(p2 - p0) > thresh) * -1; - mask |= (abs(q2 - q0) > thresh) * -1; - mask |= (abs(p3 - p0) > thresh) * -1; - mask |= (abs(q3 - q0) > thresh) * -1; - return ~mask; -} - -static INLINE int8_t flat_mask5(uint8_t thresh, - uint8_t p4, uint8_t p3, - uint8_t p2, uint8_t p1, - uint8_t p0, uint8_t q0, - uint8_t q1, uint8_t q2, - uint8_t q3, uint8_t q4) { - int8_t mask = ~flat_mask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3); - mask |= (abs(p4 - p0) > thresh) * -1; - mask |= (abs(q4 - q0) > thresh) * -1; - return ~mask; -} - -// is there high edge variance internal edge: 11111111 yes, 00000000 no -static INLINE int8_t 
hev_mask(uint8_t thresh, uint8_t p1, uint8_t p0, - uint8_t q0, uint8_t q1) { - int8_t hev = 0; - hev |= (abs(p1 - p0) > thresh) * -1; - hev |= (abs(q1 - q0) > thresh) * -1; - return hev; -} - -static INLINE void filter4(int8_t mask, uint8_t thresh, uint8_t *op1, - uint8_t *op0, uint8_t *oq0, uint8_t *oq1) { - int8_t filter1, filter2; - - const int8_t ps1 = (int8_t) *op1 ^ 0x80; - const int8_t ps0 = (int8_t) *op0 ^ 0x80; - const int8_t qs0 = (int8_t) *oq0 ^ 0x80; - const int8_t qs1 = (int8_t) *oq1 ^ 0x80; - const uint8_t hev = hev_mask(thresh, *op1, *op0, *oq0, *oq1); - - // add outer taps if we have high edge variance - int8_t filter = signed_char_clamp(ps1 - qs1) & hev; - - // inner taps - filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask; - - // save bottom 3 bits so that we round one side +4 and the other +3 - // if it equals 4 we'll set to adjust by -1 to account for the fact - // we'd round 3 the other way - filter1 = signed_char_clamp(filter + 4) >> 3; - filter2 = signed_char_clamp(filter + 3) >> 3; - - *oq0 = signed_char_clamp(qs0 - filter1) ^ 0x80; - *op0 = signed_char_clamp(ps0 + filter2) ^ 0x80; - - // outer tap adjustments - filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; - - *oq1 = signed_char_clamp(qs1 - filter) ^ 0x80; - *op1 = signed_char_clamp(ps1 + filter) ^ 0x80; -} - -void vpx_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */, - const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh) { - int i; - - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. 
- for (i = 0; i < 8; ++i) { - const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; - const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; - const int8_t mask = filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3); - filter4(mask, *thresh, s - 2 * p, s - 1 * p, s, s + 1 * p); - ++s; - } -} - -void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0, - const uint8_t *limit0, const uint8_t *thresh0, - const uint8_t *blimit1, const uint8_t *limit1, - const uint8_t *thresh1) { - vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0); - vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1); -} - -void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh) { - int i; - - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. - for (i = 0; i < 8; ++i) { - const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; - const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; - const int8_t mask = filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3); - filter4(mask, *thresh, s - 2, s - 1, s, s + 1); - s += pitch; - } -} - -void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, - const uint8_t *limit0, const uint8_t *thresh0, - const uint8_t *blimit1, const uint8_t *limit1, - const uint8_t *thresh1) { - vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0); - vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1); -} - -static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat, - uint8_t *op3, uint8_t *op2, - uint8_t *op1, uint8_t *op0, - uint8_t *oq0, uint8_t *oq1, - uint8_t *oq2, uint8_t *oq3) { - if (flat && mask) { - const uint8_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; - const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3; - - // 7-tap filter [1, 1, 1, 2, 1, 1, 1] - *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + 2 * p2 + 
p1 + p0 + q0, 3); - *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1, 3); - *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2, 3); - *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3, 3); - *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3, 3); - *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3, 3); - } else { - filter4(mask, thresh, op1, op0, oq0, oq1); - } -} - -void vpx_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh) { - int i; - - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. - for (i = 0; i < 8; ++i) { - const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; - const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; - - const int8_t mask = filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3); - const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); - filter8(mask, *thresh, flat, s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, - s, s + 1 * p, s + 2 * p, s + 3 * p); - ++s; - } -} - -void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0, - const uint8_t *limit0, const uint8_t *thresh0, - const uint8_t *blimit1, const uint8_t *limit1, - const uint8_t *thresh1) { - vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0); - vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1); -} - -void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh) { - int i; - - for (i = 0; i < 8; ++i) { - const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; - const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; - const int8_t mask = filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3); - const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); - filter8(mask, *thresh, flat, s - 4, s - 3, s - 2, s - 1, - s, s + 1, s + 2, s + 3); - s 
+= pitch; - } -} - -void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, - const uint8_t *limit0, const uint8_t *thresh0, - const uint8_t *blimit1, const uint8_t *limit1, - const uint8_t *thresh1) { - vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0); - vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1); -} - -static INLINE void filter16(int8_t mask, uint8_t thresh, - uint8_t flat, uint8_t flat2, - uint8_t *op7, uint8_t *op6, - uint8_t *op5, uint8_t *op4, - uint8_t *op3, uint8_t *op2, - uint8_t *op1, uint8_t *op0, - uint8_t *oq0, uint8_t *oq1, - uint8_t *oq2, uint8_t *oq3, - uint8_t *oq4, uint8_t *oq5, - uint8_t *oq6, uint8_t *oq7) { - if (flat2 && flat && mask) { - const uint8_t p7 = *op7, p6 = *op6, p5 = *op5, p4 = *op4, - p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; - - const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3, - q4 = *oq4, q5 = *oq5, q6 = *oq6, q7 = *oq7; - - // 15-tap filter [1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1] - *op6 = ROUND_POWER_OF_TWO(p7 * 7 + p6 * 2 + p5 + p4 + p3 + p2 + p1 + p0 + - q0, 4); - *op5 = ROUND_POWER_OF_TWO(p7 * 6 + p6 + p5 * 2 + p4 + p3 + p2 + p1 + p0 + - q0 + q1, 4); - *op4 = ROUND_POWER_OF_TWO(p7 * 5 + p6 + p5 + p4 * 2 + p3 + p2 + p1 + p0 + - q0 + q1 + q2, 4); - *op3 = ROUND_POWER_OF_TWO(p7 * 4 + p6 + p5 + p4 + p3 * 2 + p2 + p1 + p0 + - q0 + q1 + q2 + q3, 4); - *op2 = ROUND_POWER_OF_TWO(p7 * 3 + p6 + p5 + p4 + p3 + p2 * 2 + p1 + p0 + - q0 + q1 + q2 + q3 + q4, 4); - *op1 = ROUND_POWER_OF_TWO(p7 * 2 + p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 + - q0 + q1 + q2 + q3 + q4 + q5, 4); - *op0 = ROUND_POWER_OF_TWO(p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 + - q0 + q1 + q2 + q3 + q4 + q5 + q6, 4); - *oq0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 + p0 + - q0 * 2 + q1 + q2 + q3 + q4 + q5 + q6 + q7, 4); - *oq1 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 + - q0 + q1 * 2 + q2 + q3 + q4 + q5 + q6 + q7 * 2, 4); - *oq2 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + - q0 + q1 + q2 * 
2 + q3 + q4 + q5 + q6 + q7 * 3, 4); - *oq3 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + - q0 + q1 + q2 + q3 * 2 + q4 + q5 + q6 + q7 * 4, 4); - *oq4 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + - q0 + q1 + q2 + q3 + q4 * 2 + q5 + q6 + q7 * 5, 4); - *oq5 = ROUND_POWER_OF_TWO(p1 + p0 + - q0 + q1 + q2 + q3 + q4 + q5 * 2 + q6 + q7 * 6, 4); - *oq6 = ROUND_POWER_OF_TWO(p0 + - q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 + q7 * 7, 4); - } else { - filter8(mask, thresh, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3); - } -} - -static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, int count) { - int i; - - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. - for (i = 0; i < 8 * count; ++i) { - const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; - const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; - const int8_t mask = filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3); - const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); - const int8_t flat2 = flat_mask5(1, - s[-8 * p], s[-7 * p], s[-6 * p], s[-5 * p], p0, - q0, s[4 * p], s[5 * p], s[6 * p], s[7 * p]); - - filter16(mask, *thresh, flat, flat2, - s - 8 * p, s - 7 * p, s - 6 * p, s - 5 * p, - s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, - s, s + 1 * p, s + 2 * p, s + 3 * p, - s + 4 * p, s + 5 * p, s + 6 * p, s + 7 * p); - ++s; - } -} - -void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh) { - mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1); -} - -void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh) { - mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2); -} - -static void mb_lpf_vertical_edge_w(uint8_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count) 
{ - int i; - - for (i = 0; i < count; ++i) { - const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; - const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; - const int8_t mask = filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3); - const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); - const int8_t flat2 = flat_mask5(1, s[-8], s[-7], s[-6], s[-5], p0, - q0, s[4], s[5], s[6], s[7]); - - filter16(mask, *thresh, flat, flat2, - s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1, - s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7); - s += p; - } -} - -void vpx_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh) { - mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8); -} - -void vpx_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh) { - mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16); -} - -#if CONFIG_VP9_HIGHBITDEPTH -// Should we apply any filter at all: 11111111 yes, 00000000 no ? 
-static INLINE int8_t highbd_filter_mask(uint8_t limit, uint8_t blimit, - uint16_t p3, uint16_t p2, - uint16_t p1, uint16_t p0, - uint16_t q0, uint16_t q1, - uint16_t q2, uint16_t q3, int bd) { - int8_t mask = 0; - int16_t limit16 = (uint16_t)limit << (bd - 8); - int16_t blimit16 = (uint16_t)blimit << (bd - 8); - mask |= (abs(p3 - p2) > limit16) * -1; - mask |= (abs(p2 - p1) > limit16) * -1; - mask |= (abs(p1 - p0) > limit16) * -1; - mask |= (abs(q1 - q0) > limit16) * -1; - mask |= (abs(q2 - q1) > limit16) * -1; - mask |= (abs(q3 - q2) > limit16) * -1; - mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit16) * -1; - return ~mask; -} - -static INLINE int8_t highbd_flat_mask4(uint8_t thresh, - uint16_t p3, uint16_t p2, - uint16_t p1, uint16_t p0, - uint16_t q0, uint16_t q1, - uint16_t q2, uint16_t q3, int bd) { - int8_t mask = 0; - int16_t thresh16 = (uint16_t)thresh << (bd - 8); - mask |= (abs(p1 - p0) > thresh16) * -1; - mask |= (abs(q1 - q0) > thresh16) * -1; - mask |= (abs(p2 - p0) > thresh16) * -1; - mask |= (abs(q2 - q0) > thresh16) * -1; - mask |= (abs(p3 - p0) > thresh16) * -1; - mask |= (abs(q3 - q0) > thresh16) * -1; - return ~mask; -} - -static INLINE int8_t highbd_flat_mask5(uint8_t thresh, - uint16_t p4, uint16_t p3, - uint16_t p2, uint16_t p1, - uint16_t p0, uint16_t q0, - uint16_t q1, uint16_t q2, - uint16_t q3, uint16_t q4, int bd) { - int8_t mask = ~highbd_flat_mask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3, bd); - int16_t thresh16 = (uint16_t)thresh << (bd - 8); - mask |= (abs(p4 - p0) > thresh16) * -1; - mask |= (abs(q4 - q0) > thresh16) * -1; - return ~mask; -} - -// Is there high edge variance internal edge: -// 11111111_11111111 yes, 00000000_00000000 no ? 
-static INLINE int16_t highbd_hev_mask(uint8_t thresh, uint16_t p1, uint16_t p0, - uint16_t q0, uint16_t q1, int bd) { - int16_t hev = 0; - int16_t thresh16 = (uint16_t)thresh << (bd - 8); - hev |= (abs(p1 - p0) > thresh16) * -1; - hev |= (abs(q1 - q0) > thresh16) * -1; - return hev; -} - -static INLINE void highbd_filter4(int8_t mask, uint8_t thresh, uint16_t *op1, - uint16_t *op0, uint16_t *oq0, uint16_t *oq1, - int bd) { - int16_t filter1, filter2; - // ^0x80 equivalent to subtracting 0x80 from the values to turn them - // into -128 to +127 instead of 0 to 255. - int shift = bd - 8; - const int16_t ps1 = (int16_t)*op1 - (0x80 << shift); - const int16_t ps0 = (int16_t)*op0 - (0x80 << shift); - const int16_t qs0 = (int16_t)*oq0 - (0x80 << shift); - const int16_t qs1 = (int16_t)*oq1 - (0x80 << shift); - const uint16_t hev = highbd_hev_mask(thresh, *op1, *op0, *oq0, *oq1, bd); - - // Add outer taps if we have high edge variance. - int16_t filter = signed_char_clamp_high(ps1 - qs1, bd) & hev; - - // Inner taps. - filter = signed_char_clamp_high(filter + 3 * (qs0 - ps0), bd) & mask; - - // Save bottom 3 bits so that we round one side +4 and the other +3 - // if it equals 4 we'll set to adjust by -1 to account for the fact - // we'd round 3 the other way. - filter1 = signed_char_clamp_high(filter + 4, bd) >> 3; - filter2 = signed_char_clamp_high(filter + 3, bd) >> 3; - - *oq0 = signed_char_clamp_high(qs0 - filter1, bd) + (0x80 << shift); - *op0 = signed_char_clamp_high(ps0 + filter2, bd) + (0x80 << shift); - - // Outer tap adjustments. 
- filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; - - *oq1 = signed_char_clamp_high(qs1 - filter, bd) + (0x80 << shift); - *op1 = signed_char_clamp_high(ps1 + filter, bd) + (0x80 << shift); -} - -void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */, - const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh, int bd) { - int i; - - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. - for (i = 0; i < 8; ++i) { - const uint16_t p3 = s[-4 * p]; - const uint16_t p2 = s[-3 * p]; - const uint16_t p1 = s[-2 * p]; - const uint16_t p0 = s[-p]; - const uint16_t q0 = s[0 * p]; - const uint16_t q1 = s[1 * p]; - const uint16_t q2 = s[2 * p]; - const uint16_t q3 = s[3 * p]; - const int8_t mask = highbd_filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3, bd); - highbd_filter4(mask, *thresh, s - 2 * p, s - 1 * p, s, s + 1 * p, bd); - ++s; - } -} - -void vpx_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1, - int bd) { - vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, bd); - vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, bd); -} - -void vpx_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int bd) { - int i; - - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. 
- for (i = 0; i < 8; ++i) { - const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; - const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; - const int8_t mask = highbd_filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3, bd); - highbd_filter4(mask, *thresh, s - 2, s - 1, s, s + 1, bd); - s += pitch; - } -} - -void vpx_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1, - int bd) { - vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, bd); - vpx_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, - thresh1, bd); -} - -static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat, - uint16_t *op3, uint16_t *op2, - uint16_t *op1, uint16_t *op0, - uint16_t *oq0, uint16_t *oq1, - uint16_t *oq2, uint16_t *oq3, int bd) { - if (flat && mask) { - const uint16_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; - const uint16_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3; - - // 7-tap filter [1, 1, 1, 2, 1, 1, 1] - *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0, 3); - *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1, 3); - *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2, 3); - *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3, 3); - *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3, 3); - *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3, 3); - } else { - highbd_filter4(mask, thresh, op1, op0, oq0, oq1, bd); - } -} - -void vpx_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int bd) { - int i; - - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. 
- for (i = 0; i < 8; ++i) { - const uint16_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p]; - const uint16_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p]; - - const int8_t mask = highbd_filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3, bd); - const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, - bd); - highbd_filter8(mask, *thresh, flat, - s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, - s, s + 1 * p, s + 2 * p, s + 3 * p, bd); - ++s; - } -} - -void vpx_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1, - int bd) { - vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, bd); - vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, bd); -} - -void vpx_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int bd) { - int i; - - for (i = 0; i < 8; ++i) { - const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; - const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; - const int8_t mask = highbd_filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3, bd); - const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, - bd); - highbd_filter8(mask, *thresh, flat, - s - 4, s - 3, s - 2, s - 1, - s, s + 1, s + 2, s + 3, - bd); - s += pitch; - } -} - -void vpx_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1, - int bd) { - vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, bd); - vpx_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, - thresh1, bd); -} - -static INLINE void highbd_filter16(int8_t mask, uint8_t thresh, - uint8_t flat, uint8_t flat2, - uint16_t *op7, uint16_t 
*op6, - uint16_t *op5, uint16_t *op4, - uint16_t *op3, uint16_t *op2, - uint16_t *op1, uint16_t *op0, - uint16_t *oq0, uint16_t *oq1, - uint16_t *oq2, uint16_t *oq3, - uint16_t *oq4, uint16_t *oq5, - uint16_t *oq6, uint16_t *oq7, int bd) { - if (flat2 && flat && mask) { - const uint16_t p7 = *op7; - const uint16_t p6 = *op6; - const uint16_t p5 = *op5; - const uint16_t p4 = *op4; - const uint16_t p3 = *op3; - const uint16_t p2 = *op2; - const uint16_t p1 = *op1; - const uint16_t p0 = *op0; - const uint16_t q0 = *oq0; - const uint16_t q1 = *oq1; - const uint16_t q2 = *oq2; - const uint16_t q3 = *oq3; - const uint16_t q4 = *oq4; - const uint16_t q5 = *oq5; - const uint16_t q6 = *oq6; - const uint16_t q7 = *oq7; - - // 15-tap filter [1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1] - *op6 = ROUND_POWER_OF_TWO(p7 * 7 + p6 * 2 + p5 + p4 + p3 + p2 + p1 + p0 + - q0, 4); - *op5 = ROUND_POWER_OF_TWO(p7 * 6 + p6 + p5 * 2 + p4 + p3 + p2 + p1 + p0 + - q0 + q1, 4); - *op4 = ROUND_POWER_OF_TWO(p7 * 5 + p6 + p5 + p4 * 2 + p3 + p2 + p1 + p0 + - q0 + q1 + q2, 4); - *op3 = ROUND_POWER_OF_TWO(p7 * 4 + p6 + p5 + p4 + p3 * 2 + p2 + p1 + p0 + - q0 + q1 + q2 + q3, 4); - *op2 = ROUND_POWER_OF_TWO(p7 * 3 + p6 + p5 + p4 + p3 + p2 * 2 + p1 + p0 + - q0 + q1 + q2 + q3 + q4, 4); - *op1 = ROUND_POWER_OF_TWO(p7 * 2 + p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 + - q0 + q1 + q2 + q3 + q4 + q5, 4); - *op0 = ROUND_POWER_OF_TWO(p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 + - q0 + q1 + q2 + q3 + q4 + q5 + q6, 4); - *oq0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 + p0 + - q0 * 2 + q1 + q2 + q3 + q4 + q5 + q6 + q7, 4); - *oq1 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 + - q0 + q1 * 2 + q2 + q3 + q4 + q5 + q6 + q7 * 2, 4); - *oq2 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + - q0 + q1 + q2 * 2 + q3 + q4 + q5 + q6 + q7 * 3, 4); - *oq3 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + - q0 + q1 + q2 + q3 * 2 + q4 + q5 + q6 + q7 * 4, 4); - *oq4 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + - q0 + q1 + q2 + q3 + q4 * 2 + q5 + 
q6 + q7 * 5, 4); - *oq5 = ROUND_POWER_OF_TWO(p1 + p0 + - q0 + q1 + q2 + q3 + q4 + q5 * 2 + q6 + q7 * 6, 4); - *oq6 = ROUND_POWER_OF_TWO(p0 + - q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 + q7 * 7, 4); - } else { - highbd_filter8(mask, thresh, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3, - bd); - } -} - -static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count, int bd) { - int i; - - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. - for (i = 0; i < 8 * count; ++i) { - const uint16_t p3 = s[-4 * p]; - const uint16_t p2 = s[-3 * p]; - const uint16_t p1 = s[-2 * p]; - const uint16_t p0 = s[-p]; - const uint16_t q0 = s[0 * p]; - const uint16_t q1 = s[1 * p]; - const uint16_t q2 = s[2 * p]; - const uint16_t q3 = s[3 * p]; - const int8_t mask = highbd_filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3, bd); - const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, - bd); - const int8_t flat2 = highbd_flat_mask5( - 1, s[-8 * p], s[-7 * p], s[-6 * p], s[-5 * p], p0, - q0, s[4 * p], s[5 * p], s[6 * p], s[7 * p], bd); - - highbd_filter16(mask, *thresh, flat, flat2, - s - 8 * p, s - 7 * p, s - 6 * p, s - 5 * p, - s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, - s, s + 1 * p, s + 2 * p, s + 3 * p, - s + 4 * p, s + 5 * p, s + 6 * p, s + 7 * p, - bd); - ++s; - } -} - -void vpx_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, int bd) { - highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd); -} - -void vpx_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, int bd) { - highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2, bd); -} - -static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p, - const uint8_t *blimit, - const uint8_t 
*limit, - const uint8_t *thresh, - int count, int bd) { - int i; - - for (i = 0; i < count; ++i) { - const uint16_t p3 = s[-4]; - const uint16_t p2 = s[-3]; - const uint16_t p1 = s[-2]; - const uint16_t p0 = s[-1]; - const uint16_t q0 = s[0]; - const uint16_t q1 = s[1]; - const uint16_t q2 = s[2]; - const uint16_t q3 = s[3]; - const int8_t mask = highbd_filter_mask(*limit, *blimit, - p3, p2, p1, p0, q0, q1, q2, q3, bd); - const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, - bd); - const int8_t flat2 = highbd_flat_mask5(1, s[-8], s[-7], s[-6], s[-5], p0, - q0, s[4], s[5], s[6], s[7], bd); - - highbd_filter16(mask, *thresh, flat, flat2, - s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1, - s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7, - bd); - s += p; - } -} - -void vpx_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit, - const uint8_t *limit, const uint8_t *thresh, - int bd) { - highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8, bd); -} - -void vpx_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int bd) { - highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16, bd); -} -#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vpx_dsp/prob.c b/thirdparty/libvpx/vpx_dsp/prob.c deleted file mode 100644 index 639d24dd2f..0000000000 --- a/thirdparty/libvpx/vpx_dsp/prob.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "./prob.h" - -const uint8_t vpx_norm[256] = { - 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static unsigned int tree_merge_probs_impl(unsigned int i, - const vpx_tree_index *tree, - const vpx_prob *pre_probs, - const unsigned int *counts, - vpx_prob *probs) { - const int l = tree[i]; - const unsigned int left_count = (l <= 0) - ? counts[-l] - : tree_merge_probs_impl(l, tree, pre_probs, counts, probs); - const int r = tree[i + 1]; - const unsigned int right_count = (r <= 0) - ? counts[-r] - : tree_merge_probs_impl(r, tree, pre_probs, counts, probs); - const unsigned int ct[2] = { left_count, right_count }; - probs[i >> 1] = mode_mv_merge_probs(pre_probs[i >> 1], ct); - return left_count + right_count; -} - -void vpx_tree_merge_probs(const vpx_tree_index *tree, const vpx_prob *pre_probs, - const unsigned int *counts, vpx_prob *probs) { - tree_merge_probs_impl(0, tree, pre_probs, counts, probs); -} diff --git a/thirdparty/libvpx/vpx_dsp/prob.h b/thirdparty/libvpx/vpx_dsp/prob.h deleted file mode 100644 index c3cb103ffb..0000000000 --- a/thirdparty/libvpx/vpx_dsp/prob.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_DSP_PROB_H_ -#define VPX_DSP_PROB_H_ - -#include "./vpx_config.h" -#include "./vpx_dsp_common.h" - -#include "vpx_ports/mem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef uint8_t vpx_prob; - -#define MAX_PROB 255 - -#define vpx_prob_half ((vpx_prob) 128) - -typedef int8_t vpx_tree_index; - -#define TREE_SIZE(leaf_count) (2 * (leaf_count) - 2) - -#define vpx_complement(x) (255 - x) - -#define MODE_MV_COUNT_SAT 20 - -/* We build coding trees compactly in arrays. - Each node of the tree is a pair of vpx_tree_indices. - Array index often references a corresponding probability table. - Index <= 0 means done encoding/decoding and value = -Index, - Index > 0 means need another bit, specification at index. - Nonnegative indices are always even; processing begins at node 0. */ - -typedef const vpx_tree_index vpx_tree[]; - -static INLINE vpx_prob clip_prob(int p) { - return (p > 255) ? 255 : (p < 1) ? 1 : p; -} - -static INLINE vpx_prob get_prob(int num, int den) { - return (den == 0) ? 128u : clip_prob(((int64_t)num * 256 + (den >> 1)) / den); -} - -static INLINE vpx_prob get_binary_prob(int n0, int n1) { - return get_prob(n0, n0 + n1); -} - -/* This function assumes prob1 and prob2 are already within [1,255] range. 
*/ -static INLINE vpx_prob weighted_prob(int prob1, int prob2, int factor) { - return ROUND_POWER_OF_TWO(prob1 * (256 - factor) + prob2 * factor, 8); -} - -static INLINE vpx_prob merge_probs(vpx_prob pre_prob, - const unsigned int ct[2], - unsigned int count_sat, - unsigned int max_update_factor) { - const vpx_prob prob = get_binary_prob(ct[0], ct[1]); - const unsigned int count = VPXMIN(ct[0] + ct[1], count_sat); - const unsigned int factor = max_update_factor * count / count_sat; - return weighted_prob(pre_prob, prob, factor); -} - -// MODE_MV_MAX_UPDATE_FACTOR (128) * count / MODE_MV_COUNT_SAT; -static const int count_to_update_factor[MODE_MV_COUNT_SAT + 1] = { - 0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64, - 70, 76, 83, 89, 96, 102, 108, 115, 121, 128 -}; - -static INLINE vpx_prob mode_mv_merge_probs(vpx_prob pre_prob, - const unsigned int ct[2]) { - const unsigned int den = ct[0] + ct[1]; - if (den == 0) { - return pre_prob; - } else { - const unsigned int count = VPXMIN(den, MODE_MV_COUNT_SAT); - const unsigned int factor = count_to_update_factor[count]; - const vpx_prob prob = - clip_prob(((int64_t)(ct[0]) * 256 + (den >> 1)) / den); - return weighted_prob(pre_prob, prob, factor); - } -} - -void vpx_tree_merge_probs(const vpx_tree_index *tree, const vpx_prob *pre_probs, - const unsigned int *counts, vpx_prob *probs); - - -DECLARE_ALIGNED(16, extern const uint8_t, vpx_norm[256]); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_DSP_PROB_H_ diff --git a/thirdparty/libvpx/vpx_dsp/txfm_common.h b/thirdparty/libvpx/vpx_dsp/txfm_common.h deleted file mode 100644 index 442e6a57b5..0000000000 --- a/thirdparty/libvpx/vpx_dsp/txfm_common.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_DSP_TXFM_COMMON_H_ -#define VPX_DSP_TXFM_COMMON_H_ - -#include "vpx_dsp/vpx_dsp_common.h" - -// Constants and Macros used by all idct/dct functions -#define DCT_CONST_BITS 14 -#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1)) - -#define UNIT_QUANT_SHIFT 2 -#define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT) - -// Constants: -// for (int i = 1; i< 32; ++i) -// printf("static const int cospi_%d_64 = %.0f;\n", i, -// round(16384 * cos(i*M_PI/64))); -// Note: sin(k*Pi/64) = cos((32-k)*Pi/64) -static const tran_high_t cospi_1_64 = 16364; -static const tran_high_t cospi_2_64 = 16305; -static const tran_high_t cospi_3_64 = 16207; -static const tran_high_t cospi_4_64 = 16069; -static const tran_high_t cospi_5_64 = 15893; -static const tran_high_t cospi_6_64 = 15679; -static const tran_high_t cospi_7_64 = 15426; -static const tran_high_t cospi_8_64 = 15137; -static const tran_high_t cospi_9_64 = 14811; -static const tran_high_t cospi_10_64 = 14449; -static const tran_high_t cospi_11_64 = 14053; -static const tran_high_t cospi_12_64 = 13623; -static const tran_high_t cospi_13_64 = 13160; -static const tran_high_t cospi_14_64 = 12665; -static const tran_high_t cospi_15_64 = 12140; -static const tran_high_t cospi_16_64 = 11585; -static const tran_high_t cospi_17_64 = 11003; -static const tran_high_t cospi_18_64 = 10394; -static const tran_high_t cospi_19_64 = 9760; -static const tran_high_t cospi_20_64 = 9102; -static const tran_high_t cospi_21_64 = 8423; -static const tran_high_t cospi_22_64 = 7723; -static const tran_high_t cospi_23_64 = 7005; -static const tran_high_t cospi_24_64 = 6270; -static const tran_high_t cospi_25_64 = 5520; -static const tran_high_t cospi_26_64 = 4756; -static const tran_high_t cospi_27_64 = 3981; -static const tran_high_t cospi_28_64 = 
3196; -static const tran_high_t cospi_29_64 = 2404; -static const tran_high_t cospi_30_64 = 1606; -static const tran_high_t cospi_31_64 = 804; - -// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3 -static const tran_high_t sinpi_1_9 = 5283; -static const tran_high_t sinpi_2_9 = 9929; -static const tran_high_t sinpi_3_9 = 13377; -static const tran_high_t sinpi_4_9 = 15212; - -#endif // VPX_DSP_TXFM_COMMON_H_ diff --git a/thirdparty/libvpx/vpx_dsp/vpx_convolve.c b/thirdparty/libvpx/vpx_dsp/vpx_convolve.c deleted file mode 100644 index 2d1c927cbe..0000000000 --- a/thirdparty/libvpx/vpx_dsp/vpx_convolve.c +++ /dev/null @@ -1,612 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <assert.h> -#include <string.h> - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "vpx/vpx_integer.h" -#include "vpx_dsp/vpx_convolve.h" -#include "vpx_dsp/vpx_dsp_common.h" -#include "vpx_dsp/vpx_filter.h" -#include "vpx_ports/mem.h" - -static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *x_filters, - int x0_q4, int x_step_q4, int w, int h) { - int x, y; - src -= SUBPEL_TAPS / 2 - 1; - for (y = 0; y < h; ++y) { - int x_q4 = x0_q4; - for (x = 0; x < w; ++x) { - const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; - const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; - int k, sum = 0; - for (k = 0; k < SUBPEL_TAPS; ++k) - sum += src_x[k] * x_filter[k]; - dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); - x_q4 += x_step_q4; - } - src += src_stride; - dst += dst_stride; - } -} - -static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *x_filters, - int x0_q4, int x_step_q4, int w, int h) { - int x, y; - src -= SUBPEL_TAPS / 2 - 1; - for (y = 0; y < h; ++y) { - int x_q4 = x0_q4; - for (x = 0; x < w; ++x) { - const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; - const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; - int k, sum = 0; - for (k = 0; k < SUBPEL_TAPS; ++k) - sum += src_x[k] * x_filter[k]; - dst[x] = ROUND_POWER_OF_TWO(dst[x] + - clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); - x_q4 += x_step_q4; - } - src += src_stride; - dst += dst_stride; - } -} - -static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *y_filters, - int y0_q4, int y_step_q4, int w, int h) { - int x, y; - src -= src_stride * (SUBPEL_TAPS / 2 - 1); - - for (x = 0; x < w; ++x) { - int y_q4 = y0_q4; - for (y = 0; y < h; ++y) { - const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * 
src_stride]; - const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; - int k, sum = 0; - for (k = 0; k < SUBPEL_TAPS; ++k) - sum += src_y[k * src_stride] * y_filter[k]; - dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); - y_q4 += y_step_q4; - } - ++src; - ++dst; - } -} - -static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *y_filters, - int y0_q4, int y_step_q4, int w, int h) { - int x, y; - src -= src_stride * (SUBPEL_TAPS / 2 - 1); - - for (x = 0; x < w; ++x) { - int y_q4 = y0_q4; - for (y = 0; y < h; ++y) { - const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; - const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; - int k, sum = 0; - for (k = 0; k < SUBPEL_TAPS; ++k) - sum += src_y[k * src_stride] * y_filter[k]; - dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + - clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); - y_q4 += y_step_q4; - } - ++src; - ++dst; - } -} - -static void convolve(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *const x_filters, - int x0_q4, int x_step_q4, - const InterpKernel *const y_filters, - int y0_q4, int y_step_q4, - int w, int h) { - // Note: Fixed size intermediate buffer, temp, places limits on parameters. - // 2d filtering proceeds in 2 steps: - // (1) Interpolate horizontally into an intermediate buffer, temp. - // (2) Interpolate temp vertically to derive the sub-pixel result. - // Deriving the maximum number of rows in the temp buffer (135): - // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). - // --Largest block size is 64x64 pixels. - // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the - // original frame (in 1/16th pixel units). - // --Must round-up because block may be located at sub-pixel position. - // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. 
- // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. - uint8_t temp[135 * 64]; - int intermediate_height = - (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; - - assert(w <= 64); - assert(h <= 64); - assert(y_step_q4 <= 32); - assert(x_step_q4 <= 32); - - convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, - x_filters, x0_q4, x_step_q4, w, intermediate_height); - convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, - y_filters, y0_q4, y_step_q4, w, h); -} - -static const InterpKernel *get_filter_base(const int16_t *filter) { - // NOTE: This assumes that the filter table is 256-byte aligned. - // TODO(agrange) Modify to make independent of table alignment. - return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); -} - -static int get_filter_offset(const int16_t *f, const InterpKernel *base) { - return (int)((const InterpKernel *)(intptr_t)f - base); -} - -void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - const InterpKernel *const filters_x = get_filter_base(filter_x); - const int x0_q4 = get_filter_offset(filter_x, filters_x); - - (void)filter_y; - (void)y_step_q4; - - convolve_horiz(src, src_stride, dst, dst_stride, filters_x, - x0_q4, x_step_q4, w, h); -} - -void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - const InterpKernel *const filters_x = get_filter_base(filter_x); - const int x0_q4 = get_filter_offset(filter_x, filters_x); - - (void)filter_y; - (void)y_step_q4; - - convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, - x0_q4, x_step_q4, w, h); -} - -void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const 
int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - const InterpKernel *const filters_y = get_filter_base(filter_y); - const int y0_q4 = get_filter_offset(filter_y, filters_y); - - (void)filter_x; - (void)x_step_q4; - - convolve_vert(src, src_stride, dst, dst_stride, filters_y, - y0_q4, y_step_q4, w, h); -} - -void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - const InterpKernel *const filters_y = get_filter_base(filter_y); - const int y0_q4 = get_filter_offset(filter_y, filters_y); - - (void)filter_x; - (void)x_step_q4; - - convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, - y0_q4, y_step_q4, w, h); -} - -void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - const InterpKernel *const filters_x = get_filter_base(filter_x); - const int x0_q4 = get_filter_offset(filter_x, filters_x); - - const InterpKernel *const filters_y = get_filter_base(filter_y); - const int y0_q4 = get_filter_offset(filter_y, filters_y); - - convolve(src, src_stride, dst, dst_stride, - filters_x, x0_q4, x_step_q4, - filters_y, y0_q4, y_step_q4, w, h); -} - -void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - /* Fixed size intermediate buffer places limits on parameters. 
*/ - DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]); - assert(w <= 64); - assert(h <= 64); - - vpx_convolve8_c(src, src_stride, temp, 64, - filter_x, x_step_q4, filter_y, y_step_q4, w, h); - vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h); -} - -void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int filter_x_stride, - const int16_t *filter_y, int filter_y_stride, - int w, int h) { - int r; - - (void)filter_x; (void)filter_x_stride; - (void)filter_y; (void)filter_y_stride; - - for (r = h; r > 0; --r) { - memcpy(dst, src, w); - src += src_stride; - dst += dst_stride; - } -} - -void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int filter_x_stride, - const int16_t *filter_y, int filter_y_stride, - int w, int h) { - int x, y; - - (void)filter_x; (void)filter_x_stride; - (void)filter_y; (void)filter_y_stride; - - for (y = 0; y < h; ++y) { - for (x = 0; x < w; ++x) - dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); - - src += src_stride; - dst += dst_stride; - } -} - -void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4, - filter_y, y_step_q4, w, h); -} - -void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4, - filter_y, y_step_q4, w, h); -} - -void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { 
- vpx_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4, - filter_y, y_step_q4, w, h); -} - -void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x, - x_step_q4, filter_y, y_step_q4, w, h); -} - -void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x, - x_step_q4, filter_y, y_step_q4, w, h); -} - -void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4, - filter_y, y_step_q4, w, h); -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride, - uint8_t *dst8, ptrdiff_t dst_stride, - const InterpKernel *x_filters, - int x0_q4, int x_step_q4, - int w, int h, int bd) { - int x, y; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - src -= SUBPEL_TAPS / 2 - 1; - for (y = 0; y < h; ++y) { - int x_q4 = x0_q4; - for (x = 0; x < w; ++x) { - const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; - const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; - int k, sum = 0; - for (k = 0; k < SUBPEL_TAPS; ++k) - sum += src_x[k] * x_filter[k]; - dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); - x_q4 += x_step_q4; - } - src += src_stride; - dst += dst_stride; - } -} - -static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride, - uint8_t *dst8, ptrdiff_t 
dst_stride, - const InterpKernel *x_filters, - int x0_q4, int x_step_q4, - int w, int h, int bd) { - int x, y; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - src -= SUBPEL_TAPS / 2 - 1; - for (y = 0; y < h; ++y) { - int x_q4 = x0_q4; - for (x = 0; x < w; ++x) { - const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; - const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; - int k, sum = 0; - for (k = 0; k < SUBPEL_TAPS; ++k) - sum += src_x[k] * x_filter[k]; - dst[x] = ROUND_POWER_OF_TWO(dst[x] + - clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1); - x_q4 += x_step_q4; - } - src += src_stride; - dst += dst_stride; - } -} - -static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride, - uint8_t *dst8, ptrdiff_t dst_stride, - const InterpKernel *y_filters, - int y0_q4, int y_step_q4, int w, int h, - int bd) { - int x, y; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - src -= src_stride * (SUBPEL_TAPS / 2 - 1); - for (x = 0; x < w; ++x) { - int y_q4 = y0_q4; - for (y = 0; y < h; ++y) { - const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; - const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; - int k, sum = 0; - for (k = 0; k < SUBPEL_TAPS; ++k) - sum += src_y[k * src_stride] * y_filter[k]; - dst[y * dst_stride] = clip_pixel_highbd( - ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); - y_q4 += y_step_q4; - } - ++src; - ++dst; - } -} - -static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride, - uint8_t *dst8, ptrdiff_t dst_stride, - const InterpKernel *y_filters, - int y0_q4, int y_step_q4, int w, int h, - int bd) { - int x, y; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - src -= src_stride * (SUBPEL_TAPS / 2 - 1); - for (x = 0; x < w; ++x) { - int y_q4 = y0_q4; - for (y = 0; y < h; ++y) { - const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; - 
const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; - int k, sum = 0; - for (k = 0; k < SUBPEL_TAPS; ++k) - sum += src_y[k * src_stride] * y_filter[k]; - dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + - clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1); - y_q4 += y_step_q4; - } - ++src; - ++dst; - } -} - -static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *const x_filters, - int x0_q4, int x_step_q4, - const InterpKernel *const y_filters, - int y0_q4, int y_step_q4, - int w, int h, int bd) { - // Note: Fixed size intermediate buffer, temp, places limits on parameters. - // 2d filtering proceeds in 2 steps: - // (1) Interpolate horizontally into an intermediate buffer, temp. - // (2) Interpolate temp vertically to derive the sub-pixel result. - // Deriving the maximum number of rows in the temp buffer (135): - // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). - // --Largest block size is 64x64 pixels. - // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the - // original frame (in 1/16th pixel units). - // --Must round-up because block may be located at sub-pixel position. - // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. - // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. 
- uint16_t temp[64 * 135]; - int intermediate_height = - (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; - - assert(w <= 64); - assert(h <= 64); - assert(y_step_q4 <= 32); - assert(x_step_q4 <= 32); - - highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), - src_stride, CONVERT_TO_BYTEPTR(temp), 64, - x_filters, x0_q4, x_step_q4, w, - intermediate_height, bd); - highbd_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1), - 64, dst, dst_stride, y_filters, y0_q4, y_step_q4, - w, h, bd); -} - - -void vpx_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, int bd) { - const InterpKernel *const filters_x = get_filter_base(filter_x); - const int x0_q4 = get_filter_offset(filter_x, filters_x); - (void)filter_y; - (void)y_step_q4; - - highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x, - x0_q4, x_step_q4, w, h, bd); -} - -void vpx_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, int bd) { - const InterpKernel *const filters_x = get_filter_base(filter_x); - const int x0_q4 = get_filter_offset(filter_x, filters_x); - (void)filter_y; - (void)y_step_q4; - - highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, - x0_q4, x_step_q4, w, h, bd); -} - -void vpx_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, int bd) { - const InterpKernel *const filters_y = get_filter_base(filter_y); - const int y0_q4 = get_filter_offset(filter_y, filters_y); - (void)filter_x; - (void)x_step_q4; - - highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y, - 
y0_q4, y_step_q4, w, h, bd); -} - -void vpx_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, int bd) { - const InterpKernel *const filters_y = get_filter_base(filter_y); - const int y0_q4 = get_filter_offset(filter_y, filters_y); - (void)filter_x; - (void)x_step_q4; - - highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, - y0_q4, y_step_q4, w, h, bd); -} - -void vpx_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, int bd) { - const InterpKernel *const filters_x = get_filter_base(filter_x); - const int x0_q4 = get_filter_offset(filter_x, filters_x); - - const InterpKernel *const filters_y = get_filter_base(filter_y); - const int y0_q4 = get_filter_offset(filter_y, filters_y); - - highbd_convolve(src, src_stride, dst, dst_stride, - filters_x, x0_q4, x_step_q4, - filters_y, y0_q4, y_step_q4, w, h, bd); -} - -void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, int bd) { - // Fixed size intermediate buffer places limits on parameters. 
- DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]); - assert(w <= 64); - assert(h <= 64); - - vpx_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64, - filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd); - vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride, - NULL, 0, NULL, 0, w, h, bd); -} - -void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride, - uint8_t *dst8, ptrdiff_t dst_stride, - const int16_t *filter_x, int filter_x_stride, - const int16_t *filter_y, int filter_y_stride, - int w, int h, int bd) { - int r; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - (void)filter_x; - (void)filter_y; - (void)filter_x_stride; - (void)filter_y_stride; - (void)bd; - - for (r = h; r > 0; --r) { - memcpy(dst, src, w * sizeof(uint16_t)); - src += src_stride; - dst += dst_stride; - } -} - -void vpx_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride, - uint8_t *dst8, ptrdiff_t dst_stride, - const int16_t *filter_x, int filter_x_stride, - const int16_t *filter_y, int filter_y_stride, - int w, int h, int bd) { - int x, y; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - (void)filter_x; - (void)filter_y; - (void)filter_x_stride; - (void)filter_y_stride; - (void)bd; - - for (y = 0; y < h; ++y) { - for (x = 0; x < w; ++x) { - dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); - } - src += src_stride; - dst += dst_stride; - } -} -#endif diff --git a/thirdparty/libvpx/vpx_dsp/vpx_convolve.h b/thirdparty/libvpx/vpx_dsp/vpx_convolve.h deleted file mode 100644 index 9ed3f1750f..0000000000 --- a/thirdparty/libvpx/vpx_dsp/vpx_convolve.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#ifndef VPX_DSP_VPX_CONVOLVE_H_ -#define VPX_DSP_VPX_CONVOLVE_H_ - -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h); - -#if CONFIG_VP9_HIGHBITDEPTH -typedef void (*highbd_convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h, int bd); -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_DSP_VPX_CONVOLVE_H_ diff --git a/thirdparty/libvpx/vpx_dsp/vpx_dsp_common.h b/thirdparty/libvpx/vpx_dsp/vpx_dsp_common.h deleted file mode 100644 index a1d0a51ef5..0000000000 --- a/thirdparty/libvpx/vpx_dsp/vpx_dsp_common.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_DSP_VPX_DSP_COMMON_H_ -#define VPX_DSP_VPX_DSP_COMMON_H_ - -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" -#include "vpx_ports/mem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define VPXMIN(x, y) (((x) < (y)) ? (x) : (y)) -#define VPXMAX(x, y) (((x) > (y)) ? (x) : (y)) - -#if CONFIG_VP9_HIGHBITDEPTH -// Note: -// tran_low_t is the datatype used for final transform coefficients. 
-// tran_high_t is the datatype used for intermediate transform stages. -typedef int64_t tran_high_t; -typedef int32_t tran_low_t; -#else -// Note: -// tran_low_t is the datatype used for final transform coefficients. -// tran_high_t is the datatype used for intermediate transform stages. -typedef int32_t tran_high_t; -typedef int16_t tran_low_t; -#endif // CONFIG_VP9_HIGHBITDEPTH - -static INLINE uint8_t clip_pixel(int val) { - return (val > 255) ? 255 : (val < 0) ? 0 : val; -} - -static INLINE int clamp(int value, int low, int high) { - return value < low ? low : (value > high ? high : value); -} - -static INLINE double fclamp(double value, double low, double high) { - return value < low ? low : (value > high ? high : value); -} - -#if CONFIG_VP9_HIGHBITDEPTH -static INLINE uint16_t clip_pixel_highbd(int val, int bd) { - switch (bd) { - case 8: - default: - return (uint16_t)clamp(val, 0, 255); - case 10: - return (uint16_t)clamp(val, 0, 1023); - case 12: - return (uint16_t)clamp(val, 0, 4095); - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_DSP_VPX_DSP_COMMON_H_ diff --git a/thirdparty/libvpx/vpx_dsp/vpx_dsp_rtcd.c b/thirdparty/libvpx/vpx_dsp/vpx_dsp_rtcd.c deleted file mode 100644 index 5fe27b614b..0000000000 --- a/thirdparty/libvpx/vpx_dsp/vpx_dsp_rtcd.c +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ -#include "./vpx_config.h" -#define RTCD_C -#include "./vpx_dsp_rtcd.h" -#include "vpx_ports/vpx_once.h" - -void vpx_dsp_rtcd() { - once(setup_rtcd_internal); -} diff --git a/thirdparty/libvpx/vpx_dsp/vpx_filter.h b/thirdparty/libvpx/vpx_dsp/vpx_filter.h deleted file mode 100644 index 2617febf3b..0000000000 --- a/thirdparty/libvpx/vpx_dsp/vpx_filter.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_DSP_VPX_FILTER_H_ -#define VPX_DSP_VPX_FILTER_H_ - -#include "vpx/vpx_integer.h" - - -#ifdef __cplusplus -extern "C" { -#endif - -#define FILTER_BITS 7 - -#define SUBPEL_BITS 4 -#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1) -#define SUBPEL_SHIFTS (1 << SUBPEL_BITS) -#define SUBPEL_TAPS 8 - -typedef int16_t InterpKernel[SUBPEL_TAPS]; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_DSP_VPX_FILTER_H_ diff --git a/thirdparty/libvpx/vpx_dsp/x86/convolve.h b/thirdparty/libvpx/vpx_dsp/x86/convolve.h deleted file mode 100644 index 7e43eb7c72..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/convolve.h +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ -#ifndef VPX_DSP_X86_CONVOLVE_H_ -#define VPX_DSP_X86_CONVOLVE_H_ - -#include <assert.h> - -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" -#include "vpx_ports/mem.h" - -typedef void filter8_1dfunction ( - const uint8_t *src_ptr, - ptrdiff_t src_pitch, - uint8_t *output_ptr, - ptrdiff_t out_pitch, - uint32_t output_height, - const int16_t *filter -); - -#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ - void vpx_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \ - uint8_t *dst, ptrdiff_t dst_stride, \ - const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, \ - int w, int h) { \ - assert(filter[3] != 128); \ - assert(step_q4 == 16); \ - if (filter[0] | filter[1] | filter[2]) { \ - while (w >= 16) { \ - vpx_filter_block1d16_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - if (w == 8) { \ - vpx_filter_block1d8_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - } else if (w == 4) { \ - vpx_filter_block1d4_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - } \ - } else { \ - while (w >= 16) { \ - vpx_filter_block1d16_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - if (w == 8) { \ - vpx_filter_block1d8_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - } else if (w == 4) { \ - vpx_filter_block1d4_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter); \ - } \ - } \ -} - -#define FUN_CONV_2D(avg, opt) \ -void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ - uint8_t *dst, ptrdiff_t dst_stride, \ - const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, \ - int w, int h) { \ - assert(filter_x[3] != 
128); \ - assert(filter_y[3] != 128); \ - assert(w <= 64); \ - assert(h <= 64); \ - assert(x_step_q4 == 16); \ - assert(y_step_q4 == 16); \ - if (filter_x[0] | filter_x[1] | filter_x[2]) { \ - DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \ - vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ - filter_x, x_step_q4, filter_y, y_step_q4, \ - w, h + 7); \ - vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, \ - y_step_q4, w, h); \ - } else { \ - DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \ - vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \ - filter_x, x_step_q4, filter_y, y_step_q4, \ - w, h + 1); \ - vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, \ - y_step_q4, w, h); \ - } \ -} - -#if CONFIG_VP9_HIGHBITDEPTH - -typedef void highbd_filter8_1dfunction ( - const uint16_t *src_ptr, - const ptrdiff_t src_pitch, - uint16_t *output_ptr, - ptrdiff_t out_pitch, - unsigned int output_height, - const int16_t *filter, - int bd -); - -#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ - void vpx_highbd_convolve8_##name##_##opt(const uint8_t *src8, \ - ptrdiff_t src_stride, \ - uint8_t *dst8, \ - ptrdiff_t dst_stride, \ - const int16_t *filter_x, \ - int x_step_q4, \ - const int16_t *filter_y, \ - int y_step_q4, \ - int w, int h, int bd) { \ - if (step_q4 == 16 && filter[3] != 128) { \ - uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ - if (filter[0] | filter[1] | filter[2]) { \ - while (w >= 16) { \ - vpx_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - while (w >= 8) { \ - vpx_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - 
while (w >= 4) { \ - vpx_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 4; \ - dst += 4; \ - w -= 4; \ - } \ - } else { \ - while (w >= 16) { \ - vpx_highbd_filter_block1d16_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 16; \ - dst += 16; \ - w -= 16; \ - } \ - while (w >= 8) { \ - vpx_highbd_filter_block1d8_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 8; \ - dst += 8; \ - w -= 8; \ - } \ - while (w >= 4) { \ - vpx_highbd_filter_block1d4_##dir##2_##avg##opt(src, \ - src_stride, \ - dst, \ - dst_stride, \ - h, \ - filter, \ - bd); \ - src += 4; \ - dst += 4; \ - w -= 4; \ - } \ - } \ - } \ - if (w) { \ - vpx_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \ - filter_x, x_step_q4, filter_y, y_step_q4, \ - w, h, bd); \ - } \ -} - -#define HIGH_FUN_CONV_2D(avg, opt) \ -void vpx_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ - uint8_t *dst, ptrdiff_t dst_stride, \ - const int16_t *filter_x, int x_step_q4, \ - const int16_t *filter_y, int y_step_q4, \ - int w, int h, int bd) { \ - assert(w <= 64); \ - assert(h <= 64); \ - if (x_step_q4 == 16 && y_step_q4 == 16) { \ - if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) { \ - DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \ - vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ - CONVERT_TO_BYTEPTR(fdata2), 64, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h + 7, bd); \ - vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \ - 64, dst, dst_stride, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h, bd); \ - } else { \ - DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \ - vpx_highbd_convolve8_horiz_##opt(src, src_stride, \ - CONVERT_TO_BYTEPTR(fdata2), 64, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - 
w, h + 1, bd); \ - vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \ - dst, dst_stride, \ - filter_x, x_step_q4, \ - filter_y, y_step_q4, \ - w, h, bd); \ - } \ - } else { \ - vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ - filter_x, x_step_q4, filter_y, y_step_q4, w, \ - h, bd); \ - } \ -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -#endif // VPX_DSP_X86_CONVOLVE_H_ diff --git a/thirdparty/libvpx/vpx_dsp/x86/intrapred_sse2.asm b/thirdparty/libvpx/vpx_dsp/x86/intrapred_sse2.asm deleted file mode 100644 index cd6a6ae982..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/intrapred_sse2.asm +++ /dev/null @@ -1,860 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - -%include "third_party/x86inc/x86inc.asm" - -SECTION_RODATA -pb_1: times 16 db 1 -pw_4: times 8 dw 4 -pw_8: times 8 dw 8 -pw_16: times 8 dw 16 -pw_32: times 8 dw 32 -dc_128: times 16 db 128 -pw2_4: times 8 dw 2 -pw2_8: times 8 dw 4 -pw2_16: times 8 dw 8 -pw2_32: times 8 dw 16 - -SECTION .text - -; ------------------------------------------ -; input: x, y, z, result -; -; trick from pascal -; (x+2y+z+2)>>2 can be calculated as: -; result = avg(x,z) -; result -= xor(x,z) & 1 -; result = avg(result,y) -; ------------------------------------------ -%macro X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 4 - pavgb %4, %1, %3 - pxor %3, %1 - pand %3, [GLOBAL(pb_1)] - psubb %4, %3 - pavgb %4, %2 -%endmacro - -INIT_XMM sse2 -cglobal d45_predictor_4x4, 3, 4, 4, dst, stride, above, goffset - GET_GOT goffsetq - - movq m0, [aboveq] - DEFINE_ARGS dst, stride, temp - psrldq m1, m0, 1 - psrldq m2, m0, 2 - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 - - ; store 4 lines - movd [dstq ], m3 - psrlq m3, 8 - movd [dstq+strideq ], m3 - lea dstq, [dstq+strideq*2] - psrlq m3, 8 - movd [dstq ], m3 - psrlq m3, 8 - movd [dstq+strideq ], m3 - psrlq m0, 56 - movd tempq, m0 - mov [dstq+strideq+3], tempb - - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal d45_predictor_8x8, 3, 4, 4, dst, stride, above, goffset - GET_GOT goffsetq - - movu m1, [aboveq] - pslldq m0, m1, 1 - psrldq m2, m1, 1 - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 - punpckhbw m0, m0 ; 7 7 - punpcklwd m0, m0 ; 7 7 7 7 - punpckldq m0, m0 ; 7 7 7 7 7 7 7 7 - punpcklqdq m3, m0 ; -1 0 1 2 3 4 5 6 7 7 7 7 7 7 7 7 - - ; store 4 lines - psrldq m3, 1 - movq [dstq ], m3 - psrldq m3, 1 - movq [dstq+strideq ], m3 - psrldq m3, 1 - movq [dstq+strideq*2], m3 - psrldq m3, 1 - movq [dstq+stride3q ], m3 - lea dstq, [dstq+strideq*4] - - ; store next 4 lines - psrldq m3, 1 - movq [dstq ], m3 - psrldq m3, 1 - movq [dstq+strideq ], m3 - psrldq m3, 1 - movq [dstq+strideq*2], m3 - psrldq m3, 1 - movq 
[dstq+stride3q ], m3 - - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal d207_predictor_4x4, 4, 4, 5, dst, stride, unused, left, goffset - GET_GOT goffsetq - - movd m0, [leftq] ; abcd [byte] - punpcklbw m4, m0, m0 ; aabb ccdd - punpcklwd m4, m4 ; aaaa bbbb cccc dddd - psrldq m4, 12 ; dddd - punpckldq m0, m4 ; abcd dddd - psrldq m1, m0, 1 ; bcdd - psrldq m2, m0, 2 ; cddd - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 ; a2bc b2cd c3d d - pavgb m1, m0 ; ab, bc, cd, d [byte] - - punpcklbw m1, m3 ; ab, a2bc, bc, b2cd, cd, c3d, d, d - movd [dstq ], m1 - psrlq m1, 16 ; bc, b2cd, cd, c3d, d, d - movd [dstq+strideq], m1 - - lea dstq, [dstq+strideq*2] - psrlq m1, 16 ; cd, c3d, d, d - movd [dstq ], m1 - movd [dstq+strideq], m4 ; d, d, d, d - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal dc_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset - GET_GOT goffsetq - - movd m2, [leftq] - movd m0, [aboveq] - pxor m1, m1 - punpckldq m0, m2 - psadbw m0, m1 - paddw m0, [GLOBAL(pw_4)] - psraw m0, 3 - pshuflw m0, m0, 0x0 - packuswb m0, m0 - movd [dstq ], m0 - movd [dstq+strideq], m0 - lea dstq, [dstq+strideq*2] - movd [dstq ], m0 - movd [dstq+strideq], m0 - - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal dc_left_predictor_4x4, 2, 5, 2, dst, stride, above, left, goffset - movifnidn leftq, leftmp - GET_GOT goffsetq - - pxor m1, m1 - movd m0, [leftq] - psadbw m0, m1 - paddw m0, [GLOBAL(pw2_4)] - psraw m0, 2 - pshuflw m0, m0, 0x0 - packuswb m0, m0 - movd [dstq ], m0 - movd [dstq+strideq], m0 - lea dstq, [dstq+strideq*2] - movd [dstq ], m0 - movd [dstq+strideq], m0 - - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal dc_top_predictor_4x4, 3, 5, 2, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - movd m0, [aboveq] - psadbw m0, m1 - paddw m0, [GLOBAL(pw2_4)] - psraw m0, 2 - pshuflw m0, m0, 0x0 - packuswb m0, m0 - movd [dstq ], m0 - movd [dstq+strideq], m0 - lea dstq, [dstq+strideq*2] - movd [dstq ], m0 - movd [dstq+strideq], m0 - - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal 
dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - movq m0, [aboveq] - movq m2, [leftq] - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - psadbw m0, m1 - psadbw m2, m1 - paddw m0, m2 - paddw m0, [GLOBAL(pw_8)] - psraw m0, 4 - punpcklbw m0, m0 - pshuflw m0, m0, 0x0 - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal dc_top_predictor_8x8, 3, 5, 2, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - movq m0, [aboveq] - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - psadbw m0, m1 - paddw m0, [GLOBAL(pw2_8)] - psraw m0, 3 - punpcklbw m0, m0 - pshuflw m0, m0, 0x0 - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal dc_left_predictor_8x8, 2, 5, 2, dst, stride, above, left, goffset - movifnidn leftq, leftmp - GET_GOT goffsetq - - pxor m1, m1 - movq m0, [leftq] - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - psadbw m0, m1 - paddw m0, [GLOBAL(pw2_8)] - psraw m0, 3 - punpcklbw m0, m0 - pshuflw m0, m0, 0x0 - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal dc_128_predictor_4x4, 2, 5, 1, dst, stride, above, left, goffset - GET_GOT goffsetq - - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - movd m0, [GLOBAL(dc_128)] - movd [dstq ], m0 - movd [dstq+strideq ], m0 - movd 
[dstq+strideq*2], m0 - movd [dstq+stride3q ], m0 - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal dc_128_predictor_8x8, 2, 5, 1, dst, stride, above, left, goffset - GET_GOT goffsetq - - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - movq m0, [GLOBAL(dc_128)] - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - mova m0, [aboveq] - mova m2, [leftq] - DEFINE_ARGS dst, stride, stride3, lines4 - lea stride3q, [strideq*3] - mov lines4d, 4 - psadbw m0, m1 - psadbw m2, m1 - paddw m0, m2 - movhlps m2, m0 - paddw m0, m2 - paddw m0, [GLOBAL(pw_16)] - psraw m0, 5 - pshuflw m0, m0, 0x0 - punpcklqdq m0, m0 - packuswb m0, m0 -.loop: - mova [dstq ], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq*2], m0 - mova [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - dec lines4d - jnz .loop - - RESTORE_GOT - REP_RET - - -INIT_XMM sse2 -cglobal dc_top_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - mova m0, [aboveq] - DEFINE_ARGS dst, stride, stride3, lines4 - lea stride3q, [strideq*3] - mov lines4d, 4 - psadbw m0, m1 - movhlps m2, m0 - paddw m0, m2 - paddw m0, [GLOBAL(pw2_16)] - psraw m0, 4 - pshuflw m0, m0, 0x0 - punpcklqdq m0, m0 - packuswb m0, m0 -.loop: - mova [dstq ], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq*2], m0 - mova [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - dec lines4d - jnz .loop - - RESTORE_GOT - REP_RET - -INIT_XMM sse2 -cglobal dc_left_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - mova m0, [leftq] - DEFINE_ARGS dst, stride, stride3, lines4 - lea stride3q, [strideq*3] - mov lines4d, 4 - psadbw m0, m1 - movhlps m2, m0 - 
paddw m0, m2 - paddw m0, [GLOBAL(pw2_16)] - psraw m0, 4 - pshuflw m0, m0, 0x0 - punpcklqdq m0, m0 - packuswb m0, m0 -.loop: - mova [dstq ], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq*2], m0 - mova [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - dec lines4d - jnz .loop - - RESTORE_GOT - REP_RET - -INIT_XMM sse2 -cglobal dc_128_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset - GET_GOT goffsetq - - DEFINE_ARGS dst, stride, stride3, lines4 - lea stride3q, [strideq*3] - mov lines4d, 4 - mova m0, [GLOBAL(dc_128)] -.loop: - mova [dstq ], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq*2], m0 - mova [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - dec lines4d - jnz .loop - RESTORE_GOT - RET - - -INIT_XMM sse2 -cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - mova m0, [aboveq] - mova m2, [aboveq+16] - mova m3, [leftq] - mova m4, [leftq+16] - DEFINE_ARGS dst, stride, stride3, lines4 - lea stride3q, [strideq*3] - mov lines4d, 8 - psadbw m0, m1 - psadbw m2, m1 - psadbw m3, m1 - psadbw m4, m1 - paddw m0, m2 - paddw m0, m3 - paddw m0, m4 - movhlps m2, m0 - paddw m0, m2 - paddw m0, [GLOBAL(pw_32)] - psraw m0, 6 - pshuflw m0, m0, 0x0 - punpcklqdq m0, m0 - packuswb m0, m0 -.loop: - mova [dstq ], m0 - mova [dstq +16], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq +16], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16], m0 - mova [dstq+stride3q ], m0 - mova [dstq+stride3q +16], m0 - lea dstq, [dstq+strideq*4] - dec lines4d - jnz .loop - - RESTORE_GOT - REP_RET - -INIT_XMM sse2 -cglobal dc_top_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - mova m0, [aboveq] - mova m2, [aboveq+16] - DEFINE_ARGS dst, stride, stride3, lines4 - lea stride3q, [strideq*3] - mov lines4d, 8 - psadbw m0, m1 - psadbw m2, m1 - paddw m0, m2 - movhlps m2, m0 - paddw m0, m2 - paddw m0, [GLOBAL(pw2_32)] - psraw m0, 5 - pshuflw m0, m0, 0x0 - punpcklqdq m0, m0 - 
packuswb m0, m0 -.loop: - mova [dstq ], m0 - mova [dstq +16], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq +16], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16], m0 - mova [dstq+stride3q ], m0 - mova [dstq+stride3q +16], m0 - lea dstq, [dstq+strideq*4] - dec lines4d - jnz .loop - - RESTORE_GOT - REP_RET - -INIT_XMM sse2 -cglobal dc_left_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset - GET_GOT goffsetq - - pxor m1, m1 - mova m0, [leftq] - mova m2, [leftq+16] - DEFINE_ARGS dst, stride, stride3, lines4 - lea stride3q, [strideq*3] - mov lines4d, 8 - psadbw m0, m1 - psadbw m2, m1 - paddw m0, m2 - movhlps m2, m0 - paddw m0, m2 - paddw m0, [GLOBAL(pw2_32)] - psraw m0, 5 - pshuflw m0, m0, 0x0 - punpcklqdq m0, m0 - packuswb m0, m0 -.loop: - mova [dstq ], m0 - mova [dstq +16], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq +16], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16], m0 - mova [dstq+stride3q ], m0 - mova [dstq+stride3q +16], m0 - lea dstq, [dstq+strideq*4] - dec lines4d - jnz .loop - - RESTORE_GOT - REP_RET - -INIT_XMM sse2 -cglobal dc_128_predictor_32x32, 4, 5, 3, dst, stride, above, left, goffset - GET_GOT goffsetq - - DEFINE_ARGS dst, stride, stride3, lines4 - lea stride3q, [strideq*3] - mov lines4d, 8 - mova m0, [GLOBAL(dc_128)] -.loop: - mova [dstq ], m0 - mova [dstq +16], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq +16], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16], m0 - mova [dstq+stride3q ], m0 - mova [dstq+stride3q +16], m0 - lea dstq, [dstq+strideq*4] - dec lines4d - jnz .loop - RESTORE_GOT - RET - -INIT_XMM sse2 -cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above - movd m0, [aboveq] - movd [dstq ], m0 - movd [dstq+strideq], m0 - lea dstq, [dstq+strideq*2] - movd [dstq ], m0 - movd [dstq+strideq], m0 - RET - -INIT_XMM sse2 -cglobal v_predictor_8x8, 3, 3, 1, dst, stride, above - movq m0, [aboveq] - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - movq [dstq ], m0 - movq 
[dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - movq [dstq ], m0 - movq [dstq+strideq ], m0 - movq [dstq+strideq*2], m0 - movq [dstq+stride3q ], m0 - RET - -INIT_XMM sse2 -cglobal v_predictor_16x16, 3, 4, 1, dst, stride, above - mova m0, [aboveq] - DEFINE_ARGS dst, stride, stride3, nlines4 - lea stride3q, [strideq*3] - mov nlines4d, 4 -.loop: - mova [dstq ], m0 - mova [dstq+strideq ], m0 - mova [dstq+strideq*2], m0 - mova [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - dec nlines4d - jnz .loop - REP_RET - -INIT_XMM sse2 -cglobal v_predictor_32x32, 3, 4, 2, dst, stride, above - mova m0, [aboveq] - mova m1, [aboveq+16] - DEFINE_ARGS dst, stride, stride3, nlines4 - lea stride3q, [strideq*3] - mov nlines4d, 8 -.loop: - mova [dstq ], m0 - mova [dstq +16], m1 - mova [dstq+strideq ], m0 - mova [dstq+strideq +16], m1 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16], m1 - mova [dstq+stride3q ], m0 - mova [dstq+stride3q +16], m1 - lea dstq, [dstq+strideq*4] - dec nlines4d - jnz .loop - REP_RET - -INIT_XMM sse2 -cglobal h_predictor_4x4, 2, 4, 4, dst, stride, line, left - movifnidn leftq, leftmp - movd m0, [leftq] - punpcklbw m0, m0 - punpcklbw m0, m0 - pshufd m1, m0, 0x1 - movd [dstq ], m0 - movd [dstq+strideq], m1 - pshufd m2, m0, 0x2 - lea dstq, [dstq+strideq*2] - pshufd m3, m0, 0x3 - movd [dstq ], m2 - movd [dstq+strideq], m3 - RET - -INIT_XMM sse2 -cglobal h_predictor_8x8, 2, 5, 3, dst, stride, line, left - movifnidn leftq, leftmp - mov lineq, -2 - DEFINE_ARGS dst, stride, line, left, stride3 - lea stride3q, [strideq*3] - movq m0, [leftq ] - punpcklbw m0, m0 ; l1 l1 l2 l2 ... 
l8 l8 -.loop: - pshuflw m1, m0, 0x0 ; l1 l1 l1 l1 l1 l1 l1 l1 - pshuflw m2, m0, 0x55 ; l2 l2 l2 l2 l2 l2 l2 l2 - movq [dstq ], m1 - movq [dstq+strideq], m2 - pshuflw m1, m0, 0xaa - pshuflw m2, m0, 0xff - movq [dstq+strideq*2], m1 - movq [dstq+stride3q ], m2 - pshufd m0, m0, 0xe ; [63:0] l5 l5 l6 l6 l7 l7 l8 l8 - inc lineq - lea dstq, [dstq+strideq*4] - jnz .loop - REP_RET - -INIT_XMM sse2 -cglobal h_predictor_16x16, 2, 5, 3, dst, stride, line, left - movifnidn leftq, leftmp - mov lineq, -4 - DEFINE_ARGS dst, stride, line, left, stride3 - lea stride3q, [strideq*3] -.loop: - movd m0, [leftq] - punpcklbw m0, m0 - punpcklbw m0, m0 ; l1 to l4 each repeated 4 times - pshufd m1, m0, 0x0 ; l1 repeated 16 times - pshufd m2, m0, 0x55 ; l2 repeated 16 times - mova [dstq ], m1 - mova [dstq+strideq ], m2 - pshufd m1, m0, 0xaa - pshufd m2, m0, 0xff - mova [dstq+strideq*2], m1 - mova [dstq+stride3q ], m2 - inc lineq - lea leftq, [leftq+4 ] - lea dstq, [dstq+strideq*4] - jnz .loop - REP_RET - -INIT_XMM sse2 -cglobal h_predictor_32x32, 2, 5, 3, dst, stride, line, left - movifnidn leftq, leftmp - mov lineq, -8 - DEFINE_ARGS dst, stride, line, left, stride3 - lea stride3q, [strideq*3] -.loop: - movd m0, [leftq] - punpcklbw m0, m0 - punpcklbw m0, m0 ; l1 to l4 each repeated 4 times - pshufd m1, m0, 0x0 ; l1 repeated 16 times - pshufd m2, m0, 0x55 ; l2 repeated 16 times - mova [dstq ], m1 - mova [dstq+16 ], m1 - mova [dstq+strideq ], m2 - mova [dstq+strideq+16 ], m2 - pshufd m1, m0, 0xaa - pshufd m2, m0, 0xff - mova [dstq+strideq*2 ], m1 - mova [dstq+strideq*2+16], m1 - mova [dstq+stride3q ], m2 - mova [dstq+stride3q+16 ], m2 - inc lineq - lea leftq, [leftq+4 ] - lea dstq, [dstq+strideq*4] - jnz .loop - REP_RET - -INIT_XMM sse2 -cglobal tm_predictor_4x4, 4, 4, 5, dst, stride, above, left - pxor m1, m1 - movq m0, [aboveq-1]; [63:0] tl t1 t2 t3 t4 x x x - punpcklbw m0, m1 - pshuflw m2, m0, 0x0 ; [63:0] tl tl tl tl [word] - psrldq m0, 2 - psubw m0, m2 ; [63:0] t1-tl t2-tl t3-tl t4-tl 
[word] - movd m2, [leftq] - punpcklbw m2, m1 - pshuflw m4, m2, 0x0 ; [63:0] l1 l1 l1 l1 [word] - pshuflw m3, m2, 0x55 ; [63:0] l2 l2 l2 l2 [word] - paddw m4, m0 - paddw m3, m0 - packuswb m4, m4 - packuswb m3, m3 - movd [dstq ], m4 - movd [dstq+strideq], m3 - lea dstq, [dstq+strideq*2] - pshuflw m4, m2, 0xaa - pshuflw m3, m2, 0xff - paddw m4, m0 - paddw m3, m0 - packuswb m4, m4 - packuswb m3, m3 - movd [dstq ], m4 - movd [dstq+strideq], m3 - RET - -INIT_XMM sse2 -cglobal tm_predictor_8x8, 4, 4, 5, dst, stride, above, left - pxor m1, m1 - movd m2, [aboveq-1] - movq m0, [aboveq] - punpcklbw m2, m1 - punpcklbw m0, m1 ; t1 t2 t3 t4 t5 t6 t7 t8 [word] - pshuflw m2, m2, 0x0 ; [63:0] tl tl tl tl [word] - DEFINE_ARGS dst, stride, line, left - mov lineq, -4 - punpcklqdq m2, m2 ; tl tl tl tl tl tl tl tl [word] - psubw m0, m2 ; t1-tl t2-tl ... t8-tl [word] - movq m2, [leftq] - punpcklbw m2, m1 ; l1 l2 l3 l4 l5 l6 l7 l8 [word] -.loop - pshuflw m4, m2, 0x0 ; [63:0] l1 l1 l1 l1 [word] - pshuflw m3, m2, 0x55 ; [63:0] l2 l2 l2 l2 [word] - punpcklqdq m4, m4 ; l1 l1 l1 l1 l1 l1 l1 l1 [word] - punpcklqdq m3, m3 ; l2 l2 l2 l2 l2 l2 l2 l2 [word] - paddw m4, m0 - paddw m3, m0 - packuswb m4, m3 - movq [dstq ], m4 - movhps [dstq+strideq], m4 - lea dstq, [dstq+strideq*2] - psrldq m2, 4 - inc lineq - jnz .loop - REP_RET - -INIT_XMM sse2 -cglobal tm_predictor_16x16, 4, 5, 8, dst, stride, above, left - pxor m1, m1 - mova m2, [aboveq-16]; - mova m0, [aboveq] ; t1 t2 ... t16 [byte] - punpckhbw m2, m1 ; [127:112] tl [word] - punpckhbw m4, m0, m1 - punpcklbw m0, m1 ; m0:m4 t1 t2 ... t16 [word] - DEFINE_ARGS dst, stride, line, left, stride8 - mov lineq, -8 - pshufhw m2, m2, 0xff - mova m3, [leftq] ; l1 l2 ... l16 [byte] - punpckhqdq m2, m2 ; tl repeated 8 times [word] - psubw m0, m2 - psubw m4, m2 ; m0:m4 t1-tl t2-tl ... t16-tl [word] - punpckhbw m5, m3, m1 - punpcklbw m3, m1 ; m3:m5 l1 l2 ... 
l16 [word] - lea stride8q, [strideq*8] -.loop: - pshuflw m6, m3, 0x0 - pshuflw m7, m5, 0x0 - punpcklqdq m6, m6 ; l1 repeated 8 times [word] - punpcklqdq m7, m7 ; l8 repeated 8 times [word] - paddw m1, m6, m0 - paddw m6, m4 ; m1:m6 ti-tl+l1 [i=1,15] [word] - psrldq m5, 2 - packuswb m1, m6 - mova [dstq ], m1 - paddw m1, m7, m0 - paddw m7, m4 ; m1:m7 ti-tl+l8 [i=1,15] [word] - psrldq m3, 2 - packuswb m1, m7 - mova [dstq+stride8q], m1 - inc lineq - lea dstq, [dstq+strideq] - jnz .loop - REP_RET - -INIT_XMM sse2 -cglobal tm_predictor_32x32, 4, 4, 8, dst, stride, above, left - pxor m1, m1 - movd m2, [aboveq-1] - mova m0, [aboveq] - mova m4, [aboveq+16] - punpcklbw m2, m1 - punpckhbw m3, m0, m1 - punpckhbw m5, m4, m1 - punpcklbw m0, m1 - punpcklbw m4, m1 - pshuflw m2, m2, 0x0 - DEFINE_ARGS dst, stride, line, left - mov lineq, -16 - punpcklqdq m2, m2 - add leftq, 32 - psubw m0, m2 - psubw m3, m2 - psubw m4, m2 - psubw m5, m2 -.loop: - movd m2, [leftq+lineq*2] - pxor m1, m1 - punpcklbw m2, m1 - pshuflw m7, m2, 0x55 - pshuflw m2, m2, 0x0 - punpcklqdq m2, m2 - punpcklqdq m7, m7 - paddw m6, m2, m3 - paddw m1, m2, m0 - packuswb m1, m6 - mova [dstq ], m1 - paddw m6, m2, m5 - paddw m1, m2, m4 - packuswb m1, m6 - mova [dstq+16 ], m1 - paddw m6, m7, m3 - paddw m1, m7, m0 - packuswb m1, m6 - mova [dstq+strideq ], m1 - paddw m6, m7, m5 - paddw m1, m7, m4 - packuswb m1, m6 - mova [dstq+strideq+16], m1 - lea dstq, [dstq+strideq*2] - inc lineq - jnz .loop - REP_RET diff --git a/thirdparty/libvpx/vpx_dsp/x86/intrapred_ssse3.asm b/thirdparty/libvpx/vpx_dsp/x86/intrapred_ssse3.asm deleted file mode 100644 index 5e0139fa8d..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/intrapred_ssse3.asm +++ /dev/null @@ -1,871 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. 
An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "third_party/x86inc/x86inc.asm" - -SECTION_RODATA - -pb_1: times 16 db 1 -sh_b12345677: db 1, 2, 3, 4, 5, 6, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0 -sh_b23456777: db 2, 3, 4, 5, 6, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0 -sh_b0123456777777777: db 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7 -sh_b1234567777777777: db 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 -sh_b2345677777777777: db 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 -sh_b123456789abcdeff: db 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15 -sh_b23456789abcdefff: db 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15 -sh_b32104567: db 3, 2, 1, 0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0 -sh_b8091a2b345: db 8, 0, 9, 1, 10, 2, 11, 3, 4, 5, 0, 0, 0, 0, 0, 0 -sh_b76543210: db 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 -sh_b65432108: db 6, 5, 4, 3, 2, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0 -sh_b54321089: db 5, 4, 3, 2, 1, 0, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0 -sh_b89abcdef: db 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 -sh_bfedcba9876543210: db 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 - -SECTION .text - -INIT_XMM ssse3 -cglobal d45_predictor_16x16, 3, 6, 4, dst, stride, above, dst8, line, goffset - GET_GOT goffsetq - - mova m0, [aboveq] - DEFINE_ARGS dst, stride, stride3, dst8, line - lea stride3q, [strideq*3] - lea dst8q, [dstq+strideq*8] - mova m1, [GLOBAL(sh_b123456789abcdeff)] - pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)] - pavgb m3, m2, m0 - pxor m2, m0 - pshufb m0, m1 - pand m2, [GLOBAL(pb_1)] - psubb m3, m2 - pavgb m0, m3 - - ; first 4 lines and first half of 3rd 4 lines - mov lined, 2 -.loop: - mova [dstq ], m0 - movhps [dst8q ], m0 - pshufb m0, m1 - mova [dstq +strideq ], m0 - movhps [dst8q+strideq ], m0 - pshufb m0, m1 - mova [dstq +strideq*2 ], m0 - movhps [dst8q+strideq*2 ], m0 - pshufb m0, 
m1 - mova [dstq +stride3q ], m0 - movhps [dst8q+stride3q ], m0 - pshufb m0, m1 - lea dstq, [dstq +strideq*4] - lea dst8q, [dst8q+strideq*4] - dec lined - jnz .loop - - ; bottom-right 8x8 block - movhps [dstq +8], m0 - movhps [dstq+strideq +8], m0 - movhps [dstq+strideq*2+8], m0 - movhps [dstq+stride3q +8], m0 - lea dstq, [dstq+strideq*4] - movhps [dstq +8], m0 - movhps [dstq+strideq +8], m0 - movhps [dstq+strideq*2+8], m0 - movhps [dstq+stride3q +8], m0 - - RESTORE_GOT - RET - -INIT_XMM ssse3 -cglobal d45_predictor_32x32, 3, 6, 7, dst, stride, above, dst16, line, goffset - GET_GOT goffsetq - - mova m0, [aboveq] - mova m4, [aboveq+16] - DEFINE_ARGS dst, stride, stride3, dst16, line - lea stride3q, [strideq*3] - lea dst16q, [dstq +strideq*8] - lea dst16q, [dst16q+strideq*8] - mova m1, [GLOBAL(sh_b123456789abcdeff)] - pshufb m2, m4, [GLOBAL(sh_b23456789abcdefff)] - pavgb m3, m2, m4 - pxor m2, m4 - palignr m5, m4, m0, 1 - palignr m6, m4, m0, 2 - pshufb m4, m1 - pand m2, [GLOBAL(pb_1)] - psubb m3, m2 - pavgb m4, m3 - pavgb m3, m0, m6 - pxor m0, m6 - pand m0, [GLOBAL(pb_1)] - psubb m3, m0 - pavgb m5, m3 - - ; write 4x4 lines (and the first half of the second 4x4 lines) - mov lined, 4 -.loop: - mova [dstq ], m5 - mova [dstq +16], m4 - mova [dst16q ], m4 - palignr m3, m4, m5, 1 - pshufb m4, m1 - mova [dstq +strideq ], m3 - mova [dstq +strideq +16], m4 - mova [dst16q+strideq ], m4 - palignr m5, m4, m3, 1 - pshufb m4, m1 - mova [dstq +strideq*2 ], m5 - mova [dstq +strideq*2+16], m4 - mova [dst16q+strideq*2 ], m4 - palignr m3, m4, m5, 1 - pshufb m4, m1 - mova [dstq +stride3q ], m3 - mova [dstq +stride3q +16], m4 - mova [dst16q+stride3q ], m4 - palignr m5, m4, m3, 1 - pshufb m4, m1 - lea dstq, [dstq +strideq*4] - lea dst16q, [dst16q+strideq*4] - dec lined - jnz .loop - - ; write second half of second 4x4 lines - mova [dstq +16], m4 - mova [dstq +strideq +16], m4 - mova [dstq +strideq*2+16], m4 - mova [dstq +stride3q +16], m4 - lea dstq, [dstq +strideq*4] - mova [dstq +16], m4 
- mova [dstq +strideq +16], m4 - mova [dstq +strideq*2+16], m4 - mova [dstq +stride3q +16], m4 - lea dstq, [dstq +strideq*4] - mova [dstq +16], m4 - mova [dstq +strideq +16], m4 - mova [dstq +strideq*2+16], m4 - mova [dstq +stride3q +16], m4 - lea dstq, [dstq +strideq*4] - mova [dstq +16], m4 - mova [dstq +strideq +16], m4 - mova [dstq +strideq*2+16], m4 - mova [dstq +stride3q +16], m4 - - RESTORE_GOT - RET - -; ------------------------------------------ -; input: x, y, z, result -; -; trick from pascal -; (x+2y+z+2)>>2 can be calculated as: -; result = avg(x,z) -; result -= xor(x,z) & 1 -; result = avg(result,y) -; ------------------------------------------ -%macro X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 4 - pavgb %4, %1, %3 - pxor %3, %1 - pand %3, [GLOBAL(pb_1)] - psubb %4, %3 - pavgb %4, %2 -%endmacro - -INIT_XMM ssse3 -cglobal d63_predictor_4x4, 3, 4, 5, dst, stride, above, goffset - GET_GOT goffsetq - - movq m3, [aboveq] - pshufb m1, m3, [GLOBAL(sh_b23456777)] - pshufb m2, m3, [GLOBAL(sh_b12345677)] - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m3, m2, m1, m4 - pavgb m3, m2 - - ; store 4 lines - movd [dstq ], m3 - movd [dstq+strideq], m4 - lea dstq, [dstq+strideq*2] - psrldq m3, 1 - psrldq m4, 1 - movd [dstq ], m3 - movd [dstq+strideq], m4 - RESTORE_GOT - RET - -INIT_XMM ssse3 -cglobal d63_predictor_8x8, 3, 4, 5, dst, stride, above, goffset - GET_GOT goffsetq - - movq m3, [aboveq] - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - pshufb m1, m3, [GLOBAL(sh_b2345677777777777)] - pshufb m0, m3, [GLOBAL(sh_b0123456777777777)] - pshufb m2, m3, [GLOBAL(sh_b1234567777777777)] - pshufb m3, [GLOBAL(sh_b0123456777777777)] - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m2, m1, m4 - pavgb m3, m2 - - ; store 4 lines - movq [dstq ], m3 - movq [dstq+strideq], m4 - psrldq m3, 1 - psrldq m4, 1 - movq [dstq+strideq*2], m3 - movq [dstq+stride3q ], m4 - lea dstq, [dstq+strideq*4] - psrldq m3, 1 - psrldq m4, 1 - - ; store 4 lines - movq [dstq ], m3 - movq [dstq+strideq], m4 - psrldq m3, 1 - 
psrldq m4, 1 - movq [dstq+strideq*2], m3 - movq [dstq+stride3q ], m4 - RESTORE_GOT - RET - -INIT_XMM ssse3 -cglobal d63_predictor_16x16, 3, 5, 5, dst, stride, above, line, goffset - GET_GOT goffsetq - - mova m0, [aboveq] - DEFINE_ARGS dst, stride, stride3, line - lea stride3q, [strideq*3] - mova m1, [GLOBAL(sh_b123456789abcdeff)] - pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)] - pshufb m3, m0, m1 - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m3, m2, m4 - pavgb m0, m3 - - mov lined, 4 -.loop: - mova [dstq ], m0 - mova [dstq+strideq ], m4 - pshufb m0, m1 - pshufb m4, m1 - mova [dstq+strideq*2], m0 - mova [dstq+stride3q ], m4 - pshufb m0, m1 - pshufb m4, m1 - lea dstq, [dstq+strideq*4] - dec lined - jnz .loop - RESTORE_GOT - REP_RET - -INIT_XMM ssse3 -cglobal d63_predictor_32x32, 3, 5, 8, dst, stride, above, line, goffset - GET_GOT goffsetq - - mova m0, [aboveq] - mova m7, [aboveq+16] - DEFINE_ARGS dst, stride, stride3, line - mova m1, [GLOBAL(sh_b123456789abcdeff)] - lea stride3q, [strideq*3] - pshufb m2, m7, [GLOBAL(sh_b23456789abcdefff)] - pshufb m3, m7, m1 - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m7, m3, m2, m4 - palignr m6, m7, m0, 1 - palignr m5, m7, m0, 2 - pavgb m7, m3 - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m6, m5, m2 - pavgb m0, m6 - - mov lined, 8 -.loop: - mova [dstq ], m0 - mova [dstq +16], m7 - mova [dstq+strideq ], m2 - mova [dstq+strideq +16], m4 - palignr m3, m7, m0, 1 - palignr m5, m4, m2, 1 - pshufb m7, m1 - pshufb m4, m1 - - mova [dstq+strideq*2 ], m3 - mova [dstq+strideq*2+16], m7 - mova [dstq+stride3q ], m5 - mova [dstq+stride3q +16], m4 - palignr m0, m7, m3, 1 - palignr m2, m4, m5, 1 - pshufb m7, m1 - pshufb m4, m1 - lea dstq, [dstq+strideq*4] - dec lined - jnz .loop - RESTORE_GOT - REP_RET - -INIT_XMM ssse3 -cglobal d153_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset - GET_GOT goffsetq - movd m0, [leftq] ; l1, l2, l3, l4 - movd m1, [aboveq-1] ; tl, t1, t2, t3 - punpckldq m0, m1 ; l1, l2, l3, l4, tl, t1, t2, t3 - pshufb m0, [GLOBAL(sh_b32104567)]; l4, 
l3, l2, l1, tl, t1, t2, t3 - psrldq m1, m0, 1 ; l3, l2, l1, tl, t1, t2, t3 - psrldq m2, m0, 2 ; l2, l1, tl, t1, t2, t3 - ; comments below are for a predictor like this - ; A1 B1 C1 D1 - ; A2 B2 A1 B1 - ; A3 B3 A2 B2 - ; A4 B4 A3 B3 - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 ; 3-tap avg B4 B3 B2 B1 C1 D1 - pavgb m1, m0 ; 2-tap avg A4 A3 A2 A1 - - punpcklqdq m3, m1 ; B4 B3 B2 B1 C1 D1 x x A4 A3 A2 A1 .. - - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - pshufb m3, [GLOBAL(sh_b8091a2b345)] ; A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 .. - movd [dstq+stride3q ], m3 - psrldq m3, 2 ; A3 B3 A2 B2 A1 B1 C1 D1 .. - movd [dstq+strideq*2], m3 - psrldq m3, 2 ; A2 B2 A1 B1 C1 D1 .. - movd [dstq+strideq ], m3 - psrldq m3, 2 ; A1 B1 C1 D1 .. - movd [dstq ], m3 - RESTORE_GOT - RET - -INIT_XMM ssse3 -cglobal d153_predictor_8x8, 4, 5, 8, dst, stride, above, left, goffset - GET_GOT goffsetq - movq m0, [leftq] ; [0- 7] l1-8 [byte] - movhps m0, [aboveq-1] ; [8-15] tl, t1-7 [byte] - pshufb m1, m0, [GLOBAL(sh_b76543210)] ; l8-1 [word] - pshufb m2, m0, [GLOBAL(sh_b65432108)] ; l7-1,tl [word] - pshufb m3, m0, [GLOBAL(sh_b54321089)] ; l6-1,tl,t1 [word] - pshufb m0, [GLOBAL(sh_b89abcdef)] ; tl,t1-7 [word] - psrldq m4, m0, 1 ; t1-7 [word] - psrldq m5, m0, 2 ; t2-7 [word] - ; comments below are for a predictor like this - ; A1 B1 C1 D1 E1 F1 G1 H1 - ; A2 B2 A1 B1 C1 D1 E1 F1 - ; A3 B3 A2 B2 A1 B1 C1 D1 - ; A4 B4 A3 B3 A2 B2 A1 B1 - ; A5 B5 A4 B4 A3 B3 A2 B2 - ; A6 B6 A5 B5 A4 B4 A3 B3 - ; A7 B7 A6 B6 A5 B5 A4 B4 - ; A8 B8 A7 B7 A6 B6 A5 B5 - pavgb m6, m1, m2 ; 2-tap avg A8-A1 - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m4, m5, m7 ; 3-tap avg C-H1 - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m1, m2, m3, m0 ; 3-tap avg B8-1 - - punpcklbw m6, m0 ; A-B8, A-B7 ... 
A-B2, A-B1 - - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - - movhps [dstq+stride3q], m6 ; A-B4, A-B3, A-B2, A-B1 - palignr m0, m7, m6, 10 ; A-B3, A-B2, A-B1, C-H1 - movq [dstq+strideq*2], m0 - psrldq m0, 2 ; A-B2, A-B1, C-H1 - movq [dstq+strideq ], m0 - psrldq m0, 2 ; A-H1 - movq [dstq ], m0 - lea dstq, [dstq+strideq*4] - movq [dstq+stride3q ], m6 ; A-B8, A-B7, A-B6, A-B5 - psrldq m6, 2 ; A-B7, A-B6, A-B5, A-B4 - movq [dstq+strideq*2], m6 - psrldq m6, 2 ; A-B6, A-B5, A-B4, A-B3 - movq [dstq+strideq ], m6 - psrldq m6, 2 ; A-B5, A-B4, A-B3, A-B2 - movq [dstq ], m6 - RESTORE_GOT - RET - -INIT_XMM ssse3 -cglobal d153_predictor_16x16, 4, 5, 8, dst, stride, above, left, goffset - GET_GOT goffsetq - mova m0, [leftq] - movu m7, [aboveq-1] - ; comments below are for a predictor like this - ; A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 K1 L1 M1 N1 O1 P1 - ; A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 K1 L1 M1 N1 - ; A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 K1 L1 - ; A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 - ; A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 - ; A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 - ; A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 - ; A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 - ; A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 - ; Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 - ; Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 - ; Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 - ; Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 - ; Ae Be Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 - ; Af Bf Ae Be Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 - ; Ag Bg Af Bf Ae Be Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 - pshufb m6, m7, [GLOBAL(sh_bfedcba9876543210)] - palignr m5, m0, m6, 15 - palignr m3, m0, m6, 14 - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m5, m3, m4 ; 3-tap avg B3-Bg - pshufb m1, m0, [GLOBAL(sh_b123456789abcdeff)] - pavgb m5, m0 ; A1 - Ag - - punpcklbw m0, m4, m5 ; A-B8 ... A-B1 - punpckhbw m4, m5 ; A-B9 ... 
A-Bg - - pshufb m3, m7, [GLOBAL(sh_b123456789abcdeff)] - pshufb m5, m7, [GLOBAL(sh_b23456789abcdefff)] - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m7, m3, m5, m1 ; 3-tap avg C1-P1 - - pshufb m6, m0, [GLOBAL(sh_bfedcba9876543210)] - DEFINE_ARGS dst, stride, stride3 - lea stride3q, [strideq*3] - palignr m2, m1, m6, 14 - mova [dstq ], m2 - palignr m2, m1, m6, 12 - mova [dstq+strideq ], m2 - palignr m2, m1, m6, 10 - mova [dstq+strideq*2], m2 - palignr m2, m1, m6, 8 - mova [dstq+stride3q ], m2 - lea dstq, [dstq+strideq*4] - palignr m2, m1, m6, 6 - mova [dstq ], m2 - palignr m2, m1, m6, 4 - mova [dstq+strideq ], m2 - palignr m2, m1, m6, 2 - mova [dstq+strideq*2], m2 - pshufb m4, [GLOBAL(sh_bfedcba9876543210)] - mova [dstq+stride3q ], m6 - lea dstq, [dstq+strideq*4] - - palignr m2, m6, m4, 14 - mova [dstq ], m2 - palignr m2, m6, m4, 12 - mova [dstq+strideq ], m2 - palignr m2, m6, m4, 10 - mova [dstq+strideq*2], m2 - palignr m2, m6, m4, 8 - mova [dstq+stride3q ], m2 - lea dstq, [dstq+strideq*4] - palignr m2, m6, m4, 6 - mova [dstq ], m2 - palignr m2, m6, m4, 4 - mova [dstq+strideq ], m2 - palignr m2, m6, m4, 2 - mova [dstq+strideq*2], m2 - mova [dstq+stride3q ], m4 - RESTORE_GOT - RET - -INIT_XMM ssse3 -cglobal d153_predictor_32x32, 4, 5, 8, dst, stride, above, left, goffset - GET_GOT goffsetq - mova m0, [leftq] - movu m7, [aboveq-1] - movu m1, [aboveq+15] - - pshufb m4, m1, [GLOBAL(sh_b123456789abcdeff)] - pshufb m6, m1, [GLOBAL(sh_b23456789abcdefff)] - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m1, m4, m6, m2 ; 3-tap avg above [high] - - palignr m3, m1, m7, 1 - palignr m5, m1, m7, 2 - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m7, m3, m5, m1 ; 3-tap avg above [low] - - pshufb m7, [GLOBAL(sh_bfedcba9876543210)] - palignr m5, m0, m7, 15 - palignr m3, m0, m7, 14 - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m5, m3, m4 ; 3-tap avg B3-Bg - pavgb m5, m0 ; A1 - Ag - punpcklbw m6, m4, m5 ; A-B8 ... A-B1 - punpckhbw m4, m5 ; A-B9 ... 
A-Bg - pshufb m6, [GLOBAL(sh_bfedcba9876543210)] - pshufb m4, [GLOBAL(sh_bfedcba9876543210)] - - DEFINE_ARGS dst, stride, stride3, left, line - lea stride3q, [strideq*3] - - palignr m5, m2, m1, 14 - palignr m7, m1, m6, 14 - mova [dstq ], m7 - mova [dstq+16 ], m5 - palignr m5, m2, m1, 12 - palignr m7, m1, m6, 12 - mova [dstq+strideq ], m7 - mova [dstq+strideq+16 ], m5 - palignr m5, m2, m1, 10 - palignr m7, m1, m6, 10 - mova [dstq+strideq*2 ], m7 - mova [dstq+strideq*2+16], m5 - palignr m5, m2, m1, 8 - palignr m7, m1, m6, 8 - mova [dstq+stride3q ], m7 - mova [dstq+stride3q+16 ], m5 - lea dstq, [dstq+strideq*4] - palignr m5, m2, m1, 6 - palignr m7, m1, m6, 6 - mova [dstq ], m7 - mova [dstq+16 ], m5 - palignr m5, m2, m1, 4 - palignr m7, m1, m6, 4 - mova [dstq+strideq ], m7 - mova [dstq+strideq+16 ], m5 - palignr m5, m2, m1, 2 - palignr m7, m1, m6, 2 - mova [dstq+strideq*2 ], m7 - mova [dstq+strideq*2+16], m5 - mova [dstq+stride3q ], m6 - mova [dstq+stride3q+16 ], m1 - lea dstq, [dstq+strideq*4] - - palignr m5, m1, m6, 14 - palignr m3, m6, m4, 14 - mova [dstq ], m3 - mova [dstq+16 ], m5 - palignr m5, m1, m6, 12 - palignr m3, m6, m4, 12 - mova [dstq+strideq ], m3 - mova [dstq+strideq+16 ], m5 - palignr m5, m1, m6, 10 - palignr m3, m6, m4, 10 - mova [dstq+strideq*2 ], m3 - mova [dstq+strideq*2+16], m5 - palignr m5, m1, m6, 8 - palignr m3, m6, m4, 8 - mova [dstq+stride3q ], m3 - mova [dstq+stride3q+16 ], m5 - lea dstq, [dstq+strideq*4] - palignr m5, m1, m6, 6 - palignr m3, m6, m4, 6 - mova [dstq ], m3 - mova [dstq+16 ], m5 - palignr m5, m1, m6, 4 - palignr m3, m6, m4, 4 - mova [dstq+strideq ], m3 - mova [dstq+strideq+16 ], m5 - palignr m5, m1, m6, 2 - palignr m3, m6, m4, 2 - mova [dstq+strideq*2 ], m3 - mova [dstq+strideq*2+16], m5 - mova [dstq+stride3q ], m4 - mova [dstq+stride3q+16 ], m6 - lea dstq, [dstq+strideq*4] - - mova m7, [leftq] - mova m3, [leftq+16] - palignr m5, m3, m7, 15 - palignr m0, m3, m7, 14 - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m3, m5, m0, m2 ; 3-tap avg Bh 
- - pavgb m5, m3 ; Ah - - punpcklbw m3, m2, m5 ; A-B8 ... A-B1 - punpckhbw m2, m5 ; A-B9 ... A-Bg - pshufb m3, [GLOBAL(sh_bfedcba9876543210)] - pshufb m2, [GLOBAL(sh_bfedcba9876543210)] - - palignr m7, m6, m4, 14 - palignr m0, m4, m3, 14 - mova [dstq ], m0 - mova [dstq+16 ], m7 - palignr m7, m6, m4, 12 - palignr m0, m4, m3, 12 - mova [dstq+strideq ], m0 - mova [dstq+strideq+16 ], m7 - palignr m7, m6, m4, 10 - palignr m0, m4, m3, 10 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16], m7 - palignr m7, m6, m4, 8 - palignr m0, m4, m3, 8 - mova [dstq+stride3q ], m0 - mova [dstq+stride3q+16 ], m7 - lea dstq, [dstq+strideq*4] - palignr m7, m6, m4, 6 - palignr m0, m4, m3, 6 - mova [dstq ], m0 - mova [dstq+16 ], m7 - palignr m7, m6, m4, 4 - palignr m0, m4, m3, 4 - mova [dstq+strideq ], m0 - mova [dstq+strideq+16 ], m7 - palignr m7, m6, m4, 2 - palignr m0, m4, m3, 2 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16], m7 - mova [dstq+stride3q ], m3 - mova [dstq+stride3q+16 ], m4 - lea dstq, [dstq+strideq*4] - - palignr m7, m4, m3, 14 - palignr m0, m3, m2, 14 - mova [dstq ], m0 - mova [dstq+16 ], m7 - palignr m7, m4, m3, 12 - palignr m0, m3, m2, 12 - mova [dstq+strideq ], m0 - mova [dstq+strideq+16 ], m7 - palignr m7, m4, m3, 10 - palignr m0, m3, m2, 10 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16], m7 - palignr m7, m4, m3, 8 - palignr m0, m3, m2, 8 - mova [dstq+stride3q ], m0 - mova [dstq+stride3q+16 ], m7 - lea dstq, [dstq+strideq*4] - palignr m7, m4, m3, 6 - palignr m0, m3, m2, 6 - mova [dstq ], m0 - mova [dstq+16 ], m7 - palignr m7, m4, m3, 4 - palignr m0, m3, m2, 4 - mova [dstq+strideq ], m0 - mova [dstq+strideq+16 ], m7 - palignr m7, m4, m3, 2 - palignr m0, m3, m2, 2 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2+16], m7 - mova [dstq+stride3q ], m2 - mova [dstq+stride3q+16 ], m3 - - RESTORE_GOT - RET - -INIT_XMM ssse3 -cglobal d207_predictor_8x8, 4, 5, 4, dst, stride, stride3, left, goffset - GET_GOT goffsetq - movq m3, [leftq] ; abcdefgh [byte] 
- lea stride3q, [strideq*3] - - pshufb m1, m3, [GLOBAL(sh_b2345677777777777)] - pshufb m0, m3, [GLOBAL(sh_b0123456777777777)] - pshufb m2, m3, [GLOBAL(sh_b1234567777777777)] - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m2, m1, m3 - pavgb m0, m2 - punpcklbw m0, m3 ; interleaved output - - movq [dstq ], m0 - psrldq m0, 2 - movq [dstq+strideq ], m0 - psrldq m0, 2 - movq [dstq+strideq*2], m0 - psrldq m0, 2 - movq [dstq+stride3q ], m0 - lea dstq, [dstq+strideq*4] - pshufhw m0, m0, q0000 ; de, d2ef, ef, e2fg, fg, f2gh, gh, g3h, 8xh - psrldq m0, 2 - movq [dstq ], m0 - psrldq m0, 2 - movq [dstq+strideq ], m0 - psrldq m0, 2 - movq [dstq+strideq*2], m0 - psrldq m0, 2 - movq [dstq+stride3q ], m0 - RESTORE_GOT - RET - -INIT_XMM ssse3 -cglobal d207_predictor_16x16, 4, 5, 5, dst, stride, stride3, left, goffset - GET_GOT goffsetq - lea stride3q, [strideq*3] - mova m0, [leftq] ; abcdefghijklmnop [byte] - pshufb m1, m0, [GLOBAL(sh_b123456789abcdeff)] ; bcdefghijklmnopp - pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)] - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 - pavgb m1, m0 ; ab, bc, cd .. 
no, op, pp [byte] - - punpckhbw m4, m1, m3 ; interleaved input - punpcklbw m1, m3 ; interleaved output - mova [dstq ], m1 - palignr m3, m4, m1, 2 - mova [dstq+strideq ], m3 - palignr m3, m4, m1, 4 - mova [dstq+strideq*2], m3 - palignr m3, m4, m1, 6 - mova [dstq+stride3q ], m3 - lea dstq, [dstq+strideq*4] - palignr m3, m4, m1, 8 - mova [dstq ], m3 - palignr m3, m4, m1, 10 - mova [dstq+strideq ], m3 - palignr m3, m4, m1, 12 - mova [dstq+strideq*2], m3 - palignr m3, m4, m1, 14 - mova [dstq+stride3q ], m3 - DEFINE_ARGS dst, stride, stride3, line - mov lined, 2 - mova m0, [GLOBAL(sh_b23456789abcdefff)] -.loop: - lea dstq, [dstq+strideq*4] - mova [dstq ], m4 - pshufb m4, m0 - mova [dstq+strideq ], m4 - pshufb m4, m0 - mova [dstq+strideq*2], m4 - pshufb m4, m0 - mova [dstq+stride3q ], m4 - pshufb m4, m0 - dec lined - jnz .loop - RESTORE_GOT - REP_RET - -INIT_XMM ssse3 -cglobal d207_predictor_32x32, 4, 5, 8, dst, stride, stride3, left, goffset - GET_GOT goffsetq - lea stride3q, [strideq*3] - mova m1, [leftq] ; 0-15 [byte] - mova m2, [leftq+16] ; 16-31 [byte] - pshufb m0, m2, [GLOBAL(sh_b23456789abcdefff)] - pshufb m4, m2, [GLOBAL(sh_b123456789abcdeff)] - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m2, m4, m0, m3 - palignr m6, m2, m1, 1 - palignr m5, m2, m1, 2 - pavgb m2, m4 ; high 16px even lines - - X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m1, m6, m5, m0 - pavgb m1, m6 ; low 16px even lines - - punpckhbw m6, m1, m0 ; interleaved output 2 - punpcklbw m1, m0 ; interleaved output 1 - - punpckhbw m7, m2, m3 ; interleaved output 4 - punpcklbw m2, m3 ; interleaved output 3 - - ; output 1st 8 lines (and half of 2nd 8 lines) - DEFINE_ARGS dst, stride, stride3, dst8 - lea dst8q, [dstq+strideq*8] - mova [dstq ], m1 - mova [dstq +16], m6 - mova [dst8q ], m6 - palignr m0, m6, m1, 2 - palignr m4, m2, m6, 2 - mova [dstq +strideq ], m0 - mova [dstq +strideq +16], m4 - mova [dst8q+strideq ], m4 - palignr m0, m6, m1, 4 - palignr m4, m2, m6, 4 - mova [dstq +strideq*2 ], m0 - mova [dstq +strideq*2+16], m4 - mova 
[dst8q+strideq*2 ], m4 - palignr m0, m6, m1, 6 - palignr m4, m2, m6, 6 - mova [dstq +stride3q ], m0 - mova [dstq +stride3q +16], m4 - mova [dst8q+stride3q ], m4 - lea dstq, [dstq +strideq*4] - lea dst8q, [dst8q+strideq*4] - palignr m0, m6, m1, 8 - palignr m4, m2, m6, 8 - mova [dstq ], m0 - mova [dstq +16], m4 - mova [dst8q ], m4 - palignr m0, m6, m1, 10 - palignr m4, m2, m6, 10 - mova [dstq +strideq ], m0 - mova [dstq +strideq +16], m4 - mova [dst8q+strideq ], m4 - palignr m0, m6, m1, 12 - palignr m4, m2, m6, 12 - mova [dstq +strideq*2 ], m0 - mova [dstq +strideq*2+16], m4 - mova [dst8q+strideq*2 ], m4 - palignr m0, m6, m1, 14 - palignr m4, m2, m6, 14 - mova [dstq +stride3q ], m0 - mova [dstq +stride3q +16], m4 - mova [dst8q+stride3q ], m4 - lea dstq, [dstq+strideq*4] - lea dst8q, [dst8q+strideq*4] - - ; output 2nd half of 2nd 8 lines and half of 3rd 8 lines - mova [dstq +16], m2 - mova [dst8q ], m2 - palignr m4, m7, m2, 2 - mova [dstq +strideq +16], m4 - mova [dst8q+strideq ], m4 - palignr m4, m7, m2, 4 - mova [dstq +strideq*2+16], m4 - mova [dst8q+strideq*2 ], m4 - palignr m4, m7, m2, 6 - mova [dstq +stride3q +16], m4 - mova [dst8q+stride3q ], m4 - lea dstq, [dstq+strideq*4] - lea dst8q, [dst8q+strideq*4] - palignr m4, m7, m2, 8 - mova [dstq +16], m4 - mova [dst8q ], m4 - palignr m4, m7, m2, 10 - mova [dstq +strideq +16], m4 - mova [dst8q+strideq ], m4 - palignr m4, m7, m2, 12 - mova [dstq +strideq*2+16], m4 - mova [dst8q+strideq*2 ], m4 - palignr m4, m7, m2, 14 - mova [dstq +stride3q +16], m4 - mova [dst8q+stride3q ], m4 - lea dstq, [dstq+strideq*4] - lea dst8q, [dst8q+strideq*4] - - ; output 2nd half of 3rd 8 lines and half of 4th 8 lines - mova m0, [GLOBAL(sh_b23456789abcdefff)] - mova [dstq +16], m7 - mova [dst8q ], m7 - pshufb m7, m0 - mova [dstq +strideq +16], m7 - mova [dst8q+strideq ], m7 - pshufb m7, m0 - mova [dstq +strideq*2+16], m7 - mova [dst8q+strideq*2 ], m7 - pshufb m7, m0 - mova [dstq +stride3q +16], m7 - mova [dst8q+stride3q ], m7 - pshufb m7, 
m0 - lea dstq, [dstq+strideq*4] - lea dst8q, [dst8q+strideq*4] - mova [dstq +16], m7 - mova [dst8q ], m7 - pshufb m7, m0 - mova [dstq +strideq +16], m7 - mova [dst8q+strideq ], m7 - pshufb m7, m0 - mova [dstq +strideq*2+16], m7 - mova [dst8q+strideq*2 ], m7 - pshufb m7, m0 - mova [dstq +stride3q +16], m7 - mova [dst8q+stride3q ], m7 - pshufb m7, m0 - lea dstq, [dstq+strideq*4] - - ; output last half of 4th 8 lines - mova [dstq +16], m7 - mova [dstq +strideq +16], m7 - mova [dstq +strideq*2+16], m7 - mova [dstq +stride3q +16], m7 - lea dstq, [dstq+strideq*4] - mova [dstq +16], m7 - mova [dstq +strideq +16], m7 - mova [dstq +strideq*2+16], m7 - mova [dstq +stride3q +16], m7 - - ; done! - RESTORE_GOT - RET diff --git a/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.c b/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.c deleted file mode 100644 index df5068c624..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.c +++ /dev/null @@ -1,4046 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "./vpx_dsp_rtcd.h" -#include "vpx_dsp/x86/inv_txfm_sse2.h" -#include "vpx_dsp/x86/txfm_common_sse2.h" - -#define RECON_AND_STORE4X4(dest, in_x) \ -{ \ - __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); \ - d0 = _mm_unpacklo_epi8(d0, zero); \ - d0 = _mm_add_epi16(in_x, d0); \ - d0 = _mm_packus_epi16(d0, d0); \ - *(int *)(dest) = _mm_cvtsi128_si32(d0); \ -} - -void vpx_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - const __m128i zero = _mm_setzero_si128(); - const __m128i eight = _mm_set1_epi16(8); - const __m128i cst = _mm_setr_epi16( - (int16_t)cospi_16_64, (int16_t)cospi_16_64, (int16_t)cospi_16_64, - (int16_t)-cospi_16_64, (int16_t)cospi_24_64, (int16_t)-cospi_8_64, - (int16_t)cospi_8_64, (int16_t)cospi_24_64); - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - __m128i input0, input1, input2, input3; - - // Rows - input0 = load_input_data(input); - input2 = load_input_data(input + 8); - - // Construct i3, i1, i3, i1, i2, i0, i2, i0 - input0 = _mm_shufflelo_epi16(input0, 0xd8); - input0 = _mm_shufflehi_epi16(input0, 0xd8); - input2 = _mm_shufflelo_epi16(input2, 0xd8); - input2 = _mm_shufflehi_epi16(input2, 0xd8); - - input1 = _mm_unpackhi_epi32(input0, input0); - input0 = _mm_unpacklo_epi32(input0, input0); - input3 = _mm_unpackhi_epi32(input2, input2); - input2 = _mm_unpacklo_epi32(input2, input2); - - // Stage 1 - input0 = _mm_madd_epi16(input0, cst); - input1 = _mm_madd_epi16(input1, cst); - input2 = _mm_madd_epi16(input2, cst); - input3 = _mm_madd_epi16(input3, cst); - - input0 = _mm_add_epi32(input0, rounding); - input1 = _mm_add_epi32(input1, rounding); - input2 = _mm_add_epi32(input2, rounding); - input3 = _mm_add_epi32(input3, rounding); - - input0 = _mm_srai_epi32(input0, DCT_CONST_BITS); - input1 = _mm_srai_epi32(input1, DCT_CONST_BITS); - input2 = _mm_srai_epi32(input2, DCT_CONST_BITS); - input3 = _mm_srai_epi32(input3, DCT_CONST_BITS); - - // Stage 2 - input0 = _mm_packs_epi32(input0, 
input1); - input1 = _mm_packs_epi32(input2, input3); - - // Transpose - input2 = _mm_unpacklo_epi16(input0, input1); - input3 = _mm_unpackhi_epi16(input0, input1); - input0 = _mm_unpacklo_epi32(input2, input3); - input1 = _mm_unpackhi_epi32(input2, input3); - - // Switch column2, column 3, and then, we got: - // input2: column1, column 0; input3: column2, column 3. - input1 = _mm_shuffle_epi32(input1, 0x4e); - input2 = _mm_add_epi16(input0, input1); - input3 = _mm_sub_epi16(input0, input1); - - // Columns - // Construct i3, i1, i3, i1, i2, i0, i2, i0 - input0 = _mm_unpacklo_epi32(input2, input2); - input1 = _mm_unpackhi_epi32(input2, input2); - input2 = _mm_unpackhi_epi32(input3, input3); - input3 = _mm_unpacklo_epi32(input3, input3); - - // Stage 1 - input0 = _mm_madd_epi16(input0, cst); - input1 = _mm_madd_epi16(input1, cst); - input2 = _mm_madd_epi16(input2, cst); - input3 = _mm_madd_epi16(input3, cst); - - input0 = _mm_add_epi32(input0, rounding); - input1 = _mm_add_epi32(input1, rounding); - input2 = _mm_add_epi32(input2, rounding); - input3 = _mm_add_epi32(input3, rounding); - - input0 = _mm_srai_epi32(input0, DCT_CONST_BITS); - input1 = _mm_srai_epi32(input1, DCT_CONST_BITS); - input2 = _mm_srai_epi32(input2, DCT_CONST_BITS); - input3 = _mm_srai_epi32(input3, DCT_CONST_BITS); - - // Stage 2 - input0 = _mm_packs_epi32(input0, input2); - input1 = _mm_packs_epi32(input1, input3); - - // Transpose - input2 = _mm_unpacklo_epi16(input0, input1); - input3 = _mm_unpackhi_epi16(input0, input1); - input0 = _mm_unpacklo_epi32(input2, input3); - input1 = _mm_unpackhi_epi32(input2, input3); - - // Switch column2, column 3, and then, we got: - // input2: column1, column 0; input3: column2, column 3. 
- input1 = _mm_shuffle_epi32(input1, 0x4e); - input2 = _mm_add_epi16(input0, input1); - input3 = _mm_sub_epi16(input0, input1); - - // Final round and shift - input2 = _mm_add_epi16(input2, eight); - input3 = _mm_add_epi16(input3, eight); - - input2 = _mm_srai_epi16(input2, 4); - input3 = _mm_srai_epi16(input3, 4); - - // Reconstruction and Store - { - __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); - __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2)); - d0 = _mm_unpacklo_epi32(d0, - _mm_cvtsi32_si128(*(const int *)(dest + stride))); - d2 = _mm_unpacklo_epi32( - _mm_cvtsi32_si128(*(const int *)(dest + stride * 3)), d2); - d0 = _mm_unpacklo_epi8(d0, zero); - d2 = _mm_unpacklo_epi8(d2, zero); - d0 = _mm_add_epi16(d0, input2); - d2 = _mm_add_epi16(d2, input3); - d0 = _mm_packus_epi16(d0, d2); - // store input0 - *(int *)dest = _mm_cvtsi128_si32(d0); - // store input1 - d0 = _mm_srli_si128(d0, 4); - *(int *)(dest + stride) = _mm_cvtsi128_si32(d0); - // store input2 - d0 = _mm_srli_si128(d0, 4); - *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0); - // store input3 - d0 = _mm_srli_si128(d0, 4); - *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0); - } -} - -void vpx_idct4x4_1_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - __m128i dc_value; - const __m128i zero = _mm_setzero_si128(); - int a; - - a = (int)dct_const_round_shift(input[0] * cospi_16_64); - a = (int)dct_const_round_shift(a * cospi_16_64); - a = ROUND_POWER_OF_TWO(a, 4); - - dc_value = _mm_set1_epi16(a); - - RECON_AND_STORE4X4(dest + 0 * stride, dc_value); - RECON_AND_STORE4X4(dest + 1 * stride, dc_value); - RECON_AND_STORE4X4(dest + 2 * stride, dc_value); - RECON_AND_STORE4X4(dest + 3 * stride, dc_value); -} - -static INLINE void transpose_4x4(__m128i *res) { - const __m128i tr0_0 = _mm_unpacklo_epi16(res[0], res[1]); - const __m128i tr0_1 = _mm_unpackhi_epi16(res[0], res[1]); - - res[0] = _mm_unpacklo_epi16(tr0_0, tr0_1); - res[1] = _mm_unpackhi_epi16(tr0_0, 
tr0_1); -} - -void idct4_sse2(__m128i *in) { - const __m128i k__cospi_p16_p16 = pair_set_epi16(cospi_16_64, cospi_16_64); - const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - __m128i u[8], v[8]; - - transpose_4x4(in); - // stage 1 - u[0] = _mm_unpacklo_epi16(in[0], in[1]); - u[1] = _mm_unpackhi_epi16(in[0], in[1]); - v[0] = _mm_madd_epi16(u[0], k__cospi_p16_p16); - v[1] = _mm_madd_epi16(u[0], k__cospi_p16_m16); - v[2] = _mm_madd_epi16(u[1], k__cospi_p24_m08); - v[3] = _mm_madd_epi16(u[1], k__cospi_p08_p24); - - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); - - v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - - u[0] = _mm_packs_epi32(v[0], v[1]); - u[1] = _mm_packs_epi32(v[3], v[2]); - - // stage 2 - in[0] = _mm_add_epi16(u[0], u[1]); - in[1] = _mm_sub_epi16(u[0], u[1]); - in[1] = _mm_shuffle_epi32(in[1], 0x4E); -} - -void iadst4_sse2(__m128i *in) { - const __m128i k__sinpi_p01_p04 = pair_set_epi16(sinpi_1_9, sinpi_4_9); - const __m128i k__sinpi_p03_p02 = pair_set_epi16(sinpi_3_9, sinpi_2_9); - const __m128i k__sinpi_p02_m01 = pair_set_epi16(sinpi_2_9, -sinpi_1_9); - const __m128i k__sinpi_p03_m04 = pair_set_epi16(sinpi_3_9, -sinpi_4_9); - const __m128i k__sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi_3_9); - const __m128i kZero = _mm_set1_epi16(0); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - __m128i u[8], v[8], in7; - - transpose_4x4(in); - in7 = _mm_srli_si128(in[1], 8); - in7 = 
_mm_add_epi16(in7, in[0]); - in7 = _mm_sub_epi16(in7, in[1]); - - u[0] = _mm_unpacklo_epi16(in[0], in[1]); - u[1] = _mm_unpackhi_epi16(in[0], in[1]); - u[2] = _mm_unpacklo_epi16(in7, kZero); - u[3] = _mm_unpackhi_epi16(in[0], kZero); - - v[0] = _mm_madd_epi16(u[0], k__sinpi_p01_p04); // s0 + s3 - v[1] = _mm_madd_epi16(u[1], k__sinpi_p03_p02); // s2 + s5 - v[2] = _mm_madd_epi16(u[2], k__sinpi_p03_p03); // x2 - v[3] = _mm_madd_epi16(u[0], k__sinpi_p02_m01); // s1 - s4 - v[4] = _mm_madd_epi16(u[1], k__sinpi_p03_m04); // s2 - s6 - v[5] = _mm_madd_epi16(u[3], k__sinpi_p03_p03); // s2 - - u[0] = _mm_add_epi32(v[0], v[1]); - u[1] = _mm_add_epi32(v[3], v[4]); - u[2] = v[2]; - u[3] = _mm_add_epi32(u[0], u[1]); - u[4] = _mm_slli_epi32(v[5], 2); - u[5] = _mm_add_epi32(u[3], v[5]); - u[6] = _mm_sub_epi32(u[5], u[4]); - - v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); - v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); - v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); - v[3] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); - - in[0] = _mm_packs_epi32(u[0], u[1]); - in[1] = _mm_packs_epi32(u[2], u[3]); -} - -#define TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, \ - out0, out1, out2, out3, out4, out5, out6, out7) \ - { \ - const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \ - const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \ - const __m128i tr0_2 = _mm_unpackhi_epi16(in0, in1); \ - const __m128i tr0_3 = _mm_unpackhi_epi16(in2, in3); \ - const __m128i tr0_4 = _mm_unpacklo_epi16(in4, in5); \ - const __m128i tr0_5 = _mm_unpacklo_epi16(in6, in7); \ - const __m128i tr0_6 = _mm_unpackhi_epi16(in4, in5); \ - const __m128i tr0_7 = _mm_unpackhi_epi16(in6, in7); \ - \ - const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \ - const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3); 
\ - const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); \ - const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3); \ - const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); \ - const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7); \ - const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); \ - const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7); \ - \ - out0 = _mm_unpacklo_epi64(tr1_0, tr1_4); \ - out1 = _mm_unpackhi_epi64(tr1_0, tr1_4); \ - out2 = _mm_unpacklo_epi64(tr1_2, tr1_6); \ - out3 = _mm_unpackhi_epi64(tr1_2, tr1_6); \ - out4 = _mm_unpacklo_epi64(tr1_1, tr1_5); \ - out5 = _mm_unpackhi_epi64(tr1_1, tr1_5); \ - out6 = _mm_unpacklo_epi64(tr1_3, tr1_7); \ - out7 = _mm_unpackhi_epi64(tr1_3, tr1_7); \ - } - -#define TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, \ - out0, out1, out2, out3) \ - { \ - const __m128i tr0_0 = _mm_unpackhi_epi16(tmp0, tmp1); \ - const __m128i tr0_1 = _mm_unpacklo_epi16(tmp1, tmp0); \ - const __m128i tr0_4 = _mm_unpacklo_epi16(tmp2, tmp3); \ - const __m128i tr0_5 = _mm_unpackhi_epi16(tmp3, tmp2); \ - \ - const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \ - const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); \ - const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); \ - const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); \ - \ - out0 = _mm_unpacklo_epi64(tr1_0, tr1_4); \ - out1 = _mm_unpackhi_epi64(tr1_0, tr1_4); \ - out2 = _mm_unpacklo_epi64(tr1_2, tr1_6); \ - out3 = _mm_unpackhi_epi64(tr1_2, tr1_6); \ - } - -#define TRANSPOSE_8X8_10(in0, in1, in2, in3, out0, out1) \ - { \ - const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \ - const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \ - out0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \ - out1 = _mm_unpackhi_epi32(tr0_0, tr0_1); \ - } - -// Define Macro for multiplying elements by constants and adding them together. 
-#define MULTIPLICATION_AND_ADD(lo_0, hi_0, lo_1, hi_1, \ - cst0, cst1, cst2, cst3, res0, res1, res2, res3) \ - { \ - tmp0 = _mm_madd_epi16(lo_0, cst0); \ - tmp1 = _mm_madd_epi16(hi_0, cst0); \ - tmp2 = _mm_madd_epi16(lo_0, cst1); \ - tmp3 = _mm_madd_epi16(hi_0, cst1); \ - tmp4 = _mm_madd_epi16(lo_1, cst2); \ - tmp5 = _mm_madd_epi16(hi_1, cst2); \ - tmp6 = _mm_madd_epi16(lo_1, cst3); \ - tmp7 = _mm_madd_epi16(hi_1, cst3); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - tmp4 = _mm_add_epi32(tmp4, rounding); \ - tmp5 = _mm_add_epi32(tmp5, rounding); \ - tmp6 = _mm_add_epi32(tmp6, rounding); \ - tmp7 = _mm_add_epi32(tmp7, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); \ - tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS); \ - tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); \ - tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS); \ - \ - res0 = _mm_packs_epi32(tmp0, tmp1); \ - res1 = _mm_packs_epi32(tmp2, tmp3); \ - res2 = _mm_packs_epi32(tmp4, tmp5); \ - res3 = _mm_packs_epi32(tmp6, tmp7); \ - } - -#define MULTIPLICATION_AND_ADD_2(lo_0, hi_0, cst0, cst1, res0, res1) \ - { \ - tmp0 = _mm_madd_epi16(lo_0, cst0); \ - tmp1 = _mm_madd_epi16(hi_0, cst0); \ - tmp2 = _mm_madd_epi16(lo_0, cst1); \ - tmp3 = _mm_madd_epi16(hi_0, cst1); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - \ - res0 = _mm_packs_epi32(tmp0, tmp1); \ - res1 = 
_mm_packs_epi32(tmp2, tmp3); \ - } - -#define IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, \ - out0, out1, out2, out3, out4, out5, out6, out7) \ - { \ - /* Stage1 */ \ - { \ - const __m128i lo_17 = _mm_unpacklo_epi16(in1, in7); \ - const __m128i hi_17 = _mm_unpackhi_epi16(in1, in7); \ - const __m128i lo_35 = _mm_unpacklo_epi16(in3, in5); \ - const __m128i hi_35 = _mm_unpackhi_epi16(in3, in5); \ - \ - MULTIPLICATION_AND_ADD(lo_17, hi_17, lo_35, hi_35, stg1_0, \ - stg1_1, stg1_2, stg1_3, stp1_4, \ - stp1_7, stp1_5, stp1_6) \ - } \ - \ - /* Stage2 */ \ - { \ - const __m128i lo_04 = _mm_unpacklo_epi16(in0, in4); \ - const __m128i hi_04 = _mm_unpackhi_epi16(in0, in4); \ - const __m128i lo_26 = _mm_unpacklo_epi16(in2, in6); \ - const __m128i hi_26 = _mm_unpackhi_epi16(in2, in6); \ - \ - MULTIPLICATION_AND_ADD(lo_04, hi_04, lo_26, hi_26, stg2_0, \ - stg2_1, stg2_2, stg2_3, stp2_0, \ - stp2_1, stp2_2, stp2_3) \ - \ - stp2_4 = _mm_adds_epi16(stp1_4, stp1_5); \ - stp2_5 = _mm_subs_epi16(stp1_4, stp1_5); \ - stp2_6 = _mm_subs_epi16(stp1_7, stp1_6); \ - stp2_7 = _mm_adds_epi16(stp1_7, stp1_6); \ - } \ - \ - /* Stage3 */ \ - { \ - const __m128i lo_56 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ - const __m128i hi_56 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ - \ - stp1_0 = _mm_adds_epi16(stp2_0, stp2_3); \ - stp1_1 = _mm_adds_epi16(stp2_1, stp2_2); \ - stp1_2 = _mm_subs_epi16(stp2_1, stp2_2); \ - stp1_3 = _mm_subs_epi16(stp2_0, stp2_3); \ - \ - tmp0 = _mm_madd_epi16(lo_56, stg2_1); \ - tmp1 = _mm_madd_epi16(hi_56, stg2_1); \ - tmp2 = _mm_madd_epi16(lo_56, stg2_0); \ - tmp3 = _mm_madd_epi16(hi_56, stg2_0); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - \ - stp1_5 
= _mm_packs_epi32(tmp0, tmp1); \ - stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ - } \ - \ - /* Stage4 */ \ - out0 = _mm_adds_epi16(stp1_0, stp2_7); \ - out1 = _mm_adds_epi16(stp1_1, stp1_6); \ - out2 = _mm_adds_epi16(stp1_2, stp1_5); \ - out3 = _mm_adds_epi16(stp1_3, stp2_4); \ - out4 = _mm_subs_epi16(stp1_3, stp2_4); \ - out5 = _mm_subs_epi16(stp1_2, stp1_5); \ - out6 = _mm_subs_epi16(stp1_1, stp1_6); \ - out7 = _mm_subs_epi16(stp1_0, stp2_7); \ - } - -void vpx_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - const __m128i zero = _mm_setzero_si128(); - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i final_rounding = _mm_set1_epi16(1 << 4); - const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64); - const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64); - const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64); - const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64); - - __m128i in0, in1, in2, in3, in4, in5, in6, in7; - __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7; - __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7; - __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - int i; - - // Load input data. 
- in0 = load_input_data(input); - in1 = load_input_data(input + 8 * 1); - in2 = load_input_data(input + 8 * 2); - in3 = load_input_data(input + 8 * 3); - in4 = load_input_data(input + 8 * 4); - in5 = load_input_data(input + 8 * 5); - in6 = load_input_data(input + 8 * 6); - in7 = load_input_data(input + 8 * 7); - - // 2-D - for (i = 0; i < 2; i++) { - // 8x8 Transpose is copied from vpx_fdct8x8_sse2() - TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - - // 4-stage 1D idct8x8 - IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, - in0, in1, in2, in3, in4, in5, in6, in7); - } - - // Final rounding and shift - in0 = _mm_adds_epi16(in0, final_rounding); - in1 = _mm_adds_epi16(in1, final_rounding); - in2 = _mm_adds_epi16(in2, final_rounding); - in3 = _mm_adds_epi16(in3, final_rounding); - in4 = _mm_adds_epi16(in4, final_rounding); - in5 = _mm_adds_epi16(in5, final_rounding); - in6 = _mm_adds_epi16(in6, final_rounding); - in7 = _mm_adds_epi16(in7, final_rounding); - - in0 = _mm_srai_epi16(in0, 5); - in1 = _mm_srai_epi16(in1, 5); - in2 = _mm_srai_epi16(in2, 5); - in3 = _mm_srai_epi16(in3, 5); - in4 = _mm_srai_epi16(in4, 5); - in5 = _mm_srai_epi16(in5, 5); - in6 = _mm_srai_epi16(in6, 5); - in7 = _mm_srai_epi16(in7, 5); - - RECON_AND_STORE(dest + 0 * stride, in0); - RECON_AND_STORE(dest + 1 * stride, in1); - RECON_AND_STORE(dest + 2 * stride, in2); - RECON_AND_STORE(dest + 3 * stride, in3); - RECON_AND_STORE(dest + 4 * stride, in4); - RECON_AND_STORE(dest + 5 * stride, in5); - RECON_AND_STORE(dest + 6 * stride, in6); - RECON_AND_STORE(dest + 7 * stride, in7); -} - -void vpx_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - __m128i dc_value; - const __m128i zero = _mm_setzero_si128(); - int a; - - a = (int)dct_const_round_shift(input[0] * cospi_16_64); - a = (int)dct_const_round_shift(a * cospi_16_64); - a = ROUND_POWER_OF_TWO(a, 5); - - dc_value = _mm_set1_epi16(a); - - RECON_AND_STORE(dest + 0 * stride, 
dc_value); - RECON_AND_STORE(dest + 1 * stride, dc_value); - RECON_AND_STORE(dest + 2 * stride, dc_value); - RECON_AND_STORE(dest + 3 * stride, dc_value); - RECON_AND_STORE(dest + 4 * stride, dc_value); - RECON_AND_STORE(dest + 5 * stride, dc_value); - RECON_AND_STORE(dest + 6 * stride, dc_value); - RECON_AND_STORE(dest + 7 * stride, dc_value); -} - -void idct8_sse2(__m128i *in) { - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64); - const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64); - const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64); - const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64); - - __m128i in0, in1, in2, in3, in4, in5, in6, in7; - __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7; - __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7; - __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - - // 8x8 Transpose is copied from vpx_fdct8x8_sse2() - TRANSPOSE_8X8(in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7], - in0, in1, in2, in3, in4, in5, in6, in7); - - // 4-stage 1D idct8x8 - IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, - in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7]); -} - -void iadst8_sse2(__m128i *in) { - const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); - const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); - const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); - const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); - const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); - const __m128i k__cospi_p14_m18 
= pair_set_epi16(cospi_14_64, -cospi_18_64); - const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); - const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); - const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); - const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); - const __m128i k__const_0 = _mm_set1_epi16(0); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - - __m128i u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15; - __m128i v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; - __m128i w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; - __m128i s0, s1, s2, s3, s4, s5, s6, s7; - __m128i in0, in1, in2, in3, in4, in5, in6, in7; - - // transpose - array_transpose_8x8(in, in); - - // properly aligned for butterfly input - in0 = in[7]; - in1 = in[0]; - in2 = in[5]; - in3 = in[2]; - in4 = in[3]; - in5 = in[4]; - in6 = in[1]; - in7 = in[6]; - - // column transformation - // stage 1 - // interleave and multiply/add into 32-bit integer - s0 = _mm_unpacklo_epi16(in0, in1); - s1 = _mm_unpackhi_epi16(in0, in1); - s2 = _mm_unpacklo_epi16(in2, in3); - s3 = _mm_unpackhi_epi16(in2, in3); - s4 = _mm_unpacklo_epi16(in4, in5); - s5 = _mm_unpackhi_epi16(in4, in5); - s6 = _mm_unpacklo_epi16(in6, in7); - s7 = _mm_unpackhi_epi16(in6, in7); - - u0 = _mm_madd_epi16(s0, k__cospi_p02_p30); - u1 = _mm_madd_epi16(s1, k__cospi_p02_p30); - u2 = _mm_madd_epi16(s0, k__cospi_p30_m02); - u3 = _mm_madd_epi16(s1, k__cospi_p30_m02); - u4 = _mm_madd_epi16(s2, k__cospi_p10_p22); - u5 = _mm_madd_epi16(s3, k__cospi_p10_p22); - u6 = _mm_madd_epi16(s2, k__cospi_p22_m10); - u7 = _mm_madd_epi16(s3, 
k__cospi_p22_m10); - u8 = _mm_madd_epi16(s4, k__cospi_p18_p14); - u9 = _mm_madd_epi16(s5, k__cospi_p18_p14); - u10 = _mm_madd_epi16(s4, k__cospi_p14_m18); - u11 = _mm_madd_epi16(s5, k__cospi_p14_m18); - u12 = _mm_madd_epi16(s6, k__cospi_p26_p06); - u13 = _mm_madd_epi16(s7, k__cospi_p26_p06); - u14 = _mm_madd_epi16(s6, k__cospi_p06_m26); - u15 = _mm_madd_epi16(s7, k__cospi_p06_m26); - - // addition - w0 = _mm_add_epi32(u0, u8); - w1 = _mm_add_epi32(u1, u9); - w2 = _mm_add_epi32(u2, u10); - w3 = _mm_add_epi32(u3, u11); - w4 = _mm_add_epi32(u4, u12); - w5 = _mm_add_epi32(u5, u13); - w6 = _mm_add_epi32(u6, u14); - w7 = _mm_add_epi32(u7, u15); - w8 = _mm_sub_epi32(u0, u8); - w9 = _mm_sub_epi32(u1, u9); - w10 = _mm_sub_epi32(u2, u10); - w11 = _mm_sub_epi32(u3, u11); - w12 = _mm_sub_epi32(u4, u12); - w13 = _mm_sub_epi32(u5, u13); - w14 = _mm_sub_epi32(u6, u14); - w15 = _mm_sub_epi32(u7, u15); - - // shift and rounding - v0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING); - v1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING); - v2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING); - v3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING); - v4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING); - v5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING); - v6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING); - v7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING); - v8 = _mm_add_epi32(w8, k__DCT_CONST_ROUNDING); - v9 = _mm_add_epi32(w9, k__DCT_CONST_ROUNDING); - v10 = _mm_add_epi32(w10, k__DCT_CONST_ROUNDING); - v11 = _mm_add_epi32(w11, k__DCT_CONST_ROUNDING); - v12 = _mm_add_epi32(w12, k__DCT_CONST_ROUNDING); - v13 = _mm_add_epi32(w13, k__DCT_CONST_ROUNDING); - v14 = _mm_add_epi32(w14, k__DCT_CONST_ROUNDING); - v15 = _mm_add_epi32(w15, k__DCT_CONST_ROUNDING); - - u0 = _mm_srai_epi32(v0, DCT_CONST_BITS); - u1 = _mm_srai_epi32(v1, DCT_CONST_BITS); - u2 = _mm_srai_epi32(v2, DCT_CONST_BITS); - u3 = _mm_srai_epi32(v3, DCT_CONST_BITS); - u4 = _mm_srai_epi32(v4, DCT_CONST_BITS); - u5 = _mm_srai_epi32(v5, DCT_CONST_BITS); - u6 = 
_mm_srai_epi32(v6, DCT_CONST_BITS); - u7 = _mm_srai_epi32(v7, DCT_CONST_BITS); - u8 = _mm_srai_epi32(v8, DCT_CONST_BITS); - u9 = _mm_srai_epi32(v9, DCT_CONST_BITS); - u10 = _mm_srai_epi32(v10, DCT_CONST_BITS); - u11 = _mm_srai_epi32(v11, DCT_CONST_BITS); - u12 = _mm_srai_epi32(v12, DCT_CONST_BITS); - u13 = _mm_srai_epi32(v13, DCT_CONST_BITS); - u14 = _mm_srai_epi32(v14, DCT_CONST_BITS); - u15 = _mm_srai_epi32(v15, DCT_CONST_BITS); - - // back to 16-bit and pack 8 integers into __m128i - in[0] = _mm_packs_epi32(u0, u1); - in[1] = _mm_packs_epi32(u2, u3); - in[2] = _mm_packs_epi32(u4, u5); - in[3] = _mm_packs_epi32(u6, u7); - in[4] = _mm_packs_epi32(u8, u9); - in[5] = _mm_packs_epi32(u10, u11); - in[6] = _mm_packs_epi32(u12, u13); - in[7] = _mm_packs_epi32(u14, u15); - - // stage 2 - s0 = _mm_add_epi16(in[0], in[2]); - s1 = _mm_add_epi16(in[1], in[3]); - s2 = _mm_sub_epi16(in[0], in[2]); - s3 = _mm_sub_epi16(in[1], in[3]); - u0 = _mm_unpacklo_epi16(in[4], in[5]); - u1 = _mm_unpackhi_epi16(in[4], in[5]); - u2 = _mm_unpacklo_epi16(in[6], in[7]); - u3 = _mm_unpackhi_epi16(in[6], in[7]); - - v0 = _mm_madd_epi16(u0, k__cospi_p08_p24); - v1 = _mm_madd_epi16(u1, k__cospi_p08_p24); - v2 = _mm_madd_epi16(u0, k__cospi_p24_m08); - v3 = _mm_madd_epi16(u1, k__cospi_p24_m08); - v4 = _mm_madd_epi16(u2, k__cospi_m24_p08); - v5 = _mm_madd_epi16(u3, k__cospi_m24_p08); - v6 = _mm_madd_epi16(u2, k__cospi_p08_p24); - v7 = _mm_madd_epi16(u3, k__cospi_p08_p24); - - w0 = _mm_add_epi32(v0, v4); - w1 = _mm_add_epi32(v1, v5); - w2 = _mm_add_epi32(v2, v6); - w3 = _mm_add_epi32(v3, v7); - w4 = _mm_sub_epi32(v0, v4); - w5 = _mm_sub_epi32(v1, v5); - w6 = _mm_sub_epi32(v2, v6); - w7 = _mm_sub_epi32(v3, v7); - - v0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING); - v1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING); - v2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING); - v3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING); - v4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING); - v5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING); 
- v6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING); - v7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING); - - u0 = _mm_srai_epi32(v0, DCT_CONST_BITS); - u1 = _mm_srai_epi32(v1, DCT_CONST_BITS); - u2 = _mm_srai_epi32(v2, DCT_CONST_BITS); - u3 = _mm_srai_epi32(v3, DCT_CONST_BITS); - u4 = _mm_srai_epi32(v4, DCT_CONST_BITS); - u5 = _mm_srai_epi32(v5, DCT_CONST_BITS); - u6 = _mm_srai_epi32(v6, DCT_CONST_BITS); - u7 = _mm_srai_epi32(v7, DCT_CONST_BITS); - - // back to 16-bit intergers - s4 = _mm_packs_epi32(u0, u1); - s5 = _mm_packs_epi32(u2, u3); - s6 = _mm_packs_epi32(u4, u5); - s7 = _mm_packs_epi32(u6, u7); - - // stage 3 - u0 = _mm_unpacklo_epi16(s2, s3); - u1 = _mm_unpackhi_epi16(s2, s3); - u2 = _mm_unpacklo_epi16(s6, s7); - u3 = _mm_unpackhi_epi16(s6, s7); - - v0 = _mm_madd_epi16(u0, k__cospi_p16_p16); - v1 = _mm_madd_epi16(u1, k__cospi_p16_p16); - v2 = _mm_madd_epi16(u0, k__cospi_p16_m16); - v3 = _mm_madd_epi16(u1, k__cospi_p16_m16); - v4 = _mm_madd_epi16(u2, k__cospi_p16_p16); - v5 = _mm_madd_epi16(u3, k__cospi_p16_p16); - v6 = _mm_madd_epi16(u2, k__cospi_p16_m16); - v7 = _mm_madd_epi16(u3, k__cospi_p16_m16); - - u0 = _mm_add_epi32(v0, k__DCT_CONST_ROUNDING); - u1 = _mm_add_epi32(v1, k__DCT_CONST_ROUNDING); - u2 = _mm_add_epi32(v2, k__DCT_CONST_ROUNDING); - u3 = _mm_add_epi32(v3, k__DCT_CONST_ROUNDING); - u4 = _mm_add_epi32(v4, k__DCT_CONST_ROUNDING); - u5 = _mm_add_epi32(v5, k__DCT_CONST_ROUNDING); - u6 = _mm_add_epi32(v6, k__DCT_CONST_ROUNDING); - u7 = _mm_add_epi32(v7, k__DCT_CONST_ROUNDING); - - v0 = _mm_srai_epi32(u0, DCT_CONST_BITS); - v1 = _mm_srai_epi32(u1, DCT_CONST_BITS); - v2 = _mm_srai_epi32(u2, DCT_CONST_BITS); - v3 = _mm_srai_epi32(u3, DCT_CONST_BITS); - v4 = _mm_srai_epi32(u4, DCT_CONST_BITS); - v5 = _mm_srai_epi32(u5, DCT_CONST_BITS); - v6 = _mm_srai_epi32(u6, DCT_CONST_BITS); - v7 = _mm_srai_epi32(u7, DCT_CONST_BITS); - - s2 = _mm_packs_epi32(v0, v1); - s3 = _mm_packs_epi32(v2, v3); - s6 = _mm_packs_epi32(v4, v5); - s7 = _mm_packs_epi32(v6, v7); - - 
in[0] = s0; - in[1] = _mm_sub_epi16(k__const_0, s4); - in[2] = s6; - in[3] = _mm_sub_epi16(k__const_0, s2); - in[4] = s3; - in[5] = _mm_sub_epi16(k__const_0, s7); - in[6] = s5; - in[7] = _mm_sub_epi16(k__const_0, s1); -} - -void vpx_idct8x8_12_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - const __m128i zero = _mm_setzero_si128(); - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i final_rounding = _mm_set1_epi16(1 << 4); - const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64); - const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64); - const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64); - const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i stg3_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); - - __m128i in0, in1, in2, in3, in4, in5, in6, in7; - __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7; - __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7; - __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - - // Rows. Load 4-row input data. 
- in0 = load_input_data(input); - in1 = load_input_data(input + 8 * 1); - in2 = load_input_data(input + 8 * 2); - in3 = load_input_data(input + 8 * 3); - - // 8x4 Transpose - TRANSPOSE_8X8_10(in0, in1, in2, in3, in0, in1); - // Stage1 - { - const __m128i lo_17 = _mm_unpackhi_epi16(in0, zero); - const __m128i lo_35 = _mm_unpackhi_epi16(in1, zero); - - tmp0 = _mm_madd_epi16(lo_17, stg1_0); - tmp2 = _mm_madd_epi16(lo_17, stg1_1); - tmp4 = _mm_madd_epi16(lo_35, stg1_2); - tmp6 = _mm_madd_epi16(lo_35, stg1_3); - - tmp0 = _mm_add_epi32(tmp0, rounding); - tmp2 = _mm_add_epi32(tmp2, rounding); - tmp4 = _mm_add_epi32(tmp4, rounding); - tmp6 = _mm_add_epi32(tmp6, rounding); - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); - tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); - - stp1_4 = _mm_packs_epi32(tmp0, tmp2); - stp1_5 = _mm_packs_epi32(tmp4, tmp6); - } - - // Stage2 - { - const __m128i lo_04 = _mm_unpacklo_epi16(in0, zero); - const __m128i lo_26 = _mm_unpacklo_epi16(in1, zero); - - tmp0 = _mm_madd_epi16(lo_04, stg2_0); - tmp2 = _mm_madd_epi16(lo_04, stg2_1); - tmp4 = _mm_madd_epi16(lo_26, stg2_2); - tmp6 = _mm_madd_epi16(lo_26, stg2_3); - - tmp0 = _mm_add_epi32(tmp0, rounding); - tmp2 = _mm_add_epi32(tmp2, rounding); - tmp4 = _mm_add_epi32(tmp4, rounding); - tmp6 = _mm_add_epi32(tmp6, rounding); - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); - tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); - - stp2_0 = _mm_packs_epi32(tmp0, tmp2); - stp2_2 = _mm_packs_epi32(tmp6, tmp4); - - tmp0 = _mm_adds_epi16(stp1_4, stp1_5); - tmp1 = _mm_subs_epi16(stp1_4, stp1_5); - - stp2_4 = tmp0; - stp2_5 = _mm_unpacklo_epi64(tmp1, zero); - stp2_6 = _mm_unpackhi_epi64(tmp1, zero); - } - - // Stage3 - { - const __m128i lo_56 = _mm_unpacklo_epi16(stp2_5, stp2_6); - - tmp4 = _mm_adds_epi16(stp2_0, stp2_2); - tmp6 = 
_mm_subs_epi16(stp2_0, stp2_2); - - stp1_2 = _mm_unpackhi_epi64(tmp6, tmp4); - stp1_3 = _mm_unpacklo_epi64(tmp6, tmp4); - - tmp0 = _mm_madd_epi16(lo_56, stg3_0); - tmp2 = _mm_madd_epi16(lo_56, stg2_0); // stg3_1 = stg2_0 - - tmp0 = _mm_add_epi32(tmp0, rounding); - tmp2 = _mm_add_epi32(tmp2, rounding); - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - - stp1_5 = _mm_packs_epi32(tmp0, tmp2); - } - - // Stage4 - tmp0 = _mm_adds_epi16(stp1_3, stp2_4); - tmp1 = _mm_adds_epi16(stp1_2, stp1_5); - tmp2 = _mm_subs_epi16(stp1_3, stp2_4); - tmp3 = _mm_subs_epi16(stp1_2, stp1_5); - - TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, in0, in1, in2, in3) - - IDCT8(in0, in1, in2, in3, zero, zero, zero, zero, - in0, in1, in2, in3, in4, in5, in6, in7); - // Final rounding and shift - in0 = _mm_adds_epi16(in0, final_rounding); - in1 = _mm_adds_epi16(in1, final_rounding); - in2 = _mm_adds_epi16(in2, final_rounding); - in3 = _mm_adds_epi16(in3, final_rounding); - in4 = _mm_adds_epi16(in4, final_rounding); - in5 = _mm_adds_epi16(in5, final_rounding); - in6 = _mm_adds_epi16(in6, final_rounding); - in7 = _mm_adds_epi16(in7, final_rounding); - - in0 = _mm_srai_epi16(in0, 5); - in1 = _mm_srai_epi16(in1, 5); - in2 = _mm_srai_epi16(in2, 5); - in3 = _mm_srai_epi16(in3, 5); - in4 = _mm_srai_epi16(in4, 5); - in5 = _mm_srai_epi16(in5, 5); - in6 = _mm_srai_epi16(in6, 5); - in7 = _mm_srai_epi16(in7, 5); - - RECON_AND_STORE(dest + 0 * stride, in0); - RECON_AND_STORE(dest + 1 * stride, in1); - RECON_AND_STORE(dest + 2 * stride, in2); - RECON_AND_STORE(dest + 3 * stride, in3); - RECON_AND_STORE(dest + 4 * stride, in4); - RECON_AND_STORE(dest + 5 * stride, in5); - RECON_AND_STORE(dest + 6 * stride, in6); - RECON_AND_STORE(dest + 7 * stride, in7); -} - -#define IDCT16 \ - /* Stage2 */ \ - { \ - const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], in[15]); \ - const __m128i hi_1_15 = _mm_unpackhi_epi16(in[1], in[15]); \ - const __m128i lo_9_7 = _mm_unpacklo_epi16(in[9], 
in[7]); \ - const __m128i hi_9_7 = _mm_unpackhi_epi16(in[9], in[7]); \ - const __m128i lo_5_11 = _mm_unpacklo_epi16(in[5], in[11]); \ - const __m128i hi_5_11 = _mm_unpackhi_epi16(in[5], in[11]); \ - const __m128i lo_13_3 = _mm_unpacklo_epi16(in[13], in[3]); \ - const __m128i hi_13_3 = _mm_unpackhi_epi16(in[13], in[3]); \ - \ - MULTIPLICATION_AND_ADD(lo_1_15, hi_1_15, lo_9_7, hi_9_7, \ - stg2_0, stg2_1, stg2_2, stg2_3, \ - stp2_8, stp2_15, stp2_9, stp2_14) \ - \ - MULTIPLICATION_AND_ADD(lo_5_11, hi_5_11, lo_13_3, hi_13_3, \ - stg2_4, stg2_5, stg2_6, stg2_7, \ - stp2_10, stp2_13, stp2_11, stp2_12) \ - } \ - \ - /* Stage3 */ \ - { \ - const __m128i lo_2_14 = _mm_unpacklo_epi16(in[2], in[14]); \ - const __m128i hi_2_14 = _mm_unpackhi_epi16(in[2], in[14]); \ - const __m128i lo_10_6 = _mm_unpacklo_epi16(in[10], in[6]); \ - const __m128i hi_10_6 = _mm_unpackhi_epi16(in[10], in[6]); \ - \ - MULTIPLICATION_AND_ADD(lo_2_14, hi_2_14, lo_10_6, hi_10_6, \ - stg3_0, stg3_1, stg3_2, stg3_3, \ - stp1_4, stp1_7, stp1_5, stp1_6) \ - \ - stp1_8_0 = _mm_add_epi16(stp2_8, stp2_9); \ - stp1_9 = _mm_sub_epi16(stp2_8, stp2_9); \ - stp1_10 = _mm_sub_epi16(stp2_11, stp2_10); \ - stp1_11 = _mm_add_epi16(stp2_11, stp2_10); \ - \ - stp1_12_0 = _mm_add_epi16(stp2_12, stp2_13); \ - stp1_13 = _mm_sub_epi16(stp2_12, stp2_13); \ - stp1_14 = _mm_sub_epi16(stp2_15, stp2_14); \ - stp1_15 = _mm_add_epi16(stp2_15, stp2_14); \ - } \ - \ - /* Stage4 */ \ - { \ - const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], in[8]); \ - const __m128i hi_0_8 = _mm_unpackhi_epi16(in[0], in[8]); \ - const __m128i lo_4_12 = _mm_unpacklo_epi16(in[4], in[12]); \ - const __m128i hi_4_12 = _mm_unpackhi_epi16(in[4], in[12]); \ - \ - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \ - const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - \ - MULTIPLICATION_AND_ADD(lo_0_8, 
hi_0_8, lo_4_12, hi_4_12, \ - stg4_0, stg4_1, stg4_2, stg4_3, \ - stp2_0, stp2_1, stp2_2, stp2_3) \ - \ - stp2_4 = _mm_add_epi16(stp1_4, stp1_5); \ - stp2_5 = _mm_sub_epi16(stp1_4, stp1_5); \ - stp2_6 = _mm_sub_epi16(stp1_7, stp1_6); \ - stp2_7 = _mm_add_epi16(stp1_7, stp1_6); \ - \ - MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, \ - stg4_4, stg4_5, stg4_6, stg4_7, \ - stp2_9, stp2_14, stp2_10, stp2_13) \ - } \ - \ - /* Stage5 */ \ - { \ - const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ - const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ - \ - stp1_0 = _mm_add_epi16(stp2_0, stp2_3); \ - stp1_1 = _mm_add_epi16(stp2_1, stp2_2); \ - stp1_2 = _mm_sub_epi16(stp2_1, stp2_2); \ - stp1_3 = _mm_sub_epi16(stp2_0, stp2_3); \ - \ - tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ - tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ - tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ - tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - \ - stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ - stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ - \ - stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11); \ - stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ - stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ - stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11); \ - \ - stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0); \ - stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ - stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ - stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0); \ - } \ - \ - /* Stage6 */ \ - { \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - const __m128i lo_11_12 = 
_mm_unpacklo_epi16(stp1_11, stp1_12); \ - const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ - \ - stp2_0 = _mm_add_epi16(stp1_0, stp2_7); \ - stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ - stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ - stp2_3 = _mm_add_epi16(stp1_3, stp2_4); \ - stp2_4 = _mm_sub_epi16(stp1_3, stp2_4); \ - stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ - stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ - stp2_7 = _mm_sub_epi16(stp1_0, stp2_7); \ - \ - MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, \ - stg6_0, stg4_0, stg6_0, stg4_0, \ - stp2_10, stp2_13, stp2_11, stp2_12) \ - } - -#define IDCT16_10 \ - /* Stage2 */ \ - { \ - const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], zero); \ - const __m128i hi_1_15 = _mm_unpackhi_epi16(in[1], zero); \ - const __m128i lo_13_3 = _mm_unpacklo_epi16(zero, in[3]); \ - const __m128i hi_13_3 = _mm_unpackhi_epi16(zero, in[3]); \ - \ - MULTIPLICATION_AND_ADD(lo_1_15, hi_1_15, lo_13_3, hi_13_3, \ - stg2_0, stg2_1, stg2_6, stg2_7, \ - stp1_8_0, stp1_15, stp1_11, stp1_12_0) \ - } \ - \ - /* Stage3 */ \ - { \ - const __m128i lo_2_14 = _mm_unpacklo_epi16(in[2], zero); \ - const __m128i hi_2_14 = _mm_unpackhi_epi16(in[2], zero); \ - \ - MULTIPLICATION_AND_ADD_2(lo_2_14, hi_2_14, \ - stg3_0, stg3_1, \ - stp2_4, stp2_7) \ - \ - stp1_9 = stp1_8_0; \ - stp1_10 = stp1_11; \ - \ - stp1_13 = stp1_12_0; \ - stp1_14 = stp1_15; \ - } \ - \ - /* Stage4 */ \ - { \ - const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], zero); \ - const __m128i hi_0_8 = _mm_unpackhi_epi16(in[0], zero); \ - \ - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \ - const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - \ - MULTIPLICATION_AND_ADD_2(lo_0_8, hi_0_8, \ - stg4_0, stg4_1, \ - stp1_0, stp1_1) \ - stp2_5 = stp2_4; \ - stp2_6 = stp2_7; \ - \ - MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, 
lo_10_13, hi_10_13, \ - stg4_4, stg4_5, stg4_6, stg4_7, \ - stp2_9, stp2_14, stp2_10, stp2_13) \ - } \ - \ - /* Stage5 */ \ - { \ - const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ - const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ - \ - stp1_2 = stp1_1; \ - stp1_3 = stp1_0; \ - \ - tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ - tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ - tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ - tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - \ - stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ - stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ - \ - stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11); \ - stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ - stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ - stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11); \ - \ - stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0); \ - stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ - stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ - stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0); \ - } \ - \ - /* Stage6 */ \ - { \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ - const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ - \ - stp2_0 = _mm_add_epi16(stp1_0, stp2_7); \ - stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ - stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ - stp2_3 = _mm_add_epi16(stp1_3, stp2_4); \ - stp2_4 = _mm_sub_epi16(stp1_3, stp2_4); \ - stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ - stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ - stp2_7 = _mm_sub_epi16(stp1_0, stp2_7); \ - \ - 
MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, \ - stg6_0, stg4_0, stg6_0, stg4_0, \ - stp2_10, stp2_13, stp2_11, stp2_12) \ - } - -void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i final_rounding = _mm_set1_epi16(1 << 5); - const __m128i zero = _mm_setzero_si128(); - - const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64); - const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64); - const __m128i stg2_2 = pair_set_epi16(cospi_14_64, -cospi_18_64); - const __m128i stg2_3 = pair_set_epi16(cospi_18_64, cospi_14_64); - const __m128i stg2_4 = pair_set_epi16(cospi_22_64, -cospi_10_64); - const __m128i stg2_5 = pair_set_epi16(cospi_10_64, cospi_22_64); - const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64); - const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64); - - const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i stg3_2 = pair_set_epi16(cospi_12_64, -cospi_20_64); - const __m128i stg3_3 = pair_set_epi16(cospi_20_64, cospi_12_64); - - const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64); - const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i stg4_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i stg4_3 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); - const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); - const __m128i stg4_7 = pair_set_epi16(-cospi_8_64, cospi_24_64); - - const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); - - __m128i in[16], l[16], r[16], *curr1; - __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7, - stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, 
stp1_15, - stp1_8_0, stp1_12_0; - __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, - stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15; - __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - int i; - - curr1 = l; - for (i = 0; i < 2; i++) { - // 1-D idct - - // Load input data. - in[0] = load_input_data(input); - in[8] = load_input_data(input + 8 * 1); - in[1] = load_input_data(input + 8 * 2); - in[9] = load_input_data(input + 8 * 3); - in[2] = load_input_data(input + 8 * 4); - in[10] = load_input_data(input + 8 * 5); - in[3] = load_input_data(input + 8 * 6); - in[11] = load_input_data(input + 8 * 7); - in[4] = load_input_data(input + 8 * 8); - in[12] = load_input_data(input + 8 * 9); - in[5] = load_input_data(input + 8 * 10); - in[13] = load_input_data(input + 8 * 11); - in[6] = load_input_data(input + 8 * 12); - in[14] = load_input_data(input + 8 * 13); - in[7] = load_input_data(input + 8 * 14); - in[15] = load_input_data(input + 8 * 15); - - array_transpose_8x8(in, in); - array_transpose_8x8(in + 8, in + 8); - - IDCT16 - - // Stage7 - curr1[0] = _mm_add_epi16(stp2_0, stp1_15); - curr1[1] = _mm_add_epi16(stp2_1, stp1_14); - curr1[2] = _mm_add_epi16(stp2_2, stp2_13); - curr1[3] = _mm_add_epi16(stp2_3, stp2_12); - curr1[4] = _mm_add_epi16(stp2_4, stp2_11); - curr1[5] = _mm_add_epi16(stp2_5, stp2_10); - curr1[6] = _mm_add_epi16(stp2_6, stp1_9); - curr1[7] = _mm_add_epi16(stp2_7, stp1_8); - curr1[8] = _mm_sub_epi16(stp2_7, stp1_8); - curr1[9] = _mm_sub_epi16(stp2_6, stp1_9); - curr1[10] = _mm_sub_epi16(stp2_5, stp2_10); - curr1[11] = _mm_sub_epi16(stp2_4, stp2_11); - curr1[12] = _mm_sub_epi16(stp2_3, stp2_12); - curr1[13] = _mm_sub_epi16(stp2_2, stp2_13); - curr1[14] = _mm_sub_epi16(stp2_1, stp1_14); - curr1[15] = _mm_sub_epi16(stp2_0, stp1_15); - - curr1 = r; - input += 128; - } - for (i = 0; i < 2; i++) { - int j; - // 1-D idct - array_transpose_8x8(l + i * 8, in); - array_transpose_8x8(r + i * 8, in + 8); - - IDCT16 - 
- // 2-D - in[0] = _mm_add_epi16(stp2_0, stp1_15); - in[1] = _mm_add_epi16(stp2_1, stp1_14); - in[2] = _mm_add_epi16(stp2_2, stp2_13); - in[3] = _mm_add_epi16(stp2_3, stp2_12); - in[4] = _mm_add_epi16(stp2_4, stp2_11); - in[5] = _mm_add_epi16(stp2_5, stp2_10); - in[6] = _mm_add_epi16(stp2_6, stp1_9); - in[7] = _mm_add_epi16(stp2_7, stp1_8); - in[8] = _mm_sub_epi16(stp2_7, stp1_8); - in[9] = _mm_sub_epi16(stp2_6, stp1_9); - in[10] = _mm_sub_epi16(stp2_5, stp2_10); - in[11] = _mm_sub_epi16(stp2_4, stp2_11); - in[12] = _mm_sub_epi16(stp2_3, stp2_12); - in[13] = _mm_sub_epi16(stp2_2, stp2_13); - in[14] = _mm_sub_epi16(stp2_1, stp1_14); - in[15] = _mm_sub_epi16(stp2_0, stp1_15); - - for (j = 0; j < 16; ++j) { - // Final rounding and shift - in[j] = _mm_adds_epi16(in[j], final_rounding); - in[j] = _mm_srai_epi16(in[j], 6); - RECON_AND_STORE(dest + j * stride, in[j]); - } - - dest += 8; - } -} - -void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - __m128i dc_value; - const __m128i zero = _mm_setzero_si128(); - int a, i; - - a = (int)dct_const_round_shift(input[0] * cospi_16_64); - a = (int)dct_const_round_shift(a * cospi_16_64); - a = ROUND_POWER_OF_TWO(a, 6); - - dc_value = _mm_set1_epi16(a); - - for (i = 0; i < 16; ++i) { - RECON_AND_STORE(dest + 0, dc_value); - RECON_AND_STORE(dest + 8, dc_value); - dest += stride; - } -} - -static void iadst16_8col(__m128i *in) { - // perform 16x16 1-D ADST for 8 columns - __m128i s[16], x[16], u[32], v[32]; - const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64); - const __m128i k__cospi_p31_m01 = pair_set_epi16(cospi_31_64, -cospi_1_64); - const __m128i k__cospi_p05_p27 = pair_set_epi16(cospi_5_64, cospi_27_64); - const __m128i k__cospi_p27_m05 = pair_set_epi16(cospi_27_64, -cospi_5_64); - const __m128i k__cospi_p09_p23 = pair_set_epi16(cospi_9_64, cospi_23_64); - const __m128i k__cospi_p23_m09 = pair_set_epi16(cospi_23_64, -cospi_9_64); - const __m128i k__cospi_p13_p19 = 
pair_set_epi16(cospi_13_64, cospi_19_64); - const __m128i k__cospi_p19_m13 = pair_set_epi16(cospi_19_64, -cospi_13_64); - const __m128i k__cospi_p17_p15 = pair_set_epi16(cospi_17_64, cospi_15_64); - const __m128i k__cospi_p15_m17 = pair_set_epi16(cospi_15_64, -cospi_17_64); - const __m128i k__cospi_p21_p11 = pair_set_epi16(cospi_21_64, cospi_11_64); - const __m128i k__cospi_p11_m21 = pair_set_epi16(cospi_11_64, -cospi_21_64); - const __m128i k__cospi_p25_p07 = pair_set_epi16(cospi_25_64, cospi_7_64); - const __m128i k__cospi_p07_m25 = pair_set_epi16(cospi_7_64, -cospi_25_64); - const __m128i k__cospi_p29_p03 = pair_set_epi16(cospi_29_64, cospi_3_64); - const __m128i k__cospi_p03_m29 = pair_set_epi16(cospi_3_64, -cospi_29_64); - const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); - const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); - const __m128i k__cospi_m28_p04 = pair_set_epi16(-cospi_28_64, cospi_4_64); - const __m128i k__cospi_m12_p20 = pair_set_epi16(-cospi_12_64, cospi_20_64); - const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); - const __m128i k__cospi_m16_m16 = _mm_set1_epi16((int16_t)-cospi_16_64); - const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); - const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i kZero = _mm_set1_epi16(0); - - u[0] = _mm_unpacklo_epi16(in[15], in[0]); - u[1] = _mm_unpackhi_epi16(in[15], in[0]); - u[2] = _mm_unpacklo_epi16(in[13], in[2]); - u[3] = 
_mm_unpackhi_epi16(in[13], in[2]); - u[4] = _mm_unpacklo_epi16(in[11], in[4]); - u[5] = _mm_unpackhi_epi16(in[11], in[4]); - u[6] = _mm_unpacklo_epi16(in[9], in[6]); - u[7] = _mm_unpackhi_epi16(in[9], in[6]); - u[8] = _mm_unpacklo_epi16(in[7], in[8]); - u[9] = _mm_unpackhi_epi16(in[7], in[8]); - u[10] = _mm_unpacklo_epi16(in[5], in[10]); - u[11] = _mm_unpackhi_epi16(in[5], in[10]); - u[12] = _mm_unpacklo_epi16(in[3], in[12]); - u[13] = _mm_unpackhi_epi16(in[3], in[12]); - u[14] = _mm_unpacklo_epi16(in[1], in[14]); - u[15] = _mm_unpackhi_epi16(in[1], in[14]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_p01_p31); - v[1] = _mm_madd_epi16(u[1], k__cospi_p01_p31); - v[2] = _mm_madd_epi16(u[0], k__cospi_p31_m01); - v[3] = _mm_madd_epi16(u[1], k__cospi_p31_m01); - v[4] = _mm_madd_epi16(u[2], k__cospi_p05_p27); - v[5] = _mm_madd_epi16(u[3], k__cospi_p05_p27); - v[6] = _mm_madd_epi16(u[2], k__cospi_p27_m05); - v[7] = _mm_madd_epi16(u[3], k__cospi_p27_m05); - v[8] = _mm_madd_epi16(u[4], k__cospi_p09_p23); - v[9] = _mm_madd_epi16(u[5], k__cospi_p09_p23); - v[10] = _mm_madd_epi16(u[4], k__cospi_p23_m09); - v[11] = _mm_madd_epi16(u[5], k__cospi_p23_m09); - v[12] = _mm_madd_epi16(u[6], k__cospi_p13_p19); - v[13] = _mm_madd_epi16(u[7], k__cospi_p13_p19); - v[14] = _mm_madd_epi16(u[6], k__cospi_p19_m13); - v[15] = _mm_madd_epi16(u[7], k__cospi_p19_m13); - v[16] = _mm_madd_epi16(u[8], k__cospi_p17_p15); - v[17] = _mm_madd_epi16(u[9], k__cospi_p17_p15); - v[18] = _mm_madd_epi16(u[8], k__cospi_p15_m17); - v[19] = _mm_madd_epi16(u[9], k__cospi_p15_m17); - v[20] = _mm_madd_epi16(u[10], k__cospi_p21_p11); - v[21] = _mm_madd_epi16(u[11], k__cospi_p21_p11); - v[22] = _mm_madd_epi16(u[10], k__cospi_p11_m21); - v[23] = _mm_madd_epi16(u[11], k__cospi_p11_m21); - v[24] = _mm_madd_epi16(u[12], k__cospi_p25_p07); - v[25] = _mm_madd_epi16(u[13], k__cospi_p25_p07); - v[26] = _mm_madd_epi16(u[12], k__cospi_p07_m25); - v[27] = _mm_madd_epi16(u[13], k__cospi_p07_m25); - v[28] = _mm_madd_epi16(u[14], 
k__cospi_p29_p03); - v[29] = _mm_madd_epi16(u[15], k__cospi_p29_p03); - v[30] = _mm_madd_epi16(u[14], k__cospi_p03_m29); - v[31] = _mm_madd_epi16(u[15], k__cospi_p03_m29); - - u[0] = _mm_add_epi32(v[0], v[16]); - u[1] = _mm_add_epi32(v[1], v[17]); - u[2] = _mm_add_epi32(v[2], v[18]); - u[3] = _mm_add_epi32(v[3], v[19]); - u[4] = _mm_add_epi32(v[4], v[20]); - u[5] = _mm_add_epi32(v[5], v[21]); - u[6] = _mm_add_epi32(v[6], v[22]); - u[7] = _mm_add_epi32(v[7], v[23]); - u[8] = _mm_add_epi32(v[8], v[24]); - u[9] = _mm_add_epi32(v[9], v[25]); - u[10] = _mm_add_epi32(v[10], v[26]); - u[11] = _mm_add_epi32(v[11], v[27]); - u[12] = _mm_add_epi32(v[12], v[28]); - u[13] = _mm_add_epi32(v[13], v[29]); - u[14] = _mm_add_epi32(v[14], v[30]); - u[15] = _mm_add_epi32(v[15], v[31]); - u[16] = _mm_sub_epi32(v[0], v[16]); - u[17] = _mm_sub_epi32(v[1], v[17]); - u[18] = _mm_sub_epi32(v[2], v[18]); - u[19] = _mm_sub_epi32(v[3], v[19]); - u[20] = _mm_sub_epi32(v[4], v[20]); - u[21] = _mm_sub_epi32(v[5], v[21]); - u[22] = _mm_sub_epi32(v[6], v[22]); - u[23] = _mm_sub_epi32(v[7], v[23]); - u[24] = _mm_sub_epi32(v[8], v[24]); - u[25] = _mm_sub_epi32(v[9], v[25]); - u[26] = _mm_sub_epi32(v[10], v[26]); - u[27] = _mm_sub_epi32(v[11], v[27]); - u[28] = _mm_sub_epi32(v[12], v[28]); - u[29] = _mm_sub_epi32(v[13], v[29]); - u[30] = _mm_sub_epi32(v[14], v[30]); - u[31] = _mm_sub_epi32(v[15], v[31]); - - v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); - v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); - v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); - v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); - v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); - v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); - v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); - v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); - v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); - v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); - v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); - v[11] = 
_mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); - v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); - v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING); - v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); - v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); - v[16] = _mm_add_epi32(u[16], k__DCT_CONST_ROUNDING); - v[17] = _mm_add_epi32(u[17], k__DCT_CONST_ROUNDING); - v[18] = _mm_add_epi32(u[18], k__DCT_CONST_ROUNDING); - v[19] = _mm_add_epi32(u[19], k__DCT_CONST_ROUNDING); - v[20] = _mm_add_epi32(u[20], k__DCT_CONST_ROUNDING); - v[21] = _mm_add_epi32(u[21], k__DCT_CONST_ROUNDING); - v[22] = _mm_add_epi32(u[22], k__DCT_CONST_ROUNDING); - v[23] = _mm_add_epi32(u[23], k__DCT_CONST_ROUNDING); - v[24] = _mm_add_epi32(u[24], k__DCT_CONST_ROUNDING); - v[25] = _mm_add_epi32(u[25], k__DCT_CONST_ROUNDING); - v[26] = _mm_add_epi32(u[26], k__DCT_CONST_ROUNDING); - v[27] = _mm_add_epi32(u[27], k__DCT_CONST_ROUNDING); - v[28] = _mm_add_epi32(u[28], k__DCT_CONST_ROUNDING); - v[29] = _mm_add_epi32(u[29], k__DCT_CONST_ROUNDING); - v[30] = _mm_add_epi32(u[30], k__DCT_CONST_ROUNDING); - v[31] = _mm_add_epi32(u[31], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); - u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS); - u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS); - u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS); - u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS); - u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS); - u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS); - u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS); - u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS); - u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS); - u[16] = _mm_srai_epi32(v[16], DCT_CONST_BITS); - u[17] = _mm_srai_epi32(v[17], 
DCT_CONST_BITS); - u[18] = _mm_srai_epi32(v[18], DCT_CONST_BITS); - u[19] = _mm_srai_epi32(v[19], DCT_CONST_BITS); - u[20] = _mm_srai_epi32(v[20], DCT_CONST_BITS); - u[21] = _mm_srai_epi32(v[21], DCT_CONST_BITS); - u[22] = _mm_srai_epi32(v[22], DCT_CONST_BITS); - u[23] = _mm_srai_epi32(v[23], DCT_CONST_BITS); - u[24] = _mm_srai_epi32(v[24], DCT_CONST_BITS); - u[25] = _mm_srai_epi32(v[25], DCT_CONST_BITS); - u[26] = _mm_srai_epi32(v[26], DCT_CONST_BITS); - u[27] = _mm_srai_epi32(v[27], DCT_CONST_BITS); - u[28] = _mm_srai_epi32(v[28], DCT_CONST_BITS); - u[29] = _mm_srai_epi32(v[29], DCT_CONST_BITS); - u[30] = _mm_srai_epi32(v[30], DCT_CONST_BITS); - u[31] = _mm_srai_epi32(v[31], DCT_CONST_BITS); - - s[0] = _mm_packs_epi32(u[0], u[1]); - s[1] = _mm_packs_epi32(u[2], u[3]); - s[2] = _mm_packs_epi32(u[4], u[5]); - s[3] = _mm_packs_epi32(u[6], u[7]); - s[4] = _mm_packs_epi32(u[8], u[9]); - s[5] = _mm_packs_epi32(u[10], u[11]); - s[6] = _mm_packs_epi32(u[12], u[13]); - s[7] = _mm_packs_epi32(u[14], u[15]); - s[8] = _mm_packs_epi32(u[16], u[17]); - s[9] = _mm_packs_epi32(u[18], u[19]); - s[10] = _mm_packs_epi32(u[20], u[21]); - s[11] = _mm_packs_epi32(u[22], u[23]); - s[12] = _mm_packs_epi32(u[24], u[25]); - s[13] = _mm_packs_epi32(u[26], u[27]); - s[14] = _mm_packs_epi32(u[28], u[29]); - s[15] = _mm_packs_epi32(u[30], u[31]); - - // stage 2 - u[0] = _mm_unpacklo_epi16(s[8], s[9]); - u[1] = _mm_unpackhi_epi16(s[8], s[9]); - u[2] = _mm_unpacklo_epi16(s[10], s[11]); - u[3] = _mm_unpackhi_epi16(s[10], s[11]); - u[4] = _mm_unpacklo_epi16(s[12], s[13]); - u[5] = _mm_unpackhi_epi16(s[12], s[13]); - u[6] = _mm_unpacklo_epi16(s[14], s[15]); - u[7] = _mm_unpackhi_epi16(s[14], s[15]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_p04_p28); - v[1] = _mm_madd_epi16(u[1], k__cospi_p04_p28); - v[2] = _mm_madd_epi16(u[0], k__cospi_p28_m04); - v[3] = _mm_madd_epi16(u[1], k__cospi_p28_m04); - v[4] = _mm_madd_epi16(u[2], k__cospi_p20_p12); - v[5] = _mm_madd_epi16(u[3], k__cospi_p20_p12); - v[6] 
= _mm_madd_epi16(u[2], k__cospi_p12_m20); - v[7] = _mm_madd_epi16(u[3], k__cospi_p12_m20); - v[8] = _mm_madd_epi16(u[4], k__cospi_m28_p04); - v[9] = _mm_madd_epi16(u[5], k__cospi_m28_p04); - v[10] = _mm_madd_epi16(u[4], k__cospi_p04_p28); - v[11] = _mm_madd_epi16(u[5], k__cospi_p04_p28); - v[12] = _mm_madd_epi16(u[6], k__cospi_m12_p20); - v[13] = _mm_madd_epi16(u[7], k__cospi_m12_p20); - v[14] = _mm_madd_epi16(u[6], k__cospi_p20_p12); - v[15] = _mm_madd_epi16(u[7], k__cospi_p20_p12); - - u[0] = _mm_add_epi32(v[0], v[8]); - u[1] = _mm_add_epi32(v[1], v[9]); - u[2] = _mm_add_epi32(v[2], v[10]); - u[3] = _mm_add_epi32(v[3], v[11]); - u[4] = _mm_add_epi32(v[4], v[12]); - u[5] = _mm_add_epi32(v[5], v[13]); - u[6] = _mm_add_epi32(v[6], v[14]); - u[7] = _mm_add_epi32(v[7], v[15]); - u[8] = _mm_sub_epi32(v[0], v[8]); - u[9] = _mm_sub_epi32(v[1], v[9]); - u[10] = _mm_sub_epi32(v[2], v[10]); - u[11] = _mm_sub_epi32(v[3], v[11]); - u[12] = _mm_sub_epi32(v[4], v[12]); - u[13] = _mm_sub_epi32(v[5], v[13]); - u[14] = _mm_sub_epi32(v[6], v[14]); - u[15] = _mm_sub_epi32(v[7], v[15]); - - v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); - v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); - v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); - v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); - v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); - v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); - v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); - v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); - v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); - v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); - v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); - v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); - v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); - v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING); - v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); - v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); - 
u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); - u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS); - u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS); - u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS); - u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS); - u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS); - u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS); - u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS); - u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS); - u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS); - - x[0] = _mm_add_epi16(s[0], s[4]); - x[1] = _mm_add_epi16(s[1], s[5]); - x[2] = _mm_add_epi16(s[2], s[6]); - x[3] = _mm_add_epi16(s[3], s[7]); - x[4] = _mm_sub_epi16(s[0], s[4]); - x[5] = _mm_sub_epi16(s[1], s[5]); - x[6] = _mm_sub_epi16(s[2], s[6]); - x[7] = _mm_sub_epi16(s[3], s[7]); - x[8] = _mm_packs_epi32(u[0], u[1]); - x[9] = _mm_packs_epi32(u[2], u[3]); - x[10] = _mm_packs_epi32(u[4], u[5]); - x[11] = _mm_packs_epi32(u[6], u[7]); - x[12] = _mm_packs_epi32(u[8], u[9]); - x[13] = _mm_packs_epi32(u[10], u[11]); - x[14] = _mm_packs_epi32(u[12], u[13]); - x[15] = _mm_packs_epi32(u[14], u[15]); - - // stage 3 - u[0] = _mm_unpacklo_epi16(x[4], x[5]); - u[1] = _mm_unpackhi_epi16(x[4], x[5]); - u[2] = _mm_unpacklo_epi16(x[6], x[7]); - u[3] = _mm_unpackhi_epi16(x[6], x[7]); - u[4] = _mm_unpacklo_epi16(x[12], x[13]); - u[5] = _mm_unpackhi_epi16(x[12], x[13]); - u[6] = _mm_unpacklo_epi16(x[14], x[15]); - u[7] = _mm_unpackhi_epi16(x[14], x[15]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_p08_p24); - v[1] = _mm_madd_epi16(u[1], k__cospi_p08_p24); - v[2] = _mm_madd_epi16(u[0], k__cospi_p24_m08); - v[3] = _mm_madd_epi16(u[1], k__cospi_p24_m08); - v[4] = _mm_madd_epi16(u[2], k__cospi_m24_p08); - v[5] = _mm_madd_epi16(u[3], k__cospi_m24_p08); - v[6] = _mm_madd_epi16(u[2], 
k__cospi_p08_p24); - v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24); - v[8] = _mm_madd_epi16(u[4], k__cospi_p08_p24); - v[9] = _mm_madd_epi16(u[5], k__cospi_p08_p24); - v[10] = _mm_madd_epi16(u[4], k__cospi_p24_m08); - v[11] = _mm_madd_epi16(u[5], k__cospi_p24_m08); - v[12] = _mm_madd_epi16(u[6], k__cospi_m24_p08); - v[13] = _mm_madd_epi16(u[7], k__cospi_m24_p08); - v[14] = _mm_madd_epi16(u[6], k__cospi_p08_p24); - v[15] = _mm_madd_epi16(u[7], k__cospi_p08_p24); - - u[0] = _mm_add_epi32(v[0], v[4]); - u[1] = _mm_add_epi32(v[1], v[5]); - u[2] = _mm_add_epi32(v[2], v[6]); - u[3] = _mm_add_epi32(v[3], v[7]); - u[4] = _mm_sub_epi32(v[0], v[4]); - u[5] = _mm_sub_epi32(v[1], v[5]); - u[6] = _mm_sub_epi32(v[2], v[6]); - u[7] = _mm_sub_epi32(v[3], v[7]); - u[8] = _mm_add_epi32(v[8], v[12]); - u[9] = _mm_add_epi32(v[9], v[13]); - u[10] = _mm_add_epi32(v[10], v[14]); - u[11] = _mm_add_epi32(v[11], v[15]); - u[12] = _mm_sub_epi32(v[8], v[12]); - u[13] = _mm_sub_epi32(v[9], v[13]); - u[14] = _mm_sub_epi32(v[10], v[14]); - u[15] = _mm_sub_epi32(v[11], v[15]); - - u[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); - u[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); - u[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); - u[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); - u[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); - u[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); - u[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); - u[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING); - u[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); - u[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); - - v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - v[1] = 
_mm_srai_epi32(u[1], DCT_CONST_BITS); - v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - v[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); - v[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); - v[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); - v[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); - v[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); - v[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); - v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); - v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); - - s[0] = _mm_add_epi16(x[0], x[2]); - s[1] = _mm_add_epi16(x[1], x[3]); - s[2] = _mm_sub_epi16(x[0], x[2]); - s[3] = _mm_sub_epi16(x[1], x[3]); - s[4] = _mm_packs_epi32(v[0], v[1]); - s[5] = _mm_packs_epi32(v[2], v[3]); - s[6] = _mm_packs_epi32(v[4], v[5]); - s[7] = _mm_packs_epi32(v[6], v[7]); - s[8] = _mm_add_epi16(x[8], x[10]); - s[9] = _mm_add_epi16(x[9], x[11]); - s[10] = _mm_sub_epi16(x[8], x[10]); - s[11] = _mm_sub_epi16(x[9], x[11]); - s[12] = _mm_packs_epi32(v[8], v[9]); - s[13] = _mm_packs_epi32(v[10], v[11]); - s[14] = _mm_packs_epi32(v[12], v[13]); - s[15] = _mm_packs_epi32(v[14], v[15]); - - // stage 4 - u[0] = _mm_unpacklo_epi16(s[2], s[3]); - u[1] = _mm_unpackhi_epi16(s[2], s[3]); - u[2] = _mm_unpacklo_epi16(s[6], s[7]); - u[3] = _mm_unpackhi_epi16(s[6], s[7]); - u[4] = _mm_unpacklo_epi16(s[10], s[11]); - u[5] = _mm_unpackhi_epi16(s[10], s[11]); - u[6] = _mm_unpacklo_epi16(s[14], s[15]); - u[7] = _mm_unpackhi_epi16(s[14], s[15]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_m16_m16); - v[1] = _mm_madd_epi16(u[1], k__cospi_m16_m16); - v[2] = _mm_madd_epi16(u[0], k__cospi_p16_m16); - v[3] = _mm_madd_epi16(u[1], k__cospi_p16_m16); - v[4] = _mm_madd_epi16(u[2], k__cospi_p16_p16); - v[5] = _mm_madd_epi16(u[3], k__cospi_p16_p16); - v[6] = _mm_madd_epi16(u[2], 
k__cospi_m16_p16); - v[7] = _mm_madd_epi16(u[3], k__cospi_m16_p16); - v[8] = _mm_madd_epi16(u[4], k__cospi_p16_p16); - v[9] = _mm_madd_epi16(u[5], k__cospi_p16_p16); - v[10] = _mm_madd_epi16(u[4], k__cospi_m16_p16); - v[11] = _mm_madd_epi16(u[5], k__cospi_m16_p16); - v[12] = _mm_madd_epi16(u[6], k__cospi_m16_m16); - v[13] = _mm_madd_epi16(u[7], k__cospi_m16_m16); - v[14] = _mm_madd_epi16(u[6], k__cospi_p16_m16); - v[15] = _mm_madd_epi16(u[7], k__cospi_p16_m16); - - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); - u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); - u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING); - u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING); - u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING); - u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING); - u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING); - u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING); - u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING); - u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING); - - v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - v[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); - v[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); - v[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); - v[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); - v[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); - v[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); - 
v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); - v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); - - in[0] = s[0]; - in[1] = _mm_sub_epi16(kZero, s[8]); - in[2] = s[12]; - in[3] = _mm_sub_epi16(kZero, s[4]); - in[4] = _mm_packs_epi32(v[4], v[5]); - in[5] = _mm_packs_epi32(v[12], v[13]); - in[6] = _mm_packs_epi32(v[8], v[9]); - in[7] = _mm_packs_epi32(v[0], v[1]); - in[8] = _mm_packs_epi32(v[2], v[3]); - in[9] = _mm_packs_epi32(v[10], v[11]); - in[10] = _mm_packs_epi32(v[14], v[15]); - in[11] = _mm_packs_epi32(v[6], v[7]); - in[12] = s[5]; - in[13] = _mm_sub_epi16(kZero, s[13]); - in[14] = s[9]; - in[15] = _mm_sub_epi16(kZero, s[1]); -} - -static void idct16_8col(__m128i *in) { - const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); - const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); - const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); - const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); - const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); - const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); - const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); - const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); - const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); - const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); - const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); - const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); - const 
__m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64); - const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); - const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); - __m128i v[16], u[16], s[16], t[16]; - - // stage 1 - s[0] = in[0]; - s[1] = in[8]; - s[2] = in[4]; - s[3] = in[12]; - s[4] = in[2]; - s[5] = in[10]; - s[6] = in[6]; - s[7] = in[14]; - s[8] = in[1]; - s[9] = in[9]; - s[10] = in[5]; - s[11] = in[13]; - s[12] = in[3]; - s[13] = in[11]; - s[14] = in[7]; - s[15] = in[15]; - - // stage 2 - u[0] = _mm_unpacklo_epi16(s[8], s[15]); - u[1] = _mm_unpackhi_epi16(s[8], s[15]); - u[2] = _mm_unpacklo_epi16(s[9], s[14]); - u[3] = _mm_unpackhi_epi16(s[9], s[14]); - u[4] = _mm_unpacklo_epi16(s[10], s[13]); - u[5] = _mm_unpackhi_epi16(s[10], s[13]); - u[6] = _mm_unpacklo_epi16(s[11], s[12]); - u[7] = _mm_unpackhi_epi16(s[11], s[12]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_p30_m02); - v[1] = _mm_madd_epi16(u[1], k__cospi_p30_m02); - v[2] = _mm_madd_epi16(u[0], k__cospi_p02_p30); - v[3] = _mm_madd_epi16(u[1], k__cospi_p02_p30); - v[4] = _mm_madd_epi16(u[2], k__cospi_p14_m18); - v[5] = _mm_madd_epi16(u[3], k__cospi_p14_m18); - v[6] = _mm_madd_epi16(u[2], k__cospi_p18_p14); - v[7] = _mm_madd_epi16(u[3], k__cospi_p18_p14); - v[8] = _mm_madd_epi16(u[4], k__cospi_p22_m10); - v[9] = _mm_madd_epi16(u[5], k__cospi_p22_m10); - v[10] = _mm_madd_epi16(u[4], k__cospi_p10_p22); - v[11] = _mm_madd_epi16(u[5], k__cospi_p10_p22); - v[12] = _mm_madd_epi16(u[6], k__cospi_p06_m26); - v[13] = _mm_madd_epi16(u[7], k__cospi_p06_m26); - v[14] = _mm_madd_epi16(u[6], k__cospi_p26_p06); - v[15] = _mm_madd_epi16(u[7], k__cospi_p26_p06); - - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); - u[4] = 
_mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); - u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING); - u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING); - u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING); - u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING); - u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING); - u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING); - u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING); - u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - u[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); - u[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); - u[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); - u[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); - u[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); - u[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); - u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); - u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); - - s[8] = _mm_packs_epi32(u[0], u[1]); - s[15] = _mm_packs_epi32(u[2], u[3]); - s[9] = _mm_packs_epi32(u[4], u[5]); - s[14] = _mm_packs_epi32(u[6], u[7]); - s[10] = _mm_packs_epi32(u[8], u[9]); - s[13] = _mm_packs_epi32(u[10], u[11]); - s[11] = _mm_packs_epi32(u[12], u[13]); - s[12] = _mm_packs_epi32(u[14], u[15]); - - // stage 3 - t[0] = s[0]; - t[1] = s[1]; - t[2] = s[2]; - t[3] = s[3]; - u[0] = _mm_unpacklo_epi16(s[4], s[7]); - u[1] = _mm_unpackhi_epi16(s[4], s[7]); - u[2] = _mm_unpacklo_epi16(s[5], s[6]); - u[3] = _mm_unpackhi_epi16(s[5], s[6]); - - v[0] = _mm_madd_epi16(u[0], 
k__cospi_p28_m04); - v[1] = _mm_madd_epi16(u[1], k__cospi_p28_m04); - v[2] = _mm_madd_epi16(u[0], k__cospi_p04_p28); - v[3] = _mm_madd_epi16(u[1], k__cospi_p04_p28); - v[4] = _mm_madd_epi16(u[2], k__cospi_p12_m20); - v[5] = _mm_madd_epi16(u[3], k__cospi_p12_m20); - v[6] = _mm_madd_epi16(u[2], k__cospi_p20_p12); - v[7] = _mm_madd_epi16(u[3], k__cospi_p20_p12); - - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); - u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - - t[4] = _mm_packs_epi32(u[0], u[1]); - t[7] = _mm_packs_epi32(u[2], u[3]); - t[5] = _mm_packs_epi32(u[4], u[5]); - t[6] = _mm_packs_epi32(u[6], u[7]); - t[8] = _mm_add_epi16(s[8], s[9]); - t[9] = _mm_sub_epi16(s[8], s[9]); - t[10] = _mm_sub_epi16(s[11], s[10]); - t[11] = _mm_add_epi16(s[10], s[11]); - t[12] = _mm_add_epi16(s[12], s[13]); - t[13] = _mm_sub_epi16(s[12], s[13]); - t[14] = _mm_sub_epi16(s[15], s[14]); - t[15] = _mm_add_epi16(s[14], s[15]); - - // stage 4 - u[0] = _mm_unpacklo_epi16(t[0], t[1]); - u[1] = _mm_unpackhi_epi16(t[0], t[1]); - u[2] = _mm_unpacklo_epi16(t[2], t[3]); - u[3] = _mm_unpackhi_epi16(t[2], t[3]); - u[4] = _mm_unpacklo_epi16(t[9], t[14]); - u[5] = _mm_unpackhi_epi16(t[9], t[14]); - u[6] = _mm_unpacklo_epi16(t[10], t[13]); - u[7] = _mm_unpackhi_epi16(t[10], t[13]); - - v[0] = 
_mm_madd_epi16(u[0], k__cospi_p16_p16); - v[1] = _mm_madd_epi16(u[1], k__cospi_p16_p16); - v[2] = _mm_madd_epi16(u[0], k__cospi_p16_m16); - v[3] = _mm_madd_epi16(u[1], k__cospi_p16_m16); - v[4] = _mm_madd_epi16(u[2], k__cospi_p24_m08); - v[5] = _mm_madd_epi16(u[3], k__cospi_p24_m08); - v[6] = _mm_madd_epi16(u[2], k__cospi_p08_p24); - v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24); - v[8] = _mm_madd_epi16(u[4], k__cospi_m08_p24); - v[9] = _mm_madd_epi16(u[5], k__cospi_m08_p24); - v[10] = _mm_madd_epi16(u[4], k__cospi_p24_p08); - v[11] = _mm_madd_epi16(u[5], k__cospi_p24_p08); - v[12] = _mm_madd_epi16(u[6], k__cospi_m24_m08); - v[13] = _mm_madd_epi16(u[7], k__cospi_m24_m08); - v[14] = _mm_madd_epi16(u[6], k__cospi_m08_p24); - v[15] = _mm_madd_epi16(u[7], k__cospi_m08_p24); - - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); - u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); - u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING); - u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING); - u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING); - u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING); - u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING); - u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING); - u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING); - u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - u[7] = 
_mm_srai_epi32(u[7], DCT_CONST_BITS); - u[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); - u[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); - u[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); - u[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); - u[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); - u[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); - u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); - u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); - - s[0] = _mm_packs_epi32(u[0], u[1]); - s[1] = _mm_packs_epi32(u[2], u[3]); - s[2] = _mm_packs_epi32(u[4], u[5]); - s[3] = _mm_packs_epi32(u[6], u[7]); - s[4] = _mm_add_epi16(t[4], t[5]); - s[5] = _mm_sub_epi16(t[4], t[5]); - s[6] = _mm_sub_epi16(t[7], t[6]); - s[7] = _mm_add_epi16(t[6], t[7]); - s[8] = t[8]; - s[15] = t[15]; - s[9] = _mm_packs_epi32(u[8], u[9]); - s[14] = _mm_packs_epi32(u[10], u[11]); - s[10] = _mm_packs_epi32(u[12], u[13]); - s[13] = _mm_packs_epi32(u[14], u[15]); - s[11] = t[11]; - s[12] = t[12]; - - // stage 5 - t[0] = _mm_add_epi16(s[0], s[3]); - t[1] = _mm_add_epi16(s[1], s[2]); - t[2] = _mm_sub_epi16(s[1], s[2]); - t[3] = _mm_sub_epi16(s[0], s[3]); - t[4] = s[4]; - t[7] = s[7]; - - u[0] = _mm_unpacklo_epi16(s[5], s[6]); - u[1] = _mm_unpackhi_epi16(s[5], s[6]); - v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16); - v[1] = _mm_madd_epi16(u[1], k__cospi_m16_p16); - v[2] = _mm_madd_epi16(u[0], k__cospi_p16_p16); - v[3] = _mm_madd_epi16(u[1], k__cospi_p16_p16); - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); - u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - t[5] = _mm_packs_epi32(u[0], u[1]); - t[6] = _mm_packs_epi32(u[2], u[3]); - - t[8] = _mm_add_epi16(s[8], s[11]); - t[9] = _mm_add_epi16(s[9], s[10]); - t[10] = 
_mm_sub_epi16(s[9], s[10]); - t[11] = _mm_sub_epi16(s[8], s[11]); - t[12] = _mm_sub_epi16(s[15], s[12]); - t[13] = _mm_sub_epi16(s[14], s[13]); - t[14] = _mm_add_epi16(s[13], s[14]); - t[15] = _mm_add_epi16(s[12], s[15]); - - // stage 6 - s[0] = _mm_add_epi16(t[0], t[7]); - s[1] = _mm_add_epi16(t[1], t[6]); - s[2] = _mm_add_epi16(t[2], t[5]); - s[3] = _mm_add_epi16(t[3], t[4]); - s[4] = _mm_sub_epi16(t[3], t[4]); - s[5] = _mm_sub_epi16(t[2], t[5]); - s[6] = _mm_sub_epi16(t[1], t[6]); - s[7] = _mm_sub_epi16(t[0], t[7]); - s[8] = t[8]; - s[9] = t[9]; - - u[0] = _mm_unpacklo_epi16(t[10], t[13]); - u[1] = _mm_unpackhi_epi16(t[10], t[13]); - u[2] = _mm_unpacklo_epi16(t[11], t[12]); - u[3] = _mm_unpackhi_epi16(t[11], t[12]); - - v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16); - v[1] = _mm_madd_epi16(u[1], k__cospi_m16_p16); - v[2] = _mm_madd_epi16(u[0], k__cospi_p16_p16); - v[3] = _mm_madd_epi16(u[1], k__cospi_p16_p16); - v[4] = _mm_madd_epi16(u[2], k__cospi_m16_p16); - v[5] = _mm_madd_epi16(u[3], k__cospi_m16_p16); - v[6] = _mm_madd_epi16(u[2], k__cospi_p16_p16); - v[7] = _mm_madd_epi16(u[3], k__cospi_p16_p16); - - u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); - u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); - u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); - u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); - u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); - u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); - u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); - u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); - - u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); - u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); - u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); - u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); - u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); - u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); - u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); - u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); - - s[10] = _mm_packs_epi32(u[0], u[1]); - s[13] = _mm_packs_epi32(u[2], 
u[3]); - s[11] = _mm_packs_epi32(u[4], u[5]); - s[12] = _mm_packs_epi32(u[6], u[7]); - s[14] = t[14]; - s[15] = t[15]; - - // stage 7 - in[0] = _mm_add_epi16(s[0], s[15]); - in[1] = _mm_add_epi16(s[1], s[14]); - in[2] = _mm_add_epi16(s[2], s[13]); - in[3] = _mm_add_epi16(s[3], s[12]); - in[4] = _mm_add_epi16(s[4], s[11]); - in[5] = _mm_add_epi16(s[5], s[10]); - in[6] = _mm_add_epi16(s[6], s[9]); - in[7] = _mm_add_epi16(s[7], s[8]); - in[8] = _mm_sub_epi16(s[7], s[8]); - in[9] = _mm_sub_epi16(s[6], s[9]); - in[10] = _mm_sub_epi16(s[5], s[10]); - in[11] = _mm_sub_epi16(s[4], s[11]); - in[12] = _mm_sub_epi16(s[3], s[12]); - in[13] = _mm_sub_epi16(s[2], s[13]); - in[14] = _mm_sub_epi16(s[1], s[14]); - in[15] = _mm_sub_epi16(s[0], s[15]); -} - -void idct16_sse2(__m128i *in0, __m128i *in1) { - array_transpose_16x16(in0, in1); - idct16_8col(in0); - idct16_8col(in1); -} - -void iadst16_sse2(__m128i *in0, __m128i *in1) { - array_transpose_16x16(in0, in1); - iadst16_8col(in0); - iadst16_8col(in1); -} - -void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i final_rounding = _mm_set1_epi16(1 << 5); - const __m128i zero = _mm_setzero_si128(); - - const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64); - const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64); - const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64); - const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64); - - const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64); - - const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64); - const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); - const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, 
-cospi_8_64); - const __m128i stg4_7 = pair_set_epi16(-cospi_8_64, cospi_24_64); - - const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); - __m128i in[16], l[16]; - __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, - stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, - stp1_8_0, stp1_12_0; - __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, - stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14; - __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - int i; - // First 1-D inverse DCT - // Load input data. - in[0] = load_input_data(input); - in[1] = load_input_data(input + 8 * 2); - in[2] = load_input_data(input + 8 * 4); - in[3] = load_input_data(input + 8 * 6); - - TRANSPOSE_8X4(in[0], in[1], in[2], in[3], in[0], in[1]); - - // Stage2 - { - const __m128i lo_1_15 = _mm_unpackhi_epi16(in[0], zero); - const __m128i lo_13_3 = _mm_unpackhi_epi16(zero, in[1]); - - tmp0 = _mm_madd_epi16(lo_1_15, stg2_0); - tmp2 = _mm_madd_epi16(lo_1_15, stg2_1); - tmp5 = _mm_madd_epi16(lo_13_3, stg2_6); - tmp7 = _mm_madd_epi16(lo_13_3, stg2_7); - - tmp0 = _mm_add_epi32(tmp0, rounding); - tmp2 = _mm_add_epi32(tmp2, rounding); - tmp5 = _mm_add_epi32(tmp5, rounding); - tmp7 = _mm_add_epi32(tmp7, rounding); - - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS); - tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS); - - stp2_8 = _mm_packs_epi32(tmp0, tmp2); - stp2_11 = _mm_packs_epi32(tmp5, tmp7); - } - - // Stage3 - { - const __m128i lo_2_14 = _mm_unpacklo_epi16(in[1], zero); - - tmp0 = _mm_madd_epi16(lo_2_14, stg3_0); - tmp2 = _mm_madd_epi16(lo_2_14, stg3_1); - - tmp0 = _mm_add_epi32(tmp0, rounding); - tmp2 = _mm_add_epi32(tmp2, rounding); - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - - stp1_13 = _mm_unpackhi_epi64(stp2_11, zero); - stp1_14 = _mm_unpackhi_epi64(stp2_8, zero); - - 
stp1_4 = _mm_packs_epi32(tmp0, tmp2); - } - - // Stage4 - { - const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], zero); - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp2_8, stp1_14); - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp2_11, stp1_13); - - tmp0 = _mm_madd_epi16(lo_0_8, stg4_0); - tmp2 = _mm_madd_epi16(lo_0_8, stg4_1); - tmp1 = _mm_madd_epi16(lo_9_14, stg4_4); - tmp3 = _mm_madd_epi16(lo_9_14, stg4_5); - tmp5 = _mm_madd_epi16(lo_10_13, stg4_6); - tmp7 = _mm_madd_epi16(lo_10_13, stg4_7); - - tmp0 = _mm_add_epi32(tmp0, rounding); - tmp2 = _mm_add_epi32(tmp2, rounding); - tmp1 = _mm_add_epi32(tmp1, rounding); - tmp3 = _mm_add_epi32(tmp3, rounding); - tmp5 = _mm_add_epi32(tmp5, rounding); - tmp7 = _mm_add_epi32(tmp7, rounding); - - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); - tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS); - tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS); - - stp1_0 = _mm_packs_epi32(tmp0, tmp0); - stp1_1 = _mm_packs_epi32(tmp2, tmp2); - stp2_9 = _mm_packs_epi32(tmp1, tmp3); - stp2_10 = _mm_packs_epi32(tmp5, tmp7); - - stp2_6 = _mm_unpackhi_epi64(stp1_4, zero); - } - - // Stage5 and Stage6 - { - tmp0 = _mm_add_epi16(stp2_8, stp2_11); - tmp1 = _mm_sub_epi16(stp2_8, stp2_11); - tmp2 = _mm_add_epi16(stp2_9, stp2_10); - tmp3 = _mm_sub_epi16(stp2_9, stp2_10); - - stp1_9 = _mm_unpacklo_epi64(tmp2, zero); - stp1_10 = _mm_unpacklo_epi64(tmp3, zero); - stp1_8 = _mm_unpacklo_epi64(tmp0, zero); - stp1_11 = _mm_unpacklo_epi64(tmp1, zero); - - stp1_13 = _mm_unpackhi_epi64(tmp3, zero); - stp1_14 = _mm_unpackhi_epi64(tmp2, zero); - stp1_12 = _mm_unpackhi_epi64(tmp1, zero); - stp1_15 = _mm_unpackhi_epi64(tmp0, zero); - } - - // Stage6 - { - const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp1_4); - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); - const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); 
- - tmp1 = _mm_madd_epi16(lo_6_5, stg4_1); - tmp3 = _mm_madd_epi16(lo_6_5, stg4_0); - tmp0 = _mm_madd_epi16(lo_10_13, stg6_0); - tmp2 = _mm_madd_epi16(lo_10_13, stg4_0); - tmp4 = _mm_madd_epi16(lo_11_12, stg6_0); - tmp6 = _mm_madd_epi16(lo_11_12, stg4_0); - - tmp1 = _mm_add_epi32(tmp1, rounding); - tmp3 = _mm_add_epi32(tmp3, rounding); - tmp0 = _mm_add_epi32(tmp0, rounding); - tmp2 = _mm_add_epi32(tmp2, rounding); - tmp4 = _mm_add_epi32(tmp4, rounding); - tmp6 = _mm_add_epi32(tmp6, rounding); - - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); - tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); - tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); - - stp1_6 = _mm_packs_epi32(tmp3, tmp1); - - stp2_10 = _mm_packs_epi32(tmp0, zero); - stp2_13 = _mm_packs_epi32(tmp2, zero); - stp2_11 = _mm_packs_epi32(tmp4, zero); - stp2_12 = _mm_packs_epi32(tmp6, zero); - - tmp0 = _mm_add_epi16(stp1_0, stp1_4); - tmp1 = _mm_sub_epi16(stp1_0, stp1_4); - tmp2 = _mm_add_epi16(stp1_1, stp1_6); - tmp3 = _mm_sub_epi16(stp1_1, stp1_6); - - stp2_0 = _mm_unpackhi_epi64(tmp0, zero); - stp2_1 = _mm_unpacklo_epi64(tmp2, zero); - stp2_2 = _mm_unpackhi_epi64(tmp2, zero); - stp2_3 = _mm_unpacklo_epi64(tmp0, zero); - stp2_4 = _mm_unpacklo_epi64(tmp1, zero); - stp2_5 = _mm_unpackhi_epi64(tmp3, zero); - stp2_6 = _mm_unpacklo_epi64(tmp3, zero); - stp2_7 = _mm_unpackhi_epi64(tmp1, zero); - } - - // Stage7. Left 8x16 only. 
- l[0] = _mm_add_epi16(stp2_0, stp1_15); - l[1] = _mm_add_epi16(stp2_1, stp1_14); - l[2] = _mm_add_epi16(stp2_2, stp2_13); - l[3] = _mm_add_epi16(stp2_3, stp2_12); - l[4] = _mm_add_epi16(stp2_4, stp2_11); - l[5] = _mm_add_epi16(stp2_5, stp2_10); - l[6] = _mm_add_epi16(stp2_6, stp1_9); - l[7] = _mm_add_epi16(stp2_7, stp1_8); - l[8] = _mm_sub_epi16(stp2_7, stp1_8); - l[9] = _mm_sub_epi16(stp2_6, stp1_9); - l[10] = _mm_sub_epi16(stp2_5, stp2_10); - l[11] = _mm_sub_epi16(stp2_4, stp2_11); - l[12] = _mm_sub_epi16(stp2_3, stp2_12); - l[13] = _mm_sub_epi16(stp2_2, stp2_13); - l[14] = _mm_sub_epi16(stp2_1, stp1_14); - l[15] = _mm_sub_epi16(stp2_0, stp1_15); - - // Second 1-D inverse transform, performed per 8x16 block - for (i = 0; i < 2; i++) { - int j; - array_transpose_4X8(l + 8 * i, in); - - IDCT16_10 - - // Stage7 - in[0] = _mm_add_epi16(stp2_0, stp1_15); - in[1] = _mm_add_epi16(stp2_1, stp1_14); - in[2] = _mm_add_epi16(stp2_2, stp2_13); - in[3] = _mm_add_epi16(stp2_3, stp2_12); - in[4] = _mm_add_epi16(stp2_4, stp2_11); - in[5] = _mm_add_epi16(stp2_5, stp2_10); - in[6] = _mm_add_epi16(stp2_6, stp1_9); - in[7] = _mm_add_epi16(stp2_7, stp1_8); - in[8] = _mm_sub_epi16(stp2_7, stp1_8); - in[9] = _mm_sub_epi16(stp2_6, stp1_9); - in[10] = _mm_sub_epi16(stp2_5, stp2_10); - in[11] = _mm_sub_epi16(stp2_4, stp2_11); - in[12] = _mm_sub_epi16(stp2_3, stp2_12); - in[13] = _mm_sub_epi16(stp2_2, stp2_13); - in[14] = _mm_sub_epi16(stp2_1, stp1_14); - in[15] = _mm_sub_epi16(stp2_0, stp1_15); - - for (j = 0; j < 16; ++j) { - // Final rounding and shift - in[j] = _mm_adds_epi16(in[j], final_rounding); - in[j] = _mm_srai_epi16(in[j], 6); - RECON_AND_STORE(dest + j * stride, in[j]); - } - - dest += 8; - } -} - -#define LOAD_DQCOEFF(reg, input) \ - { \ - reg = load_input_data(input); \ - input += 8; \ - } \ - -#define IDCT32_34 \ -/* Stage1 */ \ -{ \ - const __m128i zero = _mm_setzero_si128();\ - const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], zero); \ - const __m128i hi_1_31 = 
_mm_unpackhi_epi16(in[1], zero); \ - \ - const __m128i lo_25_7= _mm_unpacklo_epi16(zero, in[7]); \ - const __m128i hi_25_7 = _mm_unpackhi_epi16(zero, in[7]); \ - \ - const __m128i lo_5_27 = _mm_unpacklo_epi16(in[5], zero); \ - const __m128i hi_5_27 = _mm_unpackhi_epi16(in[5], zero); \ - \ - const __m128i lo_29_3 = _mm_unpacklo_epi16(zero, in[3]); \ - const __m128i hi_29_3 = _mm_unpackhi_epi16(zero, in[3]); \ - \ - MULTIPLICATION_AND_ADD_2(lo_1_31, hi_1_31, stg1_0, \ - stg1_1, stp1_16, stp1_31); \ - MULTIPLICATION_AND_ADD_2(lo_25_7, hi_25_7, stg1_6, \ - stg1_7, stp1_19, stp1_28); \ - MULTIPLICATION_AND_ADD_2(lo_5_27, hi_5_27, stg1_8, \ - stg1_9, stp1_20, stp1_27); \ - MULTIPLICATION_AND_ADD_2(lo_29_3, hi_29_3, stg1_14, \ - stg1_15, stp1_23, stp1_24); \ -} \ -\ -/* Stage2 */ \ -{ \ - const __m128i zero = _mm_setzero_si128();\ - const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], zero); \ - const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], zero); \ - \ - const __m128i lo_26_6 = _mm_unpacklo_epi16(zero, in[6]); \ - const __m128i hi_26_6 = _mm_unpackhi_epi16(zero, in[6]); \ - \ - MULTIPLICATION_AND_ADD_2(lo_2_30, hi_2_30, stg2_0, \ - stg2_1, stp2_8, stp2_15); \ - MULTIPLICATION_AND_ADD_2(lo_26_6, hi_26_6, stg2_6, \ - stg2_7, stp2_11, stp2_12); \ - \ - stp2_16 = stp1_16; \ - stp2_19 = stp1_19; \ - \ - stp2_20 = stp1_20; \ - stp2_23 = stp1_23; \ - \ - stp2_24 = stp1_24; \ - stp2_27 = stp1_27; \ - \ - stp2_28 = stp1_28; \ - stp2_31 = stp1_31; \ -} \ -\ -/* Stage3 */ \ -{ \ - const __m128i zero = _mm_setzero_si128();\ - const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], zero); \ - const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], zero); \ - \ - const __m128i lo_17_30 = _mm_unpacklo_epi16(stp1_16, stp1_31); \ - const __m128i hi_17_30 = _mm_unpackhi_epi16(stp1_16, stp1_31); \ - const __m128i lo_18_29 = _mm_unpacklo_epi16(stp1_19, stp1_28); \ - const __m128i hi_18_29 = _mm_unpackhi_epi16(stp1_19, stp1_28); \ - \ - const __m128i lo_21_26 = _mm_unpacklo_epi16(stp1_20, stp1_27); \ - 
const __m128i hi_21_26 = _mm_unpackhi_epi16(stp1_20, stp1_27); \ - const __m128i lo_22_25 = _mm_unpacklo_epi16(stp1_23, stp1_24); \ - const __m128i hi_22_25 = _mm_unpackhi_epi16(stp1_23, stp2_24); \ - \ - MULTIPLICATION_AND_ADD_2(lo_4_28, hi_4_28, stg3_0, \ - stg3_1, stp1_4, stp1_7); \ - \ - stp1_8 = stp2_8; \ - stp1_11 = stp2_11; \ - stp1_12 = stp2_12; \ - stp1_15 = stp2_15; \ - \ - MULTIPLICATION_AND_ADD(lo_17_30, hi_17_30, lo_18_29, hi_18_29, stg3_4, \ - stg3_5, stg3_6, stg3_4, stp1_17, stp1_30, \ - stp1_18, stp1_29) \ - MULTIPLICATION_AND_ADD(lo_21_26, hi_21_26, lo_22_25, hi_22_25, stg3_8, \ - stg3_9, stg3_10, stg3_8, stp1_21, stp1_26, \ - stp1_22, stp1_25) \ - \ - stp1_16 = stp2_16; \ - stp1_31 = stp2_31; \ - stp1_19 = stp2_19; \ - stp1_20 = stp2_20; \ - stp1_23 = stp2_23; \ - stp1_24 = stp2_24; \ - stp1_27 = stp2_27; \ - stp1_28 = stp2_28; \ -} \ -\ -/* Stage4 */ \ -{ \ - const __m128i zero = _mm_setzero_si128();\ - const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], zero); \ - const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], zero); \ - \ - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp2_8, stp2_15); \ - const __m128i hi_9_14 = _mm_unpackhi_epi16(stp2_8, stp2_15); \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp2_11, stp2_12); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp2_11, stp2_12); \ - \ - MULTIPLICATION_AND_ADD_2(lo_0_16, hi_0_16, stg4_0, \ - stg4_1, stp2_0, stp2_1); \ - \ - stp2_4 = stp1_4; \ - stp2_5 = stp1_4; \ - stp2_6 = stp1_7; \ - stp2_7 = stp1_7; \ - \ - MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4, \ - stg4_5, stg4_6, stg4_4, stp2_9, stp2_14, \ - stp2_10, stp2_13) \ - \ - stp2_8 = stp1_8; \ - stp2_15 = stp1_15; \ - stp2_11 = stp1_11; \ - stp2_12 = stp1_12; \ - \ - stp2_16 = _mm_add_epi16(stp1_16, stp1_19); \ - stp2_17 = _mm_add_epi16(stp1_17, stp1_18); \ - stp2_18 = _mm_sub_epi16(stp1_17, stp1_18); \ - stp2_19 = _mm_sub_epi16(stp1_16, stp1_19); \ - stp2_20 = _mm_sub_epi16(stp1_23, stp1_20); \ - stp2_21 = 
_mm_sub_epi16(stp1_22, stp1_21); \ - stp2_22 = _mm_add_epi16(stp1_22, stp1_21); \ - stp2_23 = _mm_add_epi16(stp1_23, stp1_20); \ - \ - stp2_24 = _mm_add_epi16(stp1_24, stp1_27); \ - stp2_25 = _mm_add_epi16(stp1_25, stp1_26); \ - stp2_26 = _mm_sub_epi16(stp1_25, stp1_26); \ - stp2_27 = _mm_sub_epi16(stp1_24, stp1_27); \ - stp2_28 = _mm_sub_epi16(stp1_31, stp1_28); \ - stp2_29 = _mm_sub_epi16(stp1_30, stp1_29); \ - stp2_30 = _mm_add_epi16(stp1_29, stp1_30); \ - stp2_31 = _mm_add_epi16(stp1_28, stp1_31); \ -} \ -\ -/* Stage5 */ \ -{ \ - const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ - const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ - const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); \ - const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); \ - \ - const __m128i lo_19_28 = _mm_unpacklo_epi16(stp2_19, stp2_28); \ - const __m128i hi_19_28 = _mm_unpackhi_epi16(stp2_19, stp2_28); \ - const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ - const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ - \ - const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ - const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ - \ - stp1_0 = stp2_0; \ - stp1_1 = stp2_1; \ - stp1_2 = stp2_1; \ - stp1_3 = stp2_0; \ - \ - tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ - tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ - tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ - tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - \ - stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ - stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ - \ - stp1_4 = stp2_4; \ - stp1_7 = stp2_7; \ - \ - stp1_8 = 
_mm_add_epi16(stp2_8, stp2_11); \ - stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ - stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ - stp1_11 = _mm_sub_epi16(stp2_8, stp2_11); \ - stp1_12 = _mm_sub_epi16(stp2_15, stp2_12); \ - stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ - stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ - stp1_15 = _mm_add_epi16(stp2_15, stp2_12); \ - \ - stp1_16 = stp2_16; \ - stp1_17 = stp2_17; \ - \ - MULTIPLICATION_AND_ADD(lo_18_29, hi_18_29, lo_19_28, hi_19_28, stg4_4, \ - stg4_5, stg4_4, stg4_5, stp1_18, stp1_29, \ - stp1_19, stp1_28) \ - MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg4_6, \ - stg4_4, stg4_6, stg4_4, stp1_20, stp1_27, \ - stp1_21, stp1_26) \ - \ - stp1_22 = stp2_22; \ - stp1_23 = stp2_23; \ - stp1_24 = stp2_24; \ - stp1_25 = stp2_25; \ - stp1_30 = stp2_30; \ - stp1_31 = stp2_31; \ -} \ -\ -/* Stage6 */ \ -{ \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ - const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ - \ - stp2_0 = _mm_add_epi16(stp1_0, stp1_7); \ - stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ - stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ - stp2_3 = _mm_add_epi16(stp1_3, stp1_4); \ - stp2_4 = _mm_sub_epi16(stp1_3, stp1_4); \ - stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ - stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ - stp2_7 = _mm_sub_epi16(stp1_0, stp1_7); \ - \ - stp2_8 = stp1_8; \ - stp2_9 = stp1_9; \ - stp2_14 = stp1_14; \ - stp2_15 = stp1_15; \ - \ - MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, \ - stg6_0, stg4_0, stg6_0, stg4_0, stp2_10, \ - stp2_13, stp2_11, stp2_12) \ - \ - stp2_16 = _mm_add_epi16(stp1_16, stp1_23); \ - stp2_17 = _mm_add_epi16(stp1_17, stp1_22); \ - stp2_18 = _mm_add_epi16(stp1_18, stp1_21); \ - stp2_19 = _mm_add_epi16(stp1_19, stp1_20); \ - stp2_20 = _mm_sub_epi16(stp1_19, stp1_20); \ - stp2_21 = 
_mm_sub_epi16(stp1_18, stp1_21); \ - stp2_22 = _mm_sub_epi16(stp1_17, stp1_22); \ - stp2_23 = _mm_sub_epi16(stp1_16, stp1_23); \ - \ - stp2_24 = _mm_sub_epi16(stp1_31, stp1_24); \ - stp2_25 = _mm_sub_epi16(stp1_30, stp1_25); \ - stp2_26 = _mm_sub_epi16(stp1_29, stp1_26); \ - stp2_27 = _mm_sub_epi16(stp1_28, stp1_27); \ - stp2_28 = _mm_add_epi16(stp1_27, stp1_28); \ - stp2_29 = _mm_add_epi16(stp1_26, stp1_29); \ - stp2_30 = _mm_add_epi16(stp1_25, stp1_30); \ - stp2_31 = _mm_add_epi16(stp1_24, stp1_31); \ -} \ -\ -/* Stage7 */ \ -{ \ - const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ - const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ - const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ - const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ - \ - const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); \ - const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); \ - const __m128i lo_23_24 = _mm_unpacklo_epi16(stp2_23, stp2_24); \ - const __m128i hi_23_24 = _mm_unpackhi_epi16(stp2_23, stp2_24); \ - \ - stp1_0 = _mm_add_epi16(stp2_0, stp2_15); \ - stp1_1 = _mm_add_epi16(stp2_1, stp2_14); \ - stp1_2 = _mm_add_epi16(stp2_2, stp2_13); \ - stp1_3 = _mm_add_epi16(stp2_3, stp2_12); \ - stp1_4 = _mm_add_epi16(stp2_4, stp2_11); \ - stp1_5 = _mm_add_epi16(stp2_5, stp2_10); \ - stp1_6 = _mm_add_epi16(stp2_6, stp2_9); \ - stp1_7 = _mm_add_epi16(stp2_7, stp2_8); \ - stp1_8 = _mm_sub_epi16(stp2_7, stp2_8); \ - stp1_9 = _mm_sub_epi16(stp2_6, stp2_9); \ - stp1_10 = _mm_sub_epi16(stp2_5, stp2_10); \ - stp1_11 = _mm_sub_epi16(stp2_4, stp2_11); \ - stp1_12 = _mm_sub_epi16(stp2_3, stp2_12); \ - stp1_13 = _mm_sub_epi16(stp2_2, stp2_13); \ - stp1_14 = _mm_sub_epi16(stp2_1, stp2_14); \ - stp1_15 = _mm_sub_epi16(stp2_0, stp2_15); \ - \ - stp1_16 = stp2_16; \ - stp1_17 = stp2_17; \ - stp1_18 = stp2_18; \ - stp1_19 = stp2_19; \ - \ - MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg6_0, \ - stg4_0, stg6_0, 
stg4_0, stp1_20, stp1_27, \ - stp1_21, stp1_26) \ - MULTIPLICATION_AND_ADD(lo_22_25, hi_22_25, lo_23_24, hi_23_24, stg6_0, \ - stg4_0, stg6_0, stg4_0, stp1_22, stp1_25, \ - stp1_23, stp1_24) \ - \ - stp1_28 = stp2_28; \ - stp1_29 = stp2_29; \ - stp1_30 = stp2_30; \ - stp1_31 = stp2_31; \ -} - - -#define IDCT32 \ -/* Stage1 */ \ -{ \ - const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], in[31]); \ - const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], in[31]); \ - const __m128i lo_17_15 = _mm_unpacklo_epi16(in[17], in[15]); \ - const __m128i hi_17_15 = _mm_unpackhi_epi16(in[17], in[15]); \ - \ - const __m128i lo_9_23 = _mm_unpacklo_epi16(in[9], in[23]); \ - const __m128i hi_9_23 = _mm_unpackhi_epi16(in[9], in[23]); \ - const __m128i lo_25_7= _mm_unpacklo_epi16(in[25], in[7]); \ - const __m128i hi_25_7 = _mm_unpackhi_epi16(in[25], in[7]); \ - \ - const __m128i lo_5_27 = _mm_unpacklo_epi16(in[5], in[27]); \ - const __m128i hi_5_27 = _mm_unpackhi_epi16(in[5], in[27]); \ - const __m128i lo_21_11 = _mm_unpacklo_epi16(in[21], in[11]); \ - const __m128i hi_21_11 = _mm_unpackhi_epi16(in[21], in[11]); \ - \ - const __m128i lo_13_19 = _mm_unpacklo_epi16(in[13], in[19]); \ - const __m128i hi_13_19 = _mm_unpackhi_epi16(in[13], in[19]); \ - const __m128i lo_29_3 = _mm_unpacklo_epi16(in[29], in[3]); \ - const __m128i hi_29_3 = _mm_unpackhi_epi16(in[29], in[3]); \ - \ - MULTIPLICATION_AND_ADD(lo_1_31, hi_1_31, lo_17_15, hi_17_15, stg1_0, \ - stg1_1, stg1_2, stg1_3, stp1_16, stp1_31, \ - stp1_17, stp1_30) \ - MULTIPLICATION_AND_ADD(lo_9_23, hi_9_23, lo_25_7, hi_25_7, stg1_4, \ - stg1_5, stg1_6, stg1_7, stp1_18, stp1_29, \ - stp1_19, stp1_28) \ - MULTIPLICATION_AND_ADD(lo_5_27, hi_5_27, lo_21_11, hi_21_11, stg1_8, \ - stg1_9, stg1_10, stg1_11, stp1_20, stp1_27, \ - stp1_21, stp1_26) \ - MULTIPLICATION_AND_ADD(lo_13_19, hi_13_19, lo_29_3, hi_29_3, stg1_12, \ - stg1_13, stg1_14, stg1_15, stp1_22, stp1_25, \ - stp1_23, stp1_24) \ -} \ -\ -/* Stage2 */ \ -{ \ - const __m128i lo_2_30 = 
_mm_unpacklo_epi16(in[2], in[30]); \ - const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], in[30]); \ - const __m128i lo_18_14 = _mm_unpacklo_epi16(in[18], in[14]); \ - const __m128i hi_18_14 = _mm_unpackhi_epi16(in[18], in[14]); \ - \ - const __m128i lo_10_22 = _mm_unpacklo_epi16(in[10], in[22]); \ - const __m128i hi_10_22 = _mm_unpackhi_epi16(in[10], in[22]); \ - const __m128i lo_26_6 = _mm_unpacklo_epi16(in[26], in[6]); \ - const __m128i hi_26_6 = _mm_unpackhi_epi16(in[26], in[6]); \ - \ - MULTIPLICATION_AND_ADD(lo_2_30, hi_2_30, lo_18_14, hi_18_14, stg2_0, \ - stg2_1, stg2_2, stg2_3, stp2_8, stp2_15, stp2_9, \ - stp2_14) \ - MULTIPLICATION_AND_ADD(lo_10_22, hi_10_22, lo_26_6, hi_26_6, stg2_4, \ - stg2_5, stg2_6, stg2_7, stp2_10, stp2_13, \ - stp2_11, stp2_12) \ - \ - stp2_16 = _mm_add_epi16(stp1_16, stp1_17); \ - stp2_17 = _mm_sub_epi16(stp1_16, stp1_17); \ - stp2_18 = _mm_sub_epi16(stp1_19, stp1_18); \ - stp2_19 = _mm_add_epi16(stp1_19, stp1_18); \ - \ - stp2_20 = _mm_add_epi16(stp1_20, stp1_21); \ - stp2_21 = _mm_sub_epi16(stp1_20, stp1_21); \ - stp2_22 = _mm_sub_epi16(stp1_23, stp1_22); \ - stp2_23 = _mm_add_epi16(stp1_23, stp1_22); \ - \ - stp2_24 = _mm_add_epi16(stp1_24, stp1_25); \ - stp2_25 = _mm_sub_epi16(stp1_24, stp1_25); \ - stp2_26 = _mm_sub_epi16(stp1_27, stp1_26); \ - stp2_27 = _mm_add_epi16(stp1_27, stp1_26); \ - \ - stp2_28 = _mm_add_epi16(stp1_28, stp1_29); \ - stp2_29 = _mm_sub_epi16(stp1_28, stp1_29); \ - stp2_30 = _mm_sub_epi16(stp1_31, stp1_30); \ - stp2_31 = _mm_add_epi16(stp1_31, stp1_30); \ -} \ -\ -/* Stage3 */ \ -{ \ - const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], in[28]); \ - const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], in[28]); \ - const __m128i lo_20_12 = _mm_unpacklo_epi16(in[20], in[12]); \ - const __m128i hi_20_12 = _mm_unpackhi_epi16(in[20], in[12]); \ - \ - const __m128i lo_17_30 = _mm_unpacklo_epi16(stp2_17, stp2_30); \ - const __m128i hi_17_30 = _mm_unpackhi_epi16(stp2_17, stp2_30); \ - const __m128i lo_18_29 = 
_mm_unpacklo_epi16(stp2_18, stp2_29); \ - const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); \ - \ - const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ - const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ - const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); \ - const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); \ - \ - MULTIPLICATION_AND_ADD(lo_4_28, hi_4_28, lo_20_12, hi_20_12, stg3_0, \ - stg3_1, stg3_2, stg3_3, stp1_4, stp1_7, stp1_5, \ - stp1_6) \ - \ - stp1_8 = _mm_add_epi16(stp2_8, stp2_9); \ - stp1_9 = _mm_sub_epi16(stp2_8, stp2_9); \ - stp1_10 = _mm_sub_epi16(stp2_11, stp2_10); \ - stp1_11 = _mm_add_epi16(stp2_11, stp2_10); \ - stp1_12 = _mm_add_epi16(stp2_12, stp2_13); \ - stp1_13 = _mm_sub_epi16(stp2_12, stp2_13); \ - stp1_14 = _mm_sub_epi16(stp2_15, stp2_14); \ - stp1_15 = _mm_add_epi16(stp2_15, stp2_14); \ - \ - MULTIPLICATION_AND_ADD(lo_17_30, hi_17_30, lo_18_29, hi_18_29, stg3_4, \ - stg3_5, stg3_6, stg3_4, stp1_17, stp1_30, \ - stp1_18, stp1_29) \ - MULTIPLICATION_AND_ADD(lo_21_26, hi_21_26, lo_22_25, hi_22_25, stg3_8, \ - stg3_9, stg3_10, stg3_8, stp1_21, stp1_26, \ - stp1_22, stp1_25) \ - \ - stp1_16 = stp2_16; \ - stp1_31 = stp2_31; \ - stp1_19 = stp2_19; \ - stp1_20 = stp2_20; \ - stp1_23 = stp2_23; \ - stp1_24 = stp2_24; \ - stp1_27 = stp2_27; \ - stp1_28 = stp2_28; \ -} \ -\ -/* Stage4 */ \ -{ \ - const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], in[16]); \ - const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], in[16]); \ - const __m128i lo_8_24 = _mm_unpacklo_epi16(in[8], in[24]); \ - const __m128i hi_8_24 = _mm_unpackhi_epi16(in[8], in[24]); \ - \ - const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \ - const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - \ - MULTIPLICATION_AND_ADD(lo_0_16, hi_0_16, lo_8_24, hi_8_24, stg4_0, \ - 
stg4_1, stg4_2, stg4_3, stp2_0, stp2_1, \ - stp2_2, stp2_3) \ - \ - stp2_4 = _mm_add_epi16(stp1_4, stp1_5); \ - stp2_5 = _mm_sub_epi16(stp1_4, stp1_5); \ - stp2_6 = _mm_sub_epi16(stp1_7, stp1_6); \ - stp2_7 = _mm_add_epi16(stp1_7, stp1_6); \ - \ - MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4, \ - stg4_5, stg4_6, stg4_4, stp2_9, stp2_14, \ - stp2_10, stp2_13) \ - \ - stp2_8 = stp1_8; \ - stp2_15 = stp1_15; \ - stp2_11 = stp1_11; \ - stp2_12 = stp1_12; \ - \ - stp2_16 = _mm_add_epi16(stp1_16, stp1_19); \ - stp2_17 = _mm_add_epi16(stp1_17, stp1_18); \ - stp2_18 = _mm_sub_epi16(stp1_17, stp1_18); \ - stp2_19 = _mm_sub_epi16(stp1_16, stp1_19); \ - stp2_20 = _mm_sub_epi16(stp1_23, stp1_20); \ - stp2_21 = _mm_sub_epi16(stp1_22, stp1_21); \ - stp2_22 = _mm_add_epi16(stp1_22, stp1_21); \ - stp2_23 = _mm_add_epi16(stp1_23, stp1_20); \ - \ - stp2_24 = _mm_add_epi16(stp1_24, stp1_27); \ - stp2_25 = _mm_add_epi16(stp1_25, stp1_26); \ - stp2_26 = _mm_sub_epi16(stp1_25, stp1_26); \ - stp2_27 = _mm_sub_epi16(stp1_24, stp1_27); \ - stp2_28 = _mm_sub_epi16(stp1_31, stp1_28); \ - stp2_29 = _mm_sub_epi16(stp1_30, stp1_29); \ - stp2_30 = _mm_add_epi16(stp1_29, stp1_30); \ - stp2_31 = _mm_add_epi16(stp1_28, stp1_31); \ -} \ -\ -/* Stage5 */ \ -{ \ - const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \ - const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \ - const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); \ - const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); \ - \ - const __m128i lo_19_28 = _mm_unpacklo_epi16(stp2_19, stp2_28); \ - const __m128i hi_19_28 = _mm_unpackhi_epi16(stp2_19, stp2_28); \ - const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ - const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ - \ - const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ - const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ - \ - stp1_0 = _mm_add_epi16(stp2_0, stp2_3); \ - stp1_1 = 
_mm_add_epi16(stp2_1, stp2_2); \ - stp1_2 = _mm_sub_epi16(stp2_1, stp2_2); \ - stp1_3 = _mm_sub_epi16(stp2_0, stp2_3); \ - \ - tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \ - tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \ - tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \ - tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \ - \ - tmp0 = _mm_add_epi32(tmp0, rounding); \ - tmp1 = _mm_add_epi32(tmp1, rounding); \ - tmp2 = _mm_add_epi32(tmp2, rounding); \ - tmp3 = _mm_add_epi32(tmp3, rounding); \ - \ - tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \ - tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \ - tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \ - tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \ - \ - stp1_5 = _mm_packs_epi32(tmp0, tmp1); \ - stp1_6 = _mm_packs_epi32(tmp2, tmp3); \ - \ - stp1_4 = stp2_4; \ - stp1_7 = stp2_7; \ - \ - stp1_8 = _mm_add_epi16(stp2_8, stp2_11); \ - stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \ - stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \ - stp1_11 = _mm_sub_epi16(stp2_8, stp2_11); \ - stp1_12 = _mm_sub_epi16(stp2_15, stp2_12); \ - stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \ - stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \ - stp1_15 = _mm_add_epi16(stp2_15, stp2_12); \ - \ - stp1_16 = stp2_16; \ - stp1_17 = stp2_17; \ - \ - MULTIPLICATION_AND_ADD(lo_18_29, hi_18_29, lo_19_28, hi_19_28, stg4_4, \ - stg4_5, stg4_4, stg4_5, stp1_18, stp1_29, \ - stp1_19, stp1_28) \ - MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg4_6, \ - stg4_4, stg4_6, stg4_4, stp1_20, stp1_27, \ - stp1_21, stp1_26) \ - \ - stp1_22 = stp2_22; \ - stp1_23 = stp2_23; \ - stp1_24 = stp2_24; \ - stp1_25 = stp2_25; \ - stp1_30 = stp2_30; \ - stp1_31 = stp2_31; \ -} \ -\ -/* Stage6 */ \ -{ \ - const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \ - const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \ - const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \ - const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \ - \ - stp2_0 = _mm_add_epi16(stp1_0, 
stp1_7); \ - stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \ - stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \ - stp2_3 = _mm_add_epi16(stp1_3, stp1_4); \ - stp2_4 = _mm_sub_epi16(stp1_3, stp1_4); \ - stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \ - stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \ - stp2_7 = _mm_sub_epi16(stp1_0, stp1_7); \ - \ - stp2_8 = stp1_8; \ - stp2_9 = stp1_9; \ - stp2_14 = stp1_14; \ - stp2_15 = stp1_15; \ - \ - MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, \ - stg6_0, stg4_0, stg6_0, stg4_0, stp2_10, \ - stp2_13, stp2_11, stp2_12) \ - \ - stp2_16 = _mm_add_epi16(stp1_16, stp1_23); \ - stp2_17 = _mm_add_epi16(stp1_17, stp1_22); \ - stp2_18 = _mm_add_epi16(stp1_18, stp1_21); \ - stp2_19 = _mm_add_epi16(stp1_19, stp1_20); \ - stp2_20 = _mm_sub_epi16(stp1_19, stp1_20); \ - stp2_21 = _mm_sub_epi16(stp1_18, stp1_21); \ - stp2_22 = _mm_sub_epi16(stp1_17, stp1_22); \ - stp2_23 = _mm_sub_epi16(stp1_16, stp1_23); \ - \ - stp2_24 = _mm_sub_epi16(stp1_31, stp1_24); \ - stp2_25 = _mm_sub_epi16(stp1_30, stp1_25); \ - stp2_26 = _mm_sub_epi16(stp1_29, stp1_26); \ - stp2_27 = _mm_sub_epi16(stp1_28, stp1_27); \ - stp2_28 = _mm_add_epi16(stp1_27, stp1_28); \ - stp2_29 = _mm_add_epi16(stp1_26, stp1_29); \ - stp2_30 = _mm_add_epi16(stp1_25, stp1_30); \ - stp2_31 = _mm_add_epi16(stp1_24, stp1_31); \ -} \ -\ -/* Stage7 */ \ -{ \ - const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \ - const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \ - const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \ - const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \ - \ - const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); \ - const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); \ - const __m128i lo_23_24 = _mm_unpacklo_epi16(stp2_23, stp2_24); \ - const __m128i hi_23_24 = _mm_unpackhi_epi16(stp2_23, stp2_24); \ - \ - stp1_0 = _mm_add_epi16(stp2_0, stp2_15); \ - stp1_1 = _mm_add_epi16(stp2_1, stp2_14); \ - stp1_2 = 
_mm_add_epi16(stp2_2, stp2_13); \ - stp1_3 = _mm_add_epi16(stp2_3, stp2_12); \ - stp1_4 = _mm_add_epi16(stp2_4, stp2_11); \ - stp1_5 = _mm_add_epi16(stp2_5, stp2_10); \ - stp1_6 = _mm_add_epi16(stp2_6, stp2_9); \ - stp1_7 = _mm_add_epi16(stp2_7, stp2_8); \ - stp1_8 = _mm_sub_epi16(stp2_7, stp2_8); \ - stp1_9 = _mm_sub_epi16(stp2_6, stp2_9); \ - stp1_10 = _mm_sub_epi16(stp2_5, stp2_10); \ - stp1_11 = _mm_sub_epi16(stp2_4, stp2_11); \ - stp1_12 = _mm_sub_epi16(stp2_3, stp2_12); \ - stp1_13 = _mm_sub_epi16(stp2_2, stp2_13); \ - stp1_14 = _mm_sub_epi16(stp2_1, stp2_14); \ - stp1_15 = _mm_sub_epi16(stp2_0, stp2_15); \ - \ - stp1_16 = stp2_16; \ - stp1_17 = stp2_17; \ - stp1_18 = stp2_18; \ - stp1_19 = stp2_19; \ - \ - MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg6_0, \ - stg4_0, stg6_0, stg4_0, stp1_20, stp1_27, \ - stp1_21, stp1_26) \ - MULTIPLICATION_AND_ADD(lo_22_25, hi_22_25, lo_23_24, hi_23_24, stg6_0, \ - stg4_0, stg6_0, stg4_0, stp1_22, stp1_25, \ - stp1_23, stp1_24) \ - \ - stp1_28 = stp2_28; \ - stp1_29 = stp2_29; \ - stp1_30 = stp2_30; \ - stp1_31 = stp2_31; \ -} - -// Only upper-left 8x8 has non-zero coeff -void vpx_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i final_rounding = _mm_set1_epi16(1<<5); - - // idct constants for each stage - const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64); - const __m128i stg1_1 = pair_set_epi16(cospi_1_64, cospi_31_64); - const __m128i stg1_6 = pair_set_epi16(cospi_7_64, -cospi_25_64); - const __m128i stg1_7 = pair_set_epi16(cospi_25_64, cospi_7_64); - const __m128i stg1_8 = pair_set_epi16(cospi_27_64, -cospi_5_64); - const __m128i stg1_9 = pair_set_epi16(cospi_5_64, cospi_27_64); - const __m128i stg1_14 = pair_set_epi16(cospi_3_64, -cospi_29_64); - const __m128i stg1_15 = pair_set_epi16(cospi_29_64, cospi_3_64); - - const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64); - 
const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64); - const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64); - const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64); - - const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i stg3_4 = pair_set_epi16(-cospi_4_64, cospi_28_64); - const __m128i stg3_5 = pair_set_epi16(cospi_28_64, cospi_4_64); - const __m128i stg3_6 = pair_set_epi16(-cospi_28_64, -cospi_4_64); - const __m128i stg3_8 = pair_set_epi16(-cospi_20_64, cospi_12_64); - const __m128i stg3_9 = pair_set_epi16(cospi_12_64, cospi_20_64); - const __m128i stg3_10 = pair_set_epi16(-cospi_12_64, -cospi_20_64); - - const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64); - const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); - const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); - - const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); - - __m128i in[32], col[32]; - __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7, - stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, - stp1_16, stp1_17, stp1_18, stp1_19, stp1_20, stp1_21, stp1_22, - stp1_23, stp1_24, stp1_25, stp1_26, stp1_27, stp1_28, stp1_29, - stp1_30, stp1_31; - __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, - stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15, - stp2_16, stp2_17, stp2_18, stp2_19, stp2_20, stp2_21, stp2_22, - stp2_23, stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29, - stp2_30, stp2_31; - __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - int i; - - // Load input data. Only need to load the top left 8x8 block. 
- in[0] = load_input_data(input); - in[1] = load_input_data(input + 32); - in[2] = load_input_data(input + 64); - in[3] = load_input_data(input + 96); - in[4] = load_input_data(input + 128); - in[5] = load_input_data(input + 160); - in[6] = load_input_data(input + 192); - in[7] = load_input_data(input + 224); - - for (i = 8; i < 32; ++i) { - in[i] = _mm_setzero_si128(); - } - - array_transpose_8x8(in, in); - // TODO(hkuang): Following transposes are unnecessary. But remove them will - // lead to performance drop on some devices. - array_transpose_8x8(in + 8, in + 8); - array_transpose_8x8(in + 16, in + 16); - array_transpose_8x8(in + 24, in + 24); - - IDCT32_34 - - // 1_D: Store 32 intermediate results for each 8x32 block. - col[0] = _mm_add_epi16(stp1_0, stp1_31); - col[1] = _mm_add_epi16(stp1_1, stp1_30); - col[2] = _mm_add_epi16(stp1_2, stp1_29); - col[3] = _mm_add_epi16(stp1_3, stp1_28); - col[4] = _mm_add_epi16(stp1_4, stp1_27); - col[5] = _mm_add_epi16(stp1_5, stp1_26); - col[6] = _mm_add_epi16(stp1_6, stp1_25); - col[7] = _mm_add_epi16(stp1_7, stp1_24); - col[8] = _mm_add_epi16(stp1_8, stp1_23); - col[9] = _mm_add_epi16(stp1_9, stp1_22); - col[10] = _mm_add_epi16(stp1_10, stp1_21); - col[11] = _mm_add_epi16(stp1_11, stp1_20); - col[12] = _mm_add_epi16(stp1_12, stp1_19); - col[13] = _mm_add_epi16(stp1_13, stp1_18); - col[14] = _mm_add_epi16(stp1_14, stp1_17); - col[15] = _mm_add_epi16(stp1_15, stp1_16); - col[16] = _mm_sub_epi16(stp1_15, stp1_16); - col[17] = _mm_sub_epi16(stp1_14, stp1_17); - col[18] = _mm_sub_epi16(stp1_13, stp1_18); - col[19] = _mm_sub_epi16(stp1_12, stp1_19); - col[20] = _mm_sub_epi16(stp1_11, stp1_20); - col[21] = _mm_sub_epi16(stp1_10, stp1_21); - col[22] = _mm_sub_epi16(stp1_9, stp1_22); - col[23] = _mm_sub_epi16(stp1_8, stp1_23); - col[24] = _mm_sub_epi16(stp1_7, stp1_24); - col[25] = _mm_sub_epi16(stp1_6, stp1_25); - col[26] = _mm_sub_epi16(stp1_5, stp1_26); - col[27] = _mm_sub_epi16(stp1_4, stp1_27); - col[28] = 
_mm_sub_epi16(stp1_3, stp1_28); - col[29] = _mm_sub_epi16(stp1_2, stp1_29); - col[30] = _mm_sub_epi16(stp1_1, stp1_30); - col[31] = _mm_sub_epi16(stp1_0, stp1_31); - for (i = 0; i < 4; i++) { - int j; - const __m128i zero = _mm_setzero_si128(); - // Transpose 32x8 block to 8x32 block - array_transpose_8x8(col + i * 8, in); - IDCT32_34 - - // 2_D: Calculate the results and store them to destination. - in[0] = _mm_add_epi16(stp1_0, stp1_31); - in[1] = _mm_add_epi16(stp1_1, stp1_30); - in[2] = _mm_add_epi16(stp1_2, stp1_29); - in[3] = _mm_add_epi16(stp1_3, stp1_28); - in[4] = _mm_add_epi16(stp1_4, stp1_27); - in[5] = _mm_add_epi16(stp1_5, stp1_26); - in[6] = _mm_add_epi16(stp1_6, stp1_25); - in[7] = _mm_add_epi16(stp1_7, stp1_24); - in[8] = _mm_add_epi16(stp1_8, stp1_23); - in[9] = _mm_add_epi16(stp1_9, stp1_22); - in[10] = _mm_add_epi16(stp1_10, stp1_21); - in[11] = _mm_add_epi16(stp1_11, stp1_20); - in[12] = _mm_add_epi16(stp1_12, stp1_19); - in[13] = _mm_add_epi16(stp1_13, stp1_18); - in[14] = _mm_add_epi16(stp1_14, stp1_17); - in[15] = _mm_add_epi16(stp1_15, stp1_16); - in[16] = _mm_sub_epi16(stp1_15, stp1_16); - in[17] = _mm_sub_epi16(stp1_14, stp1_17); - in[18] = _mm_sub_epi16(stp1_13, stp1_18); - in[19] = _mm_sub_epi16(stp1_12, stp1_19); - in[20] = _mm_sub_epi16(stp1_11, stp1_20); - in[21] = _mm_sub_epi16(stp1_10, stp1_21); - in[22] = _mm_sub_epi16(stp1_9, stp1_22); - in[23] = _mm_sub_epi16(stp1_8, stp1_23); - in[24] = _mm_sub_epi16(stp1_7, stp1_24); - in[25] = _mm_sub_epi16(stp1_6, stp1_25); - in[26] = _mm_sub_epi16(stp1_5, stp1_26); - in[27] = _mm_sub_epi16(stp1_4, stp1_27); - in[28] = _mm_sub_epi16(stp1_3, stp1_28); - in[29] = _mm_sub_epi16(stp1_2, stp1_29); - in[30] = _mm_sub_epi16(stp1_1, stp1_30); - in[31] = _mm_sub_epi16(stp1_0, stp1_31); - - for (j = 0; j < 32; ++j) { - // Final rounding and shift - in[j] = _mm_adds_epi16(in[j], final_rounding); - in[j] = _mm_srai_epi16(in[j], 6); - RECON_AND_STORE(dest + j * stride, in[j]); - } - - dest += 8; - } -} - 
-void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); - const __m128i final_rounding = _mm_set1_epi16(1 << 5); - const __m128i zero = _mm_setzero_si128(); - - // idct constants for each stage - const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64); - const __m128i stg1_1 = pair_set_epi16(cospi_1_64, cospi_31_64); - const __m128i stg1_2 = pair_set_epi16(cospi_15_64, -cospi_17_64); - const __m128i stg1_3 = pair_set_epi16(cospi_17_64, cospi_15_64); - const __m128i stg1_4 = pair_set_epi16(cospi_23_64, -cospi_9_64); - const __m128i stg1_5 = pair_set_epi16(cospi_9_64, cospi_23_64); - const __m128i stg1_6 = pair_set_epi16(cospi_7_64, -cospi_25_64); - const __m128i stg1_7 = pair_set_epi16(cospi_25_64, cospi_7_64); - const __m128i stg1_8 = pair_set_epi16(cospi_27_64, -cospi_5_64); - const __m128i stg1_9 = pair_set_epi16(cospi_5_64, cospi_27_64); - const __m128i stg1_10 = pair_set_epi16(cospi_11_64, -cospi_21_64); - const __m128i stg1_11 = pair_set_epi16(cospi_21_64, cospi_11_64); - const __m128i stg1_12 = pair_set_epi16(cospi_19_64, -cospi_13_64); - const __m128i stg1_13 = pair_set_epi16(cospi_13_64, cospi_19_64); - const __m128i stg1_14 = pair_set_epi16(cospi_3_64, -cospi_29_64); - const __m128i stg1_15 = pair_set_epi16(cospi_29_64, cospi_3_64); - - const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64); - const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64); - const __m128i stg2_2 = pair_set_epi16(cospi_14_64, -cospi_18_64); - const __m128i stg2_3 = pair_set_epi16(cospi_18_64, cospi_14_64); - const __m128i stg2_4 = pair_set_epi16(cospi_22_64, -cospi_10_64); - const __m128i stg2_5 = pair_set_epi16(cospi_10_64, cospi_22_64); - const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64); - const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64); - - const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); - const __m128i stg3_1 = 
pair_set_epi16(cospi_4_64, cospi_28_64); - const __m128i stg3_2 = pair_set_epi16(cospi_12_64, -cospi_20_64); - const __m128i stg3_3 = pair_set_epi16(cospi_20_64, cospi_12_64); - const __m128i stg3_4 = pair_set_epi16(-cospi_4_64, cospi_28_64); - const __m128i stg3_5 = pair_set_epi16(cospi_28_64, cospi_4_64); - const __m128i stg3_6 = pair_set_epi16(-cospi_28_64, -cospi_4_64); - const __m128i stg3_8 = pair_set_epi16(-cospi_20_64, cospi_12_64); - const __m128i stg3_9 = pair_set_epi16(cospi_12_64, cospi_20_64); - const __m128i stg3_10 = pair_set_epi16(-cospi_12_64, -cospi_20_64); - - const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64); - const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i stg4_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); - const __m128i stg4_3 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64); - const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64); - const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64); - - const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64); - - __m128i in[32], col[128], zero_idx[16]; - __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7, - stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, - stp1_16, stp1_17, stp1_18, stp1_19, stp1_20, stp1_21, stp1_22, - stp1_23, stp1_24, stp1_25, stp1_26, stp1_27, stp1_28, stp1_29, - stp1_30, stp1_31; - __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7, - stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15, - stp2_16, stp2_17, stp2_18, stp2_19, stp2_20, stp2_21, stp2_22, - stp2_23, stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29, - stp2_30, stp2_31; - __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - int i, j, i32; - - for (i = 0; i < 4; i++) { - i32 = (i << 5); - // First 1-D idct - // Load input data. 
- LOAD_DQCOEFF(in[0], input); - LOAD_DQCOEFF(in[8], input); - LOAD_DQCOEFF(in[16], input); - LOAD_DQCOEFF(in[24], input); - LOAD_DQCOEFF(in[1], input); - LOAD_DQCOEFF(in[9], input); - LOAD_DQCOEFF(in[17], input); - LOAD_DQCOEFF(in[25], input); - LOAD_DQCOEFF(in[2], input); - LOAD_DQCOEFF(in[10], input); - LOAD_DQCOEFF(in[18], input); - LOAD_DQCOEFF(in[26], input); - LOAD_DQCOEFF(in[3], input); - LOAD_DQCOEFF(in[11], input); - LOAD_DQCOEFF(in[19], input); - LOAD_DQCOEFF(in[27], input); - - LOAD_DQCOEFF(in[4], input); - LOAD_DQCOEFF(in[12], input); - LOAD_DQCOEFF(in[20], input); - LOAD_DQCOEFF(in[28], input); - LOAD_DQCOEFF(in[5], input); - LOAD_DQCOEFF(in[13], input); - LOAD_DQCOEFF(in[21], input); - LOAD_DQCOEFF(in[29], input); - LOAD_DQCOEFF(in[6], input); - LOAD_DQCOEFF(in[14], input); - LOAD_DQCOEFF(in[22], input); - LOAD_DQCOEFF(in[30], input); - LOAD_DQCOEFF(in[7], input); - LOAD_DQCOEFF(in[15], input); - LOAD_DQCOEFF(in[23], input); - LOAD_DQCOEFF(in[31], input); - - // checking if all entries are zero - zero_idx[0] = _mm_or_si128(in[0], in[1]); - zero_idx[1] = _mm_or_si128(in[2], in[3]); - zero_idx[2] = _mm_or_si128(in[4], in[5]); - zero_idx[3] = _mm_or_si128(in[6], in[7]); - zero_idx[4] = _mm_or_si128(in[8], in[9]); - zero_idx[5] = _mm_or_si128(in[10], in[11]); - zero_idx[6] = _mm_or_si128(in[12], in[13]); - zero_idx[7] = _mm_or_si128(in[14], in[15]); - zero_idx[8] = _mm_or_si128(in[16], in[17]); - zero_idx[9] = _mm_or_si128(in[18], in[19]); - zero_idx[10] = _mm_or_si128(in[20], in[21]); - zero_idx[11] = _mm_or_si128(in[22], in[23]); - zero_idx[12] = _mm_or_si128(in[24], in[25]); - zero_idx[13] = _mm_or_si128(in[26], in[27]); - zero_idx[14] = _mm_or_si128(in[28], in[29]); - zero_idx[15] = _mm_or_si128(in[30], in[31]); - - zero_idx[0] = _mm_or_si128(zero_idx[0], zero_idx[1]); - zero_idx[1] = _mm_or_si128(zero_idx[2], zero_idx[3]); - zero_idx[2] = _mm_or_si128(zero_idx[4], zero_idx[5]); - zero_idx[3] = _mm_or_si128(zero_idx[6], zero_idx[7]); - zero_idx[4] = 
_mm_or_si128(zero_idx[8], zero_idx[9]); - zero_idx[5] = _mm_or_si128(zero_idx[10], zero_idx[11]); - zero_idx[6] = _mm_or_si128(zero_idx[12], zero_idx[13]); - zero_idx[7] = _mm_or_si128(zero_idx[14], zero_idx[15]); - - zero_idx[8] = _mm_or_si128(zero_idx[0], zero_idx[1]); - zero_idx[9] = _mm_or_si128(zero_idx[2], zero_idx[3]); - zero_idx[10] = _mm_or_si128(zero_idx[4], zero_idx[5]); - zero_idx[11] = _mm_or_si128(zero_idx[6], zero_idx[7]); - zero_idx[12] = _mm_or_si128(zero_idx[8], zero_idx[9]); - zero_idx[13] = _mm_or_si128(zero_idx[10], zero_idx[11]); - zero_idx[14] = _mm_or_si128(zero_idx[12], zero_idx[13]); - - if (_mm_movemask_epi8(_mm_cmpeq_epi32(zero_idx[14], zero)) == 0xFFFF) { - col[i32 + 0] = _mm_setzero_si128(); - col[i32 + 1] = _mm_setzero_si128(); - col[i32 + 2] = _mm_setzero_si128(); - col[i32 + 3] = _mm_setzero_si128(); - col[i32 + 4] = _mm_setzero_si128(); - col[i32 + 5] = _mm_setzero_si128(); - col[i32 + 6] = _mm_setzero_si128(); - col[i32 + 7] = _mm_setzero_si128(); - col[i32 + 8] = _mm_setzero_si128(); - col[i32 + 9] = _mm_setzero_si128(); - col[i32 + 10] = _mm_setzero_si128(); - col[i32 + 11] = _mm_setzero_si128(); - col[i32 + 12] = _mm_setzero_si128(); - col[i32 + 13] = _mm_setzero_si128(); - col[i32 + 14] = _mm_setzero_si128(); - col[i32 + 15] = _mm_setzero_si128(); - col[i32 + 16] = _mm_setzero_si128(); - col[i32 + 17] = _mm_setzero_si128(); - col[i32 + 18] = _mm_setzero_si128(); - col[i32 + 19] = _mm_setzero_si128(); - col[i32 + 20] = _mm_setzero_si128(); - col[i32 + 21] = _mm_setzero_si128(); - col[i32 + 22] = _mm_setzero_si128(); - col[i32 + 23] = _mm_setzero_si128(); - col[i32 + 24] = _mm_setzero_si128(); - col[i32 + 25] = _mm_setzero_si128(); - col[i32 + 26] = _mm_setzero_si128(); - col[i32 + 27] = _mm_setzero_si128(); - col[i32 + 28] = _mm_setzero_si128(); - col[i32 + 29] = _mm_setzero_si128(); - col[i32 + 30] = _mm_setzero_si128(); - col[i32 + 31] = _mm_setzero_si128(); - continue; - } - - // Transpose 32x8 block to 8x32 block - 
array_transpose_8x8(in, in); - array_transpose_8x8(in + 8, in + 8); - array_transpose_8x8(in + 16, in + 16); - array_transpose_8x8(in + 24, in + 24); - - IDCT32 - - // 1_D: Store 32 intermediate results for each 8x32 block. - col[i32 + 0] = _mm_add_epi16(stp1_0, stp1_31); - col[i32 + 1] = _mm_add_epi16(stp1_1, stp1_30); - col[i32 + 2] = _mm_add_epi16(stp1_2, stp1_29); - col[i32 + 3] = _mm_add_epi16(stp1_3, stp1_28); - col[i32 + 4] = _mm_add_epi16(stp1_4, stp1_27); - col[i32 + 5] = _mm_add_epi16(stp1_5, stp1_26); - col[i32 + 6] = _mm_add_epi16(stp1_6, stp1_25); - col[i32 + 7] = _mm_add_epi16(stp1_7, stp1_24); - col[i32 + 8] = _mm_add_epi16(stp1_8, stp1_23); - col[i32 + 9] = _mm_add_epi16(stp1_9, stp1_22); - col[i32 + 10] = _mm_add_epi16(stp1_10, stp1_21); - col[i32 + 11] = _mm_add_epi16(stp1_11, stp1_20); - col[i32 + 12] = _mm_add_epi16(stp1_12, stp1_19); - col[i32 + 13] = _mm_add_epi16(stp1_13, stp1_18); - col[i32 + 14] = _mm_add_epi16(stp1_14, stp1_17); - col[i32 + 15] = _mm_add_epi16(stp1_15, stp1_16); - col[i32 + 16] = _mm_sub_epi16(stp1_15, stp1_16); - col[i32 + 17] = _mm_sub_epi16(stp1_14, stp1_17); - col[i32 + 18] = _mm_sub_epi16(stp1_13, stp1_18); - col[i32 + 19] = _mm_sub_epi16(stp1_12, stp1_19); - col[i32 + 20] = _mm_sub_epi16(stp1_11, stp1_20); - col[i32 + 21] = _mm_sub_epi16(stp1_10, stp1_21); - col[i32 + 22] = _mm_sub_epi16(stp1_9, stp1_22); - col[i32 + 23] = _mm_sub_epi16(stp1_8, stp1_23); - col[i32 + 24] = _mm_sub_epi16(stp1_7, stp1_24); - col[i32 + 25] = _mm_sub_epi16(stp1_6, stp1_25); - col[i32 + 26] = _mm_sub_epi16(stp1_5, stp1_26); - col[i32 + 27] = _mm_sub_epi16(stp1_4, stp1_27); - col[i32 + 28] = _mm_sub_epi16(stp1_3, stp1_28); - col[i32 + 29] = _mm_sub_epi16(stp1_2, stp1_29); - col[i32 + 30] = _mm_sub_epi16(stp1_1, stp1_30); - col[i32 + 31] = _mm_sub_epi16(stp1_0, stp1_31); - } - for (i = 0; i < 4; i++) { - // Second 1-D idct - j = i << 3; - - // Transpose 32x8 block to 8x32 block - array_transpose_8x8(col + j, in); - array_transpose_8x8(col + 
j + 32, in + 8); - array_transpose_8x8(col + j + 64, in + 16); - array_transpose_8x8(col + j + 96, in + 24); - - IDCT32 - - // 2_D: Calculate the results and store them to destination. - in[0] = _mm_add_epi16(stp1_0, stp1_31); - in[1] = _mm_add_epi16(stp1_1, stp1_30); - in[2] = _mm_add_epi16(stp1_2, stp1_29); - in[3] = _mm_add_epi16(stp1_3, stp1_28); - in[4] = _mm_add_epi16(stp1_4, stp1_27); - in[5] = _mm_add_epi16(stp1_5, stp1_26); - in[6] = _mm_add_epi16(stp1_6, stp1_25); - in[7] = _mm_add_epi16(stp1_7, stp1_24); - in[8] = _mm_add_epi16(stp1_8, stp1_23); - in[9] = _mm_add_epi16(stp1_9, stp1_22); - in[10] = _mm_add_epi16(stp1_10, stp1_21); - in[11] = _mm_add_epi16(stp1_11, stp1_20); - in[12] = _mm_add_epi16(stp1_12, stp1_19); - in[13] = _mm_add_epi16(stp1_13, stp1_18); - in[14] = _mm_add_epi16(stp1_14, stp1_17); - in[15] = _mm_add_epi16(stp1_15, stp1_16); - in[16] = _mm_sub_epi16(stp1_15, stp1_16); - in[17] = _mm_sub_epi16(stp1_14, stp1_17); - in[18] = _mm_sub_epi16(stp1_13, stp1_18); - in[19] = _mm_sub_epi16(stp1_12, stp1_19); - in[20] = _mm_sub_epi16(stp1_11, stp1_20); - in[21] = _mm_sub_epi16(stp1_10, stp1_21); - in[22] = _mm_sub_epi16(stp1_9, stp1_22); - in[23] = _mm_sub_epi16(stp1_8, stp1_23); - in[24] = _mm_sub_epi16(stp1_7, stp1_24); - in[25] = _mm_sub_epi16(stp1_6, stp1_25); - in[26] = _mm_sub_epi16(stp1_5, stp1_26); - in[27] = _mm_sub_epi16(stp1_4, stp1_27); - in[28] = _mm_sub_epi16(stp1_3, stp1_28); - in[29] = _mm_sub_epi16(stp1_2, stp1_29); - in[30] = _mm_sub_epi16(stp1_1, stp1_30); - in[31] = _mm_sub_epi16(stp1_0, stp1_31); - - for (j = 0; j < 32; ++j) { - // Final rounding and shift - in[j] = _mm_adds_epi16(in[j], final_rounding); - in[j] = _mm_srai_epi16(in[j], 6); - RECON_AND_STORE(dest + j * stride, in[j]); - } - - dest += 8; - } -} - -void vpx_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest, - int stride) { - __m128i dc_value; - const __m128i zero = _mm_setzero_si128(); - int a, j; - - a = (int)dct_const_round_shift(input[0] * 
cospi_16_64); - a = (int)dct_const_round_shift(a * cospi_16_64); - a = ROUND_POWER_OF_TWO(a, 6); - - dc_value = _mm_set1_epi16(a); - - for (j = 0; j < 32; ++j) { - RECON_AND_STORE(dest + 0 + j * stride, dc_value); - RECON_AND_STORE(dest + 8 + j * stride, dc_value); - RECON_AND_STORE(dest + 16 + j * stride, dc_value); - RECON_AND_STORE(dest + 24 + j * stride, dc_value); - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -static INLINE __m128i clamp_high_sse2(__m128i value, int bd) { - __m128i ubounded, retval; - const __m128i zero = _mm_set1_epi16(0); - const __m128i one = _mm_set1_epi16(1); - const __m128i max = _mm_subs_epi16(_mm_slli_epi16(one, bd), one); - ubounded = _mm_cmpgt_epi16(value, max); - retval = _mm_andnot_si128(ubounded, value); - ubounded = _mm_and_si128(ubounded, max); - retval = _mm_or_si128(retval, ubounded); - retval = _mm_and_si128(retval, _mm_cmpgt_epi16(retval, zero)); - return retval; -} - -void vpx_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[4 * 4]; - tran_low_t *outptr = out; - int i, j; - __m128i inptr[4]; - __m128i sign_bits[2]; - __m128i temp_mm, min_input, max_input; - int test; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - int optimised_cols = 0; - const __m128i zero = _mm_set1_epi16(0); - const __m128i eight = _mm_set1_epi16(8); - const __m128i max = _mm_set1_epi16(12043); - const __m128i min = _mm_set1_epi16(-12043); - // Load input into __m128i - inptr[0] = _mm_loadu_si128((const __m128i *)input); - inptr[1] = _mm_loadu_si128((const __m128i *)(input + 4)); - inptr[2] = _mm_loadu_si128((const __m128i *)(input + 8)); - inptr[3] = _mm_loadu_si128((const __m128i *)(input + 12)); - - // Pack to 16 bits - inptr[0] = _mm_packs_epi32(inptr[0], inptr[1]); - inptr[1] = _mm_packs_epi32(inptr[2], inptr[3]); - - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = 
_mm_cmplt_epi16(min_input, min); - temp_mm = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp_mm); - - if (!test) { - // Do the row transform - idct4_sse2(inptr); - - // Check the min & max values - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp_mm = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp_mm); - - if (test) { - transpose_4x4(inptr); - sign_bits[0] = _mm_cmplt_epi16(inptr[0], zero); - sign_bits[1] = _mm_cmplt_epi16(inptr[1], zero); - inptr[3] = _mm_unpackhi_epi16(inptr[1], sign_bits[1]); - inptr[2] = _mm_unpacklo_epi16(inptr[1], sign_bits[1]); - inptr[1] = _mm_unpackhi_epi16(inptr[0], sign_bits[0]); - inptr[0] = _mm_unpacklo_epi16(inptr[0], sign_bits[0]); - _mm_storeu_si128((__m128i *)outptr, inptr[0]); - _mm_storeu_si128((__m128i *)(outptr + 4), inptr[1]); - _mm_storeu_si128((__m128i *)(outptr + 8), inptr[2]); - _mm_storeu_si128((__m128i *)(outptr + 12), inptr[3]); - } else { - // Set to use the optimised transform for the column - optimised_cols = 1; - } - } else { - // Run the un-optimised row transform - for (i = 0; i < 4; ++i) { - vpx_highbd_idct4_c(input, outptr, bd); - input += 4; - outptr += 4; - } - } - - if (optimised_cols) { - idct4_sse2(inptr); - - // Final round and shift - inptr[0] = _mm_add_epi16(inptr[0], eight); - inptr[1] = _mm_add_epi16(inptr[1], eight); - - inptr[0] = _mm_srai_epi16(inptr[0], 4); - inptr[1] = _mm_srai_epi16(inptr[1], 4); - - // Reconstruction and Store - { - __m128i d0 = _mm_loadl_epi64((const __m128i *)dest); - __m128i d2 = _mm_loadl_epi64((const __m128i *)(dest + stride * 2)); - d0 = _mm_unpacklo_epi64( - d0, _mm_loadl_epi64((const __m128i *)(dest + stride))); - d2 = _mm_unpacklo_epi64( - d2, _mm_loadl_epi64((const __m128i *)(dest + stride * 3))); - d0 = clamp_high_sse2(_mm_adds_epi16(d0, inptr[0]), bd); - d2 = 
clamp_high_sse2(_mm_adds_epi16(d2, inptr[1]), bd); - // store input0 - _mm_storel_epi64((__m128i *)dest, d0); - // store input1 - d0 = _mm_srli_si128(d0, 8); - _mm_storel_epi64((__m128i *)(dest + stride), d0); - // store input2 - _mm_storel_epi64((__m128i *)(dest + stride * 2), d2); - // store input3 - d2 = _mm_srli_si128(d2, 8); - _mm_storel_epi64((__m128i *)(dest + stride * 3), d2); - } - } else { - // Run the un-optimised column transform - tran_low_t temp_in[4], temp_out[4]; - // Columns - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j * 4 + i]; - vpx_highbd_idct4_c(temp_in, temp_out, bd); - for (j = 0; j < 4; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); - } - } - } -} - -void vpx_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[8 * 8]; - tran_low_t *outptr = out; - int i, j, test; - __m128i inptr[8]; - __m128i min_input, max_input, temp1, temp2, sign_bits; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - const __m128i zero = _mm_set1_epi16(0); - const __m128i sixteen = _mm_set1_epi16(16); - const __m128i max = _mm_set1_epi16(6201); - const __m128i min = _mm_set1_epi16(-6201); - int optimised_cols = 0; - - // Load input into __m128i & pack to 16 bits - for (i = 0; i < 8; i++) { - temp1 = _mm_loadu_si128((const __m128i *)(input + 8 * i)); - temp2 = _mm_loadu_si128((const __m128i *)(input + 8 * i + 4)); - inptr[i] = _mm_packs_epi32(temp1, temp2); - } - - // Find the min & max for the row transform - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - for (i = 2; i < 8; i++) { - max_input = _mm_max_epi16(max_input, inptr[i]); - min_input = _mm_min_epi16(min_input, inptr[i]); - } - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp1 = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp1); - - if 
(!test) { - // Do the row transform - idct8_sse2(inptr); - - // Find the min & max for the column transform - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - for (i = 2; i < 8; i++) { - max_input = _mm_max_epi16(max_input, inptr[i]); - min_input = _mm_min_epi16(min_input, inptr[i]); - } - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp1 = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp1); - - if (test) { - array_transpose_8x8(inptr, inptr); - for (i = 0; i < 8; i++) { - sign_bits = _mm_cmplt_epi16(inptr[i], zero); - temp1 = _mm_unpackhi_epi16(inptr[i], sign_bits); - temp2 = _mm_unpacklo_epi16(inptr[i], sign_bits); - _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i + 1)), temp1); - _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i)), temp2); - } - } else { - // Set to use the optimised transform for the column - optimised_cols = 1; - } - } else { - // Run the un-optimised row transform - for (i = 0; i < 8; ++i) { - vpx_highbd_idct8_c(input, outptr, bd); - input += 8; - outptr += 8; - } - } - - if (optimised_cols) { - idct8_sse2(inptr); - - // Final round & shift and Reconstruction and Store - { - __m128i d[8]; - for (i = 0; i < 8; i++) { - inptr[i] = _mm_add_epi16(inptr[i], sixteen); - d[i] = _mm_loadu_si128((const __m128i *)(dest + stride*i)); - inptr[i] = _mm_srai_epi16(inptr[i], 5); - d[i] = clamp_high_sse2(_mm_adds_epi16(d[i], inptr[i]), bd); - // Store - _mm_storeu_si128((__m128i *)(dest + stride*i), d[i]); - } - } - } else { - // Run the un-optimised column transform - tran_low_t temp_in[8], temp_out[8]; - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; - vpx_highbd_idct8_c(temp_in, temp_out, bd); - for (j = 0; j < 8; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); - } - } - } -} - -void vpx_highbd_idct8x8_10_add_sse2(const tran_low_t 
*input, uint8_t *dest8, - int stride, int bd) { - tran_low_t out[8 * 8] = { 0 }; - tran_low_t *outptr = out; - int i, j, test; - __m128i inptr[8]; - __m128i min_input, max_input, temp1, temp2, sign_bits; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - const __m128i zero = _mm_set1_epi16(0); - const __m128i sixteen = _mm_set1_epi16(16); - const __m128i max = _mm_set1_epi16(6201); - const __m128i min = _mm_set1_epi16(-6201); - int optimised_cols = 0; - - // Load input into __m128i & pack to 16 bits - for (i = 0; i < 8; i++) { - temp1 = _mm_loadu_si128((const __m128i *)(input + 8 * i)); - temp2 = _mm_loadu_si128((const __m128i *)(input + 8 * i + 4)); - inptr[i] = _mm_packs_epi32(temp1, temp2); - } - - // Find the min & max for the row transform - // only first 4 row has non-zero coefs - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - for (i = 2; i < 4; i++) { - max_input = _mm_max_epi16(max_input, inptr[i]); - min_input = _mm_min_epi16(min_input, inptr[i]); - } - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp1 = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp1); - - if (!test) { - // Do the row transform - idct8_sse2(inptr); - - // Find the min & max for the column transform - // N.B. 
Only first 4 cols contain non-zero coeffs - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - for (i = 2; i < 8; i++) { - max_input = _mm_max_epi16(max_input, inptr[i]); - min_input = _mm_min_epi16(min_input, inptr[i]); - } - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp1 = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp1); - - if (test) { - // Use fact only first 4 rows contain non-zero coeffs - array_transpose_4X8(inptr, inptr); - for (i = 0; i < 4; i++) { - sign_bits = _mm_cmplt_epi16(inptr[i], zero); - temp1 = _mm_unpackhi_epi16(inptr[i], sign_bits); - temp2 = _mm_unpacklo_epi16(inptr[i], sign_bits); - _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i + 1)), temp1); - _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i)), temp2); - } - } else { - // Set to use the optimised transform for the column - optimised_cols = 1; - } - } else { - // Run the un-optimised row transform - for (i = 0; i < 4; ++i) { - vpx_highbd_idct8_c(input, outptr, bd); - input += 8; - outptr += 8; - } - } - - if (optimised_cols) { - idct8_sse2(inptr); - - // Final round & shift and Reconstruction and Store - { - __m128i d[8]; - for (i = 0; i < 8; i++) { - inptr[i] = _mm_add_epi16(inptr[i], sixteen); - d[i] = _mm_loadu_si128((const __m128i *)(dest + stride*i)); - inptr[i] = _mm_srai_epi16(inptr[i], 5); - d[i] = clamp_high_sse2(_mm_adds_epi16(d[i], inptr[i]), bd); - // Store - _mm_storeu_si128((__m128i *)(dest + stride*i), d[i]); - } - } - } else { - // Run the un-optimised column transform - tran_low_t temp_in[8], temp_out[8]; - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j * 8 + i]; - vpx_highbd_idct8_c(temp_in, temp_out, bd); - for (j = 0; j < 8; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); - } - } - } -} - -void vpx_highbd_idct16x16_256_add_sse2(const tran_low_t *input, 
uint8_t *dest8, - int stride, int bd) { - tran_low_t out[16 * 16]; - tran_low_t *outptr = out; - int i, j, test; - __m128i inptr[32]; - __m128i min_input, max_input, temp1, temp2, sign_bits; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - const __m128i zero = _mm_set1_epi16(0); - const __m128i rounding = _mm_set1_epi16(32); - const __m128i max = _mm_set1_epi16(3155); - const __m128i min = _mm_set1_epi16(-3155); - int optimised_cols = 0; - - // Load input into __m128i & pack to 16 bits - for (i = 0; i < 16; i++) { - temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i)); - temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 4)); - inptr[i] = _mm_packs_epi32(temp1, temp2); - temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 8)); - temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 12)); - inptr[i + 16] = _mm_packs_epi32(temp1, temp2); - } - - // Find the min & max for the row transform - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - for (i = 2; i < 32; i++) { - max_input = _mm_max_epi16(max_input, inptr[i]); - min_input = _mm_min_epi16(min_input, inptr[i]); - } - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp1 = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp1); - - if (!test) { - // Do the row transform - idct16_sse2(inptr, inptr + 16); - - // Find the min & max for the column transform - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - for (i = 2; i < 32; i++) { - max_input = _mm_max_epi16(max_input, inptr[i]); - min_input = _mm_min_epi16(min_input, inptr[i]); - } - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp1 = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp1); - - if (test) { - array_transpose_16x16(inptr, inptr + 16); - for (i = 0; i < 16; i++) { - sign_bits = _mm_cmplt_epi16(inptr[i], 
zero); - temp1 = _mm_unpacklo_epi16(inptr[i], sign_bits); - temp2 = _mm_unpackhi_epi16(inptr[i], sign_bits); - _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4)), temp1); - _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 1)), temp2); - sign_bits = _mm_cmplt_epi16(inptr[i + 16], zero); - temp1 = _mm_unpacklo_epi16(inptr[i + 16], sign_bits); - temp2 = _mm_unpackhi_epi16(inptr[i + 16], sign_bits); - _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 2)), temp1); - _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 3)), temp2); - } - } else { - // Set to use the optimised transform for the column - optimised_cols = 1; - } - } else { - // Run the un-optimised row transform - for (i = 0; i < 16; ++i) { - vpx_highbd_idct16_c(input, outptr, bd); - input += 16; - outptr += 16; - } - } - - if (optimised_cols) { - idct16_sse2(inptr, inptr + 16); - - // Final round & shift and Reconstruction and Store - { - __m128i d[2]; - for (i = 0; i < 16; i++) { - inptr[i ] = _mm_add_epi16(inptr[i ], rounding); - inptr[i+16] = _mm_add_epi16(inptr[i+16], rounding); - d[0] = _mm_loadu_si128((const __m128i *)(dest + stride*i)); - d[1] = _mm_loadu_si128((const __m128i *)(dest + stride*i + 8)); - inptr[i ] = _mm_srai_epi16(inptr[i ], 6); - inptr[i+16] = _mm_srai_epi16(inptr[i+16], 6); - d[0] = clamp_high_sse2(_mm_add_epi16(d[0], inptr[i ]), bd); - d[1] = clamp_high_sse2(_mm_add_epi16(d[1], inptr[i+16]), bd); - // Store - _mm_storeu_si128((__m128i *)(dest + stride*i), d[0]); - _mm_storeu_si128((__m128i *)(dest + stride*i + 8), d[1]); - } - } - } else { - // Run the un-optimised column transform - tran_low_t temp_in[16], temp_out[16]; - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; - vpx_highbd_idct16_c(temp_in, temp_out, bd); - for (j = 0; j < 16; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); - } - } - } -} - -void vpx_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t 
*dest8, - int stride, int bd) { - tran_low_t out[16 * 16] = { 0 }; - tran_low_t *outptr = out; - int i, j, test; - __m128i inptr[32]; - __m128i min_input, max_input, temp1, temp2, sign_bits; - uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); - const __m128i zero = _mm_set1_epi16(0); - const __m128i rounding = _mm_set1_epi16(32); - const __m128i max = _mm_set1_epi16(3155); - const __m128i min = _mm_set1_epi16(-3155); - int optimised_cols = 0; - - // Load input into __m128i & pack to 16 bits - for (i = 0; i < 16; i++) { - temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i)); - temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 4)); - inptr[i] = _mm_packs_epi32(temp1, temp2); - temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 8)); - temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 12)); - inptr[i + 16] = _mm_packs_epi32(temp1, temp2); - } - - // Find the min & max for the row transform - // Since all non-zero dct coefficients are in upper-left 4x4 area, - // we only need to consider first 4 rows here. - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - for (i = 2; i < 4; i++) { - max_input = _mm_max_epi16(max_input, inptr[i]); - min_input = _mm_min_epi16(min_input, inptr[i]); - } - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp1 = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp1); - - if (!test) { - // Do the row transform (N.B. This transposes inptr) - idct16_sse2(inptr, inptr + 16); - - // Find the min & max for the column transform - // N.B. 
Only first 4 cols contain non-zero coeffs - max_input = _mm_max_epi16(inptr[0], inptr[1]); - min_input = _mm_min_epi16(inptr[0], inptr[1]); - for (i = 2; i < 16; i++) { - max_input = _mm_max_epi16(max_input, inptr[i]); - min_input = _mm_min_epi16(min_input, inptr[i]); - } - max_input = _mm_cmpgt_epi16(max_input, max); - min_input = _mm_cmplt_epi16(min_input, min); - temp1 = _mm_or_si128(max_input, min_input); - test = _mm_movemask_epi8(temp1); - - if (test) { - // Use fact only first 4 rows contain non-zero coeffs - array_transpose_8x8(inptr, inptr); - array_transpose_8x8(inptr + 8, inptr + 16); - for (i = 0; i < 4; i++) { - sign_bits = _mm_cmplt_epi16(inptr[i], zero); - temp1 = _mm_unpacklo_epi16(inptr[i], sign_bits); - temp2 = _mm_unpackhi_epi16(inptr[i], sign_bits); - _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4)), temp1); - _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 1)), temp2); - sign_bits = _mm_cmplt_epi16(inptr[i + 16], zero); - temp1 = _mm_unpacklo_epi16(inptr[i + 16], sign_bits); - temp2 = _mm_unpackhi_epi16(inptr[i + 16], sign_bits); - _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 2)), temp1); - _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 3)), temp2); - } - } else { - // Set to use the optimised transform for the column - optimised_cols = 1; - } - } else { - // Run the un-optimised row transform - for (i = 0; i < 4; ++i) { - vpx_highbd_idct16_c(input, outptr, bd); - input += 16; - outptr += 16; - } - } - - if (optimised_cols) { - idct16_sse2(inptr, inptr + 16); - - // Final round & shift and Reconstruction and Store - { - __m128i d[2]; - for (i = 0; i < 16; i++) { - inptr[i ] = _mm_add_epi16(inptr[i ], rounding); - inptr[i+16] = _mm_add_epi16(inptr[i+16], rounding); - d[0] = _mm_loadu_si128((const __m128i *)(dest + stride*i)); - d[1] = _mm_loadu_si128((const __m128i *)(dest + stride*i + 8)); - inptr[i ] = _mm_srai_epi16(inptr[i ], 6); - inptr[i+16] = _mm_srai_epi16(inptr[i+16], 6); - d[0] = clamp_high_sse2(_mm_add_epi16(d[0], 
inptr[i ]), bd); - d[1] = clamp_high_sse2(_mm_add_epi16(d[1], inptr[i+16]), bd); - // Store - _mm_storeu_si128((__m128i *)(dest + stride*i), d[0]); - _mm_storeu_si128((__m128i *)(dest + stride*i + 8), d[1]); - } - } - } else { - // Run the un-optimised column transform - tran_low_t temp_in[16], temp_out[16]; - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j * 16 + i]; - vpx_highbd_idct16_c(temp_in, temp_out, bd); - for (j = 0; j < 16; ++j) { - dest[j * stride + i] = highbd_clip_pixel_add( - dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); - } - } - } -} -#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.h b/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.h deleted file mode 100644 index bd520c18e5..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_sse2.h +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VPX_DSP_X86_INV_TXFM_SSE2_H_ -#define VPX_DSP_X86_INV_TXFM_SSE2_H_ - -#include <emmintrin.h> // SSE2 -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" -#include "vpx_dsp/inv_txfm.h" -#include "vpx_dsp/x86/txfm_common_sse2.h" - -// perform 8x8 transpose -static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) { - const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]); - const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]); - const __m128i tr0_2 = _mm_unpackhi_epi16(in[0], in[1]); - const __m128i tr0_3 = _mm_unpackhi_epi16(in[2], in[3]); - const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]); - const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]); - const __m128i tr0_6 = _mm_unpackhi_epi16(in[4], in[5]); - const __m128i tr0_7 = _mm_unpackhi_epi16(in[6], in[7]); - - const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); - const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_4, tr0_5); - const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); - const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_4, tr0_5); - const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_2, tr0_3); - const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7); - const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_2, tr0_3); - const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7); - - res[0] = _mm_unpacklo_epi64(tr1_0, tr1_1); - res[1] = _mm_unpackhi_epi64(tr1_0, tr1_1); - res[2] = _mm_unpacklo_epi64(tr1_2, tr1_3); - res[3] = _mm_unpackhi_epi64(tr1_2, tr1_3); - res[4] = _mm_unpacklo_epi64(tr1_4, tr1_5); - res[5] = _mm_unpackhi_epi64(tr1_4, tr1_5); - res[6] = _mm_unpacklo_epi64(tr1_6, tr1_7); - res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7); -} - -#define TRANSPOSE_8X4(in0, in1, in2, in3, out0, out1) \ - { \ - const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \ - const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \ - \ - in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); /* i1 i0 */ \ - in1 = _mm_unpackhi_epi32(tr0_0, tr0_1); /* i3 i2 */ \ - } - -static INLINE void array_transpose_4X8(__m128i *in, __m128i 
* out) { - const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]); - const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]); - const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]); - const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]); - - const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); - const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); - const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); - const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); - - out[0] = _mm_unpacklo_epi64(tr1_0, tr1_4); - out[1] = _mm_unpackhi_epi64(tr1_0, tr1_4); - out[2] = _mm_unpacklo_epi64(tr1_2, tr1_6); - out[3] = _mm_unpackhi_epi64(tr1_2, tr1_6); -} - -static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) { - __m128i tbuf[8]; - array_transpose_8x8(res0, res0); - array_transpose_8x8(res1, tbuf); - array_transpose_8x8(res0 + 8, res1); - array_transpose_8x8(res1 + 8, res1 + 8); - - res0[8] = tbuf[0]; - res0[9] = tbuf[1]; - res0[10] = tbuf[2]; - res0[11] = tbuf[3]; - res0[12] = tbuf[4]; - res0[13] = tbuf[5]; - res0[14] = tbuf[6]; - res0[15] = tbuf[7]; -} - -// Function to allow 8 bit optimisations to be used when profile 0 is used with -// highbitdepth enabled -static INLINE __m128i load_input_data(const tran_low_t *data) { -#if CONFIG_VP9_HIGHBITDEPTH - return octa_set_epi16(data[0], data[1], data[2], data[3], data[4], data[5], - data[6], data[7]); -#else - return _mm_load_si128((const __m128i *)data); -#endif -} - -static INLINE void load_buffer_8x16(const tran_low_t *input, __m128i *in) { - in[0] = load_input_data(input + 0 * 16); - in[1] = load_input_data(input + 1 * 16); - in[2] = load_input_data(input + 2 * 16); - in[3] = load_input_data(input + 3 * 16); - in[4] = load_input_data(input + 4 * 16); - in[5] = load_input_data(input + 5 * 16); - in[6] = load_input_data(input + 6 * 16); - in[7] = load_input_data(input + 7 * 16); - - in[8] = load_input_data(input + 8 * 16); - in[9] = load_input_data(input + 9 * 16); - in[10] = load_input_data(input + 
10 * 16); - in[11] = load_input_data(input + 11 * 16); - in[12] = load_input_data(input + 12 * 16); - in[13] = load_input_data(input + 13 * 16); - in[14] = load_input_data(input + 14 * 16); - in[15] = load_input_data(input + 15 * 16); -} - -#define RECON_AND_STORE(dest, in_x) \ - { \ - __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); \ - d0 = _mm_unpacklo_epi8(d0, zero); \ - d0 = _mm_add_epi16(in_x, d0); \ - d0 = _mm_packus_epi16(d0, d0); \ - _mm_storel_epi64((__m128i *)(dest), d0); \ - } - -static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) { - const __m128i final_rounding = _mm_set1_epi16(1<<5); - const __m128i zero = _mm_setzero_si128(); - // Final rounding and shift - in[0] = _mm_adds_epi16(in[0], final_rounding); - in[1] = _mm_adds_epi16(in[1], final_rounding); - in[2] = _mm_adds_epi16(in[2], final_rounding); - in[3] = _mm_adds_epi16(in[3], final_rounding); - in[4] = _mm_adds_epi16(in[4], final_rounding); - in[5] = _mm_adds_epi16(in[5], final_rounding); - in[6] = _mm_adds_epi16(in[6], final_rounding); - in[7] = _mm_adds_epi16(in[7], final_rounding); - in[8] = _mm_adds_epi16(in[8], final_rounding); - in[9] = _mm_adds_epi16(in[9], final_rounding); - in[10] = _mm_adds_epi16(in[10], final_rounding); - in[11] = _mm_adds_epi16(in[11], final_rounding); - in[12] = _mm_adds_epi16(in[12], final_rounding); - in[13] = _mm_adds_epi16(in[13], final_rounding); - in[14] = _mm_adds_epi16(in[14], final_rounding); - in[15] = _mm_adds_epi16(in[15], final_rounding); - - in[0] = _mm_srai_epi16(in[0], 6); - in[1] = _mm_srai_epi16(in[1], 6); - in[2] = _mm_srai_epi16(in[2], 6); - in[3] = _mm_srai_epi16(in[3], 6); - in[4] = _mm_srai_epi16(in[4], 6); - in[5] = _mm_srai_epi16(in[5], 6); - in[6] = _mm_srai_epi16(in[6], 6); - in[7] = _mm_srai_epi16(in[7], 6); - in[8] = _mm_srai_epi16(in[8], 6); - in[9] = _mm_srai_epi16(in[9], 6); - in[10] = _mm_srai_epi16(in[10], 6); - in[11] = _mm_srai_epi16(in[11], 6); - in[12] = _mm_srai_epi16(in[12], 6); - in[13] = 
_mm_srai_epi16(in[13], 6); - in[14] = _mm_srai_epi16(in[14], 6); - in[15] = _mm_srai_epi16(in[15], 6); - - RECON_AND_STORE(dest + 0 * stride, in[0]); - RECON_AND_STORE(dest + 1 * stride, in[1]); - RECON_AND_STORE(dest + 2 * stride, in[2]); - RECON_AND_STORE(dest + 3 * stride, in[3]); - RECON_AND_STORE(dest + 4 * stride, in[4]); - RECON_AND_STORE(dest + 5 * stride, in[5]); - RECON_AND_STORE(dest + 6 * stride, in[6]); - RECON_AND_STORE(dest + 7 * stride, in[7]); - RECON_AND_STORE(dest + 8 * stride, in[8]); - RECON_AND_STORE(dest + 9 * stride, in[9]); - RECON_AND_STORE(dest + 10 * stride, in[10]); - RECON_AND_STORE(dest + 11 * stride, in[11]); - RECON_AND_STORE(dest + 12 * stride, in[12]); - RECON_AND_STORE(dest + 13 * stride, in[13]); - RECON_AND_STORE(dest + 14 * stride, in[14]); - RECON_AND_STORE(dest + 15 * stride, in[15]); -} - -void idct4_sse2(__m128i *in); -void idct8_sse2(__m128i *in); -void idct16_sse2(__m128i *in0, __m128i *in1); -void iadst4_sse2(__m128i *in); -void iadst8_sse2(__m128i *in); -void iadst16_sse2(__m128i *in0, __m128i *in1); - -#endif // VPX_DSP_X86_INV_TXFM_SSE2_H_ diff --git a/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm b/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm deleted file mode 100644 index 20baf820f6..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm +++ /dev/null @@ -1,1793 +0,0 @@ -; -; Copyright (c) 2014 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "third_party/x86inc/x86inc.asm" - -; This file provides SSSE3 version of the inverse transformation. Part -; of the functions are originally derived from the ffmpeg project. 
-; Note that the current version applies to x86 64-bit only. - -SECTION_RODATA - -pw_11585x2: times 8 dw 23170 - -pw_m2404x2: times 8 dw -2404*2 -pw_m4756x2: times 8 dw -4756*2 -pw_m5520x2: times 8 dw -5520*2 -pw_m8423x2: times 8 dw -8423*2 -pw_m9102x2: times 8 dw -9102*2 -pw_m10394x2: times 8 dw -10394*2 -pw_m11003x2: times 8 dw -11003*2 - -pw_16364x2: times 8 dw 16364*2 -pw_16305x2: times 8 dw 16305*2 -pw_16207x2: times 8 dw 16207*2 -pw_16069x2: times 8 dw 16069*2 -pw_15893x2: times 8 dw 15893*2 -pw_15679x2: times 8 dw 15679*2 -pw_15426x2: times 8 dw 15426*2 -pw_15137x2: times 8 dw 15137*2 -pw_14811x2: times 8 dw 14811*2 -pw_14449x2: times 8 dw 14449*2 -pw_14053x2: times 8 dw 14053*2 -pw_13623x2: times 8 dw 13623*2 -pw_13160x2: times 8 dw 13160*2 -pw_12665x2: times 8 dw 12665*2 -pw_12140x2: times 8 dw 12140*2 -pw__9760x2: times 8 dw 9760*2 -pw__7723x2: times 8 dw 7723*2 -pw__7005x2: times 8 dw 7005*2 -pw__6270x2: times 8 dw 6270*2 -pw__3981x2: times 8 dw 3981*2 -pw__3196x2: times 8 dw 3196*2 -pw__1606x2: times 8 dw 1606*2 -pw___804x2: times 8 dw 804*2 - -pd_8192: times 4 dd 8192 -pw_32: times 8 dw 32 -pw_16: times 8 dw 16 - -%macro TRANSFORM_COEFFS 2 -pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2 -pw_m%2_%1: dw -%2, %1, -%2, %1, -%2, %1, -%2, %1 -pw_m%1_m%2: dw -%1, -%2, -%1, -%2, -%1, -%2, -%1, -%2 -%endmacro - -TRANSFORM_COEFFS 6270, 15137 -TRANSFORM_COEFFS 3196, 16069 -TRANSFORM_COEFFS 13623, 9102 - -; constants for 32x32_34 -TRANSFORM_COEFFS 804, 16364 -TRANSFORM_COEFFS 15426, 5520 -TRANSFORM_COEFFS 3981, 15893 -TRANSFORM_COEFFS 16207, 2404 -TRANSFORM_COEFFS 1606, 16305 -TRANSFORM_COEFFS 15679, 4756 -TRANSFORM_COEFFS 11585, 11585 - -; constants for 32x32_1024 -TRANSFORM_COEFFS 12140, 11003 -TRANSFORM_COEFFS 7005, 14811 -TRANSFORM_COEFFS 14053, 8423 -TRANSFORM_COEFFS 9760, 13160 -TRANSFORM_COEFFS 12665, 10394 -TRANSFORM_COEFFS 7723, 14449 - -%macro PAIR_PP_COEFFS 2 -dpw_%1_%2: dw %1, %1, %1, %1, %2, %2, %2, %2 -%endmacro - -%macro PAIR_MP_COEFFS 2 -dpw_m%1_%2: 
dw -%1, -%1, -%1, -%1, %2, %2, %2, %2 -%endmacro - -%macro PAIR_MM_COEFFS 2 -dpw_m%1_m%2: dw -%1, -%1, -%1, -%1, -%2, -%2, -%2, -%2 -%endmacro - -PAIR_PP_COEFFS 30274, 12540 -PAIR_PP_COEFFS 6392, 32138 -PAIR_MP_COEFFS 18204, 27246 - -PAIR_PP_COEFFS 12540, 12540 -PAIR_PP_COEFFS 30274, 30274 -PAIR_PP_COEFFS 6392, 6392 -PAIR_PP_COEFFS 32138, 32138 -PAIR_MM_COEFFS 18204, 18204 -PAIR_PP_COEFFS 27246, 27246 - -SECTION .text - -%if ARCH_X86_64 -%macro SUM_SUB 3 - psubw m%3, m%1, m%2 - paddw m%1, m%2 - SWAP %2, %3 -%endmacro - -; butterfly operation -%macro MUL_ADD_2X 6 ; dst1, dst2, src, round, coefs1, coefs2 - pmaddwd m%1, m%3, %5 - pmaddwd m%2, m%3, %6 - paddd m%1, %4 - paddd m%2, %4 - psrad m%1, 14 - psrad m%2, 14 -%endmacro - -%macro BUTTERFLY_4X 7 ; dst1, dst2, coef1, coef2, round, tmp1, tmp2 - punpckhwd m%6, m%2, m%1 - MUL_ADD_2X %7, %6, %6, %5, [pw_m%4_%3], [pw_%3_%4] - punpcklwd m%2, m%1 - MUL_ADD_2X %1, %2, %2, %5, [pw_m%4_%3], [pw_%3_%4] - packssdw m%1, m%7 - packssdw m%2, m%6 -%endmacro - -%macro BUTTERFLY_4Xmm 7 ; dst1, dst2, coef1, coef2, round, tmp1, tmp2 - punpckhwd m%6, m%2, m%1 - MUL_ADD_2X %7, %6, %6, %5, [pw_m%4_%3], [pw_m%3_m%4] - punpcklwd m%2, m%1 - MUL_ADD_2X %1, %2, %2, %5, [pw_m%4_%3], [pw_m%3_m%4] - packssdw m%1, m%7 - packssdw m%2, m%6 -%endmacro - -; matrix transpose -%macro INTERLEAVE_2X 4 - punpckh%1 m%4, m%2, m%3 - punpckl%1 m%2, m%3 - SWAP %3, %4 -%endmacro - -%macro TRANSPOSE8X8 9 - INTERLEAVE_2X wd, %1, %2, %9 - INTERLEAVE_2X wd, %3, %4, %9 - INTERLEAVE_2X wd, %5, %6, %9 - INTERLEAVE_2X wd, %7, %8, %9 - - INTERLEAVE_2X dq, %1, %3, %9 - INTERLEAVE_2X dq, %2, %4, %9 - INTERLEAVE_2X dq, %5, %7, %9 - INTERLEAVE_2X dq, %6, %8, %9 - - INTERLEAVE_2X qdq, %1, %5, %9 - INTERLEAVE_2X qdq, %3, %7, %9 - INTERLEAVE_2X qdq, %2, %6, %9 - INTERLEAVE_2X qdq, %4, %8, %9 - - SWAP %2, %5 - SWAP %4, %7 -%endmacro - -%macro IDCT8_1D 0 - SUM_SUB 0, 4, 9 - BUTTERFLY_4X 2, 6, 6270, 15137, m8, 9, 10 - pmulhrsw m0, m12 - pmulhrsw m4, m12 - BUTTERFLY_4X 1, 7, 3196, 
16069, m8, 9, 10 - BUTTERFLY_4X 5, 3, 13623, 9102, m8, 9, 10 - - SUM_SUB 1, 5, 9 - SUM_SUB 7, 3, 9 - SUM_SUB 0, 6, 9 - SUM_SUB 4, 2, 9 - SUM_SUB 3, 5, 9 - pmulhrsw m3, m12 - pmulhrsw m5, m12 - - SUM_SUB 0, 7, 9 - SUM_SUB 4, 3, 9 - SUM_SUB 2, 5, 9 - SUM_SUB 6, 1, 9 - - SWAP 3, 6 - SWAP 1, 4 -%endmacro - -; This macro handles 8 pixels per line -%macro ADD_STORE_8P_2X 5; src1, src2, tmp1, tmp2, zero - paddw m%1, m11 - paddw m%2, m11 - psraw m%1, 5 - psraw m%2, 5 - - movh m%3, [outputq] - movh m%4, [outputq + strideq] - punpcklbw m%3, m%5 - punpcklbw m%4, m%5 - paddw m%3, m%1 - paddw m%4, m%2 - packuswb m%3, m%5 - packuswb m%4, m%5 - movh [outputq], m%3 - movh [outputq + strideq], m%4 -%endmacro - -INIT_XMM ssse3 -; full inverse 8x8 2D-DCT transform -cglobal idct8x8_64_add, 3, 5, 13, input, output, stride - mova m8, [pd_8192] - mova m11, [pw_16] - mova m12, [pw_11585x2] - - lea r3, [2 * strideq] -%if CONFIG_VP9_HIGHBITDEPTH - mova m0, [inputq + 0] - packssdw m0, [inputq + 16] - mova m1, [inputq + 32] - packssdw m1, [inputq + 48] - mova m2, [inputq + 64] - packssdw m2, [inputq + 80] - mova m3, [inputq + 96] - packssdw m3, [inputq + 112] - mova m4, [inputq + 128] - packssdw m4, [inputq + 144] - mova m5, [inputq + 160] - packssdw m5, [inputq + 176] - mova m6, [inputq + 192] - packssdw m6, [inputq + 208] - mova m7, [inputq + 224] - packssdw m7, [inputq + 240] -%else - mova m0, [inputq + 0] - mova m1, [inputq + 16] - mova m2, [inputq + 32] - mova m3, [inputq + 48] - mova m4, [inputq + 64] - mova m5, [inputq + 80] - mova m6, [inputq + 96] - mova m7, [inputq + 112] -%endif - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - IDCT8_1D - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - IDCT8_1D - - pxor m12, m12 - ADD_STORE_8P_2X 0, 1, 9, 10, 12 - lea outputq, [outputq + r3] - ADD_STORE_8P_2X 2, 3, 9, 10, 12 - lea outputq, [outputq + r3] - ADD_STORE_8P_2X 4, 5, 9, 10, 12 - lea outputq, [outputq + r3] - ADD_STORE_8P_2X 6, 7, 9, 10, 12 - - RET - -; inverse 8x8 2D-DCT transform with only first 10 
coeffs non-zero -cglobal idct8x8_12_add, 3, 5, 13, input, output, stride - mova m8, [pd_8192] - mova m11, [pw_16] - mova m12, [pw_11585x2] - - lea r3, [2 * strideq] - -%if CONFIG_VP9_HIGHBITDEPTH - mova m0, [inputq + 0] - packssdw m0, [inputq + 16] - mova m1, [inputq + 32] - packssdw m1, [inputq + 48] - mova m2, [inputq + 64] - packssdw m2, [inputq + 80] - mova m3, [inputq + 96] - packssdw m3, [inputq + 112] -%else - mova m0, [inputq + 0] - mova m1, [inputq + 16] - mova m2, [inputq + 32] - mova m3, [inputq + 48] -%endif - - punpcklwd m0, m1 - punpcklwd m2, m3 - punpckhdq m9, m0, m2 - punpckldq m0, m2 - SWAP 2, 9 - - ; m0 -> [0], [0] - ; m1 -> [1], [1] - ; m2 -> [2], [2] - ; m3 -> [3], [3] - punpckhqdq m10, m0, m0 - punpcklqdq m0, m0 - punpckhqdq m9, m2, m2 - punpcklqdq m2, m2 - SWAP 1, 10 - SWAP 3, 9 - - pmulhrsw m0, m12 - pmulhrsw m2, [dpw_30274_12540] - pmulhrsw m1, [dpw_6392_32138] - pmulhrsw m3, [dpw_m18204_27246] - - SUM_SUB 0, 2, 9 - SUM_SUB 1, 3, 9 - - punpcklqdq m9, m3, m3 - punpckhqdq m5, m3, m9 - - SUM_SUB 3, 5, 9 - punpckhqdq m5, m3 - pmulhrsw m5, m12 - - punpckhqdq m9, m1, m5 - punpcklqdq m1, m5 - SWAP 5, 9 - - SUM_SUB 0, 5, 9 - SUM_SUB 2, 1, 9 - - punpckhqdq m3, m0, m0 - punpckhqdq m4, m1, m1 - punpckhqdq m6, m5, m5 - punpckhqdq m7, m2, m2 - - punpcklwd m0, m3 - punpcklwd m7, m2 - punpcklwd m1, m4 - punpcklwd m6, m5 - - punpckhdq m4, m0, m7 - punpckldq m0, m7 - punpckhdq m10, m1, m6 - punpckldq m5, m1, m6 - - punpckhqdq m1, m0, m5 - punpcklqdq m0, m5 - punpckhqdq m3, m4, m10 - punpcklqdq m2, m4, m10 - - - pmulhrsw m0, m12 - pmulhrsw m6, m2, [dpw_30274_30274] - pmulhrsw m4, m2, [dpw_12540_12540] - - pmulhrsw m7, m1, [dpw_32138_32138] - pmulhrsw m1, [dpw_6392_6392] - pmulhrsw m5, m3, [dpw_m18204_m18204] - pmulhrsw m3, [dpw_27246_27246] - - mova m2, m0 - SUM_SUB 0, 6, 9 - SUM_SUB 2, 4, 9 - SUM_SUB 1, 5, 9 - SUM_SUB 7, 3, 9 - - SUM_SUB 3, 5, 9 - pmulhrsw m3, m12 - pmulhrsw m5, m12 - - SUM_SUB 0, 7, 9 - SUM_SUB 2, 3, 9 - SUM_SUB 4, 5, 9 - SUM_SUB 6, 1, 9 - 
- SWAP 3, 6 - SWAP 1, 2 - SWAP 2, 4 - - - pxor m12, m12 - ADD_STORE_8P_2X 0, 1, 9, 10, 12 - lea outputq, [outputq + r3] - ADD_STORE_8P_2X 2, 3, 9, 10, 12 - lea outputq, [outputq + r3] - ADD_STORE_8P_2X 4, 5, 9, 10, 12 - lea outputq, [outputq + r3] - ADD_STORE_8P_2X 6, 7, 9, 10, 12 - - RET - -%define idx0 16 * 0 -%define idx1 16 * 1 -%define idx2 16 * 2 -%define idx3 16 * 3 -%define idx4 16 * 4 -%define idx5 16 * 5 -%define idx6 16 * 6 -%define idx7 16 * 7 -%define idx8 16 * 0 -%define idx9 16 * 1 -%define idx10 16 * 2 -%define idx11 16 * 3 -%define idx12 16 * 4 -%define idx13 16 * 5 -%define idx14 16 * 6 -%define idx15 16 * 7 -%define idx16 16 * 0 -%define idx17 16 * 1 -%define idx18 16 * 2 -%define idx19 16 * 3 -%define idx20 16 * 4 -%define idx21 16 * 5 -%define idx22 16 * 6 -%define idx23 16 * 7 -%define idx24 16 * 0 -%define idx25 16 * 1 -%define idx26 16 * 2 -%define idx27 16 * 3 -%define idx28 16 * 4 -%define idx29 16 * 5 -%define idx30 16 * 6 -%define idx31 16 * 7 - -; FROM idct32x32_add_neon.asm -; -; Instead of doing the transforms stage by stage, it is done by loading -; some input values and doing as many stages as possible to minimize the -; storing/loading of intermediate results. To fit within registers, the -; final coefficients are cut into four blocks: -; BLOCK A: 16-19,28-31 -; BLOCK B: 20-23,24-27 -; BLOCK C: 8-11,12-15 -; BLOCK D: 0-3,4-7 -; Blocks A and C are straight calculation through the various stages. In -; block B, further calculations are performed using the results from -; block A. In block D, further calculations are performed using the results -; from block C and then the final calculations are done using results from -; block A and B which have been combined at the end of block B. 
-; - -%macro IDCT32X32_34 4 - ; BLOCK A STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m11, m1 - pmulhrsw m1, [pw___804x2] ; stp1_16 - mova [r4 + 0], m0 - pmulhrsw m11, [pw_16364x2] ; stp2_31 - mova [r4 + 16 * 2], m2 - mova m12, m7 - pmulhrsw m7, [pw_15426x2] ; stp1_28 - mova [r4 + 16 * 4], m4 - pmulhrsw m12, [pw_m5520x2] ; stp2_19 - mova [r4 + 16 * 6], m6 - - ; BLOCK A STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m2, m1 ; stp1_16 - mova m0, m11 ; stp1_31 - mova m4, m7 ; stp1_28 - mova m15, m12 ; stp1_19 - - ; BLOCK A STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 0, 2, 3196, 16069, m8, 9, 10 ; stp1_17, stp1_30 - BUTTERFLY_4Xmm 4, 15, 3196, 16069, m8, 9, 10 ; stp1_29, stp1_18 - - ; BLOCK A STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 1, 12, 9 ; stp2_16, stp2_19 - SUM_SUB 0, 15, 9 ; stp2_17, stp2_18 - SUM_SUB 11, 7, 9 ; stp2_31, stp2_28 - SUM_SUB 2, 4, 9 ; stp2_30, stp2_29 - - ; BLOCK A STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 4, 15, 6270, 15137, m8, 9, 10 ; stp1_18, stp1_29 - BUTTERFLY_4X 7, 12, 6270, 15137, m8, 9, 10 ; stp1_19, stp1_28 - - ; BLOCK B STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m6, m5 - pmulhrsw m5, [pw__3981x2] ; stp1_20 - mova [stp + %4 + idx28], m12 - mova [stp + %4 + idx29], m15 - pmulhrsw m6, [pw_15893x2] ; stp2_27 - mova [stp + %4 + idx30], m2 - mova m2, m3 - pmulhrsw m3, [pw_m2404x2] ; stp1_23 - mova [stp + %4 + idx31], m11 - pmulhrsw m2, [pw_16207x2] ; stp2_24 - - ; BLOCK B STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m13, m5 ; stp1_20 - mova m14, m6 ; stp1_27 - mova m15, m3 ; stp1_23 - mova m11, m2 ; stp1_24 - - ; BLOCK B STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_21, stp1_26 - BUTTERFLY_4Xmm 11, 15, 13623, 9102, m8, 9, 10 ; stp1_25, 
stp1_22 - - ; BLOCK B STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 3, 5, 9 ; stp2_23, stp2_20 - SUM_SUB 15, 14, 9 ; stp2_22, stp2_21 - SUM_SUB 2, 6, 9 ; stp2_24, stp2_27 - SUM_SUB 11, 13, 9 ; stp2_25, stp2_26 - - ; BLOCK B STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4Xmm 6, 5, 6270, 15137, m8, 9, 10 ; stp1_27, stp1_20 - BUTTERFLY_4Xmm 13, 14, 6270, 15137, m8, 9, 10 ; stp1_26, stp1_21 - - ; BLOCK B STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 1, 3, 9 ; stp2_16, stp2_23 - SUM_SUB 0, 15, 9 ; stp2_17, stp2_22 - SUM_SUB 4, 14, 9 ; stp2_18, stp2_21 - SUM_SUB 7, 5, 9 ; stp2_19, stp2_20 - mova [stp + %3 + idx16], m1 - mova [stp + %3 + idx17], m0 - mova [stp + %3 + idx18], m4 - mova [stp + %3 + idx19], m7 - - mova m4, [stp + %4 + idx28] - mova m7, [stp + %4 + idx29] - mova m10, [stp + %4 + idx30] - mova m12, [stp + %4 + idx31] - SUM_SUB 4, 6, 9 ; stp2_28, stp2_27 - SUM_SUB 7, 13, 9 ; stp2_29, stp2_26 - SUM_SUB 10, 11, 9 ; stp2_30, stp2_25 - SUM_SUB 12, 2, 9 ; stp2_31, stp2_24 - mova [stp + %4 + idx28], m4 - mova [stp + %4 + idx29], m7 - mova [stp + %4 + idx30], m10 - mova [stp + %4 + idx31], m12 - - ; BLOCK B STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%if 0 ; overflow occurs in SUM_SUB when using test streams - mova m10, [pw_11585x2] - SUM_SUB 6, 5, 9 - pmulhrsw m6, m10 ; stp1_27 - pmulhrsw m5, m10 ; stp1_20 - SUM_SUB 13, 14, 9 - pmulhrsw m13, m10 ; stp1_26 - pmulhrsw m14, m10 ; stp1_21 - SUM_SUB 11, 15, 9 - pmulhrsw m11, m10 ; stp1_25 - pmulhrsw m15, m10 ; stp1_22 - SUM_SUB 2, 3, 9 - pmulhrsw m2, m10 ; stp1_24 - pmulhrsw m3, m10 ; stp1_23 -%else - BUTTERFLY_4X 6, 5, 11585, 11585, m8, 9, 10 ; stp1_20, stp1_27 - SWAP 6, 5 - BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_21, stp1_26 - SWAP 13, 14 - BUTTERFLY_4X 11, 15, 11585, 11585, m8, 9, 10 ; stp1_22, stp1_25 - SWAP 11, 15 - BUTTERFLY_4X 2, 3, 11585, 11585, m8, 9, 10 ; stp1_23, stp1_24 - SWAP 
2, 3 -%endif - - mova [stp + %4 + idx24], m2 - mova [stp + %4 + idx25], m11 - mova [stp + %4 + idx26], m13 - mova [stp + %4 + idx27], m6 - - ; BLOCK C STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; - ; BLOCK C STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m0, [rsp + transposed_in + 16 * 2] - mova m6, [rsp + transposed_in + 16 * 6] - - mova m1, m0 - pmulhrsw m0, [pw__1606x2] ; stp1_8 - mova [stp + %3 + idx20], m5 - mova [stp + %3 + idx21], m14 - pmulhrsw m1, [pw_16305x2] ; stp2_15 - mova [stp + %3 + idx22], m15 - mova m7, m6 - pmulhrsw m7, [pw_m4756x2] ; stp2_11 - mova [stp + %3 + idx23], m3 - pmulhrsw m6, [pw_15679x2] ; stp1_12 - - ; BLOCK C STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m3, m0 ; stp1_8 - mova m2, m1 ; stp1_15 - - ; BLOCK C STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_9, stp1_14 - mova m4, m7 ; stp1_11 - mova m5, m6 ; stp1_12 - BUTTERFLY_4Xmm 5, 4, 6270, 15137, m8, 9, 10 ; stp1_13, stp1_10 - - ; BLOCK C STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 0, 7, 9 ; stp1_8, stp1_11 - SUM_SUB 2, 4, 9 ; stp1_9, stp1_10 - SUM_SUB 1, 6, 9 ; stp1_15, stp1_12 - SUM_SUB 3, 5, 9 ; stp1_14, stp1_13 - - ; BLOCK C STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%if 0 ; overflow occurs in SUM_SUB when using test streams - mova m10, [pw_11585x2] - SUM_SUB 5, 4, 9 - pmulhrsw m5, m10 ; stp1_13 - pmulhrsw m4, m10 ; stp1_10 - SUM_SUB 6, 7, 9 - pmulhrsw m6, m10 ; stp1_12 - pmulhrsw m7, m10 ; stp1_11 -%else - BUTTERFLY_4X 5, 4, 11585, 11585, m8, 9, 10 ; stp1_10, stp1_13 - SWAP 5, 4 - BUTTERFLY_4X 6, 7, 11585, 11585, m8, 9, 10 ; stp1_11, stp1_12 - SWAP 6, 7 -%endif - - ; BLOCK C STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova [stp + %2 + idx8], m0 - mova [stp + %2 + idx9], m2 - mova [stp + %2 + idx10], m4 - mova [stp + %2 + idx11], m7 - - ; 
BLOCK D STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; - ; BLOCK D STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; - ; BLOCK D STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m11, [rsp + transposed_in + 16 * 4] - mova m12, m11 - pmulhrsw m11, [pw__3196x2] ; stp1_4 - pmulhrsw m12, [pw_16069x2] ; stp1_7 - - ; BLOCK D STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m0, [rsp + transposed_in + 16 * 0] - mova m10, [pw_11585x2] - pmulhrsw m0, m10 ; stp1_1 - - mova m14, m11 ; stp1_4 - mova m13, m12 ; stp1_7 - - ; BLOCK D STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%if 0 ; overflow occurs in SUM_SUB when using test streams - SUM_SUB 13, 14, 9 - pmulhrsw m13, m10 ; stp1_6 - pmulhrsw m14, m10 ; stp1_5 -%else - BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_5, stp1_6 - SWAP 13, 14 -%endif - mova m7, m0 ; stp1_0 = stp1_1 - mova m4, m0 ; stp1_1 - mova m2, m7 ; stp1_0 - - ; BLOCK D STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 0, 12, 9 ; stp1_0, stp1_7 - SUM_SUB 7, 13, 9 ; stp1_1, stp1_6 - SUM_SUB 2, 14, 9 ; stp1_2, stp1_5 - SUM_SUB 4, 11, 9 ; stp1_3, stp1_4 - - ; BLOCK D STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 0, 1, 9 ; stp1_0, stp1_15 - SUM_SUB 7, 3, 9 ; stp1_1, stp1_14 - SUM_SUB 2, 5, 9 ; stp1_2, stp1_13 - SUM_SUB 4, 6, 9 ; stp1_3, stp1_12 - - ; 0-3, 28-31 final stage - mova m15, [stp + %4 + idx30] - mova m10, [stp + %4 + idx31] - SUM_SUB 0, 10, 9 ; stp1_0, stp1_31 - SUM_SUB 7, 15, 9 ; stp1_1, stp1_30 - mova [stp + %1 + idx0], m0 - mova [stp + %1 + idx1], m7 - mova [stp + %4 + idx30], m15 - mova [stp + %4 + idx31], m10 - mova m7, [stp + %4 + idx28] - mova m0, [stp + %4 + idx29] - SUM_SUB 2, 0, 9 ; stp1_2, stp1_29 - SUM_SUB 4, 7, 9 ; stp1_3, stp1_28 - mova [stp + %1 + idx2], m2 - mova [stp + %1 + idx3], m4 - mova [stp + %4 + idx28], m7 - mova [stp + %4 + idx29], m0 - - ; 12-15, 
16-19 final stage - mova m0, [stp + %3 + idx16] - mova m7, [stp + %3 + idx17] - mova m2, [stp + %3 + idx18] - mova m4, [stp + %3 + idx19] - SUM_SUB 1, 0, 9 ; stp1_15, stp1_16 - SUM_SUB 3, 7, 9 ; stp1_14, stp1_17 - SUM_SUB 5, 2, 9 ; stp1_13, stp1_18 - SUM_SUB 6, 4, 9 ; stp1_12, stp1_19 - mova [stp + %2 + idx12], m6 - mova [stp + %2 + idx13], m5 - mova [stp + %2 + idx14], m3 - mova [stp + %2 + idx15], m1 - mova [stp + %3 + idx16], m0 - mova [stp + %3 + idx17], m7 - mova [stp + %3 + idx18], m2 - mova [stp + %3 + idx19], m4 - - mova m4, [stp + %2 + idx8] - mova m5, [stp + %2 + idx9] - mova m6, [stp + %2 + idx10] - mova m7, [stp + %2 + idx11] - SUM_SUB 11, 7, 9 ; stp1_4, stp1_11 - SUM_SUB 14, 6, 9 ; stp1_5, stp1_10 - SUM_SUB 13, 5, 9 ; stp1_6, stp1_9 - SUM_SUB 12, 4, 9 ; stp1_7, stp1_8 - - ; 4-7, 24-27 final stage - mova m0, [stp + %4 + idx27] - mova m1, [stp + %4 + idx26] - mova m2, [stp + %4 + idx25] - mova m3, [stp + %4 + idx24] - SUM_SUB 11, 0, 9 ; stp1_4, stp1_27 - SUM_SUB 14, 1, 9 ; stp1_5, stp1_26 - SUM_SUB 13, 2, 9 ; stp1_6, stp1_25 - SUM_SUB 12, 3, 9 ; stp1_7, stp1_24 - mova [stp + %4 + idx27], m0 - mova [stp + %4 + idx26], m1 - mova [stp + %4 + idx25], m2 - mova [stp + %4 + idx24], m3 - mova [stp + %1 + idx4], m11 - mova [stp + %1 + idx5], m14 - mova [stp + %1 + idx6], m13 - mova [stp + %1 + idx7], m12 - - ; 8-11, 20-23 final stage - mova m0, [stp + %3 + idx20] - mova m1, [stp + %3 + idx21] - mova m2, [stp + %3 + idx22] - mova m3, [stp + %3 + idx23] - SUM_SUB 7, 0, 9 ; stp1_11, stp_20 - SUM_SUB 6, 1, 9 ; stp1_10, stp_21 - SUM_SUB 5, 2, 9 ; stp1_9, stp_22 - SUM_SUB 4, 3, 9 ; stp1_8, stp_23 - mova [stp + %2 + idx8], m4 - mova [stp + %2 + idx9], m5 - mova [stp + %2 + idx10], m6 - mova [stp + %2 + idx11], m7 - mova [stp + %3 + idx20], m0 - mova [stp + %3 + idx21], m1 - mova [stp + %3 + idx22], m2 - mova [stp + %3 + idx23], m3 -%endmacro - -%macro RECON_AND_STORE 1 - mova m11, [pw_32] - lea stp, [rsp + %1] - mov r6, 32 - pxor m8, m8 -%%recon_and_store: - mova m0, 
[stp + 16 * 32 * 0] - mova m1, [stp + 16 * 32 * 1] - mova m2, [stp + 16 * 32 * 2] - mova m3, [stp + 16 * 32 * 3] - add stp, 16 - - paddw m0, m11 - paddw m1, m11 - paddw m2, m11 - paddw m3, m11 - psraw m0, 6 - psraw m1, 6 - psraw m2, 6 - psraw m3, 6 - movh m4, [outputq + 0] - movh m5, [outputq + 8] - movh m6, [outputq + 16] - movh m7, [outputq + 24] - punpcklbw m4, m8 - punpcklbw m5, m8 - punpcklbw m6, m8 - punpcklbw m7, m8 - paddw m0, m4 - paddw m1, m5 - paddw m2, m6 - paddw m3, m7 - packuswb m0, m1 - packuswb m2, m3 - mova [outputq + 0], m0 - mova [outputq + 16], m2 - lea outputq, [outputq + strideq] - dec r6 - jnz %%recon_and_store -%endmacro - -%define i32x32_size 16*32*5 -%define pass_two_start 16*32*0 -%define transposed_in 16*32*4 -%define pass_one_start 16*32*0 -%define stp r8 - -INIT_XMM ssse3 -cglobal idct32x32_34_add, 3, 11, 16, i32x32_size, input, output, stride - mova m8, [pd_8192] - lea stp, [rsp + pass_one_start] - -idct32x32_34: - mov r3, inputq - lea r4, [rsp + transposed_in] - -idct32x32_34_transpose: -%if CONFIG_VP9_HIGHBITDEPTH - mova m0, [r3 + 0] - packssdw m0, [r3 + 16] - mova m1, [r3 + 32 * 4] - packssdw m1, [r3 + 32 * 4 + 16] - mova m2, [r3 + 32 * 8] - packssdw m2, [r3 + 32 * 8 + 16] - mova m3, [r3 + 32 * 12] - packssdw m3, [r3 + 32 * 12 + 16] - mova m4, [r3 + 32 * 16] - packssdw m4, [r3 + 32 * 16 + 16] - mova m5, [r3 + 32 * 20] - packssdw m5, [r3 + 32 * 20 + 16] - mova m6, [r3 + 32 * 24] - packssdw m6, [r3 + 32 * 24 + 16] - mova m7, [r3 + 32 * 28] - packssdw m7, [r3 + 32 * 28 + 16] -%else - mova m0, [r3 + 0] - mova m1, [r3 + 16 * 4] - mova m2, [r3 + 16 * 8] - mova m3, [r3 + 16 * 12] - mova m4, [r3 + 16 * 16] - mova m5, [r3 + 16 * 20] - mova m6, [r3 + 16 * 24] - mova m7, [r3 + 16 * 28] -%endif - - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - - IDCT32X32_34 16*0, 16*32, 16*64, 16*96 - lea stp, [stp + 16 * 8] - mov r6, 4 - lea stp, [rsp + pass_one_start] - lea r9, [rsp + pass_one_start] - -idct32x32_34_2: - lea r4, [rsp + transposed_in] - mov r3, 
r9 - -idct32x32_34_transpose_2: - mova m0, [r3 + 0] - mova m1, [r3 + 16 * 1] - mova m2, [r3 + 16 * 2] - mova m3, [r3 + 16 * 3] - mova m4, [r3 + 16 * 4] - mova m5, [r3 + 16 * 5] - mova m6, [r3 + 16 * 6] - mova m7, [r3 + 16 * 7] - - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - - IDCT32X32_34 16*0, 16*8, 16*16, 16*24 - - lea stp, [stp + 16 * 32] - add r9, 16 * 32 - dec r6 - jnz idct32x32_34_2 - - RECON_AND_STORE pass_two_start - - RET - -%macro IDCT32X32_135 4 - ; BLOCK A STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m1, [rsp + transposed_in + 16 * 1] - mova m11, m1 - pmulhrsw m1, [pw___804x2] ; stp1_16 - pmulhrsw m11, [pw_16364x2] ; stp2_31 - - mova m7, [rsp + transposed_in + 16 * 7] - mova m12, m7 - pmulhrsw m7, [pw_15426x2] ; stp1_28 - pmulhrsw m12, [pw_m5520x2] ; stp2_19 - - mova m3, [rsp + transposed_in + 16 * 9] - mova m4, m3 - pmulhrsw m3, [pw__7005x2] ; stp1_18 - pmulhrsw m4, [pw_14811x2] ; stp2_29 - - mova m0, [rsp + transposed_in + 16 * 15] - mova m2, m0 - pmulhrsw m0, [pw_12140x2] ; stp1_30 - pmulhrsw m2, [pw_m11003x2] ; stp2_17 - - ; BLOCK A STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 1, 2, 9 ; stp2_16, stp2_17 - SUM_SUB 12, 3, 9 ; stp2_19, stp2_18 - SUM_SUB 7, 4, 9 ; stp2_28, stp2_29 - SUM_SUB 11, 0, 9 ; stp2_31, stp2_30 - - ; BLOCK A STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 0, 2, 3196, 16069, m8, 9, 10 ; stp1_17, stp1_30 - BUTTERFLY_4Xmm 4, 3, 3196, 16069, m8, 9, 10 ; stp1_29, stp1_18 - - ; BLOCK A STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 1, 12, 9 ; stp2_16, stp2_19 - SUM_SUB 0, 3, 9 ; stp2_17, stp2_18 - SUM_SUB 11, 7, 9 ; stp2_31, stp2_28 - SUM_SUB 2, 4, 9 ; stp2_30, stp2_29 - - ; BLOCK A STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 4, 3, 6270, 15137, m8, 9, 10 ; stp1_18, stp1_29 - BUTTERFLY_4X 7, 12, 6270, 15137, m8, 9, 10 ; stp1_19, stp1_28 - - mova [stp + %3 + idx16], m1 - mova 
[stp + %3 + idx17], m0 - mova [stp + %3 + idx18], m4 - mova [stp + %3 + idx19], m7 - mova [stp + %4 + idx28], m12 - mova [stp + %4 + idx29], m3 - mova [stp + %4 + idx30], m2 - mova [stp + %4 + idx31], m11 - - ; BLOCK B STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m2, [rsp + transposed_in + 16 * 3] - mova m3, m2 - pmulhrsw m3, [pw_m2404x2] ; stp1_23 - pmulhrsw m2, [pw_16207x2] ; stp2_24 - - mova m5, [rsp + transposed_in + 16 * 5] - mova m6, m5 - pmulhrsw m5, [pw__3981x2] ; stp1_20 - pmulhrsw m6, [pw_15893x2] ; stp2_27 - - mova m14, [rsp + transposed_in + 16 * 11] - mova m13, m14 - pmulhrsw m13, [pw_m8423x2] ; stp1_21 - pmulhrsw m14, [pw_14053x2] ; stp2_26 - - mova m0, [rsp + transposed_in + 16 * 13] - mova m1, m0 - pmulhrsw m0, [pw__9760x2] ; stp1_22 - pmulhrsw m1, [pw_13160x2] ; stp2_25 - - ; BLOCK B STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 5, 13, 9 ; stp2_20, stp2_21 - SUM_SUB 3, 0, 9 ; stp2_23, stp2_22 - SUM_SUB 2, 1, 9 ; stp2_24, stp2_25 - SUM_SUB 6, 14, 9 ; stp2_27, stp2_26 - - ; BLOCK B STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_21, stp1_26 - BUTTERFLY_4Xmm 1, 0, 13623, 9102, m8, 9, 10 ; stp1_25, stp1_22 - - ; BLOCK B STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 3, 5, 9 ; stp2_23, stp2_20 - SUM_SUB 0, 14, 9 ; stp2_22, stp2_21 - SUM_SUB 2, 6, 9 ; stp2_24, stp2_27 - SUM_SUB 1, 13, 9 ; stp2_25, stp2_26 - - ; BLOCK B STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4Xmm 6, 5, 6270, 15137, m8, 9, 10 ; stp1_27, stp1_20 - BUTTERFLY_4Xmm 13, 14, 6270, 15137, m8, 9, 10 ; stp1_26, stp1_21 - - ; BLOCK B STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m4, [stp + %3 + idx16] - mova m7, [stp + %3 + idx17] - mova m11, [stp + %3 + idx18] - mova m12, [stp + %3 + idx19] - SUM_SUB 4, 3, 9 ; stp2_16, stp2_23 - SUM_SUB 7, 0, 9 ; stp2_17, stp2_22 - 
SUM_SUB 11, 14, 9 ; stp2_18, stp2_21 - SUM_SUB 12, 5, 9 ; stp2_19, stp2_20 - mova [stp + %3 + idx16], m4 - mova [stp + %3 + idx17], m7 - mova [stp + %3 + idx18], m11 - mova [stp + %3 + idx19], m12 - - mova m4, [stp + %4 + idx28] - mova m7, [stp + %4 + idx29] - mova m11, [stp + %4 + idx30] - mova m12, [stp + %4 + idx31] - SUM_SUB 4, 6, 9 ; stp2_28, stp2_27 - SUM_SUB 7, 13, 9 ; stp2_29, stp2_26 - SUM_SUB 11, 1, 9 ; stp2_30, stp2_25 - SUM_SUB 12, 2, 9 ; stp2_31, stp2_24 - mova [stp + %4 + idx28], m4 - mova [stp + %4 + idx29], m7 - mova [stp + %4 + idx30], m11 - mova [stp + %4 + idx31], m12 - - ; BLOCK B STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%if 0 ; overflow occurs in SUM_SUB when using test streams - mova m10, [pw_11585x2] - SUM_SUB 6, 5, 9 - pmulhrsw m6, m10 ; stp1_27 - pmulhrsw m5, m10 ; stp1_20 - SUM_SUB 13, 14, 9 - pmulhrsw m13, m10 ; stp1_26 - pmulhrsw m14, m10 ; stp1_21 - SUM_SUB 1, 0, 9 - pmulhrsw m1, m10 ; stp1_25 - pmulhrsw m0, m10 ; stp1_22 - SUM_SUB 2, 3, 9 - pmulhrsw m2, m10 ; stp1_25 - pmulhrsw m3, m10 ; stp1_22 -%else - BUTTERFLY_4X 6, 5, 11585, 11585, m8, 9, 10 ; stp1_20, stp1_27 - SWAP 6, 5 - BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_21, stp1_26 - SWAP 13, 14 - BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10 ; stp1_22, stp1_25 - SWAP 1, 0 - BUTTERFLY_4X 2, 3, 11585, 11585, m8, 9, 10 ; stp1_23, stp1_24 - SWAP 2, 3 -%endif - mova [stp + %3 + idx20], m5 - mova [stp + %3 + idx21], m14 - mova [stp + %3 + idx22], m0 - mova [stp + %3 + idx23], m3 - mova [stp + %4 + idx24], m2 - mova [stp + %4 + idx25], m1 - mova [stp + %4 + idx26], m13 - mova [stp + %4 + idx27], m6 - - ; BLOCK C STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; - ; BLOCK C STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m0, [rsp + transposed_in + 16 * 2] - mova m1, m0 - pmulhrsw m0, [pw__1606x2] ; stp1_8 - pmulhrsw m1, [pw_16305x2] ; stp2_15 - - mova m6, [rsp + transposed_in + 16 * 6] - mova m7, m6 - pmulhrsw 
m7, [pw_m4756x2] ; stp2_11 - pmulhrsw m6, [pw_15679x2] ; stp1_12 - - mova m4, [rsp + transposed_in + 16 * 10] - mova m5, m4 - pmulhrsw m4, [pw__7723x2] ; stp1_10 - pmulhrsw m5, [pw_14449x2] ; stp2_13 - - mova m2, [rsp + transposed_in + 16 * 14] - mova m3, m2 - pmulhrsw m3, [pw_m10394x2] ; stp1_9 - pmulhrsw m2, [pw_12665x2] ; stp2_14 - - ; BLOCK C STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 0, 3, 9 ; stp1_8, stp1_9 - SUM_SUB 7, 4, 9 ; stp1_11, stp1_10 - SUM_SUB 6, 5, 9 ; stp1_12, stp1_13 - SUM_SUB 1, 2, 9 ; stp1_15, stp1_14 - - ; BLOCK C STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_9, stp1_14 - BUTTERFLY_4Xmm 5, 4, 6270, 15137, m8, 9, 10 ; stp1_13, stp1_10 - - ; BLOCK C STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 0, 7, 9 ; stp1_8, stp1_11 - SUM_SUB 2, 4, 9 ; stp1_9, stp1_10 - SUM_SUB 1, 6, 9 ; stp1_15, stp1_12 - SUM_SUB 3, 5, 9 ; stp1_14, stp1_13 - - ; BLOCK C STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%if 0 ; overflow occurs in SUM_SUB when using test streams - mova m10, [pw_11585x2] - SUM_SUB 5, 4, 9 - pmulhrsw m5, m10 ; stp1_13 - pmulhrsw m4, m10 ; stp1_10 - SUM_SUB 6, 7, 9 - pmulhrsw m6, m10 ; stp1_12 - pmulhrsw m7, m10 ; stp1_11 -%else - BUTTERFLY_4X 5, 4, 11585, 11585, m8, 9, 10 ; stp1_10, stp1_13 - SWAP 5, 4 - BUTTERFLY_4X 6, 7, 11585, 11585, m8, 9, 10 ; stp1_11, stp1_12 - SWAP 6, 7 -%endif - ; BLOCK C STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova [stp + %2 + idx8], m0 - mova [stp + %2 + idx9], m2 - mova [stp + %2 + idx10], m4 - mova [stp + %2 + idx11], m7 - mova [stp + %2 + idx12], m6 - mova [stp + %2 + idx13], m5 - mova [stp + %2 + idx14], m3 - mova [stp + %2 + idx15], m1 - - ; BLOCK D STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; - ; BLOCK D STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; - ; BLOCK D STAGE 3 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m11, [rsp + transposed_in + 16 * 4] - mova m12, m11 - pmulhrsw m11, [pw__3196x2] ; stp1_4 - pmulhrsw m12, [pw_16069x2] ; stp1_7 - - mova m13, [rsp + transposed_in + 16 * 12] - mova m14, m13 - pmulhrsw m13, [pw_13623x2] ; stp1_6 - pmulhrsw m14, [pw_m9102x2] ; stp1_5 - - ; BLOCK D STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m0, [rsp + transposed_in + 16 * 0] - mova m2, [rsp + transposed_in + 16 * 8] - pmulhrsw m0, [pw_11585x2] ; stp1_1 - mova m3, m2 - pmulhrsw m2, [pw__6270x2] ; stp1_2 - pmulhrsw m3, [pw_15137x2] ; stp1_3 - - SUM_SUB 11, 14, 9 ; stp1_4, stp1_5 - SUM_SUB 12, 13, 9 ; stp1_7, stp1_6 - - ; BLOCK D STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%if 0 ; overflow occurs in SUM_SUB when using test streams - mova m10, [pw_11585x2] - SUM_SUB 13, 14, 9 - pmulhrsw m13, m10 ; stp1_6 - pmulhrsw m14, m10 ; stp1_5 -%else - BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_5, stp1_6 - SWAP 13, 14 -%endif - mova m1, m0 ; stp1_0 = stp1_1 - SUM_SUB 0, 3, 9 ; stp1_0, stp1_3 - SUM_SUB 1, 2, 9 ; stp1_1, stp1_2 - - ; BLOCK D STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 0, 12, 9 ; stp1_0, stp1_7 - SUM_SUB 1, 13, 9 ; stp1_1, stp1_6 - SUM_SUB 2, 14, 9 ; stp1_2, stp1_5 - SUM_SUB 3, 11, 9 ; stp1_3, stp1_4 - - ; BLOCK D STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m4, [stp + %2 + idx12] - mova m5, [stp + %2 + idx13] - mova m6, [stp + %2 + idx14] - mova m7, [stp + %2 + idx15] - SUM_SUB 0, 7, 9 ; stp1_0, stp1_15 - SUM_SUB 1, 6, 9 ; stp1_1, stp1_14 - SUM_SUB 2, 5, 9 ; stp1_2, stp1_13 - SUM_SUB 3, 4, 9 ; stp1_3, stp1_12 - - ; 0-3, 28-31 final stage - mova m10, [stp + %4 + idx31] - mova m15, [stp + %4 + idx30] - SUM_SUB 0, 10, 9 ; stp1_0, stp1_31 - SUM_SUB 1, 15, 9 ; stp1_1, stp1_30 - mova [stp + %1 + idx0], m0 - mova [stp + %1 + idx1], m1 - mova [stp + %4 + idx31], m10 - mova [stp + %4 + idx30], m15 - 
mova m0, [stp + %4 + idx29] - mova m1, [stp + %4 + idx28] - SUM_SUB 2, 0, 9 ; stp1_2, stp1_29 - SUM_SUB 3, 1, 9 ; stp1_3, stp1_28 - mova [stp + %1 + idx2], m2 - mova [stp + %1 + idx3], m3 - mova [stp + %4 + idx29], m0 - mova [stp + %4 + idx28], m1 - - ; 12-15, 16-19 final stage - mova m0, [stp + %3 + idx16] - mova m1, [stp + %3 + idx17] - mova m2, [stp + %3 + idx18] - mova m3, [stp + %3 + idx19] - SUM_SUB 7, 0, 9 ; stp1_15, stp1_16 - SUM_SUB 6, 1, 9 ; stp1_14, stp1_17 - SUM_SUB 5, 2, 9 ; stp1_13, stp1_18 - SUM_SUB 4, 3, 9 ; stp1_12, stp1_19 - mova [stp + %2 + idx12], m4 - mova [stp + %2 + idx13], m5 - mova [stp + %2 + idx14], m6 - mova [stp + %2 + idx15], m7 - mova [stp + %3 + idx16], m0 - mova [stp + %3 + idx17], m1 - mova [stp + %3 + idx18], m2 - mova [stp + %3 + idx19], m3 - - mova m4, [stp + %2 + idx8] - mova m5, [stp + %2 + idx9] - mova m6, [stp + %2 + idx10] - mova m7, [stp + %2 + idx11] - SUM_SUB 11, 7, 9 ; stp1_4, stp1_11 - SUM_SUB 14, 6, 9 ; stp1_5, stp1_10 - SUM_SUB 13, 5, 9 ; stp1_6, stp1_9 - SUM_SUB 12, 4, 9 ; stp1_7, stp1_8 - - ; 4-7, 24-27 final stage - mova m3, [stp + %4 + idx24] - mova m2, [stp + %4 + idx25] - mova m1, [stp + %4 + idx26] - mova m0, [stp + %4 + idx27] - SUM_SUB 12, 3, 9 ; stp1_7, stp1_24 - SUM_SUB 13, 2, 9 ; stp1_6, stp1_25 - SUM_SUB 14, 1, 9 ; stp1_5, stp1_26 - SUM_SUB 11, 0, 9 ; stp1_4, stp1_27 - mova [stp + %4 + idx24], m3 - mova [stp + %4 + idx25], m2 - mova [stp + %4 + idx26], m1 - mova [stp + %4 + idx27], m0 - mova [stp + %1 + idx4], m11 - mova [stp + %1 + idx5], m14 - mova [stp + %1 + idx6], m13 - mova [stp + %1 + idx7], m12 - - ; 8-11, 20-23 final stage - mova m0, [stp + %3 + idx20] - mova m1, [stp + %3 + idx21] - mova m2, [stp + %3 + idx22] - mova m3, [stp + %3 + idx23] - SUM_SUB 7, 0, 9 ; stp1_11, stp_20 - SUM_SUB 6, 1, 9 ; stp1_10, stp_21 - SUM_SUB 5, 2, 9 ; stp1_9, stp_22 - SUM_SUB 4, 3, 9 ; stp1_8, stp_23 - mova [stp + %2 + idx8], m4 - mova [stp + %2 + idx9], m5 - mova [stp + %2 + idx10], m6 - mova [stp + %2 + idx11], m7 
- mova [stp + %3 + idx20], m0 - mova [stp + %3 + idx21], m1 - mova [stp + %3 + idx22], m2 - mova [stp + %3 + idx23], m3 -%endmacro - -INIT_XMM ssse3 -cglobal idct32x32_135_add, 3, 11, 16, i32x32_size, input, output, stride - mova m8, [pd_8192] - mov r6, 2 - lea stp, [rsp + pass_one_start] - -idct32x32_135: - mov r3, inputq - lea r4, [rsp + transposed_in] - mov r7, 2 - -idct32x32_135_transpose: -%if CONFIG_VP9_HIGHBITDEPTH - mova m0, [r3 + 0] - packssdw m0, [r3 + 16] - mova m1, [r3 + 32 * 4] - packssdw m1, [r3 + 32 * 4 + 16] - mova m2, [r3 + 32 * 8] - packssdw m2, [r3 + 32 * 8 + 16] - mova m3, [r3 + 32 * 12] - packssdw m3, [r3 + 32 * 12 + 16] - mova m4, [r3 + 32 * 16] - packssdw m4, [r3 + 32 * 16 + 16] - mova m5, [r3 + 32 * 20] - packssdw m5, [r3 + 32 * 20 + 16] - mova m6, [r3 + 32 * 24] - packssdw m6, [r3 + 32 * 24 + 16] - mova m7, [r3 + 32 * 28] - packssdw m7, [r3 + 32 * 28 + 16] -%else - mova m0, [r3 + 0] - mova m1, [r3 + 16 * 4] - mova m2, [r3 + 16 * 8] - mova m3, [r3 + 16 * 12] - mova m4, [r3 + 16 * 16] - mova m5, [r3 + 16 * 20] - mova m6, [r3 + 16 * 24] - mova m7, [r3 + 16 * 28] -%endif - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - - mova [r4 + 0], m0 - mova [r4 + 16 * 1], m1 - mova [r4 + 16 * 2], m2 - mova [r4 + 16 * 3], m3 - mova [r4 + 16 * 4], m4 - mova [r4 + 16 * 5], m5 - mova [r4 + 16 * 6], m6 - mova [r4 + 16 * 7], m7 - -%if CONFIG_VP9_HIGHBITDEPTH - add r3, 32 -%else - add r3, 16 -%endif - add r4, 16 * 8 - dec r7 - jne idct32x32_135_transpose - - IDCT32X32_135 16*0, 16*32, 16*64, 16*96 - lea stp, [stp + 16 * 8] -%if CONFIG_VP9_HIGHBITDEPTH - lea inputq, [inputq + 32 * 32] -%else - lea inputq, [inputq + 16 * 32] -%endif - dec r6 - jnz idct32x32_135 - - mov r6, 4 - lea stp, [rsp + pass_one_start] - lea r9, [rsp + pass_one_start] - -idct32x32_135_2: - lea r4, [rsp + transposed_in] - mov r3, r9 - mov r7, 2 - -idct32x32_135_transpose_2: - mova m0, [r3 + 0] - mova m1, [r3 + 16 * 1] - mova m2, [r3 + 16 * 2] - mova m3, [r3 + 16 * 3] - mova m4, [r3 + 16 * 4] - mova 
m5, [r3 + 16 * 5] - mova m6, [r3 + 16 * 6] - mova m7, [r3 + 16 * 7] - - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - - mova [r4 + 0], m0 - mova [r4 + 16 * 1], m1 - mova [r4 + 16 * 2], m2 - mova [r4 + 16 * 3], m3 - mova [r4 + 16 * 4], m4 - mova [r4 + 16 * 5], m5 - mova [r4 + 16 * 6], m6 - mova [r4 + 16 * 7], m7 - - add r3, 16 * 8 - add r4, 16 * 8 - dec r7 - jne idct32x32_135_transpose_2 - - IDCT32X32_135 16*0, 16*8, 16*16, 16*24 - - lea stp, [stp + 16 * 32] - add r9, 16 * 32 - dec r6 - jnz idct32x32_135_2 - - RECON_AND_STORE pass_two_start - - RET - -%macro IDCT32X32_1024 4 - ; BLOCK A STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m1, [rsp + transposed_in + 16 * 1] - mova m11, [rsp + transposed_in + 16 * 31] - BUTTERFLY_4X 1, 11, 804, 16364, m8, 9, 10 ; stp1_16, stp1_31 - - mova m0, [rsp + transposed_in + 16 * 15] - mova m2, [rsp + transposed_in + 16 * 17] - BUTTERFLY_4X 2, 0, 12140, 11003, m8, 9, 10 ; stp1_17, stp1_30 - - mova m7, [rsp + transposed_in + 16 * 7] - mova m12, [rsp + transposed_in + 16 * 25] - BUTTERFLY_4X 12, 7, 15426, 5520, m8, 9, 10 ; stp1_19, stp1_28 - - mova m3, [rsp + transposed_in + 16 * 9] - mova m4, [rsp + transposed_in + 16 * 23] - BUTTERFLY_4X 3, 4, 7005, 14811, m8, 9, 10 ; stp1_18, stp1_29 - - ; BLOCK A STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 1, 2, 9 ; stp2_16, stp2_17 - SUM_SUB 12, 3, 9 ; stp2_19, stp2_18 - SUM_SUB 7, 4, 9 ; stp2_28, stp2_29 - SUM_SUB 11, 0, 9 ; stp2_31, stp2_30 - - ; BLOCK A STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 0, 2, 3196, 16069, m8, 9, 10 ; stp1_17, stp1_30 - BUTTERFLY_4Xmm 4, 3, 3196, 16069, m8, 9, 10 ; stp1_29, stp1_18 - - ; BLOCK A STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 1, 12, 9 ; stp2_16, stp2_19 - SUM_SUB 0, 3, 9 ; stp2_17, stp2_18 - SUM_SUB 11, 7, 9 ; stp2_31, stp2_28 - SUM_SUB 2, 4, 9 ; stp2_30, stp2_29 - - ; BLOCK A STAGE 5 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 4, 3, 6270, 15137, m8, 9, 10 ; stp1_18, stp1_29 - BUTTERFLY_4X 7, 12, 6270, 15137, m8, 9, 10 ; stp1_19, stp1_28 - - mova [stp + %3 + idx16], m1 - mova [stp + %3 + idx17], m0 - mova [stp + %3 + idx18], m4 - mova [stp + %3 + idx19], m7 - mova [stp + %4 + idx28], m12 - mova [stp + %4 + idx29], m3 - mova [stp + %4 + idx30], m2 - mova [stp + %4 + idx31], m11 - - ; BLOCK B STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m5, [rsp + transposed_in + 16 * 5] - mova m6, [rsp + transposed_in + 16 * 27] - BUTTERFLY_4X 5, 6, 3981, 15893, m8, 9, 10 ; stp1_20, stp1_27 - - mova m13, [rsp + transposed_in + 16 * 21] - mova m14, [rsp + transposed_in + 16 * 11] - BUTTERFLY_4X 13, 14, 14053, 8423, m8, 9, 10 ; stp1_21, stp1_26 - - mova m0, [rsp + transposed_in + 16 * 13] - mova m1, [rsp + transposed_in + 16 * 19] - BUTTERFLY_4X 0, 1, 9760, 13160, m8, 9, 10 ; stp1_22, stp1_25 - - mova m2, [rsp + transposed_in + 16 * 3] - mova m3, [rsp + transposed_in + 16 * 29] - BUTTERFLY_4X 3, 2, 16207, 2404, m8, 9, 10 ; stp1_23, stp1_24 - - ; BLOCK B STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 5, 13, 9 ; stp2_20, stp2_21 - SUM_SUB 3, 0, 9 ; stp2_23, stp2_22 - SUM_SUB 2, 1, 9 ; stp2_24, stp2_25 - SUM_SUB 6, 14, 9 ; stp2_27, stp2_26 - - ; BLOCK B STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_21, stp1_26 - BUTTERFLY_4Xmm 1, 0, 13623, 9102, m8, 9, 10 ; stp1_25, stp1_22 - - ; BLOCK B STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 3, 5, 9 ; stp2_23, stp2_20 - SUM_SUB 0, 14, 9 ; stp2_22, stp2_21 - SUM_SUB 2, 6, 9 ; stp2_24, stp2_27 - SUM_SUB 1, 13, 9 ; stp2_25, stp2_26 - - ; BLOCK B STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4Xmm 6, 5, 6270, 15137, m8, 9, 10 ; stp1_27, stp1_20 - BUTTERFLY_4Xmm 13, 14, 6270, 15137, m8, 9, 10 ; stp1_26, 
stp1_21 - - ; BLOCK B STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m4, [stp + %3 + idx16] - mova m7, [stp + %3 + idx17] - mova m11, [stp + %3 + idx18] - mova m12, [stp + %3 + idx19] - SUM_SUB 4, 3, 9 ; stp2_16, stp2_23 - SUM_SUB 7, 0, 9 ; stp2_17, stp2_22 - SUM_SUB 11, 14, 9 ; stp2_18, stp2_21 - SUM_SUB 12, 5, 9 ; stp2_19, stp2_20 - mova [stp + %3 + idx16], m4 - mova [stp + %3 + idx17], m7 - mova [stp + %3 + idx18], m11 - mova [stp + %3 + idx19], m12 - - mova m4, [stp + %4 + idx28] - mova m7, [stp + %4 + idx29] - mova m11, [stp + %4 + idx30] - mova m12, [stp + %4 + idx31] - SUM_SUB 4, 6, 9 ; stp2_28, stp2_27 - SUM_SUB 7, 13, 9 ; stp2_29, stp2_26 - SUM_SUB 11, 1, 9 ; stp2_30, stp2_25 - SUM_SUB 12, 2, 9 ; stp2_31, stp2_24 - mova [stp + %4 + idx28], m4 - mova [stp + %4 + idx29], m7 - mova [stp + %4 + idx30], m11 - mova [stp + %4 + idx31], m12 - - ; BLOCK B STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%if 0 ; overflow occurs in SUM_SUB when using test streams - mova m10, [pw_11585x2] - SUM_SUB 6, 5, 9 - pmulhrsw m6, m10 ; stp1_27 - pmulhrsw m5, m10 ; stp1_20 - SUM_SUB 13, 14, 9 - pmulhrsw m13, m10 ; stp1_26 - pmulhrsw m14, m10 ; stp1_21 - SUM_SUB 1, 0, 9 - pmulhrsw m1, m10 ; stp1_25 - pmulhrsw m0, m10 ; stp1_22 - SUM_SUB 2, 3, 9 - pmulhrsw m2, m10 ; stp1_25 - pmulhrsw m3, m10 ; stp1_22 -%else - BUTTERFLY_4X 6, 5, 11585, 11585, m8, 9, 10 ; stp1_20, stp1_27 - SWAP 6, 5 - BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_21, stp1_26 - SWAP 13, 14 - BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10 ; stp1_22, stp1_25 - SWAP 1, 0 - BUTTERFLY_4X 2, 3, 11585, 11585, m8, 9, 10 ; stp1_23, stp1_24 - SWAP 2, 3 -%endif - mova [stp + %3 + idx20], m5 - mova [stp + %3 + idx21], m14 - mova [stp + %3 + idx22], m0 - mova [stp + %3 + idx23], m3 - mova [stp + %4 + idx24], m2 - mova [stp + %4 + idx25], m1 - mova [stp + %4 + idx26], m13 - mova [stp + %4 + idx27], m6 - - ; BLOCK C STAGE 1 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; - ; BLOCK C STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m0, [rsp + transposed_in + 16 * 2] - mova m1, [rsp + transposed_in + 16 * 30] - BUTTERFLY_4X 0, 1, 1606, 16305, m8, 9, 10 ; stp1_8, stp1_15 - - mova m2, [rsp + transposed_in + 16 * 14] - mova m3, [rsp + transposed_in + 16 * 18] - BUTTERFLY_4X 3, 2, 12665, 10394, m8, 9, 10 ; stp1_9, stp1_14 - - mova m4, [rsp + transposed_in + 16 * 10] - mova m5, [rsp + transposed_in + 16 * 22] - BUTTERFLY_4X 4, 5, 7723, 14449, m8, 9, 10 ; stp1_10, stp1_13 - - mova m6, [rsp + transposed_in + 16 * 6] - mova m7, [rsp + transposed_in + 16 * 26] - BUTTERFLY_4X 7, 6, 15679, 4756, m8, 9, 10 ; stp1_11, stp1_12 - - ; BLOCK C STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 0, 3, 9 ; stp1_8, stp1_9 - SUM_SUB 7, 4, 9 ; stp1_11, stp1_10 - SUM_SUB 6, 5, 9 ; stp1_12, stp1_13 - SUM_SUB 1, 2, 9 ; stp1_15, stp1_14 - - ; BLOCK C STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_9, stp1_14 - BUTTERFLY_4Xmm 5, 4, 6270, 15137, m8, 9, 10 ; stp1_13, stp1_10 - - ; BLOCK C STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 0, 7, 9 ; stp1_8, stp1_11 - SUM_SUB 2, 4, 9 ; stp1_9, stp1_10 - SUM_SUB 1, 6, 9 ; stp1_15, stp1_12 - SUM_SUB 3, 5, 9 ; stp1_14, stp1_13 - - ; BLOCK C STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%if 0 ; overflow occurs in SUM_SUB when using test streams - mova m10, [pw_11585x2] - SUM_SUB 5, 4, 9 - pmulhrsw m5, m10 ; stp1_13 - pmulhrsw m4, m10 ; stp1_10 - SUM_SUB 6, 7, 9 - pmulhrsw m6, m10 ; stp1_12 - pmulhrsw m7, m10 ; stp1_11 -%else - BUTTERFLY_4X 5, 4, 11585, 11585, m8, 9, 10 ; stp1_10, stp1_13 - SWAP 5, 4 - BUTTERFLY_4X 6, 7, 11585, 11585, m8, 9, 10 ; stp1_11, stp1_12 - SWAP 6, 7 -%endif - ; BLOCK C STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova [stp + %2 + idx8], 
m0 - mova [stp + %2 + idx9], m2 - mova [stp + %2 + idx10], m4 - mova [stp + %2 + idx11], m7 - mova [stp + %2 + idx12], m6 - mova [stp + %2 + idx13], m5 - mova [stp + %2 + idx14], m3 - mova [stp + %2 + idx15], m1 - - ; BLOCK D STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; - ; BLOCK D STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; - ; BLOCK D STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m11, [rsp + transposed_in + 16 * 4] - mova m12, [rsp + transposed_in + 16 * 28] - BUTTERFLY_4X 11, 12, 3196, 16069, m8, 9, 10 ; stp1_4, stp1_7 - - mova m13, [rsp + transposed_in + 16 * 12] - mova m14, [rsp + transposed_in + 16 * 20] - BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_5, stp1_6 - - ; BLOCK D STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m0, [rsp + transposed_in + 16 * 0] - mova m1, [rsp + transposed_in + 16 * 16] - -%if 0 ; overflow occurs in SUM_SUB when using test streams - mova m10, [pw_11585x2] - SUM_SUB 0, 1, 9 - pmulhrsw m0, m10 ; stp1_1 - pmulhrsw m1, m10 ; stp1_0 -%else - BUTTERFLY_4X 0, 1, 11585, 11585, m8, 9, 10 ; stp1_1, stp1_0 - SWAP 0, 1 -%endif - mova m2, [rsp + transposed_in + 16 * 8] - mova m3, [rsp + transposed_in + 16 * 24] - BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_2, stp1_3 - - mova m10, [pw_11585x2] - SUM_SUB 11, 14, 9 ; stp1_4, stp1_5 - SUM_SUB 12, 13, 9 ; stp1_7, stp1_6 - - ; BLOCK D STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -%if 0 ; overflow occurs in SUM_SUB when using test streams - SUM_SUB 13, 14, 9 - pmulhrsw m13, m10 ; stp1_6 - pmulhrsw m14, m10 ; stp1_5 -%else - BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_5, stp1_6 - SWAP 13, 14 -%endif - SUM_SUB 0, 3, 9 ; stp1_0, stp1_3 - SUM_SUB 1, 2, 9 ; stp1_1, stp1_2 - - ; BLOCK D STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SUM_SUB 0, 12, 9 ; stp1_0, stp1_7 - SUM_SUB 1, 13, 9 ; stp1_1, stp1_6 - SUM_SUB 2, 14, 9 ; stp1_2, 
stp1_5 - SUM_SUB 3, 11, 9 ; stp1_3, stp1_4 - - ; BLOCK D STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - mova m4, [stp + %2 + idx12] - mova m5, [stp + %2 + idx13] - mova m6, [stp + %2 + idx14] - mova m7, [stp + %2 + idx15] - SUM_SUB 0, 7, 9 ; stp1_0, stp1_15 - SUM_SUB 1, 6, 9 ; stp1_1, stp1_14 - SUM_SUB 2, 5, 9 ; stp1_2, stp1_13 - SUM_SUB 3, 4, 9 ; stp1_3, stp1_12 - - ; 0-3, 28-31 final stage - mova m10, [stp + %4 + idx31] - mova m15, [stp + %4 + idx30] - SUM_SUB 0, 10, 9 ; stp1_0, stp1_31 - SUM_SUB 1, 15, 9 ; stp1_1, stp1_30 - mova [stp + %1 + idx0], m0 - mova [stp + %1 + idx1], m1 - mova [stp + %4 + idx31], m10 - mova [stp + %4 + idx30], m15 - mova m0, [stp + %4 + idx29] - mova m1, [stp + %4 + idx28] - SUM_SUB 2, 0, 9 ; stp1_2, stp1_29 - SUM_SUB 3, 1, 9 ; stp1_3, stp1_28 - mova [stp + %1 + idx2], m2 - mova [stp + %1 + idx3], m3 - mova [stp + %4 + idx29], m0 - mova [stp + %4 + idx28], m1 - - ; 12-15, 16-19 final stage - mova m0, [stp + %3 + idx16] - mova m1, [stp + %3 + idx17] - mova m2, [stp + %3 + idx18] - mova m3, [stp + %3 + idx19] - SUM_SUB 7, 0, 9 ; stp1_15, stp1_16 - SUM_SUB 6, 1, 9 ; stp1_14, stp1_17 - SUM_SUB 5, 2, 9 ; stp1_13, stp1_18 - SUM_SUB 4, 3, 9 ; stp1_12, stp1_19 - mova [stp + %2 + idx12], m4 - mova [stp + %2 + idx13], m5 - mova [stp + %2 + idx14], m6 - mova [stp + %2 + idx15], m7 - mova [stp + %3 + idx16], m0 - mova [stp + %3 + idx17], m1 - mova [stp + %3 + idx18], m2 - mova [stp + %3 + idx19], m3 - - mova m4, [stp + %2 + idx8] - mova m5, [stp + %2 + idx9] - mova m6, [stp + %2 + idx10] - mova m7, [stp + %2 + idx11] - SUM_SUB 11, 7, 9 ; stp1_4, stp1_11 - SUM_SUB 14, 6, 9 ; stp1_5, stp1_10 - SUM_SUB 13, 5, 9 ; stp1_6, stp1_9 - SUM_SUB 12, 4, 9 ; stp1_7, stp1_8 - - ; 4-7, 24-27 final stage - mova m3, [stp + %4 + idx24] - mova m2, [stp + %4 + idx25] - mova m1, [stp + %4 + idx26] - mova m0, [stp + %4 + idx27] - SUM_SUB 12, 3, 9 ; stp1_7, stp1_24 - SUM_SUB 13, 2, 9 ; stp1_6, stp1_25 - SUM_SUB 14, 1, 9 ; stp1_5, stp1_26 - SUM_SUB 
11, 0, 9 ; stp1_4, stp1_27 - mova [stp + %4 + idx24], m3 - mova [stp + %4 + idx25], m2 - mova [stp + %4 + idx26], m1 - mova [stp + %4 + idx27], m0 - mova [stp + %1 + idx4], m11 - mova [stp + %1 + idx5], m14 - mova [stp + %1 + idx6], m13 - mova [stp + %1 + idx7], m12 - - ; 8-11, 20-23 final stage - mova m0, [stp + %3 + idx20] - mova m1, [stp + %3 + idx21] - mova m2, [stp + %3 + idx22] - mova m3, [stp + %3 + idx23] - SUM_SUB 7, 0, 9 ; stp1_11, stp_20 - SUM_SUB 6, 1, 9 ; stp1_10, stp_21 - SUM_SUB 5, 2, 9 ; stp1_9, stp_22 - SUM_SUB 4, 3, 9 ; stp1_8, stp_23 - mova [stp + %2 + idx8], m4 - mova [stp + %2 + idx9], m5 - mova [stp + %2 + idx10], m6 - mova [stp + %2 + idx11], m7 - mova [stp + %3 + idx20], m0 - mova [stp + %3 + idx21], m1 - mova [stp + %3 + idx22], m2 - mova [stp + %3 + idx23], m3 -%endmacro - -INIT_XMM ssse3 -cglobal idct32x32_1024_add, 3, 11, 16, i32x32_size, input, output, stride - mova m8, [pd_8192] - mov r6, 4 - lea stp, [rsp + pass_one_start] - -idct32x32_1024: - mov r3, inputq - lea r4, [rsp + transposed_in] - mov r7, 4 - -idct32x32_1024_transpose: -%if CONFIG_VP9_HIGHBITDEPTH - mova m0, [r3 + 0] - packssdw m0, [r3 + 16] - mova m1, [r3 + 32 * 4] - packssdw m1, [r3 + 32 * 4 + 16] - mova m2, [r3 + 32 * 8] - packssdw m2, [r3 + 32 * 8 + 16] - mova m3, [r3 + 32 * 12] - packssdw m3, [r3 + 32 * 12 + 16] - mova m4, [r3 + 32 * 16] - packssdw m4, [r3 + 32 * 16 + 16] - mova m5, [r3 + 32 * 20] - packssdw m5, [r3 + 32 * 20 + 16] - mova m6, [r3 + 32 * 24] - packssdw m6, [r3 + 32 * 24 + 16] - mova m7, [r3 + 32 * 28] - packssdw m7, [r3 + 32 * 28 + 16] -%else - mova m0, [r3 + 0] - mova m1, [r3 + 16 * 4] - mova m2, [r3 + 16 * 8] - mova m3, [r3 + 16 * 12] - mova m4, [r3 + 16 * 16] - mova m5, [r3 + 16 * 20] - mova m6, [r3 + 16 * 24] - mova m7, [r3 + 16 * 28] -%endif - - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - - mova [r4 + 0], m0 - mova [r4 + 16 * 1], m1 - mova [r4 + 16 * 2], m2 - mova [r4 + 16 * 3], m3 - mova [r4 + 16 * 4], m4 - mova [r4 + 16 * 5], m5 - mova [r4 + 16 * 
6], m6 - mova [r4 + 16 * 7], m7 -%if CONFIG_VP9_HIGHBITDEPTH - add r3, 32 -%else - add r3, 16 -%endif - add r4, 16 * 8 - dec r7 - jne idct32x32_1024_transpose - - IDCT32X32_1024 16*0, 16*32, 16*64, 16*96 - - lea stp, [stp + 16 * 8] -%if CONFIG_VP9_HIGHBITDEPTH - lea inputq, [inputq + 32 * 32] -%else - lea inputq, [inputq + 16 * 32] -%endif - dec r6 - jnz idct32x32_1024 - - mov r6, 4 - lea stp, [rsp + pass_one_start] - lea r9, [rsp + pass_one_start] - -idct32x32_1024_2: - lea r4, [rsp + transposed_in] - mov r3, r9 - mov r7, 4 - -idct32x32_1024_transpose_2: - mova m0, [r3 + 0] - mova m1, [r3 + 16 * 1] - mova m2, [r3 + 16 * 2] - mova m3, [r3 + 16 * 3] - mova m4, [r3 + 16 * 4] - mova m5, [r3 + 16 * 5] - mova m6, [r3 + 16 * 6] - mova m7, [r3 + 16 * 7] - - TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - - mova [r4 + 0], m0 - mova [r4 + 16 * 1], m1 - mova [r4 + 16 * 2], m2 - mova [r4 + 16 * 3], m3 - mova [r4 + 16 * 4], m4 - mova [r4 + 16 * 5], m5 - mova [r4 + 16 * 6], m6 - mova [r4 + 16 * 7], m7 - - add r3, 16 * 8 - add r4, 16 * 8 - dec r7 - jne idct32x32_1024_transpose_2 - - IDCT32X32_1024 16*0, 16*8, 16*16, 16*24 - - lea stp, [stp + 16 * 32] - add r9, 16 * 32 - dec r6 - jnz idct32x32_1024_2 - - RECON_AND_STORE pass_two_start - - RET -%endif diff --git a/thirdparty/libvpx/vpx_dsp/x86/inv_wht_sse2.asm b/thirdparty/libvpx/vpx_dsp/x86/inv_wht_sse2.asm deleted file mode 100644 index fbbcd76bd7..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/inv_wht_sse2.asm +++ /dev/null @@ -1,109 +0,0 @@ -; -; Copyright (c) 2015 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - -%include "third_party/x86inc/x86inc.asm" - -SECTION .text - -%macro REORDER_INPUTS 0 - ; a c d b to a b c d - SWAP 1, 3, 2 -%endmacro - -%macro TRANSFORM_COLS 0 - ; input: - ; m0 a - ; m1 b - ; m2 c - ; m3 d - paddw m0, m2 - psubw m3, m1 - - ; wide subtract - punpcklwd m4, m0 - punpcklwd m5, m3 - psrad m4, 16 - psrad m5, 16 - psubd m4, m5 - psrad m4, 1 - packssdw m4, m4 ; e - - psubw m5, m4, m1 ; b - psubw m4, m2 ; c - psubw m0, m5 - paddw m3, m4 - ; m0 a - SWAP 1, 5 ; m1 b - SWAP 2, 4 ; m2 c - ; m3 d -%endmacro - -%macro TRANSPOSE_4X4 0 - punpcklwd m0, m2 - punpcklwd m1, m3 - mova m2, m0 - punpcklwd m0, m1 - punpckhwd m2, m1 - pshufd m1, m0, 0x0e - pshufd m3, m2, 0x0e -%endmacro - -; transpose a 4x4 int16 matrix in xmm0 and xmm1 to the bottom half of xmm0-xmm3 -%macro TRANSPOSE_4X4_WIDE 0 - mova m3, m0 - punpcklwd m0, m1 - punpckhwd m3, m1 - mova m2, m0 - punpcklwd m0, m3 - punpckhwd m2, m3 - pshufd m1, m0, 0x0e - pshufd m3, m2, 0x0e -%endmacro - -%macro ADD_STORE_4P_2X 5 ; src1, src2, tmp1, tmp2, zero - movd m%3, [outputq] - movd m%4, [outputq + strideq] - punpcklbw m%3, m%5 - punpcklbw m%4, m%5 - paddw m%1, m%3 - paddw m%2, m%4 - packuswb m%1, m%5 - packuswb m%2, m%5 - movd [outputq], m%1 - movd [outputq + strideq], m%2 -%endmacro - -INIT_XMM sse2 -cglobal iwht4x4_16_add, 3, 3, 7, input, output, stride -%if CONFIG_VP9_HIGHBITDEPTH - mova m0, [inputq + 0] - packssdw m0, [inputq + 16] - mova m1, [inputq + 32] - packssdw m1, [inputq + 48] -%else - mova m0, [inputq + 0] - mova m1, [inputq + 16] -%endif - psraw m0, 2 - psraw m1, 2 - - TRANSPOSE_4X4_WIDE - REORDER_INPUTS - TRANSFORM_COLS - TRANSPOSE_4X4 - REORDER_INPUTS - TRANSFORM_COLS - - pxor m4, m4 - ADD_STORE_4P_2X 0, 1, 5, 6, 4 - lea outputq, [outputq + 2 * strideq] - ADD_STORE_4P_2X 2, 3, 5, 6, 4 - - RET diff --git a/thirdparty/libvpx/vpx_dsp/x86/loopfilter_avx2.c b/thirdparty/libvpx/vpx_dsp/x86/loopfilter_avx2.c deleted file mode 100644 index be1087c1e9..0000000000 --- 
a/thirdparty/libvpx/vpx_dsp/x86/loopfilter_avx2.c +++ /dev/null @@ -1,979 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <immintrin.h> /* AVX2 */ - -#include "./vpx_dsp_rtcd.h" -#include "vpx_ports/mem.h" - -void vpx_lpf_horizontal_edge_8_avx2(unsigned char *s, int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { - __m128i mask, hev, flat, flat2; - const __m128i zero = _mm_set1_epi16(0); - const __m128i one = _mm_set1_epi8(1); - __m128i q7p7, q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0, p0q0, p1q1; - __m128i abs_p1p0; - - const __m128i thresh = _mm_broadcastb_epi8( - _mm_cvtsi32_si128((int) _thresh[0])); - const __m128i limit = _mm_broadcastb_epi8( - _mm_cvtsi32_si128((int) _limit[0])); - const __m128i blimit = _mm_broadcastb_epi8( - _mm_cvtsi32_si128((int) _blimit[0])); - - q4p4 = _mm_loadl_epi64((__m128i *) (s - 5 * p)); - q4p4 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q4p4), (__m64 *) (s + 4 * p))); - q3p3 = _mm_loadl_epi64((__m128i *) (s - 4 * p)); - q3p3 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q3p3), (__m64 *) (s + 3 * p))); - q2p2 = _mm_loadl_epi64((__m128i *) (s - 3 * p)); - q2p2 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q2p2), (__m64 *) (s + 2 * p))); - q1p1 = _mm_loadl_epi64((__m128i *) (s - 2 * p)); - q1p1 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q1p1), (__m64 *) (s + 1 * p))); - p1q1 = _mm_shuffle_epi32(q1p1, 78); - q0p0 = _mm_loadl_epi64((__m128i *) (s - 1 * p)); - q0p0 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q0p0), (__m64 *) (s - 0 * p))); - p0q0 = _mm_shuffle_epi32(q0p0, 
78); - - { - __m128i abs_p1q1, abs_p0q0, abs_q1q0, fe, ff, work; - abs_p1p0 = _mm_or_si128(_mm_subs_epu8(q1p1, q0p0), - _mm_subs_epu8(q0p0, q1p1)); - abs_q1q0 = _mm_srli_si128(abs_p1p0, 8); - fe = _mm_set1_epi8(0xfe); - ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); - abs_p0q0 = _mm_or_si128(_mm_subs_epu8(q0p0, p0q0), - _mm_subs_epu8(p0q0, q0p0)); - abs_p1q1 = _mm_or_si128(_mm_subs_epu8(q1p1, p1q1), - _mm_subs_epu8(p1q1, q1p1)); - flat = _mm_max_epu8(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu8(flat, thresh); - hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); - - abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); - abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); - mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); - // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; - mask = _mm_max_epu8(abs_p1p0, mask); - // mask |= (abs(p1 - p0) > limit) * -1; - // mask |= (abs(q1 - q0) > limit) * -1; - - work = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(q2p2, q1p1), - _mm_subs_epu8(q1p1, q2p2)), - _mm_or_si128(_mm_subs_epu8(q3p3, q2p2), - _mm_subs_epu8(q2p2, q3p3))); - mask = _mm_max_epu8(work, mask); - mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); - mask = _mm_subs_epu8(mask, limit); - mask = _mm_cmpeq_epi8(mask, zero); - } - - // lp filter - { - const __m128i t4 = _mm_set1_epi8(4); - const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); - const __m128i t1 = _mm_set1_epi16(0x1); - __m128i qs1ps1 = _mm_xor_si128(q1p1, t80); - __m128i qs0ps0 = _mm_xor_si128(q0p0, t80); - __m128i qs0 = _mm_xor_si128(p0q0, t80); - __m128i qs1 = _mm_xor_si128(p1q1, t80); - __m128i filt; - __m128i work_a; - __m128i filter1, filter2; - __m128i flat2_q6p6, flat2_q5p5, flat2_q4p4, flat2_q3p3, flat2_q2p2; - __m128i flat2_q1p1, flat2_q0p0, flat_q2p2, flat_q1p1, flat_q0p0; - - filt = _mm_and_si128(_mm_subs_epi8(qs1ps1, qs1), hev); - work_a = _mm_subs_epi8(qs0, qs0ps0); - filt = 
_mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - /* (vpx_filter + 3 * (qs0 - ps0)) & mask */ - filt = _mm_and_si128(filt, mask); - - filter1 = _mm_adds_epi8(filt, t4); - filter2 = _mm_adds_epi8(filt, t3); - - filter1 = _mm_unpacklo_epi8(zero, filter1); - filter1 = _mm_srai_epi16(filter1, 0xB); - filter2 = _mm_unpacklo_epi8(zero, filter2); - filter2 = _mm_srai_epi16(filter2, 0xB); - - /* Filter1 >> 3 */ - filt = _mm_packs_epi16(filter2, _mm_subs_epi16(zero, filter1)); - qs0ps0 = _mm_xor_si128(_mm_adds_epi8(qs0ps0, filt), t80); - - /* filt >> 1 */ - filt = _mm_adds_epi16(filter1, t1); - filt = _mm_srai_epi16(filt, 1); - filt = _mm_andnot_si128( - _mm_srai_epi16(_mm_unpacklo_epi8(zero, hev), 0x8), filt); - filt = _mm_packs_epi16(filt, _mm_subs_epi16(zero, filt)); - qs1ps1 = _mm_xor_si128(_mm_adds_epi8(qs1ps1, filt), t80); - // loopfilter done - - { - __m128i work; - flat = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(q2p2, q0p0), - _mm_subs_epu8(q0p0, q2p2)), - _mm_or_si128(_mm_subs_epu8(q3p3, q0p0), - _mm_subs_epu8(q0p0, q3p3))); - flat = _mm_max_epu8(abs_p1p0, flat); - flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); - flat = _mm_subs_epu8(flat, one); - flat = _mm_cmpeq_epi8(flat, zero); - flat = _mm_and_si128(flat, mask); - - q5p5 = _mm_loadl_epi64((__m128i *) (s - 6 * p)); - q5p5 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q5p5), - (__m64 *) (s + 5 * p))); - - q6p6 = _mm_loadl_epi64((__m128i *) (s - 7 * p)); - q6p6 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q6p6), - (__m64 *) (s + 6 * p))); - - flat2 = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(q4p4, q0p0), - _mm_subs_epu8(q0p0, q4p4)), - _mm_or_si128(_mm_subs_epu8(q5p5, q0p0), - _mm_subs_epu8(q0p0, q5p5))); - - q7p7 = _mm_loadl_epi64((__m128i *) (s - 8 * p)); - q7p7 = _mm_castps_si128( - _mm_loadh_pi(_mm_castsi128_ps(q7p7), - (__m64 *) (s + 7 * p))); - - work = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(q6p6, q0p0), - _mm_subs_epu8(q0p0, 
q6p6)), - _mm_or_si128(_mm_subs_epu8(q7p7, q0p0), - _mm_subs_epu8(q0p0, q7p7))); - - flat2 = _mm_max_epu8(work, flat2); - flat2 = _mm_max_epu8(flat2, _mm_srli_si128(flat2, 8)); - flat2 = _mm_subs_epu8(flat2, one); - flat2 = _mm_cmpeq_epi8(flat2, zero); - flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask - } - - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // flat and wide flat calculations - { - const __m128i eight = _mm_set1_epi16(8); - const __m128i four = _mm_set1_epi16(4); - __m128i p7_16, p6_16, p5_16, p4_16, p3_16, p2_16, p1_16, p0_16; - __m128i q7_16, q6_16, q5_16, q4_16, q3_16, q2_16, q1_16, q0_16; - __m128i pixelFilter_p, pixelFilter_q; - __m128i pixetFilter_p2p1p0, pixetFilter_q2q1q0; - __m128i sum_p7, sum_q7, sum_p3, sum_q3, res_p, res_q; - - p7_16 = _mm_unpacklo_epi8(q7p7, zero); - p6_16 = _mm_unpacklo_epi8(q6p6, zero); - p5_16 = _mm_unpacklo_epi8(q5p5, zero); - p4_16 = _mm_unpacklo_epi8(q4p4, zero); - p3_16 = _mm_unpacklo_epi8(q3p3, zero); - p2_16 = _mm_unpacklo_epi8(q2p2, zero); - p1_16 = _mm_unpacklo_epi8(q1p1, zero); - p0_16 = _mm_unpacklo_epi8(q0p0, zero); - q0_16 = _mm_unpackhi_epi8(q0p0, zero); - q1_16 = _mm_unpackhi_epi8(q1p1, zero); - q2_16 = _mm_unpackhi_epi8(q2p2, zero); - q3_16 = _mm_unpackhi_epi8(q3p3, zero); - q4_16 = _mm_unpackhi_epi8(q4p4, zero); - q5_16 = _mm_unpackhi_epi8(q5p5, zero); - q6_16 = _mm_unpackhi_epi8(q6p6, zero); - q7_16 = _mm_unpackhi_epi8(q7p7, zero); - - pixelFilter_p = _mm_add_epi16(_mm_add_epi16(p6_16, p5_16), - _mm_add_epi16(p4_16, p3_16)); - pixelFilter_q = _mm_add_epi16(_mm_add_epi16(q6_16, q5_16), - _mm_add_epi16(q4_16, q3_16)); - - pixetFilter_p2p1p0 = _mm_add_epi16(p0_16, - _mm_add_epi16(p2_16, p1_16)); - pixelFilter_p = _mm_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); - - pixetFilter_q2q1q0 = _mm_add_epi16(q0_16, - _mm_add_epi16(q2_16, q1_16)); - pixelFilter_q = _mm_add_epi16(pixelFilter_q, pixetFilter_q2q1q0); - pixelFilter_p = _mm_add_epi16(eight, - _mm_add_epi16(pixelFilter_p, 
pixelFilter_q)); - pixetFilter_p2p1p0 = _mm_add_epi16(four, - _mm_add_epi16(pixetFilter_p2p1p0, pixetFilter_q2q1q0)); - res_p = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_p, _mm_add_epi16(p7_16, p0_16)), - 4); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_p, _mm_add_epi16(q7_16, q0_16)), - 4); - flat2_q0p0 = _mm_packus_epi16(res_p, res_q); - res_p = _mm_srli_epi16( - _mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(p3_16, p0_16)), 3); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(q3_16, q0_16)), 3); - - flat_q0p0 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(p7_16, p7_16); - sum_q7 = _mm_add_epi16(q7_16, q7_16); - sum_p3 = _mm_add_epi16(p3_16, p3_16); - sum_q3 = _mm_add_epi16(q3_16, q3_16); - - pixelFilter_q = _mm_sub_epi16(pixelFilter_p, p6_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q6_16); - res_p = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p1_16)), - 4); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q1_16)), - 4); - flat2_q1p1 = _mm_packus_epi16(res_p, res_q); - - pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_p2p1p0, p2_16); - pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q2_16); - res_p = _mm_srli_epi16( - _mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(sum_p3, p1_16)), 3); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixetFilter_q2q1q0, - _mm_add_epi16(sum_q3, q1_16)), 3); - flat_q1p1 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - sum_p3 = _mm_add_epi16(sum_p3, p3_16); - sum_q3 = _mm_add_epi16(sum_q3, q3_16); - - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q5_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p5_16); - res_p = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p2_16)), - 4); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q2_16)), - 4); - flat2_q2p2 = _mm_packus_epi16(res_p, res_q); - - 
pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q1_16); - pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_q2q1q0, p1_16); - - res_p = _mm_srli_epi16( - _mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(sum_p3, p2_16)), 3); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixetFilter_q2q1q0, - _mm_add_epi16(sum_q3, q2_16)), 3); - flat_q2p2 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q4_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p4_16); - res_p = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p3_16)), - 4); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q3_16)), - 4); - flat2_q3p3 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q3_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p3_16); - res_p = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p4_16)), - 4); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q4_16)), - 4); - flat2_q4p4 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q2_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p2_16); - res_p = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p5_16)), - 4); - res_q = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q5_16)), - 4); - flat2_q5p5 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q1_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p1_16); - res_p = _mm_srli_epi16( - _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p6_16)), - 4); - res_q = _mm_srli_epi16( - 
_mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q6_16)), - 4); - flat2_q6p6 = _mm_packus_epi16(res_p, res_q); - } - // wide flat - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - flat = _mm_shuffle_epi32(flat, 68); - flat2 = _mm_shuffle_epi32(flat2, 68); - - q2p2 = _mm_andnot_si128(flat, q2p2); - flat_q2p2 = _mm_and_si128(flat, flat_q2p2); - q2p2 = _mm_or_si128(q2p2, flat_q2p2); - - qs1ps1 = _mm_andnot_si128(flat, qs1ps1); - flat_q1p1 = _mm_and_si128(flat, flat_q1p1); - q1p1 = _mm_or_si128(qs1ps1, flat_q1p1); - - qs0ps0 = _mm_andnot_si128(flat, qs0ps0); - flat_q0p0 = _mm_and_si128(flat, flat_q0p0); - q0p0 = _mm_or_si128(qs0ps0, flat_q0p0); - - q6p6 = _mm_andnot_si128(flat2, q6p6); - flat2_q6p6 = _mm_and_si128(flat2, flat2_q6p6); - q6p6 = _mm_or_si128(q6p6, flat2_q6p6); - _mm_storel_epi64((__m128i *) (s - 7 * p), q6p6); - _mm_storeh_pi((__m64 *) (s + 6 * p), _mm_castsi128_ps(q6p6)); - - q5p5 = _mm_andnot_si128(flat2, q5p5); - flat2_q5p5 = _mm_and_si128(flat2, flat2_q5p5); - q5p5 = _mm_or_si128(q5p5, flat2_q5p5); - _mm_storel_epi64((__m128i *) (s - 6 * p), q5p5); - _mm_storeh_pi((__m64 *) (s + 5 * p), _mm_castsi128_ps(q5p5)); - - q4p4 = _mm_andnot_si128(flat2, q4p4); - flat2_q4p4 = _mm_and_si128(flat2, flat2_q4p4); - q4p4 = _mm_or_si128(q4p4, flat2_q4p4); - _mm_storel_epi64((__m128i *) (s - 5 * p), q4p4); - _mm_storeh_pi((__m64 *) (s + 4 * p), _mm_castsi128_ps(q4p4)); - - q3p3 = _mm_andnot_si128(flat2, q3p3); - flat2_q3p3 = _mm_and_si128(flat2, flat2_q3p3); - q3p3 = _mm_or_si128(q3p3, flat2_q3p3); - _mm_storel_epi64((__m128i *) (s - 4 * p), q3p3); - _mm_storeh_pi((__m64 *) (s + 3 * p), _mm_castsi128_ps(q3p3)); - - q2p2 = _mm_andnot_si128(flat2, q2p2); - flat2_q2p2 = _mm_and_si128(flat2, flat2_q2p2); - q2p2 = _mm_or_si128(q2p2, flat2_q2p2); - _mm_storel_epi64((__m128i *) (s - 3 * p), q2p2); - _mm_storeh_pi((__m64 *) (s + 2 * p), _mm_castsi128_ps(q2p2)); - - q1p1 = _mm_andnot_si128(flat2, q1p1); - flat2_q1p1 = _mm_and_si128(flat2, flat2_q1p1); - q1p1 = 
_mm_or_si128(q1p1, flat2_q1p1); - _mm_storel_epi64((__m128i *) (s - 2 * p), q1p1); - _mm_storeh_pi((__m64 *) (s + 1 * p), _mm_castsi128_ps(q1p1)); - - q0p0 = _mm_andnot_si128(flat2, q0p0); - flat2_q0p0 = _mm_and_si128(flat2, flat2_q0p0); - q0p0 = _mm_or_si128(q0p0, flat2_q0p0); - _mm_storel_epi64((__m128i *) (s - 1 * p), q0p0); - _mm_storeh_pi((__m64 *) (s - 0 * p), _mm_castsi128_ps(q0p0)); - } -} - -DECLARE_ALIGNED(32, static const uint8_t, filt_loopfilter_avx2[32]) = { - 0, 128, 1, 128, 2, 128, 3, 128, 4, 128, 5, 128, 6, 128, 7, 128, - 8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128 -}; - -void vpx_lpf_horizontal_edge_16_avx2(unsigned char *s, int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { - __m128i mask, hev, flat, flat2; - const __m128i zero = _mm_set1_epi16(0); - const __m128i one = _mm_set1_epi8(1); - __m128i p7, p6, p5; - __m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4; - __m128i q5, q6, q7; - __m256i p256_7, q256_7, p256_6, q256_6, p256_5, q256_5, p256_4, - q256_4, p256_3, q256_3, p256_2, q256_2, p256_1, q256_1, - p256_0, q256_0; - - const __m128i thresh = _mm_broadcastb_epi8( - _mm_cvtsi32_si128((int) _thresh[0])); - const __m128i limit = _mm_broadcastb_epi8( - _mm_cvtsi32_si128((int) _limit[0])); - const __m128i blimit = _mm_broadcastb_epi8( - _mm_cvtsi32_si128((int) _blimit[0])); - - p256_4 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s - 5 * p))); - p256_3 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s - 4 * p))); - p256_2 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s - 3 * p))); - p256_1 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s - 2 * p))); - p256_0 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s - 1 * p))); - q256_0 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s - 0 * p))); - q256_1 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s + 1 * p))); - 
q256_2 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s + 2 * p))); - q256_3 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s + 3 * p))); - q256_4 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s + 4 * p))); - - p4 = _mm256_castsi256_si128(p256_4); - p3 = _mm256_castsi256_si128(p256_3); - p2 = _mm256_castsi256_si128(p256_2); - p1 = _mm256_castsi256_si128(p256_1); - p0 = _mm256_castsi256_si128(p256_0); - q0 = _mm256_castsi256_si128(q256_0); - q1 = _mm256_castsi256_si128(q256_1); - q2 = _mm256_castsi256_si128(q256_2); - q3 = _mm256_castsi256_si128(q256_3); - q4 = _mm256_castsi256_si128(q256_4); - - { - const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0), - _mm_subs_epu8(p0, p1)); - const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), - _mm_subs_epu8(q0, q1)); - const __m128i fe = _mm_set1_epi8(0xfe); - const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); - __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), - _mm_subs_epu8(q0, p0)); - __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1), - _mm_subs_epu8(q1, p1)); - __m128i work; - flat = _mm_max_epu8(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu8(flat, thresh); - hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); - - abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); - abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); - mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); - // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; - mask = _mm_max_epu8(flat, mask); - // mask |= (abs(p1 - p0) > limit) * -1; - // mask |= (abs(q1 - q0) > limit) * -1; - work = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(p2, p1), _mm_subs_epu8(p1, p2)), - _mm_or_si128(_mm_subs_epu8(p3, p2), _mm_subs_epu8(p2, p3))); - mask = _mm_max_epu8(work, mask); - work = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(q2, q1), _mm_subs_epu8(q1, q2)), - _mm_or_si128(_mm_subs_epu8(q3, q2), _mm_subs_epu8(q2, q3))); - 
mask = _mm_max_epu8(work, mask); - mask = _mm_subs_epu8(mask, limit); - mask = _mm_cmpeq_epi8(mask, zero); - } - - // lp filter - { - const __m128i t4 = _mm_set1_epi8(4); - const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); - const __m128i te0 = _mm_set1_epi8(0xe0); - const __m128i t1f = _mm_set1_epi8(0x1f); - const __m128i t1 = _mm_set1_epi8(0x1); - const __m128i t7f = _mm_set1_epi8(0x7f); - - __m128i ps1 = _mm_xor_si128(p1, t80); - __m128i ps0 = _mm_xor_si128(p0, t80); - __m128i qs0 = _mm_xor_si128(q0, t80); - __m128i qs1 = _mm_xor_si128(q1, t80); - __m128i filt; - __m128i work_a; - __m128i filter1, filter2; - __m128i flat2_p6, flat2_p5, flat2_p4, flat2_p3, flat2_p2, flat2_p1, - flat2_p0, flat2_q0, flat2_q1, flat2_q2, flat2_q3, flat2_q4, - flat2_q5, flat2_q6, flat_p2, flat_p1, flat_p0, flat_q0, flat_q1, - flat_q2; - - filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev); - work_a = _mm_subs_epi8(qs0, ps0); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - /* (vpx_filter + 3 * (qs0 - ps0)) & mask */ - filt = _mm_and_si128(filt, mask); - - filter1 = _mm_adds_epi8(filt, t4); - filter2 = _mm_adds_epi8(filt, t3); - - /* Filter1 >> 3 */ - work_a = _mm_cmpgt_epi8(zero, filter1); - filter1 = _mm_srli_epi16(filter1, 3); - work_a = _mm_and_si128(work_a, te0); - filter1 = _mm_and_si128(filter1, t1f); - filter1 = _mm_or_si128(filter1, work_a); - qs0 = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); - - /* Filter2 >> 3 */ - work_a = _mm_cmpgt_epi8(zero, filter2); - filter2 = _mm_srli_epi16(filter2, 3); - work_a = _mm_and_si128(work_a, te0); - filter2 = _mm_and_si128(filter2, t1f); - filter2 = _mm_or_si128(filter2, work_a); - ps0 = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); - - /* filt >> 1 */ - filt = _mm_adds_epi8(filter1, t1); - work_a = _mm_cmpgt_epi8(zero, filt); - filt = _mm_srli_epi16(filt, 1); - work_a = _mm_and_si128(work_a, t80); - filt = _mm_and_si128(filt, t7f); - 
filt = _mm_or_si128(filt, work_a); - filt = _mm_andnot_si128(hev, filt); - ps1 = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80); - qs1 = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80); - // loopfilter done - - { - __m128i work; - work = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(p2, p0), _mm_subs_epu8(p0, p2)), - _mm_or_si128(_mm_subs_epu8(q2, q0), _mm_subs_epu8(q0, q2))); - flat = _mm_max_epu8(work, flat); - work = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(p3, p0), _mm_subs_epu8(p0, p3)), - _mm_or_si128(_mm_subs_epu8(q3, q0), _mm_subs_epu8(q0, q3))); - flat = _mm_max_epu8(work, flat); - work = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(p4, p0), _mm_subs_epu8(p0, p4)), - _mm_or_si128(_mm_subs_epu8(q4, q0), _mm_subs_epu8(q0, q4))); - flat = _mm_subs_epu8(flat, one); - flat = _mm_cmpeq_epi8(flat, zero); - flat = _mm_and_si128(flat, mask); - - p256_5 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s - 6 * p))); - q256_5 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s + 5 * p))); - p5 = _mm256_castsi256_si128(p256_5); - q5 = _mm256_castsi256_si128(q256_5); - flat2 = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(p5, p0), _mm_subs_epu8(p0, p5)), - _mm_or_si128(_mm_subs_epu8(q5, q0), _mm_subs_epu8(q0, q5))); - - flat2 = _mm_max_epu8(work, flat2); - p256_6 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s - 7 * p))); - q256_6 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s + 6 * p))); - p6 = _mm256_castsi256_si128(p256_6); - q6 = _mm256_castsi256_si128(q256_6); - work = _mm_max_epu8( - _mm_or_si128(_mm_subs_epu8(p6, p0), _mm_subs_epu8(p0, p6)), - _mm_or_si128(_mm_subs_epu8(q6, q0), _mm_subs_epu8(q0, q6))); - - flat2 = _mm_max_epu8(work, flat2); - - p256_7 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s - 8 * p))); - q256_7 = _mm256_castpd_si256(_mm256_broadcast_pd( - (__m128d const *)(s + 7 * p))); - p7 = _mm256_castsi256_si128(p256_7); - q7 = _mm256_castsi256_si128(q256_7); - work = _mm_max_epu8( - 
_mm_or_si128(_mm_subs_epu8(p7, p0), _mm_subs_epu8(p0, p7)), - _mm_or_si128(_mm_subs_epu8(q7, q0), _mm_subs_epu8(q0, q7))); - - flat2 = _mm_max_epu8(work, flat2); - flat2 = _mm_subs_epu8(flat2, one); - flat2 = _mm_cmpeq_epi8(flat2, zero); - flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask - } - - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // flat and wide flat calculations - { - const __m256i eight = _mm256_set1_epi16(8); - const __m256i four = _mm256_set1_epi16(4); - __m256i pixelFilter_p, pixelFilter_q, pixetFilter_p2p1p0, - pixetFilter_q2q1q0, sum_p7, sum_q7, sum_p3, sum_q3, res_p, - res_q; - - const __m256i filter = _mm256_load_si256( - (__m256i const *)filt_loopfilter_avx2); - p256_7 = _mm256_shuffle_epi8(p256_7, filter); - p256_6 = _mm256_shuffle_epi8(p256_6, filter); - p256_5 = _mm256_shuffle_epi8(p256_5, filter); - p256_4 = _mm256_shuffle_epi8(p256_4, filter); - p256_3 = _mm256_shuffle_epi8(p256_3, filter); - p256_2 = _mm256_shuffle_epi8(p256_2, filter); - p256_1 = _mm256_shuffle_epi8(p256_1, filter); - p256_0 = _mm256_shuffle_epi8(p256_0, filter); - q256_0 = _mm256_shuffle_epi8(q256_0, filter); - q256_1 = _mm256_shuffle_epi8(q256_1, filter); - q256_2 = _mm256_shuffle_epi8(q256_2, filter); - q256_3 = _mm256_shuffle_epi8(q256_3, filter); - q256_4 = _mm256_shuffle_epi8(q256_4, filter); - q256_5 = _mm256_shuffle_epi8(q256_5, filter); - q256_6 = _mm256_shuffle_epi8(q256_6, filter); - q256_7 = _mm256_shuffle_epi8(q256_7, filter); - - pixelFilter_p = _mm256_add_epi16(_mm256_add_epi16(p256_6, p256_5), - _mm256_add_epi16(p256_4, p256_3)); - pixelFilter_q = _mm256_add_epi16(_mm256_add_epi16(q256_6, q256_5), - _mm256_add_epi16(q256_4, q256_3)); - - pixetFilter_p2p1p0 = _mm256_add_epi16(p256_0, - _mm256_add_epi16(p256_2, p256_1)); - pixelFilter_p = _mm256_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); - - pixetFilter_q2q1q0 = _mm256_add_epi16(q256_0, - _mm256_add_epi16(q256_2, q256_1)); - pixelFilter_q = _mm256_add_epi16(pixelFilter_q, 
pixetFilter_q2q1q0); - - pixelFilter_p = _mm256_add_epi16(eight, - _mm256_add_epi16(pixelFilter_p, pixelFilter_q)); - - pixetFilter_p2p1p0 = _mm256_add_epi16(four, - _mm256_add_epi16(pixetFilter_p2p1p0, pixetFilter_q2q1q0)); - - res_p = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_p, - _mm256_add_epi16(p256_7, p256_0)), 4); - - flat2_p0 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_p, - _mm256_add_epi16(q256_7, q256_0)), 4); - - flat2_q0 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - - res_p = _mm256_srli_epi16( - _mm256_add_epi16(pixetFilter_p2p1p0, - _mm256_add_epi16(p256_3, p256_0)), 3); - - flat_p0 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixetFilter_p2p1p0, - _mm256_add_epi16(q256_3, q256_0)), 3); - - flat_q0 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - - sum_p7 = _mm256_add_epi16(p256_7, p256_7); - - sum_q7 = _mm256_add_epi16(q256_7, q256_7); - - sum_p3 = _mm256_add_epi16(p256_3, p256_3); - - sum_q3 = _mm256_add_epi16(q256_3, q256_3); - - pixelFilter_q = _mm256_sub_epi16(pixelFilter_p, p256_6); - - pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_6); - - res_p = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_p, - _mm256_add_epi16(sum_p7, p256_1)), 4); - - flat2_p1 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_q, - _mm256_add_epi16(sum_q7, q256_1)), 4); - - flat2_q1 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - - pixetFilter_q2q1q0 = _mm256_sub_epi16(pixetFilter_p2p1p0, p256_2); - - pixetFilter_p2p1p0 = _mm256_sub_epi16(pixetFilter_p2p1p0, q256_2); - - res_p = 
_mm256_srli_epi16( - _mm256_add_epi16(pixetFilter_p2p1p0, - _mm256_add_epi16(sum_p3, p256_1)), 3); - - flat_p1 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixetFilter_q2q1q0, - _mm256_add_epi16(sum_q3, q256_1)), 3); - - flat_q1 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - - sum_p7 = _mm256_add_epi16(sum_p7, p256_7); - - sum_q7 = _mm256_add_epi16(sum_q7, q256_7); - - sum_p3 = _mm256_add_epi16(sum_p3, p256_3); - - sum_q3 = _mm256_add_epi16(sum_q3, q256_3); - - pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_5); - - pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_5); - - res_p = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_p, - _mm256_add_epi16(sum_p7, p256_2)), 4); - - flat2_p2 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_q, - _mm256_add_epi16(sum_q7, q256_2)), 4); - - flat2_q2 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - - pixetFilter_p2p1p0 = _mm256_sub_epi16(pixetFilter_p2p1p0, q256_1); - - pixetFilter_q2q1q0 = _mm256_sub_epi16(pixetFilter_q2q1q0, p256_1); - - res_p = _mm256_srli_epi16( - _mm256_add_epi16(pixetFilter_p2p1p0, - _mm256_add_epi16(sum_p3, p256_2)), 3); - - flat_p2 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixetFilter_q2q1q0, - _mm256_add_epi16(sum_q3, q256_2)), 3); - - flat_q2 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - - sum_p7 = _mm256_add_epi16(sum_p7, p256_7); - - sum_q7 = _mm256_add_epi16(sum_q7, q256_7); - - pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_4); - - pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_4); - - res_p = _mm256_srli_epi16( 
- _mm256_add_epi16(pixelFilter_p, - _mm256_add_epi16(sum_p7, p256_3)), 4); - - flat2_p3 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_q, - _mm256_add_epi16(sum_q7, q256_3)), 4); - - flat2_q3 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - - sum_p7 = _mm256_add_epi16(sum_p7, p256_7); - - sum_q7 = _mm256_add_epi16(sum_q7, q256_7); - - pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_3); - - pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_3); - - res_p = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_p, - _mm256_add_epi16(sum_p7, p256_4)), 4); - - flat2_p4 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_q, - _mm256_add_epi16(sum_q7, q256_4)), 4); - - flat2_q4 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - - sum_p7 = _mm256_add_epi16(sum_p7, p256_7); - - sum_q7 = _mm256_add_epi16(sum_q7, q256_7); - - pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_2); - - pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_2); - - res_p = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_p, - _mm256_add_epi16(sum_p7, p256_5)), 4); - - flat2_p5 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_q, - _mm256_add_epi16(sum_q7, q256_5)), 4); - - flat2_q5 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - - sum_p7 = _mm256_add_epi16(sum_p7, p256_7); - - sum_q7 = _mm256_add_epi16(sum_q7, q256_7); - - pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_1); - - pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_1); - - res_p = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_p, - 
_mm256_add_epi16(sum_p7, p256_6)), 4); - - flat2_p6 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), - 168)); - - res_q = _mm256_srli_epi16( - _mm256_add_epi16(pixelFilter_q, - _mm256_add_epi16(sum_q7, q256_6)), 4); - - flat2_q6 = _mm256_castsi256_si128( - _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), - 168)); - } - - // wide flat - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - p2 = _mm_andnot_si128(flat, p2); - flat_p2 = _mm_and_si128(flat, flat_p2); - p2 = _mm_or_si128(flat_p2, p2); - - p1 = _mm_andnot_si128(flat, ps1); - flat_p1 = _mm_and_si128(flat, flat_p1); - p1 = _mm_or_si128(flat_p1, p1); - - p0 = _mm_andnot_si128(flat, ps0); - flat_p0 = _mm_and_si128(flat, flat_p0); - p0 = _mm_or_si128(flat_p0, p0); - - q0 = _mm_andnot_si128(flat, qs0); - flat_q0 = _mm_and_si128(flat, flat_q0); - q0 = _mm_or_si128(flat_q0, q0); - - q1 = _mm_andnot_si128(flat, qs1); - flat_q1 = _mm_and_si128(flat, flat_q1); - q1 = _mm_or_si128(flat_q1, q1); - - q2 = _mm_andnot_si128(flat, q2); - flat_q2 = _mm_and_si128(flat, flat_q2); - q2 = _mm_or_si128(flat_q2, q2); - - p6 = _mm_andnot_si128(flat2, p6); - flat2_p6 = _mm_and_si128(flat2, flat2_p6); - p6 = _mm_or_si128(flat2_p6, p6); - _mm_storeu_si128((__m128i *) (s - 7 * p), p6); - - p5 = _mm_andnot_si128(flat2, p5); - flat2_p5 = _mm_and_si128(flat2, flat2_p5); - p5 = _mm_or_si128(flat2_p5, p5); - _mm_storeu_si128((__m128i *) (s - 6 * p), p5); - - p4 = _mm_andnot_si128(flat2, p4); - flat2_p4 = _mm_and_si128(flat2, flat2_p4); - p4 = _mm_or_si128(flat2_p4, p4); - _mm_storeu_si128((__m128i *) (s - 5 * p), p4); - - p3 = _mm_andnot_si128(flat2, p3); - flat2_p3 = _mm_and_si128(flat2, flat2_p3); - p3 = _mm_or_si128(flat2_p3, p3); - _mm_storeu_si128((__m128i *) (s - 4 * p), p3); - - p2 = _mm_andnot_si128(flat2, p2); - flat2_p2 = _mm_and_si128(flat2, flat2_p2); - p2 = _mm_or_si128(flat2_p2, p2); - _mm_storeu_si128((__m128i *) (s - 3 * p), p2); - - p1 = _mm_andnot_si128(flat2, p1); - 
flat2_p1 = _mm_and_si128(flat2, flat2_p1); - p1 = _mm_or_si128(flat2_p1, p1); - _mm_storeu_si128((__m128i *) (s - 2 * p), p1); - - p0 = _mm_andnot_si128(flat2, p0); - flat2_p0 = _mm_and_si128(flat2, flat2_p0); - p0 = _mm_or_si128(flat2_p0, p0); - _mm_storeu_si128((__m128i *) (s - 1 * p), p0); - - q0 = _mm_andnot_si128(flat2, q0); - flat2_q0 = _mm_and_si128(flat2, flat2_q0); - q0 = _mm_or_si128(flat2_q0, q0); - _mm_storeu_si128((__m128i *) (s - 0 * p), q0); - - q1 = _mm_andnot_si128(flat2, q1); - flat2_q1 = _mm_and_si128(flat2, flat2_q1); - q1 = _mm_or_si128(flat2_q1, q1); - _mm_storeu_si128((__m128i *) (s + 1 * p), q1); - - q2 = _mm_andnot_si128(flat2, q2); - flat2_q2 = _mm_and_si128(flat2, flat2_q2); - q2 = _mm_or_si128(flat2_q2, q2); - _mm_storeu_si128((__m128i *) (s + 2 * p), q2); - - q3 = _mm_andnot_si128(flat2, q3); - flat2_q3 = _mm_and_si128(flat2, flat2_q3); - q3 = _mm_or_si128(flat2_q3, q3); - _mm_storeu_si128((__m128i *) (s + 3 * p), q3); - - q4 = _mm_andnot_si128(flat2, q4); - flat2_q4 = _mm_and_si128(flat2, flat2_q4); - q4 = _mm_or_si128(flat2_q4, q4); - _mm_storeu_si128((__m128i *) (s + 4 * p), q4); - - q5 = _mm_andnot_si128(flat2, q5); - flat2_q5 = _mm_and_si128(flat2, flat2_q5); - q5 = _mm_or_si128(flat2_q5, q5); - _mm_storeu_si128((__m128i *) (s + 5 * p), q5); - - q6 = _mm_andnot_si128(flat2, q6); - flat2_q6 = _mm_and_si128(flat2, flat2_q6); - q6 = _mm_or_si128(flat2_q6, q6); - _mm_storeu_si128((__m128i *) (s + 6 * p), q6); - } -} diff --git a/thirdparty/libvpx/vpx_dsp/x86/loopfilter_sse2.c b/thirdparty/libvpx/vpx_dsp/x86/loopfilter_sse2.c deleted file mode 100644 index 739adf31d0..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/loopfilter_sse2.c +++ /dev/null @@ -1,1776 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <emmintrin.h> // SSE2 - -#include "./vpx_dsp_rtcd.h" -#include "vpx_ports/mem.h" -#include "vpx_ports/emmintrin_compat.h" - -static INLINE __m128i abs_diff(__m128i a, __m128i b) { - return _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a)); -} - -// filter_mask and hev_mask -#define FILTER_HEV_MASK do { \ - /* (abs(q1 - q0), abs(p1 - p0) */ \ - __m128i flat = abs_diff(q1p1, q0p0); \ - /* abs(p1 - q1), abs(p0 - q0) */ \ - const __m128i abs_p1q1p0q0 = abs_diff(p1p0, q1q0); \ - __m128i abs_p0q0, abs_p1q1, work; \ - \ - /* const uint8_t hev = hev_mask(thresh, *op1, *op0, *oq0, *oq1); */ \ - hev = _mm_unpacklo_epi8(_mm_max_epu8(flat, _mm_srli_si128(flat, 8)), zero); \ - hev = _mm_cmpgt_epi16(hev, thresh); \ - hev = _mm_packs_epi16(hev, hev); \ - \ - /* const int8_t mask = filter_mask(*limit, *blimit, */ \ - /* p3, p2, p1, p0, q0, q1, q2, q3); */ \ - abs_p0q0 = _mm_adds_epu8(abs_p1q1p0q0, abs_p1q1p0q0); /* abs(p0 - q0) * 2 */\ - abs_p1q1 = _mm_unpackhi_epi8(abs_p1q1p0q0, abs_p1q1p0q0); /* abs(p1 - q1) */\ - abs_p1q1 = _mm_srli_epi16(abs_p1q1, 9); \ - abs_p1q1 = _mm_packs_epi16(abs_p1q1, abs_p1q1); /* abs(p1 - q1) / 2 */ \ - /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 */ \ - mask = _mm_adds_epu8(abs_p0q0, abs_p1q1); \ - /* abs(p3 - p2), abs(p2 - p1) */ \ - work = abs_diff(p3p2, p2p1); \ - flat = _mm_max_epu8(work, flat); \ - /* abs(q3 - q2), abs(q2 - q1) */ \ - work = abs_diff(q3q2, q2q1); \ - flat = _mm_max_epu8(work, flat); \ - flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); \ - mask = _mm_unpacklo_epi64(mask, flat); \ - mask = _mm_subs_epu8(mask, limit); \ - mask = _mm_cmpeq_epi8(mask, zero); \ - mask = _mm_and_si128(mask, _mm_srli_si128(mask, 8)); \ -} while (0) - -#define FILTER4 do { \ - const __m128i t3t4 = _mm_set_epi8(3, 3, 3, 3, 3, 3, 3, 3, \ - 4, 4, 4, 4, 
4, 4, 4, 4); \ - const __m128i t80 = _mm_set1_epi8(0x80); \ - __m128i filter, filter2filter1, work; \ - \ - ps1ps0 = _mm_xor_si128(p1p0, t80); /* ^ 0x80 */ \ - qs1qs0 = _mm_xor_si128(q1q0, t80); \ - \ - /* int8_t filter = signed_char_clamp(ps1 - qs1) & hev; */ \ - work = _mm_subs_epi8(ps1ps0, qs1qs0); \ - filter = _mm_and_si128(_mm_srli_si128(work, 8), hev); \ - /* filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask; */ \ - filter = _mm_subs_epi8(filter, work); \ - filter = _mm_subs_epi8(filter, work); \ - filter = _mm_subs_epi8(filter, work); /* + 3 * (qs0 - ps0) */ \ - filter = _mm_and_si128(filter, mask); /* & mask */ \ - filter = _mm_unpacklo_epi64(filter, filter); \ - \ - /* filter1 = signed_char_clamp(filter + 4) >> 3; */ \ - /* filter2 = signed_char_clamp(filter + 3) >> 3; */ \ - filter2filter1 = _mm_adds_epi8(filter, t3t4); /* signed_char_clamp */ \ - filter = _mm_unpackhi_epi8(filter2filter1, filter2filter1); \ - filter2filter1 = _mm_unpacklo_epi8(filter2filter1, filter2filter1); \ - filter2filter1 = _mm_srai_epi16(filter2filter1, 11); /* >> 3 */ \ - filter = _mm_srai_epi16(filter, 11); /* >> 3 */ \ - filter2filter1 = _mm_packs_epi16(filter2filter1, filter); \ - \ - /* filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; */ \ - filter = _mm_subs_epi8(filter2filter1, ff); /* + 1 */ \ - filter = _mm_unpacklo_epi8(filter, filter); \ - filter = _mm_srai_epi16(filter, 9); /* round */ \ - filter = _mm_packs_epi16(filter, filter); \ - filter = _mm_andnot_si128(hev, filter); \ - \ - hev = _mm_unpackhi_epi64(filter2filter1, filter); \ - filter2filter1 = _mm_unpacklo_epi64(filter2filter1, filter); \ - \ - /* signed_char_clamp(qs1 - filter), signed_char_clamp(qs0 - filter1) */ \ - qs1qs0 = _mm_subs_epi8(qs1qs0, filter2filter1); \ - /* signed_char_clamp(ps1 + filter), signed_char_clamp(ps0 + filter2) */ \ - ps1ps0 = _mm_adds_epi8(ps1ps0, hev); \ - qs1qs0 = _mm_xor_si128(qs1qs0, t80); /* ^ 0x80 */ \ - ps1ps0 = _mm_xor_si128(ps1ps0, t80); /* ^ 0x80 */ \ -} while (0) 
- -void vpx_lpf_horizontal_4_sse2(uint8_t *s, int p /* pitch */, - const uint8_t *_blimit, const uint8_t *_limit, - const uint8_t *_thresh) { - const __m128i zero = _mm_set1_epi16(0); - const __m128i limit = - _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)_blimit), - _mm_loadl_epi64((const __m128i *)_limit)); - const __m128i thresh = - _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)_thresh), zero); - const __m128i ff = _mm_cmpeq_epi8(zero, zero); - __m128i q1p1, q0p0, p3p2, p2p1, p1p0, q3q2, q2q1, q1q0, ps1ps0, qs1qs0; - __m128i mask, hev; - - p3p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * p)), - _mm_loadl_epi64((__m128i *)(s - 4 * p))); - q1p1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 2 * p)), - _mm_loadl_epi64((__m128i *)(s + 1 * p))); - q0p0 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 1 * p)), - _mm_loadl_epi64((__m128i *)(s + 0 * p))); - q3q2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s + 2 * p)), - _mm_loadl_epi64((__m128i *)(s + 3 * p))); - p1p0 = _mm_unpacklo_epi64(q0p0, q1p1); - p2p1 = _mm_unpacklo_epi64(q1p1, p3p2); - q1q0 = _mm_unpackhi_epi64(q0p0, q1p1); - q2q1 = _mm_unpacklo_epi64(_mm_srli_si128(q1p1, 8), q3q2); - - FILTER_HEV_MASK; - FILTER4; - - _mm_storeh_pi((__m64 *)(s - 2 * p), _mm_castsi128_ps(ps1ps0)); // *op1 - _mm_storel_epi64((__m128i *)(s - 1 * p), ps1ps0); // *op0 - _mm_storel_epi64((__m128i *)(s + 0 * p), qs1qs0); // *oq0 - _mm_storeh_pi((__m64 *)(s + 1 * p), _mm_castsi128_ps(qs1qs0)); // *oq1 -} - -void vpx_lpf_vertical_4_sse2(uint8_t *s, int p /* pitch */, - const uint8_t *_blimit, const uint8_t *_limit, - const uint8_t *_thresh) { - const __m128i zero = _mm_set1_epi16(0); - const __m128i limit = - _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)_blimit), - _mm_loadl_epi64((const __m128i *)_limit)); - const __m128i thresh = - _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)_thresh), zero); - const __m128i ff = _mm_cmpeq_epi8(zero, zero); - __m128i x0, x1, x2, x3; - __m128i q1p1, 
q0p0, p3p2, p2p1, p1p0, q3q2, q2q1, q1q0, ps1ps0, qs1qs0; - __m128i mask, hev; - - // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17 - q1q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 0 * p - 4)), - _mm_loadl_epi64((__m128i *)(s + 1 * p - 4))); - - // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37 - x1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 2 * p - 4)), - _mm_loadl_epi64((__m128i *)(s + 3 * p - 4))); - - // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57 - x2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 4 * p - 4)), - _mm_loadl_epi64((__m128i *)(s + 5 * p - 4))); - - // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77 - x3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 6 * p - 4)), - _mm_loadl_epi64((__m128i *)(s + 7 * p - 4))); - - // Transpose 8x8 - // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 - p1p0 = _mm_unpacklo_epi16(q1q0, x1); - // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73 - x0 = _mm_unpacklo_epi16(x2, x3); - // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71 - p3p2 = _mm_unpacklo_epi32(p1p0, x0); - // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73 - p1p0 = _mm_unpackhi_epi32(p1p0, x0); - p3p2 = _mm_unpackhi_epi64(p3p2, _mm_slli_si128(p3p2, 8)); // swap lo and high - p1p0 = _mm_unpackhi_epi64(p1p0, _mm_slli_si128(p1p0, 8)); // swap lo and high - - // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 - q1q0 = _mm_unpackhi_epi16(q1q0, x1); - // 44 54 64 74 45 55 65 75 46 56 66 76 47 57 67 77 - x2 = _mm_unpackhi_epi16(x2, x3); - // 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77 - q3q2 = _mm_unpackhi_epi32(q1q0, x2); - // 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75 - q1q0 = _mm_unpacklo_epi32(q1q0, x2); - - q0p0 = _mm_unpacklo_epi64(p1p0, q1q0); - q1p1 = _mm_unpackhi_epi64(p1p0, q1q0); - p1p0 = _mm_unpacklo_epi64(q0p0, q1p1); - p2p1 = _mm_unpacklo_epi64(q1p1, p3p2); - q2q1 = _mm_unpacklo_epi64(_mm_srli_si128(q1p1, 8), q3q2); - - FILTER_HEV_MASK; - FILTER4; - - // Transpose 8x4 to 4x8 - // qs1qs0: 20 21 22 23 24 
25 26 27 30 31 32 33 34 34 36 37 - // ps1ps0: 10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07 - // 00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17 - ps1ps0 = _mm_unpackhi_epi64(ps1ps0, _mm_slli_si128(ps1ps0, 8)); - // 10 30 11 31 12 32 13 33 14 34 15 35 16 36 17 37 - x0 = _mm_unpackhi_epi8(ps1ps0, qs1qs0); - // 00 20 01 21 02 22 03 23 04 24 05 25 06 26 07 27 - ps1ps0 = _mm_unpacklo_epi8(ps1ps0, qs1qs0); - // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 - qs1qs0 = _mm_unpackhi_epi8(ps1ps0, x0); - // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 - ps1ps0 = _mm_unpacklo_epi8(ps1ps0, x0); - - *(int *)(s + 0 * p - 2) = _mm_cvtsi128_si32(ps1ps0); - ps1ps0 = _mm_srli_si128(ps1ps0, 4); - *(int *)(s + 1 * p - 2) = _mm_cvtsi128_si32(ps1ps0); - ps1ps0 = _mm_srli_si128(ps1ps0, 4); - *(int *)(s + 2 * p - 2) = _mm_cvtsi128_si32(ps1ps0); - ps1ps0 = _mm_srli_si128(ps1ps0, 4); - *(int *)(s + 3 * p - 2) = _mm_cvtsi128_si32(ps1ps0); - - *(int *)(s + 4 * p - 2) = _mm_cvtsi128_si32(qs1qs0); - qs1qs0 = _mm_srli_si128(qs1qs0, 4); - *(int *)(s + 5 * p - 2) = _mm_cvtsi128_si32(qs1qs0); - qs1qs0 = _mm_srli_si128(qs1qs0, 4); - *(int *)(s + 6 * p - 2) = _mm_cvtsi128_si32(qs1qs0); - qs1qs0 = _mm_srli_si128(qs1qs0, 4); - *(int *)(s + 7 * p - 2) = _mm_cvtsi128_si32(qs1qs0); -} - -void vpx_lpf_horizontal_edge_8_sse2(unsigned char *s, int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { - const __m128i zero = _mm_set1_epi16(0); - const __m128i one = _mm_set1_epi8(1); - const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); - const __m128i limit = _mm_load_si128((const __m128i *)_limit); - const __m128i thresh = _mm_load_si128((const __m128i *)_thresh); - __m128i mask, hev, flat, flat2; - __m128i q7p7, q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0, p0q0, p1q1; - __m128i abs_p1p0; - - q4p4 = _mm_loadl_epi64((__m128i *)(s - 5 * p)); - q4p4 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q4p4), - (__m64 *)(s + 4 * p))); - q3p3 = 
_mm_loadl_epi64((__m128i *)(s - 4 * p)); - q3p3 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q3p3), - (__m64 *)(s + 3 * p))); - q2p2 = _mm_loadl_epi64((__m128i *)(s - 3 * p)); - q2p2 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q2p2), - (__m64 *)(s + 2 * p))); - q1p1 = _mm_loadl_epi64((__m128i *)(s - 2 * p)); - q1p1 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q1p1), - (__m64 *)(s + 1 * p))); - p1q1 = _mm_shuffle_epi32(q1p1, 78); - q0p0 = _mm_loadl_epi64((__m128i *)(s - 1 * p)); - q0p0 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q0p0), - (__m64 *)(s - 0 * p))); - p0q0 = _mm_shuffle_epi32(q0p0, 78); - - { - __m128i abs_p1q1, abs_p0q0, abs_q1q0, fe, ff, work; - abs_p1p0 = abs_diff(q1p1, q0p0); - abs_q1q0 = _mm_srli_si128(abs_p1p0, 8); - fe = _mm_set1_epi8(0xfe); - ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); - abs_p0q0 = abs_diff(q0p0, p0q0); - abs_p1q1 = abs_diff(q1p1, p1q1); - flat = _mm_max_epu8(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu8(flat, thresh); - hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); - - abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0); - abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); - mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); - // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; - mask = _mm_max_epu8(abs_p1p0, mask); - // mask |= (abs(p1 - p0) > limit) * -1; - // mask |= (abs(q1 - q0) > limit) * -1; - - work = _mm_max_epu8(abs_diff(q2p2, q1p1), - abs_diff(q3p3, q2p2)); - mask = _mm_max_epu8(work, mask); - mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); - mask = _mm_subs_epu8(mask, limit); - mask = _mm_cmpeq_epi8(mask, zero); - } - - // lp filter - { - const __m128i t4 = _mm_set1_epi8(4); - const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); - const __m128i t1 = _mm_set1_epi16(0x1); - __m128i qs1ps1 = _mm_xor_si128(q1p1, t80); - __m128i qs0ps0 = _mm_xor_si128(q0p0, t80); - __m128i qs0 = 
_mm_xor_si128(p0q0, t80); - __m128i qs1 = _mm_xor_si128(p1q1, t80); - __m128i filt; - __m128i work_a; - __m128i filter1, filter2; - __m128i flat2_q6p6, flat2_q5p5, flat2_q4p4, flat2_q3p3, flat2_q2p2; - __m128i flat2_q1p1, flat2_q0p0, flat_q2p2, flat_q1p1, flat_q0p0; - - filt = _mm_and_si128(_mm_subs_epi8(qs1ps1, qs1), hev); - work_a = _mm_subs_epi8(qs0, qs0ps0); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - // (vpx_filter + 3 * (qs0 - ps0)) & mask - filt = _mm_and_si128(filt, mask); - - filter1 = _mm_adds_epi8(filt, t4); - filter2 = _mm_adds_epi8(filt, t3); - - filter1 = _mm_unpacklo_epi8(zero, filter1); - filter1 = _mm_srai_epi16(filter1, 0xB); - filter2 = _mm_unpacklo_epi8(zero, filter2); - filter2 = _mm_srai_epi16(filter2, 0xB); - - // Filter1 >> 3 - filt = _mm_packs_epi16(filter2, _mm_subs_epi16(zero, filter1)); - qs0ps0 = _mm_xor_si128(_mm_adds_epi8(qs0ps0, filt), t80); - - // filt >> 1 - filt = _mm_adds_epi16(filter1, t1); - filt = _mm_srai_epi16(filt, 1); - filt = _mm_andnot_si128(_mm_srai_epi16(_mm_unpacklo_epi8(zero, hev), 0x8), - filt); - filt = _mm_packs_epi16(filt, _mm_subs_epi16(zero, filt)); - qs1ps1 = _mm_xor_si128(_mm_adds_epi8(qs1ps1, filt), t80); - // loopfilter done - - { - __m128i work; - flat = _mm_max_epu8(abs_diff(q2p2, q0p0), abs_diff(q3p3, q0p0)); - flat = _mm_max_epu8(abs_p1p0, flat); - flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); - flat = _mm_subs_epu8(flat, one); - flat = _mm_cmpeq_epi8(flat, zero); - flat = _mm_and_si128(flat, mask); - - q5p5 = _mm_loadl_epi64((__m128i *)(s - 6 * p)); - q5p5 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q5p5), - (__m64 *)(s + 5 * p))); - - q6p6 = _mm_loadl_epi64((__m128i *)(s - 7 * p)); - q6p6 = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q6p6), - (__m64 *)(s + 6 * p))); - flat2 = _mm_max_epu8(abs_diff(q4p4, q0p0), abs_diff(q5p5, q0p0)); - - q7p7 = _mm_loadl_epi64((__m128i *)(s - 8 * p)); - q7p7 = 
_mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(q7p7), - (__m64 *)(s + 7 * p))); - work = _mm_max_epu8(abs_diff(q6p6, q0p0), abs_diff(q7p7, q0p0)); - flat2 = _mm_max_epu8(work, flat2); - flat2 = _mm_max_epu8(flat2, _mm_srli_si128(flat2, 8)); - flat2 = _mm_subs_epu8(flat2, one); - flat2 = _mm_cmpeq_epi8(flat2, zero); - flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask - } - - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // flat and wide flat calculations - { - const __m128i eight = _mm_set1_epi16(8); - const __m128i four = _mm_set1_epi16(4); - __m128i p7_16, p6_16, p5_16, p4_16, p3_16, p2_16, p1_16, p0_16; - __m128i q7_16, q6_16, q5_16, q4_16, q3_16, q2_16, q1_16, q0_16; - __m128i pixelFilter_p, pixelFilter_q; - __m128i pixetFilter_p2p1p0, pixetFilter_q2q1q0; - __m128i sum_p7, sum_q7, sum_p3, sum_q3, res_p, res_q; - - p7_16 = _mm_unpacklo_epi8(q7p7, zero);; - p6_16 = _mm_unpacklo_epi8(q6p6, zero); - p5_16 = _mm_unpacklo_epi8(q5p5, zero); - p4_16 = _mm_unpacklo_epi8(q4p4, zero); - p3_16 = _mm_unpacklo_epi8(q3p3, zero); - p2_16 = _mm_unpacklo_epi8(q2p2, zero); - p1_16 = _mm_unpacklo_epi8(q1p1, zero); - p0_16 = _mm_unpacklo_epi8(q0p0, zero); - q0_16 = _mm_unpackhi_epi8(q0p0, zero); - q1_16 = _mm_unpackhi_epi8(q1p1, zero); - q2_16 = _mm_unpackhi_epi8(q2p2, zero); - q3_16 = _mm_unpackhi_epi8(q3p3, zero); - q4_16 = _mm_unpackhi_epi8(q4p4, zero); - q5_16 = _mm_unpackhi_epi8(q5p5, zero); - q6_16 = _mm_unpackhi_epi8(q6p6, zero); - q7_16 = _mm_unpackhi_epi8(q7p7, zero); - - pixelFilter_p = _mm_add_epi16(_mm_add_epi16(p6_16, p5_16), - _mm_add_epi16(p4_16, p3_16)); - pixelFilter_q = _mm_add_epi16(_mm_add_epi16(q6_16, q5_16), - _mm_add_epi16(q4_16, q3_16)); - - pixetFilter_p2p1p0 = _mm_add_epi16(p0_16, _mm_add_epi16(p2_16, p1_16)); - pixelFilter_p = _mm_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); - - pixetFilter_q2q1q0 = _mm_add_epi16(q0_16, _mm_add_epi16(q2_16, q1_16)); - pixelFilter_q = _mm_add_epi16(pixelFilter_q, pixetFilter_q2q1q0); - pixelFilter_p = 
_mm_add_epi16(eight, _mm_add_epi16(pixelFilter_p, - pixelFilter_q)); - pixetFilter_p2p1p0 = _mm_add_epi16(four, - _mm_add_epi16(pixetFilter_p2p1p0, - pixetFilter_q2q1q0)); - res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(p7_16, p0_16)), 4); - res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(q7_16, q0_16)), 4); - flat2_q0p0 = _mm_packus_epi16(res_p, res_q); - res_p = _mm_srli_epi16(_mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(p3_16, p0_16)), 3); - res_q = _mm_srli_epi16(_mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(q3_16, q0_16)), 3); - - flat_q0p0 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(p7_16, p7_16); - sum_q7 = _mm_add_epi16(q7_16, q7_16); - sum_p3 = _mm_add_epi16(p3_16, p3_16); - sum_q3 = _mm_add_epi16(q3_16, q3_16); - - pixelFilter_q = _mm_sub_epi16(pixelFilter_p, p6_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q6_16); - res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(sum_p7, p1_16)), 4); - res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, - _mm_add_epi16(sum_q7, q1_16)), 4); - flat2_q1p1 = _mm_packus_epi16(res_p, res_q); - - pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_p2p1p0, p2_16); - pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q2_16); - res_p = _mm_srli_epi16(_mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(sum_p3, p1_16)), 3); - res_q = _mm_srli_epi16(_mm_add_epi16(pixetFilter_q2q1q0, - _mm_add_epi16(sum_q3, q1_16)), 3); - flat_q1p1 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - sum_p3 = _mm_add_epi16(sum_p3, p3_16); - sum_q3 = _mm_add_epi16(sum_q3, q3_16); - - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q5_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p5_16); - res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(sum_p7, p2_16)), 4); - res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, - _mm_add_epi16(sum_q7, q2_16)), 4); - flat2_q2p2 = 
_mm_packus_epi16(res_p, res_q); - - pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q1_16); - pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_q2q1q0, p1_16); - - res_p = _mm_srli_epi16(_mm_add_epi16(pixetFilter_p2p1p0, - _mm_add_epi16(sum_p3, p2_16)), 3); - res_q = _mm_srli_epi16(_mm_add_epi16(pixetFilter_q2q1q0, - _mm_add_epi16(sum_q3, q2_16)), 3); - flat_q2p2 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q4_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p4_16); - res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(sum_p7, p3_16)), 4); - res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, - _mm_add_epi16(sum_q7, q3_16)), 4); - flat2_q3p3 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q3_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p3_16); - res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(sum_p7, p4_16)), 4); - res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, - _mm_add_epi16(sum_q7, q4_16)), 4); - flat2_q4p4 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q2_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p2_16); - res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(sum_p7, p5_16)), 4); - res_q = _mm_srli_epi16(_mm_add_epi16(pixelFilter_q, - _mm_add_epi16(sum_q7, q5_16)), 4); - flat2_q5p5 = _mm_packus_epi16(res_p, res_q); - - sum_p7 = _mm_add_epi16(sum_p7, p7_16); - sum_q7 = _mm_add_epi16(sum_q7, q7_16); - pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q1_16); - pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p1_16); - res_p = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, - _mm_add_epi16(sum_p7, p6_16)), 4); - res_q = 
_mm_srli_epi16(_mm_add_epi16(pixelFilter_q, - _mm_add_epi16(sum_q7, q6_16)), 4); - flat2_q6p6 = _mm_packus_epi16(res_p, res_q); - } - // wide flat - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - flat = _mm_shuffle_epi32(flat, 68); - flat2 = _mm_shuffle_epi32(flat2, 68); - - q2p2 = _mm_andnot_si128(flat, q2p2); - flat_q2p2 = _mm_and_si128(flat, flat_q2p2); - q2p2 = _mm_or_si128(q2p2, flat_q2p2); - - qs1ps1 = _mm_andnot_si128(flat, qs1ps1); - flat_q1p1 = _mm_and_si128(flat, flat_q1p1); - q1p1 = _mm_or_si128(qs1ps1, flat_q1p1); - - qs0ps0 = _mm_andnot_si128(flat, qs0ps0); - flat_q0p0 = _mm_and_si128(flat, flat_q0p0); - q0p0 = _mm_or_si128(qs0ps0, flat_q0p0); - - q6p6 = _mm_andnot_si128(flat2, q6p6); - flat2_q6p6 = _mm_and_si128(flat2, flat2_q6p6); - q6p6 = _mm_or_si128(q6p6, flat2_q6p6); - _mm_storel_epi64((__m128i *)(s - 7 * p), q6p6); - _mm_storeh_pi((__m64 *)(s + 6 * p), _mm_castsi128_ps(q6p6)); - - q5p5 = _mm_andnot_si128(flat2, q5p5); - flat2_q5p5 = _mm_and_si128(flat2, flat2_q5p5); - q5p5 = _mm_or_si128(q5p5, flat2_q5p5); - _mm_storel_epi64((__m128i *)(s - 6 * p), q5p5); - _mm_storeh_pi((__m64 *)(s + 5 * p), _mm_castsi128_ps(q5p5)); - - q4p4 = _mm_andnot_si128(flat2, q4p4); - flat2_q4p4 = _mm_and_si128(flat2, flat2_q4p4); - q4p4 = _mm_or_si128(q4p4, flat2_q4p4); - _mm_storel_epi64((__m128i *)(s - 5 * p), q4p4); - _mm_storeh_pi((__m64 *)(s + 4 * p), _mm_castsi128_ps(q4p4)); - - q3p3 = _mm_andnot_si128(flat2, q3p3); - flat2_q3p3 = _mm_and_si128(flat2, flat2_q3p3); - q3p3 = _mm_or_si128(q3p3, flat2_q3p3); - _mm_storel_epi64((__m128i *)(s - 4 * p), q3p3); - _mm_storeh_pi((__m64 *)(s + 3 * p), _mm_castsi128_ps(q3p3)); - - q2p2 = _mm_andnot_si128(flat2, q2p2); - flat2_q2p2 = _mm_and_si128(flat2, flat2_q2p2); - q2p2 = _mm_or_si128(q2p2, flat2_q2p2); - _mm_storel_epi64((__m128i *)(s - 3 * p), q2p2); - _mm_storeh_pi((__m64 *)(s + 2 * p), _mm_castsi128_ps(q2p2)); - - q1p1 = _mm_andnot_si128(flat2, q1p1); - flat2_q1p1 = _mm_and_si128(flat2, flat2_q1p1); - q1p1 = 
_mm_or_si128(q1p1, flat2_q1p1); - _mm_storel_epi64((__m128i *)(s - 2 * p), q1p1); - _mm_storeh_pi((__m64 *)(s + 1 * p), _mm_castsi128_ps(q1p1)); - - q0p0 = _mm_andnot_si128(flat2, q0p0); - flat2_q0p0 = _mm_and_si128(flat2, flat2_q0p0); - q0p0 = _mm_or_si128(q0p0, flat2_q0p0); - _mm_storel_epi64((__m128i *)(s - 1 * p), q0p0); - _mm_storeh_pi((__m64 *)(s - 0 * p), _mm_castsi128_ps(q0p0)); - } -} - -static INLINE __m128i filter_add2_sub2(const __m128i *const total, - const __m128i *const a1, - const __m128i *const a2, - const __m128i *const s1, - const __m128i *const s2) { - __m128i x = _mm_add_epi16(*a1, *total); - x = _mm_add_epi16(_mm_sub_epi16(x, _mm_add_epi16(*s1, *s2)), *a2); - return x; -} - -static INLINE __m128i filter8_mask(const __m128i *const flat, - const __m128i *const other_filt, - const __m128i *const f8_lo, - const __m128i *const f8_hi) { - const __m128i f8 = _mm_packus_epi16(_mm_srli_epi16(*f8_lo, 3), - _mm_srli_epi16(*f8_hi, 3)); - const __m128i result = _mm_and_si128(*flat, f8); - return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result); -} - -static INLINE __m128i filter16_mask(const __m128i *const flat, - const __m128i *const other_filt, - const __m128i *const f_lo, - const __m128i *const f_hi) { - const __m128i f = _mm_packus_epi16(_mm_srli_epi16(*f_lo, 4), - _mm_srli_epi16(*f_hi, 4)); - const __m128i result = _mm_and_si128(*flat, f); - return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result); -} - -void vpx_lpf_horizontal_edge_16_sse2(unsigned char *s, int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { - const __m128i zero = _mm_set1_epi16(0); - const __m128i one = _mm_set1_epi8(1); - const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); - const __m128i limit = _mm_load_si128((const __m128i *)_limit); - const __m128i thresh = _mm_load_si128((const __m128i *)_thresh); - __m128i mask, hev, flat, flat2; - __m128i p7, p6, p5; - __m128i p4, p3, p2, p1, p0, q0, q1, q2, 
q3, q4; - __m128i q5, q6, q7; - - __m128i op2, op1, op0, oq0, oq1, oq2; - - __m128i max_abs_p1p0q1q0; - - p7 = _mm_loadu_si128((__m128i *)(s - 8 * p)); - p6 = _mm_loadu_si128((__m128i *)(s - 7 * p)); - p5 = _mm_loadu_si128((__m128i *)(s - 6 * p)); - p4 = _mm_loadu_si128((__m128i *)(s - 5 * p)); - p3 = _mm_loadu_si128((__m128i *)(s - 4 * p)); - p2 = _mm_loadu_si128((__m128i *)(s - 3 * p)); - p1 = _mm_loadu_si128((__m128i *)(s - 2 * p)); - p0 = _mm_loadu_si128((__m128i *)(s - 1 * p)); - q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); - q1 = _mm_loadu_si128((__m128i *)(s + 1 * p)); - q2 = _mm_loadu_si128((__m128i *)(s + 2 * p)); - q3 = _mm_loadu_si128((__m128i *)(s + 3 * p)); - q4 = _mm_loadu_si128((__m128i *)(s + 4 * p)); - q5 = _mm_loadu_si128((__m128i *)(s + 5 * p)); - q6 = _mm_loadu_si128((__m128i *)(s + 6 * p)); - q7 = _mm_loadu_si128((__m128i *)(s + 7 * p)); - - { - const __m128i abs_p1p0 = abs_diff(p1, p0); - const __m128i abs_q1q0 = abs_diff(q1, q0); - const __m128i fe = _mm_set1_epi8(0xfe); - const __m128i ff = _mm_cmpeq_epi8(zero, zero); - __m128i abs_p0q0 = abs_diff(p0, q0); - __m128i abs_p1q1 = abs_diff(p1, q1); - __m128i work; - max_abs_p1p0q1q0 = _mm_max_epu8(abs_p1p0, abs_q1q0); - - abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0); - abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); - mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); - // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; - mask = _mm_max_epu8(max_abs_p1p0q1q0, mask); - // mask |= (abs(p1 - p0) > limit) * -1; - // mask |= (abs(q1 - q0) > limit) * -1; - work = _mm_max_epu8(abs_diff(p2, p1), abs_diff(p3, p2)); - mask = _mm_max_epu8(work, mask); - work = _mm_max_epu8(abs_diff(q2, q1), abs_diff(q3, q2)); - mask = _mm_max_epu8(work, mask); - mask = _mm_subs_epu8(mask, limit); - mask = _mm_cmpeq_epi8(mask, zero); - } - - { - __m128i work; - work = _mm_max_epu8(abs_diff(p2, p0), abs_diff(q2, q0)); - flat = 
_mm_max_epu8(work, max_abs_p1p0q1q0); - work = _mm_max_epu8(abs_diff(p3, p0), abs_diff(q3, q0)); - flat = _mm_max_epu8(work, flat); - work = _mm_max_epu8(abs_diff(p4, p0), abs_diff(q4, q0)); - flat = _mm_subs_epu8(flat, one); - flat = _mm_cmpeq_epi8(flat, zero); - flat = _mm_and_si128(flat, mask); - flat2 = _mm_max_epu8(abs_diff(p5, p0), abs_diff(q5, q0)); - flat2 = _mm_max_epu8(work, flat2); - work = _mm_max_epu8(abs_diff(p6, p0), abs_diff(q6, q0)); - flat2 = _mm_max_epu8(work, flat2); - work = _mm_max_epu8(abs_diff(p7, p0), abs_diff(q7, q0)); - flat2 = _mm_max_epu8(work, flat2); - flat2 = _mm_subs_epu8(flat2, one); - flat2 = _mm_cmpeq_epi8(flat2, zero); - flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask - } - - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // filter4 - { - const __m128i t4 = _mm_set1_epi8(4); - const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); - const __m128i te0 = _mm_set1_epi8(0xe0); - const __m128i t1f = _mm_set1_epi8(0x1f); - const __m128i t1 = _mm_set1_epi8(0x1); - const __m128i t7f = _mm_set1_epi8(0x7f); - const __m128i ff = _mm_cmpeq_epi8(t4, t4); - - __m128i filt; - __m128i work_a; - __m128i filter1, filter2; - - op1 = _mm_xor_si128(p1, t80); - op0 = _mm_xor_si128(p0, t80); - oq0 = _mm_xor_si128(q0, t80); - oq1 = _mm_xor_si128(q1, t80); - - hev = _mm_subs_epu8(max_abs_p1p0q1q0, thresh); - hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); - filt = _mm_and_si128(_mm_subs_epi8(op1, oq1), hev); - - work_a = _mm_subs_epi8(oq0, op0); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - // (vpx_filter + 3 * (qs0 - ps0)) & mask - filt = _mm_and_si128(filt, mask); - filter1 = _mm_adds_epi8(filt, t4); - filter2 = _mm_adds_epi8(filt, t3); - - // Filter1 >> 3 - work_a = _mm_cmpgt_epi8(zero, filter1); - filter1 = _mm_srli_epi16(filter1, 3); - work_a = _mm_and_si128(work_a, te0); - filter1 = _mm_and_si128(filter1, t1f); - filter1 = 
_mm_or_si128(filter1, work_a); - oq0 = _mm_xor_si128(_mm_subs_epi8(oq0, filter1), t80); - - // Filter2 >> 3 - work_a = _mm_cmpgt_epi8(zero, filter2); - filter2 = _mm_srli_epi16(filter2, 3); - work_a = _mm_and_si128(work_a, te0); - filter2 = _mm_and_si128(filter2, t1f); - filter2 = _mm_or_si128(filter2, work_a); - op0 = _mm_xor_si128(_mm_adds_epi8(op0, filter2), t80); - - // filt >> 1 - filt = _mm_adds_epi8(filter1, t1); - work_a = _mm_cmpgt_epi8(zero, filt); - filt = _mm_srli_epi16(filt, 1); - work_a = _mm_and_si128(work_a, t80); - filt = _mm_and_si128(filt, t7f); - filt = _mm_or_si128(filt, work_a); - filt = _mm_andnot_si128(hev, filt); - op1 = _mm_xor_si128(_mm_adds_epi8(op1, filt), t80); - oq1 = _mm_xor_si128(_mm_subs_epi8(oq1, filt), t80); - // loopfilter done - - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // filter8 - { - const __m128i four = _mm_set1_epi16(4); - const __m128i p3_lo = _mm_unpacklo_epi8(p3, zero); - const __m128i p2_lo = _mm_unpacklo_epi8(p2, zero); - const __m128i p1_lo = _mm_unpacklo_epi8(p1, zero); - const __m128i p0_lo = _mm_unpacklo_epi8(p0, zero); - const __m128i q0_lo = _mm_unpacklo_epi8(q0, zero); - const __m128i q1_lo = _mm_unpacklo_epi8(q1, zero); - const __m128i q2_lo = _mm_unpacklo_epi8(q2, zero); - const __m128i q3_lo = _mm_unpacklo_epi8(q3, zero); - - const __m128i p3_hi = _mm_unpackhi_epi8(p3, zero); - const __m128i p2_hi = _mm_unpackhi_epi8(p2, zero); - const __m128i p1_hi = _mm_unpackhi_epi8(p1, zero); - const __m128i p0_hi = _mm_unpackhi_epi8(p0, zero); - const __m128i q0_hi = _mm_unpackhi_epi8(q0, zero); - const __m128i q1_hi = _mm_unpackhi_epi8(q1, zero); - const __m128i q2_hi = _mm_unpackhi_epi8(q2, zero); - const __m128i q3_hi = _mm_unpackhi_epi8(q3, zero); - __m128i f8_lo, f8_hi; - - f8_lo = _mm_add_epi16(_mm_add_epi16(p3_lo, four), - _mm_add_epi16(p3_lo, p2_lo)); - f8_lo = _mm_add_epi16(_mm_add_epi16(p3_lo, f8_lo), - _mm_add_epi16(p2_lo, p1_lo)); - f8_lo = _mm_add_epi16(_mm_add_epi16(p0_lo, q0_lo), f8_lo); - 
- f8_hi = _mm_add_epi16(_mm_add_epi16(p3_hi, four), - _mm_add_epi16(p3_hi, p2_hi)); - f8_hi = _mm_add_epi16(_mm_add_epi16(p3_hi, f8_hi), - _mm_add_epi16(p2_hi, p1_hi)); - f8_hi = _mm_add_epi16(_mm_add_epi16(p0_hi, q0_hi), f8_hi); - - op2 = filter8_mask(&flat, &p2, &f8_lo, &f8_hi); - - f8_lo = filter_add2_sub2(&f8_lo, &q1_lo, &p1_lo, &p2_lo, &p3_lo); - f8_hi = filter_add2_sub2(&f8_hi, &q1_hi, &p1_hi, &p2_hi, &p3_hi); - op1 = filter8_mask(&flat, &op1, &f8_lo, &f8_hi); - - f8_lo = filter_add2_sub2(&f8_lo, &q2_lo, &p0_lo, &p1_lo, &p3_lo); - f8_hi = filter_add2_sub2(&f8_hi, &q2_hi, &p0_hi, &p1_hi, &p3_hi); - op0 = filter8_mask(&flat, &op0, &f8_lo, &f8_hi); - - f8_lo = filter_add2_sub2(&f8_lo, &q3_lo, &q0_lo, &p0_lo, &p3_lo); - f8_hi = filter_add2_sub2(&f8_hi, &q3_hi, &q0_hi, &p0_hi, &p3_hi); - oq0 = filter8_mask(&flat, &oq0, &f8_lo, &f8_hi); - - f8_lo = filter_add2_sub2(&f8_lo, &q3_lo, &q1_lo, &q0_lo, &p2_lo); - f8_hi = filter_add2_sub2(&f8_hi, &q3_hi, &q1_hi, &q0_hi, &p2_hi); - oq1 = filter8_mask(&flat, &oq1, &f8_lo, &f8_hi); - - f8_lo = filter_add2_sub2(&f8_lo, &q3_lo, &q2_lo, &q1_lo, &p1_lo); - f8_hi = filter_add2_sub2(&f8_hi, &q3_hi, &q2_hi, &q1_hi, &p1_hi); - oq2 = filter8_mask(&flat, &q2, &f8_lo, &f8_hi); - } - - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // wide flat calculations - { - const __m128i eight = _mm_set1_epi16(8); - const __m128i p7_lo = _mm_unpacklo_epi8(p7, zero); - const __m128i p6_lo = _mm_unpacklo_epi8(p6, zero); - const __m128i p5_lo = _mm_unpacklo_epi8(p5, zero); - const __m128i p4_lo = _mm_unpacklo_epi8(p4, zero); - const __m128i p3_lo = _mm_unpacklo_epi8(p3, zero); - const __m128i p2_lo = _mm_unpacklo_epi8(p2, zero); - const __m128i p1_lo = _mm_unpacklo_epi8(p1, zero); - const __m128i p0_lo = _mm_unpacklo_epi8(p0, zero); - const __m128i q0_lo = _mm_unpacklo_epi8(q0, zero); - const __m128i q1_lo = _mm_unpacklo_epi8(q1, zero); - const __m128i q2_lo = _mm_unpacklo_epi8(q2, zero); - const __m128i q3_lo = _mm_unpacklo_epi8(q3, zero); - 
const __m128i q4_lo = _mm_unpacklo_epi8(q4, zero); - const __m128i q5_lo = _mm_unpacklo_epi8(q5, zero); - const __m128i q6_lo = _mm_unpacklo_epi8(q6, zero); - const __m128i q7_lo = _mm_unpacklo_epi8(q7, zero); - - const __m128i p7_hi = _mm_unpackhi_epi8(p7, zero); - const __m128i p6_hi = _mm_unpackhi_epi8(p6, zero); - const __m128i p5_hi = _mm_unpackhi_epi8(p5, zero); - const __m128i p4_hi = _mm_unpackhi_epi8(p4, zero); - const __m128i p3_hi = _mm_unpackhi_epi8(p3, zero); - const __m128i p2_hi = _mm_unpackhi_epi8(p2, zero); - const __m128i p1_hi = _mm_unpackhi_epi8(p1, zero); - const __m128i p0_hi = _mm_unpackhi_epi8(p0, zero); - const __m128i q0_hi = _mm_unpackhi_epi8(q0, zero); - const __m128i q1_hi = _mm_unpackhi_epi8(q1, zero); - const __m128i q2_hi = _mm_unpackhi_epi8(q2, zero); - const __m128i q3_hi = _mm_unpackhi_epi8(q3, zero); - const __m128i q4_hi = _mm_unpackhi_epi8(q4, zero); - const __m128i q5_hi = _mm_unpackhi_epi8(q5, zero); - const __m128i q6_hi = _mm_unpackhi_epi8(q6, zero); - const __m128i q7_hi = _mm_unpackhi_epi8(q7, zero); - - __m128i f_lo; - __m128i f_hi; - - f_lo = _mm_sub_epi16(_mm_slli_epi16(p7_lo, 3), p7_lo); // p7 * 7 - f_lo = _mm_add_epi16(_mm_slli_epi16(p6_lo, 1), - _mm_add_epi16(p4_lo, f_lo)); - f_lo = _mm_add_epi16(_mm_add_epi16(p3_lo, f_lo), - _mm_add_epi16(p2_lo, p1_lo)); - f_lo = _mm_add_epi16(_mm_add_epi16(p0_lo, q0_lo), f_lo); - f_lo = _mm_add_epi16(_mm_add_epi16(p5_lo, eight), f_lo); - - f_hi = _mm_sub_epi16(_mm_slli_epi16(p7_hi, 3), p7_hi); // p7 * 7 - f_hi = _mm_add_epi16(_mm_slli_epi16(p6_hi, 1), - _mm_add_epi16(p4_hi, f_hi)); - f_hi = _mm_add_epi16(_mm_add_epi16(p3_hi, f_hi), - _mm_add_epi16(p2_hi, p1_hi)); - f_hi = _mm_add_epi16(_mm_add_epi16(p0_hi, q0_hi), f_hi); - f_hi = _mm_add_epi16(_mm_add_epi16(p5_hi, eight), f_hi); - - p6 = filter16_mask(&flat2, &p6, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 7 * p), p6); - - f_lo = filter_add2_sub2(&f_lo, &q1_lo, &p5_lo, &p6_lo, &p7_lo); - f_hi = filter_add2_sub2(&f_hi, 
&q1_hi, &p5_hi, &p6_hi, &p7_hi); - p5 = filter16_mask(&flat2, &p5, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 6 * p), p5); - - f_lo = filter_add2_sub2(&f_lo, &q2_lo, &p4_lo, &p5_lo, &p7_lo); - f_hi = filter_add2_sub2(&f_hi, &q2_hi, &p4_hi, &p5_hi, &p7_hi); - p4 = filter16_mask(&flat2, &p4, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 5 * p), p4); - - f_lo = filter_add2_sub2(&f_lo, &q3_lo, &p3_lo, &p4_lo, &p7_lo); - f_hi = filter_add2_sub2(&f_hi, &q3_hi, &p3_hi, &p4_hi, &p7_hi); - p3 = filter16_mask(&flat2, &p3, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 4 * p), p3); - - f_lo = filter_add2_sub2(&f_lo, &q4_lo, &p2_lo, &p3_lo, &p7_lo); - f_hi = filter_add2_sub2(&f_hi, &q4_hi, &p2_hi, &p3_hi, &p7_hi); - op2 = filter16_mask(&flat2, &op2, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 3 * p), op2); - - f_lo = filter_add2_sub2(&f_lo, &q5_lo, &p1_lo, &p2_lo, &p7_lo); - f_hi = filter_add2_sub2(&f_hi, &q5_hi, &p1_hi, &p2_hi, &p7_hi); - op1 = filter16_mask(&flat2, &op1, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 2 * p), op1); - - f_lo = filter_add2_sub2(&f_lo, &q6_lo, &p0_lo, &p1_lo, &p7_lo); - f_hi = filter_add2_sub2(&f_hi, &q6_hi, &p0_hi, &p1_hi, &p7_hi); - op0 = filter16_mask(&flat2, &op0, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 1 * p), op0); - - f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q0_lo, &p0_lo, &p7_lo); - f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q0_hi, &p0_hi, &p7_hi); - oq0 = filter16_mask(&flat2, &oq0, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s - 0 * p), oq0); - - f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q1_lo, &p6_lo, &q0_lo); - f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q1_hi, &p6_hi, &q0_hi); - oq1 = filter16_mask(&flat2, &oq1, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s + 1 * p), oq1); - - f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q2_lo, &p5_lo, &q1_lo); - f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q2_hi, &p5_hi, &q1_hi); - oq2 = filter16_mask(&flat2, &oq2, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s + 2 * p), oq2); - - 
f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q3_lo, &p4_lo, &q2_lo); - f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q3_hi, &p4_hi, &q2_hi); - q3 = filter16_mask(&flat2, &q3, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s + 3 * p), q3); - - f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q4_lo, &p3_lo, &q3_lo); - f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q4_hi, &p3_hi, &q3_hi); - q4 = filter16_mask(&flat2, &q4, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s + 4 * p), q4); - - f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q5_lo, &p2_lo, &q4_lo); - f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q5_hi, &p2_hi, &q4_hi); - q5 = filter16_mask(&flat2, &q5, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s + 5 * p), q5); - - f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q6_lo, &p1_lo, &q5_lo); - f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q6_hi, &p1_hi, &q5_hi); - q6 = filter16_mask(&flat2, &q6, &f_lo, &f_hi); - _mm_storeu_si128((__m128i *)(s + 6 * p), q6); - } - // wide flat - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - } -} - -void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh) { - DECLARE_ALIGNED(16, unsigned char, flat_op2[16]); - DECLARE_ALIGNED(16, unsigned char, flat_op1[16]); - DECLARE_ALIGNED(16, unsigned char, flat_op0[16]); - DECLARE_ALIGNED(16, unsigned char, flat_oq2[16]); - DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]); - DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]); - const __m128i zero = _mm_set1_epi16(0); - const __m128i blimit = _mm_load_si128((const __m128i *)_blimit); - const __m128i limit = _mm_load_si128((const __m128i *)_limit); - const __m128i thresh = _mm_load_si128((const __m128i *)_thresh); - __m128i mask, hev, flat; - __m128i p3, p2, p1, p0, q0, q1, q2, q3; - __m128i q3p3, q2p2, q1p1, q0p0, p1q1, p0q0; - - q3p3 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 4 * p)), - _mm_loadl_epi64((__m128i *)(s + 3 * p))); - q2p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 
3 * p)), - _mm_loadl_epi64((__m128i *)(s + 2 * p))); - q1p1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 2 * p)), - _mm_loadl_epi64((__m128i *)(s + 1 * p))); - q0p0 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 1 * p)), - _mm_loadl_epi64((__m128i *)(s - 0 * p))); - p1q1 = _mm_shuffle_epi32(q1p1, 78); - p0q0 = _mm_shuffle_epi32(q0p0, 78); - - { - // filter_mask and hev_mask - const __m128i one = _mm_set1_epi8(1); - const __m128i fe = _mm_set1_epi8(0xfe); - const __m128i ff = _mm_cmpeq_epi8(fe, fe); - __m128i abs_p1q1, abs_p0q0, abs_q1q0, abs_p1p0, work; - abs_p1p0 = abs_diff(q1p1, q0p0); - abs_q1q0 = _mm_srli_si128(abs_p1p0, 8); - - abs_p0q0 = abs_diff(q0p0, p0q0); - abs_p1q1 = abs_diff(q1p1, p1q1); - flat = _mm_max_epu8(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu8(flat, thresh); - hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); - - abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0); - abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); - mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); - // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; - mask = _mm_max_epu8(abs_p1p0, mask); - // mask |= (abs(p1 - p0) > limit) * -1; - // mask |= (abs(q1 - q0) > limit) * -1; - - work = _mm_max_epu8(abs_diff(q2p2, q1p1), - abs_diff(q3p3, q2p2)); - mask = _mm_max_epu8(work, mask); - mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); - mask = _mm_subs_epu8(mask, limit); - mask = _mm_cmpeq_epi8(mask, zero); - - // flat_mask4 - - flat = _mm_max_epu8(abs_diff(q2p2, q0p0), - abs_diff(q3p3, q0p0)); - flat = _mm_max_epu8(abs_p1p0, flat); - flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); - flat = _mm_subs_epu8(flat, one); - flat = _mm_cmpeq_epi8(flat, zero); - flat = _mm_and_si128(flat, mask); - } - - { - const __m128i four = _mm_set1_epi16(4); - unsigned char *src = s; - { - __m128i workp_a, workp_b, workp_shft; - p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * p)), zero); 
- p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * p)), zero); - p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * p)), zero); - p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * p)), zero); - q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * p)), zero); - q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * p)), zero); - q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * p)), zero); - q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * p)), zero); - - workp_a = _mm_add_epi16(_mm_add_epi16(p3, p3), _mm_add_epi16(p2, p1)); - workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0); - workp_b = _mm_add_epi16(_mm_add_epi16(q0, p2), p3); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_op2[0], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_b = _mm_add_epi16(_mm_add_epi16(q0, q1), p1); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_op1[0], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q2); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), p0); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_op0[0], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q3); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0), q0); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_oq0[0], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q3); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q1); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_oq1[0], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q3); - workp_b = 
_mm_add_epi16(_mm_sub_epi16(workp_b, q1), q2); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_oq2[0], - _mm_packus_epi16(workp_shft, workp_shft)); - } - } - // lp filter - { - const __m128i t4 = _mm_set1_epi8(4); - const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); - const __m128i t1 = _mm_set1_epi8(0x1); - const __m128i ps1 = _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s - 2 * p)), - t80); - const __m128i ps0 = _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s - 1 * p)), - t80); - const __m128i qs0 = _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s + 0 * p)), - t80); - const __m128i qs1 = _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s + 1 * p)), - t80); - __m128i filt; - __m128i work_a; - __m128i filter1, filter2; - - filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev); - work_a = _mm_subs_epi8(qs0, ps0); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - // (vpx_filter + 3 * (qs0 - ps0)) & mask - filt = _mm_and_si128(filt, mask); - - filter1 = _mm_adds_epi8(filt, t4); - filter2 = _mm_adds_epi8(filt, t3); - - // Filter1 >> 3 - filter1 = _mm_unpacklo_epi8(zero, filter1); - filter1 = _mm_srai_epi16(filter1, 11); - filter1 = _mm_packs_epi16(filter1, filter1); - - // Filter2 >> 3 - filter2 = _mm_unpacklo_epi8(zero, filter2); - filter2 = _mm_srai_epi16(filter2, 11); - filter2 = _mm_packs_epi16(filter2, zero); - - // filt >> 1 - filt = _mm_adds_epi8(filter1, t1); - filt = _mm_unpacklo_epi8(zero, filt); - filt = _mm_srai_epi16(filt, 9); - filt = _mm_packs_epi16(filt, zero); - - filt = _mm_andnot_si128(hev, filt); - - work_a = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); - q0 = _mm_loadl_epi64((__m128i *)flat_oq0); - work_a = _mm_andnot_si128(flat, work_a); - q0 = _mm_and_si128(flat, q0); - q0 = _mm_or_si128(work_a, q0); - - work_a = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80); - q1 = _mm_loadl_epi64((__m128i *)flat_oq1); - work_a = 
_mm_andnot_si128(flat, work_a); - q1 = _mm_and_si128(flat, q1); - q1 = _mm_or_si128(work_a, q1); - - work_a = _mm_loadu_si128((__m128i *)(s + 2 * p)); - q2 = _mm_loadl_epi64((__m128i *)flat_oq2); - work_a = _mm_andnot_si128(flat, work_a); - q2 = _mm_and_si128(flat, q2); - q2 = _mm_or_si128(work_a, q2); - - work_a = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); - p0 = _mm_loadl_epi64((__m128i *)flat_op0); - work_a = _mm_andnot_si128(flat, work_a); - p0 = _mm_and_si128(flat, p0); - p0 = _mm_or_si128(work_a, p0); - - work_a = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80); - p1 = _mm_loadl_epi64((__m128i *)flat_op1); - work_a = _mm_andnot_si128(flat, work_a); - p1 = _mm_and_si128(flat, p1); - p1 = _mm_or_si128(work_a, p1); - - work_a = _mm_loadu_si128((__m128i *)(s - 3 * p)); - p2 = _mm_loadl_epi64((__m128i *)flat_op2); - work_a = _mm_andnot_si128(flat, work_a); - p2 = _mm_and_si128(flat, p2); - p2 = _mm_or_si128(work_a, p2); - - _mm_storel_epi64((__m128i *)(s - 3 * p), p2); - _mm_storel_epi64((__m128i *)(s - 2 * p), p1); - _mm_storel_epi64((__m128i *)(s - 1 * p), p0); - _mm_storel_epi64((__m128i *)(s + 0 * p), q0); - _mm_storel_epi64((__m128i *)(s + 1 * p), q1); - _mm_storel_epi64((__m128i *)(s + 2 * p), q2); - } -} - -void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int p, - const uint8_t *_blimit0, - const uint8_t *_limit0, - const uint8_t *_thresh0, - const uint8_t *_blimit1, - const uint8_t *_limit1, - const uint8_t *_thresh1) { - DECLARE_ALIGNED(16, unsigned char, flat_op2[16]); - DECLARE_ALIGNED(16, unsigned char, flat_op1[16]); - DECLARE_ALIGNED(16, unsigned char, flat_op0[16]); - DECLARE_ALIGNED(16, unsigned char, flat_oq2[16]); - DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]); - DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]); - const __m128i zero = _mm_set1_epi16(0); - const __m128i blimit = - _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_blimit0), - _mm_load_si128((const __m128i *)_blimit1)); - const __m128i limit = - 
_mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_limit0), - _mm_load_si128((const __m128i *)_limit1)); - const __m128i thresh = - _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_thresh0), - _mm_load_si128((const __m128i *)_thresh1)); - - __m128i mask, hev, flat; - __m128i p3, p2, p1, p0, q0, q1, q2, q3; - - p3 = _mm_loadu_si128((__m128i *)(s - 4 * p)); - p2 = _mm_loadu_si128((__m128i *)(s - 3 * p)); - p1 = _mm_loadu_si128((__m128i *)(s - 2 * p)); - p0 = _mm_loadu_si128((__m128i *)(s - 1 * p)); - q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); - q1 = _mm_loadu_si128((__m128i *)(s + 1 * p)); - q2 = _mm_loadu_si128((__m128i *)(s + 2 * p)); - q3 = _mm_loadu_si128((__m128i *)(s + 3 * p)); - { - const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0), - _mm_subs_epu8(p0, p1)); - const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), - _mm_subs_epu8(q0, q1)); - const __m128i one = _mm_set1_epi8(1); - const __m128i fe = _mm_set1_epi8(0xfe); - const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); - __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), - _mm_subs_epu8(q0, p0)); - __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1), - _mm_subs_epu8(q1, p1)); - __m128i work; - - // filter_mask and hev_mask - flat = _mm_max_epu8(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu8(flat, thresh); - hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); - - abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0); - abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); - mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); - // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; - mask = _mm_max_epu8(flat, mask); - // mask |= (abs(p1 - p0) > limit) * -1; - // mask |= (abs(q1 - q0) > limit) * -1; - work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p1), - _mm_subs_epu8(p1, p2)), - _mm_or_si128(_mm_subs_epu8(p3, p2), - _mm_subs_epu8(p2, p3))); - mask = _mm_max_epu8(work, mask); - work = 
_mm_max_epu8(_mm_or_si128(_mm_subs_epu8(q2, q1), - _mm_subs_epu8(q1, q2)), - _mm_or_si128(_mm_subs_epu8(q3, q2), - _mm_subs_epu8(q2, q3))); - mask = _mm_max_epu8(work, mask); - mask = _mm_subs_epu8(mask, limit); - mask = _mm_cmpeq_epi8(mask, zero); - - // flat_mask4 - work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p0), - _mm_subs_epu8(p0, p2)), - _mm_or_si128(_mm_subs_epu8(q2, q0), - _mm_subs_epu8(q0, q2))); - flat = _mm_max_epu8(work, flat); - work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p3, p0), - _mm_subs_epu8(p0, p3)), - _mm_or_si128(_mm_subs_epu8(q3, q0), - _mm_subs_epu8(q0, q3))); - flat = _mm_max_epu8(work, flat); - flat = _mm_subs_epu8(flat, one); - flat = _mm_cmpeq_epi8(flat, zero); - flat = _mm_and_si128(flat, mask); - } - { - const __m128i four = _mm_set1_epi16(4); - unsigned char *src = s; - int i = 0; - - do { - __m128i workp_a, workp_b, workp_shft; - p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * p)), zero); - p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * p)), zero); - p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * p)), zero); - p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * p)), zero); - q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * p)), zero); - q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * p)), zero); - q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * p)), zero); - q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * p)), zero); - - workp_a = _mm_add_epi16(_mm_add_epi16(p3, p3), _mm_add_epi16(p2, p1)); - workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0); - workp_b = _mm_add_epi16(_mm_add_epi16(q0, p2), p3); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_op2[i * 8], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_b = _mm_add_epi16(_mm_add_epi16(q0, q1), p1); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_op1[i * 
8], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q2); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), p0); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_op0[i * 8], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q3); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0), q0); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_oq0[i * 8], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q3); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q1); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_oq1[i * 8], - _mm_packus_epi16(workp_shft, workp_shft)); - - workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q3); - workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1), q2); - workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); - _mm_storel_epi64((__m128i *)&flat_oq2[i * 8], - _mm_packus_epi16(workp_shft, workp_shft)); - - src += 8; - } while (++i < 2); - } - // lp filter - { - const __m128i t4 = _mm_set1_epi8(4); - const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); - const __m128i te0 = _mm_set1_epi8(0xe0); - const __m128i t1f = _mm_set1_epi8(0x1f); - const __m128i t1 = _mm_set1_epi8(0x1); - const __m128i t7f = _mm_set1_epi8(0x7f); - - const __m128i ps1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * p)), - t80); - const __m128i ps0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * p)), - t80); - const __m128i qs0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * p)), - t80); - const __m128i qs1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * p)), - t80); - __m128i filt; - __m128i work_a; - __m128i filter1, filter2; - - filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev); - work_a = 
_mm_subs_epi8(qs0, ps0); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - // (vpx_filter + 3 * (qs0 - ps0)) & mask - filt = _mm_and_si128(filt, mask); - - filter1 = _mm_adds_epi8(filt, t4); - filter2 = _mm_adds_epi8(filt, t3); - - // Filter1 >> 3 - work_a = _mm_cmpgt_epi8(zero, filter1); - filter1 = _mm_srli_epi16(filter1, 3); - work_a = _mm_and_si128(work_a, te0); - filter1 = _mm_and_si128(filter1, t1f); - filter1 = _mm_or_si128(filter1, work_a); - - // Filter2 >> 3 - work_a = _mm_cmpgt_epi8(zero, filter2); - filter2 = _mm_srli_epi16(filter2, 3); - work_a = _mm_and_si128(work_a, te0); - filter2 = _mm_and_si128(filter2, t1f); - filter2 = _mm_or_si128(filter2, work_a); - - // filt >> 1 - filt = _mm_adds_epi8(filter1, t1); - work_a = _mm_cmpgt_epi8(zero, filt); - filt = _mm_srli_epi16(filt, 1); - work_a = _mm_and_si128(work_a, t80); - filt = _mm_and_si128(filt, t7f); - filt = _mm_or_si128(filt, work_a); - - filt = _mm_andnot_si128(hev, filt); - - work_a = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); - q0 = _mm_load_si128((__m128i *)flat_oq0); - work_a = _mm_andnot_si128(flat, work_a); - q0 = _mm_and_si128(flat, q0); - q0 = _mm_or_si128(work_a, q0); - - work_a = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80); - q1 = _mm_load_si128((__m128i *)flat_oq1); - work_a = _mm_andnot_si128(flat, work_a); - q1 = _mm_and_si128(flat, q1); - q1 = _mm_or_si128(work_a, q1); - - work_a = _mm_loadu_si128((__m128i *)(s + 2 * p)); - q2 = _mm_load_si128((__m128i *)flat_oq2); - work_a = _mm_andnot_si128(flat, work_a); - q2 = _mm_and_si128(flat, q2); - q2 = _mm_or_si128(work_a, q2); - - work_a = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); - p0 = _mm_load_si128((__m128i *)flat_op0); - work_a = _mm_andnot_si128(flat, work_a); - p0 = _mm_and_si128(flat, p0); - p0 = _mm_or_si128(work_a, p0); - - work_a = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80); - p1 = _mm_load_si128((__m128i *)flat_op1); - work_a = 
_mm_andnot_si128(flat, work_a); - p1 = _mm_and_si128(flat, p1); - p1 = _mm_or_si128(work_a, p1); - - work_a = _mm_loadu_si128((__m128i *)(s - 3 * p)); - p2 = _mm_load_si128((__m128i *)flat_op2); - work_a = _mm_andnot_si128(flat, work_a); - p2 = _mm_and_si128(flat, p2); - p2 = _mm_or_si128(work_a, p2); - - _mm_storeu_si128((__m128i *)(s - 3 * p), p2); - _mm_storeu_si128((__m128i *)(s - 2 * p), p1); - _mm_storeu_si128((__m128i *)(s - 1 * p), p0); - _mm_storeu_si128((__m128i *)(s + 0 * p), q0); - _mm_storeu_si128((__m128i *)(s + 1 * p), q1); - _mm_storeu_si128((__m128i *)(s + 2 * p), q2); - } -} - -void vpx_lpf_horizontal_4_dual_sse2(unsigned char *s, int p, - const unsigned char *_blimit0, - const unsigned char *_limit0, - const unsigned char *_thresh0, - const unsigned char *_blimit1, - const unsigned char *_limit1, - const unsigned char *_thresh1) { - const __m128i blimit = - _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_blimit0), - _mm_load_si128((const __m128i *)_blimit1)); - const __m128i limit = - _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_limit0), - _mm_load_si128((const __m128i *)_limit1)); - const __m128i thresh = - _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_thresh0), - _mm_load_si128((const __m128i *)_thresh1)); - const __m128i zero = _mm_set1_epi16(0); - __m128i p3, p2, p1, p0, q0, q1, q2, q3; - __m128i mask, hev, flat; - - p3 = _mm_loadu_si128((__m128i *)(s - 4 * p)); - p2 = _mm_loadu_si128((__m128i *)(s - 3 * p)); - p1 = _mm_loadu_si128((__m128i *)(s - 2 * p)); - p0 = _mm_loadu_si128((__m128i *)(s - 1 * p)); - q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); - q1 = _mm_loadu_si128((__m128i *)(s + 1 * p)); - q2 = _mm_loadu_si128((__m128i *)(s + 2 * p)); - q3 = _mm_loadu_si128((__m128i *)(s + 3 * p)); - - // filter_mask and hev_mask - { - const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0), - _mm_subs_epu8(p0, p1)); - const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), - _mm_subs_epu8(q0, q1)); - const __m128i fe = 
_mm_set1_epi8(0xfe); - const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); - __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), - _mm_subs_epu8(q0, p0)); - __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1), - _mm_subs_epu8(q1, p1)); - __m128i work; - - flat = _mm_max_epu8(abs_p1p0, abs_q1q0); - hev = _mm_subs_epu8(flat, thresh); - hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); - - abs_p0q0 =_mm_adds_epu8(abs_p0q0, abs_p0q0); - abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); - mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); - mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); - // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; - mask = _mm_max_epu8(flat, mask); - // mask |= (abs(p1 - p0) > limit) * -1; - // mask |= (abs(q1 - q0) > limit) * -1; - work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(p2, p1), - _mm_subs_epu8(p1, p2)), - _mm_or_si128(_mm_subs_epu8(p3, p2), - _mm_subs_epu8(p2, p3))); - mask = _mm_max_epu8(work, mask); - work = _mm_max_epu8(_mm_or_si128(_mm_subs_epu8(q2, q1), - _mm_subs_epu8(q1, q2)), - _mm_or_si128(_mm_subs_epu8(q3, q2), - _mm_subs_epu8(q2, q3))); - mask = _mm_max_epu8(work, mask); - mask = _mm_subs_epu8(mask, limit); - mask = _mm_cmpeq_epi8(mask, zero); - } - - // filter4 - { - const __m128i t4 = _mm_set1_epi8(4); - const __m128i t3 = _mm_set1_epi8(3); - const __m128i t80 = _mm_set1_epi8(0x80); - const __m128i te0 = _mm_set1_epi8(0xe0); - const __m128i t1f = _mm_set1_epi8(0x1f); - const __m128i t1 = _mm_set1_epi8(0x1); - const __m128i t7f = _mm_set1_epi8(0x7f); - - const __m128i ps1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * p)), - t80); - const __m128i ps0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * p)), - t80); - const __m128i qs0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * p)), - t80); - const __m128i qs1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * p)), - t80); - __m128i filt; - __m128i work_a; - __m128i filter1, filter2; - - filt = 
_mm_and_si128(_mm_subs_epi8(ps1, qs1), hev); - work_a = _mm_subs_epi8(qs0, ps0); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - filt = _mm_adds_epi8(filt, work_a); - // (vpx_filter + 3 * (qs0 - ps0)) & mask - filt = _mm_and_si128(filt, mask); - - filter1 = _mm_adds_epi8(filt, t4); - filter2 = _mm_adds_epi8(filt, t3); - - // Filter1 >> 3 - work_a = _mm_cmpgt_epi8(zero, filter1); - filter1 = _mm_srli_epi16(filter1, 3); - work_a = _mm_and_si128(work_a, te0); - filter1 = _mm_and_si128(filter1, t1f); - filter1 = _mm_or_si128(filter1, work_a); - - // Filter2 >> 3 - work_a = _mm_cmpgt_epi8(zero, filter2); - filter2 = _mm_srli_epi16(filter2, 3); - work_a = _mm_and_si128(work_a, te0); - filter2 = _mm_and_si128(filter2, t1f); - filter2 = _mm_or_si128(filter2, work_a); - - // filt >> 1 - filt = _mm_adds_epi8(filter1, t1); - work_a = _mm_cmpgt_epi8(zero, filt); - filt = _mm_srli_epi16(filt, 1); - work_a = _mm_and_si128(work_a, t80); - filt = _mm_and_si128(filt, t7f); - filt = _mm_or_si128(filt, work_a); - - filt = _mm_andnot_si128(hev, filt); - - q0 = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); - q1 = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80); - p0 = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); - p1 = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80); - - _mm_storeu_si128((__m128i *)(s - 2 * p), p1); - _mm_storeu_si128((__m128i *)(s - 1 * p), p0); - _mm_storeu_si128((__m128i *)(s + 0 * p), q0); - _mm_storeu_si128((__m128i *)(s + 1 * p), q1); - } -} - -static INLINE void transpose8x16(unsigned char *in0, unsigned char *in1, - int in_p, unsigned char *out, int out_p) { - __m128i x0, x1, x2, x3, x4, x5, x6, x7; - __m128i x8, x9, x10, x11, x12, x13, x14, x15; - - // 2-way interleave w/hoisting of unpacks - x0 = _mm_loadl_epi64((__m128i *)in0); // 1 - x1 = _mm_loadl_epi64((__m128i *)(in0 + in_p)); // 3 - x0 = _mm_unpacklo_epi8(x0, x1); // 1 - - x2 = _mm_loadl_epi64((__m128i *)(in0 + 2 * in_p)); // 5 - x3 = _mm_loadl_epi64((__m128i *)(in0 + 
3*in_p)); // 7 - x1 = _mm_unpacklo_epi8(x2, x3); // 2 - - x4 = _mm_loadl_epi64((__m128i *)(in0 + 4*in_p)); // 9 - x5 = _mm_loadl_epi64((__m128i *)(in0 + 5*in_p)); // 11 - x2 = _mm_unpacklo_epi8(x4, x5); // 3 - - x6 = _mm_loadl_epi64((__m128i *)(in0 + 6*in_p)); // 13 - x7 = _mm_loadl_epi64((__m128i *)(in0 + 7*in_p)); // 15 - x3 = _mm_unpacklo_epi8(x6, x7); // 4 - x4 = _mm_unpacklo_epi16(x0, x1); // 9 - - x8 = _mm_loadl_epi64((__m128i *)in1); // 2 - x9 = _mm_loadl_epi64((__m128i *)(in1 + in_p)); // 4 - x8 = _mm_unpacklo_epi8(x8, x9); // 5 - x5 = _mm_unpacklo_epi16(x2, x3); // 10 - - x10 = _mm_loadl_epi64((__m128i *)(in1 + 2 * in_p)); // 6 - x11 = _mm_loadl_epi64((__m128i *)(in1 + 3*in_p)); // 8 - x9 = _mm_unpacklo_epi8(x10, x11); // 6 - - x12 = _mm_loadl_epi64((__m128i *)(in1 + 4*in_p)); // 10 - x13 = _mm_loadl_epi64((__m128i *)(in1 + 5*in_p)); // 12 - x10 = _mm_unpacklo_epi8(x12, x13); // 7 - x12 = _mm_unpacklo_epi16(x8, x9); // 11 - - x14 = _mm_loadl_epi64((__m128i *)(in1 + 6*in_p)); // 14 - x15 = _mm_loadl_epi64((__m128i *)(in1 + 7*in_p)); // 16 - x11 = _mm_unpacklo_epi8(x14, x15); // 8 - x13 = _mm_unpacklo_epi16(x10, x11); // 12 - - x6 = _mm_unpacklo_epi32(x4, x5); // 13 - x7 = _mm_unpackhi_epi32(x4, x5); // 14 - x14 = _mm_unpacklo_epi32(x12, x13); // 15 - x15 = _mm_unpackhi_epi32(x12, x13); // 16 - - // Store first 4-line result - _mm_storeu_si128((__m128i *)out, _mm_unpacklo_epi64(x6, x14)); - _mm_storeu_si128((__m128i *)(out + out_p), _mm_unpackhi_epi64(x6, x14)); - _mm_storeu_si128((__m128i *)(out + 2 * out_p), _mm_unpacklo_epi64(x7, x15)); - _mm_storeu_si128((__m128i *)(out + 3 * out_p), _mm_unpackhi_epi64(x7, x15)); - - x4 = _mm_unpackhi_epi16(x0, x1); - x5 = _mm_unpackhi_epi16(x2, x3); - x12 = _mm_unpackhi_epi16(x8, x9); - x13 = _mm_unpackhi_epi16(x10, x11); - - x6 = _mm_unpacklo_epi32(x4, x5); - x7 = _mm_unpackhi_epi32(x4, x5); - x14 = _mm_unpacklo_epi32(x12, x13); - x15 = _mm_unpackhi_epi32(x12, x13); - - // Store second 4-line result - 
_mm_storeu_si128((__m128i *)(out + 4 * out_p), _mm_unpacklo_epi64(x6, x14)); - _mm_storeu_si128((__m128i *)(out + 5 * out_p), _mm_unpackhi_epi64(x6, x14)); - _mm_storeu_si128((__m128i *)(out + 6 * out_p), _mm_unpacklo_epi64(x7, x15)); - _mm_storeu_si128((__m128i *)(out + 7 * out_p), _mm_unpackhi_epi64(x7, x15)); -} - -static INLINE void transpose(unsigned char *src[], int in_p, - unsigned char *dst[], int out_p, - int num_8x8_to_transpose) { - int idx8x8 = 0; - __m128i x0, x1, x2, x3, x4, x5, x6, x7; - do { - unsigned char *in = src[idx8x8]; - unsigned char *out = dst[idx8x8]; - - x0 = _mm_loadl_epi64((__m128i *)(in + 0*in_p)); // 00 01 02 03 04 05 06 07 - x1 = _mm_loadl_epi64((__m128i *)(in + 1*in_p)); // 10 11 12 13 14 15 16 17 - // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17 - x0 = _mm_unpacklo_epi8(x0, x1); - - x2 = _mm_loadl_epi64((__m128i *)(in + 2*in_p)); // 20 21 22 23 24 25 26 27 - x3 = _mm_loadl_epi64((__m128i *)(in + 3*in_p)); // 30 31 32 33 34 35 36 37 - // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37 - x1 = _mm_unpacklo_epi8(x2, x3); - - x4 = _mm_loadl_epi64((__m128i *)(in + 4*in_p)); // 40 41 42 43 44 45 46 47 - x5 = _mm_loadl_epi64((__m128i *)(in + 5*in_p)); // 50 51 52 53 54 55 56 57 - // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57 - x2 = _mm_unpacklo_epi8(x4, x5); - - x6 = _mm_loadl_epi64((__m128i *)(in + 6*in_p)); // 60 61 62 63 64 65 66 67 - x7 = _mm_loadl_epi64((__m128i *)(in + 7*in_p)); // 70 71 72 73 74 75 76 77 - // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77 - x3 = _mm_unpacklo_epi8(x6, x7); - - // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 - x4 = _mm_unpacklo_epi16(x0, x1); - // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73 - x5 = _mm_unpacklo_epi16(x2, x3); - // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71 - x6 = _mm_unpacklo_epi32(x4, x5); - _mm_storel_pd((double *)(out + 0*out_p), - _mm_castsi128_pd(x6)); // 00 10 20 30 40 50 60 70 - _mm_storeh_pd((double *)(out + 1*out_p), - _mm_castsi128_pd(x6)); // 01 11 21 31 
41 51 61 71 - // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73 - x7 = _mm_unpackhi_epi32(x4, x5); - _mm_storel_pd((double *)(out + 2*out_p), - _mm_castsi128_pd(x7)); // 02 12 22 32 42 52 62 72 - _mm_storeh_pd((double *)(out + 3*out_p), - _mm_castsi128_pd(x7)); // 03 13 23 33 43 53 63 73 - - // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 - x4 = _mm_unpackhi_epi16(x0, x1); - // 44 54 64 74 45 55 65 75 46 56 66 76 47 57 67 77 - x5 = _mm_unpackhi_epi16(x2, x3); - // 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75 - x6 = _mm_unpacklo_epi32(x4, x5); - _mm_storel_pd((double *)(out + 4*out_p), - _mm_castsi128_pd(x6)); // 04 14 24 34 44 54 64 74 - _mm_storeh_pd((double *)(out + 5*out_p), - _mm_castsi128_pd(x6)); // 05 15 25 35 45 55 65 75 - // 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77 - x7 = _mm_unpackhi_epi32(x4, x5); - - _mm_storel_pd((double *)(out + 6*out_p), - _mm_castsi128_pd(x7)); // 06 16 26 36 46 56 66 76 - _mm_storeh_pd((double *)(out + 7*out_p), - _mm_castsi128_pd(x7)); // 07 17 27 37 47 57 67 77 - } while (++idx8x8 < num_8x8_to_transpose); -} - -void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - DECLARE_ALIGNED(16, unsigned char, t_dst[16 * 8]); - unsigned char *src[2]; - unsigned char *dst[2]; - - // Transpose 8x16 - transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); - - // Loop filtering - vpx_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, - blimit1, limit1, thresh1); - src[0] = t_dst; - src[1] = t_dst + 8; - dst[0] = s - 4; - dst[1] = s - 4 + p * 8; - - // Transpose back - transpose(src, 16, dst, p, 2); -} - -void vpx_lpf_vertical_8_sse2(unsigned char *s, int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh) { - DECLARE_ALIGNED(8, unsigned char, t_dst[8 * 8]); - unsigned char *src[1]; - unsigned char *dst[1]; - - // 
Transpose 8x8 - src[0] = s - 4; - dst[0] = t_dst; - - transpose(src, p, dst, 8, 1); - - // Loop filtering - vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh); - - src[0] = t_dst; - dst[0] = s - 4; - - // Transpose back - transpose(src, 8, dst, p, 1); -} - -void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - DECLARE_ALIGNED(16, unsigned char, t_dst[16 * 8]); - unsigned char *src[2]; - unsigned char *dst[2]; - - // Transpose 8x16 - transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); - - // Loop filtering - vpx_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, - blimit1, limit1, thresh1); - src[0] = t_dst; - src[1] = t_dst + 8; - - dst[0] = s - 4; - dst[1] = s - 4 + p * 8; - - // Transpose back - transpose(src, 16, dst, p, 2); -} - -void vpx_lpf_vertical_16_sse2(unsigned char *s, int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh) { - DECLARE_ALIGNED(8, unsigned char, t_dst[8 * 16]); - unsigned char *src[2]; - unsigned char *dst[2]; - - src[0] = s - 8; - src[1] = s; - dst[0] = t_dst; - dst[1] = t_dst + 8 * 8; - - // Transpose 16x8 - transpose(src, p, dst, 8, 2); - - // Loop filtering - vpx_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh); - - src[0] = t_dst; - src[1] = t_dst + 8 * 8; - dst[0] = s - 8; - dst[1] = s; - - // Transpose back - transpose(src, 8, dst, p, 2); -} - -void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int p, - const uint8_t *blimit, const uint8_t *limit, - const uint8_t *thresh) { - DECLARE_ALIGNED(16, unsigned char, t_dst[256]); - - // Transpose 16x16 - transpose8x16(s - 8, s - 8 + 8 * p, p, t_dst, 16); - transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16); - - // Loop filtering - vpx_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh); - - // Transpose back - 
transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p); - transpose8x16(t_dst + 8, t_dst + 8 + 8 * 16, 16, s - 8 + 8 * p, p); -} diff --git a/thirdparty/libvpx/vpx_dsp/x86/txfm_common_sse2.h b/thirdparty/libvpx/vpx_dsp/x86/txfm_common_sse2.h deleted file mode 100644 index 536b206876..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/txfm_common_sse2.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_DSP_X86_TXFM_COMMON_SSE2_H_ -#define VPX_DSP_X86_TXFM_COMMON_SSE2_H_ - -#include <emmintrin.h> -#include "vpx/vpx_integer.h" - -#define pair_set_epi16(a, b) \ - _mm_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \ - (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a)) - -#define dual_set_epi16(a, b) \ - _mm_set_epi16((int16_t)(b), (int16_t)(b), (int16_t)(b), (int16_t)(b), \ - (int16_t)(a), (int16_t)(a), (int16_t)(a), (int16_t)(a)) - -#define octa_set_epi16(a, b, c, d, e, f, g, h) \ - _mm_setr_epi16((int16_t)(a), (int16_t)(b), (int16_t)(c), (int16_t)(d), \ - (int16_t)(e), (int16_t)(f), (int16_t)(g), (int16_t)(h)) - -#endif // VPX_DSP_X86_TXFM_COMMON_SSE2_H_ diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_asm_stubs.c b/thirdparty/libvpx/vpx_dsp/x86/vpx_asm_stubs.c deleted file mode 100644 index 422b0fc422..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/vpx_asm_stubs.c +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "./vpx_config.h" -#include "./vpx_dsp_rtcd.h" -#include "vpx_dsp/x86/convolve.h" - -#if HAVE_SSE2 -filter8_1dfunction vpx_filter_block1d16_v8_sse2; -filter8_1dfunction vpx_filter_block1d16_h8_sse2; -filter8_1dfunction vpx_filter_block1d8_v8_sse2; -filter8_1dfunction vpx_filter_block1d8_h8_sse2; -filter8_1dfunction vpx_filter_block1d4_v8_sse2; -filter8_1dfunction vpx_filter_block1d4_h8_sse2; -filter8_1dfunction vpx_filter_block1d16_v8_avg_sse2; -filter8_1dfunction vpx_filter_block1d16_h8_avg_sse2; -filter8_1dfunction vpx_filter_block1d8_v8_avg_sse2; -filter8_1dfunction vpx_filter_block1d8_h8_avg_sse2; -filter8_1dfunction vpx_filter_block1d4_v8_avg_sse2; -filter8_1dfunction vpx_filter_block1d4_h8_avg_sse2; - -filter8_1dfunction vpx_filter_block1d16_v2_sse2; -filter8_1dfunction vpx_filter_block1d16_h2_sse2; -filter8_1dfunction vpx_filter_block1d8_v2_sse2; -filter8_1dfunction vpx_filter_block1d8_h2_sse2; -filter8_1dfunction vpx_filter_block1d4_v2_sse2; -filter8_1dfunction vpx_filter_block1d4_h2_sse2; -filter8_1dfunction vpx_filter_block1d16_v2_avg_sse2; -filter8_1dfunction vpx_filter_block1d16_h2_avg_sse2; -filter8_1dfunction vpx_filter_block1d8_v2_avg_sse2; -filter8_1dfunction vpx_filter_block1d8_h2_avg_sse2; -filter8_1dfunction vpx_filter_block1d4_v2_avg_sse2; -filter8_1dfunction vpx_filter_block1d4_h2_avg_sse2; - -// void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void 
vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2); -FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2); -FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2); -FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2); - -// void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_2D(, sse2); -FUN_CONV_2D(avg_ , sse2); - -#if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_avg_sse2; 
-highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_avg_sse2; - -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_avg_sse2; -highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_avg_sse2; - -// void vpx_highbd_convolve8_horiz_sse2(const uint8_t *src, -// ptrdiff_t src_stride, -// uint8_t *dst, -// ptrdiff_t dst_stride, -// const int16_t *filter_x, -// int x_step_q4, -// const int16_t *filter_y, -// int y_step_q4, -// int w, int h, int bd); -// void vpx_highbd_convolve8_vert_sse2(const uint8_t *src, -// ptrdiff_t src_stride, -// uint8_t *dst, -// ptrdiff_t dst_stride, -// const int16_t *filter_x, -// int x_step_q4, -// const int16_t *filter_y, -// int y_step_q4, -// int w, int h, int bd); -// void vpx_highbd_convolve8_avg_horiz_sse2(const uint8_t *src, -// ptrdiff_t src_stride, -// uint8_t *dst, -// ptrdiff_t dst_stride, -// const int16_t *filter_x, -// int x_step_q4, -// const int16_t *filter_y, -// int y_step_q4, -// int w, int h, int bd); -// void vpx_highbd_convolve8_avg_vert_sse2(const uint8_t *src, -// ptrdiff_t src_stride, -// uint8_t *dst, -// ptrdiff_t dst_stride, -// const int16_t *filter_x, -// int x_step_q4, -// const int16_t *filter_y, -// int y_step_q4, -// int w, int h, int bd); -HIGH_FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2); -HIGH_FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - 
src_stride * 3, , sse2); -HIGH_FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2); -HIGH_FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, - sse2); - -// void vpx_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h, int bd); -// void vpx_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h, int bd); -HIGH_FUN_CONV_2D(, sse2); -HIGH_FUN_CONV_2D(avg_ , sse2); -#endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 -#endif // HAVE_SSE2 diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm b/thirdparty/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm deleted file mode 100644 index abc0270655..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm +++ /dev/null @@ -1,228 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - -%include "third_party/x86inc/x86inc.asm" - -SECTION .text - -%macro convolve_fn 1-2 -%ifidn %1, avg -%define AUX_XMM_REGS 4 -%else -%define AUX_XMM_REGS 0 -%endif -%ifidn %2, highbd -%define pavg pavgw -cglobal %2_convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \ - dst, dst_stride, \ - fx, fxs, fy, fys, w, h, bd -%else -%define pavg pavgb -cglobal convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \ - dst, dst_stride, \ - fx, fxs, fy, fys, w, h -%endif - mov r4d, dword wm -%ifidn %2, highbd - shl r4d, 1 - shl srcq, 1 - shl src_strideq, 1 - shl dstq, 1 - shl dst_strideq, 1 -%else - cmp r4d, 4 - je .w4 -%endif - cmp r4d, 8 - je .w8 - cmp r4d, 16 - je .w16 - cmp r4d, 32 - je .w32 -%ifidn %2, highbd - cmp r4d, 64 - je .w64 - - mov r4d, dword hm -.loop128: - movu m0, [srcq] - movu m1, [srcq+16] - movu m2, [srcq+32] - movu m3, [srcq+48] -%ifidn %1, avg - pavg m0, [dstq] - pavg m1, [dstq+16] - pavg m2, [dstq+32] - pavg m3, [dstq+48] -%endif - mova [dstq ], m0 - mova [dstq+16], m1 - mova [dstq+32], m2 - mova [dstq+48], m3 - movu m0, [srcq+64] - movu m1, [srcq+80] - movu m2, [srcq+96] - movu m3, [srcq+112] - add srcq, src_strideq -%ifidn %1, avg - pavg m0, [dstq+64] - pavg m1, [dstq+80] - pavg m2, [dstq+96] - pavg m3, [dstq+112] -%endif - mova [dstq+64], m0 - mova [dstq+80], m1 - mova [dstq+96], m2 - mova [dstq+112], m3 - add dstq, dst_strideq - dec r4d - jnz .loop128 - RET -%endif - -.w64 - mov r4d, dword hm -.loop64: - movu m0, [srcq] - movu m1, [srcq+16] - movu m2, [srcq+32] - movu m3, [srcq+48] - add srcq, src_strideq -%ifidn %1, avg - pavg m0, [dstq] - pavg m1, [dstq+16] - pavg m2, [dstq+32] - pavg m3, [dstq+48] -%endif - mova [dstq ], m0 - mova [dstq+16], m1 - mova [dstq+32], m2 - mova [dstq+48], m3 - add dstq, dst_strideq - dec r4d - jnz .loop64 - RET - -.w32: - mov r4d, dword hm -.loop32: - movu m0, [srcq] - movu m1, [srcq+16] - movu m2, [srcq+src_strideq] - movu m3, [srcq+src_strideq+16] - lea srcq, [srcq+src_strideq*2] -%ifidn %1, avg - pavg m0, [dstq] 
- pavg m1, [dstq +16] - pavg m2, [dstq+dst_strideq] - pavg m3, [dstq+dst_strideq+16] -%endif - mova [dstq ], m0 - mova [dstq +16], m1 - mova [dstq+dst_strideq ], m2 - mova [dstq+dst_strideq+16], m3 - lea dstq, [dstq+dst_strideq*2] - sub r4d, 2 - jnz .loop32 - RET - -.w16: - mov r4d, dword hm - lea r5q, [src_strideq*3] - lea r6q, [dst_strideq*3] -.loop16: - movu m0, [srcq] - movu m1, [srcq+src_strideq] - movu m2, [srcq+src_strideq*2] - movu m3, [srcq+r5q] - lea srcq, [srcq+src_strideq*4] -%ifidn %1, avg - pavg m0, [dstq] - pavg m1, [dstq+dst_strideq] - pavg m2, [dstq+dst_strideq*2] - pavg m3, [dstq+r6q] -%endif - mova [dstq ], m0 - mova [dstq+dst_strideq ], m1 - mova [dstq+dst_strideq*2], m2 - mova [dstq+r6q ], m3 - lea dstq, [dstq+dst_strideq*4] - sub r4d, 4 - jnz .loop16 - RET - -.w8: - mov r4d, dword hm - lea r5q, [src_strideq*3] - lea r6q, [dst_strideq*3] -.loop8: - movh m0, [srcq] - movh m1, [srcq+src_strideq] - movh m2, [srcq+src_strideq*2] - movh m3, [srcq+r5q] - lea srcq, [srcq+src_strideq*4] -%ifidn %1, avg - movh m4, [dstq] - movh m5, [dstq+dst_strideq] - movh m6, [dstq+dst_strideq*2] - movh m7, [dstq+r6q] - pavg m0, m4 - pavg m1, m5 - pavg m2, m6 - pavg m3, m7 -%endif - movh [dstq ], m0 - movh [dstq+dst_strideq ], m1 - movh [dstq+dst_strideq*2], m2 - movh [dstq+r6q ], m3 - lea dstq, [dstq+dst_strideq*4] - sub r4d, 4 - jnz .loop8 - RET - -%ifnidn %2, highbd -.w4: - mov r4d, dword hm - lea r5q, [src_strideq*3] - lea r6q, [dst_strideq*3] -.loop4: - movd m0, [srcq] - movd m1, [srcq+src_strideq] - movd m2, [srcq+src_strideq*2] - movd m3, [srcq+r5q] - lea srcq, [srcq+src_strideq*4] -%ifidn %1, avg - movd m4, [dstq] - movd m5, [dstq+dst_strideq] - movd m6, [dstq+dst_strideq*2] - movd m7, [dstq+r6q] - pavg m0, m4 - pavg m1, m5 - pavg m2, m6 - pavg m3, m7 -%endif - movd [dstq ], m0 - movd [dstq+dst_strideq ], m1 - movd [dstq+dst_strideq*2], m2 - movd [dstq+r6q ], m3 - lea dstq, [dstq+dst_strideq*4] - sub r4d, 4 - jnz .loop4 - RET -%endif -%endmacro - -INIT_XMM 
sse2 -convolve_fn copy -convolve_fn avg -%if CONFIG_VP9_HIGHBITDEPTH -convolve_fn copy, highbd -convolve_fn avg, highbd -%endif diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c deleted file mode 100644 index d8a92354c9..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c +++ /dev/null @@ -1,606 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -// Due to a header conflict between math.h and intrinsics includes with ceil() -// in certain configurations under vs9 this include needs to precede -// immintrin.h. - -#include <immintrin.h> - -#include "./vpx_dsp_rtcd.h" -#include "vpx_dsp/x86/convolve.h" -#include "vpx_ports/mem.h" - -// filters for 16_h8 and 16_v8 -DECLARE_ALIGNED(32, static const uint8_t, filt1_global_avx2[32]) = { - 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, - 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 -}; - -DECLARE_ALIGNED(32, static const uint8_t, filt2_global_avx2[32]) = { - 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, - 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 -}; - -DECLARE_ALIGNED(32, static const uint8_t, filt3_global_avx2[32]) = { - 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, - 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12 -}; - -DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = { - 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, - 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 -}; - -#if defined(__clang__) -// -- GODOT start - -# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 
3) || \ - (!defined(__MACPORTS__) && defined(__APPLE__) && \ - ((__clang_major__ == 4 && __clang_minor__ <= 2) || \ - (__clang_major__ == 5 && __clang_minor__ == 0))) -// -- GODOT end -- -# define MM256_BROADCASTSI128_SI256(x) \ - _mm_broadcastsi128_si256((__m128i const *)&(x)) -# else // clang > 3.3, and not 5.0 on macosx. -# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) -# endif // clang <= 3.3 -#elif defined(__GNUC__) -# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 6) -# define MM256_BROADCASTSI128_SI256(x) \ - _mm_broadcastsi128_si256((__m128i const *)&(x)) -# elif __GNUC__ == 4 && __GNUC_MINOR__ == 7 -# define MM256_BROADCASTSI128_SI256(x) _mm_broadcastsi128_si256(x) -# else // gcc > 4.7 -# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) -# endif // gcc <= 4.6 -#else // !(gcc || clang) -# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) -#endif // __clang__ - -static void vpx_filter_block1d16_h8_avx2(const uint8_t *src_ptr, - ptrdiff_t src_pixels_per_line, - uint8_t *output_ptr, - ptrdiff_t output_pitch, - uint32_t output_height, - const int16_t *filter) { - __m128i filtersReg; - __m256i addFilterReg64, filt1Reg, filt2Reg, filt3Reg, filt4Reg; - __m256i firstFilters, secondFilters, thirdFilters, forthFilters; - __m256i srcRegFilt32b1_1, srcRegFilt32b2_1, srcRegFilt32b2, srcRegFilt32b3; - __m256i srcReg32b1, srcReg32b2, filtersReg32; - unsigned int i; - ptrdiff_t src_stride, dst_stride; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm256_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((const __m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. 
- filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - // have the same data in both lanes of a 256 bit register - filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg); - - // duplicate only the first 16 bits (first and second byte) - // across 256 bit register - firstFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x100u)); - // duplicate only the second 16 bits (third and forth byte) - // across 256 bit register - secondFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x302u)); - // duplicate only the third 16 bits (fifth and sixth byte) - // across 256 bit register - thirdFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x504u)); - // duplicate only the forth 16 bits (seventh and eighth byte) - // across 256 bit register - forthFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x706u)); - - filt1Reg = _mm256_load_si256((__m256i const *)filt1_global_avx2); - filt2Reg = _mm256_load_si256((__m256i const *)filt2_global_avx2); - filt3Reg = _mm256_load_si256((__m256i const *)filt3_global_avx2); - filt4Reg = _mm256_load_si256((__m256i const *)filt4_global_avx2); - - // multiple the size of the source and destination stride by two - src_stride = src_pixels_per_line << 1; - dst_stride = output_pitch << 1; - for (i = output_height; i > 1; i-=2) { - // load the 2 strides of source - srcReg32b1 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr - 3))); - srcReg32b1 = _mm256_inserti128_si256(srcReg32b1, - _mm_loadu_si128((const __m128i *) - (src_ptr+src_pixels_per_line-3)), 1); - - // filter the source buffer - srcRegFilt32b1_1= _mm256_shuffle_epi8(srcReg32b1, filt1Reg); - srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt4Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters); - srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters); - - // add and saturate the results together - 
srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, srcRegFilt32b2); - - // filter the source buffer - srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b1, filt2Reg); - srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b1, filt3Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters); - srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters); - - // add and saturate the results together - srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, - _mm256_min_epi16(srcRegFilt32b3, srcRegFilt32b2)); - - // reading 2 strides of the next 16 bytes - // (part of it was being read by earlier read) - srcReg32b2 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr + 5))); - srcReg32b2 = _mm256_inserti128_si256(srcReg32b2, - _mm_loadu_si128((const __m128i *) - (src_ptr+src_pixels_per_line+5)), 1); - - // add and saturate the results together - srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, - _mm256_max_epi16(srcRegFilt32b3, srcRegFilt32b2)); - - // filter the source buffer - srcRegFilt32b2_1 = _mm256_shuffle_epi8(srcReg32b2, filt1Reg); - srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt4Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt32b2_1 = _mm256_maddubs_epi16(srcRegFilt32b2_1, firstFilters); - srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters); - - // add and saturate the results together - srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, srcRegFilt32b2); - - // filter the source buffer - srcRegFilt32b3= _mm256_shuffle_epi8(srcReg32b2, filt2Reg); - srcRegFilt32b2= _mm256_shuffle_epi8(srcReg32b2, filt3Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters); - srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters); - - // add and saturate the results together - srcRegFilt32b2_1 = 
_mm256_adds_epi16(srcRegFilt32b2_1, - _mm256_min_epi16(srcRegFilt32b3, srcRegFilt32b2)); - srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, - _mm256_max_epi16(srcRegFilt32b3, srcRegFilt32b2)); - - - srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg64); - - srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, addFilterReg64); - - // shift by 7 bit each 16 bit - srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 7); - srcRegFilt32b2_1 = _mm256_srai_epi16(srcRegFilt32b2_1, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcRegFilt32b1_1 = _mm256_packus_epi16(srcRegFilt32b1_1, - srcRegFilt32b2_1); - - src_ptr+=src_stride; - - // save 16 bytes - _mm_store_si128((__m128i*)output_ptr, - _mm256_castsi256_si128(srcRegFilt32b1_1)); - - // save the next 16 bits - _mm_store_si128((__m128i*)(output_ptr+output_pitch), - _mm256_extractf128_si256(srcRegFilt32b1_1, 1)); - output_ptr+=dst_stride; - } - - // if the number of strides is odd. 
- // process only 16 bytes - if (i > 0) { - __m128i srcReg1, srcReg2, srcRegFilt1_1, srcRegFilt2_1; - __m128i srcRegFilt2, srcRegFilt3; - - srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); - - // filter the source buffer - srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1, - _mm256_castsi256_si128(filt1Reg)); - srcRegFilt2 = _mm_shuffle_epi8(srcReg1, - _mm256_castsi256_si128(filt4Reg)); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1_1 = _mm_maddubs_epi16(srcRegFilt1_1, - _mm256_castsi256_si128(firstFilters)); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, - _mm256_castsi256_si128(forthFilters)); - - // add and saturate the results together - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2); - - // filter the source buffer - srcRegFilt3= _mm_shuffle_epi8(srcReg1, - _mm256_castsi256_si128(filt2Reg)); - srcRegFilt2= _mm_shuffle_epi8(srcReg1, - _mm256_castsi256_si128(filt3Reg)); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, - _mm256_castsi256_si128(secondFilters)); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, - _mm256_castsi256_si128(thirdFilters)); - - // add and saturate the results together - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, - _mm_min_epi16(srcRegFilt3, srcRegFilt2)); - - // reading the next 16 bytes - // (part of it was being read by earlier read) - srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + 5)); - - // add and saturate the results together - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, - _mm_max_epi16(srcRegFilt3, srcRegFilt2)); - - // filter the source buffer - srcRegFilt2_1 = _mm_shuffle_epi8(srcReg2, - _mm256_castsi256_si128(filt1Reg)); - srcRegFilt2 = _mm_shuffle_epi8(srcReg2, - _mm256_castsi256_si128(filt4Reg)); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt2_1 = _mm_maddubs_epi16(srcRegFilt2_1, - _mm256_castsi256_si128(firstFilters)); - srcRegFilt2 = 
_mm_maddubs_epi16(srcRegFilt2, - _mm256_castsi256_si128(forthFilters)); - - // add and saturate the results together - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, srcRegFilt2); - - // filter the source buffer - srcRegFilt3 = _mm_shuffle_epi8(srcReg2, - _mm256_castsi256_si128(filt2Reg)); - srcRegFilt2 = _mm_shuffle_epi8(srcReg2, - _mm256_castsi256_si128(filt3Reg)); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, - _mm256_castsi256_si128(secondFilters)); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, - _mm256_castsi256_si128(thirdFilters)); - - // add and saturate the results together - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, - _mm_min_epi16(srcRegFilt3, srcRegFilt2)); - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, - _mm_max_epi16(srcRegFilt3, srcRegFilt2)); - - - srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, - _mm256_castsi256_si128(addFilterReg64)); - - srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, - _mm256_castsi256_si128(addFilterReg64)); - - // shift by 7 bit each 16 bit - srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 7); - srcRegFilt2_1 = _mm_srai_epi16(srcRegFilt2_1, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, srcRegFilt2_1); - - // save 16 bytes - _mm_store_si128((__m128i*)output_ptr, srcRegFilt1_1); - } -} - -static void vpx_filter_block1d16_v8_avx2(const uint8_t *src_ptr, - ptrdiff_t src_pitch, - uint8_t *output_ptr, - ptrdiff_t out_pitch, - uint32_t output_height, - const int16_t *filter) { - __m128i filtersReg; - __m256i addFilterReg64; - __m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5; - __m256i srcReg32b6, srcReg32b7, srcReg32b8, srcReg32b9, srcReg32b10; - __m256i srcReg32b11, srcReg32b12, filtersReg32; - __m256i firstFilters, secondFilters, thirdFilters, forthFilters; - unsigned int i; - ptrdiff_t 
src_stride, dst_stride; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm256_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((const __m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the - // same data in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - // have the same data in both lanes of a 256 bit register - filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg); - - // duplicate only the first 16 bits (first and second byte) - // across 256 bit register - firstFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x100u)); - // duplicate only the second 16 bits (third and forth byte) - // across 256 bit register - secondFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x302u)); - // duplicate only the third 16 bits (fifth and sixth byte) - // across 256 bit register - thirdFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x504u)); - // duplicate only the forth 16 bits (seventh and eighth byte) - // across 256 bit register - forthFilters = _mm256_shuffle_epi8(filtersReg32, - _mm256_set1_epi16(0x706u)); - - // multiple the size of the source and destination stride by two - src_stride = src_pitch << 1; - dst_stride = out_pitch << 1; - - // load 16 bytes 7 times in stride of src_pitch - srcReg32b1 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr))); - srcReg32b2 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch))); - srcReg32b3 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2))); - srcReg32b4 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3))); - srcReg32b5 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4))); - srcReg32b6 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5))); - srcReg32b7 = 
_mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6))); - - // have each consecutive loads on the same 256 register - srcReg32b1 = _mm256_inserti128_si256(srcReg32b1, - _mm256_castsi256_si128(srcReg32b2), 1); - srcReg32b2 = _mm256_inserti128_si256(srcReg32b2, - _mm256_castsi256_si128(srcReg32b3), 1); - srcReg32b3 = _mm256_inserti128_si256(srcReg32b3, - _mm256_castsi256_si128(srcReg32b4), 1); - srcReg32b4 = _mm256_inserti128_si256(srcReg32b4, - _mm256_castsi256_si128(srcReg32b5), 1); - srcReg32b5 = _mm256_inserti128_si256(srcReg32b5, - _mm256_castsi256_si128(srcReg32b6), 1); - srcReg32b6 = _mm256_inserti128_si256(srcReg32b6, - _mm256_castsi256_si128(srcReg32b7), 1); - - // merge every two consecutive registers except the last one - srcReg32b10 = _mm256_unpacklo_epi8(srcReg32b1, srcReg32b2); - srcReg32b1 = _mm256_unpackhi_epi8(srcReg32b1, srcReg32b2); - - // save - srcReg32b11 = _mm256_unpacklo_epi8(srcReg32b3, srcReg32b4); - - // save - srcReg32b3 = _mm256_unpackhi_epi8(srcReg32b3, srcReg32b4); - - // save - srcReg32b2 = _mm256_unpacklo_epi8(srcReg32b5, srcReg32b6); - - // save - srcReg32b5 = _mm256_unpackhi_epi8(srcReg32b5, srcReg32b6); - - - for (i = output_height; i > 1; i-=2) { - // load the last 2 loads of 16 bytes and have every two - // consecutive loads in the same 256 bit register - srcReg32b8 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7))); - srcReg32b7 = _mm256_inserti128_si256(srcReg32b7, - _mm256_castsi256_si128(srcReg32b8), 1); - srcReg32b9 = _mm256_castsi128_si256( - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 8))); - srcReg32b8 = _mm256_inserti128_si256(srcReg32b8, - _mm256_castsi256_si128(srcReg32b9), 1); - - // merge every two consecutive registers - // save - srcReg32b4 = _mm256_unpacklo_epi8(srcReg32b7, srcReg32b8); - srcReg32b7 = _mm256_unpackhi_epi8(srcReg32b7, srcReg32b8); - - // multiply 2 adjacent elements with the filter and add the result - srcReg32b10 = 
_mm256_maddubs_epi16(srcReg32b10, firstFilters); - srcReg32b6 = _mm256_maddubs_epi16(srcReg32b4, forthFilters); - - // add and saturate the results together - srcReg32b10 = _mm256_adds_epi16(srcReg32b10, srcReg32b6); - - // multiply 2 adjacent elements with the filter and add the result - srcReg32b8 = _mm256_maddubs_epi16(srcReg32b11, secondFilters); - srcReg32b12 = _mm256_maddubs_epi16(srcReg32b2, thirdFilters); - - // add and saturate the results together - srcReg32b10 = _mm256_adds_epi16(srcReg32b10, - _mm256_min_epi16(srcReg32b8, srcReg32b12)); - srcReg32b10 = _mm256_adds_epi16(srcReg32b10, - _mm256_max_epi16(srcReg32b8, srcReg32b12)); - - // multiply 2 adjacent elements with the filter and add the result - srcReg32b1 = _mm256_maddubs_epi16(srcReg32b1, firstFilters); - srcReg32b6 = _mm256_maddubs_epi16(srcReg32b7, forthFilters); - - srcReg32b1 = _mm256_adds_epi16(srcReg32b1, srcReg32b6); - - // multiply 2 adjacent elements with the filter and add the result - srcReg32b8 = _mm256_maddubs_epi16(srcReg32b3, secondFilters); - srcReg32b12 = _mm256_maddubs_epi16(srcReg32b5, thirdFilters); - - // add and saturate the results together - srcReg32b1 = _mm256_adds_epi16(srcReg32b1, - _mm256_min_epi16(srcReg32b8, srcReg32b12)); - srcReg32b1 = _mm256_adds_epi16(srcReg32b1, - _mm256_max_epi16(srcReg32b8, srcReg32b12)); - - srcReg32b10 = _mm256_adds_epi16(srcReg32b10, addFilterReg64); - srcReg32b1 = _mm256_adds_epi16(srcReg32b1, addFilterReg64); - - // shift by 7 bit each 16 bit - srcReg32b10 = _mm256_srai_epi16(srcReg32b10, 7); - srcReg32b1 = _mm256_srai_epi16(srcReg32b1, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcReg32b1 = _mm256_packus_epi16(srcReg32b10, srcReg32b1); - - src_ptr+=src_stride; - - // save 16 bytes - _mm_store_si128((__m128i*)output_ptr, - _mm256_castsi256_si128(srcReg32b1)); - - // save the next 16 bits - 
_mm_store_si128((__m128i*)(output_ptr+out_pitch), - _mm256_extractf128_si256(srcReg32b1, 1)); - - output_ptr+=dst_stride; - - // save part of the registers for next strides - srcReg32b10 = srcReg32b11; - srcReg32b1 = srcReg32b3; - srcReg32b11 = srcReg32b2; - srcReg32b3 = srcReg32b5; - srcReg32b2 = srcReg32b4; - srcReg32b5 = srcReg32b7; - srcReg32b7 = srcReg32b9; - } - if (i > 0) { - __m128i srcRegFilt1, srcRegFilt3, srcRegFilt4, srcRegFilt5; - __m128i srcRegFilt6, srcRegFilt7, srcRegFilt8; - // load the last 16 bytes - srcRegFilt8 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7)); - - // merge the last 2 results together - srcRegFilt4 = _mm_unpacklo_epi8( - _mm256_castsi256_si128(srcReg32b7), srcRegFilt8); - srcRegFilt7 = _mm_unpackhi_epi8( - _mm256_castsi256_si128(srcReg32b7), srcRegFilt8); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b10), - _mm256_castsi256_si128(firstFilters)); - srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, - _mm256_castsi256_si128(forthFilters)); - srcRegFilt3 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b1), - _mm256_castsi256_si128(firstFilters)); - srcRegFilt7 = _mm_maddubs_epi16(srcRegFilt7, - _mm256_castsi256_si128(forthFilters)); - - // add and saturate the results together - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4); - srcRegFilt3 = _mm_adds_epi16(srcRegFilt3, srcRegFilt7); - - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt4 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b11), - _mm256_castsi256_si128(secondFilters)); - srcRegFilt5 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b3), - _mm256_castsi256_si128(secondFilters)); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt6 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b2), - _mm256_castsi256_si128(thirdFilters)); - srcRegFilt7 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b5), - 
_mm256_castsi256_si128(thirdFilters)); - - // add and saturate the results together - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, - _mm_min_epi16(srcRegFilt4, srcRegFilt6)); - srcRegFilt3 = _mm_adds_epi16(srcRegFilt3, - _mm_min_epi16(srcRegFilt5, srcRegFilt7)); - - // add and saturate the results together - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, - _mm_max_epi16(srcRegFilt4, srcRegFilt6)); - srcRegFilt3 = _mm_adds_epi16(srcRegFilt3, - _mm_max_epi16(srcRegFilt5, srcRegFilt7)); - - - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, - _mm256_castsi256_si128(addFilterReg64)); - srcRegFilt3 = _mm_adds_epi16(srcRegFilt3, - _mm256_castsi256_si128(addFilterReg64)); - - // shift by 7 bit each 16 bit - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - srcRegFilt3 = _mm_srai_epi16(srcRegFilt3, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt3); - - // save 16 bytes - _mm_store_si128((__m128i*)output_ptr, srcRegFilt1); - } -} - -#if HAVE_AVX2 && HAVE_SSSE3 -filter8_1dfunction vpx_filter_block1d4_v8_ssse3; -#if ARCH_X86_64 -filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3; -filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3; -filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3; -#define vpx_filter_block1d8_v8_avx2 vpx_filter_block1d8_v8_intrin_ssse3 -#define vpx_filter_block1d8_h8_avx2 vpx_filter_block1d8_h8_intrin_ssse3 -#define vpx_filter_block1d4_h8_avx2 vpx_filter_block1d4_h8_intrin_ssse3 -#else // ARCH_X86 -filter8_1dfunction vpx_filter_block1d8_v8_ssse3; -filter8_1dfunction vpx_filter_block1d8_h8_ssse3; -filter8_1dfunction vpx_filter_block1d4_h8_ssse3; -#define vpx_filter_block1d8_v8_avx2 vpx_filter_block1d8_v8_ssse3 -#define vpx_filter_block1d8_h8_avx2 vpx_filter_block1d8_h8_ssse3 -#define vpx_filter_block1d4_h8_avx2 vpx_filter_block1d4_h8_ssse3 -#endif // ARCH_X86_64 -filter8_1dfunction 
vpx_filter_block1d16_v2_ssse3; -filter8_1dfunction vpx_filter_block1d16_h2_ssse3; -filter8_1dfunction vpx_filter_block1d8_v2_ssse3; -filter8_1dfunction vpx_filter_block1d8_h2_ssse3; -filter8_1dfunction vpx_filter_block1d4_v2_ssse3; -filter8_1dfunction vpx_filter_block1d4_h2_ssse3; -#define vpx_filter_block1d4_v8_avx2 vpx_filter_block1d4_v8_ssse3 -#define vpx_filter_block1d16_v2_avx2 vpx_filter_block1d16_v2_ssse3 -#define vpx_filter_block1d16_h2_avx2 vpx_filter_block1d16_h2_ssse3 -#define vpx_filter_block1d8_v2_avx2 vpx_filter_block1d8_v2_ssse3 -#define vpx_filter_block1d8_h2_avx2 vpx_filter_block1d8_h2_ssse3 -#define vpx_filter_block1d4_v2_avx2 vpx_filter_block1d4_v2_ssse3 -#define vpx_filter_block1d4_h2_avx2 vpx_filter_block1d4_h2_ssse3 -// void vpx_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vpx_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2); -FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2); - -// void vpx_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_2D(, avx2); -#endif // HAVE_AX2 && HAVE_SSSE3 diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c deleted file mode 100644 index 6fd52087c7..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c +++ /dev/null @@ -1,915 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -// Due to a header conflict between math.h and intrinsics includes with ceil() -// in certain configurations under vs9 this include needs to precede -// tmmintrin.h. - -#include <tmmintrin.h> - -#include "./vpx_dsp_rtcd.h" -#include "vpx_dsp/vpx_filter.h" -#include "vpx_dsp/x86/convolve.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/mem.h" -#include "vpx_ports/emmintrin_compat.h" - -// filters only for the 4_h8 convolution -DECLARE_ALIGNED(16, static const uint8_t, filt1_4_h8[16]) = { - 0, 1, 1, 2, 2, 3, 3, 4, 2, 3, 3, 4, 4, 5, 5, 6 -}; - -DECLARE_ALIGNED(16, static const uint8_t, filt2_4_h8[16]) = { - 4, 5, 5, 6, 6, 7, 7, 8, 6, 7, 7, 8, 8, 9, 9, 10 -}; - -// filters for 8_h8 and 16_h8 -DECLARE_ALIGNED(16, static const uint8_t, filt1_global[16]) = { - 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 -}; - -DECLARE_ALIGNED(16, static const uint8_t, filt2_global[16]) = { - 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 -}; - -DECLARE_ALIGNED(16, static const uint8_t, filt3_global[16]) = { - 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12 -}; - -DECLARE_ALIGNED(16, static const uint8_t, filt4_global[16]) = { - 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 -}; - -// These are reused by the avx2 intrinsics. 
-filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3; -filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3; -filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3; - -void vpx_filter_block1d4_h8_intrin_ssse3(const uint8_t *src_ptr, - ptrdiff_t src_pixels_per_line, - uint8_t *output_ptr, - ptrdiff_t output_pitch, - uint32_t output_height, - const int16_t *filter) { - __m128i firstFilters, secondFilters, shuffle1, shuffle2; - __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4; - __m128i addFilterReg64, filtersReg, srcReg, minReg; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 =_mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((const __m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. - filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits in the filter into the first lane - firstFilters = _mm_shufflelo_epi16(filtersReg, 0); - // duplicate only the third 16 bit in the filter into the first lane - secondFilters = _mm_shufflelo_epi16(filtersReg, 0xAAu); - // duplicate only the seconds 16 bits in the filter into the second lane - // firstFilters: k0 k1 k0 k1 k0 k1 k0 k1 k2 k3 k2 k3 k2 k3 k2 k3 - firstFilters = _mm_shufflehi_epi16(firstFilters, 0x55u); - // duplicate only the forth 16 bits in the filter into the second lane - // secondFilters: k4 k5 k4 k5 k4 k5 k4 k5 k6 k7 k6 k7 k6 k7 k6 k7 - secondFilters = _mm_shufflehi_epi16(secondFilters, 0xFFu); - - // loading the local filters - shuffle1 =_mm_load_si128((__m128i const *)filt1_4_h8); - shuffle2 = _mm_load_si128((__m128i const *)filt2_4_h8); - - for (i = 0; i < output_height; i++) { - srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); - - // filter the source buffer - srcRegFilt1= _mm_shuffle_epi8(srcReg, shuffle1); - srcRegFilt2= _mm_shuffle_epi8(srcReg, shuffle2); - - // multiply 2 adjacent elements with 
the filter and add the result - srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters); - - // extract the higher half of the lane - srcRegFilt3 = _mm_srli_si128(srcRegFilt1, 8); - srcRegFilt4 = _mm_srli_si128(srcRegFilt2, 8); - - minReg = _mm_min_epi16(srcRegFilt3, srcRegFilt2); - - // add and saturate all the results together - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4); - srcRegFilt3 = _mm_max_epi16(srcRegFilt3, srcRegFilt2); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); - - // shift by 7 bit each 16 bits - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - - // shrink to 8 bit each 16 bits - srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1); - src_ptr+=src_pixels_per_line; - - // save only 4 bytes - *((int*)&output_ptr[0])= _mm_cvtsi128_si32(srcRegFilt1); - - output_ptr+=output_pitch; - } -} - -void vpx_filter_block1d8_h8_intrin_ssse3(const uint8_t *src_ptr, - ptrdiff_t src_pixels_per_line, - uint8_t *output_ptr, - ptrdiff_t output_pitch, - uint32_t output_height, - const int16_t *filter) { - __m128i firstFilters, secondFilters, thirdFilters, forthFilters, srcReg; - __m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg; - __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4; - __m128i addFilterReg64, filtersReg, minReg; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((const __m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. 
- filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits (first and second byte) - // across 128 bit register - firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u)); - // duplicate only the second 16 bits (third and forth byte) - // across 128 bit register - secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u)); - // duplicate only the third 16 bits (fifth and sixth byte) - // across 128 bit register - thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u)); - // duplicate only the forth 16 bits (seventh and eighth byte) - // across 128 bit register - forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); - - filt1Reg = _mm_load_si128((__m128i const *)filt1_global); - filt2Reg = _mm_load_si128((__m128i const *)filt2_global); - filt3Reg = _mm_load_si128((__m128i const *)filt3_global); - filt4Reg = _mm_load_si128((__m128i const *)filt4_global); - - for (i = 0; i < output_height; i++) { - srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); - - // filter the source buffer - srcRegFilt1= _mm_shuffle_epi8(srcReg, filt1Reg); - srcRegFilt2= _mm_shuffle_epi8(srcReg, filt2Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters); - - // filter the source buffer - srcRegFilt3= _mm_shuffle_epi8(srcReg, filt3Reg); - srcRegFilt4= _mm_shuffle_epi8(srcReg, filt4Reg); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, thirdFilters); - srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, forthFilters); - - // add and saturate all the results together - minReg = _mm_min_epi16(srcRegFilt2, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4); - - srcRegFilt2= _mm_max_epi16(srcRegFilt2, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg); - srcRegFilt1 = 
_mm_adds_epi16(srcRegFilt1, srcRegFilt2); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); - - // shift by 7 bit each 16 bits - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - - // shrink to 8 bit each 16 bits - srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1); - - src_ptr+=src_pixels_per_line; - - // save only 8 bytes - _mm_storel_epi64((__m128i*)&output_ptr[0], srcRegFilt1); - - output_ptr+=output_pitch; - } -} - -void vpx_filter_block1d8_v8_intrin_ssse3(const uint8_t *src_ptr, - ptrdiff_t src_pitch, - uint8_t *output_ptr, - ptrdiff_t out_pitch, - uint32_t output_height, - const int16_t *filter) { - __m128i addFilterReg64, filtersReg, minReg; - __m128i firstFilters, secondFilters, thirdFilters, forthFilters; - __m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt5; - __m128i srcReg1, srcReg2, srcReg3, srcReg4, srcReg5, srcReg6, srcReg7; - __m128i srcReg8; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((const __m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. 
- filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits in the filter - firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u)); - // duplicate only the second 16 bits in the filter - secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u)); - // duplicate only the third 16 bits in the filter - thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u)); - // duplicate only the forth 16 bits in the filter - forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); - - // load the first 7 rows of 8 bytes - srcReg1 = _mm_loadl_epi64((const __m128i *)src_ptr); - srcReg2 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch)); - srcReg3 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 2)); - srcReg4 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 3)); - srcReg5 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 4)); - srcReg6 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 5)); - srcReg7 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 6)); - - for (i = 0; i < output_height; i++) { - // load the last 8 bytes - srcReg8 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 7)); - - // merge the result together - srcRegFilt1 = _mm_unpacklo_epi8(srcReg1, srcReg2); - srcRegFilt3 = _mm_unpacklo_epi8(srcReg3, srcReg4); - - // merge the result together - srcRegFilt2 = _mm_unpacklo_epi8(srcReg5, srcReg6); - srcRegFilt5 = _mm_unpacklo_epi8(srcReg7, srcReg8); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters); - srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters); - srcRegFilt5 = _mm_maddubs_epi16(srcRegFilt5, forthFilters); - - // add and saturate the results together - minReg = _mm_min_epi16(srcRegFilt2, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt5); - srcRegFilt2 = 
_mm_max_epi16(srcRegFilt2, srcRegFilt3); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); - - // shift by 7 bit each 16 bit - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - - // shrink to 8 bit each 16 bits - srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1); - - src_ptr+=src_pitch; - - // shift down a row - srcReg1 = srcReg2; - srcReg2 = srcReg3; - srcReg3 = srcReg4; - srcReg4 = srcReg5; - srcReg5 = srcReg6; - srcReg6 = srcReg7; - srcReg7 = srcReg8; - - // save only 8 bytes convolve result - _mm_storel_epi64((__m128i*)&output_ptr[0], srcRegFilt1); - - output_ptr+=out_pitch; - } -} - -filter8_1dfunction vpx_filter_block1d16_v8_ssse3; -filter8_1dfunction vpx_filter_block1d16_h8_ssse3; -filter8_1dfunction vpx_filter_block1d8_v8_ssse3; -filter8_1dfunction vpx_filter_block1d8_h8_ssse3; -filter8_1dfunction vpx_filter_block1d4_v8_ssse3; -filter8_1dfunction vpx_filter_block1d4_h8_ssse3; -filter8_1dfunction vpx_filter_block1d16_v8_avg_ssse3; -filter8_1dfunction vpx_filter_block1d16_h8_avg_ssse3; -filter8_1dfunction vpx_filter_block1d8_v8_avg_ssse3; -filter8_1dfunction vpx_filter_block1d8_h8_avg_ssse3; -filter8_1dfunction vpx_filter_block1d4_v8_avg_ssse3; -filter8_1dfunction vpx_filter_block1d4_h8_avg_ssse3; - -filter8_1dfunction vpx_filter_block1d16_v2_ssse3; -filter8_1dfunction vpx_filter_block1d16_h2_ssse3; -filter8_1dfunction vpx_filter_block1d8_v2_ssse3; -filter8_1dfunction vpx_filter_block1d8_h2_ssse3; -filter8_1dfunction vpx_filter_block1d4_v2_ssse3; -filter8_1dfunction vpx_filter_block1d4_h2_ssse3; -filter8_1dfunction vpx_filter_block1d16_v2_avg_ssse3; -filter8_1dfunction vpx_filter_block1d16_h2_avg_ssse3; -filter8_1dfunction vpx_filter_block1d8_v2_avg_ssse3; -filter8_1dfunction vpx_filter_block1d8_h2_avg_ssse3; -filter8_1dfunction vpx_filter_block1d4_v2_avg_ssse3; -filter8_1dfunction vpx_filter_block1d4_h2_avg_ssse3; - -// void 
vpx_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vpx_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vpx_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vpx_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3); -FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3); -FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3); -FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, - ssse3); - -#define TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, \ - out0, out1, out2, out3, out4, out5, out6, out7) { \ - const __m128i tr0_0 = _mm_unpacklo_epi8(in0, in1); \ - const __m128i tr0_1 = _mm_unpacklo_epi8(in2, in3); \ - const __m128i tr0_2 = _mm_unpacklo_epi8(in4, in5); \ - const __m128i tr0_3 = _mm_unpacklo_epi8(in6, in7); \ - \ - const __m128i tr1_0 = _mm_unpacklo_epi16(tr0_0, tr0_1); \ - const __m128i tr1_1 = _mm_unpackhi_epi16(tr0_0, tr0_1); \ - const __m128i tr1_2 = _mm_unpacklo_epi16(tr0_2, tr0_3); \ - const __m128i tr1_3 = _mm_unpackhi_epi16(tr0_2, tr0_3); \ - \ - const __m128i tr2_0 = _mm_unpacklo_epi32(tr1_0, tr1_2); \ - const __m128i tr2_1 = _mm_unpackhi_epi32(tr1_0, tr1_2); \ - const __m128i tr2_2 = _mm_unpacklo_epi32(tr1_1, tr1_3); \ - const __m128i tr2_3 = _mm_unpackhi_epi32(tr1_1, 
tr1_3); \ - \ - out0 = _mm_unpacklo_epi64(tr2_0, tr2_0); \ - out1 = _mm_unpackhi_epi64(tr2_0, tr2_0); \ - out2 = _mm_unpacklo_epi64(tr2_1, tr2_1); \ - out3 = _mm_unpackhi_epi64(tr2_1, tr2_1); \ - out4 = _mm_unpacklo_epi64(tr2_2, tr2_2); \ - out5 = _mm_unpackhi_epi64(tr2_2, tr2_2); \ - out6 = _mm_unpacklo_epi64(tr2_3, tr2_3); \ - out7 = _mm_unpackhi_epi64(tr2_3, tr2_3); \ -} - -static void filter_horiz_w8_ssse3(const uint8_t *src_x, ptrdiff_t src_pitch, - uint8_t *dst, const int16_t *x_filter) { - const __m128i k_256 = _mm_set1_epi16(1 << 8); - const __m128i f_values = _mm_load_si128((const __m128i *)x_filter); - // pack and duplicate the filter values - const __m128i f1f0 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0200u)); - const __m128i f3f2 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0604u)); - const __m128i f5f4 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0a08u)); - const __m128i f7f6 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0e0cu)); - const __m128i A = _mm_loadl_epi64((const __m128i *)src_x); - const __m128i B = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch)); - const __m128i C = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch * 2)); - const __m128i D = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch * 3)); - const __m128i E = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch * 4)); - const __m128i F = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch * 5)); - const __m128i G = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch * 6)); - const __m128i H = _mm_loadl_epi64((const __m128i *)(src_x + src_pitch * 7)); - // 00 01 10 11 02 03 12 13 04 05 14 15 06 07 16 17 - const __m128i tr0_0 = _mm_unpacklo_epi16(A, B); - // 20 21 30 31 22 23 32 33 24 25 34 35 26 27 36 37 - const __m128i tr0_1 = _mm_unpacklo_epi16(C, D); - // 40 41 50 51 42 43 52 53 44 45 54 55 46 47 56 57 - const __m128i tr0_2 = _mm_unpacklo_epi16(E, F); - // 60 61 70 71 62 63 72 73 64 65 74 75 66 67 76 77 - const __m128i tr0_3 = _mm_unpacklo_epi16(G, H); - // 00 01 10 
11 20 21 30 31 02 03 12 13 22 23 32 33 - const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); - // 04 05 14 15 24 25 34 35 06 07 16 17 26 27 36 37 - const __m128i tr1_1 = _mm_unpackhi_epi32(tr0_0, tr0_1); - // 40 41 50 51 60 61 70 71 42 43 52 53 62 63 72 73 - const __m128i tr1_2 = _mm_unpacklo_epi32(tr0_2, tr0_3); - // 44 45 54 55 64 65 74 75 46 47 56 57 66 67 76 77 - const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3); - // 00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71 - const __m128i s1s0 = _mm_unpacklo_epi64(tr1_0, tr1_2); - const __m128i s3s2 = _mm_unpackhi_epi64(tr1_0, tr1_2); - const __m128i s5s4 = _mm_unpacklo_epi64(tr1_1, tr1_3); - const __m128i s7s6 = _mm_unpackhi_epi64(tr1_1, tr1_3); - // multiply 2 adjacent elements with the filter and add the result - const __m128i x0 = _mm_maddubs_epi16(s1s0, f1f0); - const __m128i x1 = _mm_maddubs_epi16(s3s2, f3f2); - const __m128i x2 = _mm_maddubs_epi16(s5s4, f5f4); - const __m128i x3 = _mm_maddubs_epi16(s7s6, f7f6); - // add and saturate the results together - const __m128i min_x2x1 = _mm_min_epi16(x2, x1); - const __m128i max_x2x1 = _mm_max_epi16(x2, x1); - __m128i temp = _mm_adds_epi16(x0, x3); - temp = _mm_adds_epi16(temp, min_x2x1); - temp = _mm_adds_epi16(temp, max_x2x1); - // round and shift by 7 bit each 16 bit - temp = _mm_mulhrs_epi16(temp, k_256); - // shrink to 8 bit each 16 bits - temp = _mm_packus_epi16(temp, temp); - // save only 8 bytes convolve result - _mm_storel_epi64((__m128i*)dst, temp); -} - -static void transpose8x8_to_dst(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride) { - __m128i A, B, C, D, E, F, G, H; - - A = _mm_loadl_epi64((const __m128i *)src); - B = _mm_loadl_epi64((const __m128i *)(src + src_stride)); - C = _mm_loadl_epi64((const __m128i *)(src + src_stride * 2)); - D = _mm_loadl_epi64((const __m128i *)(src + src_stride * 3)); - E = _mm_loadl_epi64((const __m128i *)(src + src_stride * 4)); - F = _mm_loadl_epi64((const __m128i *)(src + src_stride 
* 5)); - G = _mm_loadl_epi64((const __m128i *)(src + src_stride * 6)); - H = _mm_loadl_epi64((const __m128i *)(src + src_stride * 7)); - - TRANSPOSE_8X8(A, B, C, D, E, F, G, H, - A, B, C, D, E, F, G, H); - - _mm_storel_epi64((__m128i*)dst, A); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 1), B); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 2), C); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 3), D); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 4), E); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 5), F); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 6), G); - _mm_storel_epi64((__m128i*)(dst + dst_stride * 7), H); -} - -static void scaledconvolve_horiz_w8(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *x_filters, - int x0_q4, int x_step_q4, int w, int h) { - DECLARE_ALIGNED(16, uint8_t, temp[8 * 8]); - int x, y, z; - src -= SUBPEL_TAPS / 2 - 1; - - // This function processes 8x8 areas. The intermediate height is not always - // a multiple of 8, so force it to be a multiple of 8 here. 
- y = h + (8 - (h & 0x7)); - - do { - int x_q4 = x0_q4; - for (x = 0; x < w; x += 8) { - // process 8 src_x steps - for (z = 0; z < 8; ++z) { - const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; - const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; - if (x_q4 & SUBPEL_MASK) { - filter_horiz_w8_ssse3(src_x, src_stride, temp + (z * 8), x_filter); - } else { - int i; - for (i = 0; i < 8; ++i) { - temp[z * 8 + i] = src_x[i * src_stride + 3]; - } - } - x_q4 += x_step_q4; - } - - // transpose the 8x8 filters values back to dst - transpose8x8_to_dst(temp, 8, dst + x, dst_stride); - } - - src += src_stride * 8; - dst += dst_stride * 8; - } while (y -= 8); -} - -static void filter_horiz_w4_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch, - uint8_t *dst, const int16_t *filter) { - const __m128i k_256 = _mm_set1_epi16(1 << 8); - const __m128i f_values = _mm_load_si128((const __m128i *)filter); - // pack and duplicate the filter values - const __m128i f1f0 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0200u)); - const __m128i f3f2 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0604u)); - const __m128i f5f4 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0a08u)); - const __m128i f7f6 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0e0cu)); - const __m128i A = _mm_loadl_epi64((const __m128i *)src_ptr); - const __m128i B = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch)); - const __m128i C = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 2)); - const __m128i D = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 3)); - // TRANSPOSE... 
- // 00 01 02 03 04 05 06 07 - // 10 11 12 13 14 15 16 17 - // 20 21 22 23 24 25 26 27 - // 30 31 32 33 34 35 36 37 - // - // TO - // - // 00 10 20 30 - // 01 11 21 31 - // 02 12 22 32 - // 03 13 23 33 - // 04 14 24 34 - // 05 15 25 35 - // 06 16 26 36 - // 07 17 27 37 - // - // 00 01 10 11 02 03 12 13 04 05 14 15 06 07 16 17 - const __m128i tr0_0 = _mm_unpacklo_epi16(A, B); - // 20 21 30 31 22 23 32 33 24 25 34 35 26 27 36 37 - const __m128i tr0_1 = _mm_unpacklo_epi16(C, D); - // 00 01 10 11 20 21 30 31 02 03 12 13 22 23 32 33 - const __m128i s1s0 = _mm_unpacklo_epi32(tr0_0, tr0_1); - // 04 05 14 15 24 25 34 35 06 07 16 17 26 27 36 37 - const __m128i s5s4 = _mm_unpackhi_epi32(tr0_0, tr0_1); - // 02 03 12 13 22 23 32 33 - const __m128i s3s2 = _mm_srli_si128(s1s0, 8); - // 06 07 16 17 26 27 36 37 - const __m128i s7s6 = _mm_srli_si128(s5s4, 8); - // multiply 2 adjacent elements with the filter and add the result - const __m128i x0 = _mm_maddubs_epi16(s1s0, f1f0); - const __m128i x1 = _mm_maddubs_epi16(s3s2, f3f2); - const __m128i x2 = _mm_maddubs_epi16(s5s4, f5f4); - const __m128i x3 = _mm_maddubs_epi16(s7s6, f7f6); - // add and saturate the results together - const __m128i min_x2x1 = _mm_min_epi16(x2, x1); - const __m128i max_x2x1 = _mm_max_epi16(x2, x1); - __m128i temp = _mm_adds_epi16(x0, x3); - temp = _mm_adds_epi16(temp, min_x2x1); - temp = _mm_adds_epi16(temp, max_x2x1); - // round and shift by 7 bit each 16 bit - temp = _mm_mulhrs_epi16(temp, k_256); - // shrink to 8 bit each 16 bits - temp = _mm_packus_epi16(temp, temp); - // save only 4 bytes - *(int *)dst = _mm_cvtsi128_si32(temp); -} - -static void transpose4x4_to_dst(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride) { - __m128i A = _mm_cvtsi32_si128(*(const int *)src); - __m128i B = _mm_cvtsi32_si128(*(const int *)(src + src_stride)); - __m128i C = _mm_cvtsi32_si128(*(const int *)(src + src_stride * 2)); - __m128i D = _mm_cvtsi32_si128(*(const int *)(src + src_stride * 3)); - 
// 00 10 01 11 02 12 03 13 - const __m128i tr0_0 = _mm_unpacklo_epi8(A, B); - // 20 30 21 31 22 32 23 33 - const __m128i tr0_1 = _mm_unpacklo_epi8(C, D); - // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 - A = _mm_unpacklo_epi16(tr0_0, tr0_1); - B = _mm_srli_si128(A, 4); - C = _mm_srli_si128(A, 8); - D = _mm_srli_si128(A, 12); - - *(int *)(dst) = _mm_cvtsi128_si32(A); - *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(B); - *(int *)(dst + dst_stride * 2) = _mm_cvtsi128_si32(C); - *(int *)(dst + dst_stride * 3) = _mm_cvtsi128_si32(D); -} - -static void scaledconvolve_horiz_w4(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *x_filters, - int x0_q4, int x_step_q4, int w, int h) { - DECLARE_ALIGNED(16, uint8_t, temp[4 * 4]); - int x, y, z; - src -= SUBPEL_TAPS / 2 - 1; - - for (y = 0; y < h; y += 4) { - int x_q4 = x0_q4; - for (x = 0; x < w; x += 4) { - // process 4 src_x steps - for (z = 0; z < 4; ++z) { - const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; - const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; - if (x_q4 & SUBPEL_MASK) { - filter_horiz_w4_ssse3(src_x, src_stride, temp + (z * 4), x_filter); - } else { - int i; - for (i = 0; i < 4; ++i) { - temp[z * 4 + i] = src_x[i * src_stride + 3]; - } - } - x_q4 += x_step_q4; - } - - // transpose the 4x4 filters values back to dst - transpose4x4_to_dst(temp, 4, dst + x, dst_stride); - } - - src += src_stride * 4; - dst += dst_stride * 4; - } -} - -static void filter_vert_w4_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch, - uint8_t *dst, const int16_t *filter) { - const __m128i k_256 = _mm_set1_epi16(1 << 8); - const __m128i f_values = _mm_load_si128((const __m128i *)filter); - // pack and duplicate the filter values - const __m128i f1f0 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0200u)); - const __m128i f3f2 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0604u)); - const __m128i f5f4 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0a08u)); - 
const __m128i f7f6 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0e0cu)); - const __m128i A = _mm_cvtsi32_si128(*(const int *)src_ptr); - const __m128i B = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch)); - const __m128i C = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch * 2)); - const __m128i D = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch * 3)); - const __m128i E = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch * 4)); - const __m128i F = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch * 5)); - const __m128i G = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch * 6)); - const __m128i H = _mm_cvtsi32_si128(*(const int *)(src_ptr + src_pitch * 7)); - const __m128i s1s0 = _mm_unpacklo_epi8(A, B); - const __m128i s3s2 = _mm_unpacklo_epi8(C, D); - const __m128i s5s4 = _mm_unpacklo_epi8(E, F); - const __m128i s7s6 = _mm_unpacklo_epi8(G, H); - // multiply 2 adjacent elements with the filter and add the result - const __m128i x0 = _mm_maddubs_epi16(s1s0, f1f0); - const __m128i x1 = _mm_maddubs_epi16(s3s2, f3f2); - const __m128i x2 = _mm_maddubs_epi16(s5s4, f5f4); - const __m128i x3 = _mm_maddubs_epi16(s7s6, f7f6); - // add and saturate the results together - const __m128i min_x2x1 = _mm_min_epi16(x2, x1); - const __m128i max_x2x1 = _mm_max_epi16(x2, x1); - __m128i temp = _mm_adds_epi16(x0, x3); - temp = _mm_adds_epi16(temp, min_x2x1); - temp = _mm_adds_epi16(temp, max_x2x1); - // round and shift by 7 bit each 16 bit - temp = _mm_mulhrs_epi16(temp, k_256); - // shrink to 8 bit each 16 bits - temp = _mm_packus_epi16(temp, temp); - // save only 4 bytes - *(int *)dst = _mm_cvtsi128_si32(temp); -} - -static void scaledconvolve_vert_w4(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *y_filters, - int y0_q4, int y_step_q4, int w, int h) { - int y; - int y_q4 = y0_q4; - - src -= src_stride * (SUBPEL_TAPS / 2 - 1); - for (y = 0; y < h; ++y) { - const unsigned char *src_y = &src[(y_q4 >> 
SUBPEL_BITS) * src_stride]; - const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; - - if (y_q4 & SUBPEL_MASK) { - filter_vert_w4_ssse3(src_y, src_stride, &dst[y * dst_stride], y_filter); - } else { - memcpy(&dst[y * dst_stride], &src_y[3 * src_stride], w); - } - - y_q4 += y_step_q4; - } -} - -static void filter_vert_w8_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch, - uint8_t *dst, const int16_t *filter) { - const __m128i k_256 = _mm_set1_epi16(1 << 8); - const __m128i f_values = _mm_load_si128((const __m128i *)filter); - // pack and duplicate the filter values - const __m128i f1f0 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0200u)); - const __m128i f3f2 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0604u)); - const __m128i f5f4 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0a08u)); - const __m128i f7f6 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0e0cu)); - const __m128i A = _mm_loadl_epi64((const __m128i *)src_ptr); - const __m128i B = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch)); - const __m128i C = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 2)); - const __m128i D = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 3)); - const __m128i E = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 4)); - const __m128i F = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 5)); - const __m128i G = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 6)); - const __m128i H = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 7)); - const __m128i s1s0 = _mm_unpacklo_epi8(A, B); - const __m128i s3s2 = _mm_unpacklo_epi8(C, D); - const __m128i s5s4 = _mm_unpacklo_epi8(E, F); - const __m128i s7s6 = _mm_unpacklo_epi8(G, H); - // multiply 2 adjacent elements with the filter and add the result - const __m128i x0 = _mm_maddubs_epi16(s1s0, f1f0); - const __m128i x1 = _mm_maddubs_epi16(s3s2, f3f2); - const __m128i x2 = _mm_maddubs_epi16(s5s4, f5f4); - const __m128i x3 = _mm_maddubs_epi16(s7s6, f7f6); - // add and 
saturate the results together - const __m128i min_x2x1 = _mm_min_epi16(x2, x1); - const __m128i max_x2x1 = _mm_max_epi16(x2, x1); - __m128i temp = _mm_adds_epi16(x0, x3); - temp = _mm_adds_epi16(temp, min_x2x1); - temp = _mm_adds_epi16(temp, max_x2x1); - // round and shift by 7 bit each 16 bit - temp = _mm_mulhrs_epi16(temp, k_256); - // shrink to 8 bit each 16 bits - temp = _mm_packus_epi16(temp, temp); - // save only 8 bytes convolve result - _mm_storel_epi64((__m128i*)dst, temp); -} - -static void scaledconvolve_vert_w8(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *y_filters, - int y0_q4, int y_step_q4, int w, int h) { - int y; - int y_q4 = y0_q4; - - src -= src_stride * (SUBPEL_TAPS / 2 - 1); - for (y = 0; y < h; ++y) { - const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; - const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; - if (y_q4 & SUBPEL_MASK) { - filter_vert_w8_ssse3(src_y, src_stride, &dst[y * dst_stride], y_filter); - } else { - memcpy(&dst[y * dst_stride], &src_y[3 * src_stride], w); - } - y_q4 += y_step_q4; - } -} - -static void filter_vert_w16_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch, - uint8_t *dst, const int16_t *filter, int w) { - const __m128i k_256 = _mm_set1_epi16(1 << 8); - const __m128i f_values = _mm_load_si128((const __m128i *)filter); - // pack and duplicate the filter values - const __m128i f1f0 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0200u)); - const __m128i f3f2 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0604u)); - const __m128i f5f4 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0a08u)); - const __m128i f7f6 = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0e0cu)); - int i; - - for (i = 0; i < w; i += 16) { - const __m128i A = _mm_loadu_si128((const __m128i *)src_ptr); - const __m128i B = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch)); - const __m128i C = - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2)); - const 
__m128i D = - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3)); - const __m128i E = - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4)); - const __m128i F = - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5)); - const __m128i G = - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6)); - const __m128i H = - _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7)); - // merge the result together - const __m128i s1s0_lo = _mm_unpacklo_epi8(A, B); - const __m128i s7s6_lo = _mm_unpacklo_epi8(G, H); - const __m128i s1s0_hi = _mm_unpackhi_epi8(A, B); - const __m128i s7s6_hi = _mm_unpackhi_epi8(G, H); - // multiply 2 adjacent elements with the filter and add the result - const __m128i x0_lo = _mm_maddubs_epi16(s1s0_lo, f1f0); - const __m128i x3_lo = _mm_maddubs_epi16(s7s6_lo, f7f6); - const __m128i x0_hi = _mm_maddubs_epi16(s1s0_hi, f1f0); - const __m128i x3_hi = _mm_maddubs_epi16(s7s6_hi, f7f6); - // add and saturate the results together - const __m128i x3x0_lo = _mm_adds_epi16(x0_lo, x3_lo); - const __m128i x3x0_hi = _mm_adds_epi16(x0_hi, x3_hi); - // merge the result together - const __m128i s3s2_lo = _mm_unpacklo_epi8(C, D); - const __m128i s3s2_hi = _mm_unpackhi_epi8(C, D); - // multiply 2 adjacent elements with the filter and add the result - const __m128i x1_lo = _mm_maddubs_epi16(s3s2_lo, f3f2); - const __m128i x1_hi = _mm_maddubs_epi16(s3s2_hi, f3f2); - // merge the result together - const __m128i s5s4_lo = _mm_unpacklo_epi8(E, F); - const __m128i s5s4_hi = _mm_unpackhi_epi8(E, F); - // multiply 2 adjacent elements with the filter and add the result - const __m128i x2_lo = _mm_maddubs_epi16(s5s4_lo, f5f4); - const __m128i x2_hi = _mm_maddubs_epi16(s5s4_hi, f5f4); - // add and saturate the results together - __m128i temp_lo = _mm_adds_epi16(x3x0_lo, _mm_min_epi16(x1_lo, x2_lo)); - __m128i temp_hi = _mm_adds_epi16(x3x0_hi, _mm_min_epi16(x1_hi, x2_hi)); - - // add and saturate the results together - temp_lo = 
_mm_adds_epi16(temp_lo, _mm_max_epi16(x1_lo, x2_lo)); - temp_hi = _mm_adds_epi16(temp_hi, _mm_max_epi16(x1_hi, x2_hi)); - // round and shift by 7 bit each 16 bit - temp_lo = _mm_mulhrs_epi16(temp_lo, k_256); - temp_hi = _mm_mulhrs_epi16(temp_hi, k_256); - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - temp_hi = _mm_packus_epi16(temp_lo, temp_hi); - src_ptr += 16; - // save 16 bytes convolve result - _mm_store_si128((__m128i*)&dst[i], temp_hi); - } -} - -static void scaledconvolve_vert_w16(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *y_filters, - int y0_q4, int y_step_q4, int w, int h) { - int y; - int y_q4 = y0_q4; - - src -= src_stride * (SUBPEL_TAPS / 2 - 1); - for (y = 0; y < h; ++y) { - const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; - const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; - if (y_q4 & SUBPEL_MASK) { - filter_vert_w16_ssse3(src_y, src_stride, &dst[y * dst_stride], y_filter, - w); - } else { - memcpy(&dst[y * dst_stride], &src_y[3 * src_stride], w); - } - y_q4 += y_step_q4; - } -} - -static void scaledconvolve2d(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const InterpKernel *const x_filters, - int x0_q4, int x_step_q4, - const InterpKernel *const y_filters, - int y0_q4, int y_step_q4, - int w, int h) { - // Note: Fixed size intermediate buffer, temp, places limits on parameters. - // 2d filtering proceeds in 2 steps: - // (1) Interpolate horizontally into an intermediate buffer, temp. - // (2) Interpolate temp vertically to derive the sub-pixel result. - // Deriving the maximum number of rows in the temp buffer (135): - // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). - // --Largest block size is 64x64 pixels. 
- // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the - // original frame (in 1/16th pixel units). - // --Must round-up because block may be located at sub-pixel position. - // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. - // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. - // --Require an additional 8 rows for the horiz_w8 transpose tail. - DECLARE_ALIGNED(16, uint8_t, temp[(135 + 8) * 64]); - const int intermediate_height = - (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; - - assert(w <= 64); - assert(h <= 64); - assert(y_step_q4 <= 32); - assert(x_step_q4 <= 32); - - if (w >= 8) { - scaledconvolve_horiz_w8(src - src_stride * (SUBPEL_TAPS / 2 - 1), - src_stride, temp, 64, x_filters, x0_q4, x_step_q4, - w, intermediate_height); - } else { - scaledconvolve_horiz_w4(src - src_stride * (SUBPEL_TAPS / 2 - 1), - src_stride, temp, 64, x_filters, x0_q4, x_step_q4, - w, intermediate_height); - } - - if (w >= 16) { - scaledconvolve_vert_w16(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, - dst_stride, y_filters, y0_q4, y_step_q4, w, h); - } else if (w == 8) { - scaledconvolve_vert_w8(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, - dst_stride, y_filters, y0_q4, y_step_q4, w, h); - } else { - scaledconvolve_vert_w4(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, - dst_stride, y_filters, y0_q4, y_step_q4, w, h); - } -} - -static const InterpKernel *get_filter_base(const int16_t *filter) { - // NOTE: This assumes that the filter table is 256-byte aligned. - // TODO(agrange) Modify to make independent of table alignment. 
- return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF)); -} - -static int get_filter_offset(const int16_t *f, const InterpKernel *base) { - return (int)((const InterpKernel *)(intptr_t)f - base); -} - -void vpx_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h) { - const InterpKernel *const filters_x = get_filter_base(filter_x); - const int x0_q4 = get_filter_offset(filter_x, filters_x); - - const InterpKernel *const filters_y = get_filter_base(filter_y); - const int y0_q4 = get_filter_offset(filter_y, filters_y); - - scaledconvolve2d(src, src_stride, dst, dst_stride, - filters_x, x0_q4, x_step_q4, - filters_y, y0_q4, y_step_q4, w, h); -} - -// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -// void vpx_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, -// uint8_t *dst, ptrdiff_t dst_stride, -// const int16_t *filter_x, int x_step_q4, -// const int16_t *filter_y, int y_step_q4, -// int w, int h); -FUN_CONV_2D(, ssse3); -FUN_CONV_2D(avg_ , ssse3); diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_sse2.asm b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_sse2.asm deleted file mode 100644 index 08f3d6a6cf..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_sse2.asm +++ /dev/null @@ -1,987 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -;Note: tap3 and tap4 have to be applied and added after other taps to avoid -;overflow. - -%macro GET_FILTERS_4 0 - mov rdx, arg(5) ;filter ptr - mov rcx, 0x0400040 - - movdqa xmm7, [rdx] ;load filters - pshuflw xmm0, xmm7, 0b ;k0 - pshuflw xmm1, xmm7, 01010101b ;k1 - pshuflw xmm2, xmm7, 10101010b ;k2 - pshuflw xmm3, xmm7, 11111111b ;k3 - psrldq xmm7, 8 - pshuflw xmm4, xmm7, 0b ;k4 - pshuflw xmm5, xmm7, 01010101b ;k5 - pshuflw xmm6, xmm7, 10101010b ;k6 - pshuflw xmm7, xmm7, 11111111b ;k7 - - punpcklqdq xmm0, xmm1 - punpcklqdq xmm2, xmm3 - punpcklqdq xmm5, xmm4 - punpcklqdq xmm6, xmm7 - - movdqa k0k1, xmm0 - movdqa k2k3, xmm2 - movdqa k5k4, xmm5 - movdqa k6k7, xmm6 - - movq xmm6, rcx - pshufd xmm6, xmm6, 0 - movdqa krd, xmm6 - - pxor xmm7, xmm7 - movdqa zero, xmm7 -%endm - -%macro APPLY_FILTER_4 1 - punpckldq xmm0, xmm1 ;two row in one register - punpckldq xmm6, xmm7 - punpckldq xmm2, xmm3 - punpckldq xmm5, xmm4 - - punpcklbw xmm0, zero ;unpack to word - punpcklbw xmm6, zero - punpcklbw xmm2, zero - punpcklbw xmm5, zero - - pmullw xmm0, k0k1 ;multiply the filter factors - pmullw xmm6, k6k7 - pmullw xmm2, k2k3 - pmullw xmm5, k5k4 - - paddsw xmm0, xmm6 ;sum - movdqa xmm1, xmm0 - psrldq xmm1, 8 - paddsw xmm0, xmm1 - paddsw xmm0, xmm2 - psrldq xmm2, 8 - paddsw xmm0, xmm5 - psrldq xmm5, 8 - paddsw xmm0, xmm2 - paddsw xmm0, xmm5 - - paddsw xmm0, krd ;rounding - psraw xmm0, 7 ;shift - packuswb xmm0, xmm0 ;pack to byte - -%if %1 - movd xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - movd [rdi], xmm0 -%endm - -%macro GET_FILTERS 0 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm7, [rdx] ;load filters - pshuflw xmm0, xmm7, 0b ;k0 - pshuflw xmm1, xmm7, 01010101b ;k1 - pshuflw xmm2, xmm7, 10101010b ;k2 - pshuflw xmm3, xmm7, 11111111b ;k3 - pshufhw xmm4, xmm7, 0b ;k4 - pshufhw xmm5, xmm7, 01010101b ;k5 - pshufhw xmm6, xmm7, 10101010b ;k6 - pshufhw xmm7, xmm7, 
11111111b ;k7 - - punpcklwd xmm0, xmm0 - punpcklwd xmm1, xmm1 - punpcklwd xmm2, xmm2 - punpcklwd xmm3, xmm3 - punpckhwd xmm4, xmm4 - punpckhwd xmm5, xmm5 - punpckhwd xmm6, xmm6 - punpckhwd xmm7, xmm7 - - movdqa k0, xmm0 ;store filter factors on stack - movdqa k1, xmm1 - movdqa k2, xmm2 - movdqa k3, xmm3 - movdqa k4, xmm4 - movdqa k5, xmm5 - movdqa k6, xmm6 - movdqa k7, xmm7 - - movq xmm6, rcx - pshufd xmm6, xmm6, 0 - movdqa krd, xmm6 ;rounding - - pxor xmm7, xmm7 - movdqa zero, xmm7 -%endm - -%macro LOAD_VERT_8 1 - movq xmm0, [rsi + %1] ;0 - movq xmm1, [rsi + rax + %1] ;1 - movq xmm6, [rsi + rdx * 2 + %1] ;6 - lea rsi, [rsi + rax] - movq xmm7, [rsi + rdx * 2 + %1] ;7 - movq xmm2, [rsi + rax + %1] ;2 - movq xmm3, [rsi + rax * 2 + %1] ;3 - movq xmm4, [rsi + rdx + %1] ;4 - movq xmm5, [rsi + rax * 4 + %1] ;5 -%endm - -%macro APPLY_FILTER_8 2 - punpcklbw xmm0, zero - punpcklbw xmm1, zero - punpcklbw xmm6, zero - punpcklbw xmm7, zero - punpcklbw xmm2, zero - punpcklbw xmm5, zero - punpcklbw xmm3, zero - punpcklbw xmm4, zero - - pmullw xmm0, k0 - pmullw xmm1, k1 - pmullw xmm6, k6 - pmullw xmm7, k7 - pmullw xmm2, k2 - pmullw xmm5, k5 - pmullw xmm3, k3 - pmullw xmm4, k4 - - paddsw xmm0, xmm1 - paddsw xmm0, xmm6 - paddsw xmm0, xmm7 - paddsw xmm0, xmm2 - paddsw xmm0, xmm5 - paddsw xmm0, xmm3 - paddsw xmm0, xmm4 - - paddsw xmm0, krd ;rounding - psraw xmm0, 7 ;shift - packuswb xmm0, xmm0 ;pack back to byte -%if %1 - movq xmm1, [rdi + %2] - pavgb xmm0, xmm1 -%endif - movq [rdi + %2], xmm0 -%endm - -;void vpx_filter_block1d4_v8_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vpx_filter_block1d4_v8_sse2) PRIVATE -sym(vpx_filter_block1d4_v8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 6 - %define k0k1 [rsp + 16 * 0] - %define k2k3 [rsp 
+ 16 * 1] - %define k5k4 [rsp + 16 * 2] - %define k6k7 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define zero [rsp + 16 * 5] - - GET_FILTERS_4 - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rbx, DWORD PTR arg(3) ;out_pitch - lea rdx, [rax + rax * 2] - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movd xmm0, [rsi] ;load src: row 0 - movd xmm1, [rsi + rax] ;1 - movd xmm6, [rsi + rdx * 2] ;6 - lea rsi, [rsi + rax] - movd xmm7, [rsi + rdx * 2] ;7 - movd xmm2, [rsi + rax] ;2 - movd xmm3, [rsi + rax * 2] ;3 - movd xmm4, [rsi + rdx] ;4 - movd xmm5, [rsi + rax * 4] ;5 - - APPLY_FILTER_4 0 - - lea rdi, [rdi + rbx] - dec rcx - jnz .loop - - add rsp, 16 * 6 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vpx_filter_block1d8_v8_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vpx_filter_block1d8_v8_sse2) PRIVATE -sym(vpx_filter_block1d8_v8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 10 - %define k0 [rsp + 16 * 0] - %define k1 [rsp + 16 * 1] - %define k2 [rsp + 16 * 2] - %define k3 [rsp + 16 * 3] - %define k4 [rsp + 16 * 4] - %define k5 [rsp + 16 * 5] - %define k6 [rsp + 16 * 6] - %define k7 [rsp + 16 * 7] - %define krd [rsp + 16 * 8] - %define zero [rsp + 16 * 9] - - GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rbx, DWORD PTR arg(3) ;out_pitch - lea rdx, [rax + rax * 2] - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - LOAD_VERT_8 0 - APPLY_FILTER_8 0, 0 - - lea rdi, [rdi + rbx] - dec rcx - jnz .loop - - add rsp, 16 * 10 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void 
vpx_filter_block1d16_v8_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pitch, -; unsigned char *output_ptr, -; unsigned int out_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vpx_filter_block1d16_v8_sse2) PRIVATE -sym(vpx_filter_block1d16_v8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 10 - %define k0 [rsp + 16 * 0] - %define k1 [rsp + 16 * 1] - %define k2 [rsp + 16 * 2] - %define k3 [rsp + 16 * 3] - %define k4 [rsp + 16 * 4] - %define k5 [rsp + 16 * 5] - %define k6 [rsp + 16 * 6] - %define k7 [rsp + 16 * 7] - %define krd [rsp + 16 * 8] - %define zero [rsp + 16 * 9] - - GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rbx, DWORD PTR arg(3) ;out_pitch - lea rdx, [rax + rax * 2] - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - LOAD_VERT_8 0 - APPLY_FILTER_8 0, 0 - sub rsi, rax - - LOAD_VERT_8 8 - APPLY_FILTER_8 0, 8 - add rdi, rbx - - dec rcx - jnz .loop - - add rsp, 16 * 10 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d4_v8_avg_sse2) PRIVATE -sym(vpx_filter_block1d4_v8_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 6 - %define k0k1 [rsp + 16 * 0] - %define k2k3 [rsp + 16 * 1] - %define k5k4 [rsp + 16 * 2] - %define k6k7 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define zero [rsp + 16 * 5] - - GET_FILTERS_4 - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rbx, DWORD PTR arg(3) ;out_pitch - lea rdx, [rax + rax * 2] - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movd xmm0, [rsi] ;load src: row 0 - movd xmm1, [rsi + rax] ;1 - movd xmm6, [rsi + rdx * 2] ;6 - lea rsi, [rsi + rax] - movd xmm7, [rsi + rdx * 2] ;7 - 
movd xmm2, [rsi + rax] ;2 - movd xmm3, [rsi + rax * 2] ;3 - movd xmm4, [rsi + rdx] ;4 - movd xmm5, [rsi + rax * 4] ;5 - - APPLY_FILTER_4 1 - - lea rdi, [rdi + rbx] - dec rcx - jnz .loop - - add rsp, 16 * 6 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_v8_avg_sse2) PRIVATE -sym(vpx_filter_block1d8_v8_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 10 - %define k0 [rsp + 16 * 0] - %define k1 [rsp + 16 * 1] - %define k2 [rsp + 16 * 2] - %define k3 [rsp + 16 * 3] - %define k4 [rsp + 16 * 4] - %define k5 [rsp + 16 * 5] - %define k6 [rsp + 16 * 6] - %define k7 [rsp + 16 * 7] - %define krd [rsp + 16 * 8] - %define zero [rsp + 16 * 9] - - GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rbx, DWORD PTR arg(3) ;out_pitch - lea rdx, [rax + rax * 2] - movsxd rcx, DWORD PTR arg(4) ;output_height -.loop: - LOAD_VERT_8 0 - APPLY_FILTER_8 1, 0 - - lea rdi, [rdi + rbx] - dec rcx - jnz .loop - - add rsp, 16 * 10 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_v8_avg_sse2) PRIVATE -sym(vpx_filter_block1d16_v8_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - push rbx - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 10 - %define k0 [rsp + 16 * 0] - %define k1 [rsp + 16 * 1] - %define k2 [rsp + 16 * 2] - %define k3 [rsp + 16 * 3] - %define k4 [rsp + 16 * 4] - %define k5 [rsp + 16 * 5] - %define k6 [rsp + 16 * 6] - %define k7 [rsp + 16 * 7] - %define krd [rsp + 16 * 8] - %define zero [rsp + 16 * 9] - - GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rbx, DWORD PTR arg(3) ;out_pitch - lea rdx, [rax + rax * 2] - movsxd rcx, DWORD PTR arg(4) ;output_height -.loop: - LOAD_VERT_8 0 - APPLY_FILTER_8 1, 0 
- sub rsi, rax - - LOAD_VERT_8 8 - APPLY_FILTER_8 1, 8 - add rdi, rbx - - dec rcx - jnz .loop - - add rsp, 16 * 10 - pop rsp - pop rbx - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vpx_filter_block1d4_h8_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vpx_filter_block1d4_h8_sse2) PRIVATE -sym(vpx_filter_block1d4_h8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 6 - %define k0k1 [rsp + 16 * 0] - %define k2k3 [rsp + 16 * 1] - %define k5k4 [rsp + 16 * 2] - %define k6k7 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define zero [rsp + 16 * 5] - - GET_FILTERS_4 - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movdqu xmm0, [rsi - 3] ;load src - - movdqa xmm1, xmm0 - movdqa xmm6, xmm0 - movdqa xmm7, xmm0 - movdqa xmm2, xmm0 - movdqa xmm3, xmm0 - movdqa xmm5, xmm0 - movdqa xmm4, xmm0 - - psrldq xmm1, 1 - psrldq xmm6, 6 - psrldq xmm7, 7 - psrldq xmm2, 2 - psrldq xmm3, 3 - psrldq xmm5, 5 - psrldq xmm4, 4 - - APPLY_FILTER_4 0 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx - jnz .loop - - add rsp, 16 * 6 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vpx_filter_block1d8_h8_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vpx_filter_block1d8_h8_sse2) PRIVATE -sym(vpx_filter_block1d8_h8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - 
sub rsp, 16 * 10 - %define k0 [rsp + 16 * 0] - %define k1 [rsp + 16 * 1] - %define k2 [rsp + 16 * 2] - %define k3 [rsp + 16 * 3] - %define k4 [rsp + 16 * 4] - %define k5 [rsp + 16 * 5] - %define k6 [rsp + 16 * 6] - %define k7 [rsp + 16 * 7] - %define krd [rsp + 16 * 8] - %define zero [rsp + 16 * 9] - - GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movdqu xmm0, [rsi - 3] ;load src - - movdqa xmm1, xmm0 - movdqa xmm6, xmm0 - movdqa xmm7, xmm0 - movdqa xmm2, xmm0 - movdqa xmm5, xmm0 - movdqa xmm3, xmm0 - movdqa xmm4, xmm0 - - psrldq xmm1, 1 - psrldq xmm6, 6 - psrldq xmm7, 7 - psrldq xmm2, 2 - psrldq xmm5, 5 - psrldq xmm3, 3 - psrldq xmm4, 4 - - APPLY_FILTER_8 0, 0 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx - jnz .loop - - add rsp, 16 * 10 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void vpx_filter_block1d16_h8_sse2 -;( -; unsigned char *src_ptr, -; unsigned int src_pixels_per_line, -; unsigned char *output_ptr, -; unsigned int output_pitch, -; unsigned int output_height, -; short *filter -;) -global sym(vpx_filter_block1d16_h8_sse2) PRIVATE -sym(vpx_filter_block1d16_h8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 10 - %define k0 [rsp + 16 * 0] - %define k1 [rsp + 16 * 1] - %define k2 [rsp + 16 * 2] - %define k3 [rsp + 16 * 3] - %define k4 [rsp + 16 * 4] - %define k5 [rsp + 16 * 5] - %define k6 [rsp + 16 * 6] - %define k7 [rsp + 16 * 7] - %define krd [rsp + 16 * 8] - %define zero [rsp + 16 * 9] - - GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movdqu xmm0, [rsi - 3] ;load src - - movdqa xmm1, xmm0 - movdqa xmm6, xmm0 - movdqa xmm7, xmm0 - movdqa xmm2, xmm0 - movdqa xmm5, 
xmm0 - movdqa xmm3, xmm0 - movdqa xmm4, xmm0 - - psrldq xmm1, 1 - psrldq xmm6, 6 - psrldq xmm7, 7 - psrldq xmm2, 2 - psrldq xmm5, 5 - psrldq xmm3, 3 - psrldq xmm4, 4 - - APPLY_FILTER_8 0, 0 - - movdqu xmm0, [rsi + 5] ;load src - - movdqa xmm1, xmm0 - movdqa xmm6, xmm0 - movdqa xmm7, xmm0 - movdqa xmm2, xmm0 - movdqa xmm5, xmm0 - movdqa xmm3, xmm0 - movdqa xmm4, xmm0 - - psrldq xmm1, 1 - psrldq xmm6, 6 - psrldq xmm7, 7 - psrldq xmm2, 2 - psrldq xmm5, 5 - psrldq xmm3, 3 - psrldq xmm4, 4 - - APPLY_FILTER_8 0, 8 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx - jnz .loop - - add rsp, 16 * 10 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d4_h8_avg_sse2) PRIVATE -sym(vpx_filter_block1d4_h8_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 6 - %define k0k1 [rsp + 16 * 0] - %define k2k3 [rsp + 16 * 1] - %define k5k4 [rsp + 16 * 2] - %define k6k7 [rsp + 16 * 3] - %define krd [rsp + 16 * 4] - %define zero [rsp + 16 * 5] - - GET_FILTERS_4 - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movdqu xmm0, [rsi - 3] ;load src - - movdqa xmm1, xmm0 - movdqa xmm6, xmm0 - movdqa xmm7, xmm0 - movdqa xmm2, xmm0 - movdqa xmm3, xmm0 - movdqa xmm5, xmm0 - movdqa xmm4, xmm0 - - psrldq xmm1, 1 - psrldq xmm6, 6 - psrldq xmm7, 7 - psrldq xmm2, 2 - psrldq xmm3, 3 - psrldq xmm5, 5 - psrldq xmm4, 4 - - APPLY_FILTER_4 1 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx - jnz .loop - - add rsp, 16 * 6 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_h8_avg_sse2) PRIVATE -sym(vpx_filter_block1d8_h8_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - 
SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 10 - %define k0 [rsp + 16 * 0] - %define k1 [rsp + 16 * 1] - %define k2 [rsp + 16 * 2] - %define k3 [rsp + 16 * 3] - %define k4 [rsp + 16 * 4] - %define k5 [rsp + 16 * 5] - %define k6 [rsp + 16 * 6] - %define k7 [rsp + 16 * 7] - %define krd [rsp + 16 * 8] - %define zero [rsp + 16 * 9] - - GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movdqu xmm0, [rsi - 3] ;load src - - movdqa xmm1, xmm0 - movdqa xmm6, xmm0 - movdqa xmm7, xmm0 - movdqa xmm2, xmm0 - movdqa xmm5, xmm0 - movdqa xmm3, xmm0 - movdqa xmm4, xmm0 - - psrldq xmm1, 1 - psrldq xmm6, 6 - psrldq xmm7, 7 - psrldq xmm2, 2 - psrldq xmm5, 5 - psrldq xmm3, 3 - psrldq xmm4, 4 - - APPLY_FILTER_8 1, 0 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx - jnz .loop - - add rsp, 16 * 10 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_h8_avg_sse2) PRIVATE -sym(vpx_filter_block1d16_h8_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - ALIGN_STACK 16, rax - sub rsp, 16 * 10 - %define k0 [rsp + 16 * 0] - %define k1 [rsp + 16 * 1] - %define k2 [rsp + 16 * 2] - %define k3 [rsp + 16 * 3] - %define k4 [rsp + 16 * 4] - %define k5 [rsp + 16 * 5] - %define k6 [rsp + 16 * 6] - %define k7 [rsp + 16 * 7] - %define krd [rsp + 16 * 8] - %define zero [rsp + 16 * 9] - - GET_FILTERS - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height - -.loop: - movdqu xmm0, [rsi - 3] ;load src - - movdqa xmm1, xmm0 - movdqa xmm6, xmm0 - movdqa xmm7, xmm0 - movdqa xmm2, xmm0 - movdqa xmm5, xmm0 - movdqa xmm3, xmm0 - movdqa xmm4, xmm0 - - psrldq xmm1, 1 - psrldq xmm6, 6 - psrldq xmm7, 7 - psrldq xmm2, 2 - psrldq xmm5, 5 - 
psrldq xmm3, 3 - psrldq xmm4, 4 - - APPLY_FILTER_8 1, 0 - - movdqu xmm0, [rsi + 5] ;load src - - movdqa xmm1, xmm0 - movdqa xmm6, xmm0 - movdqa xmm7, xmm0 - movdqa xmm2, xmm0 - movdqa xmm5, xmm0 - movdqa xmm3, xmm0 - movdqa xmm4, xmm0 - - psrldq xmm1, 1 - psrldq xmm6, 6 - psrldq xmm7, 7 - psrldq xmm2, 2 - psrldq xmm5, 5 - psrldq xmm3, 3 - psrldq xmm4, 4 - - APPLY_FILTER_8 1, 8 - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx - jnz .loop - - add rsp, 16 * 10 - pop rsp - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm deleted file mode 100644 index d2cb8ea292..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm +++ /dev/null @@ -1,629 +0,0 @@ -; -; Copyright (c) 2015 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "third_party/x86inc/x86inc.asm" - -SECTION_RODATA -pw_64: times 8 dw 64 - -; %define USE_PMULHRSW -; NOTE: pmulhrsw has a latency of 5 cycles. Tests showed a performance loss -; when using this instruction. -; -; The add order below (based on ffvp9) must be followed to prevent outranges. -; x = k0k1 + k4k5 -; y = k2k3 + k6k7 -; z = signed SAT(x + y) - -SECTION .text -%if ARCH_X86_64 - %define LOCAL_VARS_SIZE 16*4 -%else - %define LOCAL_VARS_SIZE 16*6 -%endif - -%macro SETUP_LOCAL_VARS 0 - ; TODO(slavarnway): using xmm registers for these on ARCH_X86_64 + - ; pmaddubsw has a higher latency on some platforms, this might be eased by - ; interleaving the instructions. 
- %define k0k1 [rsp + 16*0] - %define k2k3 [rsp + 16*1] - %define k4k5 [rsp + 16*2] - %define k6k7 [rsp + 16*3] - packsswb m4, m4 - ; TODO(slavarnway): multiple pshufb instructions had a higher latency on - ; some platforms. - pshuflw m0, m4, 0b ;k0_k1 - pshuflw m1, m4, 01010101b ;k2_k3 - pshuflw m2, m4, 10101010b ;k4_k5 - pshuflw m3, m4, 11111111b ;k6_k7 - punpcklqdq m0, m0 - punpcklqdq m1, m1 - punpcklqdq m2, m2 - punpcklqdq m3, m3 - mova k0k1, m0 - mova k2k3, m1 - mova k4k5, m2 - mova k6k7, m3 -%if ARCH_X86_64 - %define krd m12 - %define tmp m13 - mova krd, [GLOBAL(pw_64)] -%else - %define tmp [rsp + 16*4] - %define krd [rsp + 16*5] -%if CONFIG_PIC=0 - mova m6, [GLOBAL(pw_64)] -%else - ; build constants without accessing global memory - pcmpeqb m6, m6 ;all ones - psrlw m6, 15 - psllw m6, 6 ;aka pw_64 -%endif - mova krd, m6 -%endif -%endm - -%macro HORIZx4_ROW 2 - mova %2, %1 - punpcklbw %1, %1 - punpckhbw %2, %2 - - mova m3, %2 - palignr %2, %1, 1 - palignr m3, %1, 5 - - pmaddubsw %2, k0k1k4k5 - pmaddubsw m3, k2k3k6k7 - mova m4, %2 ;k0k1 - mova m5, m3 ;k2k3 - psrldq %2, 8 ;k4k5 - psrldq m3, 8 ;k6k7 - paddsw %2, m4 - paddsw m5, m3 - paddsw %2, m5 - paddsw %2, krd - psraw %2, 7 - packuswb %2, %2 -%endm - -;------------------------------------------------------------------------------- -%macro SUBPIX_HFILTER4 1 -cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \ - src, sstride, dst, dstride, height, filter - mova m4, [filterq] - packsswb m4, m4 -%if ARCH_X86_64 - %define k0k1k4k5 m8 - %define k2k3k6k7 m9 - %define krd m10 - %define orig_height r7d - mova krd, [GLOBAL(pw_64)] - pshuflw k0k1k4k5, m4, 0b ;k0_k1 - pshufhw k0k1k4k5, k0k1k4k5, 10101010b ;k0_k1_k4_k5 - pshuflw k2k3k6k7, m4, 01010101b ;k2_k3 - pshufhw k2k3k6k7, k2k3k6k7, 11111111b ;k2_k3_k6_k7 -%else - %define k0k1k4k5 [rsp + 16*0] - %define k2k3k6k7 [rsp + 16*1] - %define krd [rsp + 16*2] - %define orig_height [rsp + 16*3] - pshuflw m6, m4, 0b ;k0_k1 - pshufhw m6, m6, 10101010b 
;k0_k1_k4_k5 - pshuflw m7, m4, 01010101b ;k2_k3 - pshufhw m7, m7, 11111111b ;k2_k3_k6_k7 -%if CONFIG_PIC=0 - mova m1, [GLOBAL(pw_64)] -%else - ; build constants without accessing global memory - pcmpeqb m1, m1 ;all ones - psrlw m1, 15 - psllw m1, 6 ;aka pw_64 -%endif - mova k0k1k4k5, m6 - mova k2k3k6k7, m7 - mova krd, m1 -%endif - mov orig_height, heightd - shr heightd, 1 -.loop: - ;Do two rows at once - movh m0, [srcq - 3] - movh m1, [srcq + 5] - punpcklqdq m0, m1 - mova m1, m0 - movh m2, [srcq + sstrideq - 3] - movh m3, [srcq + sstrideq + 5] - punpcklqdq m2, m3 - mova m3, m2 - punpcklbw m0, m0 - punpckhbw m1, m1 - punpcklbw m2, m2 - punpckhbw m3, m3 - mova m4, m1 - palignr m4, m0, 1 - pmaddubsw m4, k0k1k4k5 - palignr m1, m0, 5 - pmaddubsw m1, k2k3k6k7 - mova m7, m3 - palignr m7, m2, 1 - pmaddubsw m7, k0k1k4k5 - palignr m3, m2, 5 - pmaddubsw m3, k2k3k6k7 - mova m0, m4 ;k0k1 - mova m5, m1 ;k2k3 - mova m2, m7 ;k0k1 upper - psrldq m4, 8 ;k4k5 - psrldq m1, 8 ;k6k7 - paddsw m4, m0 - paddsw m5, m1 - mova m1, m3 ;k2k3 upper - psrldq m7, 8 ;k4k5 upper - psrldq m3, 8 ;k6k7 upper - paddsw m7, m2 - paddsw m4, m5 - paddsw m1, m3 - paddsw m7, m1 - paddsw m4, krd - psraw m4, 7 - packuswb m4, m4 - paddsw m7, krd - psraw m7, 7 - packuswb m7, m7 - -%ifidn %1, h8_avg - movd m0, [dstq] - pavgb m4, m0 - movd m2, [dstq + dstrideq] - pavgb m7, m2 -%endif - movd [dstq], m4 - movd [dstq + dstrideq], m7 - - lea srcq, [srcq + sstrideq ] - prefetcht0 [srcq + 4 * sstrideq - 3] - lea srcq, [srcq + sstrideq ] - lea dstq, [dstq + 2 * dstrideq ] - prefetcht0 [srcq + 2 * sstrideq - 3] - - dec heightd - jnz .loop - - ; Do last row if output_height is odd - mov heightd, orig_height - and heightd, 1 - je .done - - movh m0, [srcq - 3] ; load src - movh m1, [srcq + 5] - punpcklqdq m0, m1 - - HORIZx4_ROW m0, m1 -%ifidn %1, h8_avg - movd m0, [dstq] - pavgb m1, m0 -%endif - movd [dstq], m1 -.done - RET -%endm - -%macro HORIZx8_ROW 5 - mova %2, %1 - punpcklbw %1, %1 - punpckhbw %2, %2 - - mova %3, %2 - 
mova %4, %2 - mova %5, %2 - - palignr %2, %1, 1 - palignr %3, %1, 5 - palignr %4, %1, 9 - palignr %5, %1, 13 - - pmaddubsw %2, k0k1 - pmaddubsw %3, k2k3 - pmaddubsw %4, k4k5 - pmaddubsw %5, k6k7 - paddsw %2, %4 - paddsw %5, %3 - paddsw %2, %5 - paddsw %2, krd - psraw %2, 7 - packuswb %2, %2 - SWAP %1, %2 -%endm - -;------------------------------------------------------------------------------- -%macro SUBPIX_HFILTER8 1 -cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 14, LOCAL_VARS_SIZE, \ - src, sstride, dst, dstride, height, filter - mova m4, [filterq] - SETUP_LOCAL_VARS -%if ARCH_X86_64 - %define orig_height r7d -%else - %define orig_height heightmp -%endif - mov orig_height, heightd - shr heightd, 1 - -.loop: - movh m0, [srcq - 3] - movh m3, [srcq + 5] - movh m4, [srcq + sstrideq - 3] - movh m7, [srcq + sstrideq + 5] - punpcklqdq m0, m3 - mova m1, m0 - punpcklbw m0, m0 - punpckhbw m1, m1 - mova m5, m1 - palignr m5, m0, 13 - pmaddubsw m5, k6k7 - mova m2, m1 - mova m3, m1 - palignr m1, m0, 1 - pmaddubsw m1, k0k1 - punpcklqdq m4, m7 - mova m6, m4 - punpcklbw m4, m4 - palignr m2, m0, 5 - punpckhbw m6, m6 - palignr m3, m0, 9 - mova m7, m6 - pmaddubsw m2, k2k3 - pmaddubsw m3, k4k5 - - palignr m7, m4, 13 - mova m0, m6 - palignr m0, m4, 5 - pmaddubsw m7, k6k7 - paddsw m1, m3 - paddsw m2, m5 - paddsw m1, m2 - mova m5, m6 - palignr m6, m4, 1 - pmaddubsw m0, k2k3 - pmaddubsw m6, k0k1 - palignr m5, m4, 9 - paddsw m1, krd - pmaddubsw m5, k4k5 - psraw m1, 7 - paddsw m0, m7 -%ifidn %1, h8_avg - movh m7, [dstq] - movh m2, [dstq + dstrideq] -%endif - packuswb m1, m1 - paddsw m6, m5 - paddsw m6, m0 - paddsw m6, krd - psraw m6, 7 - packuswb m6, m6 -%ifidn %1, h8_avg - pavgb m1, m7 - pavgb m6, m2 -%endif - movh [dstq], m1 - movh [dstq + dstrideq], m6 - - lea srcq, [srcq + sstrideq ] - prefetcht0 [srcq + 4 * sstrideq - 3] - lea srcq, [srcq + sstrideq ] - lea dstq, [dstq + 2 * dstrideq ] - prefetcht0 [srcq + 2 * sstrideq - 3] - dec heightd - jnz .loop - - ;Do last row if 
output_height is odd - mov heightd, orig_height - and heightd, 1 - je .done - - movh m0, [srcq - 3] - movh m3, [srcq + 5] - punpcklqdq m0, m3 - - HORIZx8_ROW m0, m1, m2, m3, m4 - -%ifidn %1, h8_avg - movh m1, [dstq] - pavgb m0, m1 -%endif - movh [dstq], m0 -.done: - RET -%endm - -;------------------------------------------------------------------------------- -%macro SUBPIX_HFILTER16 1 -cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \ - src, sstride, dst, dstride, height, filter - mova m4, [filterq] - SETUP_LOCAL_VARS -.loop: - prefetcht0 [srcq + 2 * sstrideq -3] - - movh m0, [srcq - 3] - movh m4, [srcq + 5] - movh m6, [srcq + 13] - punpcklqdq m0, m4 - mova m7, m0 - punpckhbw m0, m0 - mova m1, m0 - punpcklqdq m4, m6 - mova m3, m0 - punpcklbw m7, m7 - - palignr m3, m7, 13 - mova m2, m0 - pmaddubsw m3, k6k7 - palignr m0, m7, 1 - pmaddubsw m0, k0k1 - palignr m1, m7, 5 - pmaddubsw m1, k2k3 - palignr m2, m7, 9 - pmaddubsw m2, k4k5 - paddsw m1, m3 - mova m3, m4 - punpckhbw m4, m4 - mova m5, m4 - punpcklbw m3, m3 - mova m7, m4 - palignr m5, m3, 5 - mova m6, m4 - palignr m4, m3, 1 - pmaddubsw m4, k0k1 - pmaddubsw m5, k2k3 - palignr m6, m3, 9 - pmaddubsw m6, k4k5 - palignr m7, m3, 13 - pmaddubsw m7, k6k7 - paddsw m0, m2 - paddsw m0, m1 -%ifidn %1, h8_avg - mova m1, [dstq] -%endif - paddsw m4, m6 - paddsw m5, m7 - paddsw m4, m5 - paddsw m0, krd - paddsw m4, krd - psraw m0, 7 - psraw m4, 7 - packuswb m0, m4 -%ifidn %1, h8_avg - pavgb m0, m1 -%endif - lea srcq, [srcq + sstrideq] - mova [dstq], m0 - lea dstq, [dstq + dstrideq] - dec heightd - jnz .loop - RET -%endm - -INIT_XMM ssse3 -SUBPIX_HFILTER16 h8 -SUBPIX_HFILTER16 h8_avg -SUBPIX_HFILTER8 h8 -SUBPIX_HFILTER8 h8_avg -SUBPIX_HFILTER4 h8 -SUBPIX_HFILTER4 h8_avg - -;------------------------------------------------------------------------------- -%macro SUBPIX_VFILTER 2 -cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ - src, sstride, dst, dstride, height, filter - mova m4, 
[filterq] - SETUP_LOCAL_VARS -%if ARCH_X86_64 - %define src1q r7 - %define sstride6q r8 - %define dst_stride dstrideq -%else - %define src1q filterq - %define sstride6q dstrideq - %define dst_stride dstridemp -%endif - mov src1q, srcq - add src1q, sstrideq - lea sstride6q, [sstrideq + sstrideq * 4] - add sstride6q, sstrideq ;pitch * 6 - -%ifidn %2, 8 - %define movx movh -%else - %define movx movd -%endif -.loop: - movx m0, [srcq ] ;A - movx m1, [srcq + sstrideq ] ;B - punpcklbw m0, m1 ;A B - movx m2, [srcq + sstrideq * 2 ] ;C - pmaddubsw m0, k0k1 - mova m6, m2 - movx m3, [src1q + sstrideq * 2] ;D - punpcklbw m2, m3 ;C D - pmaddubsw m2, k2k3 - movx m4, [srcq + sstrideq * 4 ] ;E - mova m7, m4 - movx m5, [src1q + sstrideq * 4] ;F - punpcklbw m4, m5 ;E F - pmaddubsw m4, k4k5 - punpcklbw m1, m6 ;A B next iter - movx m6, [srcq + sstride6q ] ;G - punpcklbw m5, m6 ;E F next iter - punpcklbw m3, m7 ;C D next iter - pmaddubsw m5, k4k5 - movx m7, [src1q + sstride6q ] ;H - punpcklbw m6, m7 ;G H - pmaddubsw m6, k6k7 - pmaddubsw m3, k2k3 - pmaddubsw m1, k0k1 - paddsw m0, m4 - paddsw m2, m6 - movx m6, [srcq + sstrideq * 8 ] ;H next iter - punpcklbw m7, m6 - pmaddubsw m7, k6k7 - paddsw m0, m2 - paddsw m0, krd - psraw m0, 7 - paddsw m1, m5 - packuswb m0, m0 - - paddsw m3, m7 - paddsw m1, m3 - paddsw m1, krd - psraw m1, 7 - lea srcq, [srcq + sstrideq * 2 ] - lea src1q, [src1q + sstrideq * 2] - packuswb m1, m1 - -%ifidn %1, v8_avg - movx m2, [dstq] - pavgb m0, m2 -%endif - movx [dstq], m0 - add dstq, dst_stride -%ifidn %1, v8_avg - movx m3, [dstq] - pavgb m1, m3 -%endif - movx [dstq], m1 - add dstq, dst_stride - sub heightd, 2 - cmp heightd, 1 - jg .loop - - cmp heightd, 0 - je .done - - movx m0, [srcq ] ;A - movx m1, [srcq + sstrideq ] ;B - movx m6, [srcq + sstride6q ] ;G - punpcklbw m0, m1 ;A B - movx m7, [src1q + sstride6q ] ;H - pmaddubsw m0, k0k1 - movx m2, [srcq + sstrideq * 2 ] ;C - punpcklbw m6, m7 ;G H - movx m3, [src1q + sstrideq * 2] ;D - pmaddubsw m6, k6k7 - movx m4, 
[srcq + sstrideq * 4 ] ;E - punpcklbw m2, m3 ;C D - movx m5, [src1q + sstrideq * 4] ;F - punpcklbw m4, m5 ;E F - pmaddubsw m2, k2k3 - pmaddubsw m4, k4k5 - paddsw m2, m6 - paddsw m0, m4 - paddsw m0, m2 - paddsw m0, krd - psraw m0, 7 - packuswb m0, m0 -%ifidn %1, v8_avg - movx m1, [dstq] - pavgb m0, m1 -%endif - movx [dstq], m0 -.done: - RET -%endm - -;------------------------------------------------------------------------------- -%macro SUBPIX_VFILTER16 1 -cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ - src, sstride, dst, dstride, height, filter - mova m4, [filterq] - SETUP_LOCAL_VARS -%if ARCH_X86_64 - %define src1q r7 - %define sstride6q r8 - %define dst_stride dstrideq -%else - %define src1q filterq - %define sstride6q dstrideq - %define dst_stride dstridemp -%endif - mov src1q, srcq - add src1q, sstrideq - lea sstride6q, [sstrideq + sstrideq * 4] - add sstride6q, sstrideq ;pitch * 6 - -.loop: - movh m0, [srcq ] ;A - movh m1, [srcq + sstrideq ] ;B - movh m2, [srcq + sstrideq * 2 ] ;C - movh m3, [src1q + sstrideq * 2] ;D - movh m4, [srcq + sstrideq * 4 ] ;E - movh m5, [src1q + sstrideq * 4] ;F - - punpcklbw m0, m1 ;A B - movh m6, [srcq + sstride6q] ;G - punpcklbw m2, m3 ;C D - movh m7, [src1q + sstride6q] ;H - punpcklbw m4, m5 ;E F - pmaddubsw m0, k0k1 - movh m3, [srcq + 8] ;A - pmaddubsw m2, k2k3 - punpcklbw m6, m7 ;G H - movh m5, [srcq + sstrideq + 8] ;B - pmaddubsw m4, k4k5 - punpcklbw m3, m5 ;A B - movh m7, [srcq + sstrideq * 2 + 8] ;C - pmaddubsw m6, k6k7 - movh m5, [src1q + sstrideq * 2 + 8] ;D - punpcklbw m7, m5 ;C D - paddsw m2, m6 - pmaddubsw m3, k0k1 - movh m1, [srcq + sstrideq * 4 + 8] ;E - paddsw m0, m4 - pmaddubsw m7, k2k3 - movh m6, [src1q + sstrideq * 4 + 8] ;F - punpcklbw m1, m6 ;E F - paddsw m0, m2 - paddsw m0, krd - movh m2, [srcq + sstride6q + 8] ;G - pmaddubsw m1, k4k5 - movh m5, [src1q + sstride6q + 8] ;H - psraw m0, 7 - punpcklbw m2, m5 ;G H - pmaddubsw m2, k6k7 -%ifidn %1, v8_avg - mova m4, [dstq] -%endif - movh 
[dstq], m0 - paddsw m7, m2 - paddsw m3, m1 - paddsw m3, m7 - paddsw m3, krd - psraw m3, 7 - packuswb m0, m3 - - add srcq, sstrideq - add src1q, sstrideq -%ifidn %1, v8_avg - pavgb m0, m4 -%endif - mova [dstq], m0 - add dstq, dst_stride - dec heightd - jnz .loop - RET -%endm - -INIT_XMM ssse3 -SUBPIX_VFILTER16 v8 -SUBPIX_VFILTER16 v8_avg -SUBPIX_VFILTER v8, 8 -SUBPIX_VFILTER v8_avg, 8 -SUBPIX_VFILTER v8, 4 -SUBPIX_VFILTER v8_avg, 4 diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm deleted file mode 100644 index a378dd0402..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm +++ /dev/null @@ -1,448 +0,0 @@ -; -; Copyright (c) 2014 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - -%include "vpx_ports/x86_abi_support.asm" - -%macro GET_PARAM_4 0 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm3, [rdx] ;load filters - pshuflw xmm4, xmm3, 11111111b ;k3 - psrldq xmm3, 8 - pshuflw xmm3, xmm3, 0b ;k4 - punpcklqdq xmm4, xmm3 ;k3k4 - - movq xmm3, rcx ;rounding - pshufd xmm3, xmm3, 0 - - pxor xmm2, xmm2 - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height -%endm - -%macro APPLY_FILTER_4 1 - - punpckldq xmm0, xmm1 ;two row in one register - punpcklbw xmm0, xmm2 ;unpack to word - pmullw xmm0, xmm4 ;multiply the filter factors - - movdqa xmm1, xmm0 - psrldq xmm1, 8 - paddsw xmm0, xmm1 - - paddsw xmm0, xmm3 ;rounding - psraw xmm0, 7 ;shift - packuswb xmm0, xmm0 ;pack to byte - -%if %1 - movd xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - - movd [rdi], xmm0 - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx -%endm - -%macro GET_PARAM 0 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm7, [rdx] ;load filters - - pshuflw xmm6, xmm7, 11111111b ;k3 - pshufhw xmm7, xmm7, 0b ;k4 - punpcklwd xmm6, xmm6 - punpckhwd xmm7, xmm7 - - movq xmm4, rcx ;rounding - pshufd xmm4, xmm4, 0 - - pxor xmm5, xmm5 - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height -%endm - -%macro APPLY_FILTER_8 1 - punpcklbw xmm0, xmm5 - punpcklbw xmm1, xmm5 - - pmullw xmm0, xmm6 - pmullw xmm1, xmm7 - paddsw xmm0, xmm1 - paddsw xmm0, xmm4 ;rounding - psraw xmm0, 7 ;shift - packuswb xmm0, xmm0 ;pack back to byte -%if %1 - movq xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - movq [rdi], xmm0 ;store the result - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx -%endm - -%macro APPLY_FILTER_16 1 - punpcklbw xmm0, xmm5 - punpcklbw xmm1, xmm5 - punpckhbw xmm2, xmm5 - punpckhbw xmm3, 
xmm5 - - pmullw xmm0, xmm6 - pmullw xmm1, xmm7 - pmullw xmm2, xmm6 - pmullw xmm3, xmm7 - - paddsw xmm0, xmm1 - paddsw xmm2, xmm3 - - paddsw xmm0, xmm4 ;rounding - paddsw xmm2, xmm4 - psraw xmm0, 7 ;shift - psraw xmm2, 7 - packuswb xmm0, xmm2 ;pack back to byte -%if %1 - movdqu xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - movdqu [rdi], xmm0 ;store the result - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx -%endm - -global sym(vpx_filter_block1d4_v2_sse2) PRIVATE -sym(vpx_filter_block1d4_v2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movd xmm0, [rsi] ;load src - movd xmm1, [rsi + rax] - - APPLY_FILTER_4 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_v2_sse2) PRIVATE -sym(vpx_filter_block1d8_v2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movq xmm0, [rsi] ;0 - movq xmm1, [rsi + rax] ;1 - - APPLY_FILTER_8 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_v2_sse2) PRIVATE -sym(vpx_filter_block1d16_v2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;0 - movdqu xmm1, [rsi + rax] ;1 - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - - APPLY_FILTER_16 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d4_v2_avg_sse2) PRIVATE -sym(vpx_filter_block1d4_v2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movd xmm0, [rsi] ;load src - movd xmm1, [rsi + rax] - - APPLY_FILTER_4 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_v2_avg_sse2) PRIVATE 
-sym(vpx_filter_block1d8_v2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movq xmm0, [rsi] ;0 - movq xmm1, [rsi + rax] ;1 - - APPLY_FILTER_8 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_v2_avg_sse2) PRIVATE -sym(vpx_filter_block1d16_v2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;0 - movdqu xmm1, [rsi + rax] ;1 - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - - APPLY_FILTER_16 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d4_h2_sse2) PRIVATE -sym(vpx_filter_block1d4_h2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_4 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_h2_sse2) PRIVATE -sym(vpx_filter_block1d8_h2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_8 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_h2_sse2) PRIVATE -sym(vpx_filter_block1d16_h2_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqu xmm1, [rsi + 1] - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - - APPLY_FILTER_16 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d4_h2_avg_sse2) 
PRIVATE -sym(vpx_filter_block1d4_h2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_4 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_h2_avg_sse2) PRIVATE -sym(vpx_filter_block1d8_h2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_8 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_h2_avg_sse2) PRIVATE -sym(vpx_filter_block1d16_h2_avg_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqu xmm1, [rsi + 1] - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - - APPLY_FILTER_16 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm b/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm deleted file mode 100644 index 3c8cfd2253..0000000000 --- a/thirdparty/libvpx/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm +++ /dev/null @@ -1,422 +0,0 @@ -; -; Copyright (c) 2014 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - -%include "vpx_ports/x86_abi_support.asm" - -%macro GET_PARAM_4 0 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm3, [rdx] ;load filters - psrldq xmm3, 6 - packsswb xmm3, xmm3 - pshuflw xmm3, xmm3, 0b ;k3_k4 - - movq xmm2, rcx ;rounding - pshufd xmm2, xmm2, 0 - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height -%endm - -%macro APPLY_FILTER_4 1 - punpcklbw xmm0, xmm1 - pmaddubsw xmm0, xmm3 - - paddsw xmm0, xmm2 ;rounding - psraw xmm0, 7 ;shift - packuswb xmm0, xmm0 ;pack to byte - -%if %1 - movd xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - movd [rdi], xmm0 - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx -%endm - -%macro GET_PARAM 0 - mov rdx, arg(5) ;filter ptr - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 - - movdqa xmm7, [rdx] ;load filters - psrldq xmm7, 6 - packsswb xmm7, xmm7 - pshuflw xmm7, xmm7, 0b ;k3_k4 - punpcklwd xmm7, xmm7 - - movq xmm6, rcx ;rounding - pshufd xmm6, xmm6, 0 - - movsxd rax, DWORD PTR arg(1) ;pixels_per_line - movsxd rdx, DWORD PTR arg(3) ;out_pitch - movsxd rcx, DWORD PTR arg(4) ;output_height -%endm - -%macro APPLY_FILTER_8 1 - punpcklbw xmm0, xmm1 - pmaddubsw xmm0, xmm7 - - paddsw xmm0, xmm6 ;rounding - psraw xmm0, 7 ;shift - packuswb xmm0, xmm0 ;pack back to byte - -%if %1 - movq xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - movq [rdi], xmm0 ;store the result - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx -%endm - -%macro APPLY_FILTER_16 1 - punpcklbw xmm0, xmm1 - punpckhbw xmm2, xmm1 - pmaddubsw xmm0, xmm7 - pmaddubsw xmm2, xmm7 - - paddsw xmm0, xmm6 ;rounding - paddsw xmm2, xmm6 - psraw xmm0, 7 ;shift - psraw xmm2, 7 - packuswb xmm0, xmm2 ;pack back to byte - -%if %1 - movdqu xmm1, [rdi] - pavgb xmm0, xmm1 -%endif - movdqu [rdi], xmm0 ;store the result - - lea rsi, [rsi + rax] - lea rdi, [rdi + rdx] - dec rcx -%endm - -global 
sym(vpx_filter_block1d4_v2_ssse3) PRIVATE -sym(vpx_filter_block1d4_v2_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movd xmm0, [rsi] ;load src - movd xmm1, [rsi + rax] - - APPLY_FILTER_4 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_v2_ssse3) PRIVATE -sym(vpx_filter_block1d8_v2_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movq xmm0, [rsi] ;0 - movq xmm1, [rsi + rax] ;1 - - APPLY_FILTER_8 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_v2_ssse3) PRIVATE -sym(vpx_filter_block1d16_v2_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;0 - movdqu xmm1, [rsi + rax] ;1 - movdqa xmm2, xmm0 - - APPLY_FILTER_16 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d4_v2_avg_ssse3) PRIVATE -sym(vpx_filter_block1d4_v2_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movd xmm0, [rsi] ;load src - movd xmm1, [rsi + rax] - - APPLY_FILTER_4 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_v2_avg_ssse3) PRIVATE -sym(vpx_filter_block1d8_v2_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movq xmm0, [rsi] ;0 - movq xmm1, [rsi + rax] ;1 - - APPLY_FILTER_8 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_v2_avg_ssse3) PRIVATE -sym(vpx_filter_block1d16_v2_avg_ssse3): - push rbp - 
mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;0 - movdqu xmm1, [rsi + rax] ;1 - movdqa xmm2, xmm0 - - APPLY_FILTER_16 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d4_h2_ssse3) PRIVATE -sym(vpx_filter_block1d4_h2_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_4 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_h2_ssse3) PRIVATE -sym(vpx_filter_block1d8_h2_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_8 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_h2_ssse3) PRIVATE -sym(vpx_filter_block1d16_h2_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqu xmm1, [rsi + 1] - movdqa xmm2, xmm0 - - APPLY_FILTER_16 0 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d4_h2_avg_ssse3) PRIVATE -sym(vpx_filter_block1d4_h2_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - ; end prolog - - GET_PARAM_4 -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_4 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d8_h2_avg_ssse3) PRIVATE -sym(vpx_filter_block1d8_h2_avg_ssse3): - push rbp - mov rbp, rsp - 
SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqa xmm1, xmm0 - psrldq xmm1, 1 - - APPLY_FILTER_8 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -global sym(vpx_filter_block1d16_h2_avg_ssse3) PRIVATE -sym(vpx_filter_block1d16_h2_avg_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - SAVE_XMM 7 - push rsi - push rdi - ; end prolog - - GET_PARAM -.loop: - movdqu xmm0, [rsi] ;load src - movdqu xmm1, [rsi + 1] - movdqa xmm2, xmm0 - - APPLY_FILTER_16 1 - jnz .loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/thirdparty/libvpx/vpx_dsp_rtcd.h b/thirdparty/libvpx/vpx_dsp_rtcd.h deleted file mode 100644 index 4d5ad89533..0000000000 --- a/thirdparty/libvpx/vpx_dsp_rtcd.h +++ /dev/null @@ -1,9 +0,0 @@ -#include "vpx_config.h" - -#if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64) - #include "rtcd/vpx_dsp_rtcd_x86.h" -#elif defined(WEBM_ARMASM) && ARCH_ARM - #include "rtcd/vpx_dsp_rtcd_arm.h" -#else - #include "rtcd/vpx_dsp_rtcd_c.h" -#endif diff --git a/thirdparty/libvpx/vpx_mem/include/vpx_mem_intrnl.h b/thirdparty/libvpx/vpx_mem/include/vpx_mem_intrnl.h deleted file mode 100644 index c4dd78550f..0000000000 --- a/thirdparty/libvpx/vpx_mem/include/vpx_mem_intrnl.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ -#define VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ -#include "./vpx_config.h" - -#define ADDRESS_STORAGE_SIZE sizeof(size_t) - -#ifndef DEFAULT_ALIGNMENT -# if defined(VXWORKS) -# define DEFAULT_ALIGNMENT 32 /*default addr alignment to use in -calls to vpx_* functions other -than vpx_memalign*/ -# else -# define DEFAULT_ALIGNMENT (2 * sizeof(void*)) /* NOLINT */ -# endif -#endif - -/*returns an addr aligned to the byte boundary specified by align*/ -#define align_addr(addr,align) (void*)(((size_t)(addr) + ((align) - 1)) & (size_t)-(align)) - -#endif // VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ diff --git a/thirdparty/libvpx/vpx_mem/vpx_mem.c b/thirdparty/libvpx/vpx_mem/vpx_mem.c deleted file mode 100644 index b261fc0da1..0000000000 --- a/thirdparty/libvpx/vpx_mem/vpx_mem.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#include "vpx_mem.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include "include/vpx_mem_intrnl.h" -#include "vpx/vpx_integer.h" - -void *vpx_memalign(size_t align, size_t size) { - void *addr, - * x = NULL; - - addr = malloc(size + align - 1 + ADDRESS_STORAGE_SIZE); - - if (addr) { - x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, (int)align); - /* save the actual malloc address */ - ((size_t *)x)[-1] = (size_t)addr; - } - - return x; -} - -void *vpx_malloc(size_t size) { - return vpx_memalign(DEFAULT_ALIGNMENT, size); -} - -void *vpx_calloc(size_t num, size_t size) { - void *x; - - x = vpx_memalign(DEFAULT_ALIGNMENT, num * size); - - if (x) - memset(x, 0, num * size); - - return x; -} - -void *vpx_realloc(void *memblk, size_t size) { - void *addr, - * new_addr = NULL; - int align = DEFAULT_ALIGNMENT; - - /* - The realloc() function changes the size of the object pointed to by - ptr to the size specified by size, and returns a pointer to the - possibly moved block. The contents are unchanged up to the lesser - of the new and old sizes. If ptr is null, realloc() behaves like - malloc() for the specified size. If size is zero (0) and ptr is - not a null pointer, the object pointed to is freed. 
- */ - if (!memblk) - new_addr = vpx_malloc(size); - else if (!size) - vpx_free(memblk); - else { - addr = (void *)(((size_t *)memblk)[-1]); - memblk = NULL; - - new_addr = realloc(addr, size + align + ADDRESS_STORAGE_SIZE); - - if (new_addr) { - addr = new_addr; - new_addr = (void *)(((size_t) - ((unsigned char *)new_addr + ADDRESS_STORAGE_SIZE) + (align - 1)) & - (size_t) - align); - /* save the actual malloc address */ - ((size_t *)new_addr)[-1] = (size_t)addr; - } - } - - return new_addr; -} - -void vpx_free(void *memblk) { - if (memblk) { - void *addr = (void *)(((size_t *)memblk)[-1]); - free(addr); - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -void *vpx_memset16(void *dest, int val, size_t length) { - size_t i; - uint16_t *dest16 = (uint16_t *)dest; - for (i = 0; i < length; i++) - *dest16++ = val; - return dest; -} -#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vpx_mem/vpx_mem.h b/thirdparty/libvpx/vpx_mem/vpx_mem.h deleted file mode 100644 index a006e0f00b..0000000000 --- a/thirdparty/libvpx/vpx_mem/vpx_mem.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VPX_MEM_VPX_MEM_H_ -#define VPX_MEM_VPX_MEM_H_ - -#include "vpx_config.h" -#if defined(__uClinux__) -# include <lddk.h> -#endif - -#include <stdlib.h> -#include <stddef.h> - -#if defined(__cplusplus) -extern "C" { -#endif - - void *vpx_memalign(size_t align, size_t size); - void *vpx_malloc(size_t size); - void *vpx_calloc(size_t num, size_t size); - void *vpx_realloc(void *memblk, size_t size); - void vpx_free(void *memblk); - -#if CONFIG_VP9_HIGHBITDEPTH - void *vpx_memset16(void *dest, int val, size_t length); -#endif - -#include <string.h> - -#ifdef VPX_MEM_PLTFRM -# include VPX_MEM_PLTFRM -#endif - -#if defined(__cplusplus) -} -#endif - -#endif // VPX_MEM_VPX_MEM_H_ diff --git a/thirdparty/libvpx/vpx_ports/arm.h b/thirdparty/libvpx/vpx_ports/arm.h deleted file mode 100644 index 42c98f5a83..0000000000 --- a/thirdparty/libvpx/vpx_ports/arm.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VPX_PORTS_ARM_H_ -#define VPX_PORTS_ARM_H_ -#include <stdlib.h> -#include "vpx_config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/*ARMv5TE "Enhanced DSP" instructions.*/ -#define HAS_EDSP 0x01 -/*ARMv6 "Parallel" or "Media" instructions.*/ -#define HAS_MEDIA 0x02 -/*ARMv7 optional NEON instructions.*/ -#define HAS_NEON 0x04 - -int arm_cpu_caps(void); - -// Earlier gcc compilers have issues with some neon intrinsics -#if !defined(__clang__) && defined(__GNUC__) && \ - __GNUC__ == 4 && __GNUC_MINOR__ <= 6 -#define VPX_INCOMPATIBLE_GCC -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_PORTS_ARM_H_ - diff --git a/thirdparty/libvpx/vpx_ports/arm_cpudetect.c b/thirdparty/libvpx/vpx_ports/arm_cpudetect.c deleted file mode 100644 index 7eb74a7dc9..0000000000 --- a/thirdparty/libvpx/vpx_ports/arm_cpudetect.c +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <stdlib.h> -#include <string.h> -#include "vpx_ports/arm.h" -#include "./vpx_config.h" - -#ifdef WINAPI_FAMILY -#include <winapifamily.h> -#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) -#define getenv(x) NULL -#endif -#endif - -static int arm_cpu_env_flags(int *flags) { - char *env; - env = getenv("VPX_SIMD_CAPS"); - if (env && *env) { - *flags = (int)strtol(env, NULL, 0); - return 0; - } - *flags = 0; - return -1; -} - -static int arm_cpu_env_mask(void) { - char *env; - env = getenv("VPX_SIMD_CAPS_MASK"); - return env && *env ? (int)strtol(env, NULL, 0) : ~0; -} - -#if !CONFIG_RUNTIME_CPU_DETECT - #error "CONFIG_RUNTIME_CPU_DETECT should be enabled!" 
-#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */ -/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ -#define WIN32_LEAN_AND_MEAN -#define WIN32_EXTRA_LEAN -#include <windows.h> - -int arm_cpu_caps(void) { - int flags; - int mask; - if (!arm_cpu_env_flags(&flags)) { - return flags; - } - mask = arm_cpu_env_mask(); - /* MSVC has no inline __asm support for ARM, but it does let you __emit - * instructions via their assembled hex code. - * All of these instructions should be essentially nops. - */ -#if HAVE_MEDIA - if (mask & HAS_MEDIA) { - __try { - /*SHADD8 r3,r3,r3*/ - __emit(0xE6333F93); - flags |= HAS_MEDIA; - } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) { - /*Ignore exception.*/ - } - } -#endif /* HAVE_MEDIA */ -#if HAVE_NEON || HAVE_NEON_ASM - if (mask &HAS_NEON) { - __try { - /*VORR q0,q0,q0*/ - __emit(0xF2200150); - flags |= HAS_NEON; - } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) { - /*Ignore exception.*/ - } - } -#endif /* HAVE_NEON || HAVE_NEON_ASM */ - return flags & mask; -} - -#elif defined(__ANDROID__) /* end _MSC_VER */ -#include <cpu-features.h> - -int arm_cpu_caps(void) { - int flags; - int mask; - uint64_t features; - if (!arm_cpu_env_flags(&flags)) { - return flags; - } - mask = arm_cpu_env_mask(); - features = android_getCpuFeatures(); - -#if HAVE_MEDIA - flags |= HAS_MEDIA; -#endif /* HAVE_MEDIA */ -#if HAVE_NEON || HAVE_NEON_ASM - if (features & ANDROID_CPU_ARM_FEATURE_NEON) - flags |= HAS_NEON; -#endif /* HAVE_NEON || HAVE_NEON_ASM */ - return flags & mask; -} - -#elif defined(__linux__) /* end __ANDROID__ */ - -#include <stdio.h> - -int arm_cpu_caps(void) { - FILE *fin; - int flags; - int mask; - if (!arm_cpu_env_flags(&flags)) { - return flags; - } - mask = arm_cpu_env_mask(); - /* Reading /proc/self/auxv would be easier, but that doesn't work reliably - * on Android. - * This also means that detection will fail in Scratchbox. 
- */ - fin = fopen("/proc/cpuinfo", "r"); - if (fin != NULL) { - /* 512 should be enough for anybody (it's even enough for all the flags - * that x86 has accumulated... so far). - */ - char buf[512]; - while (fgets(buf, 511, fin) != NULL) { -#if HAVE_NEON || HAVE_NEON_ASM - if (memcmp(buf, "Features", 8) == 0) { - char *p; - p = strstr(buf, " neon"); - if (p != NULL && (p[5] == ' ' || p[5] == '\n')) { - flags |= HAS_NEON; - } - } -#endif /* HAVE_NEON || HAVE_NEON_ASM */ -#if HAVE_MEDIA - if (memcmp(buf, "CPU architecture:", 17) == 0) { - int version; - version = atoi(buf + 17); - if (version >= 6) { - flags |= HAS_MEDIA; - } - } -#endif /* HAVE_MEDIA */ - } - fclose(fin); - } - return flags & mask; -} -#else /* end __linux__ */ -int arm_cpu_caps(void) { - int flags; - int mask; - if (!arm_cpu_env_flags(&flags)) { - return flags; - } - mask = arm_cpu_env_mask(); -#if HAVE_MEDIA - flags |= HAS_MEDIA; -#endif /* HAVE_MEDIA */ -#if HAVE_NEON || HAVE_NEON_ASM - flags |= HAS_NEON; -#endif /* HAVE_NEON || HAVE_NEON_ASM */ - return flags & mask; -} -#warning "ARM run-time CPU detection is disabled for this platform..." -#endif diff --git a/thirdparty/libvpx/vpx_ports/bitops.h b/thirdparty/libvpx/vpx_ports/bitops.h deleted file mode 100644 index 84ff3659fe..0000000000 --- a/thirdparty/libvpx/vpx_ports/bitops.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VPX_PORTS_BITOPS_H_ -#define VPX_PORTS_BITOPS_H_ - -#include <assert.h> - -#include "vpx_ports/msvc.h" - -#ifdef _MSC_VER -# include <math.h> // the ceil() definition must precede intrin.h -# if _MSC_VER > 1310 && (defined(_M_X64) || defined(_M_IX86)) -# include <intrin.h> -# define USE_MSC_INTRINSICS -# endif -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -// These versions of get_msb() are only valid when n != 0 because all -// of the optimized versions are undefined when n == 0: -// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html - -// use GNU builtins where available. -#if defined(__GNUC__) && \ - ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4) -static INLINE int get_msb(unsigned int n) { - assert(n != 0); - return 31 ^ __builtin_clz(n); -} -#elif defined(USE_MSC_INTRINSICS) -#pragma intrinsic(_BitScanReverse) - -static INLINE int get_msb(unsigned int n) { - unsigned long first_set_bit; - assert(n != 0); - _BitScanReverse(&first_set_bit, n); - return first_set_bit; -} -#undef USE_MSC_INTRINSICS -#else -// Returns (int)floor(log2(n)). n must be > 0. -static INLINE int get_msb(unsigned int n) { - int log = 0; - unsigned int value = n; - int i; - - assert(n != 0); - - for (i = 4; i >= 0; --i) { - const int shift = (1 << i); - const unsigned int x = value >> shift; - if (x != 0) { - value = x; - log += shift; - } - } - return log; -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_PORTS_BITOPS_H_ diff --git a/thirdparty/libvpx/vpx_ports/config.h b/thirdparty/libvpx/vpx_ports/config.h deleted file mode 100644 index 3c1ab99f4a..0000000000 --- a/thirdparty/libvpx/vpx_ports/config.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_PORTS_CONFIG_H_ -#define VPX_PORTS_CONFIG_H_ - -#include "vpx_config.h" - -#endif // VPX_PORTS_CONFIG_H_ diff --git a/thirdparty/libvpx/vpx_ports/emmintrin_compat.h b/thirdparty/libvpx/vpx_ports/emmintrin_compat.h deleted file mode 100644 index 16176383d2..0000000000 --- a/thirdparty/libvpx/vpx_ports/emmintrin_compat.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_PORTS_EMMINTRIN_COMPAT_H_ -#define VPX_PORTS_EMMINTRIN_COMPAT_H_ - -#if defined(__GNUC__) && __GNUC__ < 4 -/* From emmintrin.h (gcc 4.5.3) */ -/* Casts between various SP, DP, INT vector types. Note that these do no - conversion of values, they just change the type. 
*/ -extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_castpd_ps(__m128d __A) -{ - return (__m128) __A; -} - -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_castpd_si128(__m128d __A) -{ - return (__m128i) __A; -} - -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_castps_pd(__m128 __A) -{ - return (__m128d) __A; -} - -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_castps_si128(__m128 __A) -{ - return (__m128i) __A; -} - -extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_castsi128_ps(__m128i __A) -{ - return (__m128) __A; -} - -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_castsi128_pd(__m128i __A) -{ - return (__m128d) __A; -} -#endif - -#endif // VPX_PORTS_EMMINTRIN_COMPAT_H_ diff --git a/thirdparty/libvpx/vpx_ports/emms.asm b/thirdparty/libvpx/vpx_ports/emms.asm deleted file mode 100644 index db8da28737..0000000000 --- a/thirdparty/libvpx/vpx_ports/emms.asm +++ /dev/null @@ -1,38 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. 
-; - - -%include "vpx_ports/x86_abi_support.asm" - -section .text -global sym(vpx_reset_mmx_state) PRIVATE -sym(vpx_reset_mmx_state): - emms - ret - - -%if LIBVPX_YASM_WIN64 -global sym(vpx_winx64_fldcw) PRIVATE -sym(vpx_winx64_fldcw): - sub rsp, 8 - mov [rsp], rcx ; win x64 specific - fldcw [rsp] - add rsp, 8 - ret - - -global sym(vpx_winx64_fstcw) PRIVATE -sym(vpx_winx64_fstcw): - sub rsp, 8 - fstcw [rsp] - mov rax, [rsp] - add rsp, 8 - ret -%endif diff --git a/thirdparty/libvpx/vpx_ports/mem.h b/thirdparty/libvpx/vpx_ports/mem.h deleted file mode 100644 index 7502f90632..0000000000 --- a/thirdparty/libvpx/vpx_ports/mem.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VPX_PORTS_MEM_H_ -#define VPX_PORTS_MEM_H_ - -#include "vpx_config.h" -#include "vpx/vpx_integer.h" - -#if (defined(__GNUC__) && __GNUC__) || defined(__SUNPRO_C) -#define DECLARE_ALIGNED(n,typ,val) typ val __attribute__ ((aligned (n))) -#elif defined(_MSC_VER) -#define DECLARE_ALIGNED(n,typ,val) __declspec(align(n)) typ val -#else -#warning No alignment directives known for this compiler. -#define DECLARE_ALIGNED(n,typ,val) typ val -#endif - -/* Indicates that the usage of the specified variable has been audited to assure - * that it's safe to use uninitialized. Silences 'may be used uninitialized' - * warnings on gcc. 
- */ -#if defined(__GNUC__) && __GNUC__ -#define UNINITIALIZED_IS_SAFE(x) x=x -#else -#define UNINITIALIZED_IS_SAFE(x) x -#endif - -#if HAVE_NEON && defined(_MSC_VER) -#define __builtin_prefetch(x) -#endif - -/* Shift down with rounding */ -#define ROUND_POWER_OF_TWO(value, n) \ - (((value) + (1 << ((n) - 1))) >> (n)) - -#define ALIGN_POWER_OF_TWO(value, n) \ - (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) - -#if CONFIG_VP9_HIGHBITDEPTH -#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)x) << 1)) -#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)x) >> 1)) -#endif // CONFIG_VP9_HIGHBITDEPTH - -#endif // VPX_PORTS_MEM_H_ diff --git a/thirdparty/libvpx/vpx_ports/mem_ops.h b/thirdparty/libvpx/vpx_ports/mem_ops.h deleted file mode 100644 index 620df31b22..0000000000 --- a/thirdparty/libvpx/vpx_ports/mem_ops.h +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_PORTS_MEM_OPS_H_ -#define VPX_PORTS_MEM_OPS_H_ - -/* \file - * \brief Provides portable memory access primitives - * - * This function provides portable primitives for getting and setting of - * signed and unsigned integers in 16, 24, and 32 bit sizes. The operations - * can be performed on unaligned data regardless of hardware support for - * unaligned accesses. - * - * The type used to pass the integral values may be changed by defining - * MEM_VALUE_T with the appropriate type. The type given must be an integral - * numeric type. - * - * The actual functions instantiated have the MEM_VALUE_T type name pasted - * on to the symbol name. 
This allows the developer to instantiate these - * operations for multiple types within the same translation unit. This is - * of somewhat questionable utility, but the capability exists nonetheless. - * Users not making use of this functionality should call the functions - * without the type name appended, and the preprocessor will take care of - * it. - * - * NOTE: This code is not supported on platforms where char > 1 octet ATM. - */ - -#ifndef MAU_T -/* Minimum Access Unit for this target */ -#define MAU_T unsigned char -#endif - -#ifndef MEM_VALUE_T -#define MEM_VALUE_T int -#endif - -#undef MEM_VALUE_T_SZ_BITS -#define MEM_VALUE_T_SZ_BITS (sizeof(MEM_VALUE_T) << 3) - -#undef mem_ops_wrap_symbol -#define mem_ops_wrap_symbol(fn) mem_ops_wrap_symbol2(fn, MEM_VALUE_T) -#undef mem_ops_wrap_symbol2 -#define mem_ops_wrap_symbol2(fn,typ) mem_ops_wrap_symbol3(fn,typ) -#undef mem_ops_wrap_symbol3 -#define mem_ops_wrap_symbol3(fn,typ) fn##_as_##typ - -/* - * Include aligned access routines - */ -#define INCLUDED_BY_MEM_OPS_H -#include "mem_ops_aligned.h" -#undef INCLUDED_BY_MEM_OPS_H - -#undef mem_get_be16 -#define mem_get_be16 mem_ops_wrap_symbol(mem_get_be16) -static unsigned MEM_VALUE_T mem_get_be16(const void *vmem) { - unsigned MEM_VALUE_T val; - const MAU_T *mem = (const MAU_T *)vmem; - - val = mem[0] << 8; - val |= mem[1]; - return val; -} - -#undef mem_get_be24 -#define mem_get_be24 mem_ops_wrap_symbol(mem_get_be24) -static unsigned MEM_VALUE_T mem_get_be24(const void *vmem) { - unsigned MEM_VALUE_T val; - const MAU_T *mem = (const MAU_T *)vmem; - - val = mem[0] << 16; - val |= mem[1] << 8; - val |= mem[2]; - return val; -} - -#undef mem_get_be32 -#define mem_get_be32 mem_ops_wrap_symbol(mem_get_be32) -static unsigned MEM_VALUE_T mem_get_be32(const void *vmem) { - unsigned MEM_VALUE_T val; - const MAU_T *mem = (const MAU_T *)vmem; - - val = ((unsigned MEM_VALUE_T)mem[0]) << 24; - val |= mem[1] << 16; - val |= mem[2] << 8; - val |= mem[3]; - return val; -} - 
-#undef mem_get_le16 -#define mem_get_le16 mem_ops_wrap_symbol(mem_get_le16) -static unsigned MEM_VALUE_T mem_get_le16(const void *vmem) { - unsigned MEM_VALUE_T val; - const MAU_T *mem = (const MAU_T *)vmem; - - val = mem[1] << 8; - val |= mem[0]; - return val; -} - -#undef mem_get_le24 -#define mem_get_le24 mem_ops_wrap_symbol(mem_get_le24) -static unsigned MEM_VALUE_T mem_get_le24(const void *vmem) { - unsigned MEM_VALUE_T val; - const MAU_T *mem = (const MAU_T *)vmem; - - val = mem[2] << 16; - val |= mem[1] << 8; - val |= mem[0]; - return val; -} - -#undef mem_get_le32 -#define mem_get_le32 mem_ops_wrap_symbol(mem_get_le32) -static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) { - unsigned MEM_VALUE_T val; - const MAU_T *mem = (const MAU_T *)vmem; - - val = ((unsigned MEM_VALUE_T)mem[3]) << 24; - val |= mem[2] << 16; - val |= mem[1] << 8; - val |= mem[0]; - return val; -} - -#define mem_get_s_generic(end,sz) \ - static VPX_INLINE signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) {\ - const MAU_T *mem = (const MAU_T*)vmem;\ - signed MEM_VALUE_T val = mem_get_##end##sz(mem);\ - return (val << (MEM_VALUE_T_SZ_BITS - sz)) >> (MEM_VALUE_T_SZ_BITS - sz);\ - } - -#undef mem_get_sbe16 -#define mem_get_sbe16 mem_ops_wrap_symbol(mem_get_sbe16) -mem_get_s_generic(be, 16) - -#undef mem_get_sbe24 -#define mem_get_sbe24 mem_ops_wrap_symbol(mem_get_sbe24) -mem_get_s_generic(be, 24) - -#undef mem_get_sbe32 -#define mem_get_sbe32 mem_ops_wrap_symbol(mem_get_sbe32) -mem_get_s_generic(be, 32) - -#undef mem_get_sle16 -#define mem_get_sle16 mem_ops_wrap_symbol(mem_get_sle16) -mem_get_s_generic(le, 16) - -#undef mem_get_sle24 -#define mem_get_sle24 mem_ops_wrap_symbol(mem_get_sle24) -mem_get_s_generic(le, 24) - -#undef mem_get_sle32 -#define mem_get_sle32 mem_ops_wrap_symbol(mem_get_sle32) -mem_get_s_generic(le, 32) - -#undef mem_put_be16 -#define mem_put_be16 mem_ops_wrap_symbol(mem_put_be16) -static VPX_INLINE void mem_put_be16(void *vmem, MEM_VALUE_T val) { - MAU_T 
*mem = (MAU_T *)vmem; - - mem[0] = (MAU_T)((val >> 8) & 0xff); - mem[1] = (MAU_T)((val >> 0) & 0xff); -} - -#undef mem_put_be24 -#define mem_put_be24 mem_ops_wrap_symbol(mem_put_be24) -static VPX_INLINE void mem_put_be24(void *vmem, MEM_VALUE_T val) { - MAU_T *mem = (MAU_T *)vmem; - - mem[0] = (MAU_T)((val >> 16) & 0xff); - mem[1] = (MAU_T)((val >> 8) & 0xff); - mem[2] = (MAU_T)((val >> 0) & 0xff); -} - -#undef mem_put_be32 -#define mem_put_be32 mem_ops_wrap_symbol(mem_put_be32) -static VPX_INLINE void mem_put_be32(void *vmem, MEM_VALUE_T val) { - MAU_T *mem = (MAU_T *)vmem; - - mem[0] = (MAU_T)((val >> 24) & 0xff); - mem[1] = (MAU_T)((val >> 16) & 0xff); - mem[2] = (MAU_T)((val >> 8) & 0xff); - mem[3] = (MAU_T)((val >> 0) & 0xff); -} - -#undef mem_put_le16 -#define mem_put_le16 mem_ops_wrap_symbol(mem_put_le16) -static VPX_INLINE void mem_put_le16(void *vmem, MEM_VALUE_T val) { - MAU_T *mem = (MAU_T *)vmem; - - mem[0] = (MAU_T)((val >> 0) & 0xff); - mem[1] = (MAU_T)((val >> 8) & 0xff); -} - -#undef mem_put_le24 -#define mem_put_le24 mem_ops_wrap_symbol(mem_put_le24) -static VPX_INLINE void mem_put_le24(void *vmem, MEM_VALUE_T val) { - MAU_T *mem = (MAU_T *)vmem; - - mem[0] = (MAU_T)((val >> 0) & 0xff); - mem[1] = (MAU_T)((val >> 8) & 0xff); - mem[2] = (MAU_T)((val >> 16) & 0xff); -} - -#undef mem_put_le32 -#define mem_put_le32 mem_ops_wrap_symbol(mem_put_le32) -static VPX_INLINE void mem_put_le32(void *vmem, MEM_VALUE_T val) { - MAU_T *mem = (MAU_T *)vmem; - - mem[0] = (MAU_T)((val >> 0) & 0xff); - mem[1] = (MAU_T)((val >> 8) & 0xff); - mem[2] = (MAU_T)((val >> 16) & 0xff); - mem[3] = (MAU_T)((val >> 24) & 0xff); -} - -#endif // VPX_PORTS_MEM_OPS_H_ diff --git a/thirdparty/libvpx/vpx_ports/mem_ops_aligned.h b/thirdparty/libvpx/vpx_ports/mem_ops_aligned.h deleted file mode 100644 index 46f61738ba..0000000000 --- a/thirdparty/libvpx/vpx_ports/mem_ops_aligned.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_PORTS_MEM_OPS_ALIGNED_H_ -#define VPX_PORTS_MEM_OPS_ALIGNED_H_ - -#include "vpx/vpx_integer.h" - -/* \file - * \brief Provides portable memory access primitives for operating on aligned - * data - * - * This file is split from mem_ops.h for easier maintenance. See mem_ops.h - * for a more detailed description of these primitives. - */ -#ifndef INCLUDED_BY_MEM_OPS_H -#error Include mem_ops.h, not mem_ops_aligned.h directly. -#endif - -/* Architectures that provide instructions for doing this byte swapping - * could redefine these macros. - */ -#define swap_endian_16(val,raw) do {\ - val = (uint16_t)(((raw>>8) & 0x00ff) \ - | ((raw<<8) & 0xff00));\ - } while(0) -#define swap_endian_32(val,raw) do {\ - val = ((raw>>24) & 0x000000ff) \ - | ((raw>>8) & 0x0000ff00) \ - | ((raw<<8) & 0x00ff0000) \ - | ((raw<<24) & 0xff000000); \ - } while(0) -#define swap_endian_16_se(val,raw) do {\ - swap_endian_16(val,raw);\ - val = ((val << 16) >> 16);\ - } while(0) -#define swap_endian_32_se(val,raw) swap_endian_32(val,raw) - -#define mem_get_ne_aligned_generic(end,sz) \ - static VPX_INLINE unsigned MEM_VALUE_T \ - mem_get_##end##sz##_aligned(const void *vmem) {\ - const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\ - return *mem;\ - } - -#define mem_get_sne_aligned_generic(end,sz) \ - static VPX_INLINE signed MEM_VALUE_T \ - mem_get_s##end##sz##_aligned(const void *vmem) {\ - const int##sz##_t *mem = (const int##sz##_t *)vmem;\ - return *mem;\ - } - -#define mem_get_se_aligned_generic(end,sz) \ - static VPX_INLINE unsigned MEM_VALUE_T \ - mem_get_##end##sz##_aligned(const void *vmem) {\ - const uint##sz##_t *mem = (const uint##sz##_t 
*)vmem;\ - unsigned MEM_VALUE_T val, raw = *mem;\ - swap_endian_##sz(val,raw);\ - return val;\ - } - -#define mem_get_sse_aligned_generic(end,sz) \ - static VPX_INLINE signed MEM_VALUE_T \ - mem_get_s##end##sz##_aligned(const void *vmem) {\ - const int##sz##_t *mem = (const int##sz##_t *)vmem;\ - unsigned MEM_VALUE_T val, raw = *mem;\ - swap_endian_##sz##_se(val,raw);\ - return val;\ - } - -#define mem_put_ne_aligned_generic(end,sz) \ - static VPX_INLINE void \ - mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\ - uint##sz##_t *mem = (uint##sz##_t *)vmem;\ - *mem = (uint##sz##_t)val;\ - } - -#define mem_put_se_aligned_generic(end,sz) \ - static VPX_INLINE void \ - mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\ - uint##sz##_t *mem = (uint##sz##_t *)vmem, raw;\ - swap_endian_##sz(raw,val);\ - *mem = (uint##sz##_t)raw;\ - } - -#include "vpx_config.h" -#if CONFIG_BIG_ENDIAN -#define mem_get_be_aligned_generic(sz) mem_get_ne_aligned_generic(be,sz) -#define mem_get_sbe_aligned_generic(sz) mem_get_sne_aligned_generic(be,sz) -#define mem_get_le_aligned_generic(sz) mem_get_se_aligned_generic(le,sz) -#define mem_get_sle_aligned_generic(sz) mem_get_sse_aligned_generic(le,sz) -#define mem_put_be_aligned_generic(sz) mem_put_ne_aligned_generic(be,sz) -#define mem_put_le_aligned_generic(sz) mem_put_se_aligned_generic(le,sz) -#else -#define mem_get_be_aligned_generic(sz) mem_get_se_aligned_generic(be,sz) -#define mem_get_sbe_aligned_generic(sz) mem_get_sse_aligned_generic(be,sz) -#define mem_get_le_aligned_generic(sz) mem_get_ne_aligned_generic(le,sz) -#define mem_get_sle_aligned_generic(sz) mem_get_sne_aligned_generic(le,sz) -#define mem_put_be_aligned_generic(sz) mem_put_se_aligned_generic(be,sz) -#define mem_put_le_aligned_generic(sz) mem_put_ne_aligned_generic(le,sz) -#endif - -#undef mem_get_be16_aligned -#define mem_get_be16_aligned mem_ops_wrap_symbol(mem_get_be16_aligned) -mem_get_be_aligned_generic(16) - -#undef mem_get_be32_aligned -#define 
mem_get_be32_aligned mem_ops_wrap_symbol(mem_get_be32_aligned) -mem_get_be_aligned_generic(32) - -#undef mem_get_le16_aligned -#define mem_get_le16_aligned mem_ops_wrap_symbol(mem_get_le16_aligned) -mem_get_le_aligned_generic(16) - -#undef mem_get_le32_aligned -#define mem_get_le32_aligned mem_ops_wrap_symbol(mem_get_le32_aligned) -mem_get_le_aligned_generic(32) - -#undef mem_get_sbe16_aligned -#define mem_get_sbe16_aligned mem_ops_wrap_symbol(mem_get_sbe16_aligned) -mem_get_sbe_aligned_generic(16) - -#undef mem_get_sbe32_aligned -#define mem_get_sbe32_aligned mem_ops_wrap_symbol(mem_get_sbe32_aligned) -mem_get_sbe_aligned_generic(32) - -#undef mem_get_sle16_aligned -#define mem_get_sle16_aligned mem_ops_wrap_symbol(mem_get_sle16_aligned) -mem_get_sle_aligned_generic(16) - -#undef mem_get_sle32_aligned -#define mem_get_sle32_aligned mem_ops_wrap_symbol(mem_get_sle32_aligned) -mem_get_sle_aligned_generic(32) - -#undef mem_put_be16_aligned -#define mem_put_be16_aligned mem_ops_wrap_symbol(mem_put_be16_aligned) -mem_put_be_aligned_generic(16) - -#undef mem_put_be32_aligned -#define mem_put_be32_aligned mem_ops_wrap_symbol(mem_put_be32_aligned) -mem_put_be_aligned_generic(32) - -#undef mem_put_le16_aligned -#define mem_put_le16_aligned mem_ops_wrap_symbol(mem_put_le16_aligned) -mem_put_le_aligned_generic(16) - -#undef mem_put_le32_aligned -#define mem_put_le32_aligned mem_ops_wrap_symbol(mem_put_le32_aligned) -mem_put_le_aligned_generic(32) - -#undef mem_get_ne_aligned_generic -#undef mem_get_se_aligned_generic -#undef mem_get_sne_aligned_generic -#undef mem_get_sse_aligned_generic -#undef mem_put_ne_aligned_generic -#undef mem_put_se_aligned_generic -#undef swap_endian_16 -#undef swap_endian_32 -#undef swap_endian_16_se -#undef swap_endian_32_se - -#endif // VPX_PORTS_MEM_OPS_ALIGNED_H_ diff --git a/thirdparty/libvpx/vpx_ports/msvc.h b/thirdparty/libvpx/vpx_ports/msvc.h deleted file mode 100644 index cab77405f4..0000000000 --- a/thirdparty/libvpx/vpx_ports/msvc.h +++ 
/dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_PORTS_MSVC_H_ -#define VPX_PORTS_MSVC_H_ -#ifdef _MSC_VER - -#include "./vpx_config.h" - -# if _MSC_VER < 1900 // VS2015 provides snprintf -# define snprintf _snprintf -# endif // _MSC_VER < 1900 - -#if _MSC_VER < 1800 // VS2013 provides round -#include <math.h> -static INLINE double round(double x) { - if (x < 0) - return ceil(x - 0.5); - else - return floor(x + 0.5); -} -#endif // _MSC_VER < 1800 - -#endif // _MSC_VER -#endif // VPX_PORTS_MSVC_H_ diff --git a/thirdparty/libvpx/vpx_ports/system_state.h b/thirdparty/libvpx/vpx_ports/system_state.h deleted file mode 100644 index 01989dcafc..0000000000 --- a/thirdparty/libvpx/vpx_ports/system_state.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef VPX_PORTS_SYSTEM_STATE_H_ -#define VPX_PORTS_SYSTEM_STATE_H_ - -#include "./vpx_config.h" - -#if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64) - void vpx_reset_mmx_state(void); - #define vpx_clear_system_state() vpx_reset_mmx_state() -#else - #define vpx_clear_system_state() -#endif // ARCH_X86 || ARCH_X86_64 -#endif // VPX_PORTS_SYSTEM_STATE_H_ diff --git a/thirdparty/libvpx/vpx_ports/vpx_once.h b/thirdparty/libvpx/vpx_ports/vpx_once.h deleted file mode 100644 index da04db4590..0000000000 --- a/thirdparty/libvpx/vpx_ports/vpx_once.h +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (c) 2015 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_PORTS_VPX_ONCE_H_ -#define VPX_PORTS_VPX_ONCE_H_ - -#include "vpx_config.h" - -/* Implement a function wrapper to guarantee initialization - * thread-safety for library singletons. - * - * NOTE: These functions use static locks, and can only be - * used with one common argument per compilation unit. So - * - * file1.c: - * vpx_once(foo); - * ... - * vpx_once(foo); - * - * file2.c: - * vpx_once(bar); - * - * will ensure foo() and bar() are each called only once, but in - * - * file1.c: - * vpx_once(foo); - * vpx_once(bar): - * - * bar() will never be called because the lock is used up - * by the call to foo(). - */ - -#if CONFIG_MULTITHREAD && defined(_WIN32) -#include <windows.h> -#include <stdlib.h> -/* Declare a per-compilation-unit state variable to track the progress - * of calling func() only once. This must be at global scope because - * local initializers are not thread-safe in MSVC prior to Visual - * Studio 2015. 
- * - * As a static, once_state will be zero-initialized as program start. - */ -static LONG once_state; -static void once(void (*func)(void)) -{ - /* Try to advance once_state from its initial value of 0 to 1. - * Only one thread can succeed in doing so. - */ - if (InterlockedCompareExchange(&once_state, 1, 0) == 0) { - /* We're the winning thread, having set once_state to 1. - * Call our function. */ - func(); - /* Now advance once_state to 2, unblocking any other threads. */ - InterlockedIncrement(&once_state); - return; - } - - /* We weren't the winning thread, but we want to block on - * the state variable so we don't return before func() - * has finished executing elsewhere. - * - * Try to advance once_state from 2 to 2, which is only possible - * after the winning thead advances it from 1 to 2. - */ - while (InterlockedCompareExchange(&once_state, 2, 2) != 2) { - /* State isn't yet 2. Try again. - * - * We are used for singleton initialization functions, - * which should complete quickly. Contention will likewise - * be rare, so it's worthwhile to use a simple but cpu- - * intensive busy-wait instead of successive backoff, - * waiting on a kernel object, or another heavier-weight scheme. - * - * We can at least yield our timeslice. - */ - Sleep(0); - } - - /* We've seen once_state advance to 2, so we know func() - * has been called. And we've left once_state as we found it, - * so other threads will have the same experience. - * - * It's safe to return now. - */ - return; -} - - -#elif CONFIG_MULTITHREAD && defined(__OS2__) -#define INCL_DOS -#include <os2.h> -static void once(void (*func)(void)) -{ - static int done; - - /* If the initialization is complete, return early. */ - if(done) - return; - - /* Causes all other threads in the process to block themselves - * and give up their time slice. - */ - DosEnterCritSec(); - - if (!done) - { - func(); - done = 1; - } - - /* Restores normal thread dispatching for the current process. 
*/ - DosExitCritSec(); -} - - -#elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H -#include <pthread.h> -static void once(void (*func)(void)) -{ - static pthread_once_t lock = PTHREAD_ONCE_INIT; - pthread_once(&lock, func); -} - - -#else -/* No-op version that performs no synchronization. *_rtcd() is idempotent, - * so as long as your platform provides atomic loads/stores of pointers - * no synchronization is strictly necessary. - */ - -static void once(void (*func)(void)) -{ - static int done; - - if(!done) - { - func(); - done = 1; - } -} -#endif - -#endif // VPX_PORTS_VPX_ONCE_H_ diff --git a/thirdparty/libvpx/vpx_ports/vpx_timer.h b/thirdparty/libvpx/vpx_ports/vpx_timer.h deleted file mode 100644 index dd98e291c2..0000000000 --- a/thirdparty/libvpx/vpx_ports/vpx_timer.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VPX_PORTS_VPX_TIMER_H_ -#define VPX_PORTS_VPX_TIMER_H_ - -#include "./vpx_config.h" - -#include "vpx/vpx_integer.h" - -#if CONFIG_OS_SUPPORT - -#if defined(_WIN32) -/* - * Win32 specific includes - */ -#ifndef WIN32_LEAN_AND_MEAN -#define WIN32_LEAN_AND_MEAN -#endif -#include <windows.h> -#else -/* - * POSIX specific includes - */ -#include <sys/time.h> - -/* timersub is not provided by msys at this time. 
*/ -#ifndef timersub -#define timersub(a, b, result) \ - do { \ - (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ - (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ - if ((result)->tv_usec < 0) { \ - --(result)->tv_sec; \ - (result)->tv_usec += 1000000; \ - } \ - } while (0) -#endif -#endif - - -struct vpx_usec_timer { -#if defined(_WIN32) - LARGE_INTEGER begin, end; -#else - struct timeval begin, end; -#endif -}; - - -static INLINE void -vpx_usec_timer_start(struct vpx_usec_timer *t) { -#if defined(_WIN32) - QueryPerformanceCounter(&t->begin); -#else - gettimeofday(&t->begin, NULL); -#endif -} - - -static INLINE void -vpx_usec_timer_mark(struct vpx_usec_timer *t) { -#if defined(_WIN32) - QueryPerformanceCounter(&t->end); -#else - gettimeofday(&t->end, NULL); -#endif -} - - -static INLINE int64_t -vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { -#if defined(_WIN32) - LARGE_INTEGER freq, diff; - - diff.QuadPart = t->end.QuadPart - t->begin.QuadPart; - - QueryPerformanceFrequency(&freq); - return diff.QuadPart * 1000000 / freq.QuadPart; -#else - struct timeval diff; - - timersub(&t->end, &t->begin, &diff); - return diff.tv_sec * 1000000 + diff.tv_usec; -#endif -} - -#else /* CONFIG_OS_SUPPORT = 0*/ - -/* Empty timer functions if CONFIG_OS_SUPPORT = 0 */ -#ifndef timersub -#define timersub(a, b, result) -#endif - -struct vpx_usec_timer { - void *dummy; -}; - -static INLINE void -vpx_usec_timer_start(struct vpx_usec_timer *t) { } - -static INLINE void -vpx_usec_timer_mark(struct vpx_usec_timer *t) { } - -static INLINE int -vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { - return 0; -} - -#endif /* CONFIG_OS_SUPPORT */ - -#endif // VPX_PORTS_VPX_TIMER_H_ diff --git a/thirdparty/libvpx/vpx_ports/x86.h b/thirdparty/libvpx/vpx_ports/x86.h deleted file mode 100644 index bae25ac345..0000000000 --- a/thirdparty/libvpx/vpx_ports/x86.h +++ /dev/null @@ -1,330 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VPX_PORTS_X86_H_ -#define VPX_PORTS_X86_H_ -#include <stdlib.h> - -#if defined(_MSC_VER) -#include <intrin.h> /* For __cpuidex, __rdtsc */ -#endif - -#include "vpx_config.h" -#include "vpx/vpx_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum { - VPX_CPU_UNKNOWN = -1, - VPX_CPU_AMD, - VPX_CPU_AMD_OLD, - VPX_CPU_CENTAUR, - VPX_CPU_CYRIX, - VPX_CPU_INTEL, - VPX_CPU_NEXGEN, - VPX_CPU_NSC, - VPX_CPU_RISE, - VPX_CPU_SIS, - VPX_CPU_TRANSMETA, - VPX_CPU_TRANSMETA_OLD, - VPX_CPU_UMC, - VPX_CPU_VIA, - - VPX_CPU_LAST -} vpx_cpu_t; - -#if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__) -#if ARCH_X86_64 -#define cpuid(func, func2, ax, bx, cx, dx)\ - __asm__ __volatile__ (\ - "cpuid \n\t" \ - : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \ - : "a" (func), "c" (func2)); -#else -#define cpuid(func, func2, ax, bx, cx, dx)\ - __asm__ __volatile__ (\ - "mov %%ebx, %%edi \n\t" \ - "cpuid \n\t" \ - "xchg %%edi, %%ebx \n\t" \ - : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ - : "a" (func), "c" (func2)); -#endif -#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/ -#if ARCH_X86_64 -#define cpuid(func, func2, ax, bx, cx, dx)\ - asm volatile (\ - "xchg %rsi, %rbx \n\t" \ - "cpuid \n\t" \ - "movl %ebx, %edi \n\t" \ - "xchg %rsi, %rbx \n\t" \ - : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ - : "a" (func), "c" (func2)); -#else -#define cpuid(func, func2, ax, bx, cx, dx)\ - asm volatile (\ - "pushl %ebx \n\t" \ - "cpuid \n\t" \ - "movl %ebx, %edi \n\t" \ - "popl %ebx \n\t" \ - : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ - : "a" (func), "c" (func2)); -#endif -#else /* end __SUNPRO__ */ 
-#if ARCH_X86_64 -#if defined(_MSC_VER) && _MSC_VER > 1500 -#define cpuid(func, func2, a, b, c, d) do {\ - int regs[4];\ - __cpuidex(regs, func, func2); \ - a = regs[0]; b = regs[1]; c = regs[2]; d = regs[3];\ - } while(0) -#else -#define cpuid(func, func2, a, b, c, d) do {\ - int regs[4];\ - __cpuid(regs, func); \ - a = regs[0]; b = regs[1]; c = regs[2]; d = regs[3];\ - } while (0) -#endif -#else -#define cpuid(func, func2, a, b, c, d)\ - __asm mov eax, func\ - __asm mov ecx, func2\ - __asm cpuid\ - __asm mov a, eax\ - __asm mov b, ebx\ - __asm mov c, ecx\ - __asm mov d, edx -#endif -#endif /* end others */ - -// NaCl has no support for xgetbv or the raw opcode. -#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__)) -static INLINE uint64_t xgetbv(void) { - const uint32_t ecx = 0; - uint32_t eax, edx; - // Use the raw opcode for xgetbv for compatibility with older toolchains. - __asm__ volatile ( - ".byte 0x0f, 0x01, 0xd0\n" - : "=a"(eax), "=d"(edx) : "c" (ecx)); - return ((uint64_t)edx << 32) | eax; -} -#elif (defined(_M_X64) || defined(_M_IX86)) && \ - defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1 -#include <immintrin.h> -#define xgetbv() _xgetbv(0) -#elif defined(_MSC_VER) && defined(_M_IX86) -static INLINE uint64_t xgetbv(void) { - uint32_t eax_, edx_; - __asm { - xor ecx, ecx // ecx = 0 - // Use the raw opcode for xgetbv for compatibility with older toolchains. - __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 - mov eax_, eax - mov edx_, edx - } - return ((uint64_t)edx_ << 32) | eax_; -} -#else -#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. 
-#endif - -#if defined(_MSC_VER) && _MSC_VER >= 1700 -#include <windows.h> -#if WINAPI_FAMILY_PARTITION(WINAPI_FAMILY_APP) -#define getenv(x) NULL -#endif -#endif - -#define HAS_MMX 0x01 -#define HAS_SSE 0x02 -#define HAS_SSE2 0x04 -#define HAS_SSE3 0x08 -#define HAS_SSSE3 0x10 -#define HAS_SSE4_1 0x20 -#define HAS_AVX 0x40 -#define HAS_AVX2 0x80 -#ifndef BIT -#define BIT(n) (1<<n) -#endif - -static INLINE int -x86_simd_caps(void) { - unsigned int flags = 0; - unsigned int mask = ~0; - unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx; - char *env; - (void)reg_ebx; - - /* See if the CPU capabilities are being overridden by the environment */ - env = getenv("VPX_SIMD_CAPS"); - - if (env && *env) - return (int)strtol(env, NULL, 0); - - env = getenv("VPX_SIMD_CAPS_MASK"); - - if (env && *env) - mask = (unsigned int)strtoul(env, NULL, 0); - - /* Ensure that the CPUID instruction supports extended features */ - cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx); - - if (max_cpuid_val < 1) - return 0; - - /* Get the standard feature flags */ - cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); - - if (reg_edx & BIT(23)) flags |= HAS_MMX; - - if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ - - if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ - - if (reg_ecx & BIT(0)) flags |= HAS_SSE3; - - if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; - - if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; - - // bits 27 (OSXSAVE) & 28 (256-bit AVX) - if ((reg_ecx & (BIT(27) | BIT(28))) == (BIT(27) | BIT(28))) { - if ((xgetbv() & 0x6) == 0x6) { - flags |= HAS_AVX; - - if (max_cpuid_val >= 7) { - /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ - cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); - - if (reg_ebx & BIT(5)) flags |= HAS_AVX2; - } - } - } - - return flags & mask; -} - -// Note: -// 32-bit CPU cycle counter is light-weighted for most function performance -// measurement. 
For large function (CPU time > a couple of seconds), 64-bit -// counter should be used. -// 32-bit CPU cycle counter -static INLINE unsigned int -x86_readtsc(void) { -#if defined(__GNUC__) && __GNUC__ - unsigned int tsc; - __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):); - return tsc; -#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) - unsigned int tsc; - asm volatile("rdtsc\n\t":"=a"(tsc):); - return tsc; -#else -#if ARCH_X86_64 - return (unsigned int)__rdtsc(); -#else - __asm rdtsc; -#endif -#endif -} -// 64-bit CPU cycle counter -static INLINE uint64_t -x86_readtsc64(void) { -#if defined(__GNUC__) && __GNUC__ - uint32_t hi, lo; - __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi)); - return ((uint64_t)hi << 32) | lo; -#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) - uint_t hi, lo; - asm volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi)); - return ((uint64_t)hi << 32) | lo; -#else -#if ARCH_X86_64 - return (uint64_t)__rdtsc(); -#else - __asm rdtsc; -#endif -#endif -} - -#if defined(__GNUC__) && __GNUC__ -#define x86_pause_hint()\ - __asm__ __volatile__ ("pause \n\t") -#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) -#define x86_pause_hint()\ - asm volatile ("pause \n\t") -#else -#if ARCH_X86_64 -#define x86_pause_hint()\ - _mm_pause(); -#else -#define x86_pause_hint()\ - __asm pause -#endif -#endif - -#if defined(__GNUC__) && __GNUC__ -static void -x87_set_control_word(unsigned short mode) { - __asm__ __volatile__("fldcw %0" : : "m"(*&mode)); -} -static unsigned short -x87_get_control_word(void) { - unsigned short mode; - __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):); - return mode; -} -#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) -static void -x87_set_control_word(unsigned short mode) { - asm volatile("fldcw %0" : : "m"(*&mode)); -} -static unsigned short -x87_get_control_word(void) { - unsigned short mode; - asm volatile("fstcw %0\n\t":"=m"(*&mode):); - return mode; -} -#elif ARCH_X86_64 -/* No fldcw intrinsics on Windows x64, punt to external asm */ 
-extern void vpx_winx64_fldcw(unsigned short mode); -extern unsigned short vpx_winx64_fstcw(void); -#define x87_set_control_word vpx_winx64_fldcw -#define x87_get_control_word vpx_winx64_fstcw -#else -static void -x87_set_control_word(unsigned short mode) { - __asm { fldcw mode } -} -static unsigned short -x87_get_control_word(void) { - unsigned short mode; - __asm { fstcw mode } - return mode; -} -#endif - -static INLINE unsigned int -x87_set_double_precision(void) { - unsigned int mode = x87_get_control_word(); - x87_set_control_word((mode&~0x300) | 0x200); - return mode; -} - - -extern void vpx_reset_mmx_state(void); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_PORTS_X86_H_ diff --git a/thirdparty/libvpx/vpx_ports/x86_abi_support.asm b/thirdparty/libvpx/vpx_ports/x86_abi_support.asm deleted file mode 100644 index 708fa101c5..0000000000 --- a/thirdparty/libvpx/vpx_ports/x86_abi_support.asm +++ /dev/null @@ -1,404 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_config.asm" - -; 32/64 bit compatibility macros -; -; In general, we make the source use 64 bit syntax, then twiddle with it using -; the preprocessor to get the 32 bit syntax on 32 bit platforms. 
-; -%ifidn __OUTPUT_FORMAT__,elf32 -%define ABI_IS_32BIT 1 -%elifidn __OUTPUT_FORMAT__,macho32 -%define ABI_IS_32BIT 1 -%elifidn __OUTPUT_FORMAT__,win32 -%define ABI_IS_32BIT 1 -%elifidn __OUTPUT_FORMAT__,aout -%define ABI_IS_32BIT 1 -%else -%define ABI_IS_32BIT 0 -%endif - -%if ABI_IS_32BIT -%define rax eax -%define rbx ebx -%define rcx ecx -%define rdx edx -%define rsi esi -%define rdi edi -%define rsp esp -%define rbp ebp -%define movsxd mov -%macro movq 2 - %ifidn %1,eax - movd %1,%2 - %elifidn %2,eax - movd %1,%2 - %elifidn %1,ebx - movd %1,%2 - %elifidn %2,ebx - movd %1,%2 - %elifidn %1,ecx - movd %1,%2 - %elifidn %2,ecx - movd %1,%2 - %elifidn %1,edx - movd %1,%2 - %elifidn %2,edx - movd %1,%2 - %elifidn %1,esi - movd %1,%2 - %elifidn %2,esi - movd %1,%2 - %elifidn %1,edi - movd %1,%2 - %elifidn %2,edi - movd %1,%2 - %elifidn %1,esp - movd %1,%2 - %elifidn %2,esp - movd %1,%2 - %elifidn %1,ebp - movd %1,%2 - %elifidn %2,ebp - movd %1,%2 - %else - movq %1,%2 - %endif -%endmacro -%endif - - -; LIBVPX_YASM_WIN64 -; Set LIBVPX_YASM_WIN64 if output is Windows 64bit so the code will work if x64 -; or win64 is defined on the Yasm command line. -%ifidn __OUTPUT_FORMAT__,win64 -%define LIBVPX_YASM_WIN64 1 -%elifidn __OUTPUT_FORMAT__,x64 -%define LIBVPX_YASM_WIN64 1 -%else -%define LIBVPX_YASM_WIN64 0 -%endif - -; sym() -; Return the proper symbol name for the target ABI. -; -; Certain ABIs, notably MS COFF and Darwin MACH-O, require that symbols -; with C linkage be prefixed with an underscore. -; -%ifidn __OUTPUT_FORMAT__,elf32 -%define sym(x) x -%elifidn __OUTPUT_FORMAT__,elf64 -%define sym(x) x -%elifidn __OUTPUT_FORMAT__,elfx32 -%define sym(x) x -%elif LIBVPX_YASM_WIN64 -%define sym(x) x -%else -%define sym(x) _ %+ x -%endif - -; PRIVATE -; Macro for the attribute to hide a global symbol for the target ABI. -; This is only active if CHROMIUM is defined. -; -; Chromium doesn't like exported global symbols due to symbol clashing with -; plugins among other things. 
-; -; Requires Chromium's patched copy of yasm: -; http://src.chromium.org/viewvc/chrome?view=rev&revision=73761 -; http://www.tortall.net/projects/yasm/ticket/236 -; -%ifdef CHROMIUM - %ifidn __OUTPUT_FORMAT__,elf32 - %define PRIVATE :hidden - %elifidn __OUTPUT_FORMAT__,elf64 - %define PRIVATE :hidden - %elifidn __OUTPUT_FORMAT__,elfx32 - %define PRIVATE :hidden - %elif LIBVPX_YASM_WIN64 - %define PRIVATE - %else - %define PRIVATE :private_extern - %endif -%else - %define PRIVATE -%endif - -; arg() -; Return the address specification of the given argument -; -%if ABI_IS_32BIT - %define arg(x) [ebp+8+4*x] -%else - ; 64 bit ABI passes arguments in registers. This is a workaround to get up - ; and running quickly. Relies on SHADOW_ARGS_TO_STACK - %if LIBVPX_YASM_WIN64 - %define arg(x) [rbp+16+8*x] - %else - %define arg(x) [rbp-8-8*x] - %endif -%endif - -; REG_SZ_BYTES, REG_SZ_BITS -; Size of a register -%if ABI_IS_32BIT -%define REG_SZ_BYTES 4 -%define REG_SZ_BITS 32 -%else -%define REG_SZ_BYTES 8 -%define REG_SZ_BITS 64 -%endif - - -; ALIGN_STACK <alignment> <register> -; This macro aligns the stack to the given alignment (in bytes). The stack -; is left such that the previous value of the stack pointer is the first -; argument on the stack (ie, the inverse of this macro is 'pop rsp.') -; This macro uses one temporary register, which is not preserved, and thus -; must be specified as an argument. -%macro ALIGN_STACK 2 - mov %2, rsp - and rsp, -%1 - lea rsp, [rsp - (%1 - REG_SZ_BYTES)] - push %2 -%endmacro - - -; -; The Microsoft assembler tries to impose a certain amount of type safety in -; its register usage. YASM doesn't recognize these directives, so we just -; %define them away to maintain as much compatibility as possible with the -; original inline assembler we're porting from. 
-; -%idefine PTR -%idefine XMMWORD -%idefine MMWORD - -; PIC macros -; -%if ABI_IS_32BIT - %if CONFIG_PIC=1 - %ifidn __OUTPUT_FORMAT__,elf32 - %define WRT_PLT wrt ..plt - %macro GET_GOT 1 - extern _GLOBAL_OFFSET_TABLE_ - push %1 - call %%get_got - %%sub_offset: - jmp %%exitGG - %%get_got: - mov %1, [esp] - add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc - ret - %%exitGG: - %undef GLOBAL - %define GLOBAL(x) x + %1 wrt ..gotoff - %undef RESTORE_GOT - %define RESTORE_GOT pop %1 - %endmacro - %elifidn __OUTPUT_FORMAT__,macho32 - %macro GET_GOT 1 - push %1 - call %%get_got - %%get_got: - pop %1 - %undef GLOBAL - %define GLOBAL(x) x + %1 - %%get_got - %undef RESTORE_GOT - %define RESTORE_GOT pop %1 - %endmacro - %endif - %endif - - %ifdef CHROMIUM - %ifidn __OUTPUT_FORMAT__,macho32 - %define HIDDEN_DATA(x) x:private_extern - %else - %define HIDDEN_DATA(x) x - %endif - %else - %define HIDDEN_DATA(x) x - %endif -%else - %macro GET_GOT 1 - %endmacro - %define GLOBAL(x) rel x - %ifidn __OUTPUT_FORMAT__,elf64 - %define WRT_PLT wrt ..plt - %define HIDDEN_DATA(x) x:data hidden - %elifidn __OUTPUT_FORMAT__,elfx32 - %define WRT_PLT wrt ..plt - %define HIDDEN_DATA(x) x:data hidden - %elifidn __OUTPUT_FORMAT__,macho64 - %ifdef CHROMIUM - %define HIDDEN_DATA(x) x:private_extern - %else - %define HIDDEN_DATA(x) x - %endif - %else - %define HIDDEN_DATA(x) x - %endif -%endif -%ifnmacro GET_GOT - %macro GET_GOT 1 - %endmacro - %define GLOBAL(x) x -%endif -%ifndef RESTORE_GOT -%define RESTORE_GOT -%endif -%ifndef WRT_PLT -%define WRT_PLT -%endif - -%if ABI_IS_32BIT - %macro SHADOW_ARGS_TO_STACK 1 - %endm - %define UNSHADOW_ARGS -%else -%if LIBVPX_YASM_WIN64 - %macro SHADOW_ARGS_TO_STACK 1 ; argc - %if %1 > 0 - mov arg(0),rcx - %endif - %if %1 > 1 - mov arg(1),rdx - %endif - %if %1 > 2 - mov arg(2),r8 - %endif - %if %1 > 3 - mov arg(3),r9 - %endif - %endm -%else - %macro SHADOW_ARGS_TO_STACK 1 ; argc - %if %1 > 0 - push rdi - %endif - %if %1 > 1 - push rsi - %endif - %if %1 
> 2 - push rdx - %endif - %if %1 > 3 - push rcx - %endif - %if %1 > 4 - push r8 - %endif - %if %1 > 5 - push r9 - %endif - %if %1 > 6 - %assign i %1-6 - %assign off 16 - %rep i - mov rax,[rbp+off] - push rax - %assign off off+8 - %endrep - %endif - %endm -%endif - %define UNSHADOW_ARGS mov rsp, rbp -%endif - -; Win64 ABI requires that XMM6:XMM15 are callee saved -; SAVE_XMM n, [u] -; store registers 6-n on the stack -; if u is specified, use unaligned movs. -; Win64 ABI requires 16 byte stack alignment, but then pushes an 8 byte return -; value. Typically we follow this up with 'push rbp' - re-aligning the stack - -; but in some cases this is not done and unaligned movs must be used. -%if LIBVPX_YASM_WIN64 -%macro SAVE_XMM 1-2 a - %if %1 < 6 - %error Only xmm registers 6-15 must be preserved - %else - %assign last_xmm %1 - %define movxmm movdq %+ %2 - %assign xmm_stack_space ((last_xmm - 5) * 16) - sub rsp, xmm_stack_space - %assign i 6 - %rep (last_xmm - 5) - movxmm [rsp + ((i - 6) * 16)], xmm %+ i - %assign i i+1 - %endrep - %endif -%endmacro -%macro RESTORE_XMM 0 - %ifndef last_xmm - %error RESTORE_XMM must be paired with SAVE_XMM n - %else - %assign i last_xmm - %rep (last_xmm - 5) - movxmm xmm %+ i, [rsp +((i - 6) * 16)] - %assign i i-1 - %endrep - add rsp, xmm_stack_space - ; there are a couple functions which return from multiple places. - ; otherwise, we could uncomment these: - ; %undef last_xmm - ; %undef xmm_stack_space - ; %undef movxmm - %endif -%endmacro -%else -%macro SAVE_XMM 1-2 -%endmacro -%macro RESTORE_XMM 0 -%endmacro -%endif - -; Name of the rodata section -; -; .rodata seems to be an elf-ism, as it doesn't work on OSX. 
-; -%ifidn __OUTPUT_FORMAT__,macho64 -%define SECTION_RODATA section .text -%elifidn __OUTPUT_FORMAT__,macho32 -%macro SECTION_RODATA 0 -section .text -%endmacro -%elifidn __OUTPUT_FORMAT__,aout -%define SECTION_RODATA section .data -%else -%define SECTION_RODATA section .rodata -%endif - - -; Tell GNU ld that we don't require an executable stack. -%ifidn __OUTPUT_FORMAT__,elf32 -section .note.GNU-stack noalloc noexec nowrite progbits -section .text -%elifidn __OUTPUT_FORMAT__,elf64 -section .note.GNU-stack noalloc noexec nowrite progbits -section .text -%elifidn __OUTPUT_FORMAT__,elfx32 -section .note.GNU-stack noalloc noexec nowrite progbits -section .text -%endif - -; On Android platforms use lrand48 when building postproc routines. Prior to L -; rand() was not available. -%if CONFIG_POSTPROC=1 || CONFIG_VP9_POSTPROC=1 -%ifdef __ANDROID__ -extern sym(lrand48) -%define LIBVPX_RAND lrand48 -%else -extern sym(rand) -%define LIBVPX_RAND rand -%endif -%endif ; CONFIG_POSTPROC || CONFIG_VP9_POSTPROC diff --git a/thirdparty/libvpx/vpx_scale/generic/yv12config.c b/thirdparty/libvpx/vpx_scale/generic/yv12config.c deleted file mode 100644 index 6bbb6d8d48..0000000000 --- a/thirdparty/libvpx/vpx_scale/generic/yv12config.c +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include <assert.h> - -#include "vpx_scale/yv12config.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/mem.h" - -/**************************************************************************** -* Exports -****************************************************************************/ - -/**************************************************************************** - * - ****************************************************************************/ -#define yv12_align_addr(addr, align) \ - (void*)(((size_t)(addr) + ((align) - 1)) & (size_t)-(align)) - -int -vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) { - if (ybf) { - // If libvpx is using frame buffer callbacks then buffer_alloc_sz must - // not be set. - if (ybf->buffer_alloc_sz > 0) { - vpx_free(ybf->buffer_alloc); - } - - /* buffer_alloc isn't accessed by most functions. Rather y_buffer, - u_buffer and v_buffer point to buffer_alloc and are used. Clear out - all of this so that a freed pointer isn't inadvertently used */ - memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG)); - } else { - return -1; - } - - return 0; -} - -int vp8_yv12_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, - int width, int height, int border) { - if (ybf) { - int aligned_width = (width + 15) & ~15; - int aligned_height = (height + 15) & ~15; - int y_stride = ((aligned_width + 2 * border) + 31) & ~31; - int yplane_size = (aligned_height + 2 * border) * y_stride; - int uv_width = aligned_width >> 1; - int uv_height = aligned_height >> 1; - /** There is currently a bunch of code which assumes - * uv_stride == y_stride/2, so enforce this here. 
*/ - int uv_stride = y_stride >> 1; - int uvplane_size = (uv_height + border) * uv_stride; - const int frame_size = yplane_size + 2 * uvplane_size; - - if (!ybf->buffer_alloc) { - ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, frame_size); - ybf->buffer_alloc_sz = frame_size; - } - - if (!ybf->buffer_alloc || ybf->buffer_alloc_sz < frame_size) - return -1; - - /* Only support allocating buffers that have a border that's a multiple - * of 32. The border restriction is required to get 16-byte alignment of - * the start of the chroma rows without introducing an arbitrary gap - * between planes, which would break the semantics of things like - * vpx_img_set_rect(). */ - if (border & 0x1f) - return -3; - - ybf->y_crop_width = width; - ybf->y_crop_height = height; - ybf->y_width = aligned_width; - ybf->y_height = aligned_height; - ybf->y_stride = y_stride; - - ybf->uv_crop_width = (width + 1) / 2; - ybf->uv_crop_height = (height + 1) / 2; - ybf->uv_width = uv_width; - ybf->uv_height = uv_height; - ybf->uv_stride = uv_stride; - - ybf->alpha_width = 0; - ybf->alpha_height = 0; - ybf->alpha_stride = 0; - - ybf->border = border; - ybf->frame_size = frame_size; - - ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border; - ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2; - ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2; - ybf->alpha_buffer = NULL; - - ybf->corrupted = 0; /* assume not currupted by errors */ - return 0; - } - return -2; -} - -int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, - int width, int height, int border) { - if (ybf) { - vp8_yv12_de_alloc_frame_buffer(ybf); - return vp8_yv12_realloc_frame_buffer(ybf, width, height, border); - } - return -2; -} - -#if CONFIG_VP9 -// TODO(jkoleszar): Maybe replace this with struct vpx_image - -int vpx_free_frame_buffer(YV12_BUFFER_CONFIG *ybf) { - if (ybf) { - if (ybf->buffer_alloc_sz > 0) { - 
vpx_free(ybf->buffer_alloc); - } - - /* buffer_alloc isn't accessed by most functions. Rather y_buffer, - u_buffer and v_buffer point to buffer_alloc and are used. Clear out - all of this so that a freed pointer isn't inadvertently used */ - memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG)); - } else { - return -1; - } - - return 0; -} - -int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, - int width, int height, - int ss_x, int ss_y, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif - int border, - int byte_alignment, - vpx_codec_frame_buffer_t *fb, - vpx_get_frame_buffer_cb_fn_t cb, - void *cb_priv) { - if (ybf) { - const int vp9_byte_align = (byte_alignment == 0) ? 1 : byte_alignment; - const int aligned_width = (width + 7) & ~7; - const int aligned_height = (height + 7) & ~7; - const int y_stride = ((aligned_width + 2 * border) + 31) & ~31; - const uint64_t yplane_size = (aligned_height + 2 * border) * - (uint64_t)y_stride + byte_alignment; - const int uv_width = aligned_width >> ss_x; - const int uv_height = aligned_height >> ss_y; - const int uv_stride = y_stride >> ss_x; - const int uv_border_w = border >> ss_x; - const int uv_border_h = border >> ss_y; - const uint64_t uvplane_size = (uv_height + 2 * uv_border_h) * - (uint64_t)uv_stride + byte_alignment; - -#if CONFIG_VP9_HIGHBITDEPTH - const uint64_t frame_size = - (1 + use_highbitdepth) * (yplane_size + 2 * uvplane_size); -#else - const uint64_t frame_size = yplane_size + 2 * uvplane_size; -#endif // CONFIG_VP9_HIGHBITDEPTH - - uint8_t *buf = NULL; - - if (cb != NULL) { - const int align_addr_extra_size = 31; - const uint64_t external_frame_size = frame_size + align_addr_extra_size; - - assert(fb != NULL); - - if (external_frame_size != (size_t)external_frame_size) - return -1; - - // Allocation to hold larger frame, or first allocation. 
- if (cb(cb_priv, (size_t)external_frame_size, fb) < 0) - return -1; - - if (fb->data == NULL || fb->size < external_frame_size) - return -1; - - ybf->buffer_alloc = (uint8_t *)yv12_align_addr(fb->data, 32); - -#if defined(__has_feature) -#if __has_feature(memory_sanitizer) - // This memset is needed for fixing the issue of using uninitialized - // value in msan test. It will cause a perf loss, so only do this for - // msan test. - memset(ybf->buffer_alloc, 0, (int)frame_size); -#endif -#endif - } else if (frame_size > (size_t)ybf->buffer_alloc_sz) { - // Allocation to hold larger frame, or first allocation. - vpx_free(ybf->buffer_alloc); - ybf->buffer_alloc = NULL; - - if (frame_size != (size_t)frame_size) - return -1; - - ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, (size_t)frame_size); - if (!ybf->buffer_alloc) - return -1; - - ybf->buffer_alloc_sz = (int)frame_size; - - // This memset is needed for fixing valgrind error from C loop filter - // due to access uninitialized memory in frame border. It could be - // removed if border is totally removed. - memset(ybf->buffer_alloc, 0, ybf->buffer_alloc_sz); - } - - /* Only support allocating buffers that have a border that's a multiple - * of 32. The border restriction is required to get 16-byte alignment of - * the start of the chroma rows without introducing an arbitrary gap - * between planes, which would break the semantics of things like - * vpx_img_set_rect(). 
*/ - if (border & 0x1f) - return -3; - - ybf->y_crop_width = width; - ybf->y_crop_height = height; - ybf->y_width = aligned_width; - ybf->y_height = aligned_height; - ybf->y_stride = y_stride; - - ybf->uv_crop_width = (width + ss_x) >> ss_x; - ybf->uv_crop_height = (height + ss_y) >> ss_y; - ybf->uv_width = uv_width; - ybf->uv_height = uv_height; - ybf->uv_stride = uv_stride; - - ybf->border = border; - ybf->frame_size = (int)frame_size; - ybf->subsampling_x = ss_x; - ybf->subsampling_y = ss_y; - - buf = ybf->buffer_alloc; -#if CONFIG_VP9_HIGHBITDEPTH - if (use_highbitdepth) { - // Store uint16 addresses when using 16bit framebuffers - buf = CONVERT_TO_BYTEPTR(ybf->buffer_alloc); - ybf->flags = YV12_FLAG_HIGHBITDEPTH; - } else { - ybf->flags = 0; - } -#endif // CONFIG_VP9_HIGHBITDEPTH - - ybf->y_buffer = (uint8_t *)yv12_align_addr( - buf + (border * y_stride) + border, vp9_byte_align); - ybf->u_buffer = (uint8_t *)yv12_align_addr( - buf + yplane_size + (uv_border_h * uv_stride) + uv_border_w, - vp9_byte_align); - ybf->v_buffer = (uint8_t *)yv12_align_addr( - buf + yplane_size + uvplane_size + (uv_border_h * uv_stride) + - uv_border_w, vp9_byte_align); - - ybf->corrupted = 0; /* assume not corrupted by errors */ - return 0; - } - return -2; -} - -int vpx_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, - int width, int height, - int ss_x, int ss_y, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif - int border, - int byte_alignment) { - if (ybf) { - vpx_free_frame_buffer(ybf); - return vpx_realloc_frame_buffer(ybf, width, height, ss_x, ss_y, -#if CONFIG_VP9_HIGHBITDEPTH - use_highbitdepth, -#endif - border, byte_alignment, NULL, NULL, NULL); - } - return -2; -} -#endif diff --git a/thirdparty/libvpx/vpx_scale/generic/yv12extend.c b/thirdparty/libvpx/vpx_scale/generic/yv12extend.c deleted file mode 100644 index 52f0aff1f2..0000000000 --- a/thirdparty/libvpx/vpx_scale/generic/yv12extend.c +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Copyright (c) 2010 The WebM 
project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <assert.h> -#include "./vpx_config.h" -#include "./vpx_scale_rtcd.h" -#include "vpx/vpx_integer.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/mem.h" -#include "vpx_scale/yv12config.h" -#if CONFIG_VP9_HIGHBITDEPTH -#include "vp9/common/vp9_common.h" -#endif - -static void extend_plane(uint8_t *const src, int src_stride, - int width, int height, - int extend_top, int extend_left, - int extend_bottom, int extend_right) { - int i; - const int linesize = extend_left + extend_right + width; - - /* copy the left and right most columns out */ - uint8_t *src_ptr1 = src; - uint8_t *src_ptr2 = src + width - 1; - uint8_t *dst_ptr1 = src - extend_left; - uint8_t *dst_ptr2 = src + width; - - for (i = 0; i < height; ++i) { - memset(dst_ptr1, src_ptr1[0], extend_left); - memset(dst_ptr2, src_ptr2[0], extend_right); - src_ptr1 += src_stride; - src_ptr2 += src_stride; - dst_ptr1 += src_stride; - dst_ptr2 += src_stride; - } - - /* Now copy the top and bottom lines into each line of the respective - * borders - */ - src_ptr1 = src - extend_left; - src_ptr2 = src + src_stride * (height - 1) - extend_left; - dst_ptr1 = src + src_stride * -extend_top - extend_left; - dst_ptr2 = src + src_stride * height - extend_left; - - for (i = 0; i < extend_top; ++i) { - memcpy(dst_ptr1, src_ptr1, linesize); - dst_ptr1 += src_stride; - } - - for (i = 0; i < extend_bottom; ++i) { - memcpy(dst_ptr2, src_ptr2, linesize); - dst_ptr2 += src_stride; - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void extend_plane_high(uint8_t *const src8, int src_stride, - int width, int height, - int extend_top, int extend_left, - 
int extend_bottom, int extend_right) { - int i; - const int linesize = extend_left + extend_right + width; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - - /* copy the left and right most columns out */ - uint16_t *src_ptr1 = src; - uint16_t *src_ptr2 = src + width - 1; - uint16_t *dst_ptr1 = src - extend_left; - uint16_t *dst_ptr2 = src + width; - - for (i = 0; i < height; ++i) { - vpx_memset16(dst_ptr1, src_ptr1[0], extend_left); - vpx_memset16(dst_ptr2, src_ptr2[0], extend_right); - src_ptr1 += src_stride; - src_ptr2 += src_stride; - dst_ptr1 += src_stride; - dst_ptr2 += src_stride; - } - - /* Now copy the top and bottom lines into each line of the respective - * borders - */ - src_ptr1 = src - extend_left; - src_ptr2 = src + src_stride * (height - 1) - extend_left; - dst_ptr1 = src + src_stride * -extend_top - extend_left; - dst_ptr2 = src + src_stride * height - extend_left; - - for (i = 0; i < extend_top; ++i) { - memcpy(dst_ptr1, src_ptr1, linesize * sizeof(uint16_t)); - dst_ptr1 += src_stride; - } - - for (i = 0; i < extend_bottom; ++i) { - memcpy(dst_ptr2, src_ptr2, linesize * sizeof(uint16_t)); - dst_ptr2 += src_stride; - } -} -#endif - -void vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) { - const int uv_border = ybf->border / 2; - - assert(ybf->border % 2 == 0); - assert(ybf->y_height - ybf->y_crop_height < 16); - assert(ybf->y_width - ybf->y_crop_width < 16); - assert(ybf->y_height - ybf->y_crop_height >= 0); - assert(ybf->y_width - ybf->y_crop_width >= 0); - -#if CONFIG_VP9_HIGHBITDEPTH - if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) { - extend_plane_high( - ybf->y_buffer, ybf->y_stride, - ybf->y_crop_width, ybf->y_crop_height, - ybf->border, ybf->border, - ybf->border + ybf->y_height - ybf->y_crop_height, - ybf->border + ybf->y_width - ybf->y_crop_width); - - extend_plane_high( - ybf->u_buffer, ybf->uv_stride, - ybf->uv_crop_width, ybf->uv_crop_height, - uv_border, uv_border, - uv_border + ybf->uv_height - ybf->uv_crop_height, - uv_border + 
ybf->uv_width - ybf->uv_crop_width); - - extend_plane_high( - ybf->v_buffer, ybf->uv_stride, - ybf->uv_crop_width, ybf->uv_crop_height, - uv_border, uv_border, - uv_border + ybf->uv_height - ybf->uv_crop_height, - uv_border + ybf->uv_width - ybf->uv_crop_width); - return; - } -#endif - extend_plane(ybf->y_buffer, ybf->y_stride, - ybf->y_crop_width, ybf->y_crop_height, - ybf->border, ybf->border, - ybf->border + ybf->y_height - ybf->y_crop_height, - ybf->border + ybf->y_width - ybf->y_crop_width); - - extend_plane(ybf->u_buffer, ybf->uv_stride, - ybf->uv_crop_width, ybf->uv_crop_height, - uv_border, uv_border, - uv_border + ybf->uv_height - ybf->uv_crop_height, - uv_border + ybf->uv_width - ybf->uv_crop_width); - - extend_plane(ybf->v_buffer, ybf->uv_stride, - ybf->uv_crop_width, ybf->uv_crop_height, - uv_border, uv_border, - uv_border + ybf->uv_height - ybf->uv_crop_height, - uv_border + ybf->uv_width - ybf->uv_crop_width); -} - -#if CONFIG_VP9 -static void extend_frame(YV12_BUFFER_CONFIG *const ybf, int ext_size) { - const int c_w = ybf->uv_crop_width; - const int c_h = ybf->uv_crop_height; - const int ss_x = ybf->uv_width < ybf->y_width; - const int ss_y = ybf->uv_height < ybf->y_height; - const int c_et = ext_size >> ss_y; - const int c_el = ext_size >> ss_x; - const int c_eb = c_et + ybf->uv_height - ybf->uv_crop_height; - const int c_er = c_el + ybf->uv_width - ybf->uv_crop_width; - - assert(ybf->y_height - ybf->y_crop_height < 16); - assert(ybf->y_width - ybf->y_crop_width < 16); - assert(ybf->y_height - ybf->y_crop_height >= 0); - assert(ybf->y_width - ybf->y_crop_width >= 0); - -#if CONFIG_VP9_HIGHBITDEPTH - if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) { - extend_plane_high(ybf->y_buffer, ybf->y_stride, - ybf->y_crop_width, ybf->y_crop_height, - ext_size, ext_size, - ext_size + ybf->y_height - ybf->y_crop_height, - ext_size + ybf->y_width - ybf->y_crop_width); - extend_plane_high(ybf->u_buffer, ybf->uv_stride, - c_w, c_h, c_et, c_el, c_eb, c_er); - 
extend_plane_high(ybf->v_buffer, ybf->uv_stride, - c_w, c_h, c_et, c_el, c_eb, c_er); - return; - } -#endif - extend_plane(ybf->y_buffer, ybf->y_stride, - ybf->y_crop_width, ybf->y_crop_height, - ext_size, ext_size, - ext_size + ybf->y_height - ybf->y_crop_height, - ext_size + ybf->y_width - ybf->y_crop_width); - - extend_plane(ybf->u_buffer, ybf->uv_stride, - c_w, c_h, c_et, c_el, c_eb, c_er); - - extend_plane(ybf->v_buffer, ybf->uv_stride, - c_w, c_h, c_et, c_el, c_eb, c_er); -} - -void vpx_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) { - extend_frame(ybf, ybf->border); -} - -void vpx_extend_frame_inner_borders_c(YV12_BUFFER_CONFIG *ybf) { - const int inner_bw = (ybf->border > VP9INNERBORDERINPIXELS) ? - VP9INNERBORDERINPIXELS : ybf->border; - extend_frame(ybf, inner_bw); -} - -#if CONFIG_VP9_HIGHBITDEPTH -static void memcpy_short_addr(uint8_t *dst8, const uint8_t *src8, int num) { - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - memcpy(dst, src, num * sizeof(uint16_t)); -} -#endif // CONFIG_VP9_HIGHBITDEPTH -#endif // CONFIG_VP9 - -// Copies the source image into the destination image and updates the -// destination's UMV borders. -// Note: The frames are assumed to be identical in size. -void vp8_yv12_copy_frame_c(const YV12_BUFFER_CONFIG *src_ybc, - YV12_BUFFER_CONFIG *dst_ybc) { - int row; - const uint8_t *src = src_ybc->y_buffer; - uint8_t *dst = dst_ybc->y_buffer; - -#if 0 - /* These assertions are valid in the codec, but the libvpx-tester uses - * this code slightly differently. 
- */ - assert(src_ybc->y_width == dst_ybc->y_width); - assert(src_ybc->y_height == dst_ybc->y_height); -#endif - -#if CONFIG_VP9_HIGHBITDEPTH - if (src_ybc->flags & YV12_FLAG_HIGHBITDEPTH) { - assert(dst_ybc->flags & YV12_FLAG_HIGHBITDEPTH); - for (row = 0; row < src_ybc->y_height; ++row) { - memcpy_short_addr(dst, src, src_ybc->y_width); - src += src_ybc->y_stride; - dst += dst_ybc->y_stride; - } - - src = src_ybc->u_buffer; - dst = dst_ybc->u_buffer; - - for (row = 0; row < src_ybc->uv_height; ++row) { - memcpy_short_addr(dst, src, src_ybc->uv_width); - src += src_ybc->uv_stride; - dst += dst_ybc->uv_stride; - } - - src = src_ybc->v_buffer; - dst = dst_ybc->v_buffer; - - for (row = 0; row < src_ybc->uv_height; ++row) { - memcpy_short_addr(dst, src, src_ybc->uv_width); - src += src_ybc->uv_stride; - dst += dst_ybc->uv_stride; - } - - vp8_yv12_extend_frame_borders_c(dst_ybc); - return; - } else { - assert(!(dst_ybc->flags & YV12_FLAG_HIGHBITDEPTH)); - } -#endif - - for (row = 0; row < src_ybc->y_height; ++row) { - memcpy(dst, src, src_ybc->y_width); - src += src_ybc->y_stride; - dst += dst_ybc->y_stride; - } - - src = src_ybc->u_buffer; - dst = dst_ybc->u_buffer; - - for (row = 0; row < src_ybc->uv_height; ++row) { - memcpy(dst, src, src_ybc->uv_width); - src += src_ybc->uv_stride; - dst += dst_ybc->uv_stride; - } - - src = src_ybc->v_buffer; - dst = dst_ybc->v_buffer; - - for (row = 0; row < src_ybc->uv_height; ++row) { - memcpy(dst, src, src_ybc->uv_width); - src += src_ybc->uv_stride; - dst += dst_ybc->uv_stride; - } - - vp8_yv12_extend_frame_borders_c(dst_ybc); -} - -void vpx_yv12_copy_y_c(const YV12_BUFFER_CONFIG *src_ybc, - YV12_BUFFER_CONFIG *dst_ybc) { - int row; - const uint8_t *src = src_ybc->y_buffer; - uint8_t *dst = dst_ybc->y_buffer; - -#if CONFIG_VP9_HIGHBITDEPTH - if (src_ybc->flags & YV12_FLAG_HIGHBITDEPTH) { - const uint16_t *src16 = CONVERT_TO_SHORTPTR(src); - uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst); - for (row = 0; row < src_ybc->y_height; 
++row) { - memcpy(dst16, src16, src_ybc->y_width * sizeof(uint16_t)); - src16 += src_ybc->y_stride; - dst16 += dst_ybc->y_stride; - } - return; - } -#endif - - for (row = 0; row < src_ybc->y_height; ++row) { - memcpy(dst, src, src_ybc->y_width); - src += src_ybc->y_stride; - dst += dst_ybc->y_stride; - } -} diff --git a/thirdparty/libvpx/vpx_scale/vpx_scale.h b/thirdparty/libvpx/vpx_scale/vpx_scale.h deleted file mode 100644 index 43fcf9d66e..0000000000 --- a/thirdparty/libvpx/vpx_scale/vpx_scale.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VPX_SCALE_VPX_SCALE_H_ -#define VPX_SCALE_VPX_SCALE_H_ - -#include "vpx_scale/yv12config.h" - -extern void vpx_scale_frame(YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, - unsigned char *temp_area, - unsigned char temp_height, - unsigned int hscale, - unsigned int hratio, - unsigned int vscale, - unsigned int vratio, - unsigned int interlaced); - -#endif // VPX_SCALE_VPX_SCALE_H_ diff --git a/thirdparty/libvpx/vpx_scale/vpx_scale_rtcd.c b/thirdparty/libvpx/vpx_scale/vpx_scale_rtcd.c deleted file mode 100644 index bea603fd10..0000000000 --- a/thirdparty/libvpx/vpx_scale/vpx_scale_rtcd.c +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (c) 2011 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ -#include "./vpx_config.h" -#define RTCD_C -#include "./vpx_scale_rtcd.h" -#include "vpx_ports/vpx_once.h" - -void vpx_scale_rtcd() -{ - once(setup_rtcd_internal); -} diff --git a/thirdparty/libvpx/vpx_scale/yv12config.h b/thirdparty/libvpx/vpx_scale/yv12config.h deleted file mode 100644 index 37b255d4d3..0000000000 --- a/thirdparty/libvpx/vpx_scale/yv12config.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VPX_SCALE_YV12CONFIG_H_ -#define VPX_SCALE_YV12CONFIG_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "./vpx_config.h" -#include "vpx/vpx_codec.h" -#include "vpx/vpx_frame_buffer.h" -#include "vpx/vpx_integer.h" - -#define VP8BORDERINPIXELS 32 -#define VP9INNERBORDERINPIXELS 96 -#define VP9_INTERP_EXTEND 4 -#define VP9_ENC_BORDER_IN_PIXELS 160 -#define VP9_DEC_BORDER_IN_PIXELS 32 - -typedef struct yv12_buffer_config { - int y_width; - int y_height; - int y_crop_width; - int y_crop_height; - int y_stride; - - int uv_width; - int uv_height; - int uv_crop_width; - int uv_crop_height; - int uv_stride; - - int alpha_width; - int alpha_height; - int alpha_stride; - - uint8_t *y_buffer; - uint8_t *u_buffer; - uint8_t *v_buffer; - uint8_t *alpha_buffer; - - uint8_t *buffer_alloc; - int buffer_alloc_sz; - int border; - int frame_size; - int subsampling_x; - int subsampling_y; - unsigned int bit_depth; - vpx_color_space_t color_space; - vpx_color_range_t color_range; - int render_width; - int render_height; - - int corrupted; - int flags; -} YV12_BUFFER_CONFIG; - -#define YV12_FLAG_HIGHBITDEPTH 8 - -int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG 
*ybf, - int width, int height, int border); -int vp8_yv12_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, - int width, int height, int border); -int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf); - -int vpx_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, - int width, int height, int ss_x, int ss_y, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif - int border, int byte_alignment); - -// Updates the yv12 buffer config with the frame buffer. |byte_alignment| must -// be a power of 2, from 32 to 1024. 0 sets legacy alignment. If cb is not -// NULL, then libvpx is using the frame buffer callbacks to handle memory. -// If cb is not NULL, libvpx will call cb with minimum size in bytes needed -// to decode the current frame. If cb is NULL, libvpx will allocate memory -// internally to decode the current frame. Returns 0 on success. Returns < 0 -// on failure. -int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, - int width, int height, int ss_x, int ss_y, -#if CONFIG_VP9_HIGHBITDEPTH - int use_highbitdepth, -#endif - int border, - int byte_alignment, - vpx_codec_frame_buffer_t *fb, - vpx_get_frame_buffer_cb_fn_t cb, - void *cb_priv); -int vpx_free_frame_buffer(YV12_BUFFER_CONFIG *ybf); - -#ifdef __cplusplus -} -#endif - -#endif // VPX_SCALE_YV12CONFIG_H_ diff --git a/thirdparty/libvpx/vpx_scale_rtcd.h b/thirdparty/libvpx/vpx_scale_rtcd.h deleted file mode 100644 index 4d59467fe9..0000000000 --- a/thirdparty/libvpx/vpx_scale_rtcd.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef VPX_SCALE_RTCD_H_ -#define VPX_SCALE_RTCD_H_ - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -struct yv12_buffer_config; - -#ifdef __cplusplus -extern "C" { -#endif - -void vp8_yv12_copy_frame_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); -#define vp8_yv12_copy_frame vp8_yv12_copy_frame_c - -void vp8_yv12_extend_frame_borders_c(struct yv12_buffer_config *ybf); -#define vp8_yv12_extend_frame_borders 
vp8_yv12_extend_frame_borders_c - -void vpx_extend_frame_borders_c(struct yv12_buffer_config *ybf); -#define vpx_extend_frame_borders vpx_extend_frame_borders_c - -void vpx_extend_frame_inner_borders_c(struct yv12_buffer_config *ybf); -#define vpx_extend_frame_inner_borders vpx_extend_frame_inner_borders_c - -void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); -#define vpx_yv12_copy_y vpx_yv12_copy_y_c - -void vpx_scale_rtcd(void); - -#ifdef RTCD_C -static void setup_rtcd_internal(void) -{ - //Only MIPS has something here, but it is not supported -} -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif diff --git a/thirdparty/libvpx/vpx_util/endian_inl.h b/thirdparty/libvpx/vpx_util/endian_inl.h deleted file mode 100644 index 37bdce1ccd..0000000000 --- a/thirdparty/libvpx/vpx_util/endian_inl.h +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright 2014 Google Inc. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -// ----------------------------------------------------------------------------- -// -// Endian related functions. 
- -#ifndef VPX_UTIL_ENDIAN_INL_H_ -#define VPX_UTIL_ENDIAN_INL_H_ - -#include <stdlib.h> -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" - -#if defined(__GNUC__) -# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__) -# define LOCAL_GCC_PREREQ(maj, min) \ - (LOCAL_GCC_VERSION >= (((maj) << 8) | (min))) -#else -# define LOCAL_GCC_VERSION 0 -# define LOCAL_GCC_PREREQ(maj, min) 0 -#endif - -// handle clang compatibility -#ifndef __has_builtin -# define __has_builtin(x) 0 -#endif - -// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) -#if !defined(WORDS_BIGENDIAN) && \ - (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \ - (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) -#define WORDS_BIGENDIAN -#endif - -#if defined(WORDS_BIGENDIAN) -#define HToLE32 BSwap32 -#define HToLE16 BSwap16 -#define HToBE64(x) (x) -#define HToBE32(x) (x) -#else -#define HToLE32(x) (x) -#define HToLE16(x) (x) -#define HToBE64(X) BSwap64(X) -#define HToBE32(X) BSwap32(X) -#endif - -#if LOCAL_GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16) -#define HAVE_BUILTIN_BSWAP16 -#endif - -#if LOCAL_GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap32) -#define HAVE_BUILTIN_BSWAP32 -#endif - -#if LOCAL_GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap64) -#define HAVE_BUILTIN_BSWAP64 -#endif - -#if HAVE_MIPS32 && defined(__mips__) && !defined(__mips64) && \ - defined(__mips_isa_rev) && (__mips_isa_rev >= 2) && (__mips_isa_rev < 6) -#define VPX_USE_MIPS32_R2 -#endif - -static INLINE uint16_t BSwap16(uint16_t x) { -#if defined(HAVE_BUILTIN_BSWAP16) - return __builtin_bswap16(x); -#elif defined(_MSC_VER) - return _byteswap_ushort(x); -#else - // gcc will recognize a 'rorw $8, ...' 
here: - return (x >> 8) | ((x & 0xff) << 8); -#endif // HAVE_BUILTIN_BSWAP16 -} - -static INLINE uint32_t BSwap32(uint32_t x) { -#if defined(VPX_USE_MIPS32_R2) - uint32_t ret; - __asm__ volatile ( - "wsbh %[ret], %[x] \n\t" - "rotr %[ret], %[ret], 16 \n\t" - : [ret]"=r"(ret) - : [x]"r"(x) - ); - return ret; -#elif defined(HAVE_BUILTIN_BSWAP32) - return __builtin_bswap32(x); -#elif defined(__i386__) || defined(__x86_64__) - uint32_t swapped_bytes; - __asm__ volatile("bswap %0" : "=r"(swapped_bytes) : "0"(x)); - return swapped_bytes; -#elif defined(_MSC_VER) - return (uint32_t)_byteswap_ulong(x); -#else - return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24); -#endif // HAVE_BUILTIN_BSWAP32 -} - -static INLINE uint64_t BSwap64(uint64_t x) { -#if defined(HAVE_BUILTIN_BSWAP64) - return __builtin_bswap64(x); -#elif defined(__x86_64__) - uint64_t swapped_bytes; - __asm__ volatile("bswapq %0" : "=r"(swapped_bytes) : "0"(x)); - return swapped_bytes; -#elif defined(_MSC_VER) - return (uint64_t)_byteswap_uint64(x); -#else // generic code for swapping 64-bit values (suggested by bdb@) - x = ((x & 0xffffffff00000000ull) >> 32) | ((x & 0x00000000ffffffffull) << 32); - x = ((x & 0xffff0000ffff0000ull) >> 16) | ((x & 0x0000ffff0000ffffull) << 16); - x = ((x & 0xff00ff00ff00ff00ull) >> 8) | ((x & 0x00ff00ff00ff00ffull) << 8); - return x; -#endif // HAVE_BUILTIN_BSWAP64 -} - -#endif // VPX_UTIL_ENDIAN_INL_H_ diff --git a/thirdparty/libvpx/vpx_util/vpx_thread.c b/thirdparty/libvpx/vpx_util/vpx_thread.c deleted file mode 100644 index 0bb0125bd4..0000000000 --- a/thirdparty/libvpx/vpx_util/vpx_thread.c +++ /dev/null @@ -1,184 +0,0 @@ -// Copyright 2013 Google Inc. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. 
All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. -// ----------------------------------------------------------------------------- -// -// Multi-threaded worker -// -// Original source: -// http://git.chromium.org/webm/libwebp.git -// 100644 blob 264210ba2807e4da47eb5d18c04cf869d89b9784 src/utils/thread.c - -#include <assert.h> -#include <string.h> // for memset() -#include "./vpx_thread.h" -#include "vpx_mem/vpx_mem.h" - -#if CONFIG_MULTITHREAD - -struct VPxWorkerImpl { - pthread_mutex_t mutex_; - pthread_cond_t condition_; - pthread_t thread_; -}; - -//------------------------------------------------------------------------------ - -static void execute(VPxWorker *const worker); // Forward declaration. - -static THREADFN thread_loop(void *ptr) { - VPxWorker *const worker = (VPxWorker*)ptr; - int done = 0; - while (!done) { - pthread_mutex_lock(&worker->impl_->mutex_); - while (worker->status_ == OK) { // wait in idling mode - pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_); - } - if (worker->status_ == WORK) { - execute(worker); - worker->status_ = OK; - } else if (worker->status_ == NOT_OK) { // finish the worker - done = 1; - } - // signal to the main thread that we're done (for sync()) - pthread_cond_signal(&worker->impl_->condition_); - pthread_mutex_unlock(&worker->impl_->mutex_); - } - return THREAD_RETURN(NULL); // Thread is finished -} - -// main thread state control -static void change_state(VPxWorker *const worker, - VPxWorkerStatus new_status) { - // No-op when attempting to change state on a thread that didn't come up. - // Checking status_ without acquiring the lock first would result in a data - // race. 
- if (worker->impl_ == NULL) return; - - pthread_mutex_lock(&worker->impl_->mutex_); - if (worker->status_ >= OK) { - // wait for the worker to finish - while (worker->status_ != OK) { - pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_); - } - // assign new status and release the working thread if needed - if (new_status != OK) { - worker->status_ = new_status; - pthread_cond_signal(&worker->impl_->condition_); - } - } - pthread_mutex_unlock(&worker->impl_->mutex_); -} - -#endif // CONFIG_MULTITHREAD - -//------------------------------------------------------------------------------ - -static void init(VPxWorker *const worker) { - memset(worker, 0, sizeof(*worker)); - worker->status_ = NOT_OK; -} - -static int sync(VPxWorker *const worker) { -#if CONFIG_MULTITHREAD - change_state(worker, OK); -#endif - assert(worker->status_ <= OK); - return !worker->had_error; -} - -static int reset(VPxWorker *const worker) { - int ok = 1; - worker->had_error = 0; - if (worker->status_ < OK) { -#if CONFIG_MULTITHREAD - worker->impl_ = (VPxWorkerImpl*)vpx_calloc(1, sizeof(*worker->impl_)); - if (worker->impl_ == NULL) { - return 0; - } - if (pthread_mutex_init(&worker->impl_->mutex_, NULL)) { - goto Error; - } - if (pthread_cond_init(&worker->impl_->condition_, NULL)) { - pthread_mutex_destroy(&worker->impl_->mutex_); - goto Error; - } - pthread_mutex_lock(&worker->impl_->mutex_); - ok = !pthread_create(&worker->impl_->thread_, NULL, thread_loop, worker); - if (ok) worker->status_ = OK; - pthread_mutex_unlock(&worker->impl_->mutex_); - if (!ok) { - pthread_mutex_destroy(&worker->impl_->mutex_); - pthread_cond_destroy(&worker->impl_->condition_); - Error: - vpx_free(worker->impl_); - worker->impl_ = NULL; - return 0; - } -#else - worker->status_ = OK; -#endif - } else if (worker->status_ > OK) { - ok = sync(worker); - } - assert(!ok || (worker->status_ == OK)); - return ok; -} - -static void execute(VPxWorker *const worker) { - if (worker->hook != NULL) { - 
worker->had_error |= !worker->hook(worker->data1, worker->data2); - } -} - -static void launch(VPxWorker *const worker) { -#if CONFIG_MULTITHREAD - change_state(worker, WORK); -#else - execute(worker); -#endif -} - -static void end(VPxWorker *const worker) { -#if CONFIG_MULTITHREAD - if (worker->impl_ != NULL) { - change_state(worker, NOT_OK); - pthread_join(worker->impl_->thread_, NULL); - pthread_mutex_destroy(&worker->impl_->mutex_); - pthread_cond_destroy(&worker->impl_->condition_); - vpx_free(worker->impl_); - worker->impl_ = NULL; - } -#else - worker->status_ = NOT_OK; - assert(worker->impl_ == NULL); -#endif - assert(worker->status_ == NOT_OK); -} - -//------------------------------------------------------------------------------ - -static VPxWorkerInterface g_worker_interface = { - init, reset, sync, launch, execute, end -}; - -int vpx_set_worker_interface(const VPxWorkerInterface* const winterface) { - if (winterface == NULL || - winterface->init == NULL || winterface->reset == NULL || - winterface->sync == NULL || winterface->launch == NULL || - winterface->execute == NULL || winterface->end == NULL) { - return 0; - } - g_worker_interface = *winterface; - return 1; -} - -const VPxWorkerInterface *vpx_get_worker_interface(void) { - return &g_worker_interface; -} - -//------------------------------------------------------------------------------ diff --git a/thirdparty/libvpx/vpx_util/vpx_thread.h b/thirdparty/libvpx/vpx_util/vpx_thread.h deleted file mode 100644 index 2062abd75f..0000000000 --- a/thirdparty/libvpx/vpx_util/vpx_thread.h +++ /dev/null @@ -1,369 +0,0 @@ -// Copyright 2013 Google Inc. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the COPYING file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. 
-// ----------------------------------------------------------------------------- -// -// Multi-threaded worker -// -// Original source: -// http://git.chromium.org/webm/libwebp.git -// 100644 blob 7bd451b124ae3b81596abfbcc823e3cb129d3a38 src/utils/thread.h - -#ifndef VPX_THREAD_H_ -#define VPX_THREAD_H_ - -#include "./vpx_config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Set maximum decode threads to be 8 due to the limit of frame buffers -// and not enough semaphores in the emulation layer on windows. -#define MAX_DECODE_THREADS 8 - -#if CONFIG_MULTITHREAD - -#if defined(_WIN32) && !HAVE_PTHREAD_H -#include <errno.h> // NOLINT -#include <process.h> // NOLINT -#include <windows.h> // NOLINT -typedef HANDLE pthread_t; -typedef CRITICAL_SECTION pthread_mutex_t; -typedef struct { - HANDLE waiting_sem_; - HANDLE received_sem_; - HANDLE signal_event_; -} pthread_cond_t; - -//------------------------------------------------------------------------------ -// simplistic pthread emulation layer - -// _beginthreadex requires __stdcall -#define THREADFN unsigned int __stdcall -#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val) - -static INLINE int pthread_create(pthread_t* const thread, const void* attr, - unsigned int (__stdcall *start)(void*), - void* arg) { - (void)attr; - *thread = (pthread_t)_beginthreadex(NULL, /* void *security */ - 0, /* unsigned stack_size */ - start, - arg, - 0, /* unsigned initflag */ - NULL); /* unsigned *thrdaddr */ - if (*thread == NULL) return 1; - SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL); - return 0; -} - -static INLINE int pthread_join(pthread_t thread, void** value_ptr) { - (void)value_ptr; - return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 || - CloseHandle(thread) == 0); -} - -// Mutex -static INLINE int pthread_mutex_init(pthread_mutex_t *const mutex, - void* mutexattr) { - (void)mutexattr; - InitializeCriticalSection(mutex); - return 0; -} - -static INLINE int 
pthread_mutex_trylock(pthread_mutex_t *const mutex) { - return TryEnterCriticalSection(mutex) ? 0 : EBUSY; -} - -static INLINE int pthread_mutex_lock(pthread_mutex_t *const mutex) { - EnterCriticalSection(mutex); - return 0; -} - -static INLINE int pthread_mutex_unlock(pthread_mutex_t *const mutex) { - LeaveCriticalSection(mutex); - return 0; -} - -static INLINE int pthread_mutex_destroy(pthread_mutex_t *const mutex) { - DeleteCriticalSection(mutex); - return 0; -} - -// Condition -static INLINE int pthread_cond_destroy(pthread_cond_t *const condition) { - int ok = 1; - ok &= (CloseHandle(condition->waiting_sem_) != 0); - ok &= (CloseHandle(condition->received_sem_) != 0); - ok &= (CloseHandle(condition->signal_event_) != 0); - return !ok; -} - -static INLINE int pthread_cond_init(pthread_cond_t *const condition, - void* cond_attr) { - (void)cond_attr; - condition->waiting_sem_ = CreateSemaphore(NULL, 0, MAX_DECODE_THREADS, NULL); - condition->received_sem_ = CreateSemaphore(NULL, 0, MAX_DECODE_THREADS, NULL); - condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL); - if (condition->waiting_sem_ == NULL || - condition->received_sem_ == NULL || - condition->signal_event_ == NULL) { - pthread_cond_destroy(condition); - return 1; - } - return 0; -} - -static INLINE int pthread_cond_signal(pthread_cond_t *const condition) { - int ok = 1; - if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) { - // a thread is waiting in pthread_cond_wait: allow it to be notified - ok = SetEvent(condition->signal_event_); - // wait until the event is consumed so the signaler cannot consume - // the event via its own pthread_cond_wait. 
- ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) != - WAIT_OBJECT_0); - } - return !ok; -} - -static INLINE int pthread_cond_wait(pthread_cond_t *const condition, - pthread_mutex_t *const mutex) { - int ok; - // note that there is a consumer available so the signal isn't dropped in - // pthread_cond_signal - if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL)) - return 1; - // now unlock the mutex so pthread_cond_signal may be issued - pthread_mutex_unlock(mutex); - ok = (WaitForSingleObject(condition->signal_event_, INFINITE) == - WAIT_OBJECT_0); - ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL); - pthread_mutex_lock(mutex); - return !ok; -} -#elif defined(__OS2__) -#define INCL_DOS -#include <os2.h> // NOLINT - -#include <errno.h> // NOLINT -#include <stdlib.h> // NOLINT -#include <sys/builtin.h> // NOLINT - -#define pthread_t TID -#define pthread_mutex_t HMTX - -typedef struct { - HEV event_sem_; - HEV ack_sem_; - volatile unsigned wait_count_; -} pthread_cond_t; - -//------------------------------------------------------------------------------ -// simplistic pthread emulation layer - -#define THREADFN void * -#define THREAD_RETURN(val) (val) - -typedef struct { - void* (*start_)(void*); - void* arg_; -} thread_arg; - -static void thread_start(void* arg) { - thread_arg targ = *(thread_arg *)arg; - free(arg); - - targ.start_(targ.arg_); -} - -static INLINE int pthread_create(pthread_t* const thread, const void* attr, - void* (*start)(void*), - void* arg) { - int tid; - thread_arg *targ = (thread_arg *)malloc(sizeof(*targ)); - if (targ == NULL) return 1; - - (void)attr; - - targ->start_ = start; - targ->arg_ = arg; - tid = (pthread_t)_beginthread(thread_start, NULL, 1024 * 1024, targ); - if (tid == -1) { - free(targ); - return 1; - } - - *thread = tid; - return 0; -} - -static INLINE int pthread_join(pthread_t thread, void** value_ptr) { - (void)value_ptr; - return DosWaitThread(&thread, DCWW_WAIT) != 0; -} - -// Mutex -static INLINE 
int pthread_mutex_init(pthread_mutex_t *const mutex, - void* mutexattr) { - (void)mutexattr; - return DosCreateMutexSem(NULL, mutex, 0, FALSE) != 0; -} - -static INLINE int pthread_mutex_trylock(pthread_mutex_t *const mutex) { - return DosRequestMutexSem(*mutex, SEM_IMMEDIATE_RETURN) == 0 ? 0 : EBUSY; -} - -static INLINE int pthread_mutex_lock(pthread_mutex_t *const mutex) { - return DosRequestMutexSem(*mutex, SEM_INDEFINITE_WAIT) != 0; -} - -static INLINE int pthread_mutex_unlock(pthread_mutex_t *const mutex) { - return DosReleaseMutexSem(*mutex) != 0; -} - -static INLINE int pthread_mutex_destroy(pthread_mutex_t *const mutex) { - return DosCloseMutexSem(*mutex) != 0; -} - -// Condition -static INLINE int pthread_cond_destroy(pthread_cond_t *const condition) { - int ok = 1; - ok &= DosCloseEventSem(condition->event_sem_) == 0; - ok &= DosCloseEventSem(condition->ack_sem_) == 0; - return !ok; -} - -static INLINE int pthread_cond_init(pthread_cond_t *const condition, - void* cond_attr) { - int ok = 1; - (void)cond_attr; - - ok &= DosCreateEventSem(NULL, &condition->event_sem_, DCE_POSTONE, FALSE) - == 0; - ok &= DosCreateEventSem(NULL, &condition->ack_sem_, DCE_POSTONE, FALSE) == 0; - if (!ok) { - pthread_cond_destroy(condition); - return 1; - } - condition->wait_count_ = 0; - return 0; -} - -static INLINE int pthread_cond_signal(pthread_cond_t *const condition) { - int ok = 1; - - if (!__atomic_cmpxchg32(&condition->wait_count_, 0, 0)) { - ok &= DosPostEventSem(condition->event_sem_) == 0; - ok &= DosWaitEventSem(condition->ack_sem_, SEM_INDEFINITE_WAIT) == 0; - } - - return !ok; -} - -static INLINE int pthread_cond_broadcast(pthread_cond_t *const condition) { - int ok = 1; - - while (!__atomic_cmpxchg32(&condition->wait_count_, 0, 0)) - ok &= pthread_cond_signal(condition) == 0; - - return !ok; -} - -static INLINE int pthread_cond_wait(pthread_cond_t *const condition, - pthread_mutex_t *const mutex) { - int ok = 1; - - __atomic_increment(&condition->wait_count_); 
- - ok &= pthread_mutex_unlock(mutex) == 0; - - ok &= DosWaitEventSem(condition->event_sem_, SEM_INDEFINITE_WAIT) == 0; - - __atomic_decrement(&condition->wait_count_); - - ok &= DosPostEventSem(condition->ack_sem_) == 0; - - pthread_mutex_lock(mutex); - - return !ok; -} -#else // _WIN32 -#include <pthread.h> // NOLINT -# define THREADFN void* -# define THREAD_RETURN(val) val -#endif - -#endif // CONFIG_MULTITHREAD - -// State of the worker thread object -typedef enum { - NOT_OK = 0, // object is unusable - OK, // ready to work - WORK // busy finishing the current task -} VPxWorkerStatus; - -// Function to be called by the worker thread. Takes two opaque pointers as -// arguments (data1 and data2), and should return false in case of error. -typedef int (*VPxWorkerHook)(void*, void*); - -// Platform-dependent implementation details for the worker. -typedef struct VPxWorkerImpl VPxWorkerImpl; - -// Synchronization object used to launch job in the worker thread -typedef struct { - VPxWorkerImpl *impl_; - VPxWorkerStatus status_; - VPxWorkerHook hook; // hook to call - void *data1; // first argument passed to 'hook' - void *data2; // second argument passed to 'hook' - int had_error; // return value of the last call to 'hook' -} VPxWorker; - -// The interface for all thread-worker related functions. All these functions -// must be implemented. -typedef struct { - // Must be called first, before any other method. - void (*init)(VPxWorker *const worker); - // Must be called to initialize the object and spawn the thread. Re-entrant. - // Will potentially launch the thread. Returns false in case of error. - int (*reset)(VPxWorker *const worker); - // Makes sure the previous work is finished. Returns true if worker->had_error - // was not set and no error condition was triggered by the working thread. - int (*sync)(VPxWorker *const worker); - // Triggers the thread to call hook() with data1 and data2 arguments. 
These - // hook/data1/data2 values can be changed at any time before calling this - // function, but not be changed afterward until the next call to Sync(). - void (*launch)(VPxWorker *const worker); - // This function is similar to launch() except that it calls the - // hook directly instead of using a thread. Convenient to bypass the thread - // mechanism while still using the VPxWorker structs. sync() must - // still be called afterward (for error reporting). - void (*execute)(VPxWorker *const worker); - // Kill the thread and terminate the object. To use the object again, one - // must call reset() again. - void (*end)(VPxWorker *const worker); -} VPxWorkerInterface; - -// Install a new set of threading functions, overriding the defaults. This -// should be done before any workers are started, i.e., before any encoding or -// decoding takes place. The contents of the interface struct are copied, it -// is safe to free the corresponding memory after this call. This function is -// not thread-safe. Return false in case of invalid pointer or methods. -int vpx_set_worker_interface(const VPxWorkerInterface *const winterface); - -// Retrieve the currently set thread worker interface. 
-const VPxWorkerInterface *vpx_get_worker_interface(void); - -//------------------------------------------------------------------------------ - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VPX_THREAD_H_ diff --git a/thirdparty/libvpx/vpx_version.h b/thirdparty/libvpx/vpx_version.h deleted file mode 100644 index 5cff3b429f..0000000000 --- a/thirdparty/libvpx/vpx_version.h +++ /dev/null @@ -1,7 +0,0 @@ -#define VERSION_MAJOR 1 -#define VERSION_MINOR 6 -#define VERSION_PATCH 0 -#define VERSION_EXTRA "" -#define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH)) -#define VERSION_STRING_NOSP "v1.6.0" -#define VERSION_STRING " v1.6.0" diff --git a/thirdparty/opus/COPYING b/thirdparty/opus/COPYING deleted file mode 100644 index 9c739c34a3..0000000000 --- a/thirdparty/opus/COPYING +++ /dev/null @@ -1,44 +0,0 @@ -Copyright 2001-2011 Xiph.Org, Skype Limited, Octasic, - Jean-Marc Valin, Timothy B. Terriberry, - CSIRO, Gregory Maxwell, Mark Borgerding, - Erik de Castro Lopo - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -- Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER -OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Opus is subject to the royalty-free patent licenses which are -specified at: - -Xiph.Org Foundation: -https://datatracker.ietf.org/ipr/1524/ - -Microsoft Corporation: -https://datatracker.ietf.org/ipr/1914/ - -Broadcom Corporation: -https://datatracker.ietf.org/ipr/1526/ diff --git a/thirdparty/opus/analysis.c b/thirdparty/opus/analysis.c deleted file mode 100644 index 663431a436..0000000000 --- a/thirdparty/opus/analysis.c +++ /dev/null @@ -1,672 +0,0 @@ -/* Copyright (c) 2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "kiss_fft.h" -#include "celt.h" -#include "modes.h" -#include "arch.h" -#include "quant_bands.h" -#include <stdio.h> -#include "analysis.h" -#include "mlp.h" -#include "stack_alloc.h" - -#ifndef M_PI -#define M_PI 3.141592653 -#endif - -static const float dct_table[128] = { - 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, - 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, - 0.351851f, 0.338330f, 0.311806f, 0.273300f, 0.224292f, 0.166664f, 0.102631f, 0.034654f, - -0.034654f,-0.102631f,-0.166664f,-0.224292f,-0.273300f,-0.311806f,-0.338330f,-0.351851f, - 0.346760f, 0.293969f, 0.196424f, 0.068975f,-0.068975f,-0.196424f,-0.293969f,-0.346760f, - -0.346760f,-0.293969f,-0.196424f,-0.068975f, 0.068975f, 0.196424f, 0.293969f, 0.346760f, - 0.338330f, 0.224292f, 0.034654f,-0.166664f,-0.311806f,-0.351851f,-0.273300f,-0.102631f, - 0.102631f, 0.273300f, 0.351851f, 0.311806f, 0.166664f,-0.034654f,-0.224292f,-0.338330f, - 0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f, - 0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f, - 0.311806f, 0.034654f,-0.273300f,-0.338330f,-0.102631f, 0.224292f, 0.351851f, 0.166664f, - -0.166664f,-0.351851f,-0.224292f, 0.102631f, 0.338330f, 0.273300f,-0.034654f,-0.311806f, - 0.293969f,-0.068975f,-0.346760f,-0.196424f, 
0.196424f, 0.346760f, 0.068975f,-0.293969f, - -0.293969f, 0.068975f, 0.346760f, 0.196424f,-0.196424f,-0.346760f,-0.068975f, 0.293969f, - 0.273300f,-0.166664f,-0.338330f, 0.034654f, 0.351851f, 0.102631f,-0.311806f,-0.224292f, - 0.224292f, 0.311806f,-0.102631f,-0.351851f,-0.034654f, 0.338330f, 0.166664f,-0.273300f, -}; - -static const float analysis_window[240] = { - 0.000043f, 0.000171f, 0.000385f, 0.000685f, 0.001071f, 0.001541f, 0.002098f, 0.002739f, - 0.003466f, 0.004278f, 0.005174f, 0.006156f, 0.007222f, 0.008373f, 0.009607f, 0.010926f, - 0.012329f, 0.013815f, 0.015385f, 0.017037f, 0.018772f, 0.020590f, 0.022490f, 0.024472f, - 0.026535f, 0.028679f, 0.030904f, 0.033210f, 0.035595f, 0.038060f, 0.040604f, 0.043227f, - 0.045928f, 0.048707f, 0.051564f, 0.054497f, 0.057506f, 0.060591f, 0.063752f, 0.066987f, - 0.070297f, 0.073680f, 0.077136f, 0.080665f, 0.084265f, 0.087937f, 0.091679f, 0.095492f, - 0.099373f, 0.103323f, 0.107342f, 0.111427f, 0.115579f, 0.119797f, 0.124080f, 0.128428f, - 0.132839f, 0.137313f, 0.141849f, 0.146447f, 0.151105f, 0.155823f, 0.160600f, 0.165435f, - 0.170327f, 0.175276f, 0.180280f, 0.185340f, 0.190453f, 0.195619f, 0.200838f, 0.206107f, - 0.211427f, 0.216797f, 0.222215f, 0.227680f, 0.233193f, 0.238751f, 0.244353f, 0.250000f, - 0.255689f, 0.261421f, 0.267193f, 0.273005f, 0.278856f, 0.284744f, 0.290670f, 0.296632f, - 0.302628f, 0.308658f, 0.314721f, 0.320816f, 0.326941f, 0.333097f, 0.339280f, 0.345492f, - 0.351729f, 0.357992f, 0.364280f, 0.370590f, 0.376923f, 0.383277f, 0.389651f, 0.396044f, - 0.402455f, 0.408882f, 0.415325f, 0.421783f, 0.428254f, 0.434737f, 0.441231f, 0.447736f, - 0.454249f, 0.460770f, 0.467298f, 0.473832f, 0.480370f, 0.486912f, 0.493455f, 0.500000f, - 0.506545f, 0.513088f, 0.519630f, 0.526168f, 0.532702f, 0.539230f, 0.545751f, 0.552264f, - 0.558769f, 0.565263f, 0.571746f, 0.578217f, 0.584675f, 0.591118f, 0.597545f, 0.603956f, - 0.610349f, 0.616723f, 0.623077f, 0.629410f, 0.635720f, 0.642008f, 0.648271f, 0.654508f, - 0.660720f, 
0.666903f, 0.673059f, 0.679184f, 0.685279f, 0.691342f, 0.697372f, 0.703368f, - 0.709330f, 0.715256f, 0.721144f, 0.726995f, 0.732807f, 0.738579f, 0.744311f, 0.750000f, - 0.755647f, 0.761249f, 0.766807f, 0.772320f, 0.777785f, 0.783203f, 0.788573f, 0.793893f, - 0.799162f, 0.804381f, 0.809547f, 0.814660f, 0.819720f, 0.824724f, 0.829673f, 0.834565f, - 0.839400f, 0.844177f, 0.848895f, 0.853553f, 0.858151f, 0.862687f, 0.867161f, 0.871572f, - 0.875920f, 0.880203f, 0.884421f, 0.888573f, 0.892658f, 0.896677f, 0.900627f, 0.904508f, - 0.908321f, 0.912063f, 0.915735f, 0.919335f, 0.922864f, 0.926320f, 0.929703f, 0.933013f, - 0.936248f, 0.939409f, 0.942494f, 0.945503f, 0.948436f, 0.951293f, 0.954072f, 0.956773f, - 0.959396f, 0.961940f, 0.964405f, 0.966790f, 0.969096f, 0.971321f, 0.973465f, 0.975528f, - 0.977510f, 0.979410f, 0.981228f, 0.982963f, 0.984615f, 0.986185f, 0.987671f, 0.989074f, - 0.990393f, 0.991627f, 0.992778f, 0.993844f, 0.994826f, 0.995722f, 0.996534f, 0.997261f, - 0.997902f, 0.998459f, 0.998929f, 0.999315f, 0.999615f, 0.999829f, 0.999957f, 1.000000f, -}; - -static const int tbands[NB_TBANDS+1] = { - 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120 -}; - -static const int extra_bands[NB_TOT_BANDS+1] = { - 1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200 -}; - -/*static const float tweight[NB_TBANDS+1] = { - .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5 -};*/ - -#define NB_TONAL_SKIP_BANDS 9 - -#define cA 0.43157974f -#define cB 0.67848403f -#define cC 0.08595542f -#define cE ((float)M_PI/2) -static OPUS_INLINE float fast_atan2f(float y, float x) { - float x2, y2; - /* Should avoid underflow on the values we'll get */ - if (ABS16(x)+ABS16(y)<1e-9f) - { - x*=1e12f; - y*=1e12f; - } - x2 = x*x; - y2 = y*y; - if(x2<y2){ - float den = (y2 + cB*x2) * (y2 + cC*x2); - if (den!=0) - return -x*y*(y2 + cA*x2) / den + (y<0 ? -cE : cE); - else - return (y<0 ? 
-cE : cE); - }else{ - float den = (x2 + cB*y2) * (x2 + cC*y2); - if (den!=0) - return x*y*(x2 + cA*y2) / den + (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE); - else - return (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE); - } -} - -void tonality_analysis_init(TonalityAnalysisState *tonal) -{ - /* Initialize reusable fields. */ - tonal->arch = opus_select_arch(); - /* Clear remaining fields. */ - tonality_analysis_reset(tonal); -} - -void tonality_analysis_reset(TonalityAnalysisState *tonal) -{ - /* Clear non-reusable fields. */ - char *start = (char*)&tonal->TONALITY_ANALYSIS_RESET_START; - OPUS_CLEAR(start, sizeof(TonalityAnalysisState) - (start - (char*)tonal)); -} - -void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len) -{ - int pos; - int curr_lookahead; - float psum; - int i; - - pos = tonal->read_pos; - curr_lookahead = tonal->write_pos-tonal->read_pos; - if (curr_lookahead<0) - curr_lookahead += DETECT_SIZE; - - if (len > 480 && pos != tonal->write_pos) - { - pos++; - if (pos==DETECT_SIZE) - pos=0; - } - if (pos == tonal->write_pos) - pos--; - if (pos<0) - pos = DETECT_SIZE-1; - OPUS_COPY(info_out, &tonal->info[pos], 1); - tonal->read_subframe += len/120; - while (tonal->read_subframe>=4) - { - tonal->read_subframe -= 4; - tonal->read_pos++; - } - if (tonal->read_pos>=DETECT_SIZE) - tonal->read_pos-=DETECT_SIZE; - - /* Compensate for the delay in the features themselves. 
- FIXME: Need a better estimate the 10 I just made up */ - curr_lookahead = IMAX(curr_lookahead-10, 0); - - psum=0; - /* Summing the probability of transition patterns that involve music at - time (DETECT_SIZE-curr_lookahead-1) */ - for (i=0;i<DETECT_SIZE-curr_lookahead;i++) - psum += tonal->pmusic[i]; - for (;i<DETECT_SIZE;i++) - psum += tonal->pspeech[i]; - psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence; - /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/ - - info_out->music_prob = psum; -} - -static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix) -{ - int i, b; - const kiss_fft_state *kfft; - VARDECL(kiss_fft_cpx, in); - VARDECL(kiss_fft_cpx, out); - int N = 480, N2=240; - float * OPUS_RESTRICT A = tonal->angle; - float * OPUS_RESTRICT dA = tonal->d_angle; - float * OPUS_RESTRICT d2A = tonal->d2_angle; - VARDECL(float, tonality); - VARDECL(float, noisiness); - float band_tonality[NB_TBANDS]; - float logE[NB_TBANDS]; - float BFCC[8]; - float features[25]; - float frame_tonality; - float max_frame_tonality; - /*float tw_sum=0;*/ - float frame_noisiness; - const float pi4 = (float)(M_PI*M_PI*M_PI*M_PI); - float slope=0; - float frame_stationarity; - float relativeE; - float frame_probs[2]; - float alpha, alphaE, alphaE2; - float frame_loudness; - float bandwidth_mask; - int bandwidth=0; - float maxE = 0; - float noise_floor; - int remaining; - AnalysisInfo *info; - SAVE_STACK; - - tonal->last_transition++; - alpha = 1.f/IMIN(20, 1+tonal->count); - alphaE = 1.f/IMIN(50, 1+tonal->count); - alphaE2 = 1.f/IMIN(1000, 1+tonal->count); - - if (tonal->count<4) - tonal->music_prob = .5; - kfft = celt_mode->mdct.kfft[0]; - if (tonal->count==0) - tonal->mem_fill = 240; - downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C); - if 
(tonal->mem_fill+len < ANALYSIS_BUF_SIZE) - { - tonal->mem_fill += len; - /* Don't have enough to update the analysis */ - RESTORE_STACK; - return; - } - info = &tonal->info[tonal->write_pos++]; - if (tonal->write_pos>=DETECT_SIZE) - tonal->write_pos-=DETECT_SIZE; - - ALLOC(in, 480, kiss_fft_cpx); - ALLOC(out, 480, kiss_fft_cpx); - ALLOC(tonality, 240, float); - ALLOC(noisiness, 240, float); - for (i=0;i<N2;i++) - { - float w = analysis_window[i]; - in[i].r = (kiss_fft_scalar)(w*tonal->inmem[i]); - in[i].i = (kiss_fft_scalar)(w*tonal->inmem[N2+i]); - in[N-i-1].r = (kiss_fft_scalar)(w*tonal->inmem[N-i-1]); - in[N-i-1].i = (kiss_fft_scalar)(w*tonal->inmem[N+N2-i-1]); - } - OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240); - remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill); - downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C); - tonal->mem_fill = 240 + remaining; - opus_fft(kfft, in, out, tonal->arch); -#ifndef FIXED_POINT - /* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. 
*/ - if (celt_isnan(out[0].r)) - { - info->valid = 0; - RESTORE_STACK; - return; - } -#endif - - for (i=1;i<N2;i++) - { - float X1r, X2r, X1i, X2i; - float angle, d_angle, d2_angle; - float angle2, d_angle2, d2_angle2; - float mod1, mod2, avg_mod; - X1r = (float)out[i].r+out[N-i].r; - X1i = (float)out[i].i-out[N-i].i; - X2r = (float)out[i].i+out[N-i].i; - X2i = (float)out[N-i].r-out[i].r; - - angle = (float)(.5f/M_PI)*fast_atan2f(X1i, X1r); - d_angle = angle - A[i]; - d2_angle = d_angle - dA[i]; - - angle2 = (float)(.5f/M_PI)*fast_atan2f(X2i, X2r); - d_angle2 = angle2 - angle; - d2_angle2 = d_angle2 - d_angle; - - mod1 = d2_angle - (float)floor(.5+d2_angle); - noisiness[i] = ABS16(mod1); - mod1 *= mod1; - mod1 *= mod1; - - mod2 = d2_angle2 - (float)floor(.5+d2_angle2); - noisiness[i] += ABS16(mod2); - mod2 *= mod2; - mod2 *= mod2; - - avg_mod = .25f*(d2A[i]+2.f*mod1+mod2); - tonality[i] = 1.f/(1.f+40.f*16.f*pi4*avg_mod)-.015f; - - A[i] = angle2; - dA[i] = d_angle2; - d2A[i] = mod2; - } - - frame_tonality = 0; - max_frame_tonality = 0; - /*tw_sum = 0;*/ - info->activity = 0; - frame_noisiness = 0; - frame_stationarity = 0; - if (!tonal->count) - { - for (b=0;b<NB_TBANDS;b++) - { - tonal->lowE[b] = 1e10; - tonal->highE[b] = -1e10; - } - } - relativeE = 0; - frame_loudness = 0; - for (b=0;b<NB_TBANDS;b++) - { - float E=0, tE=0, nE=0; - float L1, L2; - float stationarity; - for (i=tbands[b];i<tbands[b+1];i++) - { - float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r - + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i; -#ifdef FIXED_POINT - /* FIXME: It's probably best to change the BFCC filter initial state instead */ - binE *= 5.55e-17f; -#endif - E += binE; - tE += binE*tonality[i]; - nE += binE*2.f*(.5f-noisiness[i]); - } -#ifndef FIXED_POINT - /* Check for extreme band energies that could cause NaNs later. 
*/ - if (!(E<1e9f) || celt_isnan(E)) - { - info->valid = 0; - RESTORE_STACK; - return; - } -#endif - - tonal->E[tonal->E_count][b] = E; - frame_noisiness += nE/(1e-15f+E); - - frame_loudness += (float)sqrt(E+1e-10f); - logE[b] = (float)log(E+1e-10f); - tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f); - tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f); - if (tonal->highE[b] < tonal->lowE[b]+1.f) - { - tonal->highE[b]+=.5f; - tonal->lowE[b]-=.5f; - } - relativeE += (logE[b]-tonal->lowE[b])/(1e-15f+tonal->highE[b]-tonal->lowE[b]); - - L1=L2=0; - for (i=0;i<NB_FRAMES;i++) - { - L1 += (float)sqrt(tonal->E[i][b]); - L2 += tonal->E[i][b]; - } - - stationarity = MIN16(0.99f,L1/(float)sqrt(1e-15+NB_FRAMES*L2)); - stationarity *= stationarity; - stationarity *= stationarity; - frame_stationarity += stationarity; - /*band_tonality[b] = tE/(1e-15+E)*/; - band_tonality[b] = MAX16(tE/(1e-15f+E), stationarity*tonal->prev_band_tonality[b]); -#if 0 - if (b>=NB_TONAL_SKIP_BANDS) - { - frame_tonality += tweight[b]*band_tonality[b]; - tw_sum += tweight[b]; - } -#else - frame_tonality += band_tonality[b]; - if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS) - frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS]; -#endif - max_frame_tonality = MAX16(max_frame_tonality, (1.f+.03f*(b-NB_TBANDS))*frame_tonality); - slope += band_tonality[b]*(b-8); - /*printf("%f %f ", band_tonality[b], stationarity);*/ - tonal->prev_band_tonality[b] = band_tonality[b]; - } - - bandwidth_mask = 0; - bandwidth = 0; - maxE = 0; - noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8))); -#ifdef FIXED_POINT - noise_floor *= 1<<(15+SIG_SHIFT); -#endif - noise_floor *= noise_floor; - for (b=0;b<NB_TOT_BANDS;b++) - { - float E=0; - int band_start, band_end; - /* Keep a margin of 300 Hz for aliasing */ - band_start = extra_bands[b]; - band_end = extra_bands[b+1]; - for (i=band_start;i<band_end;i++) - { - float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r - + out[i].i*(float)out[i].i + 
out[N-i].i*(float)out[N-i].i; - E += binE; - } - maxE = MAX32(maxE, E); - tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E); - E = MAX32(E, tonal->meanE[b]); - /* Use a simple follower with 13 dB/Bark slope for spreading function */ - bandwidth_mask = MAX32(.05f*bandwidth_mask, E); - /* Consider the band "active" only if all these conditions are met: - 1) less than 10 dB below the simple follower - 2) less than 90 dB below the peak band (maximal masking possible considering - both the ATH and the loudness-dependent slope of the spreading function) - 3) above the PCM quantization noise floor - */ - if (E>.1*bandwidth_mask && E*1e9f > maxE && E > noise_floor*(band_end-band_start)) - bandwidth = b; - } - if (tonal->count<=2) - bandwidth = 20; - frame_loudness = 20*(float)log10(frame_loudness); - tonal->Etracker = MAX32(tonal->Etracker-.03f, frame_loudness); - tonal->lowECount *= (1-alphaE); - if (frame_loudness < tonal->Etracker-30) - tonal->lowECount += alphaE; - - for (i=0;i<8;i++) - { - float sum=0; - for (b=0;b<16;b++) - sum += dct_table[i*16+b]*logE[b]; - BFCC[i] = sum; - } - - frame_stationarity /= NB_TBANDS; - relativeE /= NB_TBANDS; - if (tonal->count<10) - relativeE = .5; - frame_noisiness /= NB_TBANDS; -#if 1 - info->activity = frame_noisiness + (1-frame_noisiness)*relativeE; -#else - info->activity = .5*(1+frame_noisiness-frame_stationarity); -#endif - frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS)); - frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8f); - tonal->prev_tonality = frame_tonality; - - slope /= 8*8; - info->tonality_slope = slope; - - tonal->E_count = (tonal->E_count+1)%NB_FRAMES; - tonal->count++; - info->tonality = frame_tonality; - - for (i=0;i<4;i++) - features[i] = -0.12299f*(BFCC[i]+tonal->mem[i+24]) + 0.49195f*(tonal->mem[i]+tonal->mem[i+16]) + 0.69693f*tonal->mem[i+8] - 1.4349f*tonal->cmean[i]; - - for (i=0;i<4;i++) - tonal->cmean[i] = (1-alpha)*tonal->cmean[i] + alpha*BFCC[i]; - - for 
(i=0;i<4;i++) - features[4+i] = 0.63246f*(BFCC[i]-tonal->mem[i+24]) + 0.31623f*(tonal->mem[i]-tonal->mem[i+16]); - for (i=0;i<3;i++) - features[8+i] = 0.53452f*(BFCC[i]+tonal->mem[i+24]) - 0.26726f*(tonal->mem[i]+tonal->mem[i+16]) -0.53452f*tonal->mem[i+8]; - - if (tonal->count > 5) - { - for (i=0;i<9;i++) - tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i]; - } - - for (i=0;i<8;i++) - { - tonal->mem[i+24] = tonal->mem[i+16]; - tonal->mem[i+16] = tonal->mem[i+8]; - tonal->mem[i+8] = tonal->mem[i]; - tonal->mem[i] = BFCC[i]; - } - for (i=0;i<9;i++) - features[11+i] = (float)sqrt(tonal->std[i]); - features[20] = info->tonality; - features[21] = info->activity; - features[22] = frame_stationarity; - features[23] = info->tonality_slope; - features[24] = tonal->lowECount; - -#ifndef DISABLE_FLOAT_API - mlp_process(&net, features, frame_probs); - frame_probs[0] = .5f*(frame_probs[0]+1); - /* Curve fitting between the MLP probability and the actual probability */ - frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10); - /* Probability of active audio (as opposed to silence) */ - frame_probs[1] = .5f*frame_probs[1]+.5f; - /* Consider that silence has a 50-50 probability. */ - frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5f; - - /*printf("%f %f ", frame_probs[0], frame_probs[1]);*/ - { - /* Probability of state transition */ - float tau; - /* Represents independence of the MLP probabilities, where - beta=1 means fully independent. */ - float beta; - /* Denormalized probability of speech (p0) and music (p1) after update */ - float p0, p1; - /* Probabilities for "all speech" and "all music" */ - float s0, m0; - /* Probability sum for renormalisation */ - float psum; - /* Instantaneous probability of speech and music, with beta pre-applied. 
*/ - float speech0; - float music0; - float p, q; - - /* One transition every 3 minutes of active audio */ - tau = .00005f*frame_probs[1]; - /* Adapt beta based on how "unexpected" the new prob is */ - p = MAX16(.05f,MIN16(.95f,frame_probs[0])); - q = MAX16(.05f,MIN16(.95f,tonal->music_prob)); - beta = .01f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p)); - /* p0 and p1 are the probabilities of speech and music at this frame - using only information from previous frame and applying the - state transition model */ - p0 = (1-tonal->music_prob)*(1-tau) + tonal->music_prob *tau; - p1 = tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau; - /* We apply the current probability with exponent beta to work around - the fact that the probability estimates aren't independent. */ - p0 *= (float)pow(1-frame_probs[0], beta); - p1 *= (float)pow(frame_probs[0], beta); - /* Normalise the probabilities to get the Marokv probability of music. */ - tonal->music_prob = p1/(p0+p1); - info->music_prob = tonal->music_prob; - - /* This chunk of code deals with delayed decision. */ - psum=1e-20f; - /* Instantaneous probability of speech and music, with beta pre-applied. */ - speech0 = (float)pow(1-frame_probs[0], beta); - music0 = (float)pow(frame_probs[0], beta); - if (tonal->count==1) - { - tonal->pspeech[0]=.5; - tonal->pmusic [0]=.5; - } - /* Updated probability of having only speech (s0) or only music (m0), - before considering the new observation. */ - s0 = tonal->pspeech[0] + tonal->pspeech[1]; - m0 = tonal->pmusic [0] + tonal->pmusic [1]; - /* Updates s0 and m0 with instantaneous probability. */ - tonal->pspeech[0] = s0*(1-tau)*speech0; - tonal->pmusic [0] = m0*(1-tau)*music0; - /* Propagate the transition probabilities */ - for (i=1;i<DETECT_SIZE-1;i++) - { - tonal->pspeech[i] = tonal->pspeech[i+1]*speech0; - tonal->pmusic [i] = tonal->pmusic [i+1]*music0; - } - /* Probability that the latest frame is speech, when all the previous ones were music. 
*/ - tonal->pspeech[DETECT_SIZE-1] = m0*tau*speech0; - /* Probability that the latest frame is music, when all the previous ones were speech. */ - tonal->pmusic [DETECT_SIZE-1] = s0*tau*music0; - - /* Renormalise probabilities to 1 */ - for (i=0;i<DETECT_SIZE;i++) - psum += tonal->pspeech[i] + tonal->pmusic[i]; - psum = 1.f/psum; - for (i=0;i<DETECT_SIZE;i++) - { - tonal->pspeech[i] *= psum; - tonal->pmusic [i] *= psum; - } - psum = tonal->pmusic[0]; - for (i=1;i<DETECT_SIZE;i++) - psum += tonal->pspeech[i]; - - /* Estimate our confidence in the speech/music decisions */ - if (frame_probs[1]>.75) - { - if (tonal->music_prob>.9) - { - float adapt; - adapt = 1.f/(++tonal->music_confidence_count); - tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500); - tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence); - } - if (tonal->music_prob<.1) - { - float adapt; - adapt = 1.f/(++tonal->speech_confidence_count); - tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500); - tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence); - } - } else { - if (tonal->music_confidence_count==0) - tonal->music_confidence = .9f; - if (tonal->speech_confidence_count==0) - tonal->speech_confidence = .1f; - } - } - if (tonal->last_music != (tonal->music_prob>.5f)) - tonal->last_transition=0; - tonal->last_music = tonal->music_prob>.5f; -#else - info->music_prob = 0; -#endif - /*for (i=0;i<25;i++) - printf("%f ", features[i]); - printf("\n");*/ - - info->bandwidth = bandwidth; - /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/ - info->noisiness = frame_noisiness; - info->valid = 1; - RESTORE_STACK; -} - -void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, - int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, - int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info) -{ - int offset; - int 
pcm_len; - - if (analysis_pcm != NULL) - { - /* Avoid overflow/wrap-around of the analysis buffer */ - analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size); - - pcm_len = analysis_frame_size - analysis->analysis_offset; - offset = analysis->analysis_offset; - do { - tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix); - offset += 480; - pcm_len -= 480; - } while (pcm_len>0); - analysis->analysis_offset = analysis_frame_size; - - analysis->analysis_offset -= frame_size; - } - - analysis_info->valid = 0; - tonality_get_info(analysis, analysis_info, frame_size); -} diff --git a/thirdparty/opus/analysis.h b/thirdparty/opus/analysis.h deleted file mode 100644 index 9eae56a525..0000000000 --- a/thirdparty/opus/analysis.h +++ /dev/null @@ -1,103 +0,0 @@ -/* Copyright (c) 2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef ANALYSIS_H -#define ANALYSIS_H - -#include "celt.h" -#include "opus_private.h" - -#define NB_FRAMES 8 -#define NB_TBANDS 18 -#define NB_TOT_BANDS 21 -#define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */ - -#define DETECT_SIZE 200 - -typedef struct { - int arch; -#define TONALITY_ANALYSIS_RESET_START angle - float angle[240]; - float d_angle[240]; - float d2_angle[240]; - opus_val32 inmem[ANALYSIS_BUF_SIZE]; - int mem_fill; /* number of usable samples in the buffer */ - float prev_band_tonality[NB_TBANDS]; - float prev_tonality; - float E[NB_FRAMES][NB_TBANDS]; - float lowE[NB_TBANDS]; - float highE[NB_TBANDS]; - float meanE[NB_TOT_BANDS]; - float mem[32]; - float cmean[8]; - float std[9]; - float music_prob; - float Etracker; - float lowECount; - int E_count; - int last_music; - int last_transition; - int count; - float subframe_mem[3]; - int analysis_offset; - /** Probability of having speech for time i to DETECT_SIZE-1 (and music before). - pspeech[0] is the probability that all frames in the window are speech. */ - float pspeech[DETECT_SIZE]; - /** Probability of having music for time i to DETECT_SIZE-1 (and speech before). - pmusic[0] is the probability that all frames in the window are music. 
*/ - float pmusic[DETECT_SIZE]; - float speech_confidence; - float music_confidence; - int speech_confidence_count; - int music_confidence_count; - int write_pos; - int read_pos; - int read_subframe; - AnalysisInfo info[DETECT_SIZE]; -} TonalityAnalysisState; - -/** Initialize a TonalityAnalysisState struct. - * - * This performs some possibly slow initialization steps which should - * not be repeated every analysis step. No allocated memory is retained - * by the state struct, so no cleanup call is required. - */ -void tonality_analysis_init(TonalityAnalysisState *analysis); - -/** Reset a TonalityAnalysisState stuct. - * - * Call this when there's a discontinuity in the data. - */ -void tonality_analysis_reset(TonalityAnalysisState *analysis); - -void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len); - -void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, - int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, - int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info); - -#endif diff --git a/thirdparty/opus/celt/_kiss_fft_guts.h b/thirdparty/opus/celt/_kiss_fft_guts.h deleted file mode 100644 index 5e3d58fd66..0000000000 --- a/thirdparty/opus/celt/_kiss_fft_guts.h +++ /dev/null @@ -1,182 +0,0 @@ -/*Copyright (c) 2003-2004, Mark Borgerding - - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE.*/ - -#ifndef KISS_FFT_GUTS_H -#define KISS_FFT_GUTS_H - -#define MIN(a,b) ((a)<(b) ? (a):(b)) -#define MAX(a,b) ((a)>(b) ? (a):(b)) - -/* kiss_fft.h - defines kiss_fft_scalar as either short or a float type - and defines - typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */ -#include "kiss_fft.h" - -/* - Explanation of macros dealing with complex math: - - C_MUL(m,a,b) : m = a*b - C_FIXDIV( c , div ) : if a fixed point impl., c /= div. 
noop otherwise - C_SUB( res, a,b) : res = a - b - C_SUBFROM( res , a) : res -= a - C_ADDTO( res , a) : res += a - * */ -#ifdef FIXED_POINT -#include "arch.h" - - -#define SAMP_MAX 2147483647 -#define TWID_MAX 32767 -#define TRIG_UPSCALE 1 - -#define SAMP_MIN -SAMP_MAX - - -# define S_MUL(a,b) MULT16_32_Q15(b, a) - -# define C_MUL(m,a,b) \ - do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ - (m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0) - -# define C_MULC(m,a,b) \ - do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ - (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0) - -# define C_MULBYSCALAR( c, s ) \ - do{ (c).r = S_MUL( (c).r , s ) ;\ - (c).i = S_MUL( (c).i , s ) ; }while(0) - -# define DIVSCALAR(x,k) \ - (x) = S_MUL( x, (TWID_MAX-((k)>>1))/(k)+1 ) - -# define C_FIXDIV(c,div) \ - do { DIVSCALAR( (c).r , div); \ - DIVSCALAR( (c).i , div); }while (0) - -#define C_ADD( res, a,b)\ - do {(res).r=ADD32((a).r,(b).r); (res).i=ADD32((a).i,(b).i); \ - }while(0) -#define C_SUB( res, a,b)\ - do {(res).r=SUB32((a).r,(b).r); (res).i=SUB32((a).i,(b).i); \ - }while(0) -#define C_ADDTO( res , a)\ - do {(res).r = ADD32((res).r, (a).r); (res).i = ADD32((res).i,(a).i);\ - }while(0) - -#define C_SUBFROM( res , a)\ - do {(res).r = ADD32((res).r,(a).r); (res).i = SUB32((res).i,(a).i); \ - }while(0) - -#if defined(OPUS_ARM_INLINE_ASM) -#include "arm/kiss_fft_armv4.h" -#endif - -#if defined(OPUS_ARM_INLINE_EDSP) -#include "arm/kiss_fft_armv5e.h" -#endif -#if defined(MIPSr1_ASM) -#include "mips/kiss_fft_mipsr1.h" -#endif - -#else /* not FIXED_POINT*/ - -# define S_MUL(a,b) ( (a)*(b) ) -#define C_MUL(m,a,b) \ - do{ (m).r = (a).r*(b).r - (a).i*(b).i;\ - (m).i = (a).r*(b).i + (a).i*(b).r; }while(0) -#define C_MULC(m,a,b) \ - do{ (m).r = (a).r*(b).r + (a).i*(b).i;\ - (m).i = (a).i*(b).r - (a).r*(b).i; }while(0) - -#define C_MUL4(m,a,b) C_MUL(m,a,b) - -# define C_FIXDIV(c,div) /* NOOP */ -# define C_MULBYSCALAR( c, s ) \ - do{ (c).r 
*= (s);\ - (c).i *= (s); }while(0) -#endif - -#ifndef CHECK_OVERFLOW_OP -# define CHECK_OVERFLOW_OP(a,op,b) /* noop */ -#endif - -#ifndef C_ADD -#define C_ADD( res, a,b)\ - do { \ - CHECK_OVERFLOW_OP((a).r,+,(b).r)\ - CHECK_OVERFLOW_OP((a).i,+,(b).i)\ - (res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \ - }while(0) -#define C_SUB( res, a,b)\ - do { \ - CHECK_OVERFLOW_OP((a).r,-,(b).r)\ - CHECK_OVERFLOW_OP((a).i,-,(b).i)\ - (res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \ - }while(0) -#define C_ADDTO( res , a)\ - do { \ - CHECK_OVERFLOW_OP((res).r,+,(a).r)\ - CHECK_OVERFLOW_OP((res).i,+,(a).i)\ - (res).r += (a).r; (res).i += (a).i;\ - }while(0) - -#define C_SUBFROM( res , a)\ - do {\ - CHECK_OVERFLOW_OP((res).r,-,(a).r)\ - CHECK_OVERFLOW_OP((res).i,-,(a).i)\ - (res).r -= (a).r; (res).i -= (a).i; \ - }while(0) -#endif /* C_ADD defined */ - -#ifdef FIXED_POINT -/*# define KISS_FFT_COS(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase)))) -# define KISS_FFT_SIN(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))*/ -# define KISS_FFT_COS(phase) floor(.5+TWID_MAX*cos (phase)) -# define KISS_FFT_SIN(phase) floor(.5+TWID_MAX*sin (phase)) -# define HALF_OF(x) ((x)>>1) -#elif defined(USE_SIMD) -# define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) ) -# define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) ) -# define HALF_OF(x) ((x)*_mm_set1_ps(.5f)) -#else -# define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase) -# define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase) -# define HALF_OF(x) ((x)*.5f) -#endif - -#define kf_cexp(x,phase) \ - do{ \ - (x)->r = KISS_FFT_COS(phase);\ - (x)->i = KISS_FFT_SIN(phase);\ - }while(0) - -#define kf_cexp2(x,phase) \ - do{ \ - (x)->r = TRIG_UPSCALE*celt_cos_norm((phase));\ - (x)->i = TRIG_UPSCALE*celt_cos_norm((phase)-32768);\ -}while(0) - -#endif /* KISS_FFT_GUTS_H */ diff --git a/thirdparty/opus/celt/arch.h b/thirdparty/opus/celt/arch.h deleted file mode 100644 index 8ceab5fe10..0000000000 --- 
a/thirdparty/opus/celt/arch.h +++ /dev/null @@ -1,252 +0,0 @@ -/* Copyright (c) 2003-2008 Jean-Marc Valin - Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/** - @file arch.h - @brief Various architecture definitions for CELT -*/ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifndef ARCH_H -#define ARCH_H - -#include "opus_types.h" -#include "opus_defines.h" - -# if !defined(__GNUC_PREREQ) -# if defined(__GNUC__)&&defined(__GNUC_MINOR__) -# define __GNUC_PREREQ(_maj,_min) \ - ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min)) -# else -# define __GNUC_PREREQ(_maj,_min) 0 -# endif -# endif - -#define CELT_SIG_SCALE 32768.f - -#define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__); -#ifdef ENABLE_ASSERTIONS -#include <stdio.h> -#include <stdlib.h> -#ifdef __GNUC__ -__attribute__((noreturn)) -#endif -static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line) -{ - fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str); - abort(); -} -#define celt_assert(cond) {if (!(cond)) {celt_fatal("assertion failed: " #cond);}} -#define celt_assert2(cond, message) {if (!(cond)) {celt_fatal("assertion failed: " #cond "\n" message);}} -#else -#define celt_assert(cond) -#define celt_assert2(cond, message) -#endif - -#define IMUL32(a,b) ((a)*(b)) - -#define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */ -#define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ -#define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */ -#define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */ -#define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */ -#define IMAX(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum int value. */ -#define UADD32(a,b) ((a)+(b)) -#define USUB32(a,b) ((a)-(b)) - -/* Set this if opus_int64 is a native type of the CPU. */ -/* Assume that all LP64 architectures have fast 64-bit types; also x86_64 - (which can be ILP32 for x32) and Win64 (which is LLP64). 
*/ -#if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64) -#define OPUS_FAST_INT64 1 -#else -#define OPUS_FAST_INT64 0 -#endif - -#define PRINT_MIPS(file) - -#ifdef FIXED_POINT - -typedef opus_int16 opus_val16; -typedef opus_int32 opus_val32; - -typedef opus_val32 celt_sig; -typedef opus_val16 celt_norm; -typedef opus_val32 celt_ener; - -#define Q15ONE 32767 - -#define SIG_SHIFT 12 - -#define NORM_SCALING 16384 - -#define DB_SHIFT 10 - -#define EPSILON 1 -#define VERY_SMALL 0 -#define VERY_LARGE16 ((opus_val16)32767) -#define Q15_ONE ((opus_val16)32767) - -#define SCALEIN(a) (a) -#define SCALEOUT(a) (a) - -#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) -#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) - -static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { - return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x; -} - -#ifdef FIXED_DEBUG -#include "fixed_debug.h" -#else - -#include "fixed_generic.h" - -#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR -#include "arm/fixed_arm64.h" -#elif OPUS_ARM_INLINE_EDSP -#include "arm/fixed_armv5e.h" -#elif defined (OPUS_ARM_INLINE_ASM) -#include "arm/fixed_armv4.h" -#elif defined (BFIN_ASM) -#include "fixed_bfin.h" -#elif defined (TI_C5X_ASM) -#include "fixed_c5x.h" -#elif defined (TI_C6X_ASM) -#include "fixed_c6x.h" -#endif - -#endif - -#else /* FIXED_POINT */ - -typedef float opus_val16; -typedef float opus_val32; - -typedef float celt_sig; -typedef float celt_norm; -typedef float celt_ener; - -#ifdef FLOAT_APPROX -/* This code should reliably detect NaN/inf even when -ffast-math is used. - Assumes IEEE 754 format. */ -static OPUS_INLINE int celt_isnan(float x) -{ - union {float f; opus_uint32 i;} in; - in.f = x; - return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0; -} -#else -#ifdef __FAST_MATH__ -#error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. 
NaN) input -#endif -#define celt_isnan(x) ((x)!=(x)) -#endif - -#define Q15ONE 1.0f - -#define NORM_SCALING 1.f - -#define EPSILON 1e-15f -#define VERY_SMALL 1e-30f -#define VERY_LARGE16 1e15f -#define Q15_ONE ((opus_val16)1.f) - -/* This appears to be the same speed as C99's fabsf() but it's more portable. */ -#define ABS16(x) ((float)fabs(x)) -#define ABS32(x) ((float)fabs(x)) - -#define QCONST16(x,bits) (x) -#define QCONST32(x,bits) (x) - -#define NEG16(x) (-(x)) -#define NEG32(x) (-(x)) -#define EXTRACT16(x) (x) -#define EXTEND32(x) (x) -#define SHR16(a,shift) (a) -#define SHL16(a,shift) (a) -#define SHR32(a,shift) (a) -#define SHL32(a,shift) (a) -#define PSHR32(a,shift) (a) -#define VSHR32(a,shift) (a) - -#define PSHR(a,shift) (a) -#define SHR(a,shift) (a) -#define SHL(a,shift) (a) -#define SATURATE(x,a) (x) -#define SATURATE16(x) (x) - -#define ROUND16(a,shift) (a) -#define HALF16(x) (.5f*(x)) -#define HALF32(x) (.5f*(x)) - -#define ADD16(a,b) ((a)+(b)) -#define SUB16(a,b) ((a)-(b)) -#define ADD32(a,b) ((a)+(b)) -#define SUB32(a,b) ((a)-(b)) -#define MULT16_16_16(a,b) ((a)*(b)) -#define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b)) -#define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b)) - -#define MULT16_32_Q15(a,b) ((a)*(b)) -#define MULT16_32_Q16(a,b) ((a)*(b)) - -#define MULT32_32_Q31(a,b) ((a)*(b)) - -#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) -#define MAC16_32_Q16(c,a,b) ((c)+(a)*(b)) - -#define MULT16_16_Q11_32(a,b) ((a)*(b)) -#define MULT16_16_Q11(a,b) ((a)*(b)) -#define MULT16_16_Q13(a,b) ((a)*(b)) -#define MULT16_16_Q14(a,b) ((a)*(b)) -#define MULT16_16_Q15(a,b) ((a)*(b)) -#define MULT16_16_P15(a,b) ((a)*(b)) -#define MULT16_16_P13(a,b) ((a)*(b)) -#define MULT16_16_P14(a,b) ((a)*(b)) -#define MULT16_32_P16(a,b) ((a)*(b)) - -#define DIV32_16(a,b) (((opus_val32)(a))/(opus_val16)(b)) -#define DIV32(a,b) (((opus_val32)(a))/(opus_val32)(b)) - -#define SCALEIN(a) ((a)*CELT_SIG_SCALE) -#define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE)) - -#define 
SIG2WORD16(x) (x) - -#endif /* !FIXED_POINT */ - -#ifndef GLOBAL_STACK_SIZE -#ifdef FIXED_POINT -#define GLOBAL_STACK_SIZE 100000 -#else -#define GLOBAL_STACK_SIZE 100000 -#endif -#endif - -#endif /* ARCH_H */ diff --git a/thirdparty/opus/celt/arm/arm_celt_map.c b/thirdparty/opus/celt/arm/arm_celt_map.c deleted file mode 100644 index 4d4d069a86..0000000000 --- a/thirdparty/opus/celt/arm/arm_celt_map.c +++ /dev/null @@ -1,143 +0,0 @@ -/* Copyright (c) 2010 Xiph.Org Foundation - * Copyright (c) 2013 Parrot */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pitch.h" -#include "kiss_fft.h" -#include "mdct.h" - -#if defined(OPUS_HAVE_RTCD) - -# if defined(FIXED_POINT) -# if ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ - (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ - (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP))) -opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, - const opus_val16 *, opus_val32 *, int , int) = { - celt_pitch_xcorr_c, /* ARMv4 */ - MAY_HAVE_EDSP(celt_pitch_xcorr), /* EDSP */ - MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */ - MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */ -}; - -# endif -# else /* !FIXED_POINT */ -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR) -void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, - const opus_val16 *, opus_val32 *, int, int) = { - celt_pitch_xcorr_c, /* ARMv4 */ - celt_pitch_xcorr_c, /* EDSP */ - celt_pitch_xcorr_c, /* Media */ - celt_pitch_xcorr_float_neon /* Neon */ -}; -# endif -# endif /* FIXED_POINT */ - -#if defined(FIXED_POINT) && defined(OPUS_HAVE_RTCD) && \ - defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR) - -void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y, - opus_val32 sum[4], - int len -) = { - xcorr_kernel_c, /* ARMv4 */ - xcorr_kernel_c, /* EDSP */ - xcorr_kernel_c, /* Media */ - xcorr_kernel_neon_fixed, /* Neon */ -}; - -#endif - -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -# if defined(HAVE_ARM_NE10) -# if defined(CUSTOM_MODES) -int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = { - opus_fft_alloc_arch_c, /* ARMv4 */ - opus_fft_alloc_arch_c, /* EDSP */ - opus_fft_alloc_arch_c, /* Media */ - opus_fft_alloc_arm_neon /* Neon with NE10 library support */ -}; - -void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state 
*st) = { - opus_fft_free_arch_c, /* ARMv4 */ - opus_fft_free_arch_c, /* EDSP */ - opus_fft_free_arch_c, /* Media */ - opus_fft_free_arm_neon /* Neon with NE10 */ -}; -# endif /* CUSTOM_MODES */ - -void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, - const kiss_fft_cpx *fin, - kiss_fft_cpx *fout) = { - opus_fft_c, /* ARMv4 */ - opus_fft_c, /* EDSP */ - opus_fft_c, /* Media */ - opus_fft_neon /* Neon with NE10 */ -}; - -void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, - const kiss_fft_cpx *fin, - kiss_fft_cpx *fout) = { - opus_ifft_c, /* ARMv4 */ - opus_ifft_c, /* EDSP */ - opus_ifft_c, /* Media */ - opus_ifft_neon /* Neon with NE10 */ -}; - -void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l, - kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, - int overlap, int shift, - int stride, int arch) = { - clt_mdct_forward_c, /* ARMv4 */ - clt_mdct_forward_c, /* EDSP */ - clt_mdct_forward_c, /* Media */ - clt_mdct_forward_neon /* Neon with NE10 */ -}; - -void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l, - kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, - int overlap, int shift, - int stride, int arch) = { - clt_mdct_backward_c, /* ARMv4 */ - clt_mdct_backward_c, /* EDSP */ - clt_mdct_backward_c, /* Media */ - clt_mdct_backward_neon /* Neon with NE10 */ -}; - -# endif /* HAVE_ARM_NE10 */ -# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */ - -#endif /* OPUS_HAVE_RTCD */ diff --git a/thirdparty/opus/celt/arm/armcpu.c b/thirdparty/opus/celt/arm/armcpu.c deleted file mode 100644 index 694a63b78e..0000000000 --- a/thirdparty/opus/celt/arm/armcpu.c +++ /dev/null @@ -1,185 +0,0 @@ -/* Copyright (c) 2010 Xiph.Org Foundation - * Copyright (c) 2013 Parrot */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of 
source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* Original code from libtheora modified to suit to Opus */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifdef OPUS_HAVE_RTCD - -#include "armcpu.h" -#include "cpu_support.h" -#include "os_support.h" -#include "opus_types.h" -#include "arch.h" - -#define OPUS_CPU_ARM_V4_FLAG (1<<OPUS_ARCH_ARM_V4) -#define OPUS_CPU_ARM_EDSP_FLAG (1<<OPUS_ARCH_ARM_EDSP) -#define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA) -#define OPUS_CPU_ARM_NEON_FLAG (1<<OPUS_ARCH_ARM_NEON) - -#if defined(_MSC_VER) -/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ -# define WIN32_LEAN_AND_MEAN -# define WIN32_EXTRA_LEAN -# include <windows.h> - -static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){ - opus_uint32 flags; - flags=0; - /* MSVC has no OPUS_INLINE __asm support for ARM, but it does let you __emit - * instructions via their assembled hex code. 
- * All of these instructions should be essentially nops. */ -# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \ - || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) - __try{ - /*PLD [r13]*/ - __emit(0xF5DDF000); - flags|=OPUS_CPU_ARM_EDSP_FLAG; - } - __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ - /*Ignore exception.*/ - } -# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \ - || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) - __try{ - /*SHADD8 r3,r3,r3*/ - __emit(0xE6333F93); - flags|=OPUS_CPU_ARM_MEDIA_FLAG; - } - __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ - /*Ignore exception.*/ - } -# if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) - __try{ - /*VORR q0,q0,q0*/ - __emit(0xF2200150); - flags|=OPUS_CPU_ARM_NEON_FLAG; - } - __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ - /*Ignore exception.*/ - } -# endif -# endif -# endif - return flags; -} - -#elif defined(__linux__) -/* Linux based */ -opus_uint32 opus_cpu_capabilities(void) -{ - opus_uint32 flags = 0; - FILE *cpuinfo; - - /* Reading /proc/self/auxv would be easier, but that doesn't work reliably on - * Android */ - cpuinfo = fopen("/proc/cpuinfo", "r"); - - if(cpuinfo != NULL) - { - /* 512 should be enough for anybody (it's even enough for all the flags that - * x86 has accumulated... so far). 
*/ - char buf[512]; - - while(fgets(buf, 512, cpuinfo) != NULL) - { -# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \ - || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) - /* Search for edsp and neon flag */ - if(memcmp(buf, "Features", 8) == 0) - { - char *p; - p = strstr(buf, " edsp"); - if(p != NULL && (p[5] == ' ' || p[5] == '\n')) - flags |= OPUS_CPU_ARM_EDSP_FLAG; - -# if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) - p = strstr(buf, " neon"); - if(p != NULL && (p[5] == ' ' || p[5] == '\n')) - flags |= OPUS_CPU_ARM_NEON_FLAG; -# endif - } -# endif - -# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \ - || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) - /* Search for media capabilities (>= ARMv6) */ - if(memcmp(buf, "CPU architecture:", 17) == 0) - { - int version; - version = atoi(buf+17); - - if(version >= 6) - flags |= OPUS_CPU_ARM_MEDIA_FLAG; - } -# endif - } - - fclose(cpuinfo); - } - return flags; -} -#else -/* The feature registers which can tell us what the processor supports are - * accessible in priveleged modes only, so we can't have a general user-space - * detection method like on x86.*/ -# error "Configured to use ARM asm but no CPU detection method available for " \ - "your platform. Reconfigure with --disable-rtcd (or send patches)." 
-#endif - -int opus_select_arch(void) -{ - opus_uint32 flags = opus_cpu_capabilities(); - int arch = 0; - - if(!(flags & OPUS_CPU_ARM_EDSP_FLAG)) { - /* Asserts ensure arch values are sequential */ - celt_assert(arch == OPUS_ARCH_ARM_V4); - return arch; - } - arch++; - - if(!(flags & OPUS_CPU_ARM_MEDIA_FLAG)) { - celt_assert(arch == OPUS_ARCH_ARM_EDSP); - return arch; - } - arch++; - - if(!(flags & OPUS_CPU_ARM_NEON_FLAG)) { - celt_assert(arch == OPUS_ARCH_ARM_MEDIA); - return arch; - } - arch++; - - celt_assert(arch == OPUS_ARCH_ARM_NEON); - return arch; -} - -#endif diff --git a/thirdparty/opus/celt/arm/armcpu.h b/thirdparty/opus/celt/arm/armcpu.h deleted file mode 100644 index 820262ff5f..0000000000 --- a/thirdparty/opus/celt/arm/armcpu.h +++ /dev/null @@ -1,77 +0,0 @@ -/* Copyright (c) 2010 Xiph.Org Foundation - * Copyright (c) 2013 Parrot */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#if !defined(ARMCPU_H) -# define ARMCPU_H - -# if defined(OPUS_ARM_MAY_HAVE_EDSP) -# define MAY_HAVE_EDSP(name) name ## _edsp -# else -# define MAY_HAVE_EDSP(name) name ## _c -# endif - -# if defined(OPUS_ARM_MAY_HAVE_MEDIA) -# define MAY_HAVE_MEDIA(name) name ## _media -# else -# define MAY_HAVE_MEDIA(name) MAY_HAVE_EDSP(name) -# endif - -# if defined(OPUS_ARM_MAY_HAVE_NEON) -# define MAY_HAVE_NEON(name) name ## _neon -# else -# define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name) -# endif - -# if defined(OPUS_ARM_PRESUME_EDSP) -# define PRESUME_EDSP(name) name ## _edsp -# else -# define PRESUME_EDSP(name) name ## _c -# endif - -# if defined(OPUS_ARM_PRESUME_MEDIA) -# define PRESUME_MEDIA(name) name ## _media -# else -# define PRESUME_MEDIA(name) PRESUME_EDSP(name) -# endif - -# if defined(OPUS_ARM_PRESUME_NEON) -# define PRESUME_NEON(name) name ## _neon -# else -# define PRESUME_NEON(name) PRESUME_MEDIA(name) -# endif - -# if defined(OPUS_HAVE_RTCD) -int opus_select_arch(void); - -#define OPUS_ARCH_ARM_V4 (0) -#define OPUS_ARCH_ARM_EDSP (1) -#define OPUS_ARCH_ARM_MEDIA (2) -#define OPUS_ARCH_ARM_NEON (3) - -# endif - -#endif diff --git a/thirdparty/opus/celt/arm/armopts.s.in b/thirdparty/opus/celt/arm/armopts.s.in deleted file mode 100644 index 3d8aaf2754..0000000000 --- a/thirdparty/opus/celt/arm/armopts.s.in +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright (C) 2013 Mozilla Corporation */ -/* - Redistribution and use in source and binary forms, 
with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -; Set the following to 1 if we have EDSP instructions -; (LDRD/STRD, etc., ARMv5E and later). -OPUS_ARM_MAY_HAVE_EDSP * @OPUS_ARM_MAY_HAVE_EDSP@ - -; Set the following to 1 if we have ARMv6 media instructions. 
-OPUS_ARM_MAY_HAVE_MEDIA * @OPUS_ARM_MAY_HAVE_MEDIA@ - -; Set the following to 1 if we have NEON (some ARMv7) -OPUS_ARM_MAY_HAVE_NEON * @OPUS_ARM_MAY_HAVE_NEON@ - -END diff --git a/thirdparty/opus/celt/arm/celt_ne10_fft.c b/thirdparty/opus/celt/arm/celt_ne10_fft.c deleted file mode 100644 index 42d96a7117..0000000000 --- a/thirdparty/opus/celt/arm/celt_ne10_fft.c +++ /dev/null @@ -1,174 +0,0 @@ -/* Copyright (c) 2015 Xiph.Org Foundation - Written by Viswanath Puttagunta */ -/** - @file celt_ne10_fft.c - @brief ARM Neon optimizations for fft using NE10 library - */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifndef SKIP_CONFIG_H -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#endif - -#include <NE10_init.h> -#include <NE10_dsp.h> -#include "os_support.h" -#include "kiss_fft.h" -#include "stack_alloc.h" - -#if !defined(FIXED_POINT) -# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon -# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t -# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t -# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32 -# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t -# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon -#else -# define NE10_FFT_ALLOC_C2C_TYPE_NEON(nfft) ne10_fft_alloc_c2c_int32_neon(nfft) -# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t -# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t -# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32 -# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32 -# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t -# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon -#endif - -#if defined(CUSTOM_MODES) - -/* nfft lengths in NE10 that support scaled fft */ -# define NE10_FFTSCALED_SUPPORT_MAX 4 -static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = { - 480, 240, 120, 60 -}; - -int opus_fft_alloc_arm_neon(kiss_fft_state *st) -{ - int i; - size_t memneeded = sizeof(struct arch_fft_state); - - st->arch_fft = (arch_fft_state *)opus_alloc(memneeded); - if (!st->arch_fft) - return -1; - - for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) { - if(st->nfft == ne10_fft_scaled_support[i]) - break; - } - if (i == NE10_FFTSCALED_SUPPORT_MAX) { - /* This nfft length (scaled fft) is not supported in NE10 */ - st->arch_fft->is_supported = 0; - st->arch_fft->priv = NULL; - } - else { - st->arch_fft->is_supported = 1; - st->arch_fft->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft); - if (st->arch_fft->priv == NULL) { - return -1; - } - } - return 0; -} - -void opus_fft_free_arm_neon(kiss_fft_state *st) -{ 
- NE10_FFT_CFG_TYPE_T cfg; - - if (!st->arch_fft) - return; - - cfg = (NE10_FFT_CFG_TYPE_T)st->arch_fft->priv; - if (cfg) - NE10_FFT_DESTROY_C2C_TYPE(cfg); - opus_free(st->arch_fft); -} -#endif - -void opus_fft_neon(const kiss_fft_state *st, - const kiss_fft_cpx *fin, - kiss_fft_cpx *fout) -{ - NE10_FFT_STATE_TYPE_T state; - NE10_FFT_CFG_TYPE_T cfg = &state; - VARDECL(NE10_FFT_CPX_TYPE_T, buffer); - SAVE_STACK; - ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T); - - if (!st->arch_fft->is_supported) { - /* This nfft length (scaled fft) not supported in NE10 */ - opus_fft_c(st, fin, fout); - } - else { - memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T)); - state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0]; -#if !defined(FIXED_POINT) - state.is_forward_scaled = 1; - - NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, - (NE10_FFT_CPX_TYPE_T *)fin, - cfg, 0); -#else - NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, - (NE10_FFT_CPX_TYPE_T *)fin, - cfg, 0, 1); -#endif - } - RESTORE_STACK; -} - -void opus_ifft_neon(const kiss_fft_state *st, - const kiss_fft_cpx *fin, - kiss_fft_cpx *fout) -{ - NE10_FFT_STATE_TYPE_T state; - NE10_FFT_CFG_TYPE_T cfg = &state; - VARDECL(NE10_FFT_CPX_TYPE_T, buffer); - SAVE_STACK; - ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T); - - if (!st->arch_fft->is_supported) { - /* This nfft length (scaled fft) not supported in NE10 */ - opus_ifft_c(st, fin, fout); - } - else { - memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T)); - state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0]; -#if !defined(FIXED_POINT) - state.is_backward_scaled = 0; - - NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, - (NE10_FFT_CPX_TYPE_T *)fin, - cfg, 1); -#else - NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout, - (NE10_FFT_CPX_TYPE_T *)fin, - cfg, 1, 0); -#endif - } - RESTORE_STACK; -} diff --git a/thirdparty/opus/celt/arm/celt_ne10_mdct.c b/thirdparty/opus/celt/arm/celt_ne10_mdct.c deleted file mode 100644 index 
293c3efd7a..0000000000 --- a/thirdparty/opus/celt/arm/celt_ne10_mdct.c +++ /dev/null @@ -1,258 +0,0 @@ -/* Copyright (c) 2015 Xiph.Org Foundation - Written by Viswanath Puttagunta */ -/** - @file celt_ne10_mdct.c - @brief ARM Neon optimizations for mdct using NE10 library - */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifndef SKIP_CONFIG_H -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#endif - -#include "kiss_fft.h" -#include "_kiss_fft_guts.h" -#include "mdct.h" -#include "stack_alloc.h" - -void clt_mdct_forward_neon(const mdct_lookup *l, - kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, - int overlap, int shift, int stride, int arch) -{ - int i; - int N, N2, N4; - VARDECL(kiss_fft_scalar, f); - VARDECL(kiss_fft_cpx, f2); - const kiss_fft_state *st = l->kfft[shift]; - const kiss_twiddle_scalar *trig; - - SAVE_STACK; - - N = l->n; - trig = l->trig; - for (i=0;i<shift;i++) - { - N >>= 1; - trig += N; - } - N2 = N>>1; - N4 = N>>2; - - ALLOC(f, N2, kiss_fft_scalar); - ALLOC(f2, N4, kiss_fft_cpx); - - /* Consider the input to be composed of four blocks: [a, b, c, d] */ - /* Window, shuffle, fold */ - { - /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); - const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); - kiss_fft_scalar * OPUS_RESTRICT yp = f; - const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); - const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; - for(i=0;i<((overlap+3)>>2);i++) - { - /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ - *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); - *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]); - xp1+=2; - xp2-=2; - wp1+=2; - wp2-=2; - } - wp1 = window; - wp2 = window+overlap-1; - for(;i<N4-((overlap+3)>>2);i++) - { - /* Real part arranged as a-bR, Imag part arranged as -c-dR */ - *yp++ = *xp2; - *yp++ = *xp1; - xp1+=2; - xp2-=2; - } - for(;i<N4;i++) - { - /* Real part arranged as a-bR, Imag part arranged as -c-dR */ - *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2); - *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]); - xp1+=2; - xp2-=2; - wp1+=2; - wp2-=2; - } - } - /* Pre-rotation 
*/ - { - kiss_fft_scalar * OPUS_RESTRICT yp = f; - const kiss_twiddle_scalar *t = &trig[0]; - for(i=0;i<N4;i++) - { - kiss_fft_cpx yc; - kiss_twiddle_scalar t0, t1; - kiss_fft_scalar re, im, yr, yi; - t0 = t[i]; - t1 = t[N4+i]; - re = *yp++; - im = *yp++; - yr = S_MUL(re,t0) - S_MUL(im,t1); - yi = S_MUL(im,t0) + S_MUL(re,t1); - yc.r = yr; - yc.i = yi; - f2[i] = yc; - } - } - - opus_fft(st, f2, (kiss_fft_cpx *)f, arch); - - /* Post-rotate */ - { - /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f; - kiss_fft_scalar * OPUS_RESTRICT yp1 = out; - kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); - const kiss_twiddle_scalar *t = &trig[0]; - /* Temp pointers to make it really clear to the compiler what we're doing */ - for(i=0;i<N4;i++) - { - kiss_fft_scalar yr, yi; - yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]); - yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]); - *yp1 = yr; - *yp2 = yi; - fp++; - yp1 += 2*stride; - yp2 -= 2*stride; - } - } - RESTORE_STACK; -} - -void clt_mdct_backward_neon(const mdct_lookup *l, - kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 * OPUS_RESTRICT window, - int overlap, int shift, int stride, int arch) -{ - int i; - int N, N2, N4; - VARDECL(kiss_fft_scalar, f); - const kiss_twiddle_scalar *trig; - const kiss_fft_state *st = l->kfft[shift]; - - N = l->n; - trig = l->trig; - for (i=0;i<shift;i++) - { - N >>= 1; - trig += N; - } - N2 = N>>1; - N4 = N>>2; - - ALLOC(f, N2, kiss_fft_scalar); - - /* Pre-rotate */ - { - /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; - const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); - kiss_fft_scalar * OPUS_RESTRICT yp = f; - const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; - for(i=0;i<N4;i++) - { - kiss_fft_scalar yr, yi; - yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]); - yi = S_MUL(*xp1, t[i]) 
- S_MUL(*xp2, t[N4+i]); - yp[2*i] = yr; - yp[2*i+1] = yi; - xp1+=2*stride; - xp2-=2*stride; - } - } - - opus_ifft(st, (kiss_fft_cpx *)f, (kiss_fft_cpx*)(out+(overlap>>1)), arch); - - /* Post-rotate and de-shuffle from both ends of the buffer at once to make - it in-place. */ - { - kiss_fft_scalar * yp0 = out+(overlap>>1); - kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2; - const kiss_twiddle_scalar *t = &trig[0]; - /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the - middle pair will be computed twice. */ - for(i=0;i<(N4+1)>>1;i++) - { - kiss_fft_scalar re, im, yr, yi; - kiss_twiddle_scalar t0, t1; - re = yp0[0]; - im = yp0[1]; - t0 = t[i]; - t1 = t[N4+i]; - /* We'd scale up by 2 here, but instead it's done when mixing the windows */ - yr = S_MUL(re,t0) + S_MUL(im,t1); - yi = S_MUL(re,t1) - S_MUL(im,t0); - re = yp1[0]; - im = yp1[1]; - yp0[0] = yr; - yp1[1] = yi; - - t0 = t[(N4-i-1)]; - t1 = t[(N2-i-1)]; - /* We'd scale up by 2 here, but instead it's done when mixing the windows */ - yr = S_MUL(re,t0) + S_MUL(im,t1); - yi = S_MUL(re,t1) - S_MUL(im,t0); - yp1[0] = yr; - yp0[1] = yi; - yp0 += 2; - yp1 -= 2; - } - } - - /* Mirror on both sides for TDAC */ - { - kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; - kiss_fft_scalar * OPUS_RESTRICT yp1 = out; - const opus_val16 * OPUS_RESTRICT wp1 = window; - const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; - - for(i = 0; i < overlap/2; i++) - { - kiss_fft_scalar x1, x2; - x1 = *xp1; - x2 = *yp1; - *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); - *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); - wp1++; - wp2--; - } - } - RESTORE_STACK; -} diff --git a/thirdparty/opus/celt/arm/celt_neon_intr.c b/thirdparty/opus/celt/arm/celt_neon_intr.c deleted file mode 100644 index 47bbe3dc22..0000000000 --- a/thirdparty/opus/celt/arm/celt_neon_intr.c +++ /dev/null @@ -1,311 +0,0 @@ -/* Copyright (c) 2014-2015 Xiph.Org Foundation - Written by Viswanath Puttagunta */ -/** - @file celt_neon_intr.c 
- @brief ARM Neon Intrinsic optimizations for celt - */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <arm_neon.h> -#include "../pitch.h" - -#if defined(FIXED_POINT) -void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) -{ - int j; - int32x4_t a = vld1q_s32(sum); - /* Load y[0...3] */ - /* This requires len>0 to always be valid (which we assert in the C code). 
*/ - int16x4_t y0 = vld1_s16(y); - y += 4; - - for (j = 0; j + 8 <= len; j += 8) - { - /* Load x[0...7] */ - int16x8_t xx = vld1q_s16(x); - int16x4_t x0 = vget_low_s16(xx); - int16x4_t x4 = vget_high_s16(xx); - /* Load y[4...11] */ - int16x8_t yy = vld1q_s16(y); - int16x4_t y4 = vget_low_s16(yy); - int16x4_t y8 = vget_high_s16(yy); - int32x4_t a0 = vmlal_lane_s16(a, y0, x0, 0); - int32x4_t a1 = vmlal_lane_s16(a0, y4, x4, 0); - - int16x4_t y1 = vext_s16(y0, y4, 1); - int16x4_t y5 = vext_s16(y4, y8, 1); - int32x4_t a2 = vmlal_lane_s16(a1, y1, x0, 1); - int32x4_t a3 = vmlal_lane_s16(a2, y5, x4, 1); - - int16x4_t y2 = vext_s16(y0, y4, 2); - int16x4_t y6 = vext_s16(y4, y8, 2); - int32x4_t a4 = vmlal_lane_s16(a3, y2, x0, 2); - int32x4_t a5 = vmlal_lane_s16(a4, y6, x4, 2); - - int16x4_t y3 = vext_s16(y0, y4, 3); - int16x4_t y7 = vext_s16(y4, y8, 3); - int32x4_t a6 = vmlal_lane_s16(a5, y3, x0, 3); - int32x4_t a7 = vmlal_lane_s16(a6, y7, x4, 3); - - y0 = y8; - a = a7; - x += 8; - y += 8; - } - - for (; j < len; j++) - { - int16x4_t x0 = vld1_dup_s16(x); /* load next x */ - int32x4_t a0 = vmlal_s16(a, y0, x0); - - int16x4_t y4 = vld1_dup_s16(y); /* load next y */ - y0 = vext_s16(y0, y4, 1); - a = a0; - x++; - y++; - } - - vst1q_s32(sum, a); -} - -#else -/* - * Function: xcorr_kernel_neon_float - * --------------------------------- - * Computes 4 correlation values and stores them in sum[4] - */ -static void xcorr_kernel_neon_float(const float32_t *x, const float32_t *y, - float32_t sum[4], int len) { - float32x4_t YY[3]; - float32x4_t YEXT[3]; - float32x4_t XX[2]; - float32x2_t XX_2; - float32x4_t SUMM; - const float32_t *xi = x; - const float32_t *yi = y; - - celt_assert(len>0); - - YY[0] = vld1q_f32(yi); - SUMM = vdupq_n_f32(0); - - /* Consume 8 elements in x vector and 12 elements in y - * vector. However, the 12'th element never really gets - * touched in this loop. So, if len == 8, then we only - * must access y[0] to y[10]. 
y[11] must not be accessed - * hence make sure len > 8 and not len >= 8 - */ - while (len > 8) { - yi += 4; - YY[1] = vld1q_f32(yi); - yi += 4; - YY[2] = vld1q_f32(yi); - - XX[0] = vld1q_f32(xi); - xi += 4; - XX[1] = vld1q_f32(xi); - xi += 4; - - SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0); - YEXT[0] = vextq_f32(YY[0], YY[1], 1); - SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1); - YEXT[1] = vextq_f32(YY[0], YY[1], 2); - SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0); - YEXT[2] = vextq_f32(YY[0], YY[1], 3); - SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1); - - SUMM = vmlaq_lane_f32(SUMM, YY[1], vget_low_f32(XX[1]), 0); - YEXT[0] = vextq_f32(YY[1], YY[2], 1); - SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[1]), 1); - YEXT[1] = vextq_f32(YY[1], YY[2], 2); - SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[1]), 0); - YEXT[2] = vextq_f32(YY[1], YY[2], 3); - SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[1]), 1); - - YY[0] = YY[2]; - len -= 8; - } - - /* Consume 4 elements in x vector and 8 elements in y - * vector. However, the 8'th element in y never really gets - * touched in this loop. So, if len == 4, then we only - * must access y[0] to y[6]. 
y[7] must not be accessed - * hence make sure len>4 and not len>=4 - */ - if (len > 4) { - yi += 4; - YY[1] = vld1q_f32(yi); - - XX[0] = vld1q_f32(xi); - xi += 4; - - SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0); - YEXT[0] = vextq_f32(YY[0], YY[1], 1); - SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1); - YEXT[1] = vextq_f32(YY[0], YY[1], 2); - SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0); - YEXT[2] = vextq_f32(YY[0], YY[1], 3); - SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1); - - YY[0] = YY[1]; - len -= 4; - } - - while (--len > 0) { - XX_2 = vld1_dup_f32(xi++); - SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0); - YY[0]= vld1q_f32(++yi); - } - - XX_2 = vld1_dup_f32(xi); - SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0); - - vst1q_f32(sum, SUMM); -} - -/* - * Function: xcorr_kernel_neon_float_process1 - * --------------------------------- - * Computes single correlation values and stores in *sum - */ -static void xcorr_kernel_neon_float_process1(const float32_t *x, - const float32_t *y, float32_t *sum, int len) { - float32x4_t XX[4]; - float32x4_t YY[4]; - float32x2_t XX_2; - float32x2_t YY_2; - float32x4_t SUMM; - float32x2_t SUMM_2[2]; - const float32_t *xi = x; - const float32_t *yi = y; - - SUMM = vdupq_n_f32(0); - - /* Work on 16 values per iteration */ - while (len >= 16) { - XX[0] = vld1q_f32(xi); - xi += 4; - XX[1] = vld1q_f32(xi); - xi += 4; - XX[2] = vld1q_f32(xi); - xi += 4; - XX[3] = vld1q_f32(xi); - xi += 4; - - YY[0] = vld1q_f32(yi); - yi += 4; - YY[1] = vld1q_f32(yi); - yi += 4; - YY[2] = vld1q_f32(yi); - yi += 4; - YY[3] = vld1q_f32(yi); - yi += 4; - - SUMM = vmlaq_f32(SUMM, YY[0], XX[0]); - SUMM = vmlaq_f32(SUMM, YY[1], XX[1]); - SUMM = vmlaq_f32(SUMM, YY[2], XX[2]); - SUMM = vmlaq_f32(SUMM, YY[3], XX[3]); - len -= 16; - } - - /* Work on 8 values */ - if (len >= 8) { - XX[0] = vld1q_f32(xi); - xi += 4; - XX[1] = vld1q_f32(xi); - xi += 4; - - YY[0] = vld1q_f32(yi); - yi += 4; - YY[1] = 
vld1q_f32(yi); - yi += 4; - - SUMM = vmlaq_f32(SUMM, YY[0], XX[0]); - SUMM = vmlaq_f32(SUMM, YY[1], XX[1]); - len -= 8; - } - - /* Work on 4 values */ - if (len >= 4) { - XX[0] = vld1q_f32(xi); - xi += 4; - YY[0] = vld1q_f32(yi); - yi += 4; - SUMM = vmlaq_f32(SUMM, YY[0], XX[0]); - len -= 4; - } - - /* Start accumulating results */ - SUMM_2[0] = vget_low_f32(SUMM); - if (len >= 2) { - /* While at it, consume 2 more values if available */ - XX_2 = vld1_f32(xi); - xi += 2; - YY_2 = vld1_f32(yi); - yi += 2; - SUMM_2[0] = vmla_f32(SUMM_2[0], YY_2, XX_2); - len -= 2; - } - SUMM_2[1] = vget_high_f32(SUMM); - SUMM_2[0] = vadd_f32(SUMM_2[0], SUMM_2[1]); - SUMM_2[0] = vpadd_f32(SUMM_2[0], SUMM_2[0]); - /* Ok, now we have result accumulated in SUMM_2[0].0 */ - - if (len > 0) { - /* Case when you have one value left */ - XX_2 = vld1_dup_f32(xi); - YY_2 = vld1_dup_f32(yi); - SUMM_2[0] = vmla_f32(SUMM_2[0], XX_2, YY_2); - } - - vst1_lane_f32(sum, SUMM_2[0], 0); -} - -void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch) { - int i; - celt_assert(max_pitch > 0); - celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); - - for (i = 0; i < (max_pitch-3); i += 4) { - xcorr_kernel_neon_float((const float32_t *)_x, (const float32_t *)_y+i, - (float32_t *)xcorr+i, len); - } - - /* In case max_pitch isn't multiple of 4 - * compute single correlation value per iteration - */ - for (; i < max_pitch; i++) { - xcorr_kernel_neon_float_process1((const float32_t *)_x, - (const float32_t *)_y+i, (float32_t *)xcorr+i, len); - } -} -#endif diff --git a/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S b/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S deleted file mode 100644 index 5b2ee55a10..0000000000 --- a/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm-gnu.S +++ /dev/null @@ -1,551 +0,0 @@ - .syntax unified -@ Copyright (c) 2007-2008 CSIRO -@ Copyright (c) 2007-2009 Xiph.Org Foundation -@ Copyright (c) 2013 
Parrot -@ Written by Aurélien Zanelli -@ -@ Redistribution and use in source and binary forms, with or without -@ modification, are permitted provided that the following conditions -@ are met: -@ -@ - Redistributions of source code must retain the above copyright -@ notice, this list of conditions and the following disclaimer. -@ -@ - Redistributions in binary form must reproduce the above copyright -@ notice, this list of conditions and the following disclaimer in the -@ documentation and/or other materials provided with the distribution. -@ -@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -@ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER -@ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - .text; .p2align 2; .arch armv7-a - .fpu neon - .object_arch armv4t - - .include "celt/arm/armopts-gnu.S" - - .if OPUS_ARM_MAY_HAVE_EDSP - .global celt_pitch_xcorr_edsp - .endif - - .if OPUS_ARM_MAY_HAVE_NEON - .global celt_pitch_xcorr_neon - .endif - - .if OPUS_ARM_MAY_HAVE_NEON - -@ Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3 -; xcorr_kernel_neon: @ PROC -xcorr_kernel_neon_start: - @ input: - @ r3 = int len - @ r4 = opus_val16 *x - @ r5 = opus_val16 *y - @ q0 = opus_val32 sum[4] - @ output: - @ q0 = opus_val32 sum[4] - @ preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15 - @ internal usage: - @ r12 = int j - @ d3 = y_3|y_2|y_1|y_0 - @ q2 = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4 - @ q3 = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0 - @ q8 = scratch - @ - @ Load y[0...3] - @ This requires len>0 to always be valid (which we assert in the C code). - VLD1.16 {d5}, [r5]! - SUBS r12, r3, #8 - BLE xcorr_kernel_neon_process4 -@ Process 8 samples at a time. -@ This loop loads one y value more than we actually need. Therefore we have to -@ stop as soon as there are 8 or fewer samples left (instead of 7), to avoid -@ reading past the end of the array. -xcorr_kernel_neon_process8: - @ This loop has 19 total instructions (10 cycles to issue, minimum), with - @ - 2 cycles of ARM insrtuctions, - @ - 10 cycles of load/store/byte permute instructions, and - @ - 9 cycles of data processing instructions. - @ On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the - @ latter two categories, meaning the whole loop should run in 10 cycles per - @ iteration, barring cache misses. - @ - @ Load x[0...7] - VLD1.16 {d6, d7}, [r4]! - @ Unlike VMOV, VAND is a data processsing instruction (and doesn't get - @ assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1. - VAND d3, d5, d5 - SUBS r12, r12, #8 - @ Load y[4...11] - VLD1.16 {d4, d5}, [r5]! 
- VMLAL.S16 q0, d3, d6[0] - VEXT.16 d16, d3, d4, #1 - VMLAL.S16 q0, d4, d7[0] - VEXT.16 d17, d4, d5, #1 - VMLAL.S16 q0, d16, d6[1] - VEXT.16 d16, d3, d4, #2 - VMLAL.S16 q0, d17, d7[1] - VEXT.16 d17, d4, d5, #2 - VMLAL.S16 q0, d16, d6[2] - VEXT.16 d16, d3, d4, #3 - VMLAL.S16 q0, d17, d7[2] - VEXT.16 d17, d4, d5, #3 - VMLAL.S16 q0, d16, d6[3] - VMLAL.S16 q0, d17, d7[3] - BGT xcorr_kernel_neon_process8 -@ Process 4 samples here if we have > 4 left (still reading one extra y value). -xcorr_kernel_neon_process4: - ADDS r12, r12, #4 - BLE xcorr_kernel_neon_process2 - @ Load x[0...3] - VLD1.16 d6, [r4]! - @ Use VAND since it's a data processing instruction again. - VAND d4, d5, d5 - SUB r12, r12, #4 - @ Load y[4...7] - VLD1.16 d5, [r5]! - VMLAL.S16 q0, d4, d6[0] - VEXT.16 d16, d4, d5, #1 - VMLAL.S16 q0, d16, d6[1] - VEXT.16 d16, d4, d5, #2 - VMLAL.S16 q0, d16, d6[2] - VEXT.16 d16, d4, d5, #3 - VMLAL.S16 q0, d16, d6[3] -@ Process 2 samples here if we have > 2 left (still reading one extra y value). -xcorr_kernel_neon_process2: - ADDS r12, r12, #2 - BLE xcorr_kernel_neon_process1 - @ Load x[0...1] - VLD2.16 {d6[],d7[]}, [r4]! - @ Use VAND since it's a data processing instruction again. - VAND d4, d5, d5 - SUB r12, r12, #2 - @ Load y[4...5] - VLD1.32 {d5[]}, [r5]! - VMLAL.S16 q0, d4, d6 - VEXT.16 d16, d4, d5, #1 - @ Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI - @ instead of VEXT, since it's a data-processing instruction. - VSRI.64 d5, d4, #32 - VMLAL.S16 q0, d16, d7 -@ Process 1 sample using the extra y value we loaded above. -xcorr_kernel_neon_process1: - @ Load next *x - VLD1.16 {d6[]}, [r4]! - ADDS r12, r12, #1 - @ y[0...3] are left in d5 from prior iteration(s) (if any) - VMLAL.S16 q0, d5, d6 - MOVLE pc, lr -@ Now process 1 last sample, not reading ahead. - @ Load last *y - VLD1.16 {d4[]}, [r5]! - VSRI.64 d4, d5, #16 - @ Load last *x - VLD1.16 {d6[]}, [r4]! 
- VMLAL.S16 q0, d4, d6 - MOV pc, lr - .size xcorr_kernel_neon, .-xcorr_kernel_neon @ ENDP - -@ opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y, -@ opus_val32 *xcorr, int len, int max_pitch) -; celt_pitch_xcorr_neon: @ PROC - @ input: - @ r0 = opus_val16 *_x - @ r1 = opus_val16 *_y - @ r2 = opus_val32 *xcorr - @ r3 = int len - @ output: - @ r0 = int maxcorr - @ internal usage: - @ r4 = opus_val16 *x (for xcorr_kernel_neon()) - @ r5 = opus_val16 *y (for xcorr_kernel_neon()) - @ r6 = int max_pitch - @ r12 = int j - @ q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon()) - STMFD sp!, {r4-r6, lr} - LDR r6, [sp, #16] - VMOV.S32 q15, #1 - @ if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done - SUBS r6, r6, #4 - BLT celt_pitch_xcorr_neon_process4_done -celt_pitch_xcorr_neon_process4: - @ xcorr_kernel_neon parameters: - @ r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0} - MOV r4, r0 - MOV r5, r1 - VEOR q0, q0, q0 - @ xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3. - @ So we don't save/restore any other registers. - BL xcorr_kernel_neon_start - SUBS r6, r6, #4 - VST1.32 {q0}, [r2]! - @ _y += 4 - ADD r1, r1, #8 - VMAX.S32 q15, q15, q0 - @ if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done - BGE celt_pitch_xcorr_neon_process4 -@ We have less than 4 sums left to compute. -celt_pitch_xcorr_neon_process4_done: - ADDS r6, r6, #4 - @ Reduce maxcorr to a single value - VMAX.S32 d30, d30, d31 - VPMAX.S32 d30, d30, d30 - @ if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done - BLE celt_pitch_xcorr_neon_done -@ Now compute each remaining sum one at a time. -celt_pitch_xcorr_neon_process_remaining: - MOV r4, r0 - MOV r5, r1 - VMOV.I32 q0, #0 - SUBS r12, r3, #8 - BLT celt_pitch_xcorr_neon_process_remaining4 -@ Sum terms 8 at a time. -celt_pitch_xcorr_neon_process_remaining_loop8: - @ Load x[0...7] - VLD1.16 {q1}, [r4]! - @ Load y[0...7] - VLD1.16 {q2}, [r5]! 
- SUBS r12, r12, #8 - VMLAL.S16 q0, d4, d2 - VMLAL.S16 q0, d5, d3 - BGE celt_pitch_xcorr_neon_process_remaining_loop8 -@ Sum terms 4 at a time. -celt_pitch_xcorr_neon_process_remaining4: - ADDS r12, r12, #4 - BLT celt_pitch_xcorr_neon_process_remaining4_done - @ Load x[0...3] - VLD1.16 {d2}, [r4]! - @ Load y[0...3] - VLD1.16 {d3}, [r5]! - SUB r12, r12, #4 - VMLAL.S16 q0, d3, d2 -celt_pitch_xcorr_neon_process_remaining4_done: - @ Reduce the sum to a single value. - VADD.S32 d0, d0, d1 - VPADDL.S32 d0, d0 - ADDS r12, r12, #4 - BLE celt_pitch_xcorr_neon_process_remaining_loop_done -@ Sum terms 1 at a time. -celt_pitch_xcorr_neon_process_remaining_loop1: - VLD1.16 {d2[]}, [r4]! - VLD1.16 {d3[]}, [r5]! - SUBS r12, r12, #1 - VMLAL.S16 q0, d2, d3 - BGT celt_pitch_xcorr_neon_process_remaining_loop1 -celt_pitch_xcorr_neon_process_remaining_loop_done: - VST1.32 {d0[0]}, [r2]! - VMAX.S32 d30, d30, d0 - SUBS r6, r6, #1 - @ _y++ - ADD r1, r1, #2 - @ if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining - BGT celt_pitch_xcorr_neon_process_remaining -celt_pitch_xcorr_neon_done: - VMOV.32 r0, d30[0] - LDMFD sp!, {r4-r6, pc} - .size celt_pitch_xcorr_neon, .-celt_pitch_xcorr_neon @ ENDP - - .endif - - .if OPUS_ARM_MAY_HAVE_EDSP - -@ This will get used on ARMv7 devices without NEON, so it has been optimized -@ to take advantage of dual-issuing where possible. 
-; xcorr_kernel_edsp: @ PROC -xcorr_kernel_edsp_start: - @ input: - @ r3 = int len - @ r4 = opus_val16 *_x (must be 32-bit aligned) - @ r5 = opus_val16 *_y (must be 32-bit aligned) - @ r6...r9 = opus_val32 sum[4] - @ output: - @ r6...r9 = opus_val32 sum[4] - @ preserved: r0-r5 - @ internal usage - @ r2 = int j - @ r12,r14 = opus_val16 x[4] - @ r10,r11 = opus_val16 y[4] - STMFD sp!, {r2,r4,r5,lr} - LDR r10, [r5], #4 @ Load y[0...1] - SUBS r2, r3, #4 @ j = len-4 - LDR r11, [r5], #4 @ Load y[2...3] - BLE xcorr_kernel_edsp_process4_done - LDR r12, [r4], #4 @ Load x[0...1] - @ Stall -xcorr_kernel_edsp_process4: - @ The multiplies must issue from pipeline 0, and can't dual-issue with each - @ other. Every other instruction here dual-issues with a multiply, and is - @ thus "free". There should be no stalls in the body of the loop. - SMLABB r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x_0,y_0) - LDR r14, [r4], #4 @ Load x[2...3] - SMLABT r7, r12, r10, r7 @ sum[1] = MAC16_16(sum[1],x_0,y_1) - SUBS r2, r2, #4 @ j-=4 - SMLABB r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x_0,y_2) - SMLABT r9, r12, r11, r9 @ sum[3] = MAC16_16(sum[3],x_0,y_3) - SMLATT r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x_1,y_1) - LDR r10, [r5], #4 @ Load y[4...5] - SMLATB r7, r12, r11, r7 @ sum[1] = MAC16_16(sum[1],x_1,y_2) - SMLATT r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x_1,y_3) - SMLATB r9, r12, r10, r9 @ sum[3] = MAC16_16(sum[3],x_1,y_4) - LDRGT r12, [r4], #4 @ Load x[0...1] - SMLABB r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],x_2,y_2) - SMLABT r7, r14, r11, r7 @ sum[1] = MAC16_16(sum[1],x_2,y_3) - SMLABB r8, r14, r10, r8 @ sum[2] = MAC16_16(sum[2],x_2,y_4) - SMLABT r9, r14, r10, r9 @ sum[3] = MAC16_16(sum[3],x_2,y_5) - SMLATT r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],x_3,y_3) - LDR r11, [r5], #4 @ Load y[6...7] - SMLATB r7, r14, r10, r7 @ sum[1] = MAC16_16(sum[1],x_3,y_4) - SMLATT r8, r14, r10, r8 @ sum[2] = MAC16_16(sum[2],x_3,y_5) - SMLATB r9, r14, r11, r9 @ sum[3] = MAC16_16(sum[3],x_3,y_6) 
- BGT xcorr_kernel_edsp_process4 -xcorr_kernel_edsp_process4_done: - ADDS r2, r2, #4 - BLE xcorr_kernel_edsp_done - LDRH r12, [r4], #2 @ r12 = *x++ - SUBS r2, r2, #1 @ j-- - @ Stall - SMLABB r6, r12, r10, r6 @ sum[0] = MAC16_16(sum[0],x,y_0) - LDRHGT r14, [r4], #2 @ r14 = *x++ - SMLABT r7, r12, r10, r7 @ sum[1] = MAC16_16(sum[1],x,y_1) - SMLABB r8, r12, r11, r8 @ sum[2] = MAC16_16(sum[2],x,y_2) - SMLABT r9, r12, r11, r9 @ sum[3] = MAC16_16(sum[3],x,y_3) - BLE xcorr_kernel_edsp_done - SMLABT r6, r14, r10, r6 @ sum[0] = MAC16_16(sum[0],x,y_1) - SUBS r2, r2, #1 @ j-- - SMLABB r7, r14, r11, r7 @ sum[1] = MAC16_16(sum[1],x,y_2) - LDRH r10, [r5], #2 @ r10 = y_4 = *y++ - SMLABT r8, r14, r11, r8 @ sum[2] = MAC16_16(sum[2],x,y_3) - LDRHGT r12, [r4], #2 @ r12 = *x++ - SMLABB r9, r14, r10, r9 @ sum[3] = MAC16_16(sum[3],x,y_4) - BLE xcorr_kernel_edsp_done - SMLABB r6, r12, r11, r6 @ sum[0] = MAC16_16(sum[0],tmp,y_2) - CMP r2, #1 @ j-- - SMLABT r7, r12, r11, r7 @ sum[1] = MAC16_16(sum[1],tmp,y_3) - LDRH r2, [r5], #2 @ r2 = y_5 = *y++ - SMLABB r8, r12, r10, r8 @ sum[2] = MAC16_16(sum[2],tmp,y_4) - LDRHGT r14, [r4] @ r14 = *x - SMLABB r9, r12, r2, r9 @ sum[3] = MAC16_16(sum[3],tmp,y_5) - BLE xcorr_kernel_edsp_done - SMLABT r6, r14, r11, r6 @ sum[0] = MAC16_16(sum[0],tmp,y_3) - LDRH r11, [r5] @ r11 = y_6 = *y - SMLABB r7, r14, r10, r7 @ sum[1] = MAC16_16(sum[1],tmp,y_4) - SMLABB r8, r14, r2, r8 @ sum[2] = MAC16_16(sum[2],tmp,y_5) - SMLABB r9, r14, r11, r9 @ sum[3] = MAC16_16(sum[3],tmp,y_6) -xcorr_kernel_edsp_done: - LDMFD sp!, {r2,r4,r5,pc} - .size xcorr_kernel_edsp, .-xcorr_kernel_edsp @ ENDP - -; celt_pitch_xcorr_edsp: @ PROC - @ input: - @ r0 = opus_val16 *_x (must be 32-bit aligned) - @ r1 = opus_val16 *_y (only needs to be 16-bit aligned) - @ r2 = opus_val32 *xcorr - @ r3 = int len - @ output: - @ r0 = maxcorr - @ internal usage - @ r4 = opus_val16 *x - @ r5 = opus_val16 *y - @ r6 = opus_val32 sum0 - @ r7 = opus_val32 sum1 - @ r8 = opus_val32 sum2 - @ r9 = opus_val32 sum3 - 
@ r1 = int max_pitch - @ r12 = int j - STMFD sp!, {r4-r11, lr} - MOV r5, r1 - LDR r1, [sp, #36] - MOV r4, r0 - TST r5, #3 - @ maxcorr = 1 - MOV r0, #1 - BEQ celt_pitch_xcorr_edsp_process1u_done -@ Compute one sum at the start to make y 32-bit aligned. - SUBS r12, r3, #4 - @ r14 = sum = 0 - MOV r14, #0 - LDRH r8, [r5], #2 - BLE celt_pitch_xcorr_edsp_process1u_loop4_done - LDR r6, [r4], #4 - MOV r8, r8, LSL #16 -celt_pitch_xcorr_edsp_process1u_loop4: - LDR r9, [r5], #4 - SMLABT r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0) - LDR r7, [r4], #4 - SMLATB r14, r6, r9, r14 @ sum = MAC16_16(sum, x_1, y_1) - LDR r8, [r5], #4 - SMLABT r14, r7, r9, r14 @ sum = MAC16_16(sum, x_2, y_2) - SUBS r12, r12, #4 @ j-=4 - SMLATB r14, r7, r8, r14 @ sum = MAC16_16(sum, x_3, y_3) - LDRGT r6, [r4], #4 - BGT celt_pitch_xcorr_edsp_process1u_loop4 - MOV r8, r8, LSR #16 -celt_pitch_xcorr_edsp_process1u_loop4_done: - ADDS r12, r12, #4 -celt_pitch_xcorr_edsp_process1u_loop1: - LDRHGE r6, [r4], #2 - @ Stall - SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, *x, *y) - SUBSGE r12, r12, #1 - LDRHGT r8, [r5], #2 - BGT celt_pitch_xcorr_edsp_process1u_loop1 - @ Restore _x - SUB r4, r4, r3, LSL #1 - @ Restore and advance _y - SUB r5, r5, r3, LSL #1 - @ maxcorr = max(maxcorr, sum) - CMP r0, r14 - ADD r5, r5, #2 - MOVLT r0, r14 - SUBS r1, r1, #1 - @ xcorr[i] = sum - STR r14, [r2], #4 - BLE celt_pitch_xcorr_edsp_done -celt_pitch_xcorr_edsp_process1u_done: - @ if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2 - SUBS r1, r1, #4 - BLT celt_pitch_xcorr_edsp_process2 -celt_pitch_xcorr_edsp_process4: - @ xcorr_kernel_edsp parameters: - @ r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0} - MOV r6, #0 - MOV r7, #0 - MOV r8, #0 - MOV r9, #0 - BL xcorr_kernel_edsp_start @ xcorr_kernel_edsp(_x, _y+i, xcorr+i, len) - @ maxcorr = max(maxcorr, sum0, sum1, sum2, sum3) - CMP r0, r6 - @ _y+=4 - ADD r5, r5, #8 - MOVLT r0, r6 - CMP r0, r7 - MOVLT r0, r7 - CMP r0, r8 - MOVLT r0, r8 - CMP r0, r9 - MOVLT r0, r9 
- STMIA r2!, {r6-r9} - SUBS r1, r1, #4 - BGE celt_pitch_xcorr_edsp_process4 -celt_pitch_xcorr_edsp_process2: - ADDS r1, r1, #2 - BLT celt_pitch_xcorr_edsp_process1a - SUBS r12, r3, #4 - @ {r10, r11} = {sum0, sum1} = {0, 0} - MOV r10, #0 - MOV r11, #0 - LDR r8, [r5], #4 - BLE celt_pitch_xcorr_edsp_process2_loop_done - LDR r6, [r4], #4 - LDR r9, [r5], #4 -celt_pitch_xcorr_edsp_process2_loop4: - SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0) - LDR r7, [r4], #4 - SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1) - SUBS r12, r12, #4 @ j-=4 - SMLATT r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_1, y_1) - LDR r8, [r5], #4 - SMLATB r11, r6, r9, r11 @ sum1 = MAC16_16(sum1, x_1, y_2) - LDRGT r6, [r4], #4 - SMLABB r10, r7, r9, r10 @ sum0 = MAC16_16(sum0, x_2, y_2) - SMLABT r11, r7, r9, r11 @ sum1 = MAC16_16(sum1, x_2, y_3) - SMLATT r10, r7, r9, r10 @ sum0 = MAC16_16(sum0, x_3, y_3) - LDRGT r9, [r5], #4 - SMLATB r11, r7, r8, r11 @ sum1 = MAC16_16(sum1, x_3, y_4) - BGT celt_pitch_xcorr_edsp_process2_loop4 -celt_pitch_xcorr_edsp_process2_loop_done: - ADDS r12, r12, #2 - BLE celt_pitch_xcorr_edsp_process2_1 - LDR r6, [r4], #4 - @ Stall - SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0) - LDR r9, [r5], #4 - SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1) - SUB r12, r12, #2 - SMLATT r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_1, y_1) - MOV r8, r9 - SMLATB r11, r6, r9, r11 @ sum1 = MAC16_16(sum1, x_1, y_2) -celt_pitch_xcorr_edsp_process2_1: - LDRH r6, [r4], #2 - ADDS r12, r12, #1 - @ Stall - SMLABB r10, r6, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_0) - LDRHGT r7, [r4], #2 - SMLABT r11, r6, r8, r11 @ sum1 = MAC16_16(sum1, x_0, y_1) - BLE celt_pitch_xcorr_edsp_process2_done - LDRH r9, [r5], #2 - SMLABT r10, r7, r8, r10 @ sum0 = MAC16_16(sum0, x_0, y_1) - SMLABB r11, r7, r9, r11 @ sum1 = MAC16_16(sum1, x_0, y_2) -celt_pitch_xcorr_edsp_process2_done: - @ Restore _x - SUB r4, r4, r3, LSL #1 - @ Restore and advance _y - SUB r5, r5, r3, LSL #1 - @ maxcorr = 
max(maxcorr, sum0) - CMP r0, r10 - ADD r5, r5, #2 - MOVLT r0, r10 - SUB r1, r1, #2 - @ maxcorr = max(maxcorr, sum1) - CMP r0, r11 - @ xcorr[i] = sum - STR r10, [r2], #4 - MOVLT r0, r11 - STR r11, [r2], #4 -celt_pitch_xcorr_edsp_process1a: - ADDS r1, r1, #1 - BLT celt_pitch_xcorr_edsp_done - SUBS r12, r3, #4 - @ r14 = sum = 0 - MOV r14, #0 - BLT celt_pitch_xcorr_edsp_process1a_loop_done - LDR r6, [r4], #4 - LDR r8, [r5], #4 - LDR r7, [r4], #4 - LDR r9, [r5], #4 -celt_pitch_xcorr_edsp_process1a_loop4: - SMLABB r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0) - SUBS r12, r12, #4 @ j-=4 - SMLATT r14, r6, r8, r14 @ sum = MAC16_16(sum, x_1, y_1) - LDRGE r6, [r4], #4 - SMLABB r14, r7, r9, r14 @ sum = MAC16_16(sum, x_2, y_2) - LDRGE r8, [r5], #4 - SMLATT r14, r7, r9, r14 @ sum = MAC16_16(sum, x_3, y_3) - LDRGE r7, [r4], #4 - LDRGE r9, [r5], #4 - BGE celt_pitch_xcorr_edsp_process1a_loop4 -celt_pitch_xcorr_edsp_process1a_loop_done: - ADDS r12, r12, #2 - LDRGE r6, [r4], #4 - LDRGE r8, [r5], #4 - @ Stall - SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, x_0, y_0) - SUBGE r12, r12, #2 - SMLATTGE r14, r6, r8, r14 @ sum = MAC16_16(sum, x_1, y_1) - ADDS r12, r12, #1 - LDRHGE r6, [r4], #2 - LDRHGE r8, [r5], #2 - @ Stall - SMLABBGE r14, r6, r8, r14 @ sum = MAC16_16(sum, *x, *y) - @ maxcorr = max(maxcorr, sum) - CMP r0, r14 - @ xcorr[i] = sum - STR r14, [r2], #4 - MOVLT r0, r14 -celt_pitch_xcorr_edsp_done: - LDMFD sp!, {r4-r11, pc} - .size celt_pitch_xcorr_edsp, .-celt_pitch_xcorr_edsp @ ENDP - - .endif - -@ END: - .section .note.GNU-stack,"",%progbits diff --git a/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm.s b/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm.s deleted file mode 100644 index f96e0a88bb..0000000000 --- a/thirdparty/opus/celt/arm/celt_pitch_xcorr_arm.s +++ /dev/null @@ -1,547 +0,0 @@ -; Copyright (c) 2007-2008 CSIRO -; Copyright (c) 2007-2009 Xiph.Org Foundation -; Copyright (c) 2013 Parrot -; Written by Aurélien Zanelli -; -; Redistribution and use in source and 
binary forms, with or without -; modification, are permitted provided that the following conditions -; are met: -; -; - Redistributions of source code must retain the above copyright -; notice, this list of conditions and the following disclaimer. -; -; - Redistributions in binary form must reproduce the above copyright -; notice, this list of conditions and the following disclaimer in the -; documentation and/or other materials provided with the distribution. -; -; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER -; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - AREA |.text|, CODE, READONLY - - GET celt/arm/armopts.s - -IF OPUS_ARM_MAY_HAVE_EDSP - EXPORT celt_pitch_xcorr_edsp -ENDIF - -IF OPUS_ARM_MAY_HAVE_NEON - EXPORT celt_pitch_xcorr_neon -ENDIF - -IF OPUS_ARM_MAY_HAVE_NEON - -; Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3 -xcorr_kernel_neon PROC -xcorr_kernel_neon_start - ; input: - ; r3 = int len - ; r4 = opus_val16 *x - ; r5 = opus_val16 *y - ; q0 = opus_val32 sum[4] - ; output: - ; q0 = opus_val32 sum[4] - ; preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15 - ; internal usage: - ; r12 = int j - ; d3 = y_3|y_2|y_1|y_0 - ; q2 = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4 - ; q3 = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0 - ; q8 = scratch - ; - ; Load y[0...3] - ; This requires len>0 to always be valid (which we assert in the C code). - VLD1.16 {d5}, [r5]! - SUBS r12, r3, #8 - BLE xcorr_kernel_neon_process4 -; Process 8 samples at a time. -; This loop loads one y value more than we actually need. Therefore we have to -; stop as soon as there are 8 or fewer samples left (instead of 7), to avoid -; reading past the end of the array. -xcorr_kernel_neon_process8 - ; This loop has 19 total instructions (10 cycles to issue, minimum), with - ; - 2 cycles of ARM insrtuctions, - ; - 10 cycles of load/store/byte permute instructions, and - ; - 9 cycles of data processing instructions. - ; On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the - ; latter two categories, meaning the whole loop should run in 10 cycles per - ; iteration, barring cache misses. - ; - ; Load x[0...7] - VLD1.16 {d6, d7}, [r4]! - ; Unlike VMOV, VAND is a data processsing instruction (and doesn't get - ; assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1. - VAND d3, d5, d5 - SUBS r12, r12, #8 - ; Load y[4...11] - VLD1.16 {d4, d5}, [r5]! 
- VMLAL.S16 q0, d3, d6[0] - VEXT.16 d16, d3, d4, #1 - VMLAL.S16 q0, d4, d7[0] - VEXT.16 d17, d4, d5, #1 - VMLAL.S16 q0, d16, d6[1] - VEXT.16 d16, d3, d4, #2 - VMLAL.S16 q0, d17, d7[1] - VEXT.16 d17, d4, d5, #2 - VMLAL.S16 q0, d16, d6[2] - VEXT.16 d16, d3, d4, #3 - VMLAL.S16 q0, d17, d7[2] - VEXT.16 d17, d4, d5, #3 - VMLAL.S16 q0, d16, d6[3] - VMLAL.S16 q0, d17, d7[3] - BGT xcorr_kernel_neon_process8 -; Process 4 samples here if we have > 4 left (still reading one extra y value). -xcorr_kernel_neon_process4 - ADDS r12, r12, #4 - BLE xcorr_kernel_neon_process2 - ; Load x[0...3] - VLD1.16 d6, [r4]! - ; Use VAND since it's a data processing instruction again. - VAND d4, d5, d5 - SUB r12, r12, #4 - ; Load y[4...7] - VLD1.16 d5, [r5]! - VMLAL.S16 q0, d4, d6[0] - VEXT.16 d16, d4, d5, #1 - VMLAL.S16 q0, d16, d6[1] - VEXT.16 d16, d4, d5, #2 - VMLAL.S16 q0, d16, d6[2] - VEXT.16 d16, d4, d5, #3 - VMLAL.S16 q0, d16, d6[3] -; Process 2 samples here if we have > 2 left (still reading one extra y value). -xcorr_kernel_neon_process2 - ADDS r12, r12, #2 - BLE xcorr_kernel_neon_process1 - ; Load x[0...1] - VLD2.16 {d6[],d7[]}, [r4]! - ; Use VAND since it's a data processing instruction again. - VAND d4, d5, d5 - SUB r12, r12, #2 - ; Load y[4...5] - VLD1.32 {d5[]}, [r5]! - VMLAL.S16 q0, d4, d6 - VEXT.16 d16, d4, d5, #1 - ; Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI - ; instead of VEXT, since it's a data-processing instruction. - VSRI.64 d5, d4, #32 - VMLAL.S16 q0, d16, d7 -; Process 1 sample using the extra y value we loaded above. -xcorr_kernel_neon_process1 - ; Load next *x - VLD1.16 {d6[]}, [r4]! - ADDS r12, r12, #1 - ; y[0...3] are left in d5 from prior iteration(s) (if any) - VMLAL.S16 q0, d5, d6 - MOVLE pc, lr -; Now process 1 last sample, not reading ahead. - ; Load last *y - VLD1.16 {d4[]}, [r5]! - VSRI.64 d4, d5, #16 - ; Load last *x - VLD1.16 {d6[]}, [r4]! 
- VMLAL.S16 q0, d4, d6 - MOV pc, lr - ENDP - -; opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y, -; opus_val32 *xcorr, int len, int max_pitch) -celt_pitch_xcorr_neon PROC - ; input: - ; r0 = opus_val16 *_x - ; r1 = opus_val16 *_y - ; r2 = opus_val32 *xcorr - ; r3 = int len - ; output: - ; r0 = int maxcorr - ; internal usage: - ; r4 = opus_val16 *x (for xcorr_kernel_neon()) - ; r5 = opus_val16 *y (for xcorr_kernel_neon()) - ; r6 = int max_pitch - ; r12 = int j - ; q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon()) - STMFD sp!, {r4-r6, lr} - LDR r6, [sp, #16] - VMOV.S32 q15, #1 - ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done - SUBS r6, r6, #4 - BLT celt_pitch_xcorr_neon_process4_done -celt_pitch_xcorr_neon_process4 - ; xcorr_kernel_neon parameters: - ; r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0} - MOV r4, r0 - MOV r5, r1 - VEOR q0, q0, q0 - ; xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3. - ; So we don't save/restore any other registers. - BL xcorr_kernel_neon_start - SUBS r6, r6, #4 - VST1.32 {q0}, [r2]! - ; _y += 4 - ADD r1, r1, #8 - VMAX.S32 q15, q15, q0 - ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done - BGE celt_pitch_xcorr_neon_process4 -; We have less than 4 sums left to compute. -celt_pitch_xcorr_neon_process4_done - ADDS r6, r6, #4 - ; Reduce maxcorr to a single value - VMAX.S32 d30, d30, d31 - VPMAX.S32 d30, d30, d30 - ; if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done - BLE celt_pitch_xcorr_neon_done -; Now compute each remaining sum one at a time. -celt_pitch_xcorr_neon_process_remaining - MOV r4, r0 - MOV r5, r1 - VMOV.I32 q0, #0 - SUBS r12, r3, #8 - BLT celt_pitch_xcorr_neon_process_remaining4 -; Sum terms 8 at a time. -celt_pitch_xcorr_neon_process_remaining_loop8 - ; Load x[0...7] - VLD1.16 {q1}, [r4]! - ; Load y[0...7] - VLD1.16 {q2}, [r5]! 
- SUBS r12, r12, #8 - VMLAL.S16 q0, d4, d2 - VMLAL.S16 q0, d5, d3 - BGE celt_pitch_xcorr_neon_process_remaining_loop8 -; Sum terms 4 at a time. -celt_pitch_xcorr_neon_process_remaining4 - ADDS r12, r12, #4 - BLT celt_pitch_xcorr_neon_process_remaining4_done - ; Load x[0...3] - VLD1.16 {d2}, [r4]! - ; Load y[0...3] - VLD1.16 {d3}, [r5]! - SUB r12, r12, #4 - VMLAL.S16 q0, d3, d2 -celt_pitch_xcorr_neon_process_remaining4_done - ; Reduce the sum to a single value. - VADD.S32 d0, d0, d1 - VPADDL.S32 d0, d0 - ADDS r12, r12, #4 - BLE celt_pitch_xcorr_neon_process_remaining_loop_done -; Sum terms 1 at a time. -celt_pitch_xcorr_neon_process_remaining_loop1 - VLD1.16 {d2[]}, [r4]! - VLD1.16 {d3[]}, [r5]! - SUBS r12, r12, #1 - VMLAL.S16 q0, d2, d3 - BGT celt_pitch_xcorr_neon_process_remaining_loop1 -celt_pitch_xcorr_neon_process_remaining_loop_done - VST1.32 {d0[0]}, [r2]! - VMAX.S32 d30, d30, d0 - SUBS r6, r6, #1 - ; _y++ - ADD r1, r1, #2 - ; if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining - BGT celt_pitch_xcorr_neon_process_remaining -celt_pitch_xcorr_neon_done - VMOV.32 r0, d30[0] - LDMFD sp!, {r4-r6, pc} - ENDP - -ENDIF - -IF OPUS_ARM_MAY_HAVE_EDSP - -; This will get used on ARMv7 devices without NEON, so it has been optimized -; to take advantage of dual-issuing where possible. 
-xcorr_kernel_edsp PROC -xcorr_kernel_edsp_start - ; input: - ; r3 = int len - ; r4 = opus_val16 *_x (must be 32-bit aligned) - ; r5 = opus_val16 *_y (must be 32-bit aligned) - ; r6...r9 = opus_val32 sum[4] - ; output: - ; r6...r9 = opus_val32 sum[4] - ; preserved: r0-r5 - ; internal usage - ; r2 = int j - ; r12,r14 = opus_val16 x[4] - ; r10,r11 = opus_val16 y[4] - STMFD sp!, {r2,r4,r5,lr} - LDR r10, [r5], #4 ; Load y[0...1] - SUBS r2, r3, #4 ; j = len-4 - LDR r11, [r5], #4 ; Load y[2...3] - BLE xcorr_kernel_edsp_process4_done - LDR r12, [r4], #4 ; Load x[0...1] - ; Stall -xcorr_kernel_edsp_process4 - ; The multiplies must issue from pipeline 0, and can't dual-issue with each - ; other. Every other instruction here dual-issues with a multiply, and is - ; thus "free". There should be no stalls in the body of the loop. - SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x_0,y_0) - LDR r14, [r4], #4 ; Load x[2...3] - SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x_0,y_1) - SUBS r2, r2, #4 ; j-=4 - SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x_0,y_2) - SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x_0,y_3) - SMLATT r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x_1,y_1) - LDR r10, [r5], #4 ; Load y[4...5] - SMLATB r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],x_1,y_2) - SMLATT r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x_1,y_3) - SMLATB r9, r12, r10, r9 ; sum[3] = MAC16_16(sum[3],x_1,y_4) - LDRGT r12, [r4], #4 ; Load x[0...1] - SMLABB r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],x_2,y_2) - SMLABT r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x_2,y_3) - SMLABB r8, r14, r10, r8 ; sum[2] = MAC16_16(sum[2],x_2,y_4) - SMLABT r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x_2,y_5) - SMLATT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],x_3,y_3) - LDR r11, [r5], #4 ; Load y[6...7] - SMLATB r7, r14, r10, r7 ; sum[1] = MAC16_16(sum[1],x_3,y_4) - SMLATT r8, r14, r10, r8 ; sum[2] = MAC16_16(sum[2],x_3,y_5) - SMLATB r9, r14, r11, r9 ; sum[3] = MAC16_16(sum[3],x_3,y_6) - BGT 
xcorr_kernel_edsp_process4 -xcorr_kernel_edsp_process4_done - ADDS r2, r2, #4 - BLE xcorr_kernel_edsp_done - LDRH r12, [r4], #2 ; r12 = *x++ - SUBS r2, r2, #1 ; j-- - ; Stall - SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_0) - LDRHGT r14, [r4], #2 ; r14 = *x++ - SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x,y_1) - SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_2) - SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x,y_3) - BLE xcorr_kernel_edsp_done - SMLABT r6, r14, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_1) - SUBS r2, r2, #1 ; j-- - SMLABB r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x,y_2) - LDRH r10, [r5], #2 ; r10 = y_4 = *y++ - SMLABT r8, r14, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_3) - LDRHGT r12, [r4], #2 ; r12 = *x++ - SMLABB r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x,y_4) - BLE xcorr_kernel_edsp_done - SMLABB r6, r12, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_2) - CMP r2, #1 ; j-- - SMLABT r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_3) - LDRH r2, [r5], #2 ; r2 = y_5 = *y++ - SMLABB r8, r12, r10, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_4) - LDRHGT r14, [r4] ; r14 = *x - SMLABB r9, r12, r2, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_5) - BLE xcorr_kernel_edsp_done - SMLABT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_3) - LDRH r11, [r5] ; r11 = y_6 = *y - SMLABB r7, r14, r10, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_4) - SMLABB r8, r14, r2, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_5) - SMLABB r9, r14, r11, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_6) -xcorr_kernel_edsp_done - LDMFD sp!, {r2,r4,r5,pc} - ENDP - -celt_pitch_xcorr_edsp PROC - ; input: - ; r0 = opus_val16 *_x (must be 32-bit aligned) - ; r1 = opus_val16 *_y (only needs to be 16-bit aligned) - ; r2 = opus_val32 *xcorr - ; r3 = int len - ; output: - ; r0 = maxcorr - ; internal usage - ; r4 = opus_val16 *x - ; r5 = opus_val16 *y - ; r6 = opus_val32 sum0 - ; r7 = opus_val32 sum1 - ; r8 = opus_val32 sum2 - ; r9 = opus_val32 sum3 - ; r1 = int max_pitch - ; r12 = int j - STMFD sp!, {r4-r11, 
lr} - MOV r5, r1 - LDR r1, [sp, #36] - MOV r4, r0 - TST r5, #3 - ; maxcorr = 1 - MOV r0, #1 - BEQ celt_pitch_xcorr_edsp_process1u_done -; Compute one sum at the start to make y 32-bit aligned. - SUBS r12, r3, #4 - ; r14 = sum = 0 - MOV r14, #0 - LDRH r8, [r5], #2 - BLE celt_pitch_xcorr_edsp_process1u_loop4_done - LDR r6, [r4], #4 - MOV r8, r8, LSL #16 -celt_pitch_xcorr_edsp_process1u_loop4 - LDR r9, [r5], #4 - SMLABT r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0) - LDR r7, [r4], #4 - SMLATB r14, r6, r9, r14 ; sum = MAC16_16(sum, x_1, y_1) - LDR r8, [r5], #4 - SMLABT r14, r7, r9, r14 ; sum = MAC16_16(sum, x_2, y_2) - SUBS r12, r12, #4 ; j-=4 - SMLATB r14, r7, r8, r14 ; sum = MAC16_16(sum, x_3, y_3) - LDRGT r6, [r4], #4 - BGT celt_pitch_xcorr_edsp_process1u_loop4 - MOV r8, r8, LSR #16 -celt_pitch_xcorr_edsp_process1u_loop4_done - ADDS r12, r12, #4 -celt_pitch_xcorr_edsp_process1u_loop1 - LDRHGE r6, [r4], #2 - ; Stall - SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) - SUBSGE r12, r12, #1 - LDRHGT r8, [r5], #2 - BGT celt_pitch_xcorr_edsp_process1u_loop1 - ; Restore _x - SUB r4, r4, r3, LSL #1 - ; Restore and advance _y - SUB r5, r5, r3, LSL #1 - ; maxcorr = max(maxcorr, sum) - CMP r0, r14 - ADD r5, r5, #2 - MOVLT r0, r14 - SUBS r1, r1, #1 - ; xcorr[i] = sum - STR r14, [r2], #4 - BLE celt_pitch_xcorr_edsp_done -celt_pitch_xcorr_edsp_process1u_done - ; if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2 - SUBS r1, r1, #4 - BLT celt_pitch_xcorr_edsp_process2 -celt_pitch_xcorr_edsp_process4 - ; xcorr_kernel_edsp parameters: - ; r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0} - MOV r6, #0 - MOV r7, #0 - MOV r8, #0 - MOV r9, #0 - BL xcorr_kernel_edsp_start ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len) - ; maxcorr = max(maxcorr, sum0, sum1, sum2, sum3) - CMP r0, r6 - ; _y+=4 - ADD r5, r5, #8 - MOVLT r0, r6 - CMP r0, r7 - MOVLT r0, r7 - CMP r0, r8 - MOVLT r0, r8 - CMP r0, r9 - MOVLT r0, r9 - STMIA r2!, {r6-r9} - SUBS r1, r1, #4 - BGE 
celt_pitch_xcorr_edsp_process4 -celt_pitch_xcorr_edsp_process2 - ADDS r1, r1, #2 - BLT celt_pitch_xcorr_edsp_process1a - SUBS r12, r3, #4 - ; {r10, r11} = {sum0, sum1} = {0, 0} - MOV r10, #0 - MOV r11, #0 - LDR r8, [r5], #4 - BLE celt_pitch_xcorr_edsp_process2_loop_done - LDR r6, [r4], #4 - LDR r9, [r5], #4 -celt_pitch_xcorr_edsp_process2_loop4 - SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0) - LDR r7, [r4], #4 - SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) - SUBS r12, r12, #4 ; j-=4 - SMLATT r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_1, y_1) - LDR r8, [r5], #4 - SMLATB r11, r6, r9, r11 ; sum1 = MAC16_16(sum1, x_1, y_2) - LDRGT r6, [r4], #4 - SMLABB r10, r7, r9, r10 ; sum0 = MAC16_16(sum0, x_2, y_2) - SMLABT r11, r7, r9, r11 ; sum1 = MAC16_16(sum1, x_2, y_3) - SMLATT r10, r7, r9, r10 ; sum0 = MAC16_16(sum0, x_3, y_3) - LDRGT r9, [r5], #4 - SMLATB r11, r7, r8, r11 ; sum1 = MAC16_16(sum1, x_3, y_4) - BGT celt_pitch_xcorr_edsp_process2_loop4 -celt_pitch_xcorr_edsp_process2_loop_done - ADDS r12, r12, #2 - BLE celt_pitch_xcorr_edsp_process2_1 - LDR r6, [r4], #4 - ; Stall - SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0) - LDR r9, [r5], #4 - SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) - SUB r12, r12, #2 - SMLATT r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_1, y_1) - MOV r8, r9 - SMLATB r11, r6, r9, r11 ; sum1 = MAC16_16(sum1, x_1, y_2) -celt_pitch_xcorr_edsp_process2_1 - LDRH r6, [r4], #2 - ADDS r12, r12, #1 - ; Stall - SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0) - LDRHGT r7, [r4], #2 - SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) - BLE celt_pitch_xcorr_edsp_process2_done - LDRH r9, [r5], #2 - SMLABT r10, r7, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_1) - SMLABB r11, r7, r9, r11 ; sum1 = MAC16_16(sum1, x_0, y_2) -celt_pitch_xcorr_edsp_process2_done - ; Restore _x - SUB r4, r4, r3, LSL #1 - ; Restore and advance _y - SUB r5, r5, r3, LSL #1 - ; maxcorr = max(maxcorr, sum0) - CMP r0, r10 - ADD r5, r5, #2 
- MOVLT r0, r10 - SUB r1, r1, #2 - ; maxcorr = max(maxcorr, sum1) - CMP r0, r11 - ; xcorr[i] = sum - STR r10, [r2], #4 - MOVLT r0, r11 - STR r11, [r2], #4 -celt_pitch_xcorr_edsp_process1a - ADDS r1, r1, #1 - BLT celt_pitch_xcorr_edsp_done - SUBS r12, r3, #4 - ; r14 = sum = 0 - MOV r14, #0 - BLT celt_pitch_xcorr_edsp_process1a_loop_done - LDR r6, [r4], #4 - LDR r8, [r5], #4 - LDR r7, [r4], #4 - LDR r9, [r5], #4 -celt_pitch_xcorr_edsp_process1a_loop4 - SMLABB r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0) - SUBS r12, r12, #4 ; j-=4 - SMLATT r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1) - LDRGE r6, [r4], #4 - SMLABB r14, r7, r9, r14 ; sum = MAC16_16(sum, x_2, y_2) - LDRGE r8, [r5], #4 - SMLATT r14, r7, r9, r14 ; sum = MAC16_16(sum, x_3, y_3) - LDRGE r7, [r4], #4 - LDRGE r9, [r5], #4 - BGE celt_pitch_xcorr_edsp_process1a_loop4 -celt_pitch_xcorr_edsp_process1a_loop_done - ADDS r12, r12, #2 - LDRGE r6, [r4], #4 - LDRGE r8, [r5], #4 - ; Stall - SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0) - SUBGE r12, r12, #2 - SMLATTGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1) - ADDS r12, r12, #1 - LDRHGE r6, [r4], #2 - LDRHGE r8, [r5], #2 - ; Stall - SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) - ; maxcorr = max(maxcorr, sum) - CMP r0, r14 - ; xcorr[i] = sum - STR r14, [r2], #4 - MOVLT r0, r14 -celt_pitch_xcorr_edsp_done - LDMFD sp!, {r4-r11, pc} - ENDP - -ENDIF - -END diff --git a/thirdparty/opus/celt/arm/fft_arm.h b/thirdparty/opus/celt/arm/fft_arm.h deleted file mode 100644 index 0cb55d8e22..0000000000 --- a/thirdparty/opus/celt/arm/fft_arm.h +++ /dev/null @@ -1,72 +0,0 @@ -/* Copyright (c) 2015 Xiph.Org Foundation - Written by Viswanath Puttagunta */ -/** - @file fft_arm.h - @brief ARM Neon Intrinsic optimizations for fft using NE10 library - */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain 
the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - - -#if !defined(FFT_ARM_H) -#define FFT_ARM_H - -#include "config.h" -#include "kiss_fft.h" - -#if defined(HAVE_ARM_NE10) - -int opus_fft_alloc_arm_neon(kiss_fft_state *st); -void opus_fft_free_arm_neon(kiss_fft_state *st); - -void opus_fft_neon(const kiss_fft_state *st, - const kiss_fft_cpx *fin, - kiss_fft_cpx *fout); - -void opus_ifft_neon(const kiss_fft_state *st, - const kiss_fft_cpx *fin, - kiss_fft_cpx *fout); - -#if !defined(OPUS_HAVE_RTCD) -#define OVERRIDE_OPUS_FFT (1) - -#define opus_fft_alloc_arch(_st, arch) \ - ((void)(arch), opus_fft_alloc_arm_neon(_st)) - -#define opus_fft_free_arch(_st, arch) \ - ((void)(arch), opus_fft_free_arm_neon(_st)) - -#define opus_fft(_st, _fin, _fout, arch) \ - ((void)(arch), opus_fft_neon(_st, _fin, _fout)) - -#define opus_ifft(_st, _fin, _fout, arch) \ - ((void)(arch), opus_ifft_neon(_st, _fin, _fout)) - -#endif /* OPUS_HAVE_RTCD */ - -#endif /* HAVE_ARM_NE10 */ - -#endif diff --git a/thirdparty/opus/celt/arm/fixed_arm64.h b/thirdparty/opus/celt/arm/fixed_arm64.h deleted file mode 100644 index c6fbd3db2c..0000000000 --- a/thirdparty/opus/celt/arm/fixed_arm64.h +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright (C) 2015 Vidyo */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef FIXED_ARM64_H -#define FIXED_ARM64_H - -#include <arm_neon.h> - -#undef SIG2WORD16 -#define SIG2WORD16(x) (vqmovns_s32(PSHR32((x), SIG_SHIFT))) - -#endif diff --git a/thirdparty/opus/celt/arm/fixed_armv4.h b/thirdparty/opus/celt/arm/fixed_armv4.h deleted file mode 100644 index efb3b1896a..0000000000 --- a/thirdparty/opus/celt/arm/fixed_armv4.h +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright (C) 2013 Xiph.Org Foundation and contributors */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef FIXED_ARMv4_H -#define FIXED_ARMv4_H - -/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ -#undef MULT16_32_Q16 -static OPUS_INLINE opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b) -{ - unsigned rd_lo; - int rd_hi; - __asm__( - "#MULT16_32_Q16\n\t" - "smull %0, %1, %2, %3\n\t" - : "=&r"(rd_lo), "=&r"(rd_hi) - : "%r"(b),"r"(a<<16) - ); - return rd_hi; -} -#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv4(a, b)) - - -/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ -#undef MULT16_32_Q15 -static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b) -{ - unsigned rd_lo; - int rd_hi; - __asm__( - "#MULT16_32_Q15\n\t" - "smull %0, %1, %2, %3\n\t" - : "=&r"(rd_lo), "=&r"(rd_hi) - : "%r"(b), "r"(a<<16) - ); - /*We intentionally don't OR in the high bit of rd_lo for speed.*/ - return rd_hi<<1; -} -#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv4(a, b)) - - -/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. - b must fit in 31 bits. - Result fits in 32 bits. */ -#undef MAC16_32_Q15 -#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b)) - -/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add. - Result fits in 32 bits. */ -#undef MAC16_32_Q16 -#define MAC16_32_Q16(c, a, b) ADD32(c, MULT16_32_Q16(a, b)) - -/** 32x32 multiplication, followed by a 31-bit shift right. 
Results fits in 32 bits */ -#undef MULT32_32_Q31 -#define MULT32_32_Q31(a,b) (opus_val32)((((opus_int64)(a)) * ((opus_int64)(b)))>>31) - -#endif diff --git a/thirdparty/opus/celt/arm/fixed_armv5e.h b/thirdparty/opus/celt/arm/fixed_armv5e.h deleted file mode 100644 index 36a6321101..0000000000 --- a/thirdparty/opus/celt/arm/fixed_armv5e.h +++ /dev/null @@ -1,151 +0,0 @@ -/* Copyright (C) 2007-2009 Xiph.Org Foundation - Copyright (C) 2003-2008 Jean-Marc Valin - Copyright (C) 2007-2008 CSIRO - Copyright (C) 2013 Parrot */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef FIXED_ARMv5E_H -#define FIXED_ARMv5E_H - -#include "fixed_armv4.h" - -/** 16x32 multiplication, followed by a 16-bit shift right. 
Results fits in 32 bits */ -#undef MULT16_32_Q16 -static OPUS_INLINE opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b) -{ - int res; - __asm__( - "#MULT16_32_Q16\n\t" - "smulwb %0, %1, %2\n\t" - : "=r"(res) - : "r"(b),"r"(a) - ); - return res; -} -#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv5e(a, b)) - - -/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ -#undef MULT16_32_Q15 -static OPUS_INLINE opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b) -{ - int res; - __asm__( - "#MULT16_32_Q15\n\t" - "smulwb %0, %1, %2\n\t" - : "=r"(res) - : "r"(b), "r"(a) - ); - return res<<1; -} -#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b)) - - -/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. - b must fit in 31 bits. - Result fits in 32 bits. */ -#undef MAC16_32_Q15 -static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a, - opus_val32 b) -{ - int res; - __asm__( - "#MAC16_32_Q15\n\t" - "smlawb %0, %1, %2, %3;\n" - : "=r"(res) - : "r"(b<<1), "r"(a), "r"(c) - ); - return res; -} -#define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b)) - -/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add. - Result fits in 32 bits. 
*/ -#undef MAC16_32_Q16 -static OPUS_INLINE opus_val32 MAC16_32_Q16_armv5e(opus_val32 c, opus_val16 a, - opus_val32 b) -{ - int res; - __asm__( - "#MAC16_32_Q16\n\t" - "smlawb %0, %1, %2, %3;\n" - : "=r"(res) - : "r"(b), "r"(a), "r"(c) - ); - return res; -} -#define MAC16_32_Q16(c, a, b) (MAC16_32_Q16_armv5e(c, a, b)) - -/** 16x16 multiply-add where the result fits in 32 bits */ -#undef MAC16_16 -static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a, - opus_val16 b) -{ - int res; - __asm__( - "#MAC16_16\n\t" - "smlabb %0, %1, %2, %3;\n" - : "=r"(res) - : "r"(a), "r"(b), "r"(c) - ); - return res; -} -#define MAC16_16(c, a, b) (MAC16_16_armv5e(c, a, b)) - -/** 16x16 multiplication where the result fits in 32 bits */ -#undef MULT16_16 -static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b) -{ - int res; - __asm__( - "#MULT16_16\n\t" - "smulbb %0, %1, %2;\n" - : "=r"(res) - : "r"(a), "r"(b) - ); - return res; -} -#define MULT16_16(a, b) (MULT16_16_armv5e(a, b)) - -#ifdef OPUS_ARM_INLINE_MEDIA - -#undef SIG2WORD16 -static OPUS_INLINE opus_val16 SIG2WORD16_armv6(opus_val32 x) -{ - celt_sig res; - __asm__( - "#SIG2WORD16\n\t" - "ssat %0, #16, %1, ASR #12\n\t" - : "=r"(res) - : "r"(x+2048) - ); - return EXTRACT16(res); -} -#define SIG2WORD16(x) (SIG2WORD16_armv6(x)) - -#endif /* OPUS_ARM_INLINE_MEDIA */ - -#endif diff --git a/thirdparty/opus/celt/arm/kiss_fft_armv4.h b/thirdparty/opus/celt/arm/kiss_fft_armv4.h deleted file mode 100644 index e4faad6f2b..0000000000 --- a/thirdparty/opus/celt/arm/kiss_fft_armv4.h +++ /dev/null @@ -1,121 +0,0 @@ -/*Copyright (c) 2013, Xiph.Org Foundation and contributors. - - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE.*/ - -#ifndef KISS_FFT_ARMv4_H -#define KISS_FFT_ARMv4_H - -#if !defined(KISS_FFT_GUTS_H) -#error "This file should only be included from _kiss_fft_guts.h" -#endif - -#ifdef FIXED_POINT - -#undef C_MUL -#define C_MUL(m,a,b) \ - do{ \ - int br__; \ - int bi__; \ - int tt__; \ - __asm__ __volatile__( \ - "#C_MUL\n\t" \ - "ldrsh %[br], [%[bp], #0]\n\t" \ - "ldm %[ap], {r0,r1}\n\t" \ - "ldrsh %[bi], [%[bp], #2]\n\t" \ - "smull %[tt], %[mi], r1, %[br]\n\t" \ - "smlal %[tt], %[mi], r0, %[bi]\n\t" \ - "rsb %[bi], %[bi], #0\n\t" \ - "smull %[br], %[mr], r0, %[br]\n\t" \ - "mov %[tt], %[tt], lsr #15\n\t" \ - "smlal %[br], %[mr], r1, %[bi]\n\t" \ - "orr %[mi], %[tt], %[mi], lsl #17\n\t" \ - "mov %[br], %[br], lsr #15\n\t" \ - "orr %[mr], %[br], %[mr], lsl #17\n\t" \ - : [mr]"=r"((m).r), [mi]"=r"((m).i), \ - [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \ - : [ap]"r"(&(a)), [bp]"r"(&(b)) \ - : "r0", "r1" \ - ); \ - } \ - while(0) - -#undef C_MUL4 -#define C_MUL4(m,a,b) \ - do{ \ - int br__; \ - int bi__; 
\ - int tt__; \ - __asm__ __volatile__( \ - "#C_MUL4\n\t" \ - "ldrsh %[br], [%[bp], #0]\n\t" \ - "ldm %[ap], {r0,r1}\n\t" \ - "ldrsh %[bi], [%[bp], #2]\n\t" \ - "smull %[tt], %[mi], r1, %[br]\n\t" \ - "smlal %[tt], %[mi], r0, %[bi]\n\t" \ - "rsb %[bi], %[bi], #0\n\t" \ - "smull %[br], %[mr], r0, %[br]\n\t" \ - "mov %[tt], %[tt], lsr #17\n\t" \ - "smlal %[br], %[mr], r1, %[bi]\n\t" \ - "orr %[mi], %[tt], %[mi], lsl #15\n\t" \ - "mov %[br], %[br], lsr #17\n\t" \ - "orr %[mr], %[br], %[mr], lsl #15\n\t" \ - : [mr]"=r"((m).r), [mi]"=r"((m).i), \ - [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \ - : [ap]"r"(&(a)), [bp]"r"(&(b)) \ - : "r0", "r1" \ - ); \ - } \ - while(0) - -#undef C_MULC -#define C_MULC(m,a,b) \ - do{ \ - int br__; \ - int bi__; \ - int tt__; \ - __asm__ __volatile__( \ - "#C_MULC\n\t" \ - "ldrsh %[br], [%[bp], #0]\n\t" \ - "ldm %[ap], {r0,r1}\n\t" \ - "ldrsh %[bi], [%[bp], #2]\n\t" \ - "smull %[tt], %[mr], r0, %[br]\n\t" \ - "smlal %[tt], %[mr], r1, %[bi]\n\t" \ - "rsb %[bi], %[bi], #0\n\t" \ - "smull %[br], %[mi], r1, %[br]\n\t" \ - "mov %[tt], %[tt], lsr #15\n\t" \ - "smlal %[br], %[mi], r0, %[bi]\n\t" \ - "orr %[mr], %[tt], %[mr], lsl #17\n\t" \ - "mov %[br], %[br], lsr #15\n\t" \ - "orr %[mi], %[br], %[mi], lsl #17\n\t" \ - : [mr]"=r"((m).r), [mi]"=r"((m).i), \ - [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \ - : [ap]"r"(&(a)), [bp]"r"(&(b)) \ - : "r0", "r1" \ - ); \ - } \ - while(0) - -#endif /* FIXED_POINT */ - -#endif /* KISS_FFT_ARMv4_H */ diff --git a/thirdparty/opus/celt/arm/kiss_fft_armv5e.h b/thirdparty/opus/celt/arm/kiss_fft_armv5e.h deleted file mode 100644 index 9eca183d77..0000000000 --- a/thirdparty/opus/celt/arm/kiss_fft_armv5e.h +++ /dev/null @@ -1,118 +0,0 @@ -/*Copyright (c) 2013, Xiph.Org Foundation and contributors. - - All rights reserved. 
- - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE.*/ - -#ifndef KISS_FFT_ARMv5E_H -#define KISS_FFT_ARMv5E_H - -#if !defined(KISS_FFT_GUTS_H) -#error "This file should only be included from _kiss_fft_guts.h" -#endif - -#ifdef FIXED_POINT - -#if defined(__thumb__)||defined(__thumb2__) -#define LDRD_CONS "Q" -#else -#define LDRD_CONS "Uq" -#endif - -#undef C_MUL -#define C_MUL(m,a,b) \ - do{ \ - int mr1__; \ - int mr2__; \ - int mi__; \ - long long aval__; \ - int bval__; \ - __asm__( \ - "#C_MUL\n\t" \ - "ldrd %[aval], %H[aval], %[ap]\n\t" \ - "ldr %[bval], %[bp]\n\t" \ - "smulwb %[mi], %H[aval], %[bval]\n\t" \ - "smulwb %[mr1], %[aval], %[bval]\n\t" \ - "smulwt %[mr2], %H[aval], %[bval]\n\t" \ - "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \ - : [mr1]"=r"(mr1__), 
[mr2]"=r"(mr2__), [mi]"=r"(mi__), \ - [aval]"=&r"(aval__), [bval]"=r"(bval__) \ - : [ap]LDRD_CONS(a), [bp]"m"(b) \ - ); \ - (m).r = SHL32(SUB32(mr1__, mr2__), 1); \ - (m).i = SHL32(mi__, 1); \ - } \ - while(0) - -#undef C_MUL4 -#define C_MUL4(m,a,b) \ - do{ \ - int mr1__; \ - int mr2__; \ - int mi__; \ - long long aval__; \ - int bval__; \ - __asm__( \ - "#C_MUL4\n\t" \ - "ldrd %[aval], %H[aval], %[ap]\n\t" \ - "ldr %[bval], %[bp]\n\t" \ - "smulwb %[mi], %H[aval], %[bval]\n\t" \ - "smulwb %[mr1], %[aval], %[bval]\n\t" \ - "smulwt %[mr2], %H[aval], %[bval]\n\t" \ - "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \ - : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \ - [aval]"=&r"(aval__), [bval]"=r"(bval__) \ - : [ap]LDRD_CONS(a), [bp]"m"(b) \ - ); \ - (m).r = SHR32(SUB32(mr1__, mr2__), 1); \ - (m).i = SHR32(mi__, 1); \ - } \ - while(0) - -#undef C_MULC -#define C_MULC(m,a,b) \ - do{ \ - int mr__; \ - int mi1__; \ - int mi2__; \ - long long aval__; \ - int bval__; \ - __asm__( \ - "#C_MULC\n\t" \ - "ldrd %[aval], %H[aval], %[ap]\n\t" \ - "ldr %[bval], %[bp]\n\t" \ - "smulwb %[mr], %[aval], %[bval]\n\t" \ - "smulwb %[mi1], %H[aval], %[bval]\n\t" \ - "smulwt %[mi2], %[aval], %[bval]\n\t" \ - "smlawt %[mr], %H[aval], %[bval], %[mr]\n\t" \ - : [mr]"=r"(mr__), [mi1]"=r"(mi1__), [mi2]"=r"(mi2__), \ - [aval]"=&r"(aval__), [bval]"=r"(bval__) \ - : [ap]LDRD_CONS(a), [bp]"m"(b) \ - ); \ - (m).r = SHL32(mr__, 1); \ - (m).i = SHL32(SUB32(mi1__, mi2__), 1); \ - } \ - while(0) - -#endif /* FIXED_POINT */ - -#endif /* KISS_FFT_GUTS_H */ diff --git a/thirdparty/opus/celt/arm/mdct_arm.h b/thirdparty/opus/celt/arm/mdct_arm.h deleted file mode 100644 index 49cbb44576..0000000000 --- a/thirdparty/opus/celt/arm/mdct_arm.h +++ /dev/null @@ -1,60 +0,0 @@ -/* Copyright (c) 2015 Xiph.Org Foundation - Written by Viswanath Puttagunta */ -/** - @file arm_mdct.h - @brief ARM Neon Intrinsic optimizations for mdct using NE10 library - */ - -/* - Redistribution and use in source and binary forms, 
with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#if !defined(MDCT_ARM_H) -#define MDCT_ARM_H - -#include "config.h" -#include "mdct.h" - -#if defined(HAVE_ARM_NE10) -/** Compute a forward MDCT and scale by 4/N, trashes the input array */ -void clt_mdct_forward_neon(const mdct_lookup *l, kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, int overlap, - int shift, int stride, int arch); - -void clt_mdct_backward_neon(const mdct_lookup *l, kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, int overlap, - int shift, int stride, int arch); - -#if !defined(OPUS_HAVE_RTCD) -#define OVERRIDE_OPUS_MDCT (1) -#define clt_mdct_forward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \ - clt_mdct_forward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch) -#define clt_mdct_backward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \ - clt_mdct_backward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch) -#endif /* OPUS_HAVE_RTCD */ -#endif /* HAVE_ARM_NE10 */ - -#endif diff --git a/thirdparty/opus/celt/arm/pitch_arm.h b/thirdparty/opus/celt/arm/pitch_arm.h deleted file mode 100644 index 14331169ee..0000000000 --- a/thirdparty/opus/celt/arm/pitch_arm.h +++ /dev/null @@ -1,126 +0,0 @@ -/* Copyright (c) 2010 Xiph.Org Foundation - * Copyright (c) 2013 Parrot */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#if !defined(PITCH_ARM_H) -# define PITCH_ARM_H - -# include "armcpu.h" - -# if defined(FIXED_POINT) - -# if defined(OPUS_ARM_MAY_HAVE_NEON) -opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch); -# endif - -# if defined(OPUS_ARM_MAY_HAVE_MEDIA) -# define celt_pitch_xcorr_media MAY_HAVE_EDSP(celt_pitch_xcorr) -# endif - -# if defined(OPUS_ARM_MAY_HAVE_EDSP) -opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch); -# endif - -# if defined(OPUS_HAVE_RTCD) && \ - ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \ - (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \ - (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP))) -extern opus_val32 -(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, - const opus_val16 *, opus_val32 *, int, int); -# define OVERRIDE_PITCH_XCORR (1) -# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ - ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \ - xcorr, len, max_pitch)) - -# elif defined(OPUS_ARM_PRESUME_EDSP) || \ - 
defined(OPUS_ARM_PRESUME_MEDIA) || \ - defined(OPUS_ARM_PRESUME_NEON) -# define OVERRIDE_PITCH_XCORR (1) -# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ - ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch)) - -# endif - -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -void xcorr_kernel_neon_fixed( - const opus_val16 *x, - const opus_val16 *y, - opus_val32 sum[4], - int len); -# endif - -# if defined(OPUS_HAVE_RTCD) && \ - (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)) - -extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y, - opus_val32 sum[4], - int len); - -# define OVERRIDE_XCORR_KERNEL (1) -# define xcorr_kernel(x, y, sum, len, arch) \ - ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len)) - -# elif defined(OPUS_ARM_PRESUME_NEON_INTR) -# define OVERRIDE_XCORR_KERNEL (1) -# define xcorr_kernel(x, y, sum, len, arch) \ - ((void)arch, xcorr_kernel_neon_fixed(x, y, sum, len)) - -# endif - -#else /* Start !FIXED_POINT */ -/* Float case */ -#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch); -#endif - -# if defined(OPUS_HAVE_RTCD) && \ - (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)) -extern void -(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, - const opus_val16 *, opus_val32 *, int, int); - -# define OVERRIDE_PITCH_XCORR (1) -# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ - ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \ - xcorr, len, max_pitch)) - -# elif defined(OPUS_ARM_PRESUME_NEON_INTR) - -# define OVERRIDE_PITCH_XCORR (1) -# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ - ((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch)) - -# endif - -#endif /* end !FIXED_POINT */ - -#endif diff --git 
a/thirdparty/opus/celt/bands.c b/thirdparty/opus/celt/bands.c deleted file mode 100644 index 87eaa6c031..0000000000 --- a/thirdparty/opus/celt/bands.c +++ /dev/null @@ -1,1529 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Copyright (c) 2008-2009 Gregory Maxwell - Written by Jean-Marc Valin and Gregory Maxwell */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <math.h> -#include "bands.h" -#include "modes.h" -#include "vq.h" -#include "cwrs.h" -#include "stack_alloc.h" -#include "os_support.h" -#include "mathops.h" -#include "rate.h" -#include "quant_bands.h" -#include "pitch.h" - -int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev) -{ - int i; - for (i=0;i<N;i++) - { - if (val < thresholds[i]) - break; - } - if (i>prev && val < thresholds[prev]+hysteresis[prev]) - i=prev; - if (i<prev && val > thresholds[prev-1]-hysteresis[prev-1]) - i=prev; - return i; -} - -opus_uint32 celt_lcg_rand(opus_uint32 seed) -{ - return 1664525 * seed + 1013904223; -} - -/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness - with this approximation is important because it has an impact on the bit allocation */ -static opus_int16 bitexact_cos(opus_int16 x) -{ - opus_int32 tmp; - opus_int16 x2; - tmp = (4096+((opus_int32)(x)*(x)))>>13; - celt_assert(tmp<=32767); - x2 = tmp; - x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2))))); - celt_assert(x2<=32766); - return 1+x2; -} - -static int bitexact_log2tan(int isin,int icos) -{ - int lc; - int ls; - lc=EC_ILOG(icos); - ls=EC_ILOG(isin); - icos<<=15-lc; - isin<<=15-ls; - return (ls-lc)*(1<<11) - +FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932) - -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932); -} - -#ifdef FIXED_POINT -/* Compute the amplitude (sqrt energy) in each of the bands */ -void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) -{ - int i, c, N; - const opus_int16 *eBands = m->eBands; - N = m->shortMdctSize<<LM; - c=0; do { - for (i=0;i<end;i++) - { - int j; - opus_val32 maxval=0; - opus_val32 sum = 0; - - maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM); - if (maxval > 0) - { - int shift = 
celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1); - j=eBands[i]<<LM; - if (shift>0) - { - do { - sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)), - EXTRACT16(SHR32(X[j+c*N],shift))); - } while (++j<eBands[i+1]<<LM); - } else { - do { - sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)), - EXTRACT16(SHL32(X[j+c*N],-shift))); - } while (++j<eBands[i+1]<<LM); - } - /* We're adding one here to ensure the normalized band isn't larger than unity norm */ - bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); - } else { - bandE[i+c*m->nbEBands] = EPSILON; - } - /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ - } - } while (++c<C); - /*printf ("\n");*/ -} - -/* Normalise each band such that the energy is one. */ -void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M) -{ - int i, c, N; - const opus_int16 *eBands = m->eBands; - N = M*m->shortMdctSize; - c=0; do { - i=0; do { - opus_val16 g; - int j,shift; - opus_val16 E; - shift = celt_zlog2(bandE[i+c*m->nbEBands])-13; - E = VSHR32(bandE[i+c*m->nbEBands], shift); - g = EXTRACT16(celt_rcp(SHL32(E,3))); - j=M*eBands[i]; do { - X[j+c*N] = MULT16_16_Q15(VSHR32(freq[j+c*N],shift-1),g); - } while (++j<M*eBands[i+1]); - } while (++i<end); - } while (++c<C); -} - -#else /* FIXED_POINT */ -/* Compute the amplitude (sqrt energy) in each of the bands */ -void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) -{ - int i, c, N; - const opus_int16 *eBands = m->eBands; - N = m->shortMdctSize<<LM; - c=0; do { - for (i=0;i<end;i++) - { - opus_val32 sum; - sum = 1e-27f + celt_inner_prod_c(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM); - bandE[i+c*m->nbEBands] = celt_sqrt(sum); - /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ - } - } while (++c<C); - /*printf ("\n");*/ -} - -/* Normalise each band such that the energy is one. 
*/ -void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M) -{ - int i, c, N; - const opus_int16 *eBands = m->eBands; - N = M*m->shortMdctSize; - c=0; do { - for (i=0;i<end;i++) - { - int j; - opus_val16 g = 1.f/(1e-27f+bandE[i+c*m->nbEBands]); - for (j=M*eBands[i];j<M*eBands[i+1];j++) - X[j+c*N] = freq[j+c*N]*g; - } - } while (++c<C); -} - -#endif /* FIXED_POINT */ - -/* De-normalise the energy to produce the synthesis from the unit-energy bands */ -void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, - celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, - int end, int M, int downsample, int silence) -{ - int i, N; - int bound; - celt_sig * OPUS_RESTRICT f; - const celt_norm * OPUS_RESTRICT x; - const opus_int16 *eBands = m->eBands; - N = M*m->shortMdctSize; - bound = M*eBands[end]; - if (downsample!=1) - bound = IMIN(bound, N/downsample); - if (silence) - { - bound = 0; - start = end = 0; - } - f = freq; - x = X+M*eBands[start]; - for (i=0;i<M*eBands[start];i++) - *f++ = 0; - for (i=start;i<end;i++) - { - int j, band_end; - opus_val16 g; - opus_val16 lg; -#ifdef FIXED_POINT - int shift; -#endif - j=M*eBands[i]; - band_end = M*eBands[i+1]; - lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6)); -#ifndef FIXED_POINT - g = celt_exp2(lg); -#else - /* Handle the integer part of the log energy */ - shift = 16-(lg>>DB_SHIFT); - if (shift>31) - { - shift=0; - g=0; - } else { - /* Handle the fractional part. */ - g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1)); - } - /* Handle extreme gains with negative shift. */ - if (shift<0) - { - /* For shift < -2 we'd be likely to overflow, so we're capping - the gain here. This shouldn't happen unless the bitstream is - already corrupted. 
*/ - if (shift < -2) - { - g = 32767; - shift = -2; - } - do { - *f++ = SHL32(MULT16_16(*x++, g), -shift); - } while (++j<band_end); - } else -#endif - /* Be careful of the fixed-point "else" just above when changing this code */ - do { - *f++ = SHR32(MULT16_16(*x++, g), shift); - } while (++j<band_end); - } - celt_assert(start <= end); - OPUS_CLEAR(&freq[bound], N-bound); -} - -/* This prevents energy collapse for transients with multiple short MDCTs */ -void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size, - int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE, - const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed, int arch) -{ - int c, i, j, k; - for (i=start;i<end;i++) - { - int N0; - opus_val16 thresh, sqrt_1; - int depth; -#ifdef FIXED_POINT - int shift; - opus_val32 thresh32; -#endif - - N0 = m->eBands[i+1]-m->eBands[i]; - /* depth in 1/8 bits */ - celt_assert(pulses[i]>=0); - depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM; - -#ifdef FIXED_POINT - thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1); - thresh = MULT16_32_Q15(QCONST16(0.5f, 15), MIN32(32767,thresh32)); - { - opus_val32 t; - t = N0<<LM; - shift = celt_ilog2(t)>>1; - t = SHL32(t, (7-shift)<<1); - sqrt_1 = celt_rsqrt_norm(t); - } -#else - thresh = .5f*celt_exp2(-.125f*depth); - sqrt_1 = celt_rsqrt(N0<<LM); -#endif - - c=0; do - { - celt_norm *X; - opus_val16 prev1; - opus_val16 prev2; - opus_val32 Ediff; - opus_val16 r; - int renormalize=0; - prev1 = prev1logE[c*m->nbEBands+i]; - prev2 = prev2logE[c*m->nbEBands+i]; - if (C==1) - { - prev1 = MAX16(prev1,prev1logE[m->nbEBands+i]); - prev2 = MAX16(prev2,prev2logE[m->nbEBands+i]); - } - Ediff = EXTEND32(logE[c*m->nbEBands+i])-EXTEND32(MIN16(prev1,prev2)); - Ediff = MAX32(0, Ediff); - -#ifdef FIXED_POINT - if (Ediff < 16384) - { - opus_val32 r32 = SHR32(celt_exp2(-EXTRACT16(Ediff)),1); - r = 2*MIN16(16383,r32); - } else { - r = 0; - } - if 
(LM==3) - r = MULT16_16_Q14(23170, MIN32(23169, r)); - r = SHR16(MIN16(thresh, r),1); - r = SHR32(MULT16_16_Q15(sqrt_1, r),shift); -#else - /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because - short blocks don't have the same energy as long */ - r = 2.f*celt_exp2(-Ediff); - if (LM==3) - r *= 1.41421356f; - r = MIN16(thresh, r); - r = r*sqrt_1; -#endif - X = X_+c*size+(m->eBands[i]<<LM); - for (k=0;k<1<<LM;k++) - { - /* Detect collapse */ - if (!(collapse_masks[i*C+c]&1<<k)) - { - /* Fill with noise */ - for (j=0;j<N0;j++) - { - seed = celt_lcg_rand(seed); - X[(j<<LM)+k] = (seed&0x8000 ? r : -r); - } - renormalize = 1; - } - } - /* We just added some energy, so we need to renormalise */ - if (renormalize) - renormalise_vector(X, N0<<LM, Q15ONE, arch); - } while (++c<C); - } -} - -static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N) -{ - int i = bandID; - int j; - opus_val16 a1, a2; - opus_val16 left, right; - opus_val16 norm; -#ifdef FIXED_POINT - int shift = celt_zlog2(MAX32(bandE[i], bandE[i+m->nbEBands]))-13; -#endif - left = VSHR32(bandE[i],shift); - right = VSHR32(bandE[i+m->nbEBands],shift); - norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right)); - a1 = DIV32_16(SHL32(EXTEND32(left),14),norm); - a2 = DIV32_16(SHL32(EXTEND32(right),14),norm); - for (j=0;j<N;j++) - { - celt_norm r, l; - l = X[j]; - r = Y[j]; - X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14)); - /* Side is not encoded, no need to calculate */ - } -} - -static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N) -{ - int j; - for (j=0;j<N;j++) - { - opus_val32 r, l; - l = MULT16_16(QCONST16(.70710678f, 15), X[j]); - r = MULT16_16(QCONST16(.70710678f, 15), Y[j]); - X[j] = EXTRACT16(SHR32(ADD32(l, r), 15)); - Y[j] = EXTRACT16(SHR32(SUB32(r, l), 15)); - } -} - -static void stereo_merge(celt_norm * OPUS_RESTRICT 
X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N, int arch) -{ - int j; - opus_val32 xp=0, side=0; - opus_val32 El, Er; - opus_val16 mid2; -#ifdef FIXED_POINT - int kl, kr; -#endif - opus_val32 t, lgain, rgain; - - /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */ - dual_inner_prod(Y, X, Y, N, &xp, &side, arch); - /* Compensating for the mid normalization */ - xp = MULT16_32_Q15(mid, xp); - /* mid and side are in Q15, not Q14 like X and Y */ - mid2 = SHR16(mid, 1); - El = MULT16_16(mid2, mid2) + side - 2*xp; - Er = MULT16_16(mid2, mid2) + side + 2*xp; - if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28)) - { - OPUS_COPY(Y, X, N); - return; - } - -#ifdef FIXED_POINT - kl = celt_ilog2(El)>>1; - kr = celt_ilog2(Er)>>1; -#endif - t = VSHR32(El, (kl-7)<<1); - lgain = celt_rsqrt_norm(t); - t = VSHR32(Er, (kr-7)<<1); - rgain = celt_rsqrt_norm(t); - -#ifdef FIXED_POINT - if (kl < 7) - kl = 7; - if (kr < 7) - kr = 7; -#endif - - for (j=0;j<N;j++) - { - celt_norm r, l; - /* Apply mid scaling (side is already scaled) */ - l = MULT16_16_P15(mid, X[j]); - r = Y[j]; - X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1)); - Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1)); - } -} - -/* Decide whether we should spread the pulses in the current frame */ -int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, - int last_decision, int *hf_average, int *tapset_decision, int update_hf, - int end, int C, int M) -{ - int i, c, N0; - int sum = 0, nbBands=0; - const opus_int16 * OPUS_RESTRICT eBands = m->eBands; - int decision; - int hf_sum=0; - - celt_assert(end>0); - - N0 = M*m->shortMdctSize; - - if (M*(eBands[end]-eBands[end-1]) <= 8) - return SPREAD_NONE; - c=0; do { - for (i=0;i<end;i++) - { - int j, N, tmp=0; - int tcount[3] = {0,0,0}; - const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0; - N = M*(eBands[i+1]-eBands[i]); - if (N<=8) - continue; - /* Compute rough CDF of |x[j]| */ - for (j=0;j<N;j++) - { - 
opus_val32 x2N; /* Q13 */ - - x2N = MULT16_16(MULT16_16_Q15(x[j], x[j]), N); - if (x2N < QCONST16(0.25f,13)) - tcount[0]++; - if (x2N < QCONST16(0.0625f,13)) - tcount[1]++; - if (x2N < QCONST16(0.015625f,13)) - tcount[2]++; - } - - /* Only include four last bands (8 kHz and up) */ - if (i>m->nbEBands-4) - hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N); - tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N); - sum += tmp*256; - nbBands++; - } - } while (++c<C); - - if (update_hf) - { - if (hf_sum) - hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end)); - *hf_average = (*hf_average+hf_sum)>>1; - hf_sum = *hf_average; - if (*tapset_decision==2) - hf_sum += 4; - else if (*tapset_decision==0) - hf_sum -= 4; - if (hf_sum > 22) - *tapset_decision=2; - else if (hf_sum > 18) - *tapset_decision=1; - else - *tapset_decision=0; - } - /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/ - celt_assert(nbBands>0); /* end has to be non-zero */ - celt_assert(sum>=0); - sum = celt_udiv(sum, nbBands); - /* Recursive averaging */ - sum = (sum+*average)>>1; - *average = sum; - /* Hysteresis */ - sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2; - if (sum < 80) - { - decision = SPREAD_AGGRESSIVE; - } else if (sum < 256) - { - decision = SPREAD_NORMAL; - } else if (sum < 384) - { - decision = SPREAD_LIGHT; - } else { - decision = SPREAD_NONE; - } -#ifdef FUZZING - decision = rand()&0x3; - *tapset_decision=rand()%3; -#endif - return decision; -} - -/* Indexing table for converting from natural Hadamard to ordery Hadamard - This is essentially a bit-reversed Gray, on top of which we've added - an inversion of the order because we want the DC at the end rather than - the beginning. 
The lines are for N=2, 4, 8, 16 */ -static const int ordery_table[] = { - 1, 0, - 3, 0, 2, 1, - 7, 0, 4, 3, 6, 1, 5, 2, - 15, 0, 8, 7, 12, 3, 11, 4, 14, 1, 9, 6, 13, 2, 10, 5, -}; - -static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard) -{ - int i,j; - VARDECL(celt_norm, tmp); - int N; - SAVE_STACK; - N = N0*stride; - ALLOC(tmp, N, celt_norm); - celt_assert(stride>0); - if (hadamard) - { - const int *ordery = ordery_table+stride-2; - for (i=0;i<stride;i++) - { - for (j=0;j<N0;j++) - tmp[ordery[i]*N0+j] = X[j*stride+i]; - } - } else { - for (i=0;i<stride;i++) - for (j=0;j<N0;j++) - tmp[i*N0+j] = X[j*stride+i]; - } - OPUS_COPY(X, tmp, N); - RESTORE_STACK; -} - -static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard) -{ - int i,j; - VARDECL(celt_norm, tmp); - int N; - SAVE_STACK; - N = N0*stride; - ALLOC(tmp, N, celt_norm); - if (hadamard) - { - const int *ordery = ordery_table+stride-2; - for (i=0;i<stride;i++) - for (j=0;j<N0;j++) - tmp[j*stride+i] = X[ordery[i]*N0+j]; - } else { - for (i=0;i<stride;i++) - for (j=0;j<N0;j++) - tmp[j*stride+i] = X[i*N0+j]; - } - OPUS_COPY(X, tmp, N); - RESTORE_STACK; -} - -void haar1(celt_norm *X, int N0, int stride) -{ - int i, j; - N0 >>= 1; - for (i=0;i<stride;i++) - for (j=0;j<N0;j++) - { - opus_val32 tmp1, tmp2; - tmp1 = MULT16_16(QCONST16(.70710678f,15), X[stride*2*j+i]); - tmp2 = MULT16_16(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]); - X[stride*2*j+i] = EXTRACT16(PSHR32(ADD32(tmp1, tmp2), 15)); - X[stride*(2*j+1)+i] = EXTRACT16(PSHR32(SUB32(tmp1, tmp2), 15)); - } -} - -static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo) -{ - static const opus_int16 exp2_table8[8] = - {16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048}; - int qn, qb; - int N2 = 2*N-1; - if (stereo && N==2) - N2--; - /* The upper limit ensures that in a stereo split with itheta==16384, we'll - always have enough bits left over to code at least one pulse in the - side; otherwise it 
would collapse, since it doesn't get folded. */ - qb = celt_sudiv(b+N2*offset, N2); - qb = IMIN(b-pulse_cap-(4<<BITRES), qb); - - qb = IMIN(8<<BITRES, qb); - - if (qb<(1<<BITRES>>1)) { - qn = 1; - } else { - qn = exp2_table8[qb&0x7]>>(14-(qb>>BITRES)); - qn = (qn+1)>>1<<1; - } - celt_assert(qn <= 256); - return qn; -} - -struct band_ctx { - int encode; - const CELTMode *m; - int i; - int intensity; - int spread; - int tf_change; - ec_ctx *ec; - opus_int32 remaining_bits; - const celt_ener *bandE; - opus_uint32 seed; - int arch; -}; - -struct split_ctx { - int inv; - int imid; - int iside; - int delta; - int itheta; - int qalloc; -}; - -static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, - celt_norm *X, celt_norm *Y, int N, int *b, int B, int B0, - int LM, - int stereo, int *fill) -{ - int qn; - int itheta=0; - int delta; - int imid, iside; - int qalloc; - int pulse_cap; - int offset; - opus_int32 tell; - int inv=0; - int encode; - const CELTMode *m; - int i; - int intensity; - ec_ctx *ec; - const celt_ener *bandE; - - encode = ctx->encode; - m = ctx->m; - i = ctx->i; - intensity = ctx->intensity; - ec = ctx->ec; - bandE = ctx->bandE; - - /* Decide on the resolution to give to the split parameter theta */ - pulse_cap = m->logN[i]+LM*(1<<BITRES); - offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET); - qn = compute_qn(N, *b, offset, pulse_cap, stereo); - if (stereo && i>=intensity) - qn = 1; - if (encode) - { - /* theta is the atan() of the ratio between the (normalized) - side and mid. With just that parameter, we can re-scale both - mid and side because we know that 1) they have unit norm and - 2) they are orthogonal. */ - itheta = stereo_itheta(X, Y, stereo, N, ctx->arch); - } - tell = ec_tell_frac(ec); - if (qn!=1) - { - if (encode) - itheta = (itheta*(opus_int32)qn+8192)>>14; - - /* Entropy coding of the angle. We use a uniform pdf for the - time split, a step for stereo, and a triangular one for the rest. 
*/ - if (stereo && N>2) - { - int p0 = 3; - int x = itheta; - int x0 = qn/2; - int ft = p0*(x0+1) + x0; - /* Use a probability of p0 up to itheta=8192 and then use 1 after */ - if (encode) - { - ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft); - } else { - int fs; - fs=ec_decode(ec,ft); - if (fs<(x0+1)*p0) - x=fs/p0; - else - x=x0+1+(fs-(x0+1)*p0); - ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft); - itheta = x; - } - } else if (B0>1 || stereo) { - /* Uniform pdf */ - if (encode) - ec_enc_uint(ec, itheta, qn+1); - else - itheta = ec_dec_uint(ec, qn+1); - } else { - int fs=1, ft; - ft = ((qn>>1)+1)*((qn>>1)+1); - if (encode) - { - int fl; - - fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta; - fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 : - ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1); - - ec_encode(ec, fl, fl+fs, ft); - } else { - /* Triangular pdf */ - int fl=0; - int fm; - fm = ec_decode(ec, ft); - - if (fm < ((qn>>1)*((qn>>1) + 1)>>1)) - { - itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1; - fs = itheta + 1; - fl = itheta*(itheta + 1)>>1; - } - else - { - itheta = (2*(qn + 1) - - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1; - fs = qn + 1 - itheta; - fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1); - } - - ec_dec_update(ec, fl, fl+fs, ft); - } - } - celt_assert(itheta>=0); - itheta = celt_udiv((opus_int32)itheta*16384, qn); - if (encode && stereo) - { - if (itheta==0) - intensity_stereo(m, X, Y, bandE, i, N); - else - stereo_split(X, Y, N); - } - /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate. 
- Let's do that at higher complexity */ - } else if (stereo) { - if (encode) - { - inv = itheta > 8192; - if (inv) - { - int j; - for (j=0;j<N;j++) - Y[j] = -Y[j]; - } - intensity_stereo(m, X, Y, bandE, i, N); - } - if (*b>2<<BITRES && ctx->remaining_bits > 2<<BITRES) - { - if (encode) - ec_enc_bit_logp(ec, inv, 2); - else - inv = ec_dec_bit_logp(ec, 2); - } else - inv = 0; - itheta = 0; - } - qalloc = ec_tell_frac(ec) - tell; - *b -= qalloc; - - if (itheta == 0) - { - imid = 32767; - iside = 0; - *fill &= (1<<B)-1; - delta = -16384; - } else if (itheta == 16384) - { - imid = 0; - iside = 32767; - *fill &= ((1<<B)-1)<<B; - delta = 16384; - } else { - imid = bitexact_cos((opus_int16)itheta); - iside = bitexact_cos((opus_int16)(16384-itheta)); - /* This is the mid vs side allocation that minimizes squared error - in that band. */ - delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid)); - } - - sctx->inv = inv; - sctx->imid = imid; - sctx->iside = iside; - sctx->delta = delta; - sctx->itheta = itheta; - sctx->qalloc = qalloc; -} -static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b, - celt_norm *lowband_out) -{ -#ifdef RESYNTH - int resynth = 1; -#else - int resynth = !ctx->encode; -#endif - int c; - int stereo; - celt_norm *x = X; - int encode; - ec_ctx *ec; - - encode = ctx->encode; - ec = ctx->ec; - - stereo = Y != NULL; - c=0; do { - int sign=0; - if (ctx->remaining_bits>=1<<BITRES) - { - if (encode) - { - sign = x[0]<0; - ec_enc_bits(ec, sign, 1); - } else { - sign = ec_dec_bits(ec, 1); - } - ctx->remaining_bits -= 1<<BITRES; - b-=1<<BITRES; - } - if (resynth) - x[0] = sign ? -NORM_SCALING : NORM_SCALING; - x = Y; - } while (++c<1+stereo); - if (lowband_out) - lowband_out[0] = SHR16(X[0],4); - return 1; -} - -/* This function is responsible for encoding and decoding a mono partition. - It can split the band in two and transmit the energy difference with - the two half-bands. 
It can be called recursively so bands can end up being - split in 8 parts. */ -static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X, - int N, int b, int B, celt_norm *lowband, - int LM, - opus_val16 gain, int fill) -{ - const unsigned char *cache; - int q; - int curr_bits; - int imid=0, iside=0; - int B0=B; - opus_val16 mid=0, side=0; - unsigned cm=0; -#ifdef RESYNTH - int resynth = 1; -#else - int resynth = !ctx->encode; -#endif - celt_norm *Y=NULL; - int encode; - const CELTMode *m; - int i; - int spread; - ec_ctx *ec; - - encode = ctx->encode; - m = ctx->m; - i = ctx->i; - spread = ctx->spread; - ec = ctx->ec; - - /* If we need 1.5 more bit than we can produce, split the band in two. */ - cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i]; - if (LM != -1 && b > cache[cache[0]]+12 && N>2) - { - int mbits, sbits, delta; - int itheta; - int qalloc; - struct split_ctx sctx; - celt_norm *next_lowband2=NULL; - opus_int32 rebalance; - - N >>= 1; - Y = X+N; - LM -= 1; - if (B==1) - fill = (fill&1)|(fill<<1); - B = (B+1)>>1; - - compute_theta(ctx, &sctx, X, Y, N, &b, B, B0, - LM, 0, &fill); - imid = sctx.imid; - iside = sctx.iside; - delta = sctx.delta; - itheta = sctx.itheta; - qalloc = sctx.qalloc; -#ifdef FIXED_POINT - mid = imid; - side = iside; -#else - mid = (1.f/32768)*imid; - side = (1.f/32768)*iside; -#endif - - /* Give more bits to low-energy MDCTs than they would otherwise deserve */ - if (B0>1 && (itheta&0x3fff)) - { - if (itheta > 8192) - /* Rough approximation for pre-echo masking */ - delta -= delta>>(4-LM); - else - /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */ - delta = IMIN(0, delta + (N<<BITRES>>(5-LM))); - } - mbits = IMAX(0, IMIN(b, (b-delta)/2)); - sbits = b-mbits; - ctx->remaining_bits -= qalloc; - - if (lowband) - next_lowband2 = lowband+N; /* >32-bit split case */ - - rebalance = ctx->remaining_bits; - if (mbits >= sbits) - { - cm = quant_partition(ctx, X, N, mbits, B, - lowband, LM, - 
MULT16_16_P15(gain,mid), fill); - rebalance = mbits - (rebalance-ctx->remaining_bits); - if (rebalance > 3<<BITRES && itheta!=0) - sbits += rebalance - (3<<BITRES); - cm |= quant_partition(ctx, Y, N, sbits, B, - next_lowband2, LM, - MULT16_16_P15(gain,side), fill>>B)<<(B0>>1); - } else { - cm = quant_partition(ctx, Y, N, sbits, B, - next_lowband2, LM, - MULT16_16_P15(gain,side), fill>>B)<<(B0>>1); - rebalance = sbits - (rebalance-ctx->remaining_bits); - if (rebalance > 3<<BITRES && itheta!=16384) - mbits += rebalance - (3<<BITRES); - cm |= quant_partition(ctx, X, N, mbits, B, - lowband, LM, - MULT16_16_P15(gain,mid), fill); - } - } else { - /* This is the basic no-split case */ - q = bits2pulses(m, i, LM, b); - curr_bits = pulses2bits(m, i, LM, q); - ctx->remaining_bits -= curr_bits; - - /* Ensures we can never bust the budget */ - while (ctx->remaining_bits < 0 && q > 0) - { - ctx->remaining_bits += curr_bits; - q--; - curr_bits = pulses2bits(m, i, LM, q); - ctx->remaining_bits -= curr_bits; - } - - if (q!=0) - { - int K = get_pulses(q); - - /* Finally do the actual quantization */ - if (encode) - { - cm = alg_quant(X, N, K, spread, B, ec -#ifdef RESYNTH - , gain -#endif - ); - } else { - cm = alg_unquant(X, N, K, spread, B, ec, gain); - } - } else { - /* If there's no pulse, fill the band anyway */ - int j; - if (resynth) - { - unsigned cm_mask; - /* B can be as large as 16, so this shift might overflow an int on a - 16-bit platform; use a long to get defined behavior.*/ - cm_mask = (unsigned)(1UL<<B)-1; - fill &= cm_mask; - if (!fill) - { - OPUS_CLEAR(X, N); - } else { - if (lowband == NULL) - { - /* Noise */ - for (j=0;j<N;j++) - { - ctx->seed = celt_lcg_rand(ctx->seed); - X[j] = (celt_norm)((opus_int32)ctx->seed>>20); - } - cm = cm_mask; - } else { - /* Folded spectrum */ - for (j=0;j<N;j++) - { - opus_val16 tmp; - ctx->seed = celt_lcg_rand(ctx->seed); - /* About 48 dB below the "normal" folding level */ - tmp = QCONST16(1.0f/256, 10); - tmp = 
(ctx->seed)&0x8000 ? tmp : -tmp; - X[j] = lowband[j]+tmp; - } - cm = fill; - } - renormalise_vector(X, N, gain, ctx->arch); - } - } - } - } - - return cm; -} - - -/* This function is responsible for encoding and decoding a band for the mono case. */ -static unsigned quant_band(struct band_ctx *ctx, celt_norm *X, - int N, int b, int B, celt_norm *lowband, - int LM, celt_norm *lowband_out, - opus_val16 gain, celt_norm *lowband_scratch, int fill) -{ - int N0=N; - int N_B=N; - int N_B0; - int B0=B; - int time_divide=0; - int recombine=0; - int longBlocks; - unsigned cm=0; -#ifdef RESYNTH - int resynth = 1; -#else - int resynth = !ctx->encode; -#endif - int k; - int encode; - int tf_change; - - encode = ctx->encode; - tf_change = ctx->tf_change; - - longBlocks = B0==1; - - N_B = celt_udiv(N_B, B); - - /* Special case for one sample */ - if (N==1) - { - return quant_band_n1(ctx, X, NULL, b, lowband_out); - } - - if (tf_change>0) - recombine = tf_change; - /* Band recombining to increase frequency resolution */ - - if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1)) - { - OPUS_COPY(lowband_scratch, lowband, N); - lowband = lowband_scratch; - } - - for (k=0;k<recombine;k++) - { - static const unsigned char bit_interleave_table[16]={ - 0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3 - }; - if (encode) - haar1(X, N>>k, 1<<k); - if (lowband) - haar1(lowband, N>>k, 1<<k); - fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2; - } - B>>=recombine; - N_B<<=recombine; - - /* Increasing the time resolution */ - while ((N_B&1) == 0 && tf_change<0) - { - if (encode) - haar1(X, N_B, B); - if (lowband) - haar1(lowband, N_B, B); - fill |= fill<<B; - B <<= 1; - N_B >>= 1; - time_divide++; - tf_change++; - } - B0=B; - N_B0 = N_B; - - /* Reorganize the samples in time order instead of frequency order */ - if (B0>1) - { - if (encode) - deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks); - if (lowband) - deinterleave_hadamard(lowband, 
N_B>>recombine, B0<<recombine, longBlocks); - } - - cm = quant_partition(ctx, X, N, b, B, lowband, - LM, gain, fill); - - /* This code is used by the decoder and by the resynthesis-enabled encoder */ - if (resynth) - { - /* Undo the sample reorganization going from time order to frequency order */ - if (B0>1) - interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks); - - /* Undo time-freq changes that we did earlier */ - N_B = N_B0; - B = B0; - for (k=0;k<time_divide;k++) - { - B >>= 1; - N_B <<= 1; - cm |= cm>>B; - haar1(X, N_B, B); - } - - for (k=0;k<recombine;k++) - { - static const unsigned char bit_deinterleave_table[16]={ - 0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F, - 0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF - }; - cm = bit_deinterleave_table[cm]; - haar1(X, N0>>k, 1<<k); - } - B<<=recombine; - - /* Scale output for later folding */ - if (lowband_out) - { - int j; - opus_val16 n; - n = celt_sqrt(SHL32(EXTEND32(N0),22)); - for (j=0;j<N0;j++) - lowband_out[j] = MULT16_16_Q15(n,X[j]); - } - cm &= (1<<B)-1; - } - return cm; -} - - -/* This function is responsible for encoding and decoding a band for the stereo case. 
*/ -static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, - int N, int b, int B, celt_norm *lowband, - int LM, celt_norm *lowband_out, - celt_norm *lowband_scratch, int fill) -{ - int imid=0, iside=0; - int inv = 0; - opus_val16 mid=0, side=0; - unsigned cm=0; -#ifdef RESYNTH - int resynth = 1; -#else - int resynth = !ctx->encode; -#endif - int mbits, sbits, delta; - int itheta; - int qalloc; - struct split_ctx sctx; - int orig_fill; - int encode; - ec_ctx *ec; - - encode = ctx->encode; - ec = ctx->ec; - - /* Special case for one sample */ - if (N==1) - { - return quant_band_n1(ctx, X, Y, b, lowband_out); - } - - orig_fill = fill; - - compute_theta(ctx, &sctx, X, Y, N, &b, B, B, - LM, 1, &fill); - inv = sctx.inv; - imid = sctx.imid; - iside = sctx.iside; - delta = sctx.delta; - itheta = sctx.itheta; - qalloc = sctx.qalloc; -#ifdef FIXED_POINT - mid = imid; - side = iside; -#else - mid = (1.f/32768)*imid; - side = (1.f/32768)*iside; -#endif - - /* This is a special case for N=2 that only works for stereo and takes - advantage of the fact that mid and side are orthogonal to encode - the side with just one bit. */ - if (N==2) - { - int c; - int sign=0; - celt_norm *x2, *y2; - mbits = b; - sbits = 0; - /* Only need one bit for the side. */ - if (itheta != 0 && itheta != 16384) - sbits = 1<<BITRES; - mbits -= sbits; - c = itheta > 8192; - ctx->remaining_bits -= qalloc+sbits; - - x2 = c ? Y : X; - y2 = c ? X : Y; - if (sbits) - { - if (encode) - { - /* Here we only need to encode a sign for the side. */ - sign = x2[0]*y2[1] - x2[1]*y2[0] < 0; - ec_enc_bits(ec, sign, 1); - } else { - sign = ec_dec_bits(ec, 1); - } - } - sign = 1-2*sign; - /* We use orig_fill here because we want to fold the side, but if - itheta==16384, we'll have cleared the low bits of fill. 
*/ - cm = quant_band(ctx, x2, N, mbits, B, lowband, - LM, lowband_out, Q15ONE, lowband_scratch, orig_fill); - /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse), - and there's no need to worry about mixing with the other channel. */ - y2[0] = -sign*x2[1]; - y2[1] = sign*x2[0]; - if (resynth) - { - celt_norm tmp; - X[0] = MULT16_16_Q15(mid, X[0]); - X[1] = MULT16_16_Q15(mid, X[1]); - Y[0] = MULT16_16_Q15(side, Y[0]); - Y[1] = MULT16_16_Q15(side, Y[1]); - tmp = X[0]; - X[0] = SUB16(tmp,Y[0]); - Y[0] = ADD16(tmp,Y[0]); - tmp = X[1]; - X[1] = SUB16(tmp,Y[1]); - Y[1] = ADD16(tmp,Y[1]); - } - } else { - /* "Normal" split code */ - opus_int32 rebalance; - - mbits = IMAX(0, IMIN(b, (b-delta)/2)); - sbits = b-mbits; - ctx->remaining_bits -= qalloc; - - rebalance = ctx->remaining_bits; - if (mbits >= sbits) - { - /* In stereo mode, we do not apply a scaling to the mid because we need the normalized - mid for folding later. */ - cm = quant_band(ctx, X, N, mbits, B, - lowband, LM, lowband_out, - Q15ONE, lowband_scratch, fill); - rebalance = mbits - (rebalance-ctx->remaining_bits); - if (rebalance > 3<<BITRES && itheta!=0) - sbits += rebalance - (3<<BITRES); - - /* For a stereo split, the high bits of fill are always zero, so no - folding will be done to the side. */ - cm |= quant_band(ctx, Y, N, sbits, B, - NULL, LM, NULL, - side, NULL, fill>>B); - } else { - /* For a stereo split, the high bits of fill are always zero, so no - folding will be done to the side. */ - cm = quant_band(ctx, Y, N, sbits, B, - NULL, LM, NULL, - side, NULL, fill>>B); - rebalance = sbits - (rebalance-ctx->remaining_bits); - if (rebalance > 3<<BITRES && itheta!=16384) - mbits += rebalance - (3<<BITRES); - /* In stereo mode, we do not apply a scaling to the mid because we need the normalized - mid for folding later. 
*/ - cm |= quant_band(ctx, X, N, mbits, B, - lowband, LM, lowband_out, - Q15ONE, lowband_scratch, fill); - } - } - - - /* This code is used by the decoder and by the resynthesis-enabled encoder */ - if (resynth) - { - if (N!=2) - stereo_merge(X, Y, mid, N, ctx->arch); - if (inv) - { - int j; - for (j=0;j<N;j++) - Y[j] = -Y[j]; - } - } - return cm; -} - - -void quant_all_bands(int encode, const CELTMode *m, int start, int end, - celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, - const celt_ener *bandE, int *pulses, int shortBlocks, int spread, - int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits, - opus_int32 balance, ec_ctx *ec, int LM, int codedBands, - opus_uint32 *seed, int arch) -{ - int i; - opus_int32 remaining_bits; - const opus_int16 * OPUS_RESTRICT eBands = m->eBands; - celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2; - VARDECL(celt_norm, _norm); - celt_norm *lowband_scratch; - int B; - int M; - int lowband_offset; - int update_lowband = 1; - int C = Y_ != NULL ? 2 : 1; - int norm_offset; -#ifdef RESYNTH - int resynth = 1; -#else - int resynth = !encode; -#endif - struct band_ctx ctx; - SAVE_STACK; - - M = 1<<LM; - B = shortBlocks ? M : 1; - norm_offset = M*eBands[start]; - /* No need to allocate norm for the last band because we don't need an - output in that band. */ - ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm); - norm = _norm; - norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset; - /* We can use the last band as scratch space because we don't need that - scratch space for the last band. 
*/ - lowband_scratch = X_+M*eBands[m->nbEBands-1]; - - lowband_offset = 0; - ctx.bandE = bandE; - ctx.ec = ec; - ctx.encode = encode; - ctx.intensity = intensity; - ctx.m = m; - ctx.seed = *seed; - ctx.spread = spread; - ctx.arch = arch; - for (i=start;i<end;i++) - { - opus_int32 tell; - int b; - int N; - opus_int32 curr_balance; - int effective_lowband=-1; - celt_norm * OPUS_RESTRICT X, * OPUS_RESTRICT Y; - int tf_change=0; - unsigned x_cm; - unsigned y_cm; - int last; - - ctx.i = i; - last = (i==end-1); - - X = X_+M*eBands[i]; - if (Y_!=NULL) - Y = Y_+M*eBands[i]; - else - Y = NULL; - N = M*eBands[i+1]-M*eBands[i]; - tell = ec_tell_frac(ec); - - /* Compute how many bits we want to allocate to this band */ - if (i != start) - balance -= tell; - remaining_bits = total_bits-tell-1; - ctx.remaining_bits = remaining_bits; - if (i <= codedBands-1) - { - curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i)); - b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance))); - } else { - b = 0; - } - - if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0)) - lowband_offset = i; - - tf_change = tf_res[i]; - ctx.tf_change = tf_change; - if (i>=m->effEBands) - { - X=norm; - if (Y_!=NULL) - Y = norm; - lowband_scratch = NULL; - } - if (i==end-1) - lowband_scratch = NULL; - - /* Get a conservative estimate of the collapse_mask's for the bands we're - going to be folding from. 
*/ - if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0)) - { - int fold_start; - int fold_end; - int fold_i; - /* This ensures we never repeat spectral content within one band */ - effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N); - fold_start = lowband_offset; - while(M*eBands[--fold_start] > effective_lowband+norm_offset); - fold_end = lowband_offset-1; - while(M*eBands[++fold_end] < effective_lowband+norm_offset+N); - x_cm = y_cm = 0; - fold_i = fold_start; do { - x_cm |= collapse_masks[fold_i*C+0]; - y_cm |= collapse_masks[fold_i*C+C-1]; - } while (++fold_i<fold_end); - } - /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost - always) be non-zero. */ - else - x_cm = y_cm = (1<<B)-1; - - if (dual_stereo && i==intensity) - { - int j; - - /* Switch off dual stereo to do intensity. */ - dual_stereo = 0; - if (resynth) - for (j=0;j<M*eBands[i]-norm_offset;j++) - norm[j] = HALF32(norm[j]+norm2[j]); - } - if (dual_stereo) - { - x_cm = quant_band(&ctx, X, N, b/2, B, - effective_lowband != -1 ? norm+effective_lowband : NULL, LM, - last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm); - y_cm = quant_band(&ctx, Y, N, b/2, B, - effective_lowband != -1 ? norm2+effective_lowband : NULL, LM, - last?NULL:norm2+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, y_cm); - } else { - if (Y!=NULL) - { - x_cm = quant_band_stereo(&ctx, X, Y, N, b, B, - effective_lowband != -1 ? norm+effective_lowband : NULL, LM, - last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm); - } else { - x_cm = quant_band(&ctx, X, N, b, B, - effective_lowband != -1 ? norm+effective_lowband : NULL, LM, - last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm); - } - y_cm = x_cm; - } - collapse_masks[i*C+0] = (unsigned char)x_cm; - collapse_masks[i*C+C-1] = (unsigned char)y_cm; - balance += pulses[i] + tell; - - /* Update the folding position only as long as we have 1 bit/sample depth. 
*/ - update_lowband = b>(N<<BITRES); - } - *seed = ctx.seed; - - RESTORE_STACK; -} - diff --git a/thirdparty/opus/celt/bands.h b/thirdparty/opus/celt/bands.h deleted file mode 100644 index e8bef4bad0..0000000000 --- a/thirdparty/opus/celt/bands.h +++ /dev/null @@ -1,120 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Copyright (c) 2008-2009 Gregory Maxwell - Written by Jean-Marc Valin and Gregory Maxwell */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifndef BANDS_H -#define BANDS_H - -#include "arch.h" -#include "modes.h" -#include "entenc.h" -#include "entdec.h" -#include "rate.h" - -/** Compute the amplitude (sqrt energy) in each of the bands - * @param m Mode data - * @param X Spectrum - * @param bandE Square root of the energy for each band (returned) - */ -void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM); - -/*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/ - -/** Normalise each band of X such that the energy in each band is - equal to 1 - * @param m Mode data - * @param X Spectrum (returned normalised) - * @param bandE Square root of the energy for each band - */ -void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M); - -/** Denormalise each band of X to restore full amplitude - * @param m Mode data - * @param X Spectrum (returned de-normalised) - * @param bandE Square root of the energy for each band - */ -void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, - celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start, - int end, int M, int downsample, int silence); - -#define SPREAD_NONE (0) -#define SPREAD_LIGHT (1) -#define SPREAD_NORMAL (2) -#define SPREAD_AGGRESSIVE (3) - -int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, - int last_decision, int *hf_average, int *tapset_decision, int update_hf, - int end, int C, int M); - -#ifdef MEASURE_NORM_MSE -void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, float *bandE0, int M, int N, int C); -#endif - -void haar1(celt_norm *X, int N0, int stride); - -/** Quantisation/encoding of the residual spectrum - * @param encode flag that indicates whether we're encoding (1) or decoding (0) - * @param m Mode data - * @param start First band to process - * 
@param end Last band to process + 1 - * @param X Residual (normalised) - * @param Y Residual (normalised) for second channel (or NULL for mono) - * @param collapse_masks Anti-collapse tracking mask - * @param bandE Square root of the energy for each band - * @param pulses Bit allocation (per band) for PVQ - * @param shortBlocks Zero for long blocks, non-zero for short blocks - * @param spread Amount of spreading to use - * @param dual_stereo Zero for MS stereo, non-zero for dual stereo - * @param intensity First band to use intensity stereo - * @param tf_res Time-frequency resolution change - * @param total_bits Total number of bits that can be used for the frame (including the ones already spent) - * @param balance Number of unallocated bits - * @param en Entropy coder state - * @param LM log2() of the number of 2.5 subframes in the frame - * @param codedBands Last band to receive bits + 1 - * @param seed Random generator seed - * @param arch Run-time architecture (see opus_select_arch()) - */ -void quant_all_bands(int encode, const CELTMode *m, int start, int end, - celt_norm * X, celt_norm * Y, unsigned char *collapse_masks, - const celt_ener *bandE, int *pulses, int shortBlocks, int spread, - int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits, - opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed, - int arch); - -void anti_collapse(const CELTMode *m, celt_norm *X_, - unsigned char *collapse_masks, int LM, int C, int size, int start, - int end, const opus_val16 *logE, const opus_val16 *prev1logE, - const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed, - int arch); - -opus_uint32 celt_lcg_rand(opus_uint32 seed); - -int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev); - -#endif /* BANDS_H */ diff --git a/thirdparty/opus/celt/celt.c b/thirdparty/opus/celt/celt.c deleted file mode 100644 index b121c51a1f..0000000000 --- a/thirdparty/opus/celt/celt.c +++ 
/dev/null @@ -1,299 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2010 Xiph.Org Foundation - Copyright (c) 2008 Gregory Maxwell - Written by Jean-Marc Valin and Gregory Maxwell */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#define CELT_C - -#include "os_support.h" -#include "mdct.h" -#include <math.h> -#include "celt.h" -#include "pitch.h" -#include "bands.h" -#include "modes.h" -#include "entcode.h" -#include "quant_bands.h" -#include "rate.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "float_cast.h" -#include <stdarg.h> -#include "celt_lpc.h" -#include "vq.h" - -#ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "unknown" -#endif - -#if defined(MIPSr1_ASM) -#include "mips/celt_mipsr1.h" -#endif - - -int resampling_factor(opus_int32 rate) -{ - int ret; - switch (rate) - { - case 48000: - ret = 1; - break; - case 24000: - ret = 2; - break; - case 16000: - ret = 3; - break; - case 12000: - ret = 4; - break; - case 8000: - ret = 6; - break; - default: -#ifndef CUSTOM_MODES - celt_assert(0); -#endif - ret = 0; - break; - } - return ret; -} - -#if !defined(OVERRIDE_COMB_FILTER_CONST) || defined(NON_STATIC_COMB_FILTER_CONST_C) -/* This version should be faster on ARM */ -#ifdef OPUS_ARM_ASM -#ifndef NON_STATIC_COMB_FILTER_CONST_C -static -#endif -void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N, - opus_val16 g10, opus_val16 g11, opus_val16 g12) -{ - opus_val32 x0, x1, x2, x3, x4; - int i; - x4 = SHL32(x[-T-2], 1); - x3 = SHL32(x[-T-1], 1); - x2 = SHL32(x[-T], 1); - x1 = SHL32(x[-T+1], 1); - for (i=0;i<N-4;i+=5) - { - opus_val32 t; - x0=SHL32(x[i-T+2],1); - t = MAC16_32_Q16(x[i], g10, x2); - t = MAC16_32_Q16(t, g11, ADD32(x1,x3)); - t = MAC16_32_Q16(t, g12, ADD32(x0,x4)); - y[i] = t; - x4=SHL32(x[i-T+3],1); - t = MAC16_32_Q16(x[i+1], g10, x1); - t = MAC16_32_Q16(t, g11, ADD32(x0,x2)); - t = MAC16_32_Q16(t, g12, ADD32(x4,x3)); - y[i+1] = t; - x3=SHL32(x[i-T+4],1); - t = MAC16_32_Q16(x[i+2], g10, x0); - t = MAC16_32_Q16(t, g11, ADD32(x4,x1)); - t = MAC16_32_Q16(t, g12, ADD32(x3,x2)); - y[i+2] = t; - x2=SHL32(x[i-T+5],1); - t = MAC16_32_Q16(x[i+3], g10, x4); - t = MAC16_32_Q16(t, g11, ADD32(x3,x0)); - 
t = MAC16_32_Q16(t, g12, ADD32(x2,x1)); - y[i+3] = t; - x1=SHL32(x[i-T+6],1); - t = MAC16_32_Q16(x[i+4], g10, x3); - t = MAC16_32_Q16(t, g11, ADD32(x2,x4)); - t = MAC16_32_Q16(t, g12, ADD32(x1,x0)); - y[i+4] = t; - } -#ifdef CUSTOM_MODES - for (;i<N;i++) - { - opus_val32 t; - x0=SHL32(x[i-T+2],1); - t = MAC16_32_Q16(x[i], g10, x2); - t = MAC16_32_Q16(t, g11, ADD32(x1,x3)); - t = MAC16_32_Q16(t, g12, ADD32(x0,x4)); - y[i] = t; - x4=x3; - x3=x2; - x2=x1; - x1=x0; - } -#endif -} -#else -#ifndef NON_STATIC_COMB_FILTER_CONST_C -static -#endif -void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N, - opus_val16 g10, opus_val16 g11, opus_val16 g12) -{ - opus_val32 x0, x1, x2, x3, x4; - int i; - x4 = x[-T-2]; - x3 = x[-T-1]; - x2 = x[-T]; - x1 = x[-T+1]; - for (i=0;i<N;i++) - { - x0=x[i-T+2]; - y[i] = x[i] - + MULT16_32_Q15(g10,x2) - + MULT16_32_Q15(g11,ADD32(x1,x3)) - + MULT16_32_Q15(g12,ADD32(x0,x4)); - x4=x3; - x3=x2; - x2=x1; - x1=x0; - } - -} -#endif -#endif - -#ifndef OVERRIDE_comb_filter -void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, - opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, - const opus_val16 *window, int overlap, int arch) -{ - int i; - /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */ - opus_val16 g00, g01, g02, g10, g11, g12; - opus_val32 x0, x1, x2, x3, x4; - static const opus_val16 gains[3][3] = { - {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)}, - {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)}, - {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}}; - - if (g0==0 && g1==0) - { - /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */ - if (x!=y) - OPUS_MOVE(y, x, N); - return; - } - g00 = MULT16_16_P15(g0, gains[tapset0][0]); - g01 = MULT16_16_P15(g0, gains[tapset0][1]); - g02 = MULT16_16_P15(g0, gains[tapset0][2]); - g10 = MULT16_16_P15(g1, 
gains[tapset1][0]); - g11 = MULT16_16_P15(g1, gains[tapset1][1]); - g12 = MULT16_16_P15(g1, gains[tapset1][2]); - x1 = x[-T1+1]; - x2 = x[-T1 ]; - x3 = x[-T1-1]; - x4 = x[-T1-2]; - /* If the filter didn't change, we don't need the overlap */ - if (g0==g1 && T0==T1 && tapset0==tapset1) - overlap=0; - for (i=0;i<overlap;i++) - { - opus_val16 f; - x0=x[i-T1+2]; - f = MULT16_16_Q15(window[i],window[i]); - y[i] = x[i] - + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g00),x[i-T0]) - + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),ADD32(x[i-T0+1],x[i-T0-1])) - + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),ADD32(x[i-T0+2],x[i-T0-2])) - + MULT16_32_Q15(MULT16_16_Q15(f,g10),x2) - + MULT16_32_Q15(MULT16_16_Q15(f,g11),ADD32(x1,x3)) - + MULT16_32_Q15(MULT16_16_Q15(f,g12),ADD32(x0,x4)); - x4=x3; - x3=x2; - x2=x1; - x1=x0; - - } - if (g1==0) - { - /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */ - if (x!=y) - OPUS_MOVE(y+overlap, x+overlap, N-overlap); - return; - } - - /* Compute the part with the constant filter. 
*/ - comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12, arch); -} -#endif /* OVERRIDE_comb_filter */ - -const signed char tf_select_table[4][8] = { - {0, -1, 0, -1, 0,-1, 0,-1}, - {0, -1, 0, -2, 1, 0, 1,-1}, - {0, -2, 0, -3, 2, 0, 1,-1}, - {0, -2, 0, -3, 3, 0, 1,-1}, -}; - - -void init_caps(const CELTMode *m,int *cap,int LM,int C) -{ - int i; - for (i=0;i<m->nbEBands;i++) - { - int N; - N=(m->eBands[i+1]-m->eBands[i])<<LM; - cap[i] = (m->cache.caps[m->nbEBands*(2*LM+C-1)+i]+64)*C*N>>2; - } -} - - - -const char *opus_strerror(int error) -{ - static const char * const error_strings[8] = { - "success", - "invalid argument", - "buffer too small", - "internal error", - "corrupted stream", - "request not implemented", - "invalid state", - "memory allocation failed" - }; - if (error > 0 || error < -7) - return "unknown error"; - else - return error_strings[-error]; -} - -const char *opus_get_version_string(void) -{ - return "libopus " PACKAGE_VERSION - /* Applications may rely on the presence of this substring in the version - string to determine if they have a fixed-point or floating-point build - at runtime. */ -#ifdef FIXED_POINT - "-fixed" -#endif -#ifdef FUZZING - "-fuzzing" -#endif - ; -} diff --git a/thirdparty/opus/celt/celt.h b/thirdparty/opus/celt/celt.h deleted file mode 100644 index d1f7eb690d..0000000000 --- a/thirdparty/opus/celt/celt.h +++ /dev/null @@ -1,229 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Copyright (c) 2008 Gregory Maxwell - Written by Jean-Marc Valin and Gregory Maxwell */ -/** - @file celt.h - @brief Contains all the functions for encoding and decoding audio - */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef CELT_H -#define CELT_H - -#include "opus_types.h" -#include "opus_defines.h" -#include "opus_custom.h" -#include "entenc.h" -#include "entdec.h" -#include "arch.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define CELTEncoder OpusCustomEncoder -#define CELTDecoder OpusCustomDecoder -#define CELTMode OpusCustomMode - -typedef struct { - int valid; - float tonality; - float tonality_slope; - float noisiness; - float activity; - float music_prob; - int bandwidth; -}AnalysisInfo; - -#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr))) - -#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr))) - -/* Encoder/decoder Requests */ - -/* Expose this option again when variable framesize actually works */ -#define OPUS_FRAMESIZE_VARIABLE 5010 /**< Optimize the frame size dynamically */ - - -#define CELT_SET_PREDICTION_REQUEST 10002 -/** Controls the use of interframe prediction. 
- 0=Independent frames - 1=Short term interframe prediction allowed - 2=Long term prediction allowed - */ -#define CELT_SET_PREDICTION(x) CELT_SET_PREDICTION_REQUEST, __opus_check_int(x) - -#define CELT_SET_INPUT_CLIPPING_REQUEST 10004 -#define CELT_SET_INPUT_CLIPPING(x) CELT_SET_INPUT_CLIPPING_REQUEST, __opus_check_int(x) - -#define CELT_GET_AND_CLEAR_ERROR_REQUEST 10007 -#define CELT_GET_AND_CLEAR_ERROR(x) CELT_GET_AND_CLEAR_ERROR_REQUEST, __opus_check_int_ptr(x) - -#define CELT_SET_CHANNELS_REQUEST 10008 -#define CELT_SET_CHANNELS(x) CELT_SET_CHANNELS_REQUEST, __opus_check_int(x) - - -/* Internal */ -#define CELT_SET_START_BAND_REQUEST 10010 -#define CELT_SET_START_BAND(x) CELT_SET_START_BAND_REQUEST, __opus_check_int(x) - -#define CELT_SET_END_BAND_REQUEST 10012 -#define CELT_SET_END_BAND(x) CELT_SET_END_BAND_REQUEST, __opus_check_int(x) - -#define CELT_GET_MODE_REQUEST 10015 -/** Get the CELTMode used by an encoder or decoder */ -#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, __celt_check_mode_ptr_ptr(x) - -#define CELT_SET_SIGNALLING_REQUEST 10016 -#define CELT_SET_SIGNALLING(x) CELT_SET_SIGNALLING_REQUEST, __opus_check_int(x) - -#define CELT_SET_TONALITY_REQUEST 10018 -#define CELT_SET_TONALITY(x) CELT_SET_TONALITY_REQUEST, __opus_check_int(x) -#define CELT_SET_TONALITY_SLOPE_REQUEST 10020 -#define CELT_SET_TONALITY_SLOPE(x) CELT_SET_TONALITY_SLOPE_REQUEST, __opus_check_int(x) - -#define CELT_SET_ANALYSIS_REQUEST 10022 -#define CELT_SET_ANALYSIS(x) CELT_SET_ANALYSIS_REQUEST, __celt_check_analysis_ptr(x) - -#define OPUS_SET_LFE_REQUEST 10024 -#define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x) - -#define OPUS_SET_ENERGY_MASK_REQUEST 10026 -#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x) - -/* Encoder stuff */ - -int celt_encoder_get_size(int channels); - -int celt_encode_with_ec(OpusCustomEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, 
ec_enc *enc); - -int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels, - int arch); - - - -/* Decoder stuff */ - -int celt_decoder_get_size(int channels); - - -int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels); - -int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, - int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum); - -#define celt_encoder_ctl opus_custom_encoder_ctl -#define celt_decoder_ctl opus_custom_decoder_ctl - - -#ifdef CUSTOM_MODES -#define OPUS_CUSTOM_NOSTATIC -#else -#define OPUS_CUSTOM_NOSTATIC static OPUS_INLINE -#endif - -static const unsigned char trim_icdf[11] = {126, 124, 119, 109, 87, 41, 19, 9, 4, 2, 0}; -/* Probs: NONE: 21.875%, LIGHT: 6.25%, NORMAL: 65.625%, AGGRESSIVE: 6.25% */ -static const unsigned char spread_icdf[4] = {25, 23, 2, 0}; - -static const unsigned char tapset_icdf[3]={2,1,0}; - -#ifdef CUSTOM_MODES -static const unsigned char toOpusTable[20] = { - 0xE0, 0xE8, 0xF0, 0xF8, - 0xC0, 0xC8, 0xD0, 0xD8, - 0xA0, 0xA8, 0xB0, 0xB8, - 0x00, 0x00, 0x00, 0x00, - 0x80, 0x88, 0x90, 0x98, -}; - -static const unsigned char fromOpusTable[16] = { - 0x80, 0x88, 0x90, 0x98, - 0x40, 0x48, 0x50, 0x58, - 0x20, 0x28, 0x30, 0x38, - 0x00, 0x08, 0x10, 0x18 -}; - -static OPUS_INLINE int toOpus(unsigned char c) -{ - int ret=0; - if (c<0xA0) - ret = toOpusTable[c>>3]; - if (ret == 0) - return -1; - else - return ret|(c&0x7); -} - -static OPUS_INLINE int fromOpus(unsigned char c) -{ - if (c<0x80) - return -1; - else - return fromOpusTable[(c>>3)-16] | (c&0x7); -} -#endif /* CUSTOM_MODES */ - -#define COMBFILTER_MAXPERIOD 1024 -#define COMBFILTER_MINPERIOD 15 - -extern const signed char tf_select_table[4][8]; - -int resampling_factor(opus_int32 rate); - -void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp, - int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip); - -void 
comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, - opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, - const opus_val16 *window, int overlap, int arch); - -#ifdef NON_STATIC_COMB_FILTER_CONST_C -void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N, - opus_val16 g10, opus_val16 g11, opus_val16 g12); -#endif - -#ifndef OVERRIDE_COMB_FILTER_CONST -# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ - ((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12)) -#endif - -void init_caps(const CELTMode *m,int *cap,int LM,int C); - -#ifdef RESYNTH -void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem); -void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[], - opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient, - int LM, int downsample, int silence); -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* CELT_H */ diff --git a/thirdparty/opus/celt/celt_decoder.c b/thirdparty/opus/celt/celt_decoder.c deleted file mode 100644 index b978bb34d1..0000000000 --- a/thirdparty/opus/celt/celt_decoder.c +++ /dev/null @@ -1,1248 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2010 Xiph.Org Foundation - Copyright (c) 2008 Gregory Maxwell - Written by Jean-Marc Valin and Gregory Maxwell */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#define CELT_DECODER_C - -#include "cpu_support.h" -#include "os_support.h" -#include "mdct.h" -#include <math.h> -#include "celt.h" -#include "pitch.h" -#include "bands.h" -#include "modes.h" -#include "entcode.h" -#include "quant_bands.h" -#include "rate.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "float_cast.h" -#include <stdarg.h> -#include "celt_lpc.h" -#include "vq.h" - -#if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT) -#define NORM_ALIASING_HACK -#endif -/**********************************************************************/ -/* */ -/* DECODER */ -/* */ -/**********************************************************************/ -#define DECODE_BUFFER_SIZE 2048 - -/** Decoder state - @brief Decoder state - */ -struct OpusCustomDecoder { - const OpusCustomMode *mode; - int overlap; - int channels; - int stream_channels; - - int downsample; - int start, end; - int signalling; - int arch; - - /* Everything beyond this point gets cleared on a reset */ -#define DECODER_RESET_START rng - - opus_uint32 rng; - int error; - int last_pitch_index; - int loss_count; - int skip_plc; - int postfilter_period; - int 
postfilter_period_old; - opus_val16 postfilter_gain; - opus_val16 postfilter_gain_old; - int postfilter_tapset; - int postfilter_tapset_old; - - celt_sig preemph_memD[2]; - - celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */ - /* opus_val16 lpc[], Size = channels*LPC_ORDER */ - /* opus_val16 oldEBands[], Size = 2*mode->nbEBands */ - /* opus_val16 oldLogE[], Size = 2*mode->nbEBands */ - /* opus_val16 oldLogE2[], Size = 2*mode->nbEBands */ - /* opus_val16 backgroundLogE[], Size = 2*mode->nbEBands */ -}; - -int celt_decoder_get_size(int channels) -{ - const CELTMode *mode = opus_custom_mode_create(48000, 960, NULL); - return opus_custom_decoder_get_size(mode, channels); -} - -OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_get_size(const CELTMode *mode, int channels) -{ - int size = sizeof(struct CELTDecoder) - + (channels*(DECODE_BUFFER_SIZE+mode->overlap)-1)*sizeof(celt_sig) - + channels*LPC_ORDER*sizeof(opus_val16) - + 4*2*mode->nbEBands*sizeof(opus_val16); - return size; -} - -#ifdef CUSTOM_MODES -CELTDecoder *opus_custom_decoder_create(const CELTMode *mode, int channels, int *error) -{ - int ret; - CELTDecoder *st = (CELTDecoder *)opus_alloc(opus_custom_decoder_get_size(mode, channels)); - ret = opus_custom_decoder_init(st, mode, channels); - if (ret != OPUS_OK) - { - opus_custom_decoder_destroy(st); - st = NULL; - } - if (error) - *error = ret; - return st; -} -#endif /* CUSTOM_MODES */ - -int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels) -{ - int ret; - ret = opus_custom_decoder_init(st, opus_custom_mode_create(48000, 960, NULL), channels); - if (ret != OPUS_OK) - return ret; - st->downsample = resampling_factor(sampling_rate); - if (st->downsample==0) - return OPUS_BAD_ARG; - else - return OPUS_OK; -} - -OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMode *mode, int channels) -{ - if (channels < 0 || channels > 2) - return OPUS_BAD_ARG; - - if (st==NULL) - return 
OPUS_ALLOC_FAIL; - - OPUS_CLEAR((char*)st, opus_custom_decoder_get_size(mode, channels)); - - st->mode = mode; - st->overlap = mode->overlap; - st->stream_channels = st->channels = channels; - - st->downsample = 1; - st->start = 0; - st->end = st->mode->effEBands; - st->signalling = 1; - st->arch = opus_select_arch(); - - opus_custom_decoder_ctl(st, OPUS_RESET_STATE); - - return OPUS_OK; -} - -#ifdef CUSTOM_MODES -void opus_custom_decoder_destroy(CELTDecoder *st) -{ - opus_free(st); -} -#endif /* CUSTOM_MODES */ - - -#ifndef RESYNTH -static -#endif -void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, - celt_sig *mem, int accum) -{ - int c; - int Nd; - int apply_downsampling=0; - opus_val16 coef0; - VARDECL(celt_sig, scratch); - SAVE_STACK; -#ifndef FIXED_POINT - (void)accum; - celt_assert(accum==0); -#endif - ALLOC(scratch, N, celt_sig); - coef0 = coef[0]; - Nd = N/downsample; - c=0; do { - int j; - celt_sig * OPUS_RESTRICT x; - opus_val16 * OPUS_RESTRICT y; - celt_sig m = mem[c]; - x =in[c]; - y = pcm+c; -#ifdef CUSTOM_MODES - if (coef[1] != 0) - { - opus_val16 coef1 = coef[1]; - opus_val16 coef3 = coef[3]; - for (j=0;j<N;j++) - { - celt_sig tmp = x[j] + m + VERY_SMALL; - m = MULT16_32_Q15(coef0, tmp) - - MULT16_32_Q15(coef1, x[j]); - tmp = SHL32(MULT16_32_Q15(coef3, tmp), 2); - scratch[j] = tmp; - } - apply_downsampling=1; - } else -#endif - if (downsample>1) - { - /* Shortcut for the standard (non-custom modes) case */ - for (j=0;j<N;j++) - { - celt_sig tmp = x[j] + m + VERY_SMALL; - m = MULT16_32_Q15(coef0, tmp); - scratch[j] = tmp; - } - apply_downsampling=1; - } else { - /* Shortcut for the standard (non-custom modes) case */ -#ifdef FIXED_POINT - if (accum) - { - for (j=0;j<N;j++) - { - celt_sig tmp = x[j] + m + VERY_SMALL; - m = MULT16_32_Q15(coef0, tmp); - y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(tmp)))); - } - } else -#endif - { - for (j=0;j<N;j++) - { - celt_sig tmp = x[j] + m + VERY_SMALL; - m = 
MULT16_32_Q15(coef0, tmp); - y[j*C] = SCALEOUT(SIG2WORD16(tmp)); - } - } - } - mem[c] = m; - - if (apply_downsampling) - { - /* Perform down-sampling */ -#ifdef FIXED_POINT - if (accum) - { - for (j=0;j<Nd;j++) - y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(scratch[j*downsample])))); - } else -#endif - { - for (j=0;j<Nd;j++) - y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample])); - } - } - } while (++c<C); - RESTORE_STACK; -} - -#ifndef RESYNTH -static -#endif -void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[], - opus_val16 *oldBandE, int start, int effEnd, int C, int CC, - int isTransient, int LM, int downsample, - int silence, int arch) -{ - int c, i; - int M; - int b; - int B; - int N, NB; - int shift; - int nbEBands; - int overlap; - VARDECL(celt_sig, freq); - SAVE_STACK; - - overlap = mode->overlap; - nbEBands = mode->nbEBands; - N = mode->shortMdctSize<<LM; - ALLOC(freq, N, celt_sig); /**< Interleaved signal MDCTs */ - M = 1<<LM; - - if (isTransient) - { - B = M; - NB = mode->shortMdctSize; - shift = mode->maxLM; - } else { - B = 1; - NB = mode->shortMdctSize<<LM; - shift = mode->maxLM-LM; - } - - if (CC==2&&C==1) - { - /* Copying a mono streams to two channels */ - celt_sig *freq2; - denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, - downsample, silence); - /* Store a temporary copy in the output buffer because the IMDCT destroys its input. */ - freq2 = out_syn[1]+overlap/2; - OPUS_COPY(freq2, freq, N); - for (b=0;b<B;b++) - clt_mdct_backward(&mode->mdct, &freq2[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch); - for (b=0;b<B;b++) - clt_mdct_backward(&mode->mdct, &freq[b], out_syn[1]+NB*b, mode->window, overlap, shift, B, arch); - } else if (CC==1&&C==2) - { - /* Downmixing a stereo stream to mono */ - celt_sig *freq2; - freq2 = out_syn[0]+overlap/2; - denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, - downsample, silence); - /* Use the output buffer as temp array before downmixing. 
*/ - denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M, - downsample, silence); - for (i=0;i<N;i++) - freq[i] = HALF32(ADD32(freq[i],freq2[i])); - for (b=0;b<B;b++) - clt_mdct_backward(&mode->mdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch); - } else { - /* Normal case (mono or stereo) */ - c=0; do { - denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M, - downsample, silence); - for (b=0;b<B;b++) - clt_mdct_backward(&mode->mdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B, arch); - } while (++c<CC); - } - RESTORE_STACK; -} - -static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec) -{ - int i, curr, tf_select; - int tf_select_rsv; - int tf_changed; - int logp; - opus_uint32 budget; - opus_uint32 tell; - - budget = dec->storage*8; - tell = ec_tell(dec); - logp = isTransient ? 2 : 4; - tf_select_rsv = LM>0 && tell+logp+1<=budget; - budget -= tf_select_rsv; - tf_changed = curr = 0; - for (i=start;i<end;i++) - { - if (tell+logp<=budget) - { - curr ^= ec_dec_bit_logp(dec, logp); - tell = ec_tell(dec); - tf_changed |= curr; - } - tf_res[i] = curr; - logp = isTransient ? 4 : 5; - } - tf_select = 0; - if (tf_select_rsv && - tf_select_table[LM][4*isTransient+0+tf_changed] != - tf_select_table[LM][4*isTransient+2+tf_changed]) - { - tf_select = ec_dec_bit_logp(dec, 1); - } - for (i=start;i<end;i++) - { - tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]]; - } -} - -/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save - CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The - current value corresponds to a pitch of 66.67 Hz. */ -#define PLC_PITCH_LAG_MAX (720) -/* The minimum pitch lag to allow in the pitch-based PLC. This corresponds to a - pitch of 480 Hz. 
*/ -#define PLC_PITCH_LAG_MIN (100) - -static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch) -{ - int pitch_index; - VARDECL( opus_val16, lp_pitch_buf ); - SAVE_STACK; - ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 ); - pitch_downsample(decode_mem, lp_pitch_buf, - DECODE_BUFFER_SIZE, C, arch); - pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, - DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, - PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, arch); - pitch_index = PLC_PITCH_LAG_MAX-pitch_index; - RESTORE_STACK; - return pitch_index; -} - -static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) -{ - int c; - int i; - const int C = st->channels; - celt_sig *decode_mem[2]; - celt_sig *out_syn[2]; - opus_val16 *lpc; - opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE; - const OpusCustomMode *mode; - int nbEBands; - int overlap; - int start; - int loss_count; - int noise_based; - const opus_int16 *eBands; - SAVE_STACK; - - mode = st->mode; - nbEBands = mode->nbEBands; - overlap = mode->overlap; - eBands = mode->eBands; - - c=0; do { - decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); - out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; - } while (++c<C); - lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*C); - oldBandE = lpc+C*LPC_ORDER; - oldLogE = oldBandE + 2*nbEBands; - oldLogE2 = oldLogE + 2*nbEBands; - backgroundLogE = oldLogE2 + 2*nbEBands; - - loss_count = st->loss_count; - start = st->start; - noise_based = loss_count >= 5 || start != 0 || st->skip_plc; - if (noise_based) - { - /* Noise-based PLC/CNG */ -#ifdef NORM_ALIASING_HACK - celt_norm *X; -#else - VARDECL(celt_norm, X); -#endif - opus_uint32 seed; - int end; - int effEnd; - opus_val16 decay; - end = st->end; - effEnd = IMAX(start, IMIN(end, mode->effEBands)); - -#ifdef NORM_ALIASING_HACK - /* This is an ugly hack that breaks aliasing rules and would be easily broken, - but it saves almost 4kB of stack. 
*/ - X = (celt_norm*)(out_syn[C-1]+overlap/2); -#else - ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ -#endif - - /* Energy decay */ - decay = loss_count==0 ? QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT); - c=0; do - { - for (i=start;i<end;i++) - oldBandE[c*nbEBands+i] = MAX16(backgroundLogE[c*nbEBands+i], oldBandE[c*nbEBands+i] - decay); - } while (++c<C); - seed = st->rng; - for (c=0;c<C;c++) - { - for (i=start;i<effEnd;i++) - { - int j; - int boffs; - int blen; - boffs = N*c+(eBands[i]<<LM); - blen = (eBands[i+1]-eBands[i])<<LM; - for (j=0;j<blen;j++) - { - seed = celt_lcg_rand(seed); - X[boffs+j] = (celt_norm)((opus_int32)seed>>20); - } - renormalise_vector(X+boffs, blen, Q15ONE, st->arch); - } - } - st->rng = seed; - - c=0; do { - OPUS_MOVE(decode_mem[c], decode_mem[c]+N, - DECODE_BUFFER_SIZE-N+(overlap>>1)); - } while (++c<C); - - celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, C, 0, LM, st->downsample, 0, st->arch); - } else { - /* Pitch-based PLC */ - const opus_val16 *window; - opus_val16 fade = Q15ONE; - int pitch_index; - VARDECL(opus_val32, etmp); - VARDECL(opus_val16, exc); - - if (loss_count == 0) - { - st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch); - } else { - pitch_index = st->last_pitch_index; - fade = QCONST16(.8f,15); - } - - ALLOC(etmp, overlap, opus_val32); - ALLOC(exc, MAX_PERIOD, opus_val16); - window = mode->window; - c=0; do { - opus_val16 decay; - opus_val16 attenuation; - opus_val32 S1=0; - celt_sig *buf; - int extrapolation_offset; - int extrapolation_len; - int exc_length; - int j; - - buf = decode_mem[c]; - for (i=0;i<MAX_PERIOD;i++) { - exc[i] = ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD+i], SIG_SHIFT); - } - - if (loss_count == 0) - { - opus_val32 ac[LPC_ORDER+1]; - /* Compute LPC coefficients for the last MAX_PERIOD samples before - the first loss so we can work in the excitation-filter domain. 
*/ - _celt_autocorr(exc, ac, window, overlap, - LPC_ORDER, MAX_PERIOD, st->arch); - /* Add a noise floor of -40 dB. */ -#ifdef FIXED_POINT - ac[0] += SHR32(ac[0],13); -#else - ac[0] *= 1.0001f; -#endif - /* Use lag windowing to stabilize the Levinson-Durbin recursion. */ - for (i=1;i<=LPC_ORDER;i++) - { - /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/ -#ifdef FIXED_POINT - ac[i] -= MULT16_32_Q15(2*i*i, ac[i]); -#else - ac[i] -= ac[i]*(0.008f*0.008f)*i*i; -#endif - } - _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER); - } - /* We want the excitation for 2 pitch periods in order to look for a - decaying signal, but we can't get more than MAX_PERIOD. */ - exc_length = IMIN(2*pitch_index, MAX_PERIOD); - /* Initialize the LPC history with the samples just before the start - of the region for which we're computing the excitation. */ - { - opus_val16 lpc_mem[LPC_ORDER]; - for (i=0;i<LPC_ORDER;i++) - { - lpc_mem[i] = - ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT); - } - /* Compute the excitation for exc_length samples before the loss. */ - celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER, - exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem, st->arch); - } - - /* Check if the waveform is decaying, and if so how fast. - We do this to avoid adding energy when concealing in a segment - with decaying energy. */ - { - opus_val32 E1=1, E2=1; - int decay_length; -#ifdef FIXED_POINT - int shift = IMAX(0,2*celt_zlog2(celt_maxabs16(&exc[MAX_PERIOD-exc_length], exc_length))-20); -#endif - decay_length = exc_length>>1; - for (i=0;i<decay_length;i++) - { - opus_val16 e; - e = exc[MAX_PERIOD-decay_length+i]; - E1 += SHR32(MULT16_16(e, e), shift); - e = exc[MAX_PERIOD-2*decay_length+i]; - E2 += SHR32(MULT16_16(e, e), shift); - } - E1 = MIN32(E1, E2); - decay = celt_sqrt(frac_div32(SHR32(E1, 1), E2)); - } - - /* Move the decoder memory one frame to the left to give us room to - add the data for the new frame. 
We ignore the overlap that extends - past the end of the buffer, because we aren't going to use it. */ - OPUS_MOVE(buf, buf+N, DECODE_BUFFER_SIZE-N); - - /* Extrapolate from the end of the excitation with a period of - "pitch_index", scaling down each period by an additional factor of - "decay". */ - extrapolation_offset = MAX_PERIOD-pitch_index; - /* We need to extrapolate enough samples to cover a complete MDCT - window (including overlap/2 samples on both sides). */ - extrapolation_len = N+overlap; - /* We also apply fading if this is not the first loss. */ - attenuation = MULT16_16_Q15(fade, decay); - for (i=j=0;i<extrapolation_len;i++,j++) - { - opus_val16 tmp; - if (j >= pitch_index) { - j -= pitch_index; - attenuation = MULT16_16_Q15(attenuation, decay); - } - buf[DECODE_BUFFER_SIZE-N+i] = - SHL32(EXTEND32(MULT16_16_Q15(attenuation, - exc[extrapolation_offset+j])), SIG_SHIFT); - /* Compute the energy of the previously decoded signal whose - excitation we're copying. */ - tmp = ROUND16( - buf[DECODE_BUFFER_SIZE-MAX_PERIOD-N+extrapolation_offset+j], - SIG_SHIFT); - S1 += SHR32(MULT16_16(tmp, tmp), 8); - } - - { - opus_val16 lpc_mem[LPC_ORDER]; - /* Copy the last decoded samples (prior to the overlap region) to - synthesis filter memory so we can have a continuous signal. */ - for (i=0;i<LPC_ORDER;i++) - lpc_mem[i] = ROUND16(buf[DECODE_BUFFER_SIZE-N-1-i], SIG_SHIFT); - /* Apply the synthesis filter to convert the excitation back into - the signal domain. */ - celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER, - buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER, - lpc_mem, st->arch); - } - - /* Check if the synthesis energy is higher than expected, which can - happen with the signal changes during our window. If so, - attenuate. */ - { - opus_val32 S2=0; - for (i=0;i<extrapolation_len;i++) - { - opus_val16 tmp = ROUND16(buf[DECODE_BUFFER_SIZE-N+i], SIG_SHIFT); - S2 += SHR32(MULT16_16(tmp, tmp), 8); - } - /* This checks for an "explosion" in the synthesis. 
*/ -#ifdef FIXED_POINT - if (!(S1 > SHR32(S2,2))) -#else - /* The float test is written this way to catch NaNs in the output - of the IIR filter at the same time. */ - if (!(S1 > 0.2f*S2)) -#endif - { - for (i=0;i<extrapolation_len;i++) - buf[DECODE_BUFFER_SIZE-N+i] = 0; - } else if (S1 < S2) - { - opus_val16 ratio = celt_sqrt(frac_div32(SHR32(S1,1)+1,S2+1)); - for (i=0;i<overlap;i++) - { - opus_val16 tmp_g = Q15ONE - - MULT16_16_Q15(window[i], Q15ONE-ratio); - buf[DECODE_BUFFER_SIZE-N+i] = - MULT16_32_Q15(tmp_g, buf[DECODE_BUFFER_SIZE-N+i]); - } - for (i=overlap;i<extrapolation_len;i++) - { - buf[DECODE_BUFFER_SIZE-N+i] = - MULT16_32_Q15(ratio, buf[DECODE_BUFFER_SIZE-N+i]); - } - } - } - - /* Apply the pre-filter to the MDCT overlap for the next frame because - the post-filter will be re-applied in the decoder after the MDCT - overlap. */ - comb_filter(etmp, buf+DECODE_BUFFER_SIZE, - st->postfilter_period, st->postfilter_period, overlap, - -st->postfilter_gain, -st->postfilter_gain, - st->postfilter_tapset, st->postfilter_tapset, NULL, 0, st->arch); - - /* Simulate TDAC on the concealed audio so that it blends with the - MDCT of the next frame. 
*/ - for (i=0;i<overlap/2;i++) - { - buf[DECODE_BUFFER_SIZE+i] = - MULT16_32_Q15(window[i], etmp[overlap-1-i]) - + MULT16_32_Q15(window[overlap-i-1], etmp[i]); - } - } while (++c<C); - } - - st->loss_count = loss_count+1; - - RESTORE_STACK; -} - -int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, - int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum) -{ - int c, i, N; - int spread_decision; - opus_int32 bits; - ec_dec _dec; -#ifdef NORM_ALIASING_HACK - celt_norm *X; -#else - VARDECL(celt_norm, X); -#endif - VARDECL(int, fine_quant); - VARDECL(int, pulses); - VARDECL(int, cap); - VARDECL(int, offsets); - VARDECL(int, fine_priority); - VARDECL(int, tf_res); - VARDECL(unsigned char, collapse_masks); - celt_sig *decode_mem[2]; - celt_sig *out_syn[2]; - opus_val16 *lpc; - opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE; - - int shortBlocks; - int isTransient; - int intra_ener; - const int CC = st->channels; - int LM, M; - int start; - int end; - int effEnd; - int codedBands; - int alloc_trim; - int postfilter_pitch; - opus_val16 postfilter_gain; - int intensity=0; - int dual_stereo=0; - opus_int32 total_bits; - opus_int32 balance; - opus_int32 tell; - int dynalloc_logp; - int postfilter_tapset; - int anti_collapse_rsv; - int anti_collapse_on=0; - int silence; - int C = st->stream_channels; - const OpusCustomMode *mode; - int nbEBands; - int overlap; - const opus_int16 *eBands; - ALLOC_STACK; - - mode = st->mode; - nbEBands = mode->nbEBands; - overlap = mode->overlap; - eBands = mode->eBands; - start = st->start; - end = st->end; - frame_size *= st->downsample; - - lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC); - oldBandE = lpc+CC*LPC_ORDER; - oldLogE = oldBandE + 2*nbEBands; - oldLogE2 = oldLogE + 2*nbEBands; - backgroundLogE = oldLogE2 + 2*nbEBands; - -#ifdef CUSTOM_MODES - if (st->signalling && data!=NULL) - { - int data0=data[0]; - /* Convert "standard mode" to Opus header 
*/ - if (mode->Fs==48000 && mode->shortMdctSize==120) - { - data0 = fromOpus(data0); - if (data0<0) - return OPUS_INVALID_PACKET; - } - st->end = end = IMAX(1, mode->effEBands-2*(data0>>5)); - LM = (data0>>3)&0x3; - C = 1 + ((data0>>2)&0x1); - data++; - len--; - if (LM>mode->maxLM) - return OPUS_INVALID_PACKET; - if (frame_size < mode->shortMdctSize<<LM) - return OPUS_BUFFER_TOO_SMALL; - else - frame_size = mode->shortMdctSize<<LM; - } else { -#else - { -#endif - for (LM=0;LM<=mode->maxLM;LM++) - if (mode->shortMdctSize<<LM==frame_size) - break; - if (LM>mode->maxLM) - return OPUS_BAD_ARG; - } - M=1<<LM; - - if (len<0 || len>1275 || pcm==NULL) - return OPUS_BAD_ARG; - - N = M*mode->shortMdctSize; - c=0; do { - decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); - out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; - } while (++c<CC); - - effEnd = end; - if (effEnd > mode->effEBands) - effEnd = mode->effEBands; - - if (data == NULL || len<=1) - { - celt_decode_lost(st, N, LM); - deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); - RESTORE_STACK; - return frame_size/st->downsample; - } - - /* Check if there are at least two packets received consecutively before - * turning on the pitch-based PLC */ - st->skip_plc = st->loss_count != 0; - - if (dec == NULL) - { - ec_dec_init(&_dec,(unsigned char*)data,len); - dec = &_dec; - } - - if (C==1) - { - for (i=0;i<nbEBands;i++) - oldBandE[i]=MAX16(oldBandE[i],oldBandE[nbEBands+i]); - } - - total_bits = len*8; - tell = ec_tell(dec); - - if (tell >= total_bits) - silence = 1; - else if (tell==1) - silence = ec_dec_bit_logp(dec, 15); - else - silence = 0; - if (silence) - { - /* Pretend we've read all the remaining bits */ - tell = len*8; - dec->nbits_total+=tell-ec_tell(dec); - } - - postfilter_gain = 0; - postfilter_pitch = 0; - postfilter_tapset = 0; - if (start==0 && tell+16 <= total_bits) - { - if(ec_dec_bit_logp(dec, 1)) - { - int qg, octave; - octave = ec_dec_uint(dec, 6); 
- postfilter_pitch = (16<<octave)+ec_dec_bits(dec, 4+octave)-1; - qg = ec_dec_bits(dec, 3); - if (ec_tell(dec)+2<=total_bits) - postfilter_tapset = ec_dec_icdf(dec, tapset_icdf, 2); - postfilter_gain = QCONST16(.09375f,15)*(qg+1); - } - tell = ec_tell(dec); - } - - if (LM > 0 && tell+3 <= total_bits) - { - isTransient = ec_dec_bit_logp(dec, 3); - tell = ec_tell(dec); - } - else - isTransient = 0; - - if (isTransient) - shortBlocks = M; - else - shortBlocks = 0; - - /* Decode the global flags (first symbols in the stream) */ - intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0; - /* Get band energies */ - unquant_coarse_energy(mode, start, end, oldBandE, - intra_ener, dec, C, LM); - - ALLOC(tf_res, nbEBands, int); - tf_decode(start, end, isTransient, tf_res, LM, dec); - - tell = ec_tell(dec); - spread_decision = SPREAD_NORMAL; - if (tell+4 <= total_bits) - spread_decision = ec_dec_icdf(dec, spread_icdf, 5); - - ALLOC(cap, nbEBands, int); - - init_caps(mode,cap,LM,C); - - ALLOC(offsets, nbEBands, int); - - dynalloc_logp = 6; - total_bits<<=BITRES; - tell = ec_tell_frac(dec); - for (i=start;i<end;i++) - { - int width, quanta; - int dynalloc_loop_logp; - int boost; - width = C*(eBands[i+1]-eBands[i])<<LM; - /* quanta is 6 bits, but no more than 1 bit/sample - and no less than 1/8 bit/sample */ - quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width)); - dynalloc_loop_logp = dynalloc_logp; - boost = 0; - while (tell+(dynalloc_loop_logp<<BITRES) < total_bits && boost < cap[i]) - { - int flag; - flag = ec_dec_bit_logp(dec, dynalloc_loop_logp); - tell = ec_tell_frac(dec); - if (!flag) - break; - boost += quanta; - total_bits -= quanta; - dynalloc_loop_logp = 1; - } - offsets[i] = boost; - /* Making dynalloc more likely */ - if (boost>0) - dynalloc_logp = IMAX(2, dynalloc_logp-1); - } - - ALLOC(fine_quant, nbEBands, int); - alloc_trim = tell+(6<<BITRES) <= total_bits ? 
- ec_dec_icdf(dec, trim_icdf, 7) : 5; - - bits = (((opus_int32)len*8)<<BITRES) - ec_tell_frac(dec) - 1; - anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0; - bits -= anti_collapse_rsv; - - ALLOC(pulses, nbEBands, int); - ALLOC(fine_priority, nbEBands, int); - - codedBands = compute_allocation(mode, start, end, offsets, cap, - alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, - fine_quant, fine_priority, C, LM, dec, 0, 0, 0); - - unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C); - - c=0; do { - OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2); - } while (++c<CC); - - /* Decode fixed codebook */ - ALLOC(collapse_masks, C*nbEBands, unsigned char); - -#ifdef NORM_ALIASING_HACK - /* This is an ugly hack that breaks aliasing rules and would be easily broken, - but it saves almost 4kB of stack. */ - X = (celt_norm*)(out_syn[CC-1]+overlap/2); -#else - ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ -#endif - - quant_all_bands(0, mode, start, end, X, C==2 ? 
X+N : NULL, collapse_masks, - NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, - len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng, st->arch); - - if (anti_collapse_rsv > 0) - { - anti_collapse_on = ec_dec_bits(dec, 1); - } - - unquant_energy_finalise(mode, start, end, oldBandE, - fine_quant, fine_priority, len*8-ec_tell(dec), dec, C); - - if (anti_collapse_on) - anti_collapse(mode, X, collapse_masks, LM, C, N, - start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng, st->arch); - - if (silence) - { - for (i=0;i<C*nbEBands;i++) - oldBandE[i] = -QCONST16(28.f,DB_SHIFT); - } - - celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, - C, CC, isTransient, LM, st->downsample, silence, st->arch); - - c=0; do { - st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD); - st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD); - comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, mode->shortMdctSize, - st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset, - mode->window, overlap, st->arch); - if (LM!=0) - comb_filter(out_syn[c]+mode->shortMdctSize, out_syn[c]+mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-mode->shortMdctSize, - st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset, - mode->window, overlap, st->arch); - - } while (++c<CC); - st->postfilter_period_old = st->postfilter_period; - st->postfilter_gain_old = st->postfilter_gain; - st->postfilter_tapset_old = st->postfilter_tapset; - st->postfilter_period = postfilter_pitch; - st->postfilter_gain = postfilter_gain; - st->postfilter_tapset = postfilter_tapset; - if (LM!=0) - { - st->postfilter_period_old = st->postfilter_period; - st->postfilter_gain_old = st->postfilter_gain; - st->postfilter_tapset_old = st->postfilter_tapset; - } - - if (C==1) - OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands); - - 
/* In case start or end were to change */ - if (!isTransient) - { - opus_val16 max_background_increase; - OPUS_COPY(oldLogE2, oldLogE, 2*nbEBands); - OPUS_COPY(oldLogE, oldBandE, 2*nbEBands); - /* In normal circumstances, we only allow the noise floor to increase by - up to 2.4 dB/second, but when we're in DTX, we allow up to 6 dB - increase for each update.*/ - if (st->loss_count < 10) - max_background_increase = M*QCONST16(0.001f,DB_SHIFT); - else - max_background_increase = QCONST16(1.f,DB_SHIFT); - for (i=0;i<2*nbEBands;i++) - backgroundLogE[i] = MIN16(backgroundLogE[i] + max_background_increase, oldBandE[i]); - } else { - for (i=0;i<2*nbEBands;i++) - oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]); - } - c=0; do - { - for (i=0;i<start;i++) - { - oldBandE[c*nbEBands+i]=0; - oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); - } - for (i=end;i<nbEBands;i++) - { - oldBandE[c*nbEBands+i]=0; - oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); - } - } while (++c<2); - st->rng = dec->rng; - - deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); - st->loss_count = 0; - RESTORE_STACK; - if (ec_tell(dec) > 8*len) - return OPUS_INTERNAL_ERROR; - if(ec_get_error(dec)) - st->error = 1; - return frame_size/st->downsample; -} - - -#ifdef CUSTOM_MODES - -#ifdef FIXED_POINT -int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) -{ - return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0); -} - -#ifndef DISABLE_FLOAT_API -int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size) -{ - int j, ret, C, N; - VARDECL(opus_int16, out); - ALLOC_STACK; - - if (pcm==NULL) - return OPUS_BAD_ARG; - - C = st->channels; - N = frame_size; - - ALLOC(out, C*N, opus_int16); - ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0); - if 
(ret>0) - for (j=0;j<C*ret;j++) - pcm[j]=out[j]*(1.f/32768.f); - - RESTORE_STACK; - return ret; -} -#endif /* DISABLE_FLOAT_API */ - -#else - -int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size) -{ - return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0); -} - -int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) -{ - int j, ret, C, N; - VARDECL(celt_sig, out); - ALLOC_STACK; - - if (pcm==NULL) - return OPUS_BAD_ARG; - - C = st->channels; - N = frame_size; - ALLOC(out, C*N, celt_sig); - - ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0); - - if (ret>0) - for (j=0;j<C*ret;j++) - pcm[j] = FLOAT2INT16 (out[j]); - - RESTORE_STACK; - return ret; -} - -#endif -#endif /* CUSTOM_MODES */ - -int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...) -{ - va_list ap; - - va_start(ap, request); - switch (request) - { - case CELT_SET_START_BAND_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<0 || value>=st->mode->nbEBands) - goto bad_arg; - st->start = value; - } - break; - case CELT_SET_END_BAND_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<1 || value>st->mode->nbEBands) - goto bad_arg; - st->end = value; - } - break; - case CELT_SET_CHANNELS_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<1 || value>2) - goto bad_arg; - st->stream_channels = value; - } - break; - case CELT_GET_AND_CLEAR_ERROR_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (value==NULL) - goto bad_arg; - *value=st->error; - st->error = 0; - } - break; - case OPUS_GET_LOOKAHEAD_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (value==NULL) - goto bad_arg; - *value = st->overlap/st->downsample; - } - break; - case OPUS_RESET_STATE: - { - int i; - opus_val16 *lpc, *oldBandE, *oldLogE, 
*oldLogE2; - lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*st->channels); - oldBandE = lpc+st->channels*LPC_ORDER; - oldLogE = oldBandE + 2*st->mode->nbEBands; - oldLogE2 = oldLogE + 2*st->mode->nbEBands; - OPUS_CLEAR((char*)&st->DECODER_RESET_START, - opus_custom_decoder_get_size(st->mode, st->channels)- - ((char*)&st->DECODER_RESET_START - (char*)st)); - for (i=0;i<2*st->mode->nbEBands;i++) - oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT); - st->skip_plc = 1; - } - break; - case OPUS_GET_PITCH_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (value==NULL) - goto bad_arg; - *value = st->postfilter_period; - } - break; - case CELT_GET_MODE_REQUEST: - { - const CELTMode ** value = va_arg(ap, const CELTMode**); - if (value==0) - goto bad_arg; - *value=st->mode; - } - break; - case CELT_SET_SIGNALLING_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->signalling = value; - } - break; - case OPUS_GET_FINAL_RANGE_REQUEST: - { - opus_uint32 * value = va_arg(ap, opus_uint32 *); - if (value==0) - goto bad_arg; - *value=st->rng; - } - break; - default: - goto bad_request; - } - va_end(ap); - return OPUS_OK; -bad_arg: - va_end(ap); - return OPUS_BAD_ARG; -bad_request: - va_end(ap); - return OPUS_UNIMPLEMENTED; -} diff --git a/thirdparty/opus/celt/celt_encoder.c b/thirdparty/opus/celt/celt_encoder.c deleted file mode 100644 index 3ee7a4d3f7..0000000000 --- a/thirdparty/opus/celt/celt_encoder.c +++ /dev/null @@ -1,2410 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2010 Xiph.Org Foundation - Copyright (c) 2008 Gregory Maxwell - Written by Jean-Marc Valin and Gregory Maxwell */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#define CELT_ENCODER_C - -#include "cpu_support.h" -#include "os_support.h" -#include "mdct.h" -#include <math.h> -#include "celt.h" -#include "pitch.h" -#include "bands.h" -#include "modes.h" -#include "entcode.h" -#include "quant_bands.h" -#include "rate.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "float_cast.h" -#include <stdarg.h> -#include "celt_lpc.h" -#include "vq.h" - - -/** Encoder state - @brief Encoder state - */ -struct OpusCustomEncoder { - const OpusCustomMode *mode; /**< Mode used by the encoder */ - int channels; - int stream_channels; - - int force_intra; - int clip; - int disable_pf; - int complexity; - int upsample; - int start, end; - - opus_int32 bitrate; - int vbr; - int signalling; - int constrained_vbr; /* If zero, VBR can do whatever it likes with the rate */ - int loss_rate; - int lsb_depth; - int variable_duration; - int lfe; - int arch; - - /* Everything beyond this point gets cleared on 
a reset */ -#define ENCODER_RESET_START rng - - opus_uint32 rng; - int spread_decision; - opus_val32 delayedIntra; - int tonal_average; - int lastCodedBands; - int hf_average; - int tapset_decision; - - int prefilter_period; - opus_val16 prefilter_gain; - int prefilter_tapset; -#ifdef RESYNTH - int prefilter_period_old; - opus_val16 prefilter_gain_old; - int prefilter_tapset_old; -#endif - int consec_transient; - AnalysisInfo analysis; - - opus_val32 preemph_memE[2]; - opus_val32 preemph_memD[2]; - - /* VBR-related parameters */ - opus_int32 vbr_reservoir; - opus_int32 vbr_drift; - opus_int32 vbr_offset; - opus_int32 vbr_count; - opus_val32 overlap_max; - opus_val16 stereo_saving; - int intensity; - opus_val16 *energy_mask; - opus_val16 spec_avg; - -#ifdef RESYNTH - /* +MAX_PERIOD/2 to make space for overlap */ - celt_sig syn_mem[2][2*MAX_PERIOD+MAX_PERIOD/2]; -#endif - - celt_sig in_mem[1]; /* Size = channels*mode->overlap */ - /* celt_sig prefilter_mem[], Size = channels*COMBFILTER_MAXPERIOD */ - /* opus_val16 oldBandE[], Size = channels*mode->nbEBands */ - /* opus_val16 oldLogE[], Size = channels*mode->nbEBands */ - /* opus_val16 oldLogE2[], Size = channels*mode->nbEBands */ -}; - -int celt_encoder_get_size(int channels) -{ - CELTMode *mode = opus_custom_mode_create(48000, 960, NULL); - return opus_custom_encoder_get_size(mode, channels); -} - -OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_get_size(const CELTMode *mode, int channels) -{ - int size = sizeof(struct CELTEncoder) - + (channels*mode->overlap-1)*sizeof(celt_sig) /* celt_sig in_mem[channels*mode->overlap]; */ - + channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig) /* celt_sig prefilter_mem[channels*COMBFILTER_MAXPERIOD]; */ - + 3*channels*mode->nbEBands*sizeof(opus_val16); /* opus_val16 oldBandE[channels*mode->nbEBands]; */ - /* opus_val16 oldLogE[channels*mode->nbEBands]; */ - /* opus_val16 oldLogE2[channels*mode->nbEBands]; */ - return size; -} - -#ifdef CUSTOM_MODES -CELTEncoder 
*opus_custom_encoder_create(const CELTMode *mode, int channels, int *error) -{ - int ret; - CELTEncoder *st = (CELTEncoder *)opus_alloc(opus_custom_encoder_get_size(mode, channels)); - /* init will handle the NULL case */ - ret = opus_custom_encoder_init(st, mode, channels); - if (ret != OPUS_OK) - { - opus_custom_encoder_destroy(st); - st = NULL; - } - if (error) - *error = ret; - return st; -} -#endif /* CUSTOM_MODES */ - -static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode, - int channels, int arch) -{ - if (channels < 0 || channels > 2) - return OPUS_BAD_ARG; - - if (st==NULL || mode==NULL) - return OPUS_ALLOC_FAIL; - - OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels)); - - st->mode = mode; - st->stream_channels = st->channels = channels; - - st->upsample = 1; - st->start = 0; - st->end = st->mode->effEBands; - st->signalling = 1; - - st->arch = arch; - - st->constrained_vbr = 1; - st->clip = 1; - - st->bitrate = OPUS_BITRATE_MAX; - st->vbr = 0; - st->force_intra = 0; - st->complexity = 5; - st->lsb_depth=24; - - opus_custom_encoder_ctl(st, OPUS_RESET_STATE); - - return OPUS_OK; -} - -#ifdef CUSTOM_MODES -int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels) -{ - return opus_custom_encoder_init_arch(st, mode, channels, opus_select_arch()); -} -#endif - -int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels, - int arch) -{ - int ret; - ret = opus_custom_encoder_init_arch(st, - opus_custom_mode_create(48000, 960, NULL), channels, arch); - if (ret != OPUS_OK) - return ret; - st->upsample = resampling_factor(sampling_rate); - return OPUS_OK; -} - -#ifdef CUSTOM_MODES -void opus_custom_encoder_destroy(CELTEncoder *st) -{ - opus_free(st); -} -#endif /* CUSTOM_MODES */ - - -static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C, - opus_val16 *tf_estimate, int *tf_chan) -{ - int i; - VARDECL(opus_val16, tmp); - opus_val32 mem0,mem1; - int 
is_transient = 0; - opus_int32 mask_metric = 0; - int c; - opus_val16 tf_max; - int len2; - /* Table of 6*64/x, trained on real data to minimize the average error */ - static const unsigned char inv_table[128] = { - 255,255,156,110, 86, 70, 59, 51, 45, 40, 37, 33, 31, 28, 26, 25, - 23, 22, 21, 20, 19, 18, 17, 16, 16, 15, 15, 14, 13, 13, 12, 12, - 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 9, 9, 9, 8, 8, - 8, 8, 8, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, - }; - SAVE_STACK; - ALLOC(tmp, len, opus_val16); - - len2=len/2; - for (c=0;c<C;c++) - { - opus_val32 mean; - opus_int32 unmask=0; - opus_val32 norm; - opus_val16 maxE; - mem0=0; - mem1=0; - /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */ - for (i=0;i<len;i++) - { - opus_val32 x,y; - x = SHR32(in[i+c*len],SIG_SHIFT); - y = ADD32(mem0, x); -#ifdef FIXED_POINT - mem0 = mem1 + y - SHL32(x,1); - mem1 = x - SHR32(y,1); -#else - mem0 = mem1 + y - 2*x; - mem1 = x - .5f*y; -#endif - tmp[i] = EXTRACT16(SHR32(y,2)); - /*printf("%f ", tmp[i]);*/ - } - /*printf("\n");*/ - /* First few samples are bad because we don't propagate the memory */ - OPUS_CLEAR(tmp, 12); - -#ifdef FIXED_POINT - /* Normalize tmp to max range */ - { - int shift=0; - shift = 14-celt_ilog2(1+celt_maxabs16(tmp, len)); - if (shift!=0) - { - for (i=0;i<len;i++) - tmp[i] = SHL16(tmp[i], shift); - } - } -#endif - - mean=0; - mem0=0; - /* Grouping by two to reduce complexity */ - /* Forward pass to compute the post-echo threshold*/ - for (i=0;i<len2;i++) - { - opus_val16 x2 = PSHR32(MULT16_16(tmp[2*i],tmp[2*i]) + MULT16_16(tmp[2*i+1],tmp[2*i+1]),16); - mean += x2; -#ifdef FIXED_POINT - /* FIXME: Use PSHR16() instead */ - tmp[i] = mem0 + PSHR32(x2-mem0,4); -#else - tmp[i] = mem0 + MULT16_16_P15(QCONST16(.0625f,15),x2-mem0); -#endif - mem0 = tmp[i]; - 
} - - mem0=0; - maxE=0; - /* Backward pass to compute the pre-echo threshold */ - for (i=len2-1;i>=0;i--) - { -#ifdef FIXED_POINT - /* FIXME: Use PSHR16() instead */ - tmp[i] = mem0 + PSHR32(tmp[i]-mem0,3); -#else - tmp[i] = mem0 + MULT16_16_P15(QCONST16(0.125f,15),tmp[i]-mem0); -#endif - mem0 = tmp[i]; - maxE = MAX16(maxE, mem0); - } - /*for (i=0;i<len2;i++)printf("%f ", tmp[i]/mean);printf("\n");*/ - - /* Compute the ratio of the "frame energy" over the harmonic mean of the energy. - This essentially corresponds to a bitrate-normalized temporal noise-to-mask - ratio */ - - /* As a compromise with the old transient detector, frame energy is the - geometric mean of the energy and half the max */ -#ifdef FIXED_POINT - /* Costs two sqrt() to avoid overflows */ - mean = MULT16_16(celt_sqrt(mean), celt_sqrt(MULT16_16(maxE,len2>>1))); -#else - mean = celt_sqrt(mean * maxE*.5*len2); -#endif - /* Inverse of the mean energy in Q15+6 */ - norm = SHL32(EXTEND32(len2),6+14)/ADD32(EPSILON,SHR32(mean,1)); - /* Compute harmonic mean discarding the unreliable boundaries - The data is smooth, so we only take 1/4th of the samples */ - unmask=0; - for (i=12;i<len2-5;i+=4) - { - int id; -#ifdef FIXED_POINT - id = MAX32(0,MIN32(127,MULT16_32_Q15(tmp[i]+EPSILON,norm))); /* Do not round to nearest */ -#else - id = (int)MAX32(0,MIN32(127,floor(64*norm*(tmp[i]+EPSILON)))); /* Do not round to nearest */ -#endif - unmask += inv_table[id]; - } - /*printf("%d\n", unmask);*/ - /* Normalize, compensate for the 1/4th of the sample and the factor of 6 in the inverse table */ - unmask = 64*unmask*4/(6*(len2-17)); - if (unmask>mask_metric) - { - *tf_chan = c; - mask_metric = unmask; - } - } - is_transient = mask_metric>200; - - /* Arbitrary metric for VBR boost */ - tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42); - /* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */ - *tf_estimate = celt_sqrt(MAX32(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28))); - 
/*printf("%d %f\n", tf_max, mask_metric);*/ - RESTORE_STACK; -#ifdef FUZZING - is_transient = rand()&0x1; -#endif - /*printf("%d %f %d\n", is_transient, (float)*tf_estimate, tf_max);*/ - return is_transient; -} - -/* Looks for sudden increases of energy to decide whether we need to patch - the transient decision */ -static int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, - int start, int end, int C) -{ - int i, c; - opus_val32 mean_diff=0; - opus_val16 spread_old[26]; - /* Apply an aggressive (-6 dB/Bark) spreading function to the old frame to - avoid false detection caused by irrelevant bands */ - if (C==1) - { - spread_old[start] = oldE[start]; - for (i=start+1;i<end;i++) - spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), oldE[i]); - } else { - spread_old[start] = MAX16(oldE[start],oldE[start+nbEBands]); - for (i=start+1;i<end;i++) - spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), - MAX16(oldE[i],oldE[i+nbEBands])); - } - for (i=end-2;i>=start;i--) - spread_old[i] = MAX16(spread_old[i], spread_old[i+1]-QCONST16(1.0f, DB_SHIFT)); - /* Compute mean increase */ - c=0; do { - for (i=IMAX(2,start);i<end-1;i++) - { - opus_val16 x1, x2; - x1 = MAX16(0, newE[i + c*nbEBands]); - x2 = MAX16(0, spread_old[i]); - mean_diff = ADD32(mean_diff, EXTEND32(MAX16(0, SUB16(x1, x2)))); - } - } while (++c<C); - mean_diff = DIV32(mean_diff, C*(end-1-IMAX(2,start))); - /*printf("%f %f %d\n", mean_diff, max_diff, count);*/ - return mean_diff > QCONST16(1.f, DB_SHIFT); -} - -/** Apply window and compute the MDCT for all sub-frames and - all channels in a frame */ -static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in, - celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample, - int arch) -{ - const int overlap = mode->overlap; - int N; - int B; - int shift; - int i, b, c; - if (shortBlocks) - { - B = shortBlocks; - N = mode->shortMdctSize; - shift = mode->maxLM; - } else { - B = 
1; - N = mode->shortMdctSize<<LM; - shift = mode->maxLM-LM; - } - c=0; do { - for (b=0;b<B;b++) - { - /* Interleaving the sub-frames while doing the MDCTs */ - clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, - &out[b+c*N*B], mode->window, overlap, shift, B, - arch); - } - } while (++c<CC); - if (CC==2&&C==1) - { - for (i=0;i<B*N;i++) - out[i] = ADD32(HALF32(out[i]), HALF32(out[B*N+i])); - } - if (upsample != 1) - { - c=0; do - { - int bound = B*N/upsample; - for (i=0;i<bound;i++) - out[c*B*N+i] *= upsample; - OPUS_CLEAR(&out[c*B*N+bound], B*N-bound); - } while (++c<C); - } -} - - -void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp, - int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip) -{ - int i; - opus_val16 coef0; - celt_sig m; - int Nu; - - coef0 = coef[0]; - m = *mem; - - /* Fast path for the normal 48kHz case and no clipping */ - if (coef[1] == 0 && upsample == 1 && !clip) - { - for (i=0;i<N;i++) - { - opus_val16 x; - x = SCALEIN(pcmp[CC*i]); - /* Apply pre-emphasis */ - inp[i] = SHL32(x, SIG_SHIFT) - m; - m = SHR32(MULT16_16(coef0, x), 15-SIG_SHIFT); - } - *mem = m; - return; - } - - Nu = N/upsample; - if (upsample!=1) - { - OPUS_CLEAR(inp, N); - } - for (i=0;i<Nu;i++) - inp[i*upsample] = SCALEIN(pcmp[CC*i]); - -#ifndef FIXED_POINT - if (clip) - { - /* Clip input to avoid encoding non-portable files */ - for (i=0;i<Nu;i++) - inp[i*upsample] = MAX32(-65536.f, MIN32(65536.f,inp[i*upsample])); - } -#else - (void)clip; /* Avoids a warning about clip being unused. 
*/ -#endif -#ifdef CUSTOM_MODES - if (coef[1] != 0) - { - opus_val16 coef1 = coef[1]; - opus_val16 coef2 = coef[2]; - for (i=0;i<N;i++) - { - celt_sig x, tmp; - x = inp[i]; - /* Apply pre-emphasis */ - tmp = MULT16_16(coef2, x); - inp[i] = tmp + m; - m = MULT16_32_Q15(coef1, inp[i]) - MULT16_32_Q15(coef0, tmp); - } - } else -#endif - { - for (i=0;i<N;i++) - { - opus_val16 x; - x = inp[i]; - /* Apply pre-emphasis */ - inp[i] = SHL32(x, SIG_SHIFT) - m; - m = SHR32(MULT16_16(coef0, x), 15-SIG_SHIFT); - } - } - *mem = m; -} - - - -static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, opus_val16 bias) -{ - int i; - opus_val32 L1; - L1 = 0; - for (i=0;i<N;i++) - L1 += EXTEND32(ABS16(tmp[i])); - /* When in doubt, prefer good freq resolution */ - L1 = MAC16_32_Q15(L1, LM*bias, L1); - return L1; - -} - -static int tf_analysis(const CELTMode *m, int len, int isTransient, - int *tf_res, int lambda, celt_norm *X, int N0, int LM, - int *tf_sum, opus_val16 tf_estimate, int tf_chan) -{ - int i; - VARDECL(int, metric); - int cost0; - int cost1; - VARDECL(int, path0); - VARDECL(int, path1); - VARDECL(celt_norm, tmp); - VARDECL(celt_norm, tmp_1); - int sel; - int selcost[2]; - int tf_select=0; - opus_val16 bias; - - SAVE_STACK; - bias = MULT16_16_Q14(QCONST16(.04f,15), MAX16(-QCONST16(.25f,14), QCONST16(.5f,14)-tf_estimate)); - /*printf("%f ", bias);*/ - - ALLOC(metric, len, int); - ALLOC(tmp, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm); - ALLOC(tmp_1, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm); - ALLOC(path0, len, int); - ALLOC(path1, len, int); - - *tf_sum = 0; - for (i=0;i<len;i++) - { - int k, N; - int narrow; - opus_val32 L1, best_L1; - int best_level=0; - N = (m->eBands[i+1]-m->eBands[i])<<LM; - /* band is too narrow to be split down to LM=-1 */ - narrow = (m->eBands[i+1]-m->eBands[i])==1; - OPUS_COPY(tmp, &X[tf_chan*N0 + (m->eBands[i]<<LM)], N); - /* Just add the right channel if we're in stereo */ - /*if (C==2) - for (j=0;j<N;j++) - tmp[j] = 
ADD16(SHR16(tmp[j], 1),SHR16(X[N0+j+(m->eBands[i]<<LM)], 1));*/ - L1 = l1_metric(tmp, N, isTransient ? LM : 0, bias); - best_L1 = L1; - /* Check the -1 case for transients */ - if (isTransient && !narrow) - { - OPUS_COPY(tmp_1, tmp, N); - haar1(tmp_1, N>>LM, 1<<LM); - L1 = l1_metric(tmp_1, N, LM+1, bias); - if (L1<best_L1) - { - best_L1 = L1; - best_level = -1; - } - } - /*printf ("%f ", L1);*/ - for (k=0;k<LM+!(isTransient||narrow);k++) - { - int B; - - if (isTransient) - B = (LM-k-1); - else - B = k+1; - - haar1(tmp, N>>k, 1<<k); - - L1 = l1_metric(tmp, N, B, bias); - - if (L1 < best_L1) - { - best_L1 = L1; - best_level = k+1; - } - } - /*printf ("%d ", isTransient ? LM-best_level : best_level);*/ - /* metric is in Q1 to be able to select the mid-point (-0.5) for narrower bands */ - if (isTransient) - metric[i] = 2*best_level; - else - metric[i] = -2*best_level; - *tf_sum += (isTransient ? LM : 0) - metric[i]/2; - /* For bands that can't be split to -1, set the metric to the half-way point to avoid - biasing the decision */ - if (narrow && (metric[i]==0 || metric[i]==-2*LM)) - metric[i]-=1; - /*printf("%d ", metric[i]);*/ - } - /*printf("\n");*/ - /* Search for the optimal tf resolution, including tf_select */ - tf_select = 0; - for (sel=0;sel<2;sel++) - { - cost0 = 0; - cost1 = isTransient ? 0 : lambda; - for (i=1;i<len;i++) - { - int curr0, curr1; - curr0 = IMIN(cost0, cost1 + lambda); - curr1 = IMIN(cost0 + lambda, cost1); - cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]); - cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]); - } - cost0 = IMIN(cost0, cost1); - selcost[sel]=cost0; - } - /* For now, we're conservative and only allow tf_select=1 for transients. - * If tests confirm it's useful for non-transients, we could allow it. */ - if (selcost[1]<selcost[0] && isTransient) - tf_select=1; - cost0 = 0; - cost1 = isTransient ? 
0 : lambda; - /* Viterbi forward pass */ - for (i=1;i<len;i++) - { - int curr0, curr1; - int from0, from1; - - from0 = cost0; - from1 = cost1 + lambda; - if (from0 < from1) - { - curr0 = from0; - path0[i]= 0; - } else { - curr0 = from1; - path0[i]= 1; - } - - from0 = cost0 + lambda; - from1 = cost1; - if (from0 < from1) - { - curr1 = from0; - path1[i]= 0; - } else { - curr1 = from1; - path1[i]= 1; - } - cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]); - cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]); - } - tf_res[len-1] = cost0 < cost1 ? 0 : 1; - /* Viterbi backward pass to check the decisions */ - for (i=len-2;i>=0;i--) - { - if (tf_res[i+1] == 1) - tf_res[i] = path1[i+1]; - else - tf_res[i] = path0[i+1]; - } - /*printf("%d %f\n", *tf_sum, tf_estimate);*/ - RESTORE_STACK; -#ifdef FUZZING - tf_select = rand()&0x1; - tf_res[0] = rand()&0x1; - for (i=1;i<len;i++) - tf_res[i] = tf_res[i-1] ^ ((rand()&0xF) == 0); -#endif - return tf_select; -} - -static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, int tf_select, ec_enc *enc) -{ - int curr, i; - int tf_select_rsv; - int tf_changed; - int logp; - opus_uint32 budget; - opus_uint32 tell; - budget = enc->storage*8; - tell = ec_tell(enc); - logp = isTransient ? 2 : 4; - /* Reserve space to code the tf_select decision. */ - tf_select_rsv = LM>0 && tell+logp+1 <= budget; - budget -= tf_select_rsv; - curr = tf_changed = 0; - for (i=start;i<end;i++) - { - if (tell+logp<=budget) - { - ec_enc_bit_logp(enc, tf_res[i] ^ curr, logp); - tell = ec_tell(enc); - curr = tf_res[i]; - tf_changed |= curr; - } - else - tf_res[i] = curr; - logp = isTransient ? 4 : 5; - } - /* Only code tf_select if it would actually make a difference. 
*/ - if (tf_select_rsv && - tf_select_table[LM][4*isTransient+0+tf_changed]!= - tf_select_table[LM][4*isTransient+2+tf_changed]) - ec_enc_bit_logp(enc, tf_select, 1); - else - tf_select = 0; - for (i=start;i<end;i++) - tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]]; - /*for(i=0;i<end;i++)printf("%d ", isTransient ? tf_res[i] : LM+tf_res[i]);printf("\n");*/ -} - - -static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, - const opus_val16 *bandLogE, int end, int LM, int C, int N0, - AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate, - int intensity, opus_val16 surround_trim, int arch) -{ - int i; - opus_val32 diff=0; - int c; - int trim_index; - opus_val16 trim = QCONST16(5.f, 8); - opus_val16 logXC, logXC2; - if (C==2) - { - opus_val16 sum = 0; /* Q10 */ - opus_val16 minXC; /* Q10 */ - /* Compute inter-channel correlation for low frequencies */ - for (i=0;i<8;i++) - { - opus_val32 partial; - partial = celt_inner_prod(&X[m->eBands[i]<<LM], &X[N0+(m->eBands[i]<<LM)], - (m->eBands[i+1]-m->eBands[i])<<LM, arch); - sum = ADD16(sum, EXTRACT16(SHR32(partial, 18))); - } - sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum); - sum = MIN16(QCONST16(1.f, 10), ABS16(sum)); - minXC = sum; - for (i=8;i<intensity;i++) - { - opus_val32 partial; - partial = celt_inner_prod(&X[m->eBands[i]<<LM], &X[N0+(m->eBands[i]<<LM)], - (m->eBands[i+1]-m->eBands[i])<<LM, arch); - minXC = MIN16(minXC, ABS16(EXTRACT16(SHR32(partial, 18)))); - } - minXC = MIN16(QCONST16(1.f, 10), ABS16(minXC)); - /*printf ("%f\n", sum);*/ - /* mid-side savings estimations based on the LF average*/ - logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum)); - /* mid-side savings estimations based on min correlation */ - logXC2 = MAX16(HALF16(logXC), celt_log2(QCONST32(1.001f, 20)-MULT16_16(minXC, minXC))); -#ifdef FIXED_POINT - /* Compensate for Q20 vs Q14 input and convert output to Q8 */ - logXC = PSHR32(logXC-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8); - logXC2 = 
PSHR32(logXC2-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8); -#endif - - trim += MAX16(-QCONST16(4.f, 8), MULT16_16_Q15(QCONST16(.75f,15),logXC)); - *stereo_saving = MIN16(*stereo_saving + QCONST16(0.25f, 8), -HALF16(logXC2)); - } - - /* Estimate spectral tilt */ - c=0; do { - for (i=0;i<end-1;i++) - { - diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-end); - } - } while (++c<C); - diff /= C*(end-1); - /*printf("%f\n", diff);*/ - trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 )); - trim -= SHR16(surround_trim, DB_SHIFT-8); - trim -= 2*SHR16(tf_estimate, 14-8); -#ifndef DISABLE_FLOAT_API - if (analysis->valid) - { - trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), - (opus_val16)(QCONST16(2.f, 8)*(analysis->tonality_slope+.05f)))); - } -#else - (void)analysis; -#endif - -#ifdef FIXED_POINT - trim_index = PSHR32(trim, 8); -#else - trim_index = (int)floor(.5f+trim); -#endif - trim_index = IMAX(0, IMIN(10, trim_index)); - /*printf("%d\n", trim_index);*/ -#ifdef FUZZING - trim_index = rand()%11; -#endif - return trim_index; -} - -static int stereo_analysis(const CELTMode *m, const celt_norm *X, - int LM, int N0) -{ - int i; - int thetas; - opus_val32 sumLR = EPSILON, sumMS = EPSILON; - - /* Use the L1 norm to model the entropy of the L/R signal vs the M/S signal */ - for (i=0;i<13;i++) - { - int j; - for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++) - { - opus_val32 L, R, M, S; - /* We cast to 32-bit first because of the -32768 case */ - L = EXTEND32(X[j]); - R = EXTEND32(X[N0+j]); - M = ADD32(L, R); - S = SUB32(L, R); - sumLR = ADD32(sumLR, ADD32(ABS32(L), ABS32(R))); - sumMS = ADD32(sumMS, ADD32(ABS32(M), ABS32(S))); - } - } - sumMS = MULT16_32_Q15(QCONST16(0.707107f, 15), sumMS); - thetas = 13; - /* We don't need thetas for lower bands with LM<=1 */ - if (LM<=1) - thetas -= 8; - return MULT16_32_Q15((m->eBands[13]<<(LM+1))+thetas, sumMS) - > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR); -} - -#define 
MSWAP(a,b) do {opus_val16 tmp = a;a=b;b=tmp;} while(0) -static opus_val16 median_of_5(const opus_val16 *x) -{ - opus_val16 t0, t1, t2, t3, t4; - t2 = x[2]; - if (x[0] > x[1]) - { - t0 = x[1]; - t1 = x[0]; - } else { - t0 = x[0]; - t1 = x[1]; - } - if (x[3] > x[4]) - { - t3 = x[4]; - t4 = x[3]; - } else { - t3 = x[3]; - t4 = x[4]; - } - if (t0 > t3) - { - MSWAP(t0, t3); - MSWAP(t1, t4); - } - if (t2 > t1) - { - if (t1 < t3) - return MIN16(t2, t3); - else - return MIN16(t4, t1); - } else { - if (t2 < t3) - return MIN16(t1, t3); - else - return MIN16(t2, t4); - } -} - -static opus_val16 median_of_3(const opus_val16 *x) -{ - opus_val16 t0, t1, t2; - if (x[0] > x[1]) - { - t0 = x[1]; - t1 = x[0]; - } else { - t0 = x[0]; - t1 = x[1]; - } - t2 = x[2]; - if (t1 < t2) - return t1; - else if (t0 < t2) - return t2; - else - return t0; -} - -static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2, - int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN, - int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM, - int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc) -{ - int i, c; - opus_int32 tot_boost=0; - opus_val16 maxDepth; - VARDECL(opus_val16, follower); - VARDECL(opus_val16, noise_floor); - SAVE_STACK; - ALLOC(follower, C*nbEBands, opus_val16); - ALLOC(noise_floor, C*nbEBands, opus_val16); - OPUS_CLEAR(offsets, nbEBands); - /* Dynamic allocation code */ - maxDepth=-QCONST16(31.9f, DB_SHIFT); - for (i=0;i<end;i++) - { - /* Noise floor must take into account eMeans, the depth, the width of the bands - and the preemphasis filter (approx. 
square of bark band ID) */ - noise_floor[i] = MULT16_16(QCONST16(0.0625f, DB_SHIFT),logN[i]) - +QCONST16(.5f,DB_SHIFT)+SHL16(9-lsb_depth,DB_SHIFT)-SHL16(eMeans[i],6) - +MULT16_16(QCONST16(.0062,DB_SHIFT),(i+5)*(i+5)); - } - c=0;do - { - for (i=0;i<end;i++) - maxDepth = MAX16(maxDepth, bandLogE[c*nbEBands+i]-noise_floor[i]); - } while (++c<C); - /* Make sure that dynamic allocation can't make us bust the budget */ - if (effectiveBytes > 50 && LM>=1 && !lfe) - { - int last=0; - c=0;do - { - opus_val16 offset; - opus_val16 tmp; - opus_val16 *f; - f = &follower[c*nbEBands]; - f[0] = bandLogE2[c*nbEBands]; - for (i=1;i<end;i++) - { - /* The last band to be at least 3 dB higher than the previous one - is the last we'll consider. Otherwise, we run into problems on - bandlimited signals. */ - if (bandLogE2[c*nbEBands+i] > bandLogE2[c*nbEBands+i-1]+QCONST16(.5f,DB_SHIFT)) - last=i; - f[i] = MIN16(f[i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]); - } - for (i=last-1;i>=0;i--) - f[i] = MIN16(f[i], MIN16(f[i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i])); - - /* Combine with a median filter to avoid dynalloc triggering unnecessarily. - The "offset" value controls how conservative we are -- a higher offset - reduces the impact of the median filter and makes dynalloc use more bits. 
*/ - offset = QCONST16(1.f, DB_SHIFT); - for (i=2;i<end-2;i++) - f[i] = MAX16(f[i], median_of_5(&bandLogE2[c*nbEBands+i-2])-offset); - tmp = median_of_3(&bandLogE2[c*nbEBands])-offset; - f[0] = MAX16(f[0], tmp); - f[1] = MAX16(f[1], tmp); - tmp = median_of_3(&bandLogE2[c*nbEBands+end-3])-offset; - f[end-2] = MAX16(f[end-2], tmp); - f[end-1] = MAX16(f[end-1], tmp); - - for (i=0;i<end;i++) - f[i] = MAX16(f[i], noise_floor[i]); - } while (++c<C); - if (C==2) - { - for (i=start;i<end;i++) - { - /* Consider 24 dB "cross-talk" */ - follower[nbEBands+i] = MAX16(follower[nbEBands+i], follower[ i]-QCONST16(4.f,DB_SHIFT)); - follower[ i] = MAX16(follower[ i], follower[nbEBands+i]-QCONST16(4.f,DB_SHIFT)); - follower[i] = HALF16(MAX16(0, bandLogE[i]-follower[i]) + MAX16(0, bandLogE[nbEBands+i]-follower[nbEBands+i])); - } - } else { - for (i=start;i<end;i++) - { - follower[i] = MAX16(0, bandLogE[i]-follower[i]); - } - } - for (i=start;i<end;i++) - follower[i] = MAX16(follower[i], surround_dynalloc[i]); - /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */ - if ((!vbr || constrained_vbr)&&!isTransient) - { - for (i=start;i<end;i++) - follower[i] = HALF16(follower[i]); - } - for (i=start;i<end;i++) - { - int width; - int boost; - int boost_bits; - - if (i<8) - follower[i] *= 2; - if (i>=12) - follower[i] = HALF16(follower[i]); - follower[i] = MIN16(follower[i], QCONST16(4, DB_SHIFT)); - - width = C*(eBands[i+1]-eBands[i])<<LM; - if (width<6) - { - boost = (int)SHR32(EXTEND32(follower[i]),DB_SHIFT); - boost_bits = boost*width<<BITRES; - } else if (width > 48) { - boost = (int)SHR32(EXTEND32(follower[i])*8,DB_SHIFT); - boost_bits = (boost*width<<BITRES)/8; - } else { - boost = (int)SHR32(EXTEND32(follower[i])*width/6,DB_SHIFT); - boost_bits = boost*6<<BITRES; - } - /* For CBR and non-transient CVBR frames, limit dynalloc to 1/4 of the bits */ - if ((!vbr || (constrained_vbr&&!isTransient)) - && (tot_boost+boost_bits)>>BITRES>>3 > effectiveBytes/4) - { - 
opus_int32 cap = ((effectiveBytes/4)<<BITRES<<3); - offsets[i] = cap-tot_boost; - tot_boost = cap; - break; - } else { - offsets[i] = boost; - tot_boost += boost_bits; - } - } - } - *tot_boost_ = tot_boost; - RESTORE_STACK; - return maxDepth; -} - - -static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, int CC, int N, - int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes) -{ - int c; - VARDECL(celt_sig, _pre); - celt_sig *pre[2]; - const CELTMode *mode; - int pitch_index; - opus_val16 gain1; - opus_val16 pf_threshold; - int pf_on; - int qg; - int overlap; - SAVE_STACK; - - mode = st->mode; - overlap = mode->overlap; - ALLOC(_pre, CC*(N+COMBFILTER_MAXPERIOD), celt_sig); - - pre[0] = _pre; - pre[1] = _pre + (N+COMBFILTER_MAXPERIOD); - - - c=0; do { - OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD); - OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+overlap)+overlap, N); - } while (++c<CC); - - if (enabled) - { - VARDECL(opus_val16, pitch_buf); - ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16); - - pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC, st->arch); - /* Don't search for the fir last 1.5 octave of the range because - there's too many false-positives due to short-term correlation */ - pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N, - COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index, - st->arch); - pitch_index = COMBFILTER_MAXPERIOD-pitch_index; - - gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD, - N, &pitch_index, st->prefilter_period, st->prefilter_gain, st->arch); - if (pitch_index > COMBFILTER_MAXPERIOD-2) - pitch_index = COMBFILTER_MAXPERIOD-2; - gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1); - /*printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);*/ - if (st->loss_rate>2) - gain1 = HALF32(gain1); - if (st->loss_rate>4) - gain1 = HALF32(gain1); - if 
(st->loss_rate>8) - gain1 = 0; - } else { - gain1 = 0; - pitch_index = COMBFILTER_MINPERIOD; - } - - /* Gain threshold for enabling the prefilter/postfilter */ - pf_threshold = QCONST16(.2f,15); - - /* Adjusting the threshold based on rate and continuity */ - if (abs(pitch_index-st->prefilter_period)*10>pitch_index) - pf_threshold += QCONST16(.2f,15); - if (nbAvailableBytes<25) - pf_threshold += QCONST16(.1f,15); - if (nbAvailableBytes<35) - pf_threshold += QCONST16(.1f,15); - if (st->prefilter_gain > QCONST16(.4f,15)) - pf_threshold -= QCONST16(.1f,15); - if (st->prefilter_gain > QCONST16(.55f,15)) - pf_threshold -= QCONST16(.1f,15); - - /* Hard threshold at 0.2 */ - pf_threshold = MAX16(pf_threshold, QCONST16(.2f,15)); - if (gain1<pf_threshold) - { - gain1 = 0; - pf_on = 0; - qg = 0; - } else { - /*This block is not gated by a total bits check only because - of the nbAvailableBytes check above.*/ - if (ABS16(gain1-st->prefilter_gain)<QCONST16(.1f,15)) - gain1=st->prefilter_gain; - -#ifdef FIXED_POINT - qg = ((gain1+1536)>>10)/3-1; -#else - qg = (int)floor(.5f+gain1*32/3)-1; -#endif - qg = IMAX(0, IMIN(7, qg)); - gain1 = QCONST16(0.09375f,15)*(qg+1); - pf_on = 1; - } - /*printf("%d %f\n", pitch_index, gain1);*/ - - c=0; do { - int offset = mode->shortMdctSize-overlap; - st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD); - OPUS_COPY(in+c*(N+overlap), st->in_mem+c*(overlap), overlap); - if (offset) - comb_filter(in+c*(N+overlap)+overlap, pre[c]+COMBFILTER_MAXPERIOD, - st->prefilter_period, st->prefilter_period, offset, -st->prefilter_gain, -st->prefilter_gain, - st->prefilter_tapset, st->prefilter_tapset, NULL, 0, st->arch); - - comb_filter(in+c*(N+overlap)+overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset, - st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1, - st->prefilter_tapset, prefilter_tapset, mode->window, overlap, st->arch); - OPUS_COPY(st->in_mem+c*(overlap), in+c*(N+overlap)+N, overlap); - - if 
(N>COMBFILTER_MAXPERIOD) - { - OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD); - } else { - OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N); - OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N); - } - } while (++c<CC); - - RESTORE_STACK; - *gain = gain1; - *pitch = pitch_index; - *qgain = qg; - return pf_on; -} - -static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 base_target, - int LM, opus_int32 bitrate, int lastCodedBands, int C, int intensity, - int constrained_vbr, opus_val16 stereo_saving, int tot_boost, - opus_val16 tf_estimate, int pitch_change, opus_val16 maxDepth, - int variable_duration, int lfe, int has_surround_mask, opus_val16 surround_masking, - opus_val16 temporal_vbr) -{ - /* The target rate in 8th bits per frame */ - opus_int32 target; - int coded_bins; - int coded_bands; - opus_val16 tf_calibration; - int nbEBands; - const opus_int16 *eBands; - - nbEBands = mode->nbEBands; - eBands = mode->eBands; - - coded_bands = lastCodedBands ? lastCodedBands : nbEBands; - coded_bins = eBands[coded_bands]<<LM; - if (C==2) - coded_bins += eBands[IMIN(intensity, coded_bands)]<<LM; - - target = base_target; - - /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/ -#ifndef DISABLE_FLOAT_API - if (analysis->valid && analysis->activity<.4) - target -= (opus_int32)((coded_bins<<BITRES)*(.4f-analysis->activity)); -#endif - /* Stereo savings */ - if (C==2) - { - int coded_stereo_bands; - int coded_stereo_dof; - opus_val16 max_frac; - coded_stereo_bands = IMIN(intensity, coded_bands); - coded_stereo_dof = (eBands[coded_stereo_bands]<<LM)-coded_stereo_bands; - /* Maximum fraction of the bits we can save if the signal is mono. 
*/ - max_frac = DIV32_16(MULT16_16(QCONST16(0.8f, 15), coded_stereo_dof), coded_bins); - stereo_saving = MIN16(stereo_saving, QCONST16(1.f, 8)); - /*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/ - target -= (opus_int32)MIN32(MULT16_32_Q15(max_frac,target), - SHR32(MULT16_16(stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8)); - } - /* Boost the rate according to dynalloc (minus the dynalloc average for calibration). */ - target += tot_boost-(16<<LM); - /* Apply transient boost, compensating for average boost. */ - tf_calibration = variable_duration==OPUS_FRAMESIZE_VARIABLE ? - QCONST16(0.02f,14) : QCONST16(0.04f,14); - target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1); - -#ifndef DISABLE_FLOAT_API - /* Apply tonality boost */ - if (analysis->valid && !lfe) - { - opus_int32 tonal_target; - float tonal; - - /* Tonality boost (compensating for the average). */ - tonal = MAX16(0.f,analysis->tonality-.15f)-0.09f; - tonal_target = target + (opus_int32)((coded_bins<<BITRES)*1.2f*tonal); - if (pitch_change) - tonal_target += (opus_int32)((coded_bins<<BITRES)*.8f); - /*printf("%f %f ", analysis->tonality, tonal);*/ - target = tonal_target; - } -#else - (void)analysis; - (void)pitch_change; -#endif - - if (has_surround_mask&&!lfe) - { - opus_int32 surround_target = target + (opus_int32)SHR32(MULT16_16(surround_masking,coded_bins<<BITRES), DB_SHIFT); - /*printf("%f %d %d %d %d %d %d ", surround_masking, coded_bins, st->end, st->intensity, surround_target, target, st->bitrate);*/ - target = IMAX(target/4, surround_target); - } - - { - opus_int32 floor_depth; - int bins; - bins = eBands[nbEBands-2]<<LM; - /*floor_depth = SHR32(MULT16_16((C*bins<<BITRES),celt_log2(SHL32(MAX16(1,sample_max),13))), DB_SHIFT);*/ - floor_depth = (opus_int32)SHR32(MULT16_16((C*bins<<BITRES),maxDepth), DB_SHIFT); - floor_depth = IMAX(floor_depth, target>>2); - target = IMIN(target, floor_depth); - /*printf("%f %d\n", maxDepth, floor_depth);*/ 
- } - - if ((!has_surround_mask||lfe) && (constrained_vbr || bitrate<64000)) - { - opus_val16 rate_factor = Q15ONE; - if (bitrate < 64000) - { -#ifdef FIXED_POINT - rate_factor = MAX16(0,(bitrate-32000)); -#else - rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000)); -#endif - } - if (constrained_vbr) - rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15)); - target = base_target + (opus_int32)MULT16_32_Q15(rate_factor, target-base_target); - - } - - if (!has_surround_mask && tf_estimate < QCONST16(.2f, 14)) - { - opus_val16 amount; - opus_val16 tvbr_factor; - amount = MULT16_16_Q15(QCONST16(.0000031f, 30), IMAX(0, IMIN(32000, 96000-bitrate))); - tvbr_factor = SHR32(MULT16_16(temporal_vbr, amount), DB_SHIFT); - target += (opus_int32)MULT16_32_Q15(tvbr_factor, target); - } - - /* Don't allow more than doubling the rate */ - target = IMIN(2*base_target, target); - - return target; -} - -int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc) -{ - int i, c, N; - opus_int32 bits; - ec_enc _enc; - VARDECL(celt_sig, in); - VARDECL(celt_sig, freq); - VARDECL(celt_norm, X); - VARDECL(celt_ener, bandE); - VARDECL(opus_val16, bandLogE); - VARDECL(opus_val16, bandLogE2); - VARDECL(int, fine_quant); - VARDECL(opus_val16, error); - VARDECL(int, pulses); - VARDECL(int, cap); - VARDECL(int, offsets); - VARDECL(int, fine_priority); - VARDECL(int, tf_res); - VARDECL(unsigned char, collapse_masks); - celt_sig *prefilter_mem; - opus_val16 *oldBandE, *oldLogE, *oldLogE2; - int shortBlocks=0; - int isTransient=0; - const int CC = st->channels; - const int C = st->stream_channels; - int LM, M; - int tf_select; - int nbFilledBytes, nbAvailableBytes; - int start; - int end; - int effEnd; - int codedBands; - int tf_sum; - int alloc_trim; - int pitch_index=COMBFILTER_MINPERIOD; - opus_val16 gain1 = 0; - int dual_stereo=0; - int effectiveBytes; - int dynalloc_logp; - opus_int32 vbr_rate; - 
opus_int32 total_bits; - opus_int32 total_boost; - opus_int32 balance; - opus_int32 tell; - int prefilter_tapset=0; - int pf_on; - int anti_collapse_rsv; - int anti_collapse_on=0; - int silence=0; - int tf_chan = 0; - opus_val16 tf_estimate; - int pitch_change=0; - opus_int32 tot_boost; - opus_val32 sample_max; - opus_val16 maxDepth; - const OpusCustomMode *mode; - int nbEBands; - int overlap; - const opus_int16 *eBands; - int secondMdct; - int signalBandwidth; - int transient_got_disabled=0; - opus_val16 surround_masking=0; - opus_val16 temporal_vbr=0; - opus_val16 surround_trim = 0; - opus_int32 equiv_rate = 510000; - VARDECL(opus_val16, surround_dynalloc); - ALLOC_STACK; - - mode = st->mode; - nbEBands = mode->nbEBands; - overlap = mode->overlap; - eBands = mode->eBands; - start = st->start; - end = st->end; - tf_estimate = 0; - if (nbCompressedBytes<2 || pcm==NULL) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - - frame_size *= st->upsample; - for (LM=0;LM<=mode->maxLM;LM++) - if (mode->shortMdctSize<<LM==frame_size) - break; - if (LM>mode->maxLM) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - M=1<<LM; - N = M*mode->shortMdctSize; - - prefilter_mem = st->in_mem+CC*(overlap); - oldBandE = (opus_val16*)(st->in_mem+CC*(overlap+COMBFILTER_MAXPERIOD)); - oldLogE = oldBandE + CC*nbEBands; - oldLogE2 = oldLogE + CC*nbEBands; - - if (enc==NULL) - { - tell=1; - nbFilledBytes=0; - } else { - tell=ec_tell(enc); - nbFilledBytes=(tell+4)>>3; - } - -#ifdef CUSTOM_MODES - if (st->signalling && enc==NULL) - { - int tmp = (mode->effEBands-end)>>1; - end = st->end = IMAX(1, mode->effEBands-tmp); - compressed[0] = tmp<<5; - compressed[0] |= LM<<3; - compressed[0] |= (C==2)<<2; - /* Convert "standard mode" to Opus header */ - if (mode->Fs==48000 && mode->shortMdctSize==120) - { - int c0 = toOpus(compressed[0]); - if (c0<0) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - compressed[0] = c0; - } - compressed++; - nbCompressedBytes--; - } -#else - 
celt_assert(st->signalling==0); -#endif - - /* Can't produce more than 1275 output bytes */ - nbCompressedBytes = IMIN(nbCompressedBytes,1275); - nbAvailableBytes = nbCompressedBytes - nbFilledBytes; - - if (st->vbr && st->bitrate!=OPUS_BITRATE_MAX) - { - opus_int32 den=mode->Fs>>BITRES; - vbr_rate=(st->bitrate*frame_size+(den>>1))/den; -#ifdef CUSTOM_MODES - if (st->signalling) - vbr_rate -= 8<<BITRES; -#endif - effectiveBytes = vbr_rate>>(3+BITRES); - } else { - opus_int32 tmp; - vbr_rate = 0; - tmp = st->bitrate*frame_size; - if (tell>1) - tmp += tell; - if (st->bitrate!=OPUS_BITRATE_MAX) - nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes, - (tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling)); - effectiveBytes = nbCompressedBytes; - } - if (st->bitrate != OPUS_BITRATE_MAX) - equiv_rate = st->bitrate - (40*C+20)*((400>>LM) - 50); - - if (enc==NULL) - { - ec_enc_init(&_enc, compressed, nbCompressedBytes); - enc = &_enc; - } - - if (vbr_rate>0) - { - /* Computes the max bit-rate allowed in VBR mode to avoid violating the - target rate and buffering. - We must do this up front so that bust-prevention logic triggers - correctly if we don't have enough bits. */ - if (st->constrained_vbr) - { - opus_int32 vbr_bound; - opus_int32 max_allowed; - /* We could use any multiple of vbr_rate as bound (depending on the - delay). - This is clamped to ensure we use at least two bytes if the encoder - was entirely empty, but to allow 0 in hybrid mode. 
*/ - vbr_bound = vbr_rate; - max_allowed = IMIN(IMAX(tell==1?2:0, - (vbr_rate+vbr_bound-st->vbr_reservoir)>>(BITRES+3)), - nbAvailableBytes); - if(max_allowed < nbAvailableBytes) - { - nbCompressedBytes = nbFilledBytes+max_allowed; - nbAvailableBytes = max_allowed; - ec_enc_shrink(enc, nbCompressedBytes); - } - } - } - total_bits = nbCompressedBytes*8; - - effEnd = end; - if (effEnd > mode->effEBands) - effEnd = mode->effEBands; - - ALLOC(in, CC*(N+overlap), celt_sig); - - sample_max=MAX32(st->overlap_max, celt_maxabs16(pcm, C*(N-overlap)/st->upsample)); - st->overlap_max=celt_maxabs16(pcm+C*(N-overlap)/st->upsample, C*overlap/st->upsample); - sample_max=MAX32(sample_max, st->overlap_max); -#ifdef FIXED_POINT - silence = (sample_max==0); -#else - silence = (sample_max <= (opus_val16)1/(1<<st->lsb_depth)); -#endif -#ifdef FUZZING - if ((rand()&0x3F)==0) - silence = 1; -#endif - if (tell==1) - ec_enc_bit_logp(enc, silence, 15); - else - silence=0; - if (silence) - { - /*In VBR mode there is no need to send more than the minimum. 
*/ - if (vbr_rate>0) - { - effectiveBytes=nbCompressedBytes=IMIN(nbCompressedBytes, nbFilledBytes+2); - total_bits=nbCompressedBytes*8; - nbAvailableBytes=2; - ec_enc_shrink(enc, nbCompressedBytes); - } - /* Pretend we've filled all the remaining bits with zeros - (that's what the initialiser did anyway) */ - tell = nbCompressedBytes*8; - enc->nbits_total+=tell-ec_tell(enc); - } - c=0; do { - int need_clip=0; -#ifndef FIXED_POINT - need_clip = st->clip && sample_max>65536.f; -#endif - celt_preemphasis(pcm+c, in+c*(N+overlap)+overlap, N, CC, st->upsample, - mode->preemph, st->preemph_memE+c, need_clip); - } while (++c<CC); - - - - /* Find pitch period and gain */ - { - int enabled; - int qg; - enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && start==0 && !silence && !st->disable_pf - && st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE); - - prefilter_tapset = st->tapset_decision; - pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes); - if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3) - && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period)) - pitch_change = 1; - if (pf_on==0) - { - if(start==0 && tell+16<=total_bits) - ec_enc_bit_logp(enc, 0, 1); - } else { - /*This block is not gated by a total bits check only because - of the nbAvailableBytes check above.*/ - int octave; - ec_enc_bit_logp(enc, 1, 1); - pitch_index += 1; - octave = EC_ILOG(pitch_index)-5; - ec_enc_uint(enc, octave, 6); - ec_enc_bits(enc, pitch_index-(16<<octave), 4+octave); - pitch_index -= 1; - ec_enc_bits(enc, qg, 3); - ec_enc_icdf(enc, prefilter_tapset, tapset_icdf, 2); - } - } - - isTransient = 0; - shortBlocks = 0; - if (st->complexity >= 1 && !st->lfe) - { - isTransient = transient_analysis(in, N+overlap, CC, - &tf_estimate, &tf_chan); - } 
- if (LM>0 && ec_tell(enc)+3<=total_bits) - { - if (isTransient) - shortBlocks = M; - } else { - isTransient = 0; - transient_got_disabled=1; - } - - ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */ - ALLOC(bandE,nbEBands*CC, celt_ener); - ALLOC(bandLogE,nbEBands*CC, opus_val16); - - secondMdct = shortBlocks && st->complexity>=8; - ALLOC(bandLogE2, C*nbEBands, opus_val16); - if (secondMdct) - { - compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch); - compute_band_energies(mode, freq, bandE, effEnd, C, LM); - amp2Log2(mode, effEnd, end, bandE, bandLogE2, C); - for (i=0;i<C*nbEBands;i++) - bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT)); - } - - compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); - if (CC==2&&C==1) - tf_chan = 0; - compute_band_energies(mode, freq, bandE, effEnd, C, LM); - - if (st->lfe) - { - for (i=2;i<end;i++) - { - bandE[i] = IMIN(bandE[i], MULT16_32_Q15(QCONST16(1e-4f,15),bandE[0])); - bandE[i] = MAX32(bandE[i], EPSILON); - } - } - amp2Log2(mode, effEnd, end, bandE, bandLogE, C); - - ALLOC(surround_dynalloc, C*nbEBands, opus_val16); - OPUS_CLEAR(surround_dynalloc, end); - /* This computes how much masking takes place between surround channels */ - if (start==0&&st->energy_mask&&!st->lfe) - { - int mask_end; - int midband; - int count_dynalloc; - opus_val32 mask_avg=0; - opus_val32 diff=0; - int count=0; - mask_end = IMAX(2,st->lastCodedBands); - for (c=0;c<C;c++) - { - for(i=0;i<mask_end;i++) - { - opus_val16 mask; - mask = MAX16(MIN16(st->energy_mask[nbEBands*c+i], - QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT)); - if (mask > 0) - mask = HALF16(mask); - mask_avg += MULT16_16(mask, eBands[i+1]-eBands[i]); - count += eBands[i+1]-eBands[i]; - diff += MULT16_16(mask, 1+2*i-mask_end); - } - } - celt_assert(count>0); - mask_avg = DIV32_16(mask_avg,count); - mask_avg += QCONST16(.2f, DB_SHIFT); - diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end); - /* Again, being conservative */ - diff = 
HALF32(diff); - diff = MAX32(MIN32(diff, QCONST32(.031f, DB_SHIFT)), -QCONST32(.031f, DB_SHIFT)); - /* Find the band that's in the middle of the coded spectrum */ - for (midband=0;eBands[midband+1] < eBands[mask_end]/2;midband++); - count_dynalloc=0; - for(i=0;i<mask_end;i++) - { - opus_val32 lin; - opus_val16 unmask; - lin = mask_avg + diff*(i-midband); - if (C==2) - unmask = MAX16(st->energy_mask[i], st->energy_mask[nbEBands+i]); - else - unmask = st->energy_mask[i]; - unmask = MIN16(unmask, QCONST16(.0f, DB_SHIFT)); - unmask -= lin; - if (unmask > QCONST16(.25f, DB_SHIFT)) - { - surround_dynalloc[i] = unmask - QCONST16(.25f, DB_SHIFT); - count_dynalloc++; - } - } - if (count_dynalloc>=3) - { - /* If we need dynalloc in many bands, it's probably because our - initial masking rate was too low. */ - mask_avg += QCONST16(.25f, DB_SHIFT); - if (mask_avg>0) - { - /* Something went really wrong in the original calculations, - disabling masking. */ - mask_avg = 0; - diff = 0; - OPUS_CLEAR(surround_dynalloc, mask_end); - } else { - for(i=0;i<mask_end;i++) - surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT)); - } - } - mask_avg += QCONST16(.2f, DB_SHIFT); - /* Convert to 1/64th units used for the trim */ - surround_trim = 64*diff; - /*printf("%d %d ", mask_avg, surround_trim);*/ - surround_masking = mask_avg; - } - /* Temporal VBR (but not for LFE) */ - if (!st->lfe) - { - opus_val16 follow=-QCONST16(10.0f,DB_SHIFT); - opus_val32 frame_avg=0; - opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0; - for(i=start;i<end;i++) - { - follow = MAX16(follow-QCONST16(1.f, DB_SHIFT), bandLogE[i]-offset); - if (C==2) - follow = MAX16(follow, bandLogE[i+nbEBands]-offset); - frame_avg += follow; - } - frame_avg /= (end-start); - temporal_vbr = SUB16(frame_avg,st->spec_avg); - temporal_vbr = MIN16(QCONST16(3.f, DB_SHIFT), MAX16(-QCONST16(1.5f, DB_SHIFT), temporal_vbr)); - st->spec_avg += MULT16_16_Q15(QCONST16(.02f, 15), temporal_vbr); - } - /*for 
(i=0;i<21;i++) - printf("%f ", bandLogE[i]); - printf("\n");*/ - - if (!secondMdct) - { - OPUS_COPY(bandLogE2, bandLogE, C*nbEBands); - } - - /* Last chance to catch any transient we might have missed in the - time-domain analysis */ - if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe) - { - if (patch_transient_decision(bandLogE, oldBandE, nbEBands, start, end, C)) - { - isTransient = 1; - shortBlocks = M; - compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); - compute_band_energies(mode, freq, bandE, effEnd, C, LM); - amp2Log2(mode, effEnd, end, bandE, bandLogE, C); - /* Compensate for the scaling of short vs long mdcts */ - for (i=0;i<C*nbEBands;i++) - bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT)); - tf_estimate = QCONST16(.2f,14); - } - } - - if (LM>0 && ec_tell(enc)+3<=total_bits) - ec_enc_bit_logp(enc, isTransient, 3); - - ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ - - /* Band normalisation */ - normalise_bands(mode, freq, X, bandE, effEnd, C, M); - - ALLOC(tf_res, nbEBands, int); - /* Disable variable tf resolution for hybrid and at very low bitrate */ - if (effectiveBytes>=15*C && start==0 && st->complexity>=2 && !st->lfe) - { - int lambda; - if (effectiveBytes<40) - lambda = 12; - else if (effectiveBytes<60) - lambda = 6; - else if (effectiveBytes<100) - lambda = 4; - else - lambda = 3; - lambda*=2; - tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, &tf_sum, tf_estimate, tf_chan); - for (i=effEnd;i<end;i++) - tf_res[i] = tf_res[effEnd-1]; - } else { - tf_sum = 0; - for (i=0;i<end;i++) - tf_res[i] = isTransient; - tf_select=0; - } - - ALLOC(error, C*nbEBands, opus_val16); - quant_coarse_energy(mode, start, end, effEnd, bandLogE, - oldBandE, total_bits, error, enc, - C, LM, nbAvailableBytes, st->force_intra, - &st->delayedIntra, st->complexity >= 4, st->loss_rate, st->lfe); - - tf_encode(start, end, isTransient, tf_res, LM, tf_select, enc); - - if 
(ec_tell(enc)+4<=total_bits) - { - if (st->lfe) - { - st->tapset_decision = 0; - st->spread_decision = SPREAD_NORMAL; - } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || start != 0) - { - if (st->complexity == 0) - st->spread_decision = SPREAD_NONE; - else - st->spread_decision = SPREAD_NORMAL; - } else { - /* Disable new spreading+tapset estimator until we can show it works - better than the old one. So far it seems like spreading_decision() - works best. */ -#if 0 - if (st->analysis.valid) - { - static const opus_val16 spread_thresholds[3] = {-QCONST16(.6f, 15), -QCONST16(.2f, 15), -QCONST16(.07f, 15)}; - static const opus_val16 spread_histeresis[3] = {QCONST16(.15f, 15), QCONST16(.07f, 15), QCONST16(.02f, 15)}; - static const opus_val16 tapset_thresholds[2] = {QCONST16(.0f, 15), QCONST16(.15f, 15)}; - static const opus_val16 tapset_histeresis[2] = {QCONST16(.1f, 15), QCONST16(.05f, 15)}; - st->spread_decision = hysteresis_decision(-st->analysis.tonality, spread_thresholds, spread_histeresis, 3, st->spread_decision); - st->tapset_decision = hysteresis_decision(st->analysis.tonality_slope, tapset_thresholds, tapset_histeresis, 2, st->tapset_decision); - } else -#endif - { - st->spread_decision = spreading_decision(mode, X, - &st->tonal_average, st->spread_decision, &st->hf_average, - &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M); - } - /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/ - /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/ - } - ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5); - } - - ALLOC(offsets, nbEBands, int); - - maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets, - st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr, - eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc); - /* For LFE, everything interesting is in the first band */ - if (st->lfe) - 
offsets[0] = IMIN(8, effectiveBytes/3); - ALLOC(cap, nbEBands, int); - init_caps(mode,cap,LM,C); - - dynalloc_logp = 6; - total_bits<<=BITRES; - total_boost = 0; - tell = ec_tell_frac(enc); - for (i=start;i<end;i++) - { - int width, quanta; - int dynalloc_loop_logp; - int boost; - int j; - width = C*(eBands[i+1]-eBands[i])<<LM; - /* quanta is 6 bits, but no more than 1 bit/sample - and no less than 1/8 bit/sample */ - quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width)); - dynalloc_loop_logp = dynalloc_logp; - boost = 0; - for (j = 0; tell+(dynalloc_loop_logp<<BITRES) < total_bits-total_boost - && boost < cap[i]; j++) - { - int flag; - flag = j<offsets[i]; - ec_enc_bit_logp(enc, flag, dynalloc_loop_logp); - tell = ec_tell_frac(enc); - if (!flag) - break; - boost += quanta; - total_boost += quanta; - dynalloc_loop_logp = 1; - } - /* Making dynalloc more likely */ - if (j) - dynalloc_logp = IMAX(2, dynalloc_logp-1); - offsets[i] = boost; - } - - if (C==2) - { - static const opus_val16 intensity_thresholds[21]= - /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 off*/ - { 1, 2, 3, 4, 5, 6, 7, 8,16,24,36,44,50,56,62,67,72,79,88,106,134}; - static const opus_val16 intensity_histeresis[21]= - { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 5, 6, 8, 8}; - - /* Always use MS for 2.5 ms frames until we can do a better analysis */ - if (LM!=0) - dual_stereo = stereo_analysis(mode, X, LM, N); - - st->intensity = hysteresis_decision((opus_val16)(equiv_rate/1000), - intensity_thresholds, intensity_histeresis, 21, st->intensity); - st->intensity = IMIN(end,IMAX(start, st->intensity)); - } - - alloc_trim = 5; - if (tell+(6<<BITRES) <= total_bits - total_boost) - { - if (st->lfe) - alloc_trim = 5; - else - alloc_trim = alloc_trim_analysis(mode, X, bandLogE, - end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, - st->intensity, surround_trim, st->arch); - ec_enc_icdf(enc, alloc_trim, trim_icdf, 7); - tell = ec_tell_frac(enc); - } - - /* Variable bitrate */ - 
if (vbr_rate>0) - { - opus_val16 alpha; - opus_int32 delta; - /* The target rate in 8th bits per frame */ - opus_int32 target, base_target; - opus_int32 min_allowed; - int lm_diff = mode->maxLM - LM; - - /* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms. - The CELT allocator will just not be able to use more than that anyway. */ - nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM)); - base_target = vbr_rate - ((40*C+20)<<BITRES); - - if (st->constrained_vbr) - base_target += (st->vbr_offset>>lm_diff); - - target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate, - st->lastCodedBands, C, st->intensity, st->constrained_vbr, - st->stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth, - st->variable_duration, st->lfe, st->energy_mask!=NULL, surround_masking, - temporal_vbr); - - /* The current offset is removed from the target and the space used - so far is added*/ - target=target+tell; - /* In VBR mode the frame size must not be reduced so much that it would - result in the encoder running out of bits. - The margin of 2 bytes ensures that none of the bust-prevention logic - in the decoder will have triggered so far. */ - min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2 - nbFilledBytes; - - nbAvailableBytes = (target+(1<<(BITRES+2)))>>(BITRES+3); - nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes); - nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes) - nbFilledBytes; - - /* By how much did we "miss" the target on that frame */ - delta = target - vbr_rate; - - target=nbAvailableBytes<<(BITRES+3); - - /*If the frame is silent we don't adjust our drift, otherwise - the encoder will shoot to very high rates after hitting a - span of silence, but we do allow the bitres to refill. - This means that we'll undershoot our target in CVBR/VBR modes - on files with lots of silence. 
*/ - if(silence) - { - nbAvailableBytes = 2; - target = 2*8<<BITRES; - delta = 0; - } - - if (st->vbr_count < 970) - { - st->vbr_count++; - alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+20),16)); - } else - alpha = QCONST16(.001f,15); - /* How many bits have we used in excess of what we're allowed */ - if (st->constrained_vbr) - st->vbr_reservoir += target - vbr_rate; - /*printf ("%d\n", st->vbr_reservoir);*/ - - /* Compute the offset we need to apply in order to reach the target */ - if (st->constrained_vbr) - { - st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift); - st->vbr_offset = -st->vbr_drift; - } - /*printf ("%d\n", st->vbr_drift);*/ - - if (st->constrained_vbr && st->vbr_reservoir < 0) - { - /* We're under the min value -- increase rate */ - int adjust = (-st->vbr_reservoir)/(8<<BITRES); - /* Unless we're just coding silence */ - nbAvailableBytes += silence?0:adjust; - st->vbr_reservoir = 0; - /*printf ("+%d\n", adjust);*/ - } - nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes); - /*printf("%d\n", nbCompressedBytes*50*8);*/ - /* This moves the raw bits to take into account the new compressed size */ - ec_enc_shrink(enc, nbCompressedBytes); - } - - /* Bit allocation */ - ALLOC(fine_quant, nbEBands, int); - ALLOC(pulses, nbEBands, int); - ALLOC(fine_priority, nbEBands, int); - - /* bits = packet size - where we are - safety*/ - bits = (((opus_int32)nbCompressedBytes*8)<<BITRES) - ec_tell_frac(enc) - 1; - anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? 
(1<<BITRES) : 0; - bits -= anti_collapse_rsv; - signalBandwidth = end-1; -#ifndef DISABLE_FLOAT_API - if (st->analysis.valid) - { - int min_bandwidth; - if (equiv_rate < (opus_int32)32000*C) - min_bandwidth = 13; - else if (equiv_rate < (opus_int32)48000*C) - min_bandwidth = 16; - else if (equiv_rate < (opus_int32)60000*C) - min_bandwidth = 18; - else if (equiv_rate < (opus_int32)80000*C) - min_bandwidth = 19; - else - min_bandwidth = 20; - signalBandwidth = IMAX(st->analysis.bandwidth, min_bandwidth); - } -#endif - if (st->lfe) - signalBandwidth = 1; - codedBands = compute_allocation(mode, start, end, offsets, cap, - alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses, - fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth); - if (st->lastCodedBands) - st->lastCodedBands = IMIN(st->lastCodedBands+1,IMAX(st->lastCodedBands-1,codedBands)); - else - st->lastCodedBands = codedBands; - - quant_fine_energy(mode, start, end, oldBandE, error, fine_quant, enc, C); - - /* Residual quantisation */ - ALLOC(collapse_masks, C*nbEBands, unsigned char); - quant_all_bands(1, mode, start, end, X, C==2 ? 
X+N : NULL, collapse_masks, - bandE, pulses, shortBlocks, st->spread_decision, - dual_stereo, st->intensity, tf_res, nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, - balance, enc, LM, codedBands, &st->rng, st->arch); - - if (anti_collapse_rsv > 0) - { - anti_collapse_on = st->consec_transient<2; -#ifdef FUZZING - anti_collapse_on = rand()&0x1; -#endif - ec_enc_bits(enc, anti_collapse_on, 1); - } - quant_energy_finalise(mode, start, end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C); - - if (silence) - { - for (i=0;i<C*nbEBands;i++) - oldBandE[i] = -QCONST16(28.f,DB_SHIFT); - } - -#ifdef RESYNTH - /* Re-synthesis of the coded audio if required */ - { - celt_sig *out_mem[2]; - - if (anti_collapse_on) - { - anti_collapse(mode, X, collapse_masks, LM, C, N, - start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); - } - - c=0; do { - OPUS_MOVE(st->syn_mem[c], st->syn_mem[c]+N, 2*MAX_PERIOD-N+overlap/2); - } while (++c<CC); - - c=0; do { - out_mem[c] = st->syn_mem[c]+2*MAX_PERIOD-N; - } while (++c<CC); - - celt_synthesis(mode, X, out_mem, oldBandE, start, effEnd, - C, CC, isTransient, LM, st->upsample, silence, st->arch); - - c=0; do { - st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD); - st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD); - comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize, - st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset, - mode->window, overlap); - if (LM!=0) - comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize, - st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset, - mode->window, overlap); - } while (++c<CC); - - /* We reuse freq[] as scratch space for the de-emphasis */ - deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD); - 
st->prefilter_period_old = st->prefilter_period; - st->prefilter_gain_old = st->prefilter_gain; - st->prefilter_tapset_old = st->prefilter_tapset; - } -#endif - - st->prefilter_period = pitch_index; - st->prefilter_gain = gain1; - st->prefilter_tapset = prefilter_tapset; -#ifdef RESYNTH - if (LM!=0) - { - st->prefilter_period_old = st->prefilter_period; - st->prefilter_gain_old = st->prefilter_gain; - st->prefilter_tapset_old = st->prefilter_tapset; - } -#endif - - if (CC==2&&C==1) { - OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands); - } - - if (!isTransient) - { - OPUS_COPY(oldLogE2, oldLogE, CC*nbEBands); - OPUS_COPY(oldLogE, oldBandE, CC*nbEBands); - } else { - for (i=0;i<CC*nbEBands;i++) - oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]); - } - /* In case start or end were to change */ - c=0; do - { - for (i=0;i<start;i++) - { - oldBandE[c*nbEBands+i]=0; - oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); - } - for (i=end;i<nbEBands;i++) - { - oldBandE[c*nbEBands+i]=0; - oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); - } - } while (++c<CC); - - if (isTransient || transient_got_disabled) - st->consec_transient++; - else - st->consec_transient=0; - st->rng = enc->rng; - - /* If there's any room left (can only happen for very high rates), - it's already filled with zeros */ - ec_enc_done(enc); - -#ifdef CUSTOM_MODES - if (st->signalling) - nbCompressedBytes++; -#endif - - RESTORE_STACK; - if (ec_get_error(enc)) - return OPUS_INTERNAL_ERROR; - else - return nbCompressedBytes; -} - - -#ifdef CUSTOM_MODES - -#ifdef FIXED_POINT -int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) -{ - return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL); -} - -#ifndef DISABLE_FLOAT_API -int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int 
nbCompressedBytes) -{ - int j, ret, C, N; - VARDECL(opus_int16, in); - ALLOC_STACK; - - if (pcm==NULL) - return OPUS_BAD_ARG; - - C = st->channels; - N = frame_size; - ALLOC(in, C*N, opus_int16); - - for (j=0;j<C*N;j++) - in[j] = FLOAT2INT16(pcm[j]); - - ret=celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL); -#ifdef RESYNTH - for (j=0;j<C*N;j++) - ((float*)pcm)[j]=in[j]*(1.f/32768.f); -#endif - RESTORE_STACK; - return ret; -} -#endif /* DISABLE_FLOAT_API */ -#else - -int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) -{ - int j, ret, C, N; - VARDECL(celt_sig, in); - ALLOC_STACK; - - if (pcm==NULL) - return OPUS_BAD_ARG; - - C=st->channels; - N=frame_size; - ALLOC(in, C*N, celt_sig); - for (j=0;j<C*N;j++) { - in[j] = SCALEOUT(pcm[j]); - } - - ret = celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL); -#ifdef RESYNTH - for (j=0;j<C*N;j++) - ((opus_int16*)pcm)[j] = FLOAT2INT16(in[j]); -#endif - RESTORE_STACK; - return ret; -} - -int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) -{ - return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL); -} - -#endif - -#endif /* CUSTOM_MODES */ - -int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...) 
-{ - va_list ap; - - va_start(ap, request); - switch (request) - { - case OPUS_SET_COMPLEXITY_REQUEST: - { - int value = va_arg(ap, opus_int32); - if (value<0 || value>10) - goto bad_arg; - st->complexity = value; - } - break; - case CELT_SET_START_BAND_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<0 || value>=st->mode->nbEBands) - goto bad_arg; - st->start = value; - } - break; - case CELT_SET_END_BAND_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<1 || value>st->mode->nbEBands) - goto bad_arg; - st->end = value; - } - break; - case CELT_SET_PREDICTION_REQUEST: - { - int value = va_arg(ap, opus_int32); - if (value<0 || value>2) - goto bad_arg; - st->disable_pf = value<=1; - st->force_intra = value==0; - } - break; - case OPUS_SET_PACKET_LOSS_PERC_REQUEST: - { - int value = va_arg(ap, opus_int32); - if (value<0 || value>100) - goto bad_arg; - st->loss_rate = value; - } - break; - case OPUS_SET_VBR_CONSTRAINT_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->constrained_vbr = value; - } - break; - case OPUS_SET_VBR_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->vbr = value; - } - break; - case OPUS_SET_BITRATE_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<=500 && value!=OPUS_BITRATE_MAX) - goto bad_arg; - value = IMIN(value, 260000*st->channels); - st->bitrate = value; - } - break; - case CELT_SET_CHANNELS_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<1 || value>2) - goto bad_arg; - st->stream_channels = value; - } - break; - case OPUS_SET_LSB_DEPTH_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<8 || value>24) - goto bad_arg; - st->lsb_depth=value; - } - break; - case OPUS_GET_LSB_DEPTH_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - *value=st->lsb_depth; - } - break; - case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->variable_duration = value; - } - 
break; - case OPUS_RESET_STATE: - { - int i; - opus_val16 *oldBandE, *oldLogE, *oldLogE2; - oldBandE = (opus_val16*)(st->in_mem+st->channels*(st->mode->overlap+COMBFILTER_MAXPERIOD)); - oldLogE = oldBandE + st->channels*st->mode->nbEBands; - oldLogE2 = oldLogE + st->channels*st->mode->nbEBands; - OPUS_CLEAR((char*)&st->ENCODER_RESET_START, - opus_custom_encoder_get_size(st->mode, st->channels)- - ((char*)&st->ENCODER_RESET_START - (char*)st)); - for (i=0;i<st->channels*st->mode->nbEBands;i++) - oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT); - st->vbr_offset = 0; - st->delayedIntra = 1; - st->spread_decision = SPREAD_NORMAL; - st->tonal_average = 256; - st->hf_average = 0; - st->tapset_decision = 0; - } - break; -#ifdef CUSTOM_MODES - case CELT_SET_INPUT_CLIPPING_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->clip = value; - } - break; -#endif - case CELT_SET_SIGNALLING_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->signalling = value; - } - break; - case CELT_SET_ANALYSIS_REQUEST: - { - AnalysisInfo *info = va_arg(ap, AnalysisInfo *); - if (info) - OPUS_COPY(&st->analysis, info, 1); - } - break; - case CELT_GET_MODE_REQUEST: - { - const CELTMode ** value = va_arg(ap, const CELTMode**); - if (value==0) - goto bad_arg; - *value=st->mode; - } - break; - case OPUS_GET_FINAL_RANGE_REQUEST: - { - opus_uint32 * value = va_arg(ap, opus_uint32 *); - if (value==0) - goto bad_arg; - *value=st->rng; - } - break; - case OPUS_SET_LFE_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->lfe = value; - } - break; - case OPUS_SET_ENERGY_MASK_REQUEST: - { - opus_val16 *value = va_arg(ap, opus_val16*); - st->energy_mask = value; - } - break; - default: - goto bad_request; - } - va_end(ap); - return OPUS_OK; -bad_arg: - va_end(ap); - return OPUS_BAD_ARG; -bad_request: - va_end(ap); - return OPUS_UNIMPLEMENTED; -} diff --git a/thirdparty/opus/celt/celt_lpc.c b/thirdparty/opus/celt/celt_lpc.c deleted file mode 100644 index 
b410a21c5f..0000000000 --- a/thirdparty/opus/celt/celt_lpc.c +++ /dev/null @@ -1,314 +0,0 @@ -/* Copyright (c) 2009-2010 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "celt_lpc.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "pitch.h" - -void _celt_lpc( - opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */ -const opus_val32 *ac, /* in: [0...p] autocorrelation values */ -int p -) -{ - int i, j; - opus_val32 r; - opus_val32 error = ac[0]; -#ifdef FIXED_POINT - opus_val32 lpc[LPC_ORDER]; -#else - float *lpc = _lpc; -#endif - - OPUS_CLEAR(lpc, p); - if (ac[0] != 0) - { - for (i = 0; i < p; i++) { - /* Sum up this iteration's reflection coefficient */ - opus_val32 rr = 0; - for (j = 0; j < i; j++) - rr += MULT32_32_Q31(lpc[j],ac[i - j]); - rr += SHR32(ac[i + 1],3); - r = -frac_div32(SHL32(rr,3), error); - /* Update LPC coefficients and total error */ - lpc[i] = SHR32(r,3); - for (j = 0; j < (i+1)>>1; j++) - { - opus_val32 tmp1, tmp2; - tmp1 = lpc[j]; - tmp2 = lpc[i-1-j]; - lpc[j] = tmp1 + MULT32_32_Q31(r,tmp2); - lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1); - } - - error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error); - /* Bail out once we get 30 dB gain */ -#ifdef FIXED_POINT - if (error<SHR32(ac[0],10)) - break; -#else - if (error<.001f*ac[0]) - break; -#endif - } - } -#ifdef FIXED_POINT - for (i=0;i<p;i++) - _lpc[i] = ROUND16(lpc[i],16); -#endif -} - - -void celt_fir_c( - const opus_val16 *_x, - const opus_val16 *num, - opus_val16 *_y, - int N, - int ord, - opus_val16 *mem, - int arch) -{ - int i,j; - VARDECL(opus_val16, rnum); - VARDECL(opus_val16, x); - SAVE_STACK; - - ALLOC(rnum, ord, opus_val16); - ALLOC(x, N+ord, opus_val16); - for(i=0;i<ord;i++) - rnum[i] = num[ord-i-1]; - for(i=0;i<ord;i++) - x[i] = mem[ord-i-1]; - for (i=0;i<N;i++) - x[i+ord]=_x[i]; - for(i=0;i<ord;i++) - mem[i] = _x[N-i-1]; -#ifdef SMALL_FOOTPRINT - (void)arch; - for (i=0;i<N;i++) - { - opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT); - for (j=0;j<ord;j++) - { - sum = MAC16_16(sum,rnum[j],x[i+j]); - } - _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT)); - } -#else - for 
(i=0;i<N-3;i+=4) - { - opus_val32 sum[4]={0,0,0,0}; - xcorr_kernel(rnum, x+i, sum, ord, arch); - _y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT))); - _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT))); - _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT))); - _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT))); - } - for (;i<N;i++) - { - opus_val32 sum = 0; - for (j=0;j<ord;j++) - sum = MAC16_16(sum,rnum[j],x[i+j]); - _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT))); - } -#endif - RESTORE_STACK; -} - -void celt_iir(const opus_val32 *_x, - const opus_val16 *den, - opus_val32 *_y, - int N, - int ord, - opus_val16 *mem, - int arch) -{ -#ifdef SMALL_FOOTPRINT - int i,j; - (void)arch; - for (i=0;i<N;i++) - { - opus_val32 sum = _x[i]; - for (j=0;j<ord;j++) - { - sum -= MULT16_16(den[j],mem[j]); - } - for (j=ord-1;j>=1;j--) - { - mem[j]=mem[j-1]; - } - mem[0] = ROUND16(sum,SIG_SHIFT); - _y[i] = sum; - } -#else - int i,j; - VARDECL(opus_val16, rden); - VARDECL(opus_val16, y); - SAVE_STACK; - - celt_assert((ord&3)==0); - ALLOC(rden, ord, opus_val16); - ALLOC(y, N+ord, opus_val16); - for(i=0;i<ord;i++) - rden[i] = den[ord-i-1]; - for(i=0;i<ord;i++) - y[i] = -mem[ord-i-1]; - for(;i<N+ord;i++) - y[i]=0; - for (i=0;i<N-3;i+=4) - { - /* Unroll by 4 as if it were an FIR filter */ - opus_val32 sum[4]; - sum[0]=_x[i]; - sum[1]=_x[i+1]; - sum[2]=_x[i+2]; - sum[3]=_x[i+3]; - xcorr_kernel(rden, y+i, sum, ord, arch); - - /* Patch up the result to compensate for the fact that this is an IIR */ - y[i+ord ] = -ROUND16(sum[0],SIG_SHIFT); - _y[i ] = sum[0]; - sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]); - y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT); - _y[i+1] = sum[1]; - sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]); - sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]); - y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT); - _y[i+2] = sum[2]; - - sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]); - sum[3] = 
MAC16_16(sum[3], y[i+ord+1], den[1]); - sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]); - y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT); - _y[i+3] = sum[3]; - } - for (;i<N;i++) - { - opus_val32 sum = _x[i]; - for (j=0;j<ord;j++) - sum -= MULT16_16(rden[j],y[i+j]); - y[i+ord] = ROUND16(sum,SIG_SHIFT); - _y[i] = sum; - } - for(i=0;i<ord;i++) - mem[i] = _y[N-i-1]; - RESTORE_STACK; -#endif -} - -int _celt_autocorr( - const opus_val16 *x, /* in: [0...n-1] samples x */ - opus_val32 *ac, /* out: [0...lag-1] ac values */ - const opus_val16 *window, - int overlap, - int lag, - int n, - int arch - ) -{ - opus_val32 d; - int i, k; - int fastN=n-lag; - int shift; - const opus_val16 *xptr; - VARDECL(opus_val16, xx); - SAVE_STACK; - ALLOC(xx, n, opus_val16); - celt_assert(n>0); - celt_assert(overlap>=0); - if (overlap == 0) - { - xptr = x; - } else { - for (i=0;i<n;i++) - xx[i] = x[i]; - for (i=0;i<overlap;i++) - { - xx[i] = MULT16_16_Q15(x[i],window[i]); - xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]); - } - xptr = xx; - } - shift=0; -#ifdef FIXED_POINT - { - opus_val32 ac0; - ac0 = 1+(n<<7); - if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9); - for(i=(n&1);i<n;i+=2) - { - ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9); - ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9); - } - - shift = celt_ilog2(ac0)-30+10; - shift = (shift)/2; - if (shift>0) - { - for(i=0;i<n;i++) - xx[i] = PSHR32(xptr[i], shift); - xptr = xx; - } else - shift = 0; - } -#endif - celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch); - for (k=0;k<=lag;k++) - { - for (i = k+fastN, d = 0; i < n; i++) - d = MAC16_16(d, xptr[i], xptr[i-k]); - ac[k] += d; - } -#ifdef FIXED_POINT - shift = 2*shift; - if (shift<=0) - ac[0] += SHL32((opus_int32)1, -shift); - if (ac[0] < 268435456) - { - int shift2 = 29 - EC_ILOG(ac[0]); - for (i=0;i<=lag;i++) - ac[i] = SHL32(ac[i], shift2); - shift -= shift2; - } else if (ac[0] >= 536870912) - { - int shift2=1; - if (ac[0] >= 1073741824) - shift2++; - for (i=0;i<=lag;i++) - ac[i] = SHR32(ac[i], 
shift2); - shift += shift2; - } -#endif - - RESTORE_STACK; - return shift; -} diff --git a/thirdparty/opus/celt/celt_lpc.h b/thirdparty/opus/celt/celt_lpc.h deleted file mode 100644 index 323459eb1a..0000000000 --- a/thirdparty/opus/celt/celt_lpc.h +++ /dev/null @@ -1,67 +0,0 @@ -/* Copyright (c) 2009-2010 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifndef PLC_H -#define PLC_H - -#include "arch.h" -#include "cpu_support.h" - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) -#include "x86/celt_lpc_sse.h" -#endif - -#define LPC_ORDER 24 - -void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p); - -void celt_fir_c( - const opus_val16 *x, - const opus_val16 *num, - opus_val16 *y, - int N, - int ord, - opus_val16 *mem, - int arch); - -#if !defined(OVERRIDE_CELT_FIR) -#define celt_fir(x, num, y, N, ord, mem, arch) \ - (celt_fir_c(x, num, y, N, ord, mem, arch)) -#endif - -void celt_iir(const opus_val32 *x, - const opus_val16 *den, - opus_val32 *y, - int N, - int ord, - opus_val16 *mem, - int arch); - -int _celt_autocorr(const opus_val16 *x, opus_val32 *ac, - const opus_val16 *window, int overlap, int lag, int n, int arch); - -#endif /* PLC_H */ diff --git a/thirdparty/opus/celt/cpu_support.h b/thirdparty/opus/celt/cpu_support.h deleted file mode 100644 index 68fc60678f..0000000000 --- a/thirdparty/opus/celt/cpu_support.h +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (c) 2010 Xiph.Org Foundation - * Copyright (c) 2013 Parrot */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef CPU_SUPPORT_H -#define CPU_SUPPORT_H - -#include "opus_types.h" -#include "opus_defines.h" - -#if defined(OPUS_HAVE_RTCD) && \ - (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)) -#include "arm/armcpu.h" - -/* We currently support 4 ARM variants: - * arch[0] -> ARMv4 - * arch[1] -> ARMv5E - * arch[2] -> ARMv6 - * arch[3] -> NEON - */ -#define OPUS_ARCHMASK 3 - -#elif (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \ - (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \ - (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ - (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)) - -#include "x86/x86cpu.h" -/* We currently support 5 x86 variants: - * arch[0] -> non-sse - * arch[1] -> sse - * arch[2] -> sse2 - * arch[3] -> sse4.1 - * arch[4] -> avx - */ -#define OPUS_ARCHMASK 7 -int opus_select_arch(void); - -#else -#define OPUS_ARCHMASK 0 - -static OPUS_INLINE int opus_select_arch(void) -{ - return 0; -} -#endif -#endif diff --git a/thirdparty/opus/celt/cwrs.c b/thirdparty/opus/celt/cwrs.c deleted file mode 100644 index 9722f0ac86..0000000000 --- a/thirdparty/opus/celt/cwrs.c +++ /dev/null @@ -1,715 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Copyright (c) 2007-2009 Timothy B. Terriberry - Written by Timothy B. 
Terriberry and Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "os_support.h" -#include "cwrs.h" -#include "mathops.h" -#include "arch.h" - -#ifdef CUSTOM_MODES - -/*Guaranteed to return a conservatively large estimate of the binary logarithm - with frac bits of fractional precision. - Tested for all possible 32-bit inputs with frac=4, where the maximum - overestimation is 0.06254243 bits.*/ -int log2_frac(opus_uint32 val, int frac) -{ - int l; - l=EC_ILOG(val); - if(val&(val-1)){ - /*This is (val>>l-16), but guaranteed to round up, even if adding a bias - before the shift would cause overflow (e.g., for 0xFFFFxxxx). 
- Doesn't work for val=0, but that case fails the test above.*/ - if(l>16)val=((val-1)>>(l-16))+1; - else val<<=16-l; - l=(l-1)<<frac; - /*Note that we always need one iteration, since the rounding up above means - that we might need to adjust the integer part of the logarithm.*/ - do{ - int b; - b=(int)(val>>16); - l+=b<<frac; - val=(val+b)>>b; - val=(val*val+0x7FFF)>>15; - } - while(frac-->0); - /*If val is not exactly 0x8000, then we have to round up the remainder.*/ - return l+(val>0x8000); - } - /*Exact powers of two require no rounding.*/ - else return (l-1)<<frac; -} -#endif - -/*Although derived separately, the pulse vector coding scheme is equivalent to - a Pyramid Vector Quantizer \cite{Fis86}. - Some additional notes about an early version appear at - https://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering - and the definitions of some terms have evolved since that was written. - - The conversion from a pulse vector to an integer index (encoding) and back - (decoding) is governed by two related functions, V(N,K) and U(N,K). - - V(N,K) = the number of combinations, with replacement, of N items, taken K - at a time, when a sign bit is added to each item taken at least once (i.e., - the number of N-dimensional unit pulse vectors with K pulses). - One way to compute this is via - V(N,K) = K>0 ? sum(k=1...K,2**k*choose(N,k)*choose(K-1,k-1)) : 1, - where choose() is the binomial function. 
- A table of values for N<10 and K<10 looks like: - V[10][10] = { - {1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {1, 2, 2, 2, 2, 2, 2, 2, 2, 2}, - {1, 4, 8, 12, 16, 20, 24, 28, 32, 36}, - {1, 6, 18, 38, 66, 102, 146, 198, 258, 326}, - {1, 8, 32, 88, 192, 360, 608, 952, 1408, 1992}, - {1, 10, 50, 170, 450, 1002, 1970, 3530, 5890, 9290}, - {1, 12, 72, 292, 912, 2364, 5336, 10836, 20256, 35436}, - {1, 14, 98, 462, 1666, 4942, 12642, 28814, 59906, 115598}, - {1, 16, 128, 688, 2816, 9424, 27008, 68464, 157184, 332688}, - {1, 18, 162, 978, 4482, 16722, 53154, 148626, 374274, 864146} - }; - - U(N,K) = the number of such combinations wherein N-1 objects are taken at - most K-1 at a time. - This is given by - U(N,K) = sum(k=0...K-1,V(N-1,k)) - = K>0 ? (V(N-1,K-1) + V(N,K-1))/2 : 0. - The latter expression also makes clear that U(N,K) is half the number of such - combinations wherein the first object is taken at least once. - Although it may not be clear from either of these definitions, U(N,K) is the - natural function to work with when enumerating the pulse vector codebooks, - not V(N,K). - U(N,K) is not well-defined for N=0, but with the extension - U(0,K) = K>0 ? 0 : 1, - the function becomes symmetric: U(N,K) = U(K,N), with a similar table: - U[10][10] = { - {1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {0, 1, 3, 5, 7, 9, 11, 13, 15, 17}, - {0, 1, 5, 13, 25, 41, 61, 85, 113, 145}, - {0, 1, 7, 25, 63, 129, 231, 377, 575, 833}, - {0, 1, 9, 41, 129, 321, 681, 1289, 2241, 3649}, - {0, 1, 11, 61, 231, 681, 1683, 3653, 7183, 13073}, - {0, 1, 13, 85, 377, 1289, 3653, 8989, 19825, 40081}, - {0, 1, 15, 113, 575, 2241, 7183, 19825, 48639, 108545}, - {0, 1, 17, 145, 833, 3649, 13073, 40081, 108545, 265729} - }; - - With this extension, V(N,K) may be written in terms of U(N,K): - V(N,K) = U(N,K) + U(N,K+1) - for all N>=0, K>=0. 
- Thus U(N,K+1) represents the number of combinations where the first element - is positive or zero, and U(N,K) represents the number of combinations where - it is negative. - With a large enough table of U(N,K) values, we could write O(N) encoding - and O(min(N*log(K),N+K)) decoding routines, but such a table would be - prohibitively large for small embedded devices (K may be as large as 32767 - for small N, and N may be as large as 200). - - Both functions obey the same recurrence relation: - V(N,K) = V(N-1,K) + V(N,K-1) + V(N-1,K-1), - U(N,K) = U(N-1,K) + U(N,K-1) + U(N-1,K-1), - for all N>0, K>0, with different initial conditions at N=0 or K=0. - This allows us to construct a row of one of the tables above given the - previous row or the next row. - Thus we can derive O(NK) encoding and decoding routines with O(K) memory - using only addition and subtraction. - - When encoding, we build up from the U(2,K) row and work our way forwards. - When decoding, we need to start at the U(N,K) row and work our way backwards, - which requires a means of computing U(N,K). - U(N,K) may be computed from two previous values with the same N: - U(N,K) = ((2*N-1)*U(N,K-1) - U(N,K-2))/(K-1) + U(N,K-2) - for all N>1, and since U(N,K) is symmetric, a similar relation holds for two - previous values with the same K: - U(N,K>1) = ((2*K-1)*U(N-1,K) - U(N-2,K))/(N-1) + U(N-2,K) - for all K>1. - This allows us to construct an arbitrary row of the U(N,K) table by starting - with the first two values, which are constants. - This saves roughly 2/3 the work in our O(NK) decoding routine, but costs O(K) - multiplications. - Similar relations can be derived for V(N,K), but are not used here. - - For N>0 and K>0, U(N,K) and V(N,K) take on the form of an (N-1)-degree - polynomial for fixed N. 
- The first few are - U(1,K) = 1, - U(2,K) = 2*K-1, - U(3,K) = (2*K-2)*K+1, - U(4,K) = (((4*K-6)*K+8)*K-3)/3, - U(5,K) = ((((2*K-4)*K+10)*K-8)*K+3)/3, - and - V(1,K) = 2, - V(2,K) = 4*K, - V(3,K) = 4*K*K+2, - V(4,K) = 8*(K*K+2)*K/3, - V(5,K) = ((4*K*K+20)*K*K+6)/3, - for all K>0. - This allows us to derive O(N) encoding and O(N*log(K)) decoding routines for - small N (and indeed decoding is also O(N) for N<3). - - @ARTICLE{Fis86, - author="Thomas R. Fischer", - title="A Pyramid Vector Quantizer", - journal="IEEE Transactions on Information Theory", - volume="IT-32", - number=4, - pages="568--583", - month=Jul, - year=1986 - }*/ - -#if !defined(SMALL_FOOTPRINT) - -/*U(N,K) = U(K,N) := N>0?K>0?U(N-1,K)+U(N,K-1)+U(N-1,K-1):0:K>0?1:0*/ -# define CELT_PVQ_U(_n,_k) (CELT_PVQ_U_ROW[IMIN(_n,_k)][IMAX(_n,_k)]) -/*V(N,K) := U(N,K)+U(N,K+1) = the number of PVQ codewords for a band of size N - with K pulses allocated to it.*/ -# define CELT_PVQ_V(_n,_k) (CELT_PVQ_U(_n,_k)+CELT_PVQ_U(_n,(_k)+1)) - -/*For each V(N,K) supported, we will access element U(min(N,K+1),max(N,K+1)). - Thus, the number of entries in row I is the larger of the maximum number of - pulses we will ever allocate for a given N=I (K=128, or however many fit in - 32 bits, whichever is smaller), plus one, and the maximum N for which - K=I-1 pulses fit in 32 bits. - The largest band size in an Opus Custom mode is 208. 
- Otherwise, we can limit things to the set of N which can be achieved by - splitting a band from a standard Opus mode: 176, 144, 96, 88, 72, 64, 48, - 44, 36, 32, 24, 22, 18, 16, 8, 4, 2).*/ -#if defined(CUSTOM_MODES) -static const opus_uint32 CELT_PVQ_U_DATA[1488]={ -#else -static const opus_uint32 CELT_PVQ_U_DATA[1272]={ -#endif - /*N=0, K=0...176:*/ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -#if defined(CUSTOM_MODES) - /*...208:*/ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, -#endif - /*N=1, K=1...176:*/ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -#if defined(CUSTOM_MODES) - /*...208:*/ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, -#endif - /*N=2, K=2...176:*/ - 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, - 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79, - 81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 101, 103, 105, 
107, 109, 111, 113, - 115, 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139, 141, 143, - 145, 147, 149, 151, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171, 173, - 175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 203, - 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, - 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263, - 265, 267, 269, 271, 273, 275, 277, 279, 281, 283, 285, 287, 289, 291, 293, - 295, 297, 299, 301, 303, 305, 307, 309, 311, 313, 315, 317, 319, 321, 323, - 325, 327, 329, 331, 333, 335, 337, 339, 341, 343, 345, 347, 349, 351, -#if defined(CUSTOM_MODES) - /*...208:*/ - 353, 355, 357, 359, 361, 363, 365, 367, 369, 371, 373, 375, 377, 379, 381, - 383, 385, 387, 389, 391, 393, 395, 397, 399, 401, 403, 405, 407, 409, 411, - 413, 415, -#endif - /*N=3, K=3...176:*/ - 13, 25, 41, 61, 85, 113, 145, 181, 221, 265, 313, 365, 421, 481, 545, 613, - 685, 761, 841, 925, 1013, 1105, 1201, 1301, 1405, 1513, 1625, 1741, 1861, - 1985, 2113, 2245, 2381, 2521, 2665, 2813, 2965, 3121, 3281, 3445, 3613, 3785, - 3961, 4141, 4325, 4513, 4705, 4901, 5101, 5305, 5513, 5725, 5941, 6161, 6385, - 6613, 6845, 7081, 7321, 7565, 7813, 8065, 8321, 8581, 8845, 9113, 9385, 9661, - 9941, 10225, 10513, 10805, 11101, 11401, 11705, 12013, 12325, 12641, 12961, - 13285, 13613, 13945, 14281, 14621, 14965, 15313, 15665, 16021, 16381, 16745, - 17113, 17485, 17861, 18241, 18625, 19013, 19405, 19801, 20201, 20605, 21013, - 21425, 21841, 22261, 22685, 23113, 23545, 23981, 24421, 24865, 25313, 25765, - 26221, 26681, 27145, 27613, 28085, 28561, 29041, 29525, 30013, 30505, 31001, - 31501, 32005, 32513, 33025, 33541, 34061, 34585, 35113, 35645, 36181, 36721, - 37265, 37813, 38365, 38921, 39481, 40045, 40613, 41185, 41761, 42341, 42925, - 43513, 44105, 44701, 45301, 45905, 46513, 47125, 47741, 48361, 48985, 49613, - 50245, 50881, 51521, 52165, 52813, 53465, 54121, 54781, 55445, 56113, 56785, - 57461, 58141, 58825, 
59513, 60205, 60901, 61601, -#if defined(CUSTOM_MODES) - /*...208:*/ - 62305, 63013, 63725, 64441, 65161, 65885, 66613, 67345, 68081, 68821, 69565, - 70313, 71065, 71821, 72581, 73345, 74113, 74885, 75661, 76441, 77225, 78013, - 78805, 79601, 80401, 81205, 82013, 82825, 83641, 84461, 85285, 86113, -#endif - /*N=4, K=4...176:*/ - 63, 129, 231, 377, 575, 833, 1159, 1561, 2047, 2625, 3303, 4089, 4991, 6017, - 7175, 8473, 9919, 11521, 13287, 15225, 17343, 19649, 22151, 24857, 27775, - 30913, 34279, 37881, 41727, 45825, 50183, 54809, 59711, 64897, 70375, 76153, - 82239, 88641, 95367, 102425, 109823, 117569, 125671, 134137, 142975, 152193, - 161799, 171801, 182207, 193025, 204263, 215929, 228031, 240577, 253575, - 267033, 280959, 295361, 310247, 325625, 341503, 357889, 374791, 392217, - 410175, 428673, 447719, 467321, 487487, 508225, 529543, 551449, 573951, - 597057, 620775, 645113, 670079, 695681, 721927, 748825, 776383, 804609, - 833511, 863097, 893375, 924353, 956039, 988441, 1021567, 1055425, 1090023, - 1125369, 1161471, 1198337, 1235975, 1274393, 1313599, 1353601, 1394407, - 1436025, 1478463, 1521729, 1565831, 1610777, 1656575, 1703233, 1750759, - 1799161, 1848447, 1898625, 1949703, 2001689, 2054591, 2108417, 2163175, - 2218873, 2275519, 2333121, 2391687, 2451225, 2511743, 2573249, 2635751, - 2699257, 2763775, 2829313, 2895879, 2963481, 3032127, 3101825, 3172583, - 3244409, 3317311, 3391297, 3466375, 3542553, 3619839, 3698241, 3777767, - 3858425, 3940223, 4023169, 4107271, 4192537, 4278975, 4366593, 4455399, - 4545401, 4636607, 4729025, 4822663, 4917529, 5013631, 5110977, 5209575, - 5309433, 5410559, 5512961, 5616647, 5721625, 5827903, 5935489, 6044391, - 6154617, 6266175, 6379073, 6493319, 6608921, 6725887, 6844225, 6963943, - 7085049, 7207551, -#if defined(CUSTOM_MODES) - /*...208:*/ - 7331457, 7456775, 7583513, 7711679, 7841281, 7972327, 8104825, 8238783, - 8374209, 8511111, 8649497, 8789375, 8930753, 9073639, 9218041, 9363967, - 9511425, 9660423, 9810969, 
9963071, 10116737, 10271975, 10428793, 10587199, - 10747201, 10908807, 11072025, 11236863, 11403329, 11571431, 11741177, - 11912575, -#endif - /*N=5, K=5...176:*/ - 321, 681, 1289, 2241, 3649, 5641, 8361, 11969, 16641, 22569, 29961, 39041, - 50049, 63241, 78889, 97281, 118721, 143529, 172041, 204609, 241601, 283401, - 330409, 383041, 441729, 506921, 579081, 658689, 746241, 842249, 947241, - 1061761, 1186369, 1321641, 1468169, 1626561, 1797441, 1981449, 2179241, - 2391489, 2618881, 2862121, 3121929, 3399041, 3694209, 4008201, 4341801, - 4695809, 5071041, 5468329, 5888521, 6332481, 6801089, 7295241, 7815849, - 8363841, 8940161, 9545769, 10181641, 10848769, 11548161, 12280841, 13047849, - 13850241, 14689089, 15565481, 16480521, 17435329, 18431041, 19468809, - 20549801, 21675201, 22846209, 24064041, 25329929, 26645121, 28010881, - 29428489, 30899241, 32424449, 34005441, 35643561, 37340169, 39096641, - 40914369, 42794761, 44739241, 46749249, 48826241, 50971689, 53187081, - 55473921, 57833729, 60268041, 62778409, 65366401, 68033601, 70781609, - 73612041, 76526529, 79526721, 82614281, 85790889, 89058241, 92418049, - 95872041, 99421961, 103069569, 106816641, 110664969, 114616361, 118672641, - 122835649, 127107241, 131489289, 135983681, 140592321, 145317129, 150160041, - 155123009, 160208001, 165417001, 170752009, 176215041, 181808129, 187533321, - 193392681, 199388289, 205522241, 211796649, 218213641, 224775361, 231483969, - 238341641, 245350569, 252512961, 259831041, 267307049, 274943241, 282741889, - 290705281, 298835721, 307135529, 315607041, 324252609, 333074601, 342075401, - 351257409, 360623041, 370174729, 379914921, 389846081, 399970689, 410291241, - 420810249, 431530241, 442453761, 453583369, 464921641, 476471169, 488234561, - 500214441, 512413449, 524834241, 537479489, 550351881, 563454121, 576788929, - 590359041, 604167209, 618216201, 632508801, -#if defined(CUSTOM_MODES) - /*...208:*/ - 647047809, 661836041, 676876329, 692171521, 707724481, 723538089, 739615241, 
- 755958849, 772571841, 789457161, 806617769, 824056641, 841776769, 859781161, - 878072841, 896654849, 915530241, 934702089, 954173481, 973947521, 994027329, - 1014416041, 1035116809, 1056132801, 1077467201, 1099123209, 1121104041, - 1143412929, 1166053121, 1189027881, 1212340489, 1235994241, -#endif - /*N=6, K=6...96:*/ - 1683, 3653, 7183, 13073, 22363, 36365, 56695, 85305, 124515, 177045, 246047, - 335137, 448427, 590557, 766727, 982729, 1244979, 1560549, 1937199, 2383409, - 2908411, 3522221, 4235671, 5060441, 6009091, 7095093, 8332863, 9737793, - 11326283, 13115773, 15124775, 17372905, 19880915, 22670725, 25765455, - 29189457, 32968347, 37129037, 41699767, 46710137, 52191139, 58175189, - 64696159, 71789409, 79491819, 87841821, 96879431, 106646281, 117185651, - 128542501, 140763503, 153897073, 167993403, 183104493, 199284183, 216588185, - 235074115, 254801525, 275831935, 298228865, 322057867, 347386557, 374284647, - 402823977, 433078547, 465124549, 499040399, 534906769, 572806619, 612825229, - 655050231, 699571641, 746481891, 795875861, 847850911, 902506913, 959946283, - 1020274013, 1083597703, 1150027593, 1219676595, 1292660325, 1369097135, - 1449108145, 1532817275, 1620351277, 1711839767, 1807415257, 1907213187, - 2011371957, 2120032959, -#if defined(CUSTOM_MODES) - /*...109:*/ - 2233340609U, 2351442379U, 2474488829U, 2602633639U, 2736033641U, 2874848851U, - 3019242501U, 3169381071U, 3325434321U, 3487575323U, 3655980493U, 3830829623U, - 4012305913U, -#endif - /*N=7, K=7...54*/ - 8989, 19825, 40081, 75517, 134245, 227305, 369305, 579125, 880685, 1303777, - 1884961, 2668525, 3707509, 5064793, 6814249, 9041957, 11847485, 15345233, - 19665841, 24957661, 31388293, 39146185, 48442297, 59511829, 72616013, - 88043969, 106114625, 127178701, 151620757, 179861305, 212358985, 249612805, - 292164445, 340600625, 395555537, 457713341, 527810725, 606639529, 695049433, - 793950709, 904317037, 1027188385, 1163673953, 1314955181, 1482288821, - 1667010073, 1870535785, 2094367717, 
-#if defined(CUSTOM_MODES) - /*...60:*/ - 2340095869U, 2609401873U, 2904062449U, 3225952925U, 3577050821U, 3959439497U, -#endif - /*N=8, K=8...37*/ - 48639, 108545, 224143, 433905, 795455, 1392065, 2340495, 3800305, 5984767, - 9173505, 13726991, 20103025, 28875327, 40754369, 56610575, 77500017, - 104692735, 139703809, 184327311, 240673265, 311207743, 398796225, 506750351, - 638878193, 799538175, 993696769, 1226990095, 1505789553, 1837271615, - 2229491905U, -#if defined(CUSTOM_MODES) - /*...40:*/ - 2691463695U, 3233240945U, 3866006015U, -#endif - /*N=9, K=9...28:*/ - 265729, 598417, 1256465, 2485825, 4673345, 8405905, 14546705, 24331777, - 39490049, 62390545, 96220561, 145198913, 214828609, 312193553, 446304145, - 628496897, 872893441, 1196924561, 1621925137, 2173806145U, -#if defined(CUSTOM_MODES) - /*...29:*/ - 2883810113U, -#endif - /*N=10, K=10...24:*/ - 1462563, 3317445, 7059735, 14218905, 27298155, 50250765, 89129247, 152951073, - 254831667, 413442773, 654862247, 1014889769, 1541911931, 2300409629U, - 3375210671U, - /*N=11, K=11...19:*/ - 8097453, 18474633, 39753273, 81270333, 158819253, 298199265, 540279585, - 948062325, 1616336765, -#if defined(CUSTOM_MODES) - /*...20:*/ - 2684641785U, -#endif - /*N=12, K=12...18:*/ - 45046719, 103274625, 224298231, 464387817, 921406335, 1759885185, - 3248227095U, - /*N=13, K=13...16:*/ - 251595969, 579168825, 1267854873, 2653649025U, - /*N=14, K=14:*/ - 1409933619 -}; - -#if defined(CUSTOM_MODES) -static const opus_uint32 *const CELT_PVQ_U_ROW[15]={ - CELT_PVQ_U_DATA+ 0,CELT_PVQ_U_DATA+ 208,CELT_PVQ_U_DATA+ 415, - CELT_PVQ_U_DATA+ 621,CELT_PVQ_U_DATA+ 826,CELT_PVQ_U_DATA+1030, - CELT_PVQ_U_DATA+1233,CELT_PVQ_U_DATA+1336,CELT_PVQ_U_DATA+1389, - CELT_PVQ_U_DATA+1421,CELT_PVQ_U_DATA+1441,CELT_PVQ_U_DATA+1455, - CELT_PVQ_U_DATA+1464,CELT_PVQ_U_DATA+1470,CELT_PVQ_U_DATA+1473 -}; -#else -static const opus_uint32 *const CELT_PVQ_U_ROW[15]={ - CELT_PVQ_U_DATA+ 0,CELT_PVQ_U_DATA+ 176,CELT_PVQ_U_DATA+ 351, - CELT_PVQ_U_DATA+ 
525,CELT_PVQ_U_DATA+ 698,CELT_PVQ_U_DATA+ 870, - CELT_PVQ_U_DATA+1041,CELT_PVQ_U_DATA+1131,CELT_PVQ_U_DATA+1178, - CELT_PVQ_U_DATA+1207,CELT_PVQ_U_DATA+1226,CELT_PVQ_U_DATA+1240, - CELT_PVQ_U_DATA+1248,CELT_PVQ_U_DATA+1254,CELT_PVQ_U_DATA+1257 -}; -#endif - -#if defined(CUSTOM_MODES) -void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){ - int k; - /*_maxk==0 => there's nothing to do.*/ - celt_assert(_maxk>0); - _bits[0]=0; - for(k=1;k<=_maxk;k++)_bits[k]=log2_frac(CELT_PVQ_V(_n,k),_frac); -} -#endif - -static opus_uint32 icwrs(int _n,const int *_y){ - opus_uint32 i; - int j; - int k; - celt_assert(_n>=2); - j=_n-1; - i=_y[j]<0; - k=abs(_y[j]); - do{ - j--; - i+=CELT_PVQ_U(_n-j,k); - k+=abs(_y[j]); - if(_y[j]<0)i+=CELT_PVQ_U(_n-j,k+1); - } - while(j>0); - return i; -} - -void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ - celt_assert(_k>0); - ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k)); -} - -static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ - opus_uint32 p; - int s; - int k0; - opus_int16 val; - opus_val32 yy=0; - celt_assert(_k>0); - celt_assert(_n>1); - while(_n>2){ - opus_uint32 q; - /*Lots of pulses case:*/ - if(_k>=_n){ - const opus_uint32 *row; - row=CELT_PVQ_U_ROW[_n]; - /*Are the pulses in this dimension negative?*/ - p=row[_k+1]; - s=-(_i>=p); - _i-=p&s; - /*Count how many pulses were placed in this dimension.*/ - k0=_k; - q=row[_n]; - if(q>_i){ - celt_assert(p>q); - _k=_n; - do p=CELT_PVQ_U_ROW[--_k][_n]; - while(p>_i); - } - else for(p=row[_k];p>_i;p=row[_k])_k--; - _i-=p; - val=(k0-_k+s)^s; - *_y++=val; - yy=MAC16_16(yy,val,val); - } - /*Lots of dimensions case:*/ - else{ - /*Are there any pulses in this dimension at all?*/ - p=CELT_PVQ_U_ROW[_k][_n]; - q=CELT_PVQ_U_ROW[_k+1][_n]; - if(p<=_i&&_i<q){ - _i-=p; - *_y++=0; - } - else{ - /*Are the pulses in this dimension negative?*/ - s=-(_i>=q); - _i-=q&s; - /*Count how many pulses were placed in this dimension.*/ - k0=_k; - do p=CELT_PVQ_U_ROW[--_k][_n]; - 
while(p>_i); - _i-=p; - val=(k0-_k+s)^s; - *_y++=val; - yy=MAC16_16(yy,val,val); - } - } - _n--; - } - /*_n==2*/ - p=2*_k+1; - s=-(_i>=p); - _i-=p&s; - k0=_k; - _k=(_i+1)>>1; - if(_k)_i-=2*_k-1; - val=(k0-_k+s)^s; - *_y++=val; - yy=MAC16_16(yy,val,val); - /*_n==1*/ - s=-(int)_i; - val=(_k+s)^s; - *_y=val; - yy=MAC16_16(yy,val,val); - return yy; -} - -opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ - return cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); -} - -#else /* SMALL_FOOTPRINT */ - -/*Computes the next row/column of any recurrence that obeys the relation - u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1]. - _ui0 is the base case for the new row/column.*/ -static OPUS_INLINE void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){ - opus_uint32 ui1; - unsigned j; - /*This do-while will overrun the array if we don't have storage for at least - 2 values.*/ - j=1; do { - ui1=UADD32(UADD32(_ui[j],_ui[j-1]),_ui0); - _ui[j-1]=_ui0; - _ui0=ui1; - } while (++j<_len); - _ui[j-1]=_ui0; -} - -/*Computes the previous row/column of any recurrence that obeys the relation - u[i-1][j]=u[i][j]-u[i][j-1]-u[i-1][j-1]. - _ui0 is the base case for the new row/column.*/ -static OPUS_INLINE void uprev(opus_uint32 *_ui,unsigned _n,opus_uint32 _ui0){ - opus_uint32 ui1; - unsigned j; - /*This do-while will overrun the array if we don't have storage for at least - 2 values.*/ - j=1; do { - ui1=USUB32(USUB32(_ui[j],_ui[j-1]),_ui0); - _ui[j-1]=_ui0; - _ui0=ui1; - } while (++j<_n); - _ui[j-1]=_ui0; -} - -/*Compute V(_n,_k), as well as U(_n,0..._k+1). 
- _u: On exit, _u[i] contains U(_n,i) for i in [0..._k+1].*/ -static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){ - opus_uint32 um2; - unsigned len; - unsigned k; - len=_k+2; - /*We require storage at least 3 values (e.g., _k>0).*/ - celt_assert(len>=3); - _u[0]=0; - _u[1]=um2=1; - /*If _n==0, _u[0] should be 1 and the rest should be 0.*/ - /*If _n==1, _u[i] should be 1 for i>1.*/ - celt_assert(_n>=2); - /*If _k==0, the following do-while loop will overflow the buffer.*/ - celt_assert(_k>0); - k=2; - do _u[k]=(k<<1)-1; - while(++k<len); - for(k=2;k<_n;k++)unext(_u+1,_k+1,1); - return _u[_k]+_u[_k+1]; -} - -/*Returns the _i'th combination of _k elements chosen from a set of size _n - with associated sign bits. - _y: Returns the vector of pulses. - _u: Must contain entries [0..._k+1] of row _n of U() on input. - Its contents will be destructively modified.*/ -static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ - int j; - opus_int16 val; - opus_val32 yy=0; - celt_assert(_n>0); - j=0; - do{ - opus_uint32 p; - int s; - int yj; - p=_u[_k+1]; - s=-(_i>=p); - _i-=p&s; - yj=_k; - p=_u[_k]; - while(p>_i)p=_u[--_k]; - _i-=p; - yj-=_k; - val=(yj+s)^s; - _y[j]=val; - yy=MAC16_16(yy,val,val); - uprev(_u,_k+2,0); - } - while(++j<_n); - return yy; -} - -/*Returns the index of the given combination of K elements chosen from a set - of size 1 with associated sign bits. - _y: The vector of pulses, whose sum of absolute values is K. - _k: Returns K.*/ -static OPUS_INLINE opus_uint32 icwrs1(const int *_y,int *_k){ - *_k=abs(_y[0]); - return _y[0]<0; -} - -/*Returns the index of the given combination of K elements chosen from a set - of size _n with associated sign bits. - _y: The vector of pulses, whose sum of absolute values must be _k. 
- _nc: Returns V(_n,_k).*/ -static OPUS_INLINE opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y, - opus_uint32 *_u){ - opus_uint32 i; - int j; - int k; - /*We can't unroll the first two iterations of the loop unless _n>=2.*/ - celt_assert(_n>=2); - _u[0]=0; - for(k=1;k<=_k+1;k++)_u[k]=(k<<1)-1; - i=icwrs1(_y+_n-1,&k); - j=_n-2; - i+=_u[k]; - k+=abs(_y[j]); - if(_y[j]<0)i+=_u[k+1]; - while(j-->0){ - unext(_u,_k+2,0); - i+=_u[k]; - k+=abs(_y[j]); - if(_y[j]<0)i+=_u[k+1]; - } - *_nc=_u[k]+_u[k+1]; - return i; -} - -#ifdef CUSTOM_MODES -void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){ - int k; - /*_maxk==0 => there's nothing to do.*/ - celt_assert(_maxk>0); - _bits[0]=0; - if (_n==1) - { - for (k=1;k<=_maxk;k++) - _bits[k] = 1<<_frac; - } - else { - VARDECL(opus_uint32,u); - SAVE_STACK; - ALLOC(u,_maxk+2U,opus_uint32); - ncwrs_urow(_n,_maxk,u); - for(k=1;k<=_maxk;k++) - _bits[k]=log2_frac(u[k]+u[k+1],_frac); - RESTORE_STACK; - } -} -#endif /* CUSTOM_MODES */ - -void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ - opus_uint32 i; - VARDECL(opus_uint32,u); - opus_uint32 nc; - SAVE_STACK; - celt_assert(_k>0); - ALLOC(u,_k+2U,opus_uint32); - i=icwrs(_n,_k,&nc,_y,u); - ec_enc_uint(_enc,i,nc); - RESTORE_STACK; -} - -opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ - VARDECL(opus_uint32,u); - int ret; - SAVE_STACK; - celt_assert(_k>0); - ALLOC(u,_k+2U,opus_uint32); - ret = cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); - RESTORE_STACK; - return ret; -} - -#endif /* SMALL_FOOTPRINT */ diff --git a/thirdparty/opus/celt/cwrs.h b/thirdparty/opus/celt/cwrs.h deleted file mode 100644 index 7cd4717459..0000000000 --- a/thirdparty/opus/celt/cwrs.h +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Copyright (c) 2007-2009 Timothy B. Terriberry - Written by Timothy B. 
Terriberry and Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef CWRS_H -#define CWRS_H - -#include "arch.h" -#include "stack_alloc.h" -#include "entenc.h" -#include "entdec.h" - -#ifdef CUSTOM_MODES -int log2_frac(opus_uint32 val, int frac); -#endif - -void get_required_bits(opus_int16 *bits, int N, int K, int frac); - -void encode_pulses(const int *_y, int N, int K, ec_enc *enc); - -opus_val32 decode_pulses(int *_y, int N, int K, ec_dec *dec); - -#endif /* CWRS_H */ diff --git a/thirdparty/opus/celt/ecintrin.h b/thirdparty/opus/celt/ecintrin.h deleted file mode 100644 index 2263cff6bd..0000000000 --- a/thirdparty/opus/celt/ecintrin.h +++ /dev/null @@ -1,87 +0,0 @@ -/* Copyright (c) 2003-2008 Timothy B. 
Terriberry - Copyright (c) 2008 Xiph.Org Foundation */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/*Some common macros for potential platform-specific optimization.*/ -#include "opus_types.h" -#include <math.h> -#include <limits.h> -#include "arch.h" -#if !defined(_ecintrin_H) -# define _ecintrin_H (1) - -/*Some specific platforms may have optimized intrinsic or OPUS_INLINE assembly - versions of these functions which can substantially improve performance. 
- We define macros for them to allow easy incorporation of these non-ANSI - features.*/ - -/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if - given an appropriate architecture, but the branchless bit-twiddling versions - are just as fast, and do not require any special target architecture. - Earlier gcc versions (3.x) compiled both code to the same assembly - instructions, because of the way they represented ((_b)>(_a)) internally.*/ -# define EC_MINI(_a,_b) ((_a)+(((_b)-(_a))&-((_b)<(_a)))) - -/*Count leading zeros. - This macro should only be used for implementing ec_ilog(), if it is defined. - All other code should use EC_ILOG() instead.*/ -#if defined(_MSC_VER) && (_MSC_VER >= 1400) -# include <intrin.h> -/*In _DEBUG mode this is not an intrinsic by default.*/ -# pragma intrinsic(_BitScanReverse) - -static __inline int ec_bsr(unsigned long _x){ - unsigned long ret; - _BitScanReverse(&ret,_x); - return (int)ret; -} -# define EC_CLZ0 (1) -# define EC_CLZ(_x) (-ec_bsr(_x)) -#elif defined(ENABLE_TI_DSPLIB) -# include "dsplib.h" -# define EC_CLZ0 (31) -# define EC_CLZ(_x) (_lnorm(_x)) -#elif __GNUC_PREREQ(3,4) -# if INT_MAX>=2147483647 -# define EC_CLZ0 ((int)sizeof(unsigned)*CHAR_BIT) -# define EC_CLZ(_x) (__builtin_clz(_x)) -# elif LONG_MAX>=2147483647L -# define EC_CLZ0 ((int)sizeof(unsigned long)*CHAR_BIT) -# define EC_CLZ(_x) (__builtin_clzl(_x)) -# endif -#endif - -#if defined(EC_CLZ) -/*Note that __builtin_clz is not defined when _x==0, according to the gcc - documentation (and that of the BSR instruction that implements it on x86). - The majority of the time we can never pass it zero. 
- When we need to, it can be special cased.*/ -# define EC_ILOG(_x) (EC_CLZ0-EC_CLZ(_x)) -#else -int ec_ilog(opus_uint32 _v); -# define EC_ILOG(_x) (ec_ilog(_x)) -#endif -#endif diff --git a/thirdparty/opus/celt/entcode.c b/thirdparty/opus/celt/entcode.c deleted file mode 100644 index 70f32016ec..0000000000 --- a/thirdparty/opus/celt/entcode.c +++ /dev/null @@ -1,153 +0,0 @@ -/* Copyright (c) 2001-2011 Timothy B. Terriberry -*/ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "entcode.h" -#include "arch.h" - -#if !defined(EC_CLZ) -/*This is a fallback for systems where we don't know how to access - a BSR or CLZ instruction (see ecintrin.h). 
- If you are optimizing Opus on a new platform and it has a native CLZ or - BZR (e.g. cell, MIPS, x86, etc) then making it available to Opus will be - an easy performance win.*/ -int ec_ilog(opus_uint32 _v){ - /*On a Pentium M, this branchless version tested as the fastest on - 1,000,000,000 random 32-bit integers, edging out a similar version with - branches, and a 256-entry LUT version.*/ - int ret; - int m; - ret=!!_v; - m=!!(_v&0xFFFF0000)<<4; - _v>>=m; - ret|=m; - m=!!(_v&0xFF00)<<3; - _v>>=m; - ret|=m; - m=!!(_v&0xF0)<<2; - _v>>=m; - ret|=m; - m=!!(_v&0xC)<<1; - _v>>=m; - ret|=m; - ret+=!!(_v&0x2); - return ret; -} -#endif - -#if 1 -/* This is a faster version of ec_tell_frac() that takes advantage - of the low (1/8 bit) resolution to use just a linear function - followed by a lookup to determine the exact transition thresholds. */ -opus_uint32 ec_tell_frac(ec_ctx *_this){ - static const unsigned correction[8] = - {35733, 38967, 42495, 46340, - 50535, 55109, 60097, 65535}; - opus_uint32 nbits; - opus_uint32 r; - int l; - unsigned b; - nbits=_this->nbits_total<<BITRES; - l=EC_ILOG(_this->rng); - r=_this->rng>>(l-16); - b = (r>>12)-8; - b += r>correction[b]; - l = (l<<3)+b; - return nbits-l; -} -#else -opus_uint32 ec_tell_frac(ec_ctx *_this){ - opus_uint32 nbits; - opus_uint32 r; - int l; - int i; - /*To handle the non-integral number of bits still left in the encoder/decoder - state, we compute the worst-case number of bits of val that must be - encoded to ensure that the value is inside the range for any possible - subsequent bits. - The computation here is independent of val itself (the decoder does not - even track that value), even though the real number of bits used after - ec_enc_done() may be 1 smaller if rng is a power of two and the - corresponding trailing bits of val are all zeros. - If we did try to track that special case, then coding a value with a - probability of 1/(1<<n) might sometimes appear to use more than n bits. 
- This may help explain the surprising result that a newly initialized - encoder or decoder claims to have used 1 bit.*/ - nbits=_this->nbits_total<<BITRES; - l=EC_ILOG(_this->rng); - r=_this->rng>>(l-16); - for(i=BITRES;i-->0;){ - int b; - r=r*r>>15; - b=(int)(r>>16); - l=l<<1|b; - r>>=b; - } - return nbits-l; -} -#endif - -#ifdef USE_SMALL_DIV_TABLE -/* Result of 2^32/(2*i+1), except for i=0. */ -const opus_uint32 SMALL_DIV_TABLE[129] = { - 0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924, - 0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111, - 0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C, - 0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084, - 0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906, - 0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A, - 0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A, - 0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104, - 0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1, - 0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2, - 0x0329161F, 0x03159721, 0x03030303, 0x02F14990, - 0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46, - 0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597, - 0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17, - 0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902, - 0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810, - 0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC, - 0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30, - 0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364, - 0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14, - 0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F, - 0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE, - 0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6, - 0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3, - 0x01539094, 0x01501501, 0x014CAB88, 0x0149539E, - 0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A, - 0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190, - 0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227, - 0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4, - 0x01194538, 0x0116E068, 0x011485F0, 0x0112358E, - 0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3, - 
0x01073260, 0x0105197F, 0x0103091B, 0x01010101 -}; -#endif diff --git a/thirdparty/opus/celt/entcode.h b/thirdparty/opus/celt/entcode.h deleted file mode 100644 index 13d6c84ef0..0000000000 --- a/thirdparty/opus/celt/entcode.h +++ /dev/null @@ -1,152 +0,0 @@ -/* Copyright (c) 2001-2011 Timothy B. Terriberry - Copyright (c) 2008-2009 Xiph.Org Foundation */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include "opus_types.h" -#include "opus_defines.h" - -#if !defined(_entcode_H) -# define _entcode_H (1) -# include <limits.h> -# include <stddef.h> -# include "ecintrin.h" - -extern const opus_uint32 SMALL_DIV_TABLE[129]; - -#ifdef OPUS_ARM_ASM -#define USE_SMALL_DIV_TABLE -#endif - -/*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a - larger type, you can speed up the decoder by using it here.*/ -typedef opus_uint32 ec_window; -typedef struct ec_ctx ec_ctx; -typedef struct ec_ctx ec_enc; -typedef struct ec_ctx ec_dec; - -# define EC_WINDOW_SIZE ((int)sizeof(ec_window)*CHAR_BIT) - -/*The number of bits to use for the range-coded part of unsigned integers.*/ -# define EC_UINT_BITS (8) - -/*The resolution of fractional-precision bit usage measurements, i.e., - 3 => 1/8th bits.*/ -# define BITRES 3 - -/*The entropy encoder/decoder context. - We use the same structure for both, so that common functions like ec_tell() - can be used on either one.*/ -struct ec_ctx{ - /*Buffered input/output.*/ - unsigned char *buf; - /*The size of the buffer.*/ - opus_uint32 storage; - /*The offset at which the last byte containing raw bits was read/written.*/ - opus_uint32 end_offs; - /*Bits that will be read from/written at the end.*/ - ec_window end_window; - /*Number of valid bits in end_window.*/ - int nend_bits; - /*The total number of whole bits read/written. - This does not include partial bits currently in the range coder.*/ - int nbits_total; - /*The offset at which the next range coder byte will be read/written.*/ - opus_uint32 offs; - /*The number of values in the current range.*/ - opus_uint32 rng; - /*In the decoder: the difference between the top of the current range and - the input value, minus one. - In the encoder: the low end of the current range.*/ - opus_uint32 val; - /*In the decoder: the saved normalization factor from ec_decode(). 
- In the encoder: the number of oustanding carry propagating symbols.*/ - opus_uint32 ext; - /*A buffered input/output symbol, awaiting carry propagation.*/ - int rem; - /*Nonzero if an error occurred.*/ - int error; -}; - -static OPUS_INLINE opus_uint32 ec_range_bytes(ec_ctx *_this){ - return _this->offs; -} - -static OPUS_INLINE unsigned char *ec_get_buffer(ec_ctx *_this){ - return _this->buf; -} - -static OPUS_INLINE int ec_get_error(ec_ctx *_this){ - return _this->error; -} - -/*Returns the number of bits "used" by the encoded or decoded symbols so far. - This same number can be computed in either the encoder or the decoder, and is - suitable for making coding decisions. - Return: The number of bits. - This will always be slightly larger than the exact value (e.g., all - rounding error is in the positive direction).*/ -static OPUS_INLINE int ec_tell(ec_ctx *_this){ - return _this->nbits_total-EC_ILOG(_this->rng); -} - -/*Returns the number of bits "used" by the encoded or decoded symbols so far. - This same number can be computed in either the encoder or the decoder, and is - suitable for making coding decisions. - Return: The number of bits scaled by 2**BITRES. 
- This will always be slightly larger than the exact value (e.g., all - rounding error is in the positive direction).*/ -opus_uint32 ec_tell_frac(ec_ctx *_this); - -/* Tested exhaustively for all n and for 1<=d<=256 */ -static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) { - celt_assert(d>0); -#ifdef USE_SMALL_DIV_TABLE - if (d>256) - return n/d; - else { - opus_uint32 t, q; - t = EC_ILOG(d&-d); - q = (opus_uint64)SMALL_DIV_TABLE[d>>t]*(n>>(t-1))>>32; - return q+(n-q*d >= d); - } -#else - return n/d; -#endif -} - -static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) { - celt_assert(d>0); -#ifdef USE_SMALL_DIV_TABLE - if (n<0) - return -(opus_int32)celt_udiv(-n, d); - else - return celt_udiv(n, d); -#else - return n/d; -#endif -} - -#endif diff --git a/thirdparty/opus/celt/entdec.c b/thirdparty/opus/celt/entdec.c deleted file mode 100644 index 0b3433ed8b..0000000000 --- a/thirdparty/opus/celt/entdec.c +++ /dev/null @@ -1,245 +0,0 @@ -/* Copyright (c) 2001-2011 Timothy B. Terriberry - Copyright (c) 2008-2009 Xiph.Org Foundation */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <stddef.h> -#include "os_support.h" -#include "arch.h" -#include "entdec.h" -#include "mfrngcod.h" - -/*A range decoder. - This is an entropy decoder based upon \cite{Mar79}, which is itself a - rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}. - It is very similar to arithmetic encoding, except that encoding is done with - digits in any base, instead of with bits, and so it is faster when using - larger bases (i.e.: a byte). - The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$ - is the base, longer than the theoretical optimum, but to my knowledge there - is no published justification for this claim. - This only seems true when using near-infinite precision arithmetic so that - the process is carried out with no rounding errors. - - An excellent description of implementation details is available at - http://www.arturocampos.com/ac_range.html - A recent work \cite{MNW98} which proposes several changes to arithmetic - encoding for efficiency actually re-discovers many of the principles - behind range encoding, and presents a good theoretical analysis of them. - - End of stream is handled by writing out the smallest number of bits that - ensures that the stream will be correctly decoded regardless of the value of - any subsequent bits. 
- ec_tell() can be used to determine how many bits were needed to decode - all the symbols thus far; other data can be packed in the remaining bits of - the input buffer. - @PHDTHESIS{Pas76, - author="Richard Clark Pasco", - title="Source coding algorithms for fast data compression", - school="Dept. of Electrical Engineering, Stanford University", - address="Stanford, CA", - month=May, - year=1976 - } - @INPROCEEDINGS{Mar79, - author="Martin, G.N.N.", - title="Range encoding: an algorithm for removing redundancy from a digitised - message", - booktitle="Video & Data Recording Conference", - year=1979, - address="Southampton", - month=Jul - } - @ARTICLE{MNW98, - author="Alistair Moffat and Radford Neal and Ian H. Witten", - title="Arithmetic Coding Revisited", - journal="{ACM} Transactions on Information Systems", - year=1998, - volume=16, - number=3, - pages="256--294", - month=Jul, - URL="http://www.stanford.edu/class/ee398a/handouts/papers/Moffat98ArithmCoding.pdf" - }*/ - -static int ec_read_byte(ec_dec *_this){ - return _this->offs<_this->storage?_this->buf[_this->offs++]:0; -} - -static int ec_read_byte_from_end(ec_dec *_this){ - return _this->end_offs<_this->storage? 
- _this->buf[_this->storage-++(_this->end_offs)]:0; -} - -/*Normalizes the contents of val and rng so that rng lies entirely in the - high-order symbol.*/ -static void ec_dec_normalize(ec_dec *_this){ - /*If the range is too small, rescale it and input some bits.*/ - while(_this->rng<=EC_CODE_BOT){ - int sym; - _this->nbits_total+=EC_SYM_BITS; - _this->rng<<=EC_SYM_BITS; - /*Use up the remaining bits from our last symbol.*/ - sym=_this->rem; - /*Read the next value from the input.*/ - _this->rem=ec_read_byte(_this); - /*Take the rest of the bits we need from this new symbol.*/ - sym=(sym<<EC_SYM_BITS|_this->rem)>>(EC_SYM_BITS-EC_CODE_EXTRA); - /*And subtract them from val, capped to be less than EC_CODE_TOP.*/ - _this->val=((_this->val<<EC_SYM_BITS)+(EC_SYM_MAX&~sym))&(EC_CODE_TOP-1); - } -} - -void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){ - _this->buf=_buf; - _this->storage=_storage; - _this->end_offs=0; - _this->end_window=0; - _this->nend_bits=0; - /*This is the offset from which ec_tell() will subtract partial bits. 
- The final value after the ec_dec_normalize() call will be the same as in - the encoder, but we have to compensate for the bits that are added there.*/ - _this->nbits_total=EC_CODE_BITS+1 - -((EC_CODE_BITS-EC_CODE_EXTRA)/EC_SYM_BITS)*EC_SYM_BITS; - _this->offs=0; - _this->rng=1U<<EC_CODE_EXTRA; - _this->rem=ec_read_byte(_this); - _this->val=_this->rng-1-(_this->rem>>(EC_SYM_BITS-EC_CODE_EXTRA)); - _this->error=0; - /*Normalize the interval.*/ - ec_dec_normalize(_this); -} - -unsigned ec_decode(ec_dec *_this,unsigned _ft){ - unsigned s; - _this->ext=celt_udiv(_this->rng,_ft); - s=(unsigned)(_this->val/_this->ext); - return _ft-EC_MINI(s+1,_ft); -} - -unsigned ec_decode_bin(ec_dec *_this,unsigned _bits){ - unsigned s; - _this->ext=_this->rng>>_bits; - s=(unsigned)(_this->val/_this->ext); - return (1U<<_bits)-EC_MINI(s+1U,1U<<_bits); -} - -void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft){ - opus_uint32 s; - s=IMUL32(_this->ext,_ft-_fh); - _this->val-=s; - _this->rng=_fl>0?IMUL32(_this->ext,_fh-_fl):_this->rng-s; - ec_dec_normalize(_this); -} - -/*The probability of having a "one" is 1/(1<<_logp).*/ -int ec_dec_bit_logp(ec_dec *_this,unsigned _logp){ - opus_uint32 r; - opus_uint32 d; - opus_uint32 s; - int ret; - r=_this->rng; - d=_this->val; - s=r>>_logp; - ret=d<s; - if(!ret)_this->val=d-s; - _this->rng=ret?s:r-s; - ec_dec_normalize(_this); - return ret; -} - -int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb){ - opus_uint32 r; - opus_uint32 d; - opus_uint32 s; - opus_uint32 t; - int ret; - s=_this->rng; - d=_this->val; - r=s>>_ftb; - ret=-1; - do{ - t=s; - s=IMUL32(r,_icdf[++ret]); - } - while(d<s); - _this->val=d-s; - _this->rng=t-s; - ec_dec_normalize(_this); - return ret; -} - -opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft){ - unsigned ft; - unsigned s; - int ftb; - /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/ - celt_assert(_ft>1); - _ft--; - ftb=EC_ILOG(_ft); - if(ftb>EC_UINT_BITS){ - 
opus_uint32 t; - ftb-=EC_UINT_BITS; - ft=(unsigned)(_ft>>ftb)+1; - s=ec_decode(_this,ft); - ec_dec_update(_this,s,s+1,ft); - t=(opus_uint32)s<<ftb|ec_dec_bits(_this,ftb); - if(t<=_ft)return t; - _this->error=1; - return _ft; - } - else{ - _ft++; - s=ec_decode(_this,(unsigned)_ft); - ec_dec_update(_this,s,s+1,(unsigned)_ft); - return s; - } -} - -opus_uint32 ec_dec_bits(ec_dec *_this,unsigned _bits){ - ec_window window; - int available; - opus_uint32 ret; - window=_this->end_window; - available=_this->nend_bits; - if((unsigned)available<_bits){ - do{ - window|=(ec_window)ec_read_byte_from_end(_this)<<available; - available+=EC_SYM_BITS; - } - while(available<=EC_WINDOW_SIZE-EC_SYM_BITS); - } - ret=(opus_uint32)window&(((opus_uint32)1<<_bits)-1U); - window>>=_bits; - available-=_bits; - _this->end_window=window; - _this->nend_bits=available; - _this->nbits_total+=_bits; - return ret; -} diff --git a/thirdparty/opus/celt/entdec.h b/thirdparty/opus/celt/entdec.h deleted file mode 100644 index d8ab318730..0000000000 --- a/thirdparty/opus/celt/entdec.h +++ /dev/null @@ -1,100 +0,0 @@ -/* Copyright (c) 2001-2011 Timothy B. Terriberry - Copyright (c) 2008-2009 Xiph.Org Foundation */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#if !defined(_entdec_H) -# define _entdec_H (1) -# include <limits.h> -# include "entcode.h" - -/*Initializes the decoder. - _buf: The input buffer to use. - Return: 0 on success, or a negative value on error.*/ -void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage); - -/*Calculates the cumulative frequency for the next symbol. - This can then be fed into the probability model to determine what that - symbol is, and the additional frequency information required to advance to - the next symbol. - This function cannot be called more than once without a corresponding call to - ec_dec_update(), or decoding will not proceed correctly. - _ft: The total frequency of the symbols in the alphabet the next symbol was - encoded with. - Return: A cumulative frequency representing the encoded symbol. - If the cumulative frequency of all the symbols before the one that - was encoded was fl, and the cumulative frequency of all the symbols - up to and including the one encoded is fh, then the returned value - will fall in the range [fl,fh).*/ -unsigned ec_decode(ec_dec *_this,unsigned _ft); - -/*Equivalent to ec_decode() with _ft==1<<_bits.*/ -unsigned ec_decode_bin(ec_dec *_this,unsigned _bits); - -/*Advance the decoder past the next symbol using the frequency information the - symbol was encoded with. - Exactly one call to ec_decode() must have been made so that all necessary - intermediate calculations are performed. 
- _fl: The cumulative frequency of all symbols that come before the symbol - decoded. - _fh: The cumulative frequency of all symbols up to and including the symbol - decoded. - Together with _fl, this defines the range [_fl,_fh) in which the value - returned above must fall. - _ft: The total frequency of the symbols in the alphabet the symbol decoded - was encoded in. - This must be the same as passed to the preceding call to ec_decode().*/ -void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft); - -/* Decode a bit that has a 1/(1<<_logp) probability of being a one */ -int ec_dec_bit_logp(ec_dec *_this,unsigned _logp); - -/*Decodes a symbol given an "inverse" CDF table. - No call to ec_dec_update() is necessary after this call. - _icdf: The "inverse" CDF, such that symbol s falls in the range - [s>0?ft-_icdf[s-1]:0,ft-_icdf[s]), where ft=1<<_ftb. - The values must be monotonically non-increasing, and the last value - must be 0. - _ftb: The number of bits of precision in the cumulative distribution. - Return: The decoded symbol s.*/ -int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb); - -/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream. - The bits must have been encoded with ec_enc_uint(). - No call to ec_dec_update() is necessary after this call. - _ft: The number of integers that can be decoded (one more than the max). - This must be at least one, and no more than 2**32-1. - Return: The decoded bits.*/ -opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft); - -/*Extracts a sequence of raw bits from the stream. - The bits must have been encoded with ec_enc_bits(). - No call to ec_dec_update() is necessary after this call. - _ftb: The number of bits to extract. - This must be between 0 and 25, inclusive. 
- Return: The decoded bits.*/ -opus_uint32 ec_dec_bits(ec_dec *_this,unsigned _ftb); - -#endif diff --git a/thirdparty/opus/celt/entenc.c b/thirdparty/opus/celt/entenc.c deleted file mode 100644 index f1750d25b8..0000000000 --- a/thirdparty/opus/celt/entenc.c +++ /dev/null @@ -1,294 +0,0 @@ -/* Copyright (c) 2001-2011 Timothy B. Terriberry - Copyright (c) 2008-2009 Xiph.Org Foundation */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#if defined(HAVE_CONFIG_H) -# include "config.h" -#endif -#include "os_support.h" -#include "arch.h" -#include "entenc.h" -#include "mfrngcod.h" - -/*A range encoder. - See entdec.c and the references for implementation details \cite{Mar79,MNW98}. 
- - @INPROCEEDINGS{Mar79, - author="Martin, G.N.N.", - title="Range encoding: an algorithm for removing redundancy from a digitised - message", - booktitle="Video \& Data Recording Conference", - year=1979, - address="Southampton", - month=Jul - } - @ARTICLE{MNW98, - author="Alistair Moffat and Radford Neal and Ian H. Witten", - title="Arithmetic Coding Revisited", - journal="{ACM} Transactions on Information Systems", - year=1998, - volume=16, - number=3, - pages="256--294", - month=Jul, - URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf" - }*/ - -static int ec_write_byte(ec_enc *_this,unsigned _value){ - if(_this->offs+_this->end_offs>=_this->storage)return -1; - _this->buf[_this->offs++]=(unsigned char)_value; - return 0; -} - -static int ec_write_byte_at_end(ec_enc *_this,unsigned _value){ - if(_this->offs+_this->end_offs>=_this->storage)return -1; - _this->buf[_this->storage-++(_this->end_offs)]=(unsigned char)_value; - return 0; -} - -/*Outputs a symbol, with a carry bit. - If there is a potential to propagate a carry over several symbols, they are - buffered until it can be determined whether or not an actual carry will - occur. - If the counter for the buffered symbols overflows, then the stream becomes - undecodable. - This gives a theoretical limit of a few billion symbols in a single packet on - 32-bit systems. - The alternative is to truncate the range in order to force a carry, but - requires similar carry tracking in the decoder, needlessly slowing it down.*/ -static void ec_enc_carry_out(ec_enc *_this,int _c){ - if(_c!=EC_SYM_MAX){ - /*No further carry propagation possible, flush buffer.*/ - int carry; - carry=_c>>EC_SYM_BITS; - /*Don't output a byte on the first write. 
- This compare should be taken care of by branch-prediction thereafter.*/ - if(_this->rem>=0)_this->error|=ec_write_byte(_this,_this->rem+carry); - if(_this->ext>0){ - unsigned sym; - sym=(EC_SYM_MAX+carry)&EC_SYM_MAX; - do _this->error|=ec_write_byte(_this,sym); - while(--(_this->ext)>0); - } - _this->rem=_c&EC_SYM_MAX; - } - else _this->ext++; -} - -static OPUS_INLINE void ec_enc_normalize(ec_enc *_this){ - /*If the range is too small, output some bits and rescale it.*/ - while(_this->rng<=EC_CODE_BOT){ - ec_enc_carry_out(_this,(int)(_this->val>>EC_CODE_SHIFT)); - /*Move the next-to-high-order symbol into the high-order position.*/ - _this->val=(_this->val<<EC_SYM_BITS)&(EC_CODE_TOP-1); - _this->rng<<=EC_SYM_BITS; - _this->nbits_total+=EC_SYM_BITS; - } -} - -void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size){ - _this->buf=_buf; - _this->end_offs=0; - _this->end_window=0; - _this->nend_bits=0; - /*This is the offset from which ec_tell() will subtract partial bits.*/ - _this->nbits_total=EC_CODE_BITS+1; - _this->offs=0; - _this->rng=EC_CODE_TOP; - _this->rem=-1; - _this->val=0; - _this->ext=0; - _this->storage=_size; - _this->error=0; -} - -void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){ - opus_uint32 r; - r=celt_udiv(_this->rng,_ft); - if(_fl>0){ - _this->val+=_this->rng-IMUL32(r,(_ft-_fl)); - _this->rng=IMUL32(r,(_fh-_fl)); - } - else _this->rng-=IMUL32(r,(_ft-_fh)); - ec_enc_normalize(_this); -} - -void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits){ - opus_uint32 r; - r=_this->rng>>_bits; - if(_fl>0){ - _this->val+=_this->rng-IMUL32(r,((1U<<_bits)-_fl)); - _this->rng=IMUL32(r,(_fh-_fl)); - } - else _this->rng-=IMUL32(r,((1U<<_bits)-_fh)); - ec_enc_normalize(_this); -} - -/*The probability of having a "one" is 1/(1<<_logp).*/ -void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp){ - opus_uint32 r; - opus_uint32 s; - opus_uint32 l; - r=_this->rng; - l=_this->val; - s=r>>_logp; - r-=s; - 
if(_val)_this->val=l+r; - _this->rng=_val?s:r; - ec_enc_normalize(_this); -} - -void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb){ - opus_uint32 r; - r=_this->rng>>_ftb; - if(_s>0){ - _this->val+=_this->rng-IMUL32(r,_icdf[_s-1]); - _this->rng=IMUL32(r,_icdf[_s-1]-_icdf[_s]); - } - else _this->rng-=IMUL32(r,_icdf[_s]); - ec_enc_normalize(_this); -} - -void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft){ - unsigned ft; - unsigned fl; - int ftb; - /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/ - celt_assert(_ft>1); - _ft--; - ftb=EC_ILOG(_ft); - if(ftb>EC_UINT_BITS){ - ftb-=EC_UINT_BITS; - ft=(_ft>>ftb)+1; - fl=(unsigned)(_fl>>ftb); - ec_encode(_this,fl,fl+1,ft); - ec_enc_bits(_this,_fl&(((opus_uint32)1<<ftb)-1U),ftb); - } - else ec_encode(_this,_fl,_fl+1,_ft+1); -} - -void ec_enc_bits(ec_enc *_this,opus_uint32 _fl,unsigned _bits){ - ec_window window; - int used; - window=_this->end_window; - used=_this->nend_bits; - celt_assert(_bits>0); - if(used+_bits>EC_WINDOW_SIZE){ - do{ - _this->error|=ec_write_byte_at_end(_this,(unsigned)window&EC_SYM_MAX); - window>>=EC_SYM_BITS; - used-=EC_SYM_BITS; - } - while(used>=EC_SYM_BITS); - } - window|=(ec_window)_fl<<used; - used+=_bits; - _this->end_window=window; - _this->nend_bits=used; - _this->nbits_total+=_bits; -} - -void ec_enc_patch_initial_bits(ec_enc *_this,unsigned _val,unsigned _nbits){ - int shift; - unsigned mask; - celt_assert(_nbits<=EC_SYM_BITS); - shift=EC_SYM_BITS-_nbits; - mask=((1<<_nbits)-1)<<shift; - if(_this->offs>0){ - /*The first byte has been finalized.*/ - _this->buf[0]=(unsigned char)((_this->buf[0]&~mask)|_val<<shift); - } - else if(_this->rem>=0){ - /*The first byte is still awaiting carry propagation.*/ - _this->rem=(_this->rem&~mask)|_val<<shift; - } - else if(_this->rng<=(EC_CODE_TOP>>_nbits)){ - /*The renormalization loop has never been run.*/ - _this->val=(_this->val&~((opus_uint32)mask<<EC_CODE_SHIFT))| - 
(opus_uint32)_val<<(EC_CODE_SHIFT+shift); - } - /*The encoder hasn't even encoded _nbits of data yet.*/ - else _this->error=-1; -} - -void ec_enc_shrink(ec_enc *_this,opus_uint32 _size){ - celt_assert(_this->offs+_this->end_offs<=_size); - OPUS_MOVE(_this->buf+_size-_this->end_offs, - _this->buf+_this->storage-_this->end_offs,_this->end_offs); - _this->storage=_size; -} - -void ec_enc_done(ec_enc *_this){ - ec_window window; - int used; - opus_uint32 msk; - opus_uint32 end; - int l; - /*We output the minimum number of bits that ensures that the symbols encoded - thus far will be decoded correctly regardless of the bits that follow.*/ - l=EC_CODE_BITS-EC_ILOG(_this->rng); - msk=(EC_CODE_TOP-1)>>l; - end=(_this->val+msk)&~msk; - if((end|msk)>=_this->val+_this->rng){ - l++; - msk>>=1; - end=(_this->val+msk)&~msk; - } - while(l>0){ - ec_enc_carry_out(_this,(int)(end>>EC_CODE_SHIFT)); - end=(end<<EC_SYM_BITS)&(EC_CODE_TOP-1); - l-=EC_SYM_BITS; - } - /*If we have a buffered byte flush it into the output buffer.*/ - if(_this->rem>=0||_this->ext>0)ec_enc_carry_out(_this,0); - /*If we have buffered extra bits, flush them as well.*/ - window=_this->end_window; - used=_this->nend_bits; - while(used>=EC_SYM_BITS){ - _this->error|=ec_write_byte_at_end(_this,(unsigned)window&EC_SYM_MAX); - window>>=EC_SYM_BITS; - used-=EC_SYM_BITS; - } - /*Clear any excess space and add any remaining extra bits to the last byte.*/ - if(!_this->error){ - OPUS_CLEAR(_this->buf+_this->offs, - _this->storage-_this->offs-_this->end_offs); - if(used>0){ - /*If there's no range coder data at all, give up.*/ - if(_this->end_offs>=_this->storage)_this->error=-1; - else{ - l=-l; - /*If we've busted, don't add too many extra bits to the last byte; it - would corrupt the range coder data, and that's more important.*/ - if(_this->offs+_this->end_offs>=_this->storage&&l<used){ - window&=(1<<l)-1; - _this->error=-1; - } - _this->buf[_this->storage-_this->end_offs-1]|=(unsigned char)window; - } - } - } -} diff 
--git a/thirdparty/opus/celt/entenc.h b/thirdparty/opus/celt/entenc.h deleted file mode 100644 index 796bc4d572..0000000000 --- a/thirdparty/opus/celt/entenc.h +++ /dev/null @@ -1,110 +0,0 @@ -/* Copyright (c) 2001-2011 Timothy B. Terriberry - Copyright (c) 2008-2009 Xiph.Org Foundation */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#if !defined(_entenc_H) -# define _entenc_H (1) -# include <stddef.h> -# include "entcode.h" - -/*Initializes the encoder. - _buf: The buffer to store output bytes in. - _size: The size of the buffer, in chars.*/ -void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size); -/*Encodes a symbol given its frequency information. 
- The frequency information must be discernable by the decoder, assuming it - has read only the previous symbols from the stream. - It is allowable to change the frequency information, or even the entire - source alphabet, so long as the decoder can tell from the context of the - previously encoded information that it is supposed to do so as well. - _fl: The cumulative frequency of all symbols that come before the one to be - encoded. - _fh: The cumulative frequency of all symbols up to and including the one to - be encoded. - Together with _fl, this defines the range [_fl,_fh) in which the - decoded value will fall. - _ft: The sum of the frequencies of all the symbols*/ -void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft); - -/*Equivalent to ec_encode() with _ft==1<<_bits.*/ -void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits); - -/* Encode a bit that has a 1/(1<<_logp) probability of being a one */ -void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp); - -/*Encodes a symbol given an "inverse" CDF table. - _s: The index of the symbol to encode. - _icdf: The "inverse" CDF, such that symbol _s falls in the range - [_s>0?ft-_icdf[_s-1]:0,ft-_icdf[_s]), where ft=1<<_ftb. - The values must be monotonically non-increasing, and the last value - must be 0. - _ftb: The number of bits of precision in the cumulative distribution.*/ -void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb); - -/*Encodes a raw unsigned integer in the stream. - _fl: The integer to encode. - _ft: The number of integers that can be encoded (one more than the max). - This must be at least one, and no more than 2**32-1.*/ -void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft); - -/*Encodes a sequence of raw bits in the stream. - _fl: The bits to encode. - _ftb: The number of bits to encode. 
- This must be between 1 and 25, inclusive.*/ -void ec_enc_bits(ec_enc *_this,opus_uint32 _fl,unsigned _ftb); - -/*Overwrites a few bits at the very start of an existing stream, after they - have already been encoded. - This makes it possible to have a few flags up front, where it is easy for - decoders to access them without parsing the whole stream, even if their - values are not determined until late in the encoding process, without having - to buffer all the intermediate symbols in the encoder. - In order for this to work, at least _nbits bits must have already been - encoded using probabilities that are an exact power of two. - The encoder can verify the number of encoded bits is sufficient, but cannot - check this latter condition. - _val: The bits to encode (in the least _nbits significant bits). - They will be decoded in order from most-significant to least. - _nbits: The number of bits to overwrite. - This must be no more than 8.*/ -void ec_enc_patch_initial_bits(ec_enc *_this,unsigned _val,unsigned _nbits); - -/*Compacts the data to fit in the target size. - This moves up the raw bits at the end of the current buffer so they are at - the end of the new buffer size. - The caller must ensure that the amount of data that's already been written - will fit in the new size. - _size: The number of bytes in the new buffer. - This must be large enough to contain the bits already written, and - must be no larger than the existing size.*/ -void ec_enc_shrink(ec_enc *_this,opus_uint32 _size); - -/*Indicates that there are no more symbols to encode. - All reamining output bytes are flushed to the output buffer. 
- ec_enc_init() must be called before the encoder can be used again.*/ -void ec_enc_done(ec_enc *_this); - -#endif diff --git a/thirdparty/opus/celt/fixed_debug.h b/thirdparty/opus/celt/fixed_debug.h deleted file mode 100644 index d28227f5dc..0000000000 --- a/thirdparty/opus/celt/fixed_debug.h +++ /dev/null @@ -1,784 +0,0 @@ -/* Copyright (C) 2003-2008 Jean-Marc Valin - Copyright (C) 2007-2012 Xiph.Org Foundation */ -/** - @file fixed_debug.h - @brief Fixed-point operations with debugging -*/ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifndef FIXED_DEBUG_H -#define FIXED_DEBUG_H - -#include <stdio.h> -#include "opus_defines.h" - -#ifdef CELT_C -OPUS_EXPORT opus_int64 celt_mips=0; -#else -extern opus_int64 celt_mips; -#endif - -#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b)) -#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR32((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR32((a),16),((b)&0x0000ffff)),15)), SHR32(MULT16_16SU(SHR32((b),16),((a)&0x0000ffff)),15)) - -/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ -#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR32((b),16)), SHR32(MULT16_16SU((a),((b)&0x0000ffff)),16)) - -#define MULT16_32_P16(a,b) MULT16_32_PX(a,b,16) - -#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits)))) -#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits)))) - -#define VERIFY_SHORT(x) ((x)<=32767&&(x)>=-32768) -#define VERIFY_INT(x) ((x)<=2147483647LL&&(x)>=-2147483648LL) -#define VERIFY_UINT(x) ((x)<=(2147483647LLU<<1)) - -#define SHR(a,b) SHR32(a,b) -#define PSHR(a,b) PSHR32(a,b) - -static OPUS_INLINE short NEG16(int x) -{ - int res; - if (!VERIFY_SHORT(x)) - { - fprintf (stderr, "NEG16: input is not short: %d\n", (int)x); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = -x; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "NEG16: output is not short: %d\n", (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips++; - return res; -} -static OPUS_INLINE int NEG32(opus_int64 x) -{ - opus_int64 res; - if (!VERIFY_INT(x)) - { - fprintf (stderr, "NEG16: input is not int: %d\n", (int)x); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = -x; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "NEG16: output is not int: %d\n", (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=2; - return res; -} - -#define EXTRACT16(x) EXTRACT16_(x, __FILE__, __LINE__) -static OPUS_INLINE 
short EXTRACT16_(int x, char *file, int line) -{ - int res; - if (!VERIFY_SHORT(x)) - { - fprintf (stderr, "EXTRACT16: input is not short: %d in %s: line %d\n", x, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = x; - celt_mips++; - return res; -} - -#define EXTEND32(x) EXTEND32_(x, __FILE__, __LINE__) -static OPUS_INLINE int EXTEND32_(int x, char *file, int line) -{ - int res; - if (!VERIFY_SHORT(x)) - { - fprintf (stderr, "EXTEND32: input is not short: %d in %s: line %d\n", x, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = x; - celt_mips++; - return res; -} - -#define SHR16(a, shift) SHR16_(a, shift, __FILE__, __LINE__) -static OPUS_INLINE short SHR16_(int a, int shift, char *file, int line) -{ - int res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift)) - { - fprintf (stderr, "SHR16: inputs are not short: %d >> %d in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a>>shift; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "SHR16: output is not short: %d in %s: line %d\n", res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips++; - return res; -} -#define SHL16(a, shift) SHL16_(a, shift, __FILE__, __LINE__) -static OPUS_INLINE short SHL16_(int a, int shift, char *file, int line) -{ - int res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift)) - { - fprintf (stderr, "SHL16: inputs are not short: %d %d in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a<<shift; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "SHL16: output is not short: %d in %s: line %d\n", res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips++; - return res; -} - -static OPUS_INLINE int SHR32(opus_int64 a, int shift) -{ - opus_int64 res; - if (!VERIFY_INT(a) || !VERIFY_SHORT(shift)) - { - fprintf (stderr, "SHR32: inputs are not int: %d %d\n", (int)a, shift); -#ifdef 
FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a>>shift; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "SHR32: output is not int: %d\n", (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=2; - return res; -} -#define SHL32(a, shift) SHL32_(a, shift, __FILE__, __LINE__) -static OPUS_INLINE int SHL32_(opus_int64 a, int shift, char *file, int line) -{ - opus_int64 res; - if (!VERIFY_INT(a) || !VERIFY_SHORT(shift)) - { - fprintf (stderr, "SHL32: inputs are not int: %lld %d in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a<<shift; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "SHL32: output is not int: %lld<<%d = %lld in %s: line %d\n", a, shift, res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=2; - return res; -} - -#define PSHR32(a,shift) (celt_mips--,SHR32(ADD32((a),(((opus_val32)(1)<<((shift))>>1))),shift)) -#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift))) - -#define ROUND16(x,a) (celt_mips--,EXTRACT16(PSHR32((x),(a)))) -#define HALF16(x) (SHR16(x,1)) -#define HALF32(x) (SHR32(x,1)) - -//#define SHR(a,shift) ((a) >> (shift)) -//#define SHL(a,shift) ((a) << (shift)) - -#define ADD16(a, b) ADD16_(a, b, __FILE__, __LINE__) -static OPUS_INLINE short ADD16_(int a, int b, char *file, int line) -{ - int res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "ADD16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a+b; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "ADD16: output is not short: %d+%d=%d in %s: line %d\n", a,b,res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips++; - return res; -} - -#define SUB16(a, b) SUB16_(a, b, __FILE__, __LINE__) -static OPUS_INLINE short SUB16_(int a, int b, char *file, int line) -{ - int res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) 
- { - fprintf (stderr, "SUB16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a-b; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "SUB16: output is not short: %d in %s: line %d\n", res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips++; - return res; -} - -#define ADD32(a, b) ADD32_(a, b, __FILE__, __LINE__) -static OPUS_INLINE int ADD32_(opus_int64 a, opus_int64 b, char *file, int line) -{ - opus_int64 res; - if (!VERIFY_INT(a) || !VERIFY_INT(b)) - { - fprintf (stderr, "ADD32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a+b; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "ADD32: output is not int: %d in %s: line %d\n", (int)res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=2; - return res; -} - -#define SUB32(a, b) SUB32_(a, b, __FILE__, __LINE__) -static OPUS_INLINE int SUB32_(opus_int64 a, opus_int64 b, char *file, int line) -{ - opus_int64 res; - if (!VERIFY_INT(a) || !VERIFY_INT(b)) - { - fprintf (stderr, "SUB32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a-b; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "SUB32: output is not int: %d in %s: line %d\n", (int)res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=2; - return res; -} - -#undef UADD32 -#define UADD32(a, b) UADD32_(a, b, __FILE__, __LINE__) -static OPUS_INLINE unsigned int UADD32_(opus_uint64 a, opus_uint64 b, char *file, int line) -{ - opus_uint64 res; - if (!VERIFY_UINT(a) || !VERIFY_UINT(b)) - { - fprintf (stderr, "UADD32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a+b; - if (!VERIFY_UINT(res)) - { - fprintf (stderr, "UADD32: 
output is not uint32: %llu in %s: line %d\n", res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=2; - return res; -} - -#undef USUB32 -#define USUB32(a, b) USUB32_(a, b, __FILE__, __LINE__) -static OPUS_INLINE unsigned int USUB32_(opus_uint64 a, opus_uint64 b, char *file, int line) -{ - opus_uint64 res; - if (!VERIFY_UINT(a) || !VERIFY_UINT(b)) - { - fprintf (stderr, "USUB32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - if (a<b) - { - fprintf (stderr, "USUB32: inputs underflow: %llu < %llu in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a-b; - if (!VERIFY_UINT(res)) - { - fprintf (stderr, "USUB32: output is not uint32: %llu - %llu = %llu in %s: line %d\n", a, b, res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=2; - return res; -} - -/* result fits in 16 bits */ -static OPUS_INLINE short MULT16_16_16(int a, int b) -{ - int res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "MULT16_16_16: inputs are not short: %d %d\n", a, b); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a*b; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "MULT16_16_16: output is not short: %d\n", res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips++; - return res; -} - -#define MULT16_16(a, b) MULT16_16_(a, b, __FILE__, __LINE__) -static OPUS_INLINE int MULT16_16_(int a, int b, char *file, int line) -{ - opus_int64 res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "MULT16_16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = ((opus_int64)a)*b; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "MULT16_16: output is not int: %d in %s: line %d\n", (int)res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - 
celt_mips++; - return res; -} - -#define MAC16_16(c,a,b) (celt_mips-=2,ADD32((c),MULT16_16((a),(b)))) - -#define MULT16_32_QX(a, b, Q) MULT16_32_QX_(a, b, Q, __FILE__, __LINE__) -static OPUS_INLINE int MULT16_32_QX_(int a, opus_int64 b, int Q, char *file, int line) -{ - opus_int64 res; - if (!VERIFY_SHORT(a) || !VERIFY_INT(b)) - { - fprintf (stderr, "MULT16_32_Q%d: inputs are not short+int: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - if (ABS32(b)>=((opus_val32)(1)<<(15+Q))) - { - fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = (((opus_int64)a)*(opus_int64)b) >> Q; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "MULT16_32_Q%d: output is not int: %d*%d=%d in %s: line %d\n", Q, (int)a, (int)b,(int)res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - if (Q==15) - celt_mips+=3; - else - celt_mips+=4; - return res; -} - -#define MULT16_32_PX(a, b, Q) MULT16_32_PX_(a, b, Q, __FILE__, __LINE__) -static OPUS_INLINE int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int line) -{ - opus_int64 res; - if (!VERIFY_SHORT(a) || !VERIFY_INT(b)) - { - fprintf (stderr, "MULT16_32_P%d: inputs are not short+int: %d %d in %s: line %d\n\n", Q, (int)a, (int)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - if (ABS32(b)>=((opus_int64)(1)<<(15+Q))) - { - fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n\n", Q, (int)a, (int)b,file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = ((((opus_int64)a)*(opus_int64)b) + (((opus_val32)(1)<<Q)>>1))>> Q; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "MULT16_32_P%d: output is not int: %d*%d=%d in %s: line %d\n\n", Q, (int)a, (int)b,(int)res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - if (Q==15) - celt_mips+=4; - 
else - celt_mips+=5; - return res; -} - -#define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15) -#define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b)))) -#define MAC16_32_Q16(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q16((a),(b)))) - -static OPUS_INLINE int SATURATE(int a, int b) -{ - if (a>b) - a=b; - if (a<-b) - a = -b; - celt_mips+=3; - return a; -} - -static OPUS_INLINE opus_int16 SATURATE16(opus_int32 a) -{ - celt_mips+=3; - if (a>32767) - return 32767; - else if (a<-32768) - return -32768; - else return a; -} - -static OPUS_INLINE int MULT16_16_Q11_32(int a, int b) -{ - opus_int64 res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "MULT16_16_Q11: inputs are not short: %d %d\n", a, b); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = ((opus_int64)a)*b; - res >>= 11; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "MULT16_16_Q11: output is not short: %d*%d=%d\n", (int)a, (int)b, (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=3; - return res; -} -static OPUS_INLINE short MULT16_16_Q13(int a, int b) -{ - opus_int64 res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "MULT16_16_Q13: inputs are not short: %d %d\n", a, b); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = ((opus_int64)a)*b; - res >>= 13; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "MULT16_16_Q13: output is not short: %d*%d=%d\n", a, b, (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=3; - return res; -} -static OPUS_INLINE short MULT16_16_Q14(int a, int b) -{ - opus_int64 res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "MULT16_16_Q14: inputs are not short: %d %d\n", a, b); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = ((opus_int64)a)*b; - res >>= 14; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "MULT16_16_Q14: output is not short: %d\n", (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif 
- } - celt_mips+=3; - return res; -} - -#define MULT16_16_Q15(a, b) MULT16_16_Q15_(a, b, __FILE__, __LINE__) -static OPUS_INLINE short MULT16_16_Q15_(int a, int b, char *file, int line) -{ - opus_int64 res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "MULT16_16_Q15: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = ((opus_int64)a)*b; - res >>= 15; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "MULT16_16_Q15: output is not short: %d in %s: line %d\n", (int)res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=1; - return res; -} - -static OPUS_INLINE short MULT16_16_P13(int a, int b) -{ - opus_int64 res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "MULT16_16_P13: inputs are not short: %d %d\n", a, b); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = ((opus_int64)a)*b; - res += 4096; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "MULT16_16_P13: overflow: %d*%d=%d\n", a, b, (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res >>= 13; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "MULT16_16_P13: output is not short: %d*%d=%d\n", a, b, (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=4; - return res; -} -static OPUS_INLINE short MULT16_16_P14(int a, int b) -{ - opus_int64 res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "MULT16_16_P14: inputs are not short: %d %d\n", a, b); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = ((opus_int64)a)*b; - res += 8192; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "MULT16_16_P14: overflow: %d*%d=%d\n", a, b, (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res >>= 14; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "MULT16_16_P14: output is not short: %d*%d=%d\n", a, b, (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - 
celt_mips+=4; - return res; -} -static OPUS_INLINE short MULT16_16_P15(int a, int b) -{ - opus_int64 res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "MULT16_16_P15: inputs are not short: %d %d\n", a, b); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = ((opus_int64)a)*b; - res += 16384; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "MULT16_16_P15: overflow: %d*%d=%d\n", a, b, (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res >>= 15; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "MULT16_16_P15: output is not short: %d*%d=%d\n", a, b, (int)res); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=2; - return res; -} - -#define DIV32_16(a, b) DIV32_16_(a, b, __FILE__, __LINE__) - -static OPUS_INLINE int DIV32_16_(opus_int64 a, opus_int64 b, char *file, int line) -{ - opus_int64 res; - if (b==0) - { - fprintf(stderr, "DIV32_16: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - return 0; - } - if (!VERIFY_INT(a) || !VERIFY_SHORT(b)) - { - fprintf (stderr, "DIV32_16: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a/b; - if (!VERIFY_SHORT(res)) - { - fprintf (stderr, "DIV32_16: output is not short: %d / %d = %d in %s: line %d\n", (int)a,(int)b,(int)res, file, line); - if (res>32767) - res = 32767; - if (res<-32768) - res = -32768; -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=35; - return res; -} - -#define DIV32(a, b) DIV32_(a, b, __FILE__, __LINE__) -static OPUS_INLINE int DIV32_(opus_int64 a, opus_int64 b, char *file, int line) -{ - opus_int64 res; - if (b==0) - { - fprintf(stderr, "DIV32: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - return 0; - } - - if (!VERIFY_INT(a) || !VERIFY_INT(b)) - { - fprintf (stderr, 
"DIV32: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - res = a/b; - if (!VERIFY_INT(res)) - { - fprintf (stderr, "DIV32: output is not int: %d in %s: line %d\n", (int)res, file, line); -#ifdef FIXED_DEBUG_ASSERT - celt_assert(0); -#endif - } - celt_mips+=70; - return res; -} - -static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x) -{ - x = PSHR32(x, SIG_SHIFT); - x = MAX32(x, -32768); - x = MIN32(x, 32767); - return EXTRACT16(x); -} -#define SIG2WORD16(x) (SIG2WORD16_generic(x)) - - -#undef PRINT_MIPS -#define PRINT_MIPS(file) do {fprintf (file, "total complexity = %llu MIPS\n", celt_mips);} while (0); - -#endif diff --git a/thirdparty/opus/celt/fixed_generic.h b/thirdparty/opus/celt/fixed_generic.h deleted file mode 100644 index 1cfd6d6989..0000000000 --- a/thirdparty/opus/celt/fixed_generic.h +++ /dev/null @@ -1,167 +0,0 @@ -/* Copyright (C) 2007-2009 Xiph.Org Foundation - Copyright (C) 2003-2008 Jean-Marc Valin - Copyright (C) 2007-2008 CSIRO */ -/** - @file fixed_generic.h - @brief Generic fixed-point operations -*/ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef FIXED_GENERIC_H -#define FIXED_GENERIC_H - -/** Multiply a 16-bit signed value by a 16-bit unsigned value. The result is a 32-bit signed value */ -#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b)) - -/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ -#if OPUS_FAST_INT64 -#define MULT16_32_Q16(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),16)) -#else -#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16)) -#endif - -/** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */ -#if OPUS_FAST_INT64 -#define MULT16_32_P16(a,b) ((opus_val32)PSHR((opus_int64)((opus_val16)(a))*(b),16)) -#else -#define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16)) -#endif - -/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ -#if OPUS_FAST_INT64 -#define MULT16_32_Q15(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),15)) -#else -#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15)) -#endif - -/** 32x32 multiplication, followed by a 31-bit shift right. 
Results fits in 32 bits */ -#if OPUS_FAST_INT64 -#define MULT32_32_Q31(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),31)) -#else -#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15)) -#endif - -/** Compile-time conversion of float constant to 16-bit value */ -#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits)))) - -/** Compile-time conversion of float constant to 32-bit value */ -#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits)))) - -/** Negate a 16-bit value */ -#define NEG16(x) (-(x)) -/** Negate a 32-bit value */ -#define NEG32(x) (-(x)) - -/** Change a 32-bit value into a 16-bit value. The value is assumed to fit in 16-bit, otherwise the result is undefined */ -#define EXTRACT16(x) ((opus_val16)(x)) -/** Change a 16-bit value into a 32-bit value */ -#define EXTEND32(x) ((opus_val32)(x)) - -/** Arithmetic shift-right of a 16-bit value */ -#define SHR16(a,shift) ((a) >> (shift)) -/** Arithmetic shift-left of a 16-bit value */ -#define SHL16(a,shift) ((opus_int16)((opus_uint16)(a)<<(shift))) -/** Arithmetic shift-right of a 32-bit value */ -#define SHR32(a,shift) ((a) >> (shift)) -/** Arithmetic shift-left of a 32-bit value */ -#define SHL32(a,shift) ((opus_int32)((opus_uint32)(a)<<(shift))) - -/** 32-bit arithmetic shift right with rounding-to-nearest instead of rounding down */ -#define PSHR32(a,shift) (SHR32((a)+((EXTEND32(1)<<((shift))>>1)),shift)) -/** 32-bit arithmetic shift right where the argument can be negative */ -#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift))) - -/** "RAW" macros, should not be used outside of this header file */ -#define SHR(a,shift) ((a) >> (shift)) -#define SHL(a,shift) SHL32(a,shift) -#define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift)) -#define SATURATE(x,a) (((x)>(a) ? (a) : (x)<-(a) ? 
-(a) : (x))) - -#define SATURATE16(x) (EXTRACT16((x)>32767 ? 32767 : (x)<-32768 ? -32768 : (x))) - -/** Shift by a and round-to-neareast 32-bit value. Result is a 16-bit value */ -#define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a)))) -/** Divide by two */ -#define HALF16(x) (SHR16(x,1)) -#define HALF32(x) (SHR32(x,1)) - -/** Add two 16-bit values */ -#define ADD16(a,b) ((opus_val16)((opus_val16)(a)+(opus_val16)(b))) -/** Subtract two 16-bit values */ -#define SUB16(a,b) ((opus_val16)(a)-(opus_val16)(b)) -/** Add two 32-bit values */ -#define ADD32(a,b) ((opus_val32)(a)+(opus_val32)(b)) -/** Subtract two 32-bit values */ -#define SUB32(a,b) ((opus_val32)(a)-(opus_val32)(b)) - -/** 16x16 multiplication where the result fits in 16 bits */ -#define MULT16_16_16(a,b) ((((opus_val16)(a))*((opus_val16)(b)))) - -/* (opus_val32)(opus_val16) gives TI compiler a hint that it's 16x16->32 multiply */ -/** 16x16 multiplication where the result fits in 32 bits */ -#define MULT16_16(a,b) (((opus_val32)(opus_val16)(a))*((opus_val32)(opus_val16)(b))) - -/** 16x16 multiply-add where the result fits in 32 bits */ -#define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b)))) -/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. - b must fit in 31 bits. - Result fits in 32 bits. */ -#define MAC16_32_Q15(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) - -/** 16x32 multiplication, followed by a 16-bit shift right and 32-bit add. 
- Results fits in 32 bits */ -#define MAC16_32_Q16(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))) - -#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11)) -#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11)) -#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13)) -#define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14)) -#define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15)) - -#define MULT16_16_P13(a,b) (SHR(ADD32(4096,MULT16_16((a),(b))),13)) -#define MULT16_16_P14(a,b) (SHR(ADD32(8192,MULT16_16((a),(b))),14)) -#define MULT16_16_P15(a,b) (SHR(ADD32(16384,MULT16_16((a),(b))),15)) - -/** Divide a 32-bit value by a 16-bit value. Result fits in 16 bits */ -#define DIV32_16(a,b) ((opus_val16)(((opus_val32)(a))/((opus_val16)(b)))) - -/** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */ -#define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b))) - -#if defined(MIPSr1_ASM) -#include "mips/fixed_generic_mipsr1.h" -#endif - -static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x) -{ - x = PSHR32(x, SIG_SHIFT); - x = MAX32(x, -32768); - x = MIN32(x, 32767); - return EXTRACT16(x); -} -#define SIG2WORD16(x) (SIG2WORD16_generic(x)) - -#endif diff --git a/thirdparty/opus/celt/float_cast.h b/thirdparty/opus/celt/float_cast.h deleted file mode 100644 index ed5a39b543..0000000000 --- a/thirdparty/opus/celt/float_cast.h +++ /dev/null @@ -1,140 +0,0 @@ -/* Copyright (C) 2001 Erik de Castro Lopo <erikd AT mega-nerd DOT com> */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* Version 1.1 */ - -#ifndef FLOAT_CAST_H -#define FLOAT_CAST_H - - -#include "arch.h" - -/*============================================================================ -** On Intel Pentium processors (especially PIII and probably P4), converting -** from float to int is very slow. To meet the C specs, the code produced by -** most C compilers targeting Pentium needs to change the FPU rounding mode -** before the float to int conversion is performed. -** -** Changing the FPU rounding mode causes the FPU pipeline to be flushed. It -** is this flushing of the pipeline which is so slow. -** -** Fortunately the ISO C99 specifications define the functions lrint, lrintf, -** llrint and llrintf which fix this problem as a side effect. -** -** On Unix-like systems, the configure process should have detected the -** presence of these functions. If they weren't found we have to replace them -** here with a standard C cast. 
-*/ - -/* -** The C99 prototypes for lrint and lrintf are as follows: -** -** long int lrintf (float x) ; -** long int lrint (double x) ; -*/ - -/* The presence of the required functions are detected during the configure -** process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in -** the config.h file. -*/ - -#if (HAVE_LRINTF) - -/* These defines enable functionality introduced with the 1999 ISO C -** standard. They must be defined before the inclusion of math.h to -** engage them. If optimisation is enabled, these functions will be -** inlined. With optimisation switched off, you have to link in the -** maths library using -lm. -*/ - -#define _ISOC9X_SOURCE 1 -#define _ISOC99_SOURCE 1 - -#define __USE_ISOC9X 1 -#define __USE_ISOC99 1 - -#include <math.h> -#define float2int(x) lrintf(x) - -#elif (defined(HAVE_LRINT)) - -#define _ISOC9X_SOURCE 1 -#define _ISOC99_SOURCE 1 - -#define __USE_ISOC9X 1 -#define __USE_ISOC99 1 - -#include <math.h> -#define float2int(x) lrint(x) - -#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_X64) - #include <xmmintrin.h> - - __inline long int float2int(float value) - { - return _mm_cvtss_si32(_mm_load_ss(&value)); - } -#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_IX86) - #include <math.h> - - /* Win32 doesn't seem to have these functions. - ** Therefore implement OPUS_INLINE versions of these functions here. - */ - - __inline long int - float2int (float flt) - { int intgr; - - _asm - { fld flt - fistp intgr - } ; - - return intgr ; - } - -#else - -#if (defined(__GNUC__) && defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) - /* supported by gcc in C99 mode, but not by all other compilers */ - #warning "Don't have the functions lrint() and lrintf ()." - #warning "Replacing these functions with a standard C cast." 
-#endif /* __STDC_VERSION__ >= 199901L */ - #include <math.h> - #define float2int(flt) ((int)(floor(.5+flt))) -#endif - -#ifndef DISABLE_FLOAT_API -static OPUS_INLINE opus_int16 FLOAT2INT16(float x) -{ - x = x*CELT_SIG_SCALE; - x = MAX32(x, -32768); - x = MIN32(x, 32767); - return (opus_int16)float2int(x); -} -#endif /* DISABLE_FLOAT_API */ - -#endif /* FLOAT_CAST_H */ diff --git a/thirdparty/opus/celt/kiss_fft.c b/thirdparty/opus/celt/kiss_fft.c deleted file mode 100644 index 1f8fd05321..0000000000 --- a/thirdparty/opus/celt/kiss_fft.c +++ /dev/null @@ -1,604 +0,0 @@ -/*Copyright (c) 2003-2004, Mark Borgerding - Lots of modifications by Jean-Marc Valin - Copyright (c) 2005-2007, Xiph.Org Foundation - Copyright (c) 2008, Xiph.Org Foundation, CSIRO - - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE.*/ - -/* This code is originally from Mark Borgerding's KISS-FFT but has been - heavily modified to better suit Opus */ - -#ifndef SKIP_CONFIG_H -# ifdef HAVE_CONFIG_H -# include "config.h" -# endif -#endif - -#include "_kiss_fft_guts.h" -#include "arch.h" -#include "os_support.h" -#include "mathops.h" -#include "stack_alloc.h" - -/* The guts header contains all the multiplication and addition macros that are defined for - complex numbers. It also delares the kf_ internal functions. 
-*/ - -static void kf_bfly2( - kiss_fft_cpx * Fout, - int m, - int N - ) -{ - kiss_fft_cpx * Fout2; - int i; - (void)m; -#ifdef CUSTOM_MODES - if (m==1) - { - celt_assert(m==1); - for (i=0;i<N;i++) - { - kiss_fft_cpx t; - Fout2 = Fout + 1; - t = *Fout2; - C_SUB( *Fout2 , *Fout , t ); - C_ADDTO( *Fout , t ); - Fout += 2; - } - } else -#endif - { - opus_val16 tw; - tw = QCONST16(0.7071067812f, 15); - /* We know that m==4 here because the radix-2 is just after a radix-4 */ - celt_assert(m==4); - for (i=0;i<N;i++) - { - kiss_fft_cpx t; - Fout2 = Fout + 4; - t = Fout2[0]; - C_SUB( Fout2[0] , Fout[0] , t ); - C_ADDTO( Fout[0] , t ); - - t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw); - t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw); - C_SUB( Fout2[1] , Fout[1] , t ); - C_ADDTO( Fout[1] , t ); - - t.r = Fout2[2].i; - t.i = -Fout2[2].r; - C_SUB( Fout2[2] , Fout[2] , t ); - C_ADDTO( Fout[2] , t ); - - t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw); - t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw); - C_SUB( Fout2[3] , Fout[3] , t ); - C_ADDTO( Fout[3] , t ); - Fout += 8; - } - } -} - -static void kf_bfly4( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - int i; - - if (m==1) - { - /* Degenerate case where all the twiddles are 1. 
*/ - for (i=0;i<N;i++) - { - kiss_fft_cpx scratch0, scratch1; - - C_SUB( scratch0 , *Fout, Fout[2] ); - C_ADDTO(*Fout, Fout[2]); - C_ADD( scratch1 , Fout[1] , Fout[3] ); - C_SUB( Fout[2], *Fout, scratch1 ); - C_ADDTO( *Fout , scratch1 ); - C_SUB( scratch1 , Fout[1] , Fout[3] ); - - Fout[1].r = scratch0.r + scratch1.i; - Fout[1].i = scratch0.i - scratch1.r; - Fout[3].r = scratch0.r - scratch1.i; - Fout[3].i = scratch0.i + scratch1.r; - Fout+=4; - } - } else { - int j; - kiss_fft_cpx scratch[6]; - const kiss_twiddle_cpx *tw1,*tw2,*tw3; - const int m2=2*m; - const int m3=3*m; - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;i<N;i++) - { - Fout = Fout_beg + i*mm; - tw3 = tw2 = tw1 = st->twiddles; - /* m is guaranteed to be a multiple of 4. */ - for (j=0;j<m;j++) - { - C_MUL(scratch[0],Fout[m] , *tw1 ); - C_MUL(scratch[1],Fout[m2] , *tw2 ); - C_MUL(scratch[2],Fout[m3] , *tw3 ); - - C_SUB( scratch[5] , *Fout, scratch[1] ); - C_ADDTO(*Fout, scratch[1]); - C_ADD( scratch[3] , scratch[0] , scratch[2] ); - C_SUB( scratch[4] , scratch[0] , scratch[2] ); - C_SUB( Fout[m2], *Fout, scratch[3] ); - tw1 += fstride; - tw2 += fstride*2; - tw3 += fstride*3; - C_ADDTO( *Fout , scratch[3] ); - - Fout[m].r = scratch[5].r + scratch[4].i; - Fout[m].i = scratch[5].i - scratch[4].r; - Fout[m3].r = scratch[5].r - scratch[4].i; - Fout[m3].i = scratch[5].i + scratch[4].r; - ++Fout; - } - } - } -} - - -#ifndef RADIX_TWO_ONLY - -static void kf_bfly3( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - int i; - size_t k; - const size_t m2 = 2*m; - const kiss_twiddle_cpx *tw1,*tw2; - kiss_fft_cpx scratch[5]; - kiss_twiddle_cpx epi3; - - kiss_fft_cpx * Fout_beg = Fout; -#ifdef FIXED_POINT - /*epi3.r = -16384;*/ /* Unused */ - epi3.i = -28378; -#else - epi3 = st->twiddles[fstride*m]; -#endif - for (i=0;i<N;i++) - { - Fout = Fout_beg + i*mm; - tw1=tw2=st->twiddles; - /* For non-custom modes, m is guaranteed to be a multiple of 4. 
*/ - k=m; - do { - - C_MUL(scratch[1],Fout[m] , *tw1); - C_MUL(scratch[2],Fout[m2] , *tw2); - - C_ADD(scratch[3],scratch[1],scratch[2]); - C_SUB(scratch[0],scratch[1],scratch[2]); - tw1 += fstride; - tw2 += fstride*2; - - Fout[m].r = Fout->r - HALF_OF(scratch[3].r); - Fout[m].i = Fout->i - HALF_OF(scratch[3].i); - - C_MULBYSCALAR( scratch[0] , epi3.i ); - - C_ADDTO(*Fout,scratch[3]); - - Fout[m2].r = Fout[m].r + scratch[0].i; - Fout[m2].i = Fout[m].i - scratch[0].r; - - Fout[m].r -= scratch[0].i; - Fout[m].i += scratch[0].r; - - ++Fout; - } while(--k); - } -} - - -#ifndef OVERRIDE_kf_bfly5 -static void kf_bfly5( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - int i, u; - kiss_fft_cpx scratch[13]; - const kiss_twiddle_cpx *tw; - kiss_twiddle_cpx ya,yb; - kiss_fft_cpx * Fout_beg = Fout; - -#ifdef FIXED_POINT - ya.r = 10126; - ya.i = -31164; - yb.r = -26510; - yb.i = -19261; -#else - ya = st->twiddles[fstride*m]; - yb = st->twiddles[fstride*2*m]; -#endif - tw=st->twiddles; - - for (i=0;i<N;i++) - { - Fout = Fout_beg + i*mm; - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - /* For non-custom modes, m is guaranteed to be a multiple of 4. 
*/ - for ( u=0; u<m; ++u ) { - scratch[0] = *Fout0; - - C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); - C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); - C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); - C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); - - C_ADD( scratch[7],scratch[1],scratch[4]); - C_SUB( scratch[10],scratch[1],scratch[4]); - C_ADD( scratch[8],scratch[2],scratch[3]); - C_SUB( scratch[9],scratch[2],scratch[3]); - - Fout0->r += scratch[7].r + scratch[8].r; - Fout0->i += scratch[7].i + scratch[8].i; - - scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); - scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); - - scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); - scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); - - C_SUB(*Fout1,scratch[5],scratch[6]); - C_ADD(*Fout4,scratch[5],scratch[6]); - - scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); - scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); - scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); - scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); - - C_ADD(*Fout2,scratch[11],scratch[12]); - C_SUB(*Fout3,scratch[11],scratch[12]); - - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; - } - } -} -#endif /* OVERRIDE_kf_bfly5 */ - - -#endif - - -#ifdef CUSTOM_MODES - -static -void compute_bitrev_table( - int Fout, - opus_int16 *f, - const size_t fstride, - int in_stride, - opus_int16 * factors, - const kiss_fft_state *st - ) -{ - const int p=*factors++; /* the radix */ - const int m=*factors++; /* stage's fft length/p */ - - /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/ - if (m==1) - { - int j; - for (j=0;j<p;j++) - { - *f = Fout+j; - f += fstride*in_stride; - } - } else { - int j; - for (j=0;j<p;j++) - { - compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st); - f += 
fstride*in_stride; - Fout += m; - } - } -} - -/* facbuf is populated by p1,m1,p2,m2, ... - where - p[i] * m[i] = m[i-1] - m0 = n */ -static -int kf_factor(int n,opus_int16 * facbuf) -{ - int p=4; - int i; - int stages=0; - int nbak = n; - - /*factor out powers of 4, powers of 2, then any remaining primes */ - do { - while (n % p) { - switch (p) { - case 4: p = 2; break; - case 2: p = 3; break; - default: p += 2; break; - } - if (p>32000 || (opus_int32)p*(opus_int32)p > n) - p = n; /* no more factors, skip to end */ - } - n /= p; -#ifdef RADIX_TWO_ONLY - if (p!=2 && p != 4) -#else - if (p>5) -#endif - { - return 0; - } - facbuf[2*stages] = p; - if (p==2 && stages > 1) - { - facbuf[2*stages] = 4; - facbuf[2] = 2; - } - stages++; - } while (n > 1); - n = nbak; - /* Reverse the order to get the radix 4 at the end, so we can use the - fast degenerate case. It turns out that reversing the order also - improves the noise behaviour. */ - for (i=0;i<stages/2;i++) - { - int tmp; - tmp = facbuf[2*i]; - facbuf[2*i] = facbuf[2*(stages-i-1)]; - facbuf[2*(stages-i-1)] = tmp; - } - for (i=0;i<stages;i++) - { - n /= facbuf[2*i]; - facbuf[2*i+1] = n; - } - return 1; -} - -static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft) -{ - int i; -#ifdef FIXED_POINT - for (i=0;i<nfft;++i) { - opus_val32 phase = -i; - kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft)); - } -#else - for (i=0;i<nfft;++i) { - const double pi=3.14159265358979323846264338327; - double phase = ( -2*pi /nfft ) * i; - kf_cexp(twiddles+i, phase ); - } -#endif -} - -int opus_fft_alloc_arch_c(kiss_fft_state *st) { - (void)st; - return 0; -} - -/* - * - * Allocates all necessary storage space for the fft and ifft. - * The return value is a contiguous block of memory. As such, - * It can be freed with free(). 
- * */ -kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, - const kiss_fft_state *base, int arch) -{ - kiss_fft_state *st=NULL; - size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/ - - if ( lenmem==NULL ) { - st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded ); - }else{ - if (mem != NULL && *lenmem >= memneeded) - st = (kiss_fft_state*)mem; - *lenmem = memneeded; - } - if (st) { - opus_int16 *bitrev; - kiss_twiddle_cpx *twiddles; - - st->nfft=nfft; -#ifdef FIXED_POINT - st->scale_shift = celt_ilog2(st->nfft); - if (st->nfft == 1<<st->scale_shift) - st->scale = Q15ONE; - else - st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); -#else - st->scale = 1.f/nfft; -#endif - if (base != NULL) - { - st->twiddles = base->twiddles; - st->shift = 0; - while (st->shift < 32 && nfft<<st->shift != base->nfft) - st->shift++; - if (st->shift>=32) - goto fail; - } else { - st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft); - compute_twiddles(twiddles, nfft); - st->shift = -1; - } - if (!kf_factor(nfft,st->factors)) - { - goto fail; - } - - /* bitrev */ - st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft); - if (st->bitrev==NULL) - goto fail; - compute_bitrev_table(0, bitrev, 1,1, st->factors,st); - - /* Initialize architecture specific fft parameters */ - if (opus_fft_alloc_arch(st, arch)) - goto fail; - } - return st; -fail: - opus_fft_free(st, arch); - return NULL; -} - -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch) -{ - return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch); -} - -void opus_fft_free_arch_c(kiss_fft_state *st) { - (void)st; -} - -void opus_fft_free(const kiss_fft_state *cfg, int arch) -{ - if (cfg) - { - opus_fft_free_arch((kiss_fft_state *)cfg, arch); - opus_free((opus_int16*)cfg->bitrev); - if (cfg->shift < 0) - opus_free((kiss_twiddle_cpx*)cfg->twiddles); - opus_free((kiss_fft_state*)cfg); - } 
-} - -#endif /* CUSTOM_MODES */ - -void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) -{ - int m2, m; - int p; - int L; - int fstride[MAXFACTORS]; - int i; - int shift; - - /* st->shift can be -1 */ - shift = st->shift>0 ? st->shift : 0; - - fstride[0] = 1; - L=0; - do { - p = st->factors[2*L]; - m = st->factors[2*L+1]; - fstride[L+1] = fstride[L]*p; - L++; - } while(m!=1); - m = st->factors[2*L-1]; - for (i=L-1;i>=0;i--) - { - if (i!=0) - m2 = st->factors[2*i-1]; - else - m2 = 1; - switch (st->factors[2*i]) - { - case 2: - kf_bfly2(fout, m, fstride[i]); - break; - case 4: - kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); - break; - #ifndef RADIX_TWO_ONLY - case 3: - kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2); - break; - case 5: - kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2); - break; - #endif - } - m = m2; - } -} - -void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) -{ - int i; - opus_val16 scale; -#ifdef FIXED_POINT - /* Allows us to scale with MULT16_32_Q16(), which is faster than - MULT16_32_Q15() on ARM. 
*/ - int scale_shift = st->scale_shift-1; -#endif - scale = st->scale; - - celt_assert2 (fin != fout, "In-place FFT not supported"); - /* Bit-reverse the input */ - for (i=0;i<st->nfft;i++) - { - kiss_fft_cpx x = fin[i]; - fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift); - fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift); - } - opus_fft_impl(st, fout); -} - - -void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) -{ - int i; - celt_assert2 (fin != fout, "In-place FFT not supported"); - /* Bit-reverse the input */ - for (i=0;i<st->nfft;i++) - fout[st->bitrev[i]] = fin[i]; - for (i=0;i<st->nfft;i++) - fout[i].i = -fout[i].i; - opus_fft_impl(st, fout); - for (i=0;i<st->nfft;i++) - fout[i].i = -fout[i].i; -} diff --git a/thirdparty/opus/celt/kiss_fft.h b/thirdparty/opus/celt/kiss_fft.h deleted file mode 100644 index bffa2bfad6..0000000000 --- a/thirdparty/opus/celt/kiss_fft.h +++ /dev/null @@ -1,200 +0,0 @@ -/*Copyright (c) 2003-2004, Mark Borgerding - Lots of modifications by Jean-Marc Valin - Copyright (c) 2005-2007, Xiph.Org Foundation - Copyright (c) 2008, Xiph.Org Foundation, CSIRO - - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE.*/ - -#ifndef KISS_FFT_H -#define KISS_FFT_H - -#include <stdlib.h> -#include <math.h> -#include "arch.h" -#include "cpu_support.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef USE_SIMD -# include <xmmintrin.h> -# define kiss_fft_scalar __m128 -#define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes) -#else -#define KISS_FFT_MALLOC opus_alloc -#endif - -#ifdef FIXED_POINT -#include "arch.h" - -# define kiss_fft_scalar opus_int32 -# define kiss_twiddle_scalar opus_int16 - - -#else -# ifndef kiss_fft_scalar -/* default is float */ -# define kiss_fft_scalar float -# define kiss_twiddle_scalar float -# define KF_SUFFIX _celt_single -# endif -#endif - -typedef struct { - kiss_fft_scalar r; - kiss_fft_scalar i; -}kiss_fft_cpx; - -typedef struct { - kiss_twiddle_scalar r; - kiss_twiddle_scalar i; -}kiss_twiddle_cpx; - -#define MAXFACTORS 8 -/* e.g. 
an fft of length 128 has 4 factors - as far as kissfft is concerned - 4*4*4*2 - */ - -typedef struct arch_fft_state{ - int is_supported; - void *priv; -} arch_fft_state; - -typedef struct kiss_fft_state{ - int nfft; - opus_val16 scale; -#ifdef FIXED_POINT - int scale_shift; -#endif - int shift; - opus_int16 factors[2*MAXFACTORS]; - const opus_int16 *bitrev; - const kiss_twiddle_cpx *twiddles; - arch_fft_state *arch_fft; -} kiss_fft_state; - -#if defined(HAVE_ARM_NE10) -#include "arm/fft_arm.h" -#endif - -/*typedef struct kiss_fft_state* kiss_fft_cfg;*/ - -/** - * opus_fft_alloc - * - * Initialize a FFT (or IFFT) algorithm's cfg/state buffer. - * - * typical usage: kiss_fft_cfg mycfg=opus_fft_alloc(1024,0,NULL,NULL); - * - * The return value from fft_alloc is a cfg buffer used internally - * by the fft routine or NULL. - * - * If lenmem is NULL, then opus_fft_alloc will allocate a cfg buffer using malloc. - * The returned value should be free()d when done to avoid memory leaks. - * - * The state can be placed in a user supplied buffer 'mem': - * If lenmem is not NULL and mem is not NULL and *lenmem is large enough, - * then the function places the cfg in mem and the size used in *lenmem - * and returns mem. - * - * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough), - * then the function returns NULL and places the minimum cfg - * buffer size in *lenmem. - * */ - -kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch); - -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch); - -/** - * opus_fft(cfg,in_out_buf) - * - * Perform an FFT on a complex input buffer. - * for a forward FFT, - * fin should be f[0] , f[1] , ... ,f[nfft-1] - * fout will be F[0] , F[1] , ... 
,F[nfft-1] - * Note that each element is complex and can be accessed like - f[k].r and f[k].i - * */ -void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); -void opus_ifft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); - -void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); -void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); - -void opus_fft_free(const kiss_fft_state *cfg, int arch); - - -void opus_fft_free_arch_c(kiss_fft_state *st); -int opus_fft_alloc_arch_c(kiss_fft_state *st); - -#if !defined(OVERRIDE_OPUS_FFT) -/* Is run-time CPU detection enabled on this platform? */ -#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) - -extern int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])( - kiss_fft_state *st); - -#define opus_fft_alloc_arch(_st, arch) \ - ((*OPUS_FFT_ALLOC_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st)) - -extern void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])( - kiss_fft_state *st); -#define opus_fft_free_arch(_st, arch) \ - ((*OPUS_FFT_FREE_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st)) - -extern void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, - const kiss_fft_cpx *fin, kiss_fft_cpx *fout); -#define opus_fft(_cfg, _fin, _fout, arch) \ - ((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout)) - -extern void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, - const kiss_fft_cpx *fin, kiss_fft_cpx *fout); -#define opus_ifft(_cfg, _fin, _fout, arch) \ - ((*OPUS_IFFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout)) - -#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ - -#define opus_fft_alloc_arch(_st, arch) \ - ((void)(arch), opus_fft_alloc_arch_c(_st)) - -#define opus_fft_free_arch(_st, arch) \ - ((void)(arch), opus_fft_free_arch_c(_st)) - -#define opus_fft(_cfg, _fin, _fout, arch) \ - ((void)(arch), opus_fft_c(_cfg, _fin, _fout)) - -#define opus_ifft(_cfg, _fin, _fout, arch) \ - ((void)(arch), opus_ifft_c(_cfg, _fin, 
_fout)) - -#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ -#endif /* end if !defined(OVERRIDE_OPUS_FFT) */ - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/celt/laplace.c b/thirdparty/opus/celt/laplace.c deleted file mode 100644 index a7bca874b6..0000000000 --- a/thirdparty/opus/celt/laplace.c +++ /dev/null @@ -1,134 +0,0 @@ -/* Copyright (c) 2007 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "laplace.h" -#include "mathops.h" - -/* The minimum probability of an energy delta (out of 32768). 
*/ -#define LAPLACE_LOG_MINP (0) -#define LAPLACE_MINP (1<<LAPLACE_LOG_MINP) -/* The minimum number of guaranteed representable energy deltas (in one - direction). */ -#define LAPLACE_NMIN (16) - -/* When called, decay is positive and at most 11456. */ -static unsigned ec_laplace_get_freq1(unsigned fs0, int decay) -{ - unsigned ft; - ft = 32768 - LAPLACE_MINP*(2*LAPLACE_NMIN) - fs0; - return ft*(opus_int32)(16384-decay)>>15; -} - -void ec_laplace_encode(ec_enc *enc, int *value, unsigned fs, int decay) -{ - unsigned fl; - int val = *value; - fl = 0; - if (val) - { - int s; - int i; - s = -(val<0); - val = (val+s)^s; - fl = fs; - fs = ec_laplace_get_freq1(fs, decay); - /* Search the decaying part of the PDF.*/ - for (i=1; fs > 0 && i < val; i++) - { - fs *= 2; - fl += fs+2*LAPLACE_MINP; - fs = (fs*(opus_int32)decay)>>15; - } - /* Everything beyond that has probability LAPLACE_MINP. */ - if (!fs) - { - int di; - int ndi_max; - ndi_max = (32768-fl+LAPLACE_MINP-1)>>LAPLACE_LOG_MINP; - ndi_max = (ndi_max-s)>>1; - di = IMIN(val - i, ndi_max - 1); - fl += (2*di+1+s)*LAPLACE_MINP; - fs = IMIN(LAPLACE_MINP, 32768-fl); - *value = (i+di+s)^s; - } - else - { - fs += LAPLACE_MINP; - fl += fs&~s; - } - celt_assert(fl+fs<=32768); - celt_assert(fs>0); - } - ec_encode_bin(enc, fl, fl+fs, 15); -} - -int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay) -{ - int val=0; - unsigned fl; - unsigned fm; - fm = ec_decode_bin(dec, 15); - fl = 0; - if (fm >= fs) - { - val++; - fl = fs; - fs = ec_laplace_get_freq1(fs, decay)+LAPLACE_MINP; - /* Search the decaying part of the PDF.*/ - while(fs > LAPLACE_MINP && fm >= fl+2*fs) - { - fs *= 2; - fl += fs; - fs = ((fs-2*LAPLACE_MINP)*(opus_int32)decay)>>15; - fs += LAPLACE_MINP; - val++; - } - /* Everything beyond that has probability LAPLACE_MINP. 
*/ - if (fs <= LAPLACE_MINP) - { - int di; - di = (fm-fl)>>(LAPLACE_LOG_MINP+1); - val += di; - fl += 2*di*LAPLACE_MINP; - } - if (fm < fl+fs) - val = -val; - else - fl += fs; - } - celt_assert(fl<32768); - celt_assert(fs>0); - celt_assert(fl<=fm); - celt_assert(fm<IMIN(fl+fs,32768)); - ec_dec_update(dec, fl, IMIN(fl+fs,32768), 32768); - return val; -} diff --git a/thirdparty/opus/celt/laplace.h b/thirdparty/opus/celt/laplace.h deleted file mode 100644 index 46c14b5da5..0000000000 --- a/thirdparty/opus/celt/laplace.h +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright (c) 2007 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include "entenc.h" -#include "entdec.h" - -/** Encode a value that is assumed to be the realisation of a - Laplace-distributed random process - @param enc Entropy encoder state - @param value Value to encode - @param fs Probability of 0, multiplied by 32768 - @param decay Probability of the value +/- 1, multiplied by 16384 -*/ -void ec_laplace_encode(ec_enc *enc, int *value, unsigned fs, int decay); - -/** Decode a value that is assumed to be the realisation of a - Laplace-distributed random process - @param dec Entropy decoder state - @param fs Probability of 0, multiplied by 32768 - @param decay Probability of the value +/- 1, multiplied by 16384 - @return Value decoded - */ -int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay); diff --git a/thirdparty/opus/celt/mathops.c b/thirdparty/opus/celt/mathops.c deleted file mode 100644 index 21a01f52e4..0000000000 --- a/thirdparty/opus/celt/mathops.c +++ /dev/null @@ -1,208 +0,0 @@ -/* Copyright (c) 2002-2008 Jean-Marc Valin - Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/** - @file mathops.h - @brief Various math functions -*/ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "mathops.h" - -/*Compute floor(sqrt(_val)) with exact arithmetic. - This has been tested on all possible 32-bit inputs.*/ -unsigned isqrt32(opus_uint32 _val){ - unsigned b; - unsigned g; - int bshift; - /*Uses the second method from - http://www.azillionmonkeys.com/qed/sqroot.html - The main idea is to search for the largest binary digit b such that - (g+b)*(g+b) <= _val, and add it to the solution g.*/ - g=0; - bshift=(EC_ILOG(_val)-1)>>1; - b=1U<<bshift; - do{ - opus_uint32 t; - t=(((opus_uint32)g<<1)+b)<<bshift; - if(t<=_val){ - g+=b; - _val-=t; - } - b>>=1; - bshift--; - } - while(bshift>=0); - return g; -} - -#ifdef FIXED_POINT - -opus_val32 frac_div32(opus_val32 a, opus_val32 b) -{ - opus_val16 rcp; - opus_val32 result, rem; - int shift = celt_ilog2(b)-29; - a = VSHR32(a,shift); - b = VSHR32(b,shift); - /* 16-bit reciprocal */ - rcp = ROUND16(celt_rcp(ROUND16(b,16)),3); - result = MULT16_32_Q15(rcp, a); - rem = PSHR32(a,2)-MULT32_32_Q31(result, b); - result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2)); - if (result >= 536870912) /* 2^29 */ - return 2147483647; /* 2^31 - 1 */ - else if (result <= -536870912) /* -2^29 */ - return -2147483647; /* -2^31 */ - else - return SHL32(result, 2); -} - -/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */ -opus_val16 celt_rsqrt_norm(opus_val32 x) -{ - opus_val16 n; - opus_val16 r; - opus_val16 r2; - 
opus_val16 y; - /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */ - n = x-32768; - /* Get a rough initial guess for the root. - The optimal minimax quadratic approximation (using relative error) is - r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485). - Coefficients here, and the final result r, are Q14.*/ - r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713)))); - /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14. - We can compute the result from n and r using Q15 multiplies with some - adjustment, carefully done to avoid overflow. - Range of y is [-1564,1594]. */ - r2 = MULT16_16_Q15(r, r); - y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1); - /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5). - This yields the Q14 reciprocal square root of the Q16 x, with a maximum - relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a - peak absolute error of 2.26591/16384. */ - return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y, - SUB16(MULT16_16_Q15(y, 12288), 16384)))); -} - -/** Sqrt approximation (QX input, QX/2 output) */ -opus_val32 celt_sqrt(opus_val32 x) -{ - int k; - opus_val16 n; - opus_val32 rt; - static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664}; - if (x==0) - return 0; - else if (x>=1073741824) - return 32767; - k = (celt_ilog2(x)>>1)-7; - x = VSHR32(x, 2*k); - n = x-32768; - rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], - MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4]))))))))); - rt = VSHR32(rt,7-k); - return rt; -} - -#define L1 32767 -#define L2 -7651 -#define L3 8277 -#define L4 -626 - -static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x) -{ - opus_val16 x2; - - x2 = MULT16_16_P15(x,x); - return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2 - )))))))); -} - -#undef L1 -#undef L2 -#undef L3 -#undef L4 - -opus_val16 celt_cos_norm(opus_val32 x) -{ 
- x = x&0x0001ffff; - if (x>SHL32(EXTEND32(1), 16)) - x = SUB32(SHL32(EXTEND32(1), 17),x); - if (x&0x00007fff) - { - if (x<SHL32(EXTEND32(1), 15)) - { - return _celt_cos_pi_2(EXTRACT16(x)); - } else { - return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x))); - } - } else { - if (x&0x0000ffff) - return 0; - else if (x&0x0001ffff) - return -32767; - else - return 32767; - } -} - -/** Reciprocal approximation (Q15 input, Q16 output) */ -opus_val32 celt_rcp(opus_val32 x) -{ - int i; - opus_val16 n; - opus_val16 r; - celt_assert2(x>0, "celt_rcp() only defined for positive values"); - i = celt_ilog2(x); - /* n is Q15 with range [0,1). */ - n = VSHR32(x,i-15)-32768; - /* Start with a linear approximation: - r = 1.8823529411764706-0.9411764705882353*n. - The coefficients and the result are Q14 in the range [15420,30840].*/ - r = ADD16(30840, MULT16_16_Q15(-15420, n)); - /* Perform two Newton iterations: - r -= r*((r*n)-1.Q15) - = r*((r*n)+(r-1.Q15)). */ - r = SUB16(r, MULT16_16_Q15(r, - ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))); - /* We subtract an extra 1 in the second iteration to avoid overflow; it also - neatly compensates for truncation error in the rest of the process. */ - r = SUB16(r, ADD16(1, MULT16_16_Q15(r, - ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))))); - /* r is now the Q15 solution to 2/(n+1), with a maximum relative error - of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute - error of 1.24665/32768. 
*/ - return VSHR32(EXTEND32(r),i-16); -} - -#endif diff --git a/thirdparty/opus/celt/mathops.h b/thirdparty/opus/celt/mathops.h deleted file mode 100644 index a0525a9610..0000000000 --- a/thirdparty/opus/celt/mathops.h +++ /dev/null @@ -1,258 +0,0 @@ -/* Copyright (c) 2002-2008 Jean-Marc Valin - Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/** - @file mathops.h - @brief Various math functions -*/ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef MATHOPS_H -#define MATHOPS_H - -#include "arch.h" -#include "entcode.h" -#include "os_support.h" - -/* Multiplies two 16-bit fractional values. 
Bit-exactness of this macro is important */ -#define FRAC_MUL16(a,b) ((16384+((opus_int32)(opus_int16)(a)*(opus_int16)(b)))>>15) - -unsigned isqrt32(opus_uint32 _val); - -#ifndef OVERRIDE_CELT_MAXABS16 -static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len) -{ - int i; - opus_val16 maxval = 0; - opus_val16 minval = 0; - for (i=0;i<len;i++) - { - maxval = MAX16(maxval, x[i]); - minval = MIN16(minval, x[i]); - } - return MAX32(EXTEND32(maxval),-EXTEND32(minval)); -} -#endif - -#ifndef OVERRIDE_CELT_MAXABS32 -#ifdef FIXED_POINT -static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len) -{ - int i; - opus_val32 maxval = 0; - opus_val32 minval = 0; - for (i=0;i<len;i++) - { - maxval = MAX32(maxval, x[i]); - minval = MIN32(minval, x[i]); - } - return MAX32(maxval, -minval); -} -#else -#define celt_maxabs32(x,len) celt_maxabs16(x,len) -#endif -#endif - - -#ifndef FIXED_POINT - -#define PI 3.141592653f -#define celt_sqrt(x) ((float)sqrt(x)) -#define celt_rsqrt(x) (1.f/celt_sqrt(x)) -#define celt_rsqrt_norm(x) (celt_rsqrt(x)) -#define celt_cos_norm(x) ((float)cos((.5f*PI)*(x))) -#define celt_rcp(x) (1.f/(x)) -#define celt_div(a,b) ((a)/(b)) -#define frac_div32(a,b) ((float)(a)/(b)) - -#ifdef FLOAT_APPROX - -/* Note: This assumes radix-2 floating point with the exponent at bits 23..30 and an offset of 127 - denorm, +/- inf and NaN are *not* handled */ - -/** Base-2 log approximation (log2(x)). */ -static OPUS_INLINE float celt_log2(float x) -{ - int integer; - float frac; - union { - float f; - opus_uint32 i; - } in; - in.f = x; - integer = (in.i>>23)-127; - in.i -= integer<<23; - frac = in.f - 1.5f; - frac = -0.41445418f + frac*(0.95909232f - + frac*(-0.33951290f + frac*0.16541097f)); - return 1+integer+frac; -} - -/** Base-2 exponential approximation (2^x). 
*/ -static OPUS_INLINE float celt_exp2(float x) -{ - int integer; - float frac; - union { - float f; - opus_uint32 i; - } res; - integer = floor(x); - if (integer < -50) - return 0; - frac = x-integer; - /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */ - res.f = 0.99992522f + frac * (0.69583354f - + frac * (0.22606716f + 0.078024523f*frac)); - res.i = (res.i + (integer<<23)) & 0x7fffffff; - return res.f; -} - -#else -#define celt_log2(x) ((float)(1.442695040888963387*log(x))) -#define celt_exp2(x) ((float)exp(0.6931471805599453094*(x))) -#endif - -#endif - -#ifdef FIXED_POINT - -#include "os_support.h" - -#ifndef OVERRIDE_CELT_ILOG2 -/** Integer log in base2. Undefined for zero and negative numbers */ -static OPUS_INLINE opus_int16 celt_ilog2(opus_int32 x) -{ - celt_assert2(x>0, "celt_ilog2() only defined for strictly positive numbers"); - return EC_ILOG(x)-1; -} -#endif - - -/** Integer log in base2. Defined for zero, but not for negative numbers */ -static OPUS_INLINE opus_int16 celt_zlog2(opus_val32 x) -{ - return x <= 0 ? 0 : celt_ilog2(x); -} - -opus_val16 celt_rsqrt_norm(opus_val32 x); - -opus_val32 celt_sqrt(opus_val32 x); - -opus_val16 celt_cos_norm(opus_val32 x); - -/** Base-2 logarithm approximation (log2(x)). 
(Q14 input, Q10 output) */ -static OPUS_INLINE opus_val16 celt_log2(opus_val32 x) -{ - int i; - opus_val16 n, frac; - /* -0.41509302963303146, 0.9609890551383969, -0.31836011537636605, - 0.15530808010959576, -0.08556153059057618 */ - static const opus_val16 C[5] = {-6801+(1<<(13-DB_SHIFT)), 15746, -5217, 2545, -1401}; - if (x==0) - return -32767; - i = celt_ilog2(x); - n = VSHR32(x,i-15)-32768-16384; - frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, C[4])))))))); - return SHL16(i-13,DB_SHIFT)+SHR16(frac,14-DB_SHIFT); -} - -/* - K0 = 1 - K1 = log(2) - K2 = 3-4*log(2) - K3 = 3*log(2) - 2 -*/ -#define D0 16383 -#define D1 22804 -#define D2 14819 -#define D3 10204 - -static OPUS_INLINE opus_val32 celt_exp2_frac(opus_val16 x) -{ - opus_val16 frac; - frac = SHL16(x, 4); - return ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac)))))); -} -/** Base-2 exponential approximation (2^x). (Q10 input, Q16 output) */ -static OPUS_INLINE opus_val32 celt_exp2(opus_val16 x) -{ - int integer; - opus_val16 frac; - integer = SHR16(x,10); - if (integer>14) - return 0x7f000000; - else if (integer < -15) - return 0; - frac = celt_exp2_frac(x-SHL16(integer,10)); - return VSHR32(EXTEND32(frac), -integer-2); -} - -opus_val32 celt_rcp(opus_val32 x); - -#define celt_div(a,b) MULT32_32_Q31((opus_val32)(a),celt_rcp(b)) - -opus_val32 frac_div32(opus_val32 a, opus_val32 b); - -#define M1 32767 -#define M2 -21 -#define M3 -11943 -#define M4 4936 - -/* Atan approximation using a 4th order polynomial. Input is in Q15 format - and normalized by pi/4. 
Output is in Q15 format */ -static OPUS_INLINE opus_val16 celt_atan01(opus_val16 x) -{ - return MULT16_16_P15(x, ADD32(M1, MULT16_16_P15(x, ADD32(M2, MULT16_16_P15(x, ADD32(M3, MULT16_16_P15(M4, x))))))); -} - -#undef M1 -#undef M2 -#undef M3 -#undef M4 - -/* atan2() approximation valid for positive input values */ -static OPUS_INLINE opus_val16 celt_atan2p(opus_val16 y, opus_val16 x) -{ - if (y < x) - { - opus_val32 arg; - arg = celt_div(SHL32(EXTEND32(y),15),x); - if (arg >= 32767) - arg = 32767; - return SHR16(celt_atan01(EXTRACT16(arg)),1); - } else { - opus_val32 arg; - arg = celt_div(SHL32(EXTEND32(x),15),y); - if (arg >= 32767) - arg = 32767; - return 25736-SHR16(celt_atan01(EXTRACT16(arg)),1); - } -} - -#endif /* FIXED_POINT */ -#endif /* MATHOPS_H */ diff --git a/thirdparty/opus/celt/mdct.c b/thirdparty/opus/celt/mdct.c deleted file mode 100644 index 5315ad11a3..0000000000 --- a/thirdparty/opus/celt/mdct.c +++ /dev/null @@ -1,343 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2008 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* This is a simple MDCT implementation that uses a N/4 complex FFT - to do most of the work. It should be relatively straightforward to - plug in pretty much and FFT here. - - This replaces the Vorbis FFT (and uses the exact same API), which - was a bit too messy and that was ending up duplicating code - (might as well use the same FFT everywhere). - - The algorithm is similar to (and inspired from) Fabrice Bellard's - MDCT implementation in FFMPEG, but has differences in signs, ordering - and scaling in many places. 
-*/ - -#ifndef SKIP_CONFIG_H -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#endif - -#include "mdct.h" -#include "kiss_fft.h" -#include "_kiss_fft_guts.h" -#include <math.h> -#include "os_support.h" -#include "mathops.h" -#include "stack_alloc.h" - -#if defined(MIPSr1_ASM) -#include "mips/mdct_mipsr1.h" -#endif - - -#ifdef CUSTOM_MODES - -int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch) -{ - int i; - kiss_twiddle_scalar *trig; - int shift; - int N2=N>>1; - l->n = N; - l->maxshift = maxshift; - for (i=0;i<=maxshift;i++) - { - if (i==0) - l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0, arch); - else - l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0], arch); -#ifndef ENABLE_TI_DSPLIB55 - if (l->kfft[i]==NULL) - return 0; -#endif - } - l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar)); - if (l->trig==NULL) - return 0; - for (shift=0;shift<=maxshift;shift++) - { - /* We have enough points that sine isn't necessary */ -#if defined(FIXED_POINT) -#if 1 - for (i=0;i<N2;i++) - trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2+16384),N)); -#else - for (i=0;i<N2;i++) - trig[i] = (kiss_twiddle_scalar)MAX32(-32767,MIN32(32767,floor(.5+32768*cos(2*M_PI*(i+.125)/N)))); -#endif -#else - for (i=0;i<N2;i++) - trig[i] = (kiss_twiddle_scalar)cos(2*PI*(i+.125)/N); -#endif - trig += N2; - N2 >>= 1; - N >>= 1; - } - return 1; -} - -void clt_mdct_clear(mdct_lookup *l, int arch) -{ - int i; - for (i=0;i<=l->maxshift;i++) - opus_fft_free(l->kfft[i], arch); - opus_free((kiss_twiddle_scalar*)l->trig); -} - -#endif /* CUSTOM_MODES */ - -/* Forward MDCT trashes the input array */ -#ifndef OVERRIDE_clt_mdct_forward -void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, int overlap, int shift, int stride, int arch) -{ - int i; - int N, N2, N4; - VARDECL(kiss_fft_scalar, f); - VARDECL(kiss_fft_cpx, f2); - const 
kiss_fft_state *st = l->kfft[shift]; - const kiss_twiddle_scalar *trig; - opus_val16 scale; -#ifdef FIXED_POINT - /* Allows us to scale with MULT16_32_Q16(), which is faster than - MULT16_32_Q15() on ARM. */ - int scale_shift = st->scale_shift-1; -#endif - SAVE_STACK; - (void)arch; - scale = st->scale; - - N = l->n; - trig = l->trig; - for (i=0;i<shift;i++) - { - N >>= 1; - trig += N; - } - N2 = N>>1; - N4 = N>>2; - - ALLOC(f, N2, kiss_fft_scalar); - ALLOC(f2, N4, kiss_fft_cpx); - - /* Consider the input to be composed of four blocks: [a, b, c, d] */ - /* Window, shuffle, fold */ - { - /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); - const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); - kiss_fft_scalar * OPUS_RESTRICT yp = f; - const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); - const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; - for(i=0;i<((overlap+3)>>2);i++) - { - /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ - *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); - *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]); - xp1+=2; - xp2-=2; - wp1+=2; - wp2-=2; - } - wp1 = window; - wp2 = window+overlap-1; - for(;i<N4-((overlap+3)>>2);i++) - { - /* Real part arranged as a-bR, Imag part arranged as -c-dR */ - *yp++ = *xp2; - *yp++ = *xp1; - xp1+=2; - xp2-=2; - } - for(;i<N4;i++) - { - /* Real part arranged as a-bR, Imag part arranged as -c-dR */ - *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2); - *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]); - xp1+=2; - xp2-=2; - wp1+=2; - wp2-=2; - } - } - /* Pre-rotation */ - { - kiss_fft_scalar * OPUS_RESTRICT yp = f; - const kiss_twiddle_scalar *t = &trig[0]; - for(i=0;i<N4;i++) - { - kiss_fft_cpx yc; - kiss_twiddle_scalar t0, t1; - kiss_fft_scalar re, im, yr, yi; - t0 = t[i]; - t1 = t[N4+i]; - re = *yp++; - im = *yp++; - 
yr = S_MUL(re,t0) - S_MUL(im,t1); - yi = S_MUL(im,t0) + S_MUL(re,t1); - yc.r = yr; - yc.i = yi; - yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift); - yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift); - f2[st->bitrev[i]] = yc; - } - } - - /* N/4 complex FFT, does not downscale anymore */ - opus_fft_impl(st, f2); - - /* Post-rotate */ - { - /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_cpx * OPUS_RESTRICT fp = f2; - kiss_fft_scalar * OPUS_RESTRICT yp1 = out; - kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); - const kiss_twiddle_scalar *t = &trig[0]; - /* Temp pointers to make it really clear to the compiler what we're doing */ - for(i=0;i<N4;i++) - { - kiss_fft_scalar yr, yi; - yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]); - yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]); - *yp1 = yr; - *yp2 = yi; - fp++; - yp1 += 2*stride; - yp2 -= 2*stride; - } - } - RESTORE_STACK; -} -#endif /* OVERRIDE_clt_mdct_forward */ - -#ifndef OVERRIDE_clt_mdct_backward -void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch) -{ - int i; - int N, N2, N4; - const kiss_twiddle_scalar *trig; - (void) arch; - - N = l->n; - trig = l->trig; - for (i=0;i<shift;i++) - { - N >>= 1; - trig += N; - } - N2 = N>>1; - N4 = N>>2; - - /* Pre-rotate */ - { - /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; - const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); - kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1); - const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; - const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev; - for(i=0;i<N4;i++) - { - int rev; - kiss_fft_scalar yr, yi; - rev = *bitrev++; - yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]); - yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]); - /* 
We swap real and imag because we use an FFT instead of an IFFT. */ - yp[2*rev+1] = yr; - yp[2*rev] = yi; - /* Storing the pre-rotation directly in the bitrev order. */ - xp1+=2*stride; - xp2-=2*stride; - } - } - - opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1))); - - /* Post-rotate and de-shuffle from both ends of the buffer at once to make - it in-place. */ - { - kiss_fft_scalar * yp0 = out+(overlap>>1); - kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2; - const kiss_twiddle_scalar *t = &trig[0]; - /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the - middle pair will be computed twice. */ - for(i=0;i<(N4+1)>>1;i++) - { - kiss_fft_scalar re, im, yr, yi; - kiss_twiddle_scalar t0, t1; - /* We swap real and imag because we're using an FFT instead of an IFFT. */ - re = yp0[1]; - im = yp0[0]; - t0 = t[i]; - t1 = t[N4+i]; - /* We'd scale up by 2 here, but instead it's done when mixing the windows */ - yr = S_MUL(re,t0) + S_MUL(im,t1); - yi = S_MUL(re,t1) - S_MUL(im,t0); - /* We swap real and imag because we're using an FFT instead of an IFFT. 
*/ - re = yp1[1]; - im = yp1[0]; - yp0[0] = yr; - yp1[1] = yi; - - t0 = t[(N4-i-1)]; - t1 = t[(N2-i-1)]; - /* We'd scale up by 2 here, but instead it's done when mixing the windows */ - yr = S_MUL(re,t0) + S_MUL(im,t1); - yi = S_MUL(re,t1) - S_MUL(im,t0); - yp1[0] = yr; - yp0[1] = yi; - yp0 += 2; - yp1 -= 2; - } - } - - /* Mirror on both sides for TDAC */ - { - kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; - kiss_fft_scalar * OPUS_RESTRICT yp1 = out; - const opus_val16 * OPUS_RESTRICT wp1 = window; - const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; - - for(i = 0; i < overlap/2; i++) - { - kiss_fft_scalar x1, x2; - x1 = *xp1; - x2 = *yp1; - *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); - *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); - wp1++; - wp2--; - } - } -} -#endif /* OVERRIDE_clt_mdct_backward */ diff --git a/thirdparty/opus/celt/mdct.h b/thirdparty/opus/celt/mdct.h deleted file mode 100644 index 160ae4e0f3..0000000000 --- a/thirdparty/opus/celt/mdct.h +++ /dev/null @@ -1,112 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2008 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* This is a simple MDCT implementation that uses a N/4 complex FFT - to do most of the work. It should be relatively straightforward to - plug in pretty much and FFT here. - - This replaces the Vorbis FFT (and uses the exact same API), which - was a bit too messy and that was ending up duplicating code - (might as well use the same FFT everywhere). - - The algorithm is similar to (and inspired from) Fabrice Bellard's - MDCT implementation in FFMPEG, but has differences in signs, ordering - and scaling in many places. 
-*/ - -#ifndef MDCT_H -#define MDCT_H - -#include "opus_defines.h" -#include "kiss_fft.h" -#include "arch.h" - -typedef struct { - int n; - int maxshift; - const kiss_fft_state *kfft[4]; - const kiss_twiddle_scalar * OPUS_RESTRICT trig; -} mdct_lookup; - -#if defined(HAVE_ARM_NE10) -#include "arm/mdct_arm.h" -#endif - - -int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch); -void clt_mdct_clear(mdct_lookup *l, int arch); - -/** Compute a forward MDCT and scale by 4/N, trashes the input array */ -void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, int overlap, - int shift, int stride, int arch); - -/** Compute a backward MDCT (no scaling) and performs weighted overlap-add - (scales implicitly by 1/2) */ -void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 * OPUS_RESTRICT window, - int overlap, int shift, int stride, int arch); - -#if !defined(OVERRIDE_OPUS_MDCT) -/* Is run-time CPU detection enabled on this platform? 
*/ -#if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) - -extern void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])( - const mdct_lookup *l, kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window, - int overlap, int shift, int stride, int arch); - -#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \ - ((*CLT_MDCT_FORWARD_IMPL[(arch)&OPUS_ARCHMASK])(_l, _in, _out, \ - _window, _overlap, _shift, \ - _stride, _arch)) - -extern void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])( - const mdct_lookup *l, kiss_fft_scalar *in, - kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window, - int overlap, int shift, int stride, int arch); - -#define clt_mdct_backward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \ - (*CLT_MDCT_BACKWARD_IMPL[(arch)&OPUS_ARCHMASK])(_l, _in, _out, \ - _window, _overlap, _shift, \ - _stride, _arch) - -#else /* if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) */ - -#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \ - clt_mdct_forward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) - -#define clt_mdct_backward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \ - clt_mdct_backward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) - -#endif /* end if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) && !defined(FIXED_POINT) */ -#endif /* end if !defined(OVERRIDE_OPUS_MDCT) */ - -#endif diff --git a/thirdparty/opus/celt/mfrngcod.h b/thirdparty/opus/celt/mfrngcod.h deleted file mode 100644 index 809152a59a..0000000000 --- a/thirdparty/opus/celt/mfrngcod.h +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright (c) 2001-2008 Timothy B. 
Terriberry - Copyright (c) 2008-2009 Xiph.Org Foundation */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#if !defined(_mfrngcode_H) -# define _mfrngcode_H (1) -# include "entcode.h" - -/*Constants used by the entropy encoder/decoder.*/ - -/*The number of bits to output at a time.*/ -# define EC_SYM_BITS (8) -/*The total number of bits in each of the state registers.*/ -# define EC_CODE_BITS (32) -/*The maximum symbol value.*/ -# define EC_SYM_MAX ((1U<<EC_SYM_BITS)-1) -/*Bits to shift by to move a symbol into the high-order position.*/ -# define EC_CODE_SHIFT (EC_CODE_BITS-EC_SYM_BITS-1) -/*Carry bit of the high-order range symbol.*/ -# define EC_CODE_TOP (((opus_uint32)1U)<<(EC_CODE_BITS-1)) -/*Low-order bit of the high-order range symbol.*/ -# define EC_CODE_BOT (EC_CODE_TOP>>EC_SYM_BITS) -/*The number of bits available for the last, partial symbol in the code field.*/ -# define EC_CODE_EXTRA ((EC_CODE_BITS-2)%EC_SYM_BITS+1) -#endif diff --git a/thirdparty/opus/celt/mips/celt_mipsr1.h b/thirdparty/opus/celt/mips/celt_mipsr1.h deleted file mode 100644 index e85661a661..0000000000 --- a/thirdparty/opus/celt/mips/celt_mipsr1.h +++ /dev/null @@ -1,151 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2010 Xiph.Org Foundation - Copyright (c) 2008 Gregory Maxwell - Written by Jean-Marc Valin and Gregory Maxwell */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef __CELT_MIPSR1_H__ -#define __CELT_MIPSR1_H__ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#define CELT_C - -#include "os_support.h" -#include "mdct.h" -#include <math.h> -#include "celt.h" -#include "pitch.h" -#include "bands.h" -#include "modes.h" -#include "entcode.h" -#include "quant_bands.h" -#include "rate.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "float_cast.h" -#include <stdarg.h> -#include "celt_lpc.h" -#include "vq.h" - -#define OVERRIDE_comb_filter -void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, - opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, - const opus_val16 *window, int overlap, int arch) -{ - int i; - opus_val32 x0, x1, x2, x3, x4; - - (void)arch; - - /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */ - opus_val16 g00, g01, g02, g10, g11, g12; - static const opus_val16 gains[3][3] = { - {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)}, - {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)}, - {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}}; - - if (g0==0 && g1==0) - { - /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */ - if (x!=y) - OPUS_MOVE(y, x, N); - return; - } - - g00 = MULT16_16_P15(g0, gains[tapset0][0]); - g01 = MULT16_16_P15(g0, gains[tapset0][1]); - g02 = MULT16_16_P15(g0, gains[tapset0][2]); - 
g10 = MULT16_16_P15(g1, gains[tapset1][0]); - g11 = MULT16_16_P15(g1, gains[tapset1][1]); - g12 = MULT16_16_P15(g1, gains[tapset1][2]); - x1 = x[-T1+1]; - x2 = x[-T1 ]; - x3 = x[-T1-1]; - x4 = x[-T1-2]; - /* If the filter didn't change, we don't need the overlap */ - if (g0==g1 && T0==T1 && tapset0==tapset1) - overlap=0; - - for (i=0;i<overlap;i++) - { - opus_val16 f; - opus_val32 res; - f = MULT16_16_Q15(window[i],window[i]); - x0= x[i-T1+2]; - - asm volatile("MULT $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g00)), "r" ((int)x[i-T0])); - - asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g01)), "r" ((int)ADD32(x[i-T0-1],x[i-T0+1]))); - asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g02)), "r" ((int)ADD32(x[i-T0-2],x[i-T0+2]))); - asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g10)), "r" ((int)x2)); - asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g11)), "r" ((int)ADD32(x3,x1))); - asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g12)), "r" ((int)ADD32(x4,x0))); - - asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15)); - - y[i] = x[i] + res; - - x4=x3; - x3=x2; - x2=x1; - x1=x0; - } - - x4 = x[i-T1-2]; - x3 = x[i-T1-1]; - x2 = x[i-T1]; - x1 = x[i-T1+1]; - - if (g1==0) - { - /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */ - if (x!=y) - OPUS_MOVE(y+overlap, x+overlap, N-overlap); - return; - } - - for (i=overlap;i<N;i++) - { - opus_val32 res; - x0=x[i-T1+2]; - - asm volatile("MULT $ac1, %0, %1" : : "r" ((int)g10), "r" ((int)x2)); - - asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g11), "r" ((int)ADD32(x3,x1))); - asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g12), "r" ((int)ADD32(x4,x0))); - asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15)); - y[i] = x[i] + res; - x4=x3; - x3=x2; - x2=x1; - x1=x0; - } -} - -#endif /* __CELT_MIPSR1_H__ */ diff --git 
a/thirdparty/opus/celt/mips/fixed_generic_mipsr1.h b/thirdparty/opus/celt/mips/fixed_generic_mipsr1.h deleted file mode 100644 index 4a05efbf85..0000000000 --- a/thirdparty/opus/celt/mips/fixed_generic_mipsr1.h +++ /dev/null @@ -1,126 +0,0 @@ -/* Copyright (C) 2007-2009 Xiph.Org Foundation - Copyright (C) 2003-2008 Jean-Marc Valin - Copyright (C) 2007-2008 CSIRO */ -/** - @file fixed_generic.h - @brief Generic fixed-point operations -*/ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifndef CELT_FIXED_GENERIC_MIPSR1_H -#define CELT_FIXED_GENERIC_MIPSR1_H - -#undef MULT16_32_Q15_ADD -static inline int MULT16_32_Q15_ADD(int a, int b, int c, int d) { - int m; - asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b)); - asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d)); - asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15)); - return m; -} - -#undef MULT16_32_Q15_SUB -static inline int MULT16_32_Q15_SUB(int a, int b, int c, int d) { - int m; - asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b)); - asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d)); - asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15)); - return m; -} - -#undef MULT16_16_Q15_ADD -static inline int MULT16_16_Q15_ADD(int a, int b, int c, int d) { - int m; - asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b)); - asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d)); - asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15)); - return m; -} - -#undef MULT16_16_Q15_SUB -static inline int MULT16_16_Q15_SUB(int a, int b, int c, int d) { - int m; - asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b)); - asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d)); - asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15)); - return m; -} - - -#undef MULT16_32_Q16 -static inline int MULT16_32_Q16(int a, int b) -{ - int c; - asm volatile("MULT $ac1,%0, %1" : : "r" (a), "r" (b)); - asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (16)); - return c; -} - -#undef MULT16_32_P16 -static inline int MULT16_32_P16(int a, int b) -{ - int c; - asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b)); - asm volatile("EXTR_R.W %0,$ac1, %1" : "=r" (c): "i" (16)); - return c; -} - -#undef MULT16_32_Q15 -static inline int MULT16_32_Q15(int a, int b) -{ - int c; - asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b)); - asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (15)); - return c; -} - -#undef 
MULT32_32_Q31 -static inline int MULT32_32_Q31(int a, int b) -{ - int r; - asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b)); - asm volatile("EXTR.W %0,$ac1, %1" : "=r" (r): "i" (31)); - return r; -} - -#undef PSHR32 -static inline int PSHR32(int a, int shift) -{ - int r; - asm volatile ("SHRAV_R.W %0, %1, %2" :"=r" (r): "r" (a), "r" (shift)); - return r; -} - -#undef MULT16_16_P15 -static inline int MULT16_16_P15(int a, int b) -{ - int r; - asm volatile ("mul %0, %1, %2" :"=r" (r): "r" (a), "r" (b)); - asm volatile ("SHRA_R.W %0, %1, %2" : "+r" (r): "0" (r), "i"(15)); - return r; -} - -#endif /* CELT_FIXED_GENERIC_MIPSR1_H */ diff --git a/thirdparty/opus/celt/mips/kiss_fft_mipsr1.h b/thirdparty/opus/celt/mips/kiss_fft_mipsr1.h deleted file mode 100644 index 400ca4de9c..0000000000 --- a/thirdparty/opus/celt/mips/kiss_fft_mipsr1.h +++ /dev/null @@ -1,167 +0,0 @@ -/*Copyright (c) 2013, Xiph.Org Foundation and contributors. - - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE.*/ - -#ifndef KISS_FFT_MIPSR1_H -#define KISS_FFT_MIPSR1_H - -#if !defined(KISS_FFT_GUTS_H) -#error "This file should only be included from _kiss_fft_guts.h" -#endif - -#ifdef FIXED_POINT - -#define S_MUL_ADD(a, b, c, d) (S_MUL(a,b)+S_MUL(c,d)) -#define S_MUL_SUB(a, b, c, d) (S_MUL(a,b)-S_MUL(c,d)) - -#undef S_MUL_ADD -static inline int S_MUL_ADD(int a, int b, int c, int d) { - int m; - asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b)); - asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d)); - asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15)); - return m; -} - -#undef S_MUL_SUB -static inline int S_MUL_SUB(int a, int b, int c, int d) { - int m; - asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b)); - asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d)); - asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15)); - return m; -} - -#undef C_MUL -# define C_MUL(m,a,b) (m=C_MUL_fun(a,b)) -static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) { - kiss_fft_cpx m; - - asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r)); - asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i)); - asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15)); - asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i)); - asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r)); - asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15)); - - return 
m; -} -#undef C_MULC -# define C_MULC(m,a,b) (m=C_MULC_fun(a,b)) -static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) { - kiss_fft_cpx m; - - asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r)); - asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i)); - asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15)); - asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r)); - asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i)); - asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15)); - - return m; -} - -#endif /* FIXED_POINT */ - -#define OVERRIDE_kf_bfly5 -static void kf_bfly5( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - int i, u; - kiss_fft_cpx scratch[13]; - - const kiss_twiddle_cpx *tw; - kiss_twiddle_cpx ya,yb; - kiss_fft_cpx * Fout_beg = Fout; - -#ifdef FIXED_POINT - ya.r = 10126; - ya.i = -31164; - yb.r = -26510; - yb.i = -19261; -#else - ya = st->twiddles[fstride*m]; - yb = st->twiddles[fstride*2*m]; -#endif - - tw=st->twiddles; - - for (i=0;i<N;i++) - { - Fout = Fout_beg + i*mm; - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - /* For non-custom modes, m is guaranteed to be a multiple of 4. 
*/ - for ( u=0; u<m; ++u ) { - scratch[0] = *Fout0; - - - C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); - C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); - C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); - C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); - - C_ADD( scratch[7],scratch[1],scratch[4]); - C_SUB( scratch[10],scratch[1],scratch[4]); - C_ADD( scratch[8],scratch[2],scratch[3]); - C_SUB( scratch[9],scratch[2],scratch[3]); - - Fout0->r += scratch[7].r + scratch[8].r; - Fout0->i += scratch[7].i + scratch[8].i; - scratch[5].r = scratch[0].r + S_MUL_ADD(scratch[7].r,ya.r,scratch[8].r,yb.r); - scratch[5].i = scratch[0].i + S_MUL_ADD(scratch[7].i,ya.r,scratch[8].i,yb.r); - - scratch[6].r = S_MUL_ADD(scratch[10].i,ya.i,scratch[9].i,yb.i); - scratch[6].i = -S_MUL_ADD(scratch[10].r,ya.i,scratch[9].r,yb.i); - - C_SUB(*Fout1,scratch[5],scratch[6]); - C_ADD(*Fout4,scratch[5],scratch[6]); - - scratch[11].r = scratch[0].r + S_MUL_ADD(scratch[7].r,yb.r,scratch[8].r,ya.r); - scratch[11].i = scratch[0].i + S_MUL_ADD(scratch[7].i,yb.r,scratch[8].i,ya.r); - - scratch[12].r = S_MUL_SUB(scratch[9].i,ya.i,scratch[10].i,yb.i); - scratch[12].i = S_MUL_SUB(scratch[10].r,yb.i,scratch[9].r,ya.i); - - C_ADD(*Fout2,scratch[11],scratch[12]); - C_SUB(*Fout3,scratch[11],scratch[12]); - - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; - } - } -} - - -#endif /* KISS_FFT_MIPSR1_H */ diff --git a/thirdparty/opus/celt/mips/mdct_mipsr1.h b/thirdparty/opus/celt/mips/mdct_mipsr1.h deleted file mode 100644 index 2934dab776..0000000000 --- a/thirdparty/opus/celt/mips/mdct_mipsr1.h +++ /dev/null @@ -1,288 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2008 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* This is a simple MDCT implementation that uses a N/4 complex FFT - to do most of the work. It should be relatively straightforward to - plug in pretty much and FFT here. - - This replaces the Vorbis FFT (and uses the exact same API), which - was a bit too messy and that was ending up duplicating code - (might as well use the same FFT everywhere). - - The algorithm is similar to (and inspired from) Fabrice Bellard's - MDCT implementation in FFMPEG, but has differences in signs, ordering - and scaling in many places. 
-*/ -#ifndef __MDCT_MIPSR1_H__ -#define __MDCT_MIPSR1_H__ - -#ifndef SKIP_CONFIG_H -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#endif - -#include "mdct.h" -#include "kiss_fft.h" -#include "_kiss_fft_guts.h" -#include <math.h> -#include "os_support.h" -#include "mathops.h" -#include "stack_alloc.h" - -/* Forward MDCT trashes the input array */ -#define OVERRIDE_clt_mdct_forward -void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 *window, int overlap, int shift, int stride, int arch) -{ - int i; - int N, N2, N4; - VARDECL(kiss_fft_scalar, f); - VARDECL(kiss_fft_cpx, f2); - const kiss_fft_state *st = l->kfft[shift]; - const kiss_twiddle_scalar *trig; - opus_val16 scale; -#ifdef FIXED_POINT - /* Allows us to scale with MULT16_32_Q16(), which is faster than - MULT16_32_Q15() on ARM. */ - int scale_shift = st->scale_shift-1; -#endif - - (void)arch; - - SAVE_STACK; - scale = st->scale; - - N = l->n; - trig = l->trig; - for (i=0;i<shift;i++) - { - N >>= 1; - trig += N; - } - N2 = N>>1; - N4 = N>>2; - - ALLOC(f, N2, kiss_fft_scalar); - ALLOC(f2, N4, kiss_fft_cpx); - - /* Consider the input to be composed of four blocks: [a, b, c, d] */ - /* Window, shuffle, fold */ - { - /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); - const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); - kiss_fft_scalar * OPUS_RESTRICT yp = f; - const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); - const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; - for(i=0;i<((overlap+3)>>2);i++) - { - /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ - *yp++ = S_MUL_ADD(*wp2, xp1[N2],*wp1,*xp2); - *yp++ = S_MUL_SUB(*wp1, *xp1,*wp2, xp2[-N2]); - xp1+=2; - xp2-=2; - wp1+=2; - wp2-=2; - } - wp1 = window; - wp2 = window+overlap-1; - for(;i<N4-((overlap+3)>>2);i++) - { - /* Real part arranged as a-bR, Imag part 
arranged as -c-dR */ - *yp++ = *xp2; - *yp++ = *xp1; - xp1+=2; - xp2-=2; - } - for(;i<N4;i++) - { - /* Real part arranged as a-bR, Imag part arranged as -c-dR */ - *yp++ = S_MUL_SUB(*wp2, *xp2, *wp1, xp1[-N2]); - *yp++ = S_MUL_ADD(*wp2, *xp1, *wp1, xp2[N2]); - xp1+=2; - xp2-=2; - wp1+=2; - wp2-=2; - } - } - /* Pre-rotation */ - { - kiss_fft_scalar * OPUS_RESTRICT yp = f; - const kiss_twiddle_scalar *t = &trig[0]; - for(i=0;i<N4;i++) - { - kiss_fft_cpx yc; - kiss_twiddle_scalar t0, t1; - kiss_fft_scalar re, im, yr, yi; - t0 = t[i]; - t1 = t[N4+i]; - re = *yp++; - im = *yp++; - - yr = S_MUL_SUB(re,t0,im,t1); - yi = S_MUL_ADD(im,t0,re,t1); - - yc.r = yr; - yc.i = yi; - yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift); - yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift); - f2[st->bitrev[i]] = yc; - } - } - - /* N/4 complex FFT, does not downscale anymore */ - opus_fft_impl(st, f2); - - /* Post-rotate */ - { - /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_cpx * OPUS_RESTRICT fp = f2; - kiss_fft_scalar * OPUS_RESTRICT yp1 = out; - kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); - const kiss_twiddle_scalar *t = &trig[0]; - /* Temp pointers to make it really clear to the compiler what we're doing */ - for(i=0;i<N4;i++) - { - kiss_fft_scalar yr, yi; - yr = S_MUL_SUB(fp->i,t[N4+i] , fp->r,t[i]); - yi = S_MUL_ADD(fp->r,t[N4+i] ,fp->i,t[i]); - *yp1 = yr; - *yp2 = yi; - fp++; - yp1 += 2*stride; - yp2 -= 2*stride; - } - } - RESTORE_STACK; -} - -#define OVERRIDE_clt_mdct_backward -void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, - const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch) -{ - int i; - int N, N2, N4; - const kiss_twiddle_scalar *trig; - - (void)arch; - - N = l->n; - trig = l->trig; - for (i=0;i<shift;i++) - { - N >>= 1; - trig += N; - } - N2 = N>>1; - N4 = N>>2; - - /* Pre-rotate */ - { - /* Temp pointers to make it 
really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; - const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); - kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1); - const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; - const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev; - for(i=0;i<N4;i++) - { - int rev; - kiss_fft_scalar yr, yi; - rev = *bitrev++; - yr = S_MUL_ADD(*xp2, t[i] , *xp1, t[N4+i]); - yi = S_MUL_SUB(*xp1, t[i] , *xp2, t[N4+i]); - /* We swap real and imag because we use an FFT instead of an IFFT. */ - yp[2*rev+1] = yr; - yp[2*rev] = yi; - /* Storing the pre-rotation directly in the bitrev order. */ - xp1+=2*stride; - xp2-=2*stride; - } - } - - opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1))); - - /* Post-rotate and de-shuffle from both ends of the buffer at once to make - it in-place. */ - { - kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); - kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; - const kiss_twiddle_scalar *t = &trig[0]; - /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the - middle pair will be computed twice. */ - for(i=0;i<(N4+1)>>1;i++) - { - kiss_fft_scalar re, im, yr, yi; - kiss_twiddle_scalar t0, t1; - /* We swap real and imag because we're using an FFT instead of an IFFT. */ - re = yp0[1]; - im = yp0[0]; - t0 = t[i]; - t1 = t[N4+i]; - /* We'd scale up by 2 here, but instead it's done when mixing the windows */ - yr = S_MUL_ADD(re,t0 , im,t1); - yi = S_MUL_SUB(re,t1 , im,t0); - /* We swap real and imag because we're using an FFT instead of an IFFT. 
*/ - re = yp1[1]; - im = yp1[0]; - yp0[0] = yr; - yp1[1] = yi; - - t0 = t[(N4-i-1)]; - t1 = t[(N2-i-1)]; - /* We'd scale up by 2 here, but instead it's done when mixing the windows */ - yr = S_MUL_ADD(re,t0,im,t1); - yi = S_MUL_SUB(re,t1,im,t0); - yp1[0] = yr; - yp0[1] = yi; - yp0 += 2; - yp1 -= 2; - } - } - - /* Mirror on both sides for TDAC */ - { - kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; - kiss_fft_scalar * OPUS_RESTRICT yp1 = out; - const opus_val16 * OPUS_RESTRICT wp1 = window; - const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; - - for(i = 0; i < overlap/2; i++) - { - kiss_fft_scalar x1, x2; - x1 = *xp1; - x2 = *yp1; - *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); - *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); - wp1++; - wp2--; - } - } -} -#endif /* __MDCT_MIPSR1_H__ */ diff --git a/thirdparty/opus/celt/mips/pitch_mipsr1.h b/thirdparty/opus/celt/mips/pitch_mipsr1.h deleted file mode 100644 index a9500aff58..0000000000 --- a/thirdparty/opus/celt/mips/pitch_mipsr1.h +++ /dev/null @@ -1,161 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/** - @file pitch.h - @brief Pitch analysis - */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef PITCH_MIPSR1_H -#define PITCH_MIPSR1_H - -#define OVERRIDE_DUAL_INNER_PROD -static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, - int N, opus_val32 *xy1, opus_val32 *xy2, int arch) -{ - int j; - opus_val32 xy01=0; - opus_val32 xy02=0; - - (void)arch; - - asm volatile("MULT $ac1, $0, $0"); - asm volatile("MULT $ac2, $0, $0"); - /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */ - for (j=0;j<N;j++) - { - asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j])); - asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j])); - ++j; - asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j])); - asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j])); - } - asm volatile ("mflo %0, $ac1": "=r"(xy01)); - asm volatile ("mflo %0, $ac2": "=r"(xy02)); - *xy1 = xy01; - *xy2 = xy02; -} - -static inline void xcorr_kernel_mips(const opus_val16 * x, - const opus_val16 * y, opus_val32 sum[4], int len) -{ - int j; - opus_val16 y_0, y_1, y_2, y_3; - - opus_int64 sum_0, sum_1, sum_2, sum_3; - sum_0 = (opus_int64)sum[0]; - sum_1 = (opus_int64)sum[1]; - sum_2 = (opus_int64)sum[2]; - sum_3 = (opus_int64)sum[3]; - - y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */ - y_0=*y++; - y_1=*y++; - y_2=*y++; - for (j=0;j<len-3;j+=4) - { - opus_val16 tmp; - tmp = *x++; - y_3=*y++; - - sum_0 = __builtin_mips_madd( sum_0, 
tmp, y_0); - sum_1 = __builtin_mips_madd( sum_1, tmp, y_1); - sum_2 = __builtin_mips_madd( sum_2, tmp, y_2); - sum_3 = __builtin_mips_madd( sum_3, tmp, y_3); - - tmp=*x++; - y_0=*y++; - - sum_0 = __builtin_mips_madd( sum_0, tmp, y_1 ); - sum_1 = __builtin_mips_madd( sum_1, tmp, y_2 ); - sum_2 = __builtin_mips_madd( sum_2, tmp, y_3); - sum_3 = __builtin_mips_madd( sum_3, tmp, y_0); - - tmp=*x++; - y_1=*y++; - - sum_0 = __builtin_mips_madd( sum_0, tmp, y_2 ); - sum_1 = __builtin_mips_madd( sum_1, tmp, y_3 ); - sum_2 = __builtin_mips_madd( sum_2, tmp, y_0); - sum_3 = __builtin_mips_madd( sum_3, tmp, y_1); - - - tmp=*x++; - y_2=*y++; - - sum_0 = __builtin_mips_madd( sum_0, tmp, y_3 ); - sum_1 = __builtin_mips_madd( sum_1, tmp, y_0 ); - sum_2 = __builtin_mips_madd( sum_2, tmp, y_1); - sum_3 = __builtin_mips_madd( sum_3, tmp, y_2); - - } - if (j++<len) - { - opus_val16 tmp = *x++; - y_3=*y++; - - sum_0 = __builtin_mips_madd( sum_0, tmp, y_0 ); - sum_1 = __builtin_mips_madd( sum_1, tmp, y_1 ); - sum_2 = __builtin_mips_madd( sum_2, tmp, y_2); - sum_3 = __builtin_mips_madd( sum_3, tmp, y_3); - } - - if (j++<len) - { - opus_val16 tmp=*x++; - y_0=*y++; - - sum_0 = __builtin_mips_madd( sum_0, tmp, y_1 ); - sum_1 = __builtin_mips_madd( sum_1, tmp, y_2 ); - sum_2 = __builtin_mips_madd( sum_2, tmp, y_3); - sum_3 = __builtin_mips_madd( sum_3, tmp, y_0); - } - - if (j<len) - { - opus_val16 tmp=*x++; - y_1=*y++; - - sum_0 = __builtin_mips_madd( sum_0, tmp, y_2 ); - sum_1 = __builtin_mips_madd( sum_1, tmp, y_3 ); - sum_2 = __builtin_mips_madd( sum_2, tmp, y_0); - sum_3 = __builtin_mips_madd( sum_3, tmp, y_1); - - } - - sum[0] = (opus_val32)sum_0; - sum[1] = (opus_val32)sum_1; - sum[2] = (opus_val32)sum_2; - sum[3] = (opus_val32)sum_3; -} - -#define OVERRIDE_XCORR_KERNEL -#define xcorr_kernel(x, y, sum, len, arch) \ - ((void)(arch), xcorr_kernel_mips(x, y, sum, len)) - -#endif /* PITCH_MIPSR1_H */ diff --git a/thirdparty/opus/celt/mips/vq_mipsr1.h 
b/thirdparty/opus/celt/mips/vq_mipsr1.h deleted file mode 100644 index 54cef86133..0000000000 --- a/thirdparty/opus/celt/mips/vq_mipsr1.h +++ /dev/null @@ -1,125 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifndef __VQ_MIPSR1_H__ -#define __VQ_MIPSR1_H__ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "mathops.h" -#include "arch.h" - -static unsigned extract_collapse_mask(int *iy, int N, int B); -static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X, int N, opus_val32 Ryy, opus_val16 gain); -static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread); -static void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch); - -#define OVERRIDE_vq_exp_rotation1 -static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) -{ - int i; - opus_val16 ms; - celt_norm *Xptr; - Xptr = X; - ms = NEG16(s); - for (i=0;i<len-stride;i++) - { - celt_norm x1, x2; - x1 = Xptr[0]; - x2 = Xptr[stride]; - Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); - *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); - } - Xptr = &X[len-2*stride-1]; - for (i=len-2*stride-1;i>=0;i--) - { - celt_norm x1, x2; - x1 = Xptr[0]; - x2 = Xptr[stride]; - Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); - *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); - } -} - -#define OVERRIDE_renormalise_vector - -#define renormalise_vector(X, N, gain, arch) \ - (renormalise_vector_mips(X, N, gain, arch)) - -void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch) -{ - int i; -#ifdef FIXED_POINT - int k; -#endif - opus_val32 E = EPSILON; - opus_val16 g; - opus_val32 t; - celt_norm *xptr = X; - int X0, X1; - - (void)arch; - - asm volatile("mult $ac1, $0, $0"); - asm volatile("MTLO %0, $ac1" : :"r" (E)); - /*if(N %4) - printf("error");*/ - for (i=0;i<N-2;i+=2) - { - X0 = (int)*xptr++; - asm volatile("MADD $ac1, %0, %1" : : "r" (X0), "r" (X0)); - - X1 = (int)*xptr++; - asm volatile("MADD $ac1, %0, %1" : : "r" (X1), "r" (X1)); - } - - for (;i<N;i++) - { - X0 = (int)*xptr++; - asm volatile("MADD $ac1, 
%0, %1" : : "r" (X0), "r" (X0)); - } - - asm volatile("MFLO %0, $ac1" : "=r" (E)); -#ifdef FIXED_POINT - k = celt_ilog2(E)>>1; -#endif - t = VSHR32(E, 2*(k-7)); - g = MULT16_16_P15(celt_rsqrt_norm(t),gain); - - xptr = X; - for (i=0;i<N;i++) - { - *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1)); - xptr++; - } - /*return celt_sqrt(E);*/ -} - -#endif /* __VQ_MIPSR1_H__ */ diff --git a/thirdparty/opus/celt/modes.c b/thirdparty/opus/celt/modes.c deleted file mode 100644 index 911686e905..0000000000 --- a/thirdparty/opus/celt/modes.c +++ /dev/null @@ -1,442 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Copyright (c) 2008 Gregory Maxwell - Written by Jean-Marc Valin and Gregory Maxwell */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "celt.h" -#include "modes.h" -#include "rate.h" -#include "os_support.h" -#include "stack_alloc.h" -#include "quant_bands.h" -#include "cpu_support.h" - -static const opus_int16 eband5ms[] = { -/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 */ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100 -}; - -/* Alternate tuning (partially derived from Vorbis) */ -#define BITALLOC_SIZE 11 -/* Bit allocation table in units of 1/32 bit/sample (0.1875 dB SNR) */ -static const unsigned char band_allocation[] = { -/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 90, 80, 75, 69, 63, 56, 49, 40, 34, 29, 20, 18, 10, 0, 0, 0, 0, 0, 0, 0, 0, -110,100, 90, 84, 78, 71, 65, 58, 51, 45, 39, 32, 26, 20, 12, 0, 0, 0, 0, 0, 0, -118,110,103, 93, 86, 80, 75, 70, 65, 59, 53, 47, 40, 31, 23, 15, 4, 0, 0, 0, 0, -126,119,112,104, 95, 89, 83, 78, 72, 66, 60, 54, 47, 39, 32, 25, 17, 12, 1, 0, 0, -134,127,120,114,103, 97, 91, 85, 78, 72, 66, 60, 54, 47, 41, 35, 29, 23, 16, 10, 1, -144,137,130,124,113,107,101, 95, 88, 82, 76, 70, 64, 57, 51, 45, 39, 33, 26, 15, 1, -152,145,138,132,123,117,111,105, 98, 92, 86, 80, 74, 67, 61, 55, 49, 43, 36, 20, 1, -162,155,148,142,133,127,121,115,108,102, 96, 90, 84, 77, 71, 65, 59, 53, 46, 30, 1, 
-172,165,158,152,143,137,131,125,118,112,106,100, 94, 87, 81, 75, 69, 63, 56, 45, 20, -200,200,200,200,200,200,200,200,198,193,188,183,178,173,168,163,158,153,148,129,104, -}; - -#ifndef CUSTOM_MODES_ONLY - #ifdef FIXED_POINT - #include "static_modes_fixed.h" - #else - #include "static_modes_float.h" - #endif -#endif /* CUSTOM_MODES_ONLY */ - -#ifndef M_PI -#define M_PI 3.141592653 -#endif - -#ifdef CUSTOM_MODES - -/* Defining 25 critical bands for the full 0-20 kHz audio bandwidth - Taken from http://ccrma.stanford.edu/~jos/bbt/Bark_Frequency_Scale.html */ -#define BARK_BANDS 25 -static const opus_int16 bark_freq[BARK_BANDS+1] = { - 0, 100, 200, 300, 400, - 510, 630, 770, 920, 1080, - 1270, 1480, 1720, 2000, 2320, - 2700, 3150, 3700, 4400, 5300, - 6400, 7700, 9500, 12000, 15500, - 20000}; - -static opus_int16 *compute_ebands(opus_int32 Fs, int frame_size, int res, int *nbEBands) -{ - opus_int16 *eBands; - int i, j, lin, low, high, nBark, offset=0; - - /* All modes that have 2.5 ms short blocks use the same definition */ - if (Fs == 400*(opus_int32)frame_size) - { - *nbEBands = sizeof(eband5ms)/sizeof(eband5ms[0])-1; - eBands = opus_alloc(sizeof(opus_int16)*(*nbEBands+1)); - for (i=0;i<*nbEBands+1;i++) - eBands[i] = eband5ms[i]; - return eBands; - } - /* Find the number of critical bands supported by our sampling rate */ - for (nBark=1;nBark<BARK_BANDS;nBark++) - if (bark_freq[nBark+1]*2 >= Fs) - break; - - /* Find where the linear part ends (i.e. 
where the spacing is more than min_width */ - for (lin=0;lin<nBark;lin++) - if (bark_freq[lin+1]-bark_freq[lin] >= res) - break; - - low = (bark_freq[lin]+res/2)/res; - high = nBark-lin; - *nbEBands = low+high; - eBands = opus_alloc(sizeof(opus_int16)*(*nbEBands+2)); - - if (eBands==NULL) - return NULL; - - /* Linear spacing (min_width) */ - for (i=0;i<low;i++) - eBands[i] = i; - if (low>0) - offset = eBands[low-1]*res - bark_freq[lin-1]; - /* Spacing follows critical bands */ - for (i=0;i<high;i++) - { - int target = bark_freq[lin+i]; - /* Round to an even value */ - eBands[i+low] = (target+offset/2+res)/(2*res)*2; - offset = eBands[i+low]*res - target; - } - /* Enforce the minimum spacing at the boundary */ - for (i=0;i<*nbEBands;i++) - if (eBands[i] < i) - eBands[i] = i; - /* Round to an even value */ - eBands[*nbEBands] = (bark_freq[nBark]+res)/(2*res)*2; - if (eBands[*nbEBands] > frame_size) - eBands[*nbEBands] = frame_size; - for (i=1;i<*nbEBands-1;i++) - { - if (eBands[i+1]-eBands[i] < eBands[i]-eBands[i-1]) - { - eBands[i] -= (2*eBands[i]-eBands[i-1]-eBands[i+1])/2; - } - } - /* Remove any empty bands. */ - for (i=j=0;i<*nbEBands;i++) - if(eBands[i+1]>eBands[j]) - eBands[++j]=eBands[i+1]; - *nbEBands=j; - - for (i=1;i<*nbEBands;i++) - { - /* Every band must be smaller than the last band. */ - celt_assert(eBands[i]-eBands[i-1]<=eBands[*nbEBands]-eBands[*nbEBands-1]); - /* Each band must be no larger than twice the size of the previous one. 
*/ - celt_assert(eBands[i+1]-eBands[i]<=2*(eBands[i]-eBands[i-1])); - } - - return eBands; -} - -static void compute_allocation_table(CELTMode *mode) -{ - int i, j; - unsigned char *allocVectors; - int maxBands = sizeof(eband5ms)/sizeof(eband5ms[0])-1; - - mode->nbAllocVectors = BITALLOC_SIZE; - allocVectors = opus_alloc(sizeof(unsigned char)*(BITALLOC_SIZE*mode->nbEBands)); - if (allocVectors==NULL) - return; - - /* Check for standard mode */ - if (mode->Fs == 400*(opus_int32)mode->shortMdctSize) - { - for (i=0;i<BITALLOC_SIZE*mode->nbEBands;i++) - allocVectors[i] = band_allocation[i]; - mode->allocVectors = allocVectors; - return; - } - /* If not the standard mode, interpolate */ - /* Compute per-codec-band allocation from per-critical-band matrix */ - for (i=0;i<BITALLOC_SIZE;i++) - { - for (j=0;j<mode->nbEBands;j++) - { - int k; - for (k=0;k<maxBands;k++) - { - if (400*(opus_int32)eband5ms[k] > mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize) - break; - } - if (k>maxBands-1) - allocVectors[i*mode->nbEBands+j] = band_allocation[i*maxBands + maxBands-1]; - else { - opus_int32 a0, a1; - a1 = mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize - 400*(opus_int32)eband5ms[k-1]; - a0 = 400*(opus_int32)eband5ms[k] - mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize; - allocVectors[i*mode->nbEBands+j] = (a0*band_allocation[i*maxBands+k-1] - + a1*band_allocation[i*maxBands+k])/(a0+a1); - } - } - } - - /*printf ("\n"); - for (i=0;i<BITALLOC_SIZE;i++) - { - for (j=0;j<mode->nbEBands;j++) - printf ("%d ", allocVectors[i*mode->nbEBands+j]); - printf ("\n"); - } - exit(0);*/ - - mode->allocVectors = allocVectors; -} - -#endif /* CUSTOM_MODES */ - -CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error) -{ - int i; -#ifdef CUSTOM_MODES - CELTMode *mode=NULL; - int res; - opus_val16 *window; - opus_int16 *logN; - int LM; - int arch = opus_select_arch(); - ALLOC_STACK; -#if !defined(VAR_ARRAYS) && !defined(USE_ALLOCA) - if 
(global_stack==NULL) - goto failure; -#endif -#endif - -#ifndef CUSTOM_MODES_ONLY - for (i=0;i<TOTAL_MODES;i++) - { - int j; - for (j=0;j<4;j++) - { - if (Fs == static_mode_list[i]->Fs && - (frame_size<<j) == static_mode_list[i]->shortMdctSize*static_mode_list[i]->nbShortMdcts) - { - if (error) - *error = OPUS_OK; - return (CELTMode*)static_mode_list[i]; - } - } - } -#endif /* CUSTOM_MODES_ONLY */ - -#ifndef CUSTOM_MODES - if (error) - *error = OPUS_BAD_ARG; - return NULL; -#else - - /* The good thing here is that permutation of the arguments will automatically be invalid */ - - if (Fs < 8000 || Fs > 96000) - { - if (error) - *error = OPUS_BAD_ARG; - return NULL; - } - if (frame_size < 40 || frame_size > 1024 || frame_size%2!=0) - { - if (error) - *error = OPUS_BAD_ARG; - return NULL; - } - /* Frames of less than 1ms are not supported. */ - if ((opus_int32)frame_size*1000 < Fs) - { - if (error) - *error = OPUS_BAD_ARG; - return NULL; - } - - if ((opus_int32)frame_size*75 >= Fs && (frame_size%16)==0) - { - LM = 3; - } else if ((opus_int32)frame_size*150 >= Fs && (frame_size%8)==0) - { - LM = 2; - } else if ((opus_int32)frame_size*300 >= Fs && (frame_size%4)==0) - { - LM = 1; - } else - { - LM = 0; - } - - /* Shorts longer than 3.3ms are not supported. */ - if ((opus_int32)(frame_size>>LM)*300 > Fs) - { - if (error) - *error = OPUS_BAD_ARG; - return NULL; - } - - mode = opus_alloc(sizeof(CELTMode)); - if (mode==NULL) - goto failure; - mode->Fs = Fs; - - /* Pre/de-emphasis depends on sampling rate. The "standard" pre-emphasis - is defined as A(z) = 1 - 0.85*z^-1 at 48 kHz. Other rates should - approximate that. 
*/ - if(Fs < 12000) /* 8 kHz */ - { - mode->preemph[0] = QCONST16(0.3500061035f, 15); - mode->preemph[1] = -QCONST16(0.1799926758f, 15); - mode->preemph[2] = QCONST16(0.2719968125f, SIG_SHIFT); /* exact 1/preemph[3] */ - mode->preemph[3] = QCONST16(3.6765136719f, 13); - } else if(Fs < 24000) /* 16 kHz */ - { - mode->preemph[0] = QCONST16(0.6000061035f, 15); - mode->preemph[1] = -QCONST16(0.1799926758f, 15); - mode->preemph[2] = QCONST16(0.4424998650f, SIG_SHIFT); /* exact 1/preemph[3] */ - mode->preemph[3] = QCONST16(2.2598876953f, 13); - } else if(Fs < 40000) /* 32 kHz */ - { - mode->preemph[0] = QCONST16(0.7799987793f, 15); - mode->preemph[1] = -QCONST16(0.1000061035f, 15); - mode->preemph[2] = QCONST16(0.7499771125f, SIG_SHIFT); /* exact 1/preemph[3] */ - mode->preemph[3] = QCONST16(1.3333740234f, 13); - } else /* 48 kHz */ - { - mode->preemph[0] = QCONST16(0.8500061035f, 15); - mode->preemph[1] = QCONST16(0.0f, 15); - mode->preemph[2] = QCONST16(1.f, SIG_SHIFT); - mode->preemph[3] = QCONST16(1.f, 13); - } - - mode->maxLM = LM; - mode->nbShortMdcts = 1<<LM; - mode->shortMdctSize = frame_size/mode->nbShortMdcts; - res = (mode->Fs+mode->shortMdctSize)/(2*mode->shortMdctSize); - - mode->eBands = compute_ebands(Fs, mode->shortMdctSize, res, &mode->nbEBands); - if (mode->eBands==NULL) - goto failure; -#if !defined(SMALL_FOOTPRINT) - /* Make sure we don't allocate a band larger than our PVQ table. - 208 should be enough, but let's be paranoid. 
*/ - if ((mode->eBands[mode->nbEBands] - mode->eBands[mode->nbEBands-1])<<LM > - 208) { - goto failure; - } -#endif - - mode->effEBands = mode->nbEBands; - while (mode->eBands[mode->effEBands] > mode->shortMdctSize) - mode->effEBands--; - - /* Overlap must be divisible by 4 */ - mode->overlap = ((mode->shortMdctSize>>2)<<2); - - compute_allocation_table(mode); - if (mode->allocVectors==NULL) - goto failure; - - window = (opus_val16*)opus_alloc(mode->overlap*sizeof(opus_val16)); - if (window==NULL) - goto failure; - -#ifndef FIXED_POINT - for (i=0;i<mode->overlap;i++) - window[i] = Q15ONE*sin(.5*M_PI* sin(.5*M_PI*(i+.5)/mode->overlap) * sin(.5*M_PI*(i+.5)/mode->overlap)); -#else - for (i=0;i<mode->overlap;i++) - window[i] = MIN32(32767,floor(.5+32768.*sin(.5*M_PI* sin(.5*M_PI*(i+.5)/mode->overlap) * sin(.5*M_PI*(i+.5)/mode->overlap)))); -#endif - mode->window = window; - - logN = (opus_int16*)opus_alloc(mode->nbEBands*sizeof(opus_int16)); - if (logN==NULL) - goto failure; - - for (i=0;i<mode->nbEBands;i++) - logN[i] = log2_frac(mode->eBands[i+1]-mode->eBands[i], BITRES); - mode->logN = logN; - - compute_pulse_cache(mode, mode->maxLM); - - if (clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts, - mode->maxLM, arch) == 0) - goto failure; - - if (error) - *error = OPUS_OK; - - return mode; -failure: - if (error) - *error = OPUS_ALLOC_FAIL; - if (mode!=NULL) - opus_custom_mode_destroy(mode); - return NULL; -#endif /* !CUSTOM_MODES */ -} - -#ifdef CUSTOM_MODES -void opus_custom_mode_destroy(CELTMode *mode) -{ - int arch = opus_select_arch(); - - if (mode == NULL) - return; -#ifndef CUSTOM_MODES_ONLY - { - int i; - for (i=0;i<TOTAL_MODES;i++) - { - if (mode == static_mode_list[i]) - { - return; - } - } - } -#endif /* CUSTOM_MODES_ONLY */ - opus_free((opus_int16*)mode->eBands); - opus_free((opus_int16*)mode->allocVectors); - - opus_free((opus_val16*)mode->window); - opus_free((opus_int16*)mode->logN); - - opus_free((opus_int16*)mode->cache.index); - 
opus_free((unsigned char*)mode->cache.bits); - opus_free((unsigned char*)mode->cache.caps); - clt_mdct_clear(&mode->mdct, arch); - - opus_free((CELTMode *)mode); -} -#endif diff --git a/thirdparty/opus/celt/modes.h b/thirdparty/opus/celt/modes.h deleted file mode 100644 index be813ccc8b..0000000000 --- a/thirdparty/opus/celt/modes.h +++ /dev/null @@ -1,75 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Copyright (c) 2008 Gregory Maxwell - Written by Jean-Marc Valin and Gregory Maxwell */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifndef MODES_H -#define MODES_H - -#include "opus_types.h" -#include "celt.h" -#include "arch.h" -#include "mdct.h" -#include "entenc.h" -#include "entdec.h" - -#define MAX_PERIOD 1024 - -typedef struct { - int size; - const opus_int16 *index; - const unsigned char *bits; - const unsigned char *caps; -} PulseCache; - -/** Mode definition (opaque) - @brief Mode definition - */ -struct OpusCustomMode { - opus_int32 Fs; - int overlap; - - int nbEBands; - int effEBands; - opus_val16 preemph[4]; - const opus_int16 *eBands; /**< Definition for each "pseudo-critical band" */ - - int maxLM; - int nbShortMdcts; - int shortMdctSize; - - int nbAllocVectors; /**< Number of lines in the matrix below */ - const unsigned char *allocVectors; /**< Number of bits in each band for several rates */ - const opus_int16 *logN; - - const opus_val16 *window; - mdct_lookup mdct; - PulseCache cache; -}; - - -#endif diff --git a/thirdparty/opus/celt/opus_custom_demo.c b/thirdparty/opus/celt/opus_custom_demo.c deleted file mode 100644 index ae41c0de5a..0000000000 --- a/thirdparty/opus/celt/opus_custom_demo.c +++ /dev/null @@ -1,210 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus_custom.h" -#include "arch.h" -#include <stdio.h> -#include <stdlib.h> -#include <math.h> -#include <string.h> - -#define MAX_PACKET 1275 - -int main(int argc, char *argv[]) -{ - int err; - char *inFile, *outFile; - FILE *fin, *fout; - OpusCustomMode *mode=NULL; - OpusCustomEncoder *enc; - OpusCustomDecoder *dec; - int len; - opus_int32 frame_size, channels, rate; - int bytes_per_packet; - unsigned char data[MAX_PACKET]; - int complexity; -#if !(defined (FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH) - int i; - double rmsd = 0; -#endif - int count = 0; - opus_int32 skip; - opus_int16 *in, *out; - if (argc != 9 && argc != 8 && argc != 7) - { - fprintf (stderr, "Usage: test_opus_custom <rate> <channels> <frame size> " - " <bytes per packet> [<complexity> [packet loss rate]] " - "<input> <output>\n"); - return 1; - } - - rate = (opus_int32)atol(argv[1]); - channels = atoi(argv[2]); - frame_size = atoi(argv[3]); - mode = opus_custom_mode_create(rate, frame_size, NULL); - if (mode == NULL) - { - fprintf(stderr, "failed to create a mode\n"); - return 1; - } - - bytes_per_packet = atoi(argv[4]); - if (bytes_per_packet < 
0 || bytes_per_packet > MAX_PACKET) - { - fprintf (stderr, "bytes per packet must be between 0 and %d\n", - MAX_PACKET); - return 1; - } - - inFile = argv[argc-2]; - fin = fopen(inFile, "rb"); - if (!fin) - { - fprintf (stderr, "Could not open input file %s\n", argv[argc-2]); - return 1; - } - outFile = argv[argc-1]; - fout = fopen(outFile, "wb+"); - if (!fout) - { - fprintf (stderr, "Could not open output file %s\n", argv[argc-1]); - fclose(fin); - return 1; - } - - enc = opus_custom_encoder_create(mode, channels, &err); - if (err != 0) - { - fprintf(stderr, "Failed to create the encoder: %s\n", opus_strerror(err)); - fclose(fin); - fclose(fout); - return 1; - } - dec = opus_custom_decoder_create(mode, channels, &err); - if (err != 0) - { - fprintf(stderr, "Failed to create the decoder: %s\n", opus_strerror(err)); - fclose(fin); - fclose(fout); - return 1; - } - opus_custom_decoder_ctl(dec, OPUS_GET_LOOKAHEAD(&skip)); - - if (argc>7) - { - complexity=atoi(argv[5]); - opus_custom_encoder_ctl(enc,OPUS_SET_COMPLEXITY(complexity)); - } - - in = (opus_int16*)malloc(frame_size*channels*sizeof(opus_int16)); - out = (opus_int16*)malloc(frame_size*channels*sizeof(opus_int16)); - - while (!feof(fin)) - { - int ret; - err = fread(in, sizeof(short), frame_size*channels, fin); - if (feof(fin)) - break; - len = opus_custom_encode(enc, in, frame_size, data, bytes_per_packet); - if (len <= 0) - fprintf (stderr, "opus_custom_encode() failed: %s\n", opus_strerror(len)); - - /* This is for simulating bit errors */ -#if 0 - int errors = 0; - int eid = 0; - /* This simulates random bit error */ - for (i=0;i<len*8;i++) - { - if (rand()%atoi(argv[8])==0) - { - if (i<64) - { - errors++; - eid = i; - } - data[i/8] ^= 1<<(7-(i%8)); - } - } - if (errors == 1) - data[eid/8] ^= 1<<(7-(eid%8)); - else if (errors%2 == 1) - data[rand()%8] ^= 1<<rand()%8; -#endif - -#if 1 /* Set to zero to use the encoder's output instead */ - /* This is to simulate packet loss */ - if (argc==9 && 
rand()%1000<atoi(argv[argc-3])) - /*if (errors && (errors%2==0))*/ - ret = opus_custom_decode(dec, NULL, len, out, frame_size); - else - ret = opus_custom_decode(dec, data, len, out, frame_size); - if (ret < 0) - fprintf(stderr, "opus_custom_decode() failed: %s\n", opus_strerror(ret)); -#else - for (i=0;i<ret*channels;i++) - out[i] = in[i]; -#endif -#if !(defined (FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH) - for (i=0;i<ret*channels;i++) - { - rmsd += (in[i]-out[i])*1.0*(in[i]-out[i]); - /*out[i] -= in[i];*/ - } -#endif - count++; - fwrite(out+skip*channels, sizeof(short), (ret-skip)*channels, fout); - skip = 0; - } - PRINT_MIPS(stderr); - - opus_custom_encoder_destroy(enc); - opus_custom_decoder_destroy(dec); - fclose(fin); - fclose(fout); - opus_custom_mode_destroy(mode); - free(in); - free(out); -#if !(defined (FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH) - if (rmsd > 0) - { - rmsd = sqrt(rmsd/(1.0*frame_size*channels*count)); - fprintf (stderr, "Error: encoder doesn't match decoder\n"); - fprintf (stderr, "RMS mismatch is %f\n", rmsd); - return 1; - } else { - fprintf (stderr, "Encoder matches decoder!!\n"); - } -#endif - return 0; -} - diff --git a/thirdparty/opus/celt/os_support.h b/thirdparty/opus/celt/os_support.h deleted file mode 100644 index a2171971e9..0000000000 --- a/thirdparty/opus/celt/os_support.h +++ /dev/null @@ -1,92 +0,0 @@ -/* Copyright (C) 2007 Jean-Marc Valin - - File: os_support.h - This is the (tiny) OS abstraction layer. Aside from math.h, this is the - only place where system headers are allowed. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, - INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef OS_SUPPORT_H -#define OS_SUPPORT_H - -#ifdef CUSTOM_SUPPORT -# include "custom_support.h" -#endif - -#include "opus_types.h" -#include "opus_defines.h" - -#include <string.h> -#include <stdio.h> -#include <stdlib.h> - -/** Opus wrapper for malloc(). To do your own dynamic allocation, all you need to do is replace this function and opus_free */ -#ifndef OVERRIDE_OPUS_ALLOC -static OPUS_INLINE void *opus_alloc (size_t size) -{ - return malloc(size); -} -#endif - -/** Same as celt_alloc(), except that the area is only needed inside a CELT call (might cause problem with wideband though) */ -#ifndef OVERRIDE_OPUS_ALLOC_SCRATCH -static OPUS_INLINE void *opus_alloc_scratch (size_t size) -{ - /* Scratch space doesn't need to be cleared */ - return opus_alloc(size); -} -#endif - -/** Opus wrapper for free(). To do your own dynamic allocation, all you need to do is replace this function and opus_alloc */ -#ifndef OVERRIDE_OPUS_FREE -static OPUS_INLINE void opus_free (void *ptr) -{ - free(ptr); -} -#endif - -/** Copy n elements from src to dst. 
The 0* term provides compile-time type checking */ -#ifndef OVERRIDE_OPUS_COPY -#define OPUS_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) )) -#endif - -/** Copy n elements from src to dst, allowing overlapping regions. The 0* term - provides compile-time type checking */ -#ifndef OVERRIDE_OPUS_MOVE -#define OPUS_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) )) -#endif - -/** Set n elements of dst to zero */ -#ifndef OVERRIDE_OPUS_CLEAR -#define OPUS_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst)))) -#endif - -/*#ifdef __GNUC__ -#pragma GCC poison printf sprintf -#pragma GCC poison malloc free realloc calloc -#endif*/ - -#endif /* OS_SUPPORT_H */ - diff --git a/thirdparty/opus/celt/pitch.c b/thirdparty/opus/celt/pitch.c deleted file mode 100644 index bf46e7d562..0000000000 --- a/thirdparty/opus/celt/pitch.c +++ /dev/null @@ -1,557 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/** - @file pitch.c - @brief Pitch analysis - */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pitch.h" -#include "os_support.h" -#include "modes.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "celt_lpc.h" - -static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len, - int max_pitch, int *best_pitch -#ifdef FIXED_POINT - , int yshift, opus_val32 maxcorr -#endif - ) -{ - int i, j; - opus_val32 Syy=1; - opus_val16 best_num[2]; - opus_val32 best_den[2]; -#ifdef FIXED_POINT - int xshift; - - xshift = celt_ilog2(maxcorr)-14; -#endif - - best_num[0] = -1; - best_num[1] = -1; - best_den[0] = 0; - best_den[1] = 0; - best_pitch[0] = 0; - best_pitch[1] = 1; - for (j=0;j<len;j++) - Syy = ADD32(Syy, SHR32(MULT16_16(y[j],y[j]), yshift)); - for (i=0;i<max_pitch;i++) - { - if (xcorr[i]>0) - { - opus_val16 num; - opus_val32 xcorr16; - xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift)); -#ifndef FIXED_POINT - /* Considering the range of xcorr16, this should avoid both underflows - and overflows (inf) when squaring xcorr16 */ - xcorr16 *= 1e-12f; -#endif - num = MULT16_16_Q15(xcorr16,xcorr16); - if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy)) - { - if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy)) - { - best_num[1] = best_num[0]; - best_den[1] = best_den[0]; - best_pitch[1] = best_pitch[0]; - best_num[0] = num; - best_den[0] = Syy; - best_pitch[0] = i; - } else { - best_num[1] = num; - best_den[1] = Syy; - best_pitch[1] = i; - } 
- } - } - Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift); - Syy = MAX32(1, Syy); - } -} - -static void celt_fir5(const opus_val16 *x, - const opus_val16 *num, - opus_val16 *y, - int N, - opus_val16 *mem) -{ - int i; - opus_val16 num0, num1, num2, num3, num4; - opus_val32 mem0, mem1, mem2, mem3, mem4; - num0=num[0]; - num1=num[1]; - num2=num[2]; - num3=num[3]; - num4=num[4]; - mem0=mem[0]; - mem1=mem[1]; - mem2=mem[2]; - mem3=mem[3]; - mem4=mem[4]; - for (i=0;i<N;i++) - { - opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT); - sum = MAC16_16(sum,num0,mem0); - sum = MAC16_16(sum,num1,mem1); - sum = MAC16_16(sum,num2,mem2); - sum = MAC16_16(sum,num3,mem3); - sum = MAC16_16(sum,num4,mem4); - mem4 = mem3; - mem3 = mem2; - mem2 = mem1; - mem1 = mem0; - mem0 = x[i]; - y[i] = ROUND16(sum, SIG_SHIFT); - } - mem[0]=mem0; - mem[1]=mem1; - mem[2]=mem2; - mem[3]=mem3; - mem[4]=mem4; -} - - -void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp, - int len, int C, int arch) -{ - int i; - opus_val32 ac[5]; - opus_val16 tmp=Q15ONE; - opus_val16 lpc[4], mem[5]={0,0,0,0,0}; - opus_val16 lpc2[5]; - opus_val16 c1 = QCONST16(.8f,15); -#ifdef FIXED_POINT - int shift; - opus_val32 maxabs = celt_maxabs32(x[0], len); - if (C==2) - { - opus_val32 maxabs_1 = celt_maxabs32(x[1], len); - maxabs = MAX32(maxabs, maxabs_1); - } - if (maxabs<1) - maxabs=1; - shift = celt_ilog2(maxabs)-10; - if (shift<0) - shift=0; - if (C==2) - shift++; -#endif - for (i=1;i<len>>1;i++) - x_lp[i] = SHR32(HALF32(HALF32(x[0][(2*i-1)]+x[0][(2*i+1)])+x[0][2*i]), shift); - x_lp[0] = SHR32(HALF32(HALF32(x[0][1])+x[0][0]), shift); - if (C==2) - { - for (i=1;i<len>>1;i++) - x_lp[i] += SHR32(HALF32(HALF32(x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*i]), shift); - x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift); - } - - _celt_autocorr(x_lp, ac, NULL, 0, - 4, len>>1, arch); - - /* Noise floor -40 dB */ -#ifdef FIXED_POINT - ac[0] += SHR32(ac[0],13); -#else - 
ac[0] *= 1.0001f; -#endif - /* Lag windowing */ - for (i=1;i<=4;i++) - { - /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/ -#ifdef FIXED_POINT - ac[i] -= MULT16_32_Q15(2*i*i, ac[i]); -#else - ac[i] -= ac[i]*(.008f*i)*(.008f*i); -#endif - } - - _celt_lpc(lpc, ac, 4); - for (i=0;i<4;i++) - { - tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp); - lpc[i] = MULT16_16_Q15(lpc[i], tmp); - } - /* Add a zero */ - lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT); - lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]); - lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]); - lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]); - lpc2[4] = MULT16_16_Q15(c1,lpc[3]); - celt_fir5(x_lp, lpc2, x_lp, len>>1, mem); -} - -/* Pure C implementation. */ -#ifdef FIXED_POINT -opus_val32 -#else -void -#endif -#if defined(OVERRIDE_PITCH_XCORR) -celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch) -#else -celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch, int arch) -#endif -{ - -#if 0 /* This is a simple version of the pitch correlation that should work - well on DSPs like Blackfin and TI C5x/C6x */ - int i, j; -#ifdef FIXED_POINT - opus_val32 maxcorr=1; -#endif -#if !defined(OVERRIDE_PITCH_XCORR) - (void)arch; -#endif - for (i=0;i<max_pitch;i++) - { - opus_val32 sum = 0; - for (j=0;j<len;j++) - sum = MAC16_16(sum, _x[j], _y[i+j]); - xcorr[i] = sum; -#ifdef FIXED_POINT - maxcorr = MAX32(maxcorr, sum); -#endif - } -#ifdef FIXED_POINT - return maxcorr; -#endif - -#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */ - int i; - /*The EDSP version requires that max_pitch is at least 1, and that _x is - 32-bit aligned. 
- Since it's hard to put asserts in assembly, put them here.*/ -#ifdef FIXED_POINT - opus_val32 maxcorr=1; -#endif - celt_assert(max_pitch>0); - celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); - for (i=0;i<max_pitch-3;i+=4) - { - opus_val32 sum[4]={0,0,0,0}; -#if defined(OVERRIDE_PITCH_XCORR) - xcorr_kernel_c(_x, _y+i, sum, len); -#else - xcorr_kernel(_x, _y+i, sum, len, arch); -#endif - xcorr[i]=sum[0]; - xcorr[i+1]=sum[1]; - xcorr[i+2]=sum[2]; - xcorr[i+3]=sum[3]; -#ifdef FIXED_POINT - sum[0] = MAX32(sum[0], sum[1]); - sum[2] = MAX32(sum[2], sum[3]); - sum[0] = MAX32(sum[0], sum[2]); - maxcorr = MAX32(maxcorr, sum[0]); -#endif - } - /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */ - for (;i<max_pitch;i++) - { - opus_val32 sum; -#if defined(OVERRIDE_PITCH_XCORR) - sum = celt_inner_prod_c(_x, _y+i, len); -#else - sum = celt_inner_prod(_x, _y+i, len, arch); -#endif - xcorr[i] = sum; -#ifdef FIXED_POINT - maxcorr = MAX32(maxcorr, sum); -#endif - } -#ifdef FIXED_POINT - return maxcorr; -#endif -#endif -} - -void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y, - int len, int max_pitch, int *pitch, int arch) -{ - int i, j; - int lag; - int best_pitch[2]={0,0}; - VARDECL(opus_val16, x_lp4); - VARDECL(opus_val16, y_lp4); - VARDECL(opus_val32, xcorr); -#ifdef FIXED_POINT - opus_val32 maxcorr; - opus_val32 xmax, ymax; - int shift=0; -#endif - int offset; - - SAVE_STACK; - - celt_assert(len>0); - celt_assert(max_pitch>0); - lag = len+max_pitch; - - ALLOC(x_lp4, len>>2, opus_val16); - ALLOC(y_lp4, lag>>2, opus_val16); - ALLOC(xcorr, max_pitch>>1, opus_val32); - - /* Downsample by 2 again */ - for (j=0;j<len>>2;j++) - x_lp4[j] = x_lp[2*j]; - for (j=0;j<lag>>2;j++) - y_lp4[j] = y[2*j]; - -#ifdef FIXED_POINT - xmax = celt_maxabs16(x_lp4, len>>2); - ymax = celt_maxabs16(y_lp4, lag>>2); - shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11; - if (shift>0) - { - for (j=0;j<len>>2;j++) - x_lp4[j] = 
SHR16(x_lp4[j], shift); - for (j=0;j<lag>>2;j++) - y_lp4[j] = SHR16(y_lp4[j], shift); - /* Use double the shift for a MAC */ - shift *= 2; - } else { - shift = 0; - } -#endif - - /* Coarse search with 4x decimation */ - -#ifdef FIXED_POINT - maxcorr = -#endif - celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2, arch); - - find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch -#ifdef FIXED_POINT - , 0, maxcorr -#endif - ); - - /* Finer search with 2x decimation */ -#ifdef FIXED_POINT - maxcorr=1; -#endif - for (i=0;i<max_pitch>>1;i++) - { - opus_val32 sum; - xcorr[i] = 0; - if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2) - continue; -#ifdef FIXED_POINT - sum = 0; - for (j=0;j<len>>1;j++) - sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); -#else - sum = celt_inner_prod_c(x_lp, y+i, len>>1); -#endif - xcorr[i] = MAX32(-1, sum); -#ifdef FIXED_POINT - maxcorr = MAX32(maxcorr, sum); -#endif - } - find_best_pitch(xcorr, y, len>>1, max_pitch>>1, best_pitch -#ifdef FIXED_POINT - , shift+1, maxcorr -#endif - ); - - /* Refine by pseudo-interpolation */ - if (best_pitch[0]>0 && best_pitch[0]<(max_pitch>>1)-1) - { - opus_val32 a, b, c; - a = xcorr[best_pitch[0]-1]; - b = xcorr[best_pitch[0]]; - c = xcorr[best_pitch[0]+1]; - if ((c-a) > MULT16_32_Q15(QCONST16(.7f,15),b-a)) - offset = 1; - else if ((a-c) > MULT16_32_Q15(QCONST16(.7f,15),b-c)) - offset = -1; - else - offset = 0; - } else { - offset = 0; - } - *pitch = 2*best_pitch[0]-offset; - - RESTORE_STACK; -} - -#ifdef FIXED_POINT -static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy) -{ - opus_val32 x2y2; - int sx, sy, shift; - opus_val32 g; - opus_val16 den; - if (xy == 0 || xx == 0 || yy == 0) - return 0; - sx = celt_ilog2(xx)-14; - sy = celt_ilog2(yy)-14; - shift = sx + sy; - x2y2 = MULT16_16_Q14(VSHR32(xx, sx), VSHR32(yy, sy)); - if (shift & 1) { - if (x2y2 < 32768) - { - x2y2 <<= 1; - shift--; - } else { - x2y2 >>= 1; - shift++; - } - } - den = celt_rsqrt_norm(x2y2); 
- g = MULT16_32_Q15(den, xy); - g = VSHR32(g, (shift>>1)-1); - return EXTRACT16(MIN32(g, Q15ONE)); -} -#else -static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy) -{ - return xy/celt_sqrt(1+xx*yy); -} -#endif - -static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}; -opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, - int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch) -{ - int k, i, T, T0; - opus_val16 g, g0; - opus_val16 pg; - opus_val32 xy,xx,yy,xy2; - opus_val32 xcorr[3]; - opus_val32 best_xy, best_yy; - int offset; - int minperiod0; - VARDECL(opus_val32, yy_lookup); - SAVE_STACK; - - minperiod0 = minperiod; - maxperiod /= 2; - minperiod /= 2; - *T0_ /= 2; - prev_period /= 2; - N /= 2; - x += maxperiod; - if (*T0_>=maxperiod) - *T0_=maxperiod-1; - - T = T0 = *T0_; - ALLOC(yy_lookup, maxperiod+1, opus_val32); - dual_inner_prod(x, x, x-T0, N, &xx, &xy, arch); - yy_lookup[0] = xx; - yy=xx; - for (i=1;i<=maxperiod;i++) - { - yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]); - yy_lookup[i] = MAX32(0, yy); - } - yy = yy_lookup[T0]; - best_xy = xy; - best_yy = yy; - g = g0 = compute_pitch_gain(xy, xx, yy); - /* Look for any pitch at T/k */ - for (k=2;k<=15;k++) - { - int T1, T1b; - opus_val16 g1; - opus_val16 cont=0; - opus_val16 thresh; - T1 = celt_udiv(2*T0+k, 2*k); - if (T1 < minperiod) - break; - /* Look for another strong correlation at T1b */ - if (k==2) - { - if (T1+T0>maxperiod) - T1b = T0; - else - T1b = T0+T1; - } else - { - T1b = celt_udiv(2*second_check[k]*T0+k, 2*k); - } - dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch); - xy = HALF32(xy + xy2); - yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]); - g1 = compute_pitch_gain(xy, xx, yy); - if (abs(T1-prev_period)<=1) - cont = prev_gain; - else if (abs(T1-prev_period)<=2 && 5*k*k < T0) - cont = HALF16(prev_gain); - else - cont = 0; - thresh = MAX16(QCONST16(.3f,15), 
MULT16_16_Q15(QCONST16(.7f,15),g0)-cont); - /* Bias against very high pitch (very short period) to avoid false-positives - due to short-term correlation */ - if (T1<3*minperiod) - thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont); - else if (T1<2*minperiod) - thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont); - if (g1 > thresh) - { - best_xy = xy; - best_yy = yy; - T = T1; - g = g1; - } - } - best_xy = MAX32(0, best_xy); - if (best_yy <= best_xy) - pg = Q15ONE; - else - pg = SHR32(frac_div32(best_xy,best_yy+1),16); - - for (k=0;k<3;k++) - xcorr[k] = celt_inner_prod(x, x-(T+k-1), N, arch); - if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0])) - offset = 1; - else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2])) - offset = -1; - else - offset = 0; - if (pg > g) - pg = g; - *T0_ = 2*T+offset; - - if (*T0_<minperiod0) - *T0_=minperiod0; - RESTORE_STACK; - return pg; -} diff --git a/thirdparty/opus/celt/pitch.h b/thirdparty/opus/celt/pitch.h deleted file mode 100644 index d3503532a0..0000000000 --- a/thirdparty/opus/celt/pitch.h +++ /dev/null @@ -1,200 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/** - @file pitch.h - @brief Pitch analysis - */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef PITCH_H -#define PITCH_H - -#include "modes.h" -#include "cpu_support.h" - -#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) \ - || ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) -#include "x86/pitch_sse.h" -#endif - -#if defined(MIPSr1_ASM) -#include "mips/pitch_mipsr1.h" -#endif - -#if ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \ - || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)) -# include "arm/pitch_arm.h" -#endif - -void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp, - int len, int C, int arch); - -void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y, - int len, int max_pitch, int *pitch, int arch); - -opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, - int N, int *T0, int prev_period, opus_val16 prev_gain, int arch); - - -/* OPT: This is the kernel you really want to optimize. It gets used a lot - by the prefilter and by the PLC. 
*/ -static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) -{ - int j; - opus_val16 y_0, y_1, y_2, y_3; - celt_assert(len>=3); - y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */ - y_0=*y++; - y_1=*y++; - y_2=*y++; - for (j=0;j<len-3;j+=4) - { - opus_val16 tmp; - tmp = *x++; - y_3=*y++; - sum[0] = MAC16_16(sum[0],tmp,y_0); - sum[1] = MAC16_16(sum[1],tmp,y_1); - sum[2] = MAC16_16(sum[2],tmp,y_2); - sum[3] = MAC16_16(sum[3],tmp,y_3); - tmp=*x++; - y_0=*y++; - sum[0] = MAC16_16(sum[0],tmp,y_1); - sum[1] = MAC16_16(sum[1],tmp,y_2); - sum[2] = MAC16_16(sum[2],tmp,y_3); - sum[3] = MAC16_16(sum[3],tmp,y_0); - tmp=*x++; - y_1=*y++; - sum[0] = MAC16_16(sum[0],tmp,y_2); - sum[1] = MAC16_16(sum[1],tmp,y_3); - sum[2] = MAC16_16(sum[2],tmp,y_0); - sum[3] = MAC16_16(sum[3],tmp,y_1); - tmp=*x++; - y_2=*y++; - sum[0] = MAC16_16(sum[0],tmp,y_3); - sum[1] = MAC16_16(sum[1],tmp,y_0); - sum[2] = MAC16_16(sum[2],tmp,y_1); - sum[3] = MAC16_16(sum[3],tmp,y_2); - } - if (j++<len) - { - opus_val16 tmp = *x++; - y_3=*y++; - sum[0] = MAC16_16(sum[0],tmp,y_0); - sum[1] = MAC16_16(sum[1],tmp,y_1); - sum[2] = MAC16_16(sum[2],tmp,y_2); - sum[3] = MAC16_16(sum[3],tmp,y_3); - } - if (j++<len) - { - opus_val16 tmp=*x++; - y_0=*y++; - sum[0] = MAC16_16(sum[0],tmp,y_1); - sum[1] = MAC16_16(sum[1],tmp,y_2); - sum[2] = MAC16_16(sum[2],tmp,y_3); - sum[3] = MAC16_16(sum[3],tmp,y_0); - } - if (j<len) - { - opus_val16 tmp=*x++; - y_1=*y++; - sum[0] = MAC16_16(sum[0],tmp,y_2); - sum[1] = MAC16_16(sum[1],tmp,y_3); - sum[2] = MAC16_16(sum[2],tmp,y_0); - sum[3] = MAC16_16(sum[3],tmp,y_1); - } -} - -#ifndef OVERRIDE_XCORR_KERNEL -#define xcorr_kernel(x, y, sum, len, arch) \ - ((void)(arch),xcorr_kernel_c(x, y, sum, len)) -#endif /* OVERRIDE_XCORR_KERNEL */ - - -static OPUS_INLINE void dual_inner_prod_c(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, - int N, opus_val32 *xy1, opus_val32 *xy2) -{ - int i; - opus_val32 
xy01=0; - opus_val32 xy02=0; - for (i=0;i<N;i++) - { - xy01 = MAC16_16(xy01, x[i], y01[i]); - xy02 = MAC16_16(xy02, x[i], y02[i]); - } - *xy1 = xy01; - *xy2 = xy02; -} - -#ifndef OVERRIDE_DUAL_INNER_PROD -# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \ - ((void)(arch),dual_inner_prod_c(x, y01, y02, N, xy1, xy2)) -#endif - -/*We make sure a C version is always available for cases where the overhead of - vectorization and passing around an arch flag aren't worth it.*/ -static OPUS_INLINE opus_val32 celt_inner_prod_c(const opus_val16 *x, - const opus_val16 *y, int N) -{ - int i; - opus_val32 xy=0; - for (i=0;i<N;i++) - xy = MAC16_16(xy, x[i], y[i]); - return xy; -} - -#if !defined(OVERRIDE_CELT_INNER_PROD) -# define celt_inner_prod(x, y, N, arch) \ - ((void)(arch),celt_inner_prod_c(x, y, N)) -#endif - -#ifdef NON_STATIC_COMB_FILTER_CONST_C -void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N, - opus_val16 g10, opus_val16 g11, opus_val16 g12); -#endif - - -#ifdef FIXED_POINT -opus_val32 -#else -void -#endif -celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch); - -#if !defined(OVERRIDE_PITCH_XCORR) -#ifdef FIXED_POINT -opus_val32 -#else -void -#endif -celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch, int arch); - -#endif - -#endif diff --git a/thirdparty/opus/celt/quant_bands.c b/thirdparty/opus/celt/quant_bands.c deleted file mode 100644 index 95076e0af2..0000000000 --- a/thirdparty/opus/celt/quant_bands.c +++ /dev/null @@ -1,556 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "quant_bands.h" -#include "laplace.h" -#include <math.h> -#include "os_support.h" -#include "arch.h" -#include "mathops.h" -#include "stack_alloc.h" -#include "rate.h" - -#ifdef FIXED_POINT -/* Mean energy in each band quantized in Q4 */ -const signed char eMeans[25] = { - 103,100, 92, 85, 81, - 77, 72, 70, 78, 75, - 73, 71, 78, 74, 69, - 72, 70, 74, 76, 71, - 60, 60, 60, 60, 60 -}; -#else -/* Mean energy in each band quantized in Q4 and converted back to float */ -const opus_val16 eMeans[25] = { - 6.437500f, 6.250000f, 5.750000f, 5.312500f, 5.062500f, - 4.812500f, 4.500000f, 4.375000f, 4.875000f, 4.687500f, - 4.562500f, 4.437500f, 4.875000f, 4.625000f, 4.312500f, - 4.500000f, 4.375000f, 4.625000f, 4.750000f, 4.437500f, - 3.750000f, 3.750000f, 3.750000f, 3.750000f, 3.750000f -}; -#endif -/* prediction coefficients: 0.9, 0.8, 0.65, 0.5 */ -#ifdef FIXED_POINT -static const opus_val16 pred_coef[4] = {29440, 26112, 21248, 
16384}; -static const opus_val16 beta_coef[4] = {30147, 22282, 12124, 6554}; -static const opus_val16 beta_intra = 4915; -#else -static const opus_val16 pred_coef[4] = {29440/32768., 26112/32768., 21248/32768., 16384/32768.}; -static const opus_val16 beta_coef[4] = {30147/32768., 22282/32768., 12124/32768., 6554/32768.}; -static const opus_val16 beta_intra = 4915/32768.; -#endif - -/*Parameters of the Laplace-like probability models used for the coarse energy. - There is one pair of parameters for each frame size, prediction type - (inter/intra), and band number. - The first number of each pair is the probability of 0, and the second is the - decay rate, both in Q8 precision.*/ -static const unsigned char e_prob_model[4][2][42] = { - /*120 sample frames.*/ - { - /*Inter*/ - { - 72, 127, 65, 129, 66, 128, 65, 128, 64, 128, 62, 128, 64, 128, - 64, 128, 92, 78, 92, 79, 92, 78, 90, 79, 116, 41, 115, 40, - 114, 40, 132, 26, 132, 26, 145, 17, 161, 12, 176, 10, 177, 11 - }, - /*Intra*/ - { - 24, 179, 48, 138, 54, 135, 54, 132, 53, 134, 56, 133, 55, 132, - 55, 132, 61, 114, 70, 96, 74, 88, 75, 88, 87, 74, 89, 66, - 91, 67, 100, 59, 108, 50, 120, 40, 122, 37, 97, 43, 78, 50 - } - }, - /*240 sample frames.*/ - { - /*Inter*/ - { - 83, 78, 84, 81, 88, 75, 86, 74, 87, 71, 90, 73, 93, 74, - 93, 74, 109, 40, 114, 36, 117, 34, 117, 34, 143, 17, 145, 18, - 146, 19, 162, 12, 165, 10, 178, 7, 189, 6, 190, 8, 177, 9 - }, - /*Intra*/ - { - 23, 178, 54, 115, 63, 102, 66, 98, 69, 99, 74, 89, 71, 91, - 73, 91, 78, 89, 86, 80, 92, 66, 93, 64, 102, 59, 103, 60, - 104, 60, 117, 52, 123, 44, 138, 35, 133, 31, 97, 38, 77, 45 - } - }, - /*480 sample frames.*/ - { - /*Inter*/ - { - 61, 90, 93, 60, 105, 42, 107, 41, 110, 45, 116, 38, 113, 38, - 112, 38, 124, 26, 132, 27, 136, 19, 140, 20, 155, 14, 159, 16, - 158, 18, 170, 13, 177, 10, 187, 8, 192, 6, 175, 9, 159, 10 - }, - /*Intra*/ - { - 21, 178, 59, 110, 71, 86, 75, 85, 84, 83, 91, 66, 88, 73, - 87, 72, 92, 75, 98, 72, 105, 58, 107, 54, 115, 
52, 114, 55, - 112, 56, 129, 51, 132, 40, 150, 33, 140, 29, 98, 35, 77, 42 - } - }, - /*960 sample frames.*/ - { - /*Inter*/ - { - 42, 121, 96, 66, 108, 43, 111, 40, 117, 44, 123, 32, 120, 36, - 119, 33, 127, 33, 134, 34, 139, 21, 147, 23, 152, 20, 158, 25, - 154, 26, 166, 21, 173, 16, 184, 13, 184, 10, 150, 13, 139, 15 - }, - /*Intra*/ - { - 22, 178, 63, 114, 74, 82, 84, 83, 92, 82, 103, 62, 96, 72, - 96, 67, 101, 73, 107, 72, 113, 55, 118, 52, 125, 52, 118, 52, - 117, 55, 135, 49, 137, 39, 157, 32, 145, 29, 97, 33, 77, 40 - } - } -}; - -static const unsigned char small_energy_icdf[3]={2,1,0}; - -static opus_val32 loss_distortion(const opus_val16 *eBands, opus_val16 *oldEBands, int start, int end, int len, int C) -{ - int c, i; - opus_val32 dist = 0; - c=0; do { - for (i=start;i<end;i++) - { - opus_val16 d = SUB16(SHR16(eBands[i+c*len], 3), SHR16(oldEBands[i+c*len], 3)); - dist = MAC16_16(dist, d,d); - } - } while (++c<C); - return MIN32(200,SHR32(dist,2*DB_SHIFT-6)); -} - -static int quant_coarse_energy_impl(const CELTMode *m, int start, int end, - const opus_val16 *eBands, opus_val16 *oldEBands, - opus_int32 budget, opus_int32 tell, - const unsigned char *prob_model, opus_val16 *error, ec_enc *enc, - int C, int LM, int intra, opus_val16 max_decay, int lfe) -{ - int i, c; - int badness = 0; - opus_val32 prev[2] = {0,0}; - opus_val16 coef; - opus_val16 beta; - - if (tell+3 <= budget) - ec_enc_bit_logp(enc, intra, 3); - if (intra) - { - coef = 0; - beta = beta_intra; - } else { - beta = beta_coef[LM]; - coef = pred_coef[LM]; - } - - /* Encode at a fixed coarse resolution */ - for (i=start;i<end;i++) - { - c=0; - do { - int bits_left; - int qi, qi0; - opus_val32 q; - opus_val16 x; - opus_val32 f, tmp; - opus_val16 oldE; - opus_val16 decay_bound; - x = eBands[i+c*m->nbEBands]; - oldE = MAX16(-QCONST16(9.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]); -#ifdef FIXED_POINT - f = SHL32(EXTEND32(x),7) - PSHR32(MULT16_16(coef,oldE), 8) - prev[c]; - /* Rounding to nearest integer 
here is really important! */ - qi = (f+QCONST32(.5f,DB_SHIFT+7))>>(DB_SHIFT+7); - decay_bound = EXTRACT16(MAX32(-QCONST16(28.f,DB_SHIFT), - SUB32((opus_val32)oldEBands[i+c*m->nbEBands],max_decay))); -#else - f = x-coef*oldE-prev[c]; - /* Rounding to nearest integer here is really important! */ - qi = (int)floor(.5f+f); - decay_bound = MAX16(-QCONST16(28.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]) - max_decay; -#endif - /* Prevent the energy from going down too quickly (e.g. for bands - that have just one bin) */ - if (qi < 0 && x < decay_bound) - { - qi += (int)SHR16(SUB16(decay_bound,x), DB_SHIFT); - if (qi > 0) - qi = 0; - } - qi0 = qi; - /* If we don't have enough bits to encode all the energy, just assume - something safe. */ - tell = ec_tell(enc); - bits_left = budget-tell-3*C*(end-i); - if (i!=start && bits_left < 30) - { - if (bits_left < 24) - qi = IMIN(1, qi); - if (bits_left < 16) - qi = IMAX(-1, qi); - } - if (lfe && i>=2) - qi = IMIN(qi, 0); - if (budget-tell >= 15) - { - int pi; - pi = 2*IMIN(i,20); - ec_laplace_encode(enc, &qi, - prob_model[pi]<<7, prob_model[pi+1]<<6); - } - else if(budget-tell >= 2) - { - qi = IMAX(-1, IMIN(qi, 1)); - ec_enc_icdf(enc, 2*qi^-(qi<0), small_energy_icdf, 2); - } - else if(budget-tell >= 1) - { - qi = IMIN(0, qi); - ec_enc_bit_logp(enc, -qi, 1); - } - else - qi = -1; - error[i+c*m->nbEBands] = PSHR32(f,7) - SHL16(qi,DB_SHIFT); - badness += abs(qi0-qi); - q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT); - - tmp = PSHR32(MULT16_16(coef,oldE),8) + prev[c] + SHL32(q,7); -#ifdef FIXED_POINT - tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp); -#endif - oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7); - prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8)); - } while (++c < C); - } - return lfe ? 
0 : badness; -} - -void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd, - const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget, - opus_val16 *error, ec_enc *enc, int C, int LM, int nbAvailableBytes, - int force_intra, opus_val32 *delayedIntra, int two_pass, int loss_rate, int lfe) -{ - int intra; - opus_val16 max_decay; - VARDECL(opus_val16, oldEBands_intra); - VARDECL(opus_val16, error_intra); - ec_enc enc_start_state; - opus_uint32 tell; - int badness1=0; - opus_int32 intra_bias; - opus_val32 new_distortion; - SAVE_STACK; - - intra = force_intra || (!two_pass && *delayedIntra>2*C*(end-start) && nbAvailableBytes > (end-start)*C); - intra_bias = (opus_int32)((budget**delayedIntra*loss_rate)/(C*512)); - new_distortion = loss_distortion(eBands, oldEBands, start, effEnd, m->nbEBands, C); - - tell = ec_tell(enc); - if (tell+3 > budget) - two_pass = intra = 0; - - max_decay = QCONST16(16.f,DB_SHIFT); - if (end-start>10) - { -#ifdef FIXED_POINT - max_decay = MIN32(max_decay, SHL32(EXTEND32(nbAvailableBytes),DB_SHIFT-3)); -#else - max_decay = MIN32(max_decay, .125f*nbAvailableBytes); -#endif - } - if (lfe) - max_decay = QCONST16(3.f,DB_SHIFT); - enc_start_state = *enc; - - ALLOC(oldEBands_intra, C*m->nbEBands, opus_val16); - ALLOC(error_intra, C*m->nbEBands, opus_val16); - OPUS_COPY(oldEBands_intra, oldEBands, C*m->nbEBands); - - if (two_pass || intra) - { - badness1 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands_intra, budget, - tell, e_prob_model[LM][1], error_intra, enc, C, LM, 1, max_decay, lfe); - } - - if (!intra) - { - unsigned char *intra_buf; - ec_enc enc_intra_state; - opus_int32 tell_intra; - opus_uint32 nstart_bytes; - opus_uint32 nintra_bytes; - opus_uint32 save_bytes; - int badness2; - VARDECL(unsigned char, intra_bits); - - tell_intra = ec_tell_frac(enc); - - enc_intra_state = *enc; - - nstart_bytes = ec_range_bytes(&enc_start_state); - nintra_bytes = ec_range_bytes(&enc_intra_state); - intra_buf = 
ec_get_buffer(&enc_intra_state) + nstart_bytes; - save_bytes = nintra_bytes-nstart_bytes; - if (save_bytes == 0) - save_bytes = ALLOC_NONE; - ALLOC(intra_bits, save_bytes, unsigned char); - /* Copy bits from intra bit-stream */ - OPUS_COPY(intra_bits, intra_buf, nintra_bytes - nstart_bytes); - - *enc = enc_start_state; - - badness2 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands, budget, - tell, e_prob_model[LM][intra], error, enc, C, LM, 0, max_decay, lfe); - - if (two_pass && (badness1 < badness2 || (badness1 == badness2 && ((opus_int32)ec_tell_frac(enc))+intra_bias > tell_intra))) - { - *enc = enc_intra_state; - /* Copy intra bits to bit-stream */ - OPUS_COPY(intra_buf, intra_bits, nintra_bytes - nstart_bytes); - OPUS_COPY(oldEBands, oldEBands_intra, C*m->nbEBands); - OPUS_COPY(error, error_intra, C*m->nbEBands); - intra = 1; - } - } else { - OPUS_COPY(oldEBands, oldEBands_intra, C*m->nbEBands); - OPUS_COPY(error, error_intra, C*m->nbEBands); - } - - if (intra) - *delayedIntra = new_distortion; - else - *delayedIntra = ADD32(MULT16_32_Q15(MULT16_16_Q15(pred_coef[LM], pred_coef[LM]),*delayedIntra), - new_distortion); - - RESTORE_STACK; -} - -void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C) -{ - int i, c; - - /* Encode finer resolution */ - for (i=start;i<end;i++) - { - opus_int16 frac = 1<<fine_quant[i]; - if (fine_quant[i] <= 0) - continue; - c=0; - do { - int q2; - opus_val16 offset; -#ifdef FIXED_POINT - /* Has to be without rounding */ - q2 = (error[i+c*m->nbEBands]+QCONST16(.5f,DB_SHIFT))>>(DB_SHIFT-fine_quant[i]); -#else - q2 = (int)floor((error[i+c*m->nbEBands]+.5f)*frac); -#endif - if (q2 > frac-1) - q2 = frac-1; - if (q2<0) - q2 = 0; - ec_enc_bits(enc, q2, fine_quant[i]); -#ifdef FIXED_POINT - offset = SUB16(SHR32(SHL32(EXTEND32(q2),DB_SHIFT)+QCONST16(.5f,DB_SHIFT),fine_quant[i]),QCONST16(.5f,DB_SHIFT)); -#else - offset = 
(q2+.5f)*(1<<(14-fine_quant[i]))*(1.f/16384) - .5f; -#endif - oldEBands[i+c*m->nbEBands] += offset; - error[i+c*m->nbEBands] -= offset; - /*printf ("%f ", error[i] - offset);*/ - } while (++c < C); - } -} - -void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int C) -{ - int i, prio, c; - - /* Use up the remaining bits */ - for (prio=0;prio<2;prio++) - { - for (i=start;i<end && bits_left>=C ;i++) - { - if (fine_quant[i] >= MAX_FINE_BITS || fine_priority[i]!=prio) - continue; - c=0; - do { - int q2; - opus_val16 offset; - q2 = error[i+c*m->nbEBands]<0 ? 0 : 1; - ec_enc_bits(enc, q2, 1); -#ifdef FIXED_POINT - offset = SHR16(SHL16(q2,DB_SHIFT)-QCONST16(.5f,DB_SHIFT),fine_quant[i]+1); -#else - offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384); -#endif - oldEBands[i+c*m->nbEBands] += offset; - bits_left--; - } while (++c < C); - } - } -} - -void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int intra, ec_dec *dec, int C, int LM) -{ - const unsigned char *prob_model = e_prob_model[LM][intra]; - int i, c; - opus_val32 prev[2] = {0, 0}; - opus_val16 coef; - opus_val16 beta; - opus_int32 budget; - opus_int32 tell; - - if (intra) - { - coef = 0; - beta = beta_intra; - } else { - beta = beta_coef[LM]; - coef = pred_coef[LM]; - } - - budget = dec->storage*8; - - /* Decode at a fixed coarse resolution */ - for (i=start;i<end;i++) - { - c=0; - do { - int qi; - opus_val32 q; - opus_val32 tmp; - /* It would be better to express this invariant as a - test on C at function entry, but that isn't enough - to make the static analyzer happy. 
*/ - celt_assert(c<2); - tell = ec_tell(dec); - if(budget-tell>=15) - { - int pi; - pi = 2*IMIN(i,20); - qi = ec_laplace_decode(dec, - prob_model[pi]<<7, prob_model[pi+1]<<6); - } - else if(budget-tell>=2) - { - qi = ec_dec_icdf(dec, small_energy_icdf, 2); - qi = (qi>>1)^-(qi&1); - } - else if(budget-tell>=1) - { - qi = -ec_dec_bit_logp(dec, 1); - } - else - qi = -1; - q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT); - - oldEBands[i+c*m->nbEBands] = MAX16(-QCONST16(9.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]); - tmp = PSHR32(MULT16_16(coef,oldEBands[i+c*m->nbEBands]),8) + prev[c] + SHL32(q,7); -#ifdef FIXED_POINT - tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp); -#endif - oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7); - prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8)); - } while (++c < C); - } -} - -void unquant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, ec_dec *dec, int C) -{ - int i, c; - /* Decode finer resolution */ - for (i=start;i<end;i++) - { - if (fine_quant[i] <= 0) - continue; - c=0; - do { - int q2; - opus_val16 offset; - q2 = ec_dec_bits(dec, fine_quant[i]); -#ifdef FIXED_POINT - offset = SUB16(SHR32(SHL32(EXTEND32(q2),DB_SHIFT)+QCONST16(.5f,DB_SHIFT),fine_quant[i]),QCONST16(.5f,DB_SHIFT)); -#else - offset = (q2+.5f)*(1<<(14-fine_quant[i]))*(1.f/16384) - .5f; -#endif - oldEBands[i+c*m->nbEBands] += offset; - } while (++c < C); - } -} - -void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, int *fine_priority, int bits_left, ec_dec *dec, int C) -{ - int i, prio, c; - - /* Use up the remaining bits */ - for (prio=0;prio<2;prio++) - { - for (i=start;i<end && bits_left>=C ;i++) - { - if (fine_quant[i] >= MAX_FINE_BITS || fine_priority[i]!=prio) - continue; - c=0; - do { - int q2; - opus_val16 offset; - q2 = ec_dec_bits(dec, 1); -#ifdef FIXED_POINT - offset = SHR16(SHL16(q2,DB_SHIFT)-QCONST16(.5f,DB_SHIFT),fine_quant[i]+1); -#else - offset = 
(q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384); -#endif - oldEBands[i+c*m->nbEBands] += offset; - bits_left--; - } while (++c < C); - } - } -} - -void amp2Log2(const CELTMode *m, int effEnd, int end, - celt_ener *bandE, opus_val16 *bandLogE, int C) -{ - int c, i; - c=0; - do { - for (i=0;i<effEnd;i++) - bandLogE[i+c*m->nbEBands] = - celt_log2(SHL32(bandE[i+c*m->nbEBands],2)) - - SHL16((opus_val16)eMeans[i],6); - for (i=effEnd;i<end;i++) - bandLogE[c*m->nbEBands+i] = -QCONST16(14.f,DB_SHIFT); - } while (++c < C); -} diff --git a/thirdparty/opus/celt/quant_bands.h b/thirdparty/opus/celt/quant_bands.h deleted file mode 100644 index 0490bca4b4..0000000000 --- a/thirdparty/opus/celt/quant_bands.h +++ /dev/null @@ -1,66 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef QUANT_BANDS -#define QUANT_BANDS - -#include "arch.h" -#include "modes.h" -#include "entenc.h" -#include "entdec.h" -#include "mathops.h" - -#ifdef FIXED_POINT -extern const signed char eMeans[25]; -#else -extern const opus_val16 eMeans[25]; -#endif - -void amp2Log2(const CELTMode *m, int effEnd, int end, - celt_ener *bandE, opus_val16 *bandLogE, int C); - -void log2Amp(const CELTMode *m, int start, int end, - celt_ener *eBands, const opus_val16 *oldEBands, int C); - -void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd, - const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget, - opus_val16 *error, ec_enc *enc, int C, int LM, - int nbAvailableBytes, int force_intra, opus_val32 *delayedIntra, - int two_pass, int loss_rate, int lfe); - -void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C); - -void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int C); - -void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int intra, ec_dec *dec, int C, int LM); - -void unquant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, ec_dec *dec, int C); - -void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 
*oldEBands, int *fine_quant, int *fine_priority, int bits_left, ec_dec *dec, int C); - -#endif /* QUANT_BANDS */ diff --git a/thirdparty/opus/celt/rate.c b/thirdparty/opus/celt/rate.c deleted file mode 100644 index 7dfa5be8a6..0000000000 --- a/thirdparty/opus/celt/rate.c +++ /dev/null @@ -1,639 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <math.h> -#include "modes.h" -#include "cwrs.h" -#include "arch.h" -#include "os_support.h" - -#include "entcode.h" -#include "rate.h" - -static const unsigned char LOG2_FRAC_TABLE[24]={ - 0, - 8,13, - 16,19,21,23, - 24,26,27,28,29,30,31,32, - 32,33,34,34,35,36,36,37,37 -}; - -#ifdef CUSTOM_MODES - -/*Determines if V(N,K) fits in a 32-bit unsigned integer. - N and K are themselves limited to 15 bits.*/ -static int fits_in32(int _n, int _k) -{ - static const opus_int16 maxN[15] = { - 32767, 32767, 32767, 1476, 283, 109, 60, 40, - 29, 24, 20, 18, 16, 14, 13}; - static const opus_int16 maxK[15] = { - 32767, 32767, 32767, 32767, 1172, 238, 95, 53, - 36, 27, 22, 18, 16, 15, 13}; - if (_n>=14) - { - if (_k>=14) - return 0; - else - return _n <= maxN[_k]; - } else { - return _k <= maxK[_n]; - } -} - -void compute_pulse_cache(CELTMode *m, int LM) -{ - int C; - int i; - int j; - int curr=0; - int nbEntries=0; - int entryN[100], entryK[100], entryI[100]; - const opus_int16 *eBands = m->eBands; - PulseCache *cache = &m->cache; - opus_int16 *cindex; - unsigned char *bits; - unsigned char *cap; - - cindex = (opus_int16 *)opus_alloc(sizeof(cache->index[0])*m->nbEBands*(LM+2)); - cache->index = cindex; - - /* Scan for all unique band sizes */ - for (i=0;i<=LM+1;i++) - { - for (j=0;j<m->nbEBands;j++) - { - int k; - int N = (eBands[j+1]-eBands[j])<<i>>1; - cindex[i*m->nbEBands+j] = -1; - /* Find other bands that have the same size */ - for (k=0;k<=i;k++) - { - int n; - for (n=0;n<m->nbEBands && (k!=i || n<j);n++) - { - if (N == (eBands[n+1]-eBands[n])<<k>>1) - { - cindex[i*m->nbEBands+j] = cindex[k*m->nbEBands+n]; - break; - } - } - } - if (cache->index[i*m->nbEBands+j] == -1 && N!=0) - { - int K; - entryN[nbEntries] = N; - K = 0; - while (fits_in32(N,get_pulses(K+1)) && K<MAX_PSEUDO) - K++; - entryK[nbEntries] = K; - cindex[i*m->nbEBands+j] = curr; - entryI[nbEntries] = curr; - - curr += K+1; - nbEntries++; - } - 
} - } - bits = (unsigned char *)opus_alloc(sizeof(unsigned char)*curr); - cache->bits = bits; - cache->size = curr; - /* Compute the cache for all unique sizes */ - for (i=0;i<nbEntries;i++) - { - unsigned char *ptr = bits+entryI[i]; - opus_int16 tmp[CELT_MAX_PULSES+1]; - get_required_bits(tmp, entryN[i], get_pulses(entryK[i]), BITRES); - for (j=1;j<=entryK[i];j++) - ptr[j] = tmp[get_pulses(j)]-1; - ptr[0] = entryK[i]; - } - - /* Compute the maximum rate for each band at which we'll reliably use as - many bits as we ask for. */ - cache->caps = cap = (unsigned char *)opus_alloc(sizeof(cache->caps[0])*(LM+1)*2*m->nbEBands); - for (i=0;i<=LM;i++) - { - for (C=1;C<=2;C++) - { - for (j=0;j<m->nbEBands;j++) - { - int N0; - int max_bits; - N0 = m->eBands[j+1]-m->eBands[j]; - /* N=1 bands only have a sign bit and fine bits. */ - if (N0<<i == 1) - max_bits = C*(1+MAX_FINE_BITS)<<BITRES; - else - { - const unsigned char *pcache; - opus_int32 num; - opus_int32 den; - int LM0; - int N; - int offset; - int ndof; - int qb; - int k; - LM0 = 0; - /* Even-sized bands bigger than N=2 can be split one more time. - As of commit 44203907 all bands >1 are even, including custom modes.*/ - if (N0 > 2) - { - N0>>=1; - LM0--; - } - /* N0=1 bands can't be split down to N<2. */ - else if (N0 <= 1) - { - LM0=IMIN(i,1); - N0<<=LM0; - } - /* Compute the cost for the lowest-level PVQ of a fully split - band. */ - pcache = bits + cindex[(LM0+1)*m->nbEBands+j]; - max_bits = pcache[pcache[0]]+1; - /* Add in the cost of coding regular splits. */ - N = N0; - for(k=0;k<i-LM0;k++){ - max_bits <<= 1; - /* Offset the number of qtheta bits by log2(N)/2 - + QTHETA_OFFSET compared to their "fair share" of - total/N */ - offset = ((m->logN[j]+((LM0+k)<<BITRES))>>1)-QTHETA_OFFSET; - /* The number of qtheta bits we'll allocate if the remainder - is to be max_bits. - The average measured cost for theta is 0.89701 times qb, - approximated here as 459/512. 
*/ - num=459*(opus_int32)((2*N-1)*offset+max_bits); - den=((opus_int32)(2*N-1)<<9)-459; - qb = IMIN((num+(den>>1))/den, 57); - celt_assert(qb >= 0); - max_bits += qb; - N <<= 1; - } - /* Add in the cost of a stereo split, if necessary. */ - if (C==2) - { - max_bits <<= 1; - offset = ((m->logN[j]+(i<<BITRES))>>1)-(N==2?QTHETA_OFFSET_TWOPHASE:QTHETA_OFFSET); - ndof = 2*N-1-(N==2); - /* The average measured cost for theta with the step PDF is - 0.95164 times qb, approximated here as 487/512. */ - num = (N==2?512:487)*(opus_int32)(max_bits+ndof*offset); - den = ((opus_int32)ndof<<9)-(N==2?512:487); - qb = IMIN((num+(den>>1))/den, (N==2?64:61)); - celt_assert(qb >= 0); - max_bits += qb; - } - /* Add the fine bits we'll use. */ - /* Compensate for the extra DoF in stereo */ - ndof = C*N + ((C==2 && N>2) ? 1 : 0); - /* Offset the number of fine bits by log2(N)/2 + FINE_OFFSET - compared to their "fair share" of total/N */ - offset = ((m->logN[j] + (i<<BITRES))>>1)-FINE_OFFSET; - /* N=2 is the only point that doesn't match the curve */ - if (N==2) - offset += 1<<BITRES>>2; - /* The number of fine bits we'll allocate if the remainder is - to be max_bits. 
*/ - num = max_bits+ndof*offset; - den = (ndof-1)<<BITRES; - qb = IMIN((num+(den>>1))/den, MAX_FINE_BITS); - celt_assert(qb >= 0); - max_bits += C*qb<<BITRES; - } - max_bits = (4*max_bits/(C*((m->eBands[j+1]-m->eBands[j])<<i)))-64; - celt_assert(max_bits >= 0); - celt_assert(max_bits < 256); - *cap++ = (unsigned char)max_bits; - } - } - } -} - -#endif /* CUSTOM_MODES */ - -#define ALLOC_STEPS 6 - -static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, int skip_start, - const int *bits1, const int *bits2, const int *thresh, const int *cap, opus_int32 total, opus_int32 *_balance, - int skip_rsv, int *intensity, int intensity_rsv, int *dual_stereo, int dual_stereo_rsv, int *bits, - int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth) -{ - opus_int32 psum; - int lo, hi; - int i, j; - int logM; - int stereo; - int codedBands=-1; - int alloc_floor; - opus_int32 left, percoeff; - int done; - opus_int32 balance; - SAVE_STACK; - - alloc_floor = C<<BITRES; - stereo = C>1; - - logM = LM<<BITRES; - lo = 0; - hi = 1<<ALLOC_STEPS; - for (i=0;i<ALLOC_STEPS;i++) - { - int mid = (lo+hi)>>1; - psum = 0; - done = 0; - for (j=end;j-->start;) - { - int tmp = bits1[j] + (mid*(opus_int32)bits2[j]>>ALLOC_STEPS); - if (tmp >= thresh[j] || done) - { - done = 1; - /* Don't allocate more than we can actually use */ - psum += IMIN(tmp, cap[j]); - } else { - if (tmp >= alloc_floor) - psum += alloc_floor; - } - } - if (psum > total) - hi = mid; - else - lo = mid; - } - psum = 0; - /*printf ("interp bisection gave %d\n", lo);*/ - done = 0; - for (j=end;j-->start;) - { - int tmp = bits1[j] + ((opus_int32)lo*bits2[j]>>ALLOC_STEPS); - if (tmp < thresh[j] && !done) - { - if (tmp >= alloc_floor) - tmp = alloc_floor; - else - tmp = 0; - } else - done = 1; - /* Don't allocate more than we can actually use */ - tmp = IMIN(tmp, cap[j]); - bits[j] = tmp; - psum += tmp; - } - - /* Decide which bands to skip, working backwards from 
the end. */ - for (codedBands=end;;codedBands--) - { - int band_width; - int band_bits; - int rem; - j = codedBands-1; - /* Never skip the first band, nor a band that has been boosted by - dynalloc. - In the first case, we'd be coding a bit to signal we're going to waste - all the other bits. - In the second case, we'd be coding a bit to redistribute all the bits - we just signaled should be cocentrated in this band. */ - if (j<=skip_start) - { - /* Give the bit we reserved to end skipping back. */ - total += skip_rsv; - break; - } - /*Figure out how many left-over bits we would be adding to this band. - This can include bits we've stolen back from higher, skipped bands.*/ - left = total-psum; - percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]); - left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; - rem = IMAX(left-(m->eBands[j]-m->eBands[start]),0); - band_width = m->eBands[codedBands]-m->eBands[j]; - band_bits = (int)(bits[j] + percoeff*band_width + rem); - /*Only code a skip decision if we're above the threshold for this band. - Otherwise it is force-skipped. 
- This ensures that we have enough bits to code the skip flag.*/ - if (band_bits >= IMAX(thresh[j], alloc_floor+(1<<BITRES))) - { - if (encode) - { - /*This if() block is the only part of the allocation function that - is not a mandatory part of the bitstream: any bands we choose to - skip here must be explicitly signaled.*/ - /*Choose a threshold with some hysteresis to keep bands from - fluctuating in and out.*/ -#ifdef FUZZING - if ((rand()&0x1) == 0) -#else - if (codedBands<=start+2 || (band_bits > ((j<prev?7:9)*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth)) -#endif - { - ec_enc_bit_logp(ec, 1, 1); - break; - } - ec_enc_bit_logp(ec, 0, 1); - } else if (ec_dec_bit_logp(ec, 1)) { - break; - } - /*We used a bit to skip this band.*/ - psum += 1<<BITRES; - band_bits -= 1<<BITRES; - } - /*Reclaim the bits originally allocated to this band.*/ - psum -= bits[j]+intensity_rsv; - if (intensity_rsv > 0) - intensity_rsv = LOG2_FRAC_TABLE[j-start]; - psum += intensity_rsv; - if (band_bits >= alloc_floor) - { - /*If we have enough for a fine energy bit per channel, use it.*/ - psum += alloc_floor; - bits[j] = alloc_floor; - } else { - /*Otherwise this band gets nothing at all.*/ - bits[j] = 0; - } - } - - celt_assert(codedBands > start); - /* Code the intensity and dual stereo parameters. 
*/ - if (intensity_rsv > 0) - { - if (encode) - { - *intensity = IMIN(*intensity, codedBands); - ec_enc_uint(ec, *intensity-start, codedBands+1-start); - } - else - *intensity = start+ec_dec_uint(ec, codedBands+1-start); - } - else - *intensity = 0; - if (*intensity <= start) - { - total += dual_stereo_rsv; - dual_stereo_rsv = 0; - } - if (dual_stereo_rsv > 0) - { - if (encode) - ec_enc_bit_logp(ec, *dual_stereo, 1); - else - *dual_stereo = ec_dec_bit_logp(ec, 1); - } - else - *dual_stereo = 0; - - /* Allocate the remaining bits */ - left = total-psum; - percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]); - left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; - for (j=start;j<codedBands;j++) - bits[j] += ((int)percoeff*(m->eBands[j+1]-m->eBands[j])); - for (j=start;j<codedBands;j++) - { - int tmp = (int)IMIN(left, m->eBands[j+1]-m->eBands[j]); - bits[j] += tmp; - left -= tmp; - } - /*for (j=0;j<end;j++)printf("%d ", bits[j]);printf("\n");*/ - - balance = 0; - for (j=start;j<codedBands;j++) - { - int N0, N, den; - int offset; - int NClogN; - opus_int32 excess, bit; - - celt_assert(bits[j] >= 0); - N0 = m->eBands[j+1]-m->eBands[j]; - N=N0<<LM; - bit = (opus_int32)bits[j]+balance; - - if (N>1) - { - excess = MAX32(bit-cap[j],0); - bits[j] = bit-excess; - - /* Compensate for the extra DoF in stereo */ - den=(C*N+ ((C==2 && N>2 && !*dual_stereo && j<*intensity) ? 
1 : 0)); - - NClogN = den*(m->logN[j] + logM); - - /* Offset for the number of fine bits by log2(N)/2 + FINE_OFFSET - compared to their "fair share" of total/N */ - offset = (NClogN>>1)-den*FINE_OFFSET; - - /* N=2 is the only point that doesn't match the curve */ - if (N==2) - offset += den<<BITRES>>2; - - /* Changing the offset for allocating the second and third - fine energy bit */ - if (bits[j] + offset < den*2<<BITRES) - offset += NClogN>>2; - else if (bits[j] + offset < den*3<<BITRES) - offset += NClogN>>3; - - /* Divide with rounding */ - ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1)))); - ebits[j] = celt_udiv(ebits[j], den)>>BITRES; - - /* Make sure not to bust */ - if (C*ebits[j] > (bits[j]>>BITRES)) - ebits[j] = bits[j] >> stereo >> BITRES; - - /* More than that is useless because that's about as far as PVQ can go */ - ebits[j] = IMIN(ebits[j], MAX_FINE_BITS); - - /* If we rounded down or capped this band, make it a candidate for the - final fine energy pass */ - fine_priority[j] = ebits[j]*(den<<BITRES) >= bits[j]+offset; - - /* Remove the allocated fine bits; the rest are assigned to PVQ */ - bits[j] -= C*ebits[j]<<BITRES; - - } else { - /* For N=1, all bits go to fine energy except for a single sign bit */ - excess = MAX32(0,bit-(C<<BITRES)); - bits[j] = bit-excess; - ebits[j] = 0; - fine_priority[j] = 1; - } - - /* Fine energy can't take advantage of the re-balancing in - quant_all_bands(). - Instead, do the re-balancing here.*/ - if(excess > 0) - { - int extra_fine; - int extra_bits; - extra_fine = IMIN(excess>>(stereo+BITRES),MAX_FINE_BITS-ebits[j]); - ebits[j] += extra_fine; - extra_bits = extra_fine*C<<BITRES; - fine_priority[j] = extra_bits >= excess-balance; - excess -= extra_bits; - } - balance = excess; - - celt_assert(bits[j] >= 0); - celt_assert(ebits[j] >= 0); - } - /* Save any remaining bits over the cap for the rebalancing in - quant_all_bands(). 
*/ - *_balance = balance; - - /* The skipped bands use all their bits for fine energy. */ - for (;j<end;j++) - { - ebits[j] = bits[j] >> stereo >> BITRES; - celt_assert(C*ebits[j]<<BITRES == bits[j]); - bits[j] = 0; - fine_priority[j] = ebits[j]<1; - } - RESTORE_STACK; - return codedBands; -} - -int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo, - opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth) -{ - int lo, hi, len, j; - int codedBands; - int skip_start; - int skip_rsv; - int intensity_rsv; - int dual_stereo_rsv; - VARDECL(int, bits1); - VARDECL(int, bits2); - VARDECL(int, thresh); - VARDECL(int, trim_offset); - SAVE_STACK; - - total = IMAX(total, 0); - len = m->nbEBands; - skip_start = start; - /* Reserve a bit to signal the end of manually skipped bands. */ - skip_rsv = total >= 1<<BITRES ? 1<<BITRES : 0; - total -= skip_rsv; - /* Reserve bits for the intensity and dual stereo parameters. */ - intensity_rsv = dual_stereo_rsv = 0; - if (C==2) - { - intensity_rsv = LOG2_FRAC_TABLE[end-start]; - if (intensity_rsv>total) - intensity_rsv = 0; - else - { - total -= intensity_rsv; - dual_stereo_rsv = total>=1<<BITRES ? 
1<<BITRES : 0; - total -= dual_stereo_rsv; - } - } - ALLOC(bits1, len, int); - ALLOC(bits2, len, int); - ALLOC(thresh, len, int); - ALLOC(trim_offset, len, int); - - for (j=start;j<end;j++) - { - /* Below this threshold, we're sure not to allocate any PVQ bits */ - thresh[j] = IMAX((C)<<BITRES, (3*(m->eBands[j+1]-m->eBands[j])<<LM<<BITRES)>>4); - /* Tilt of the allocation curve */ - trim_offset[j] = C*(m->eBands[j+1]-m->eBands[j])*(alloc_trim-5-LM)*(end-j-1) - *(1<<(LM+BITRES))>>6; - /* Giving less resolution to single-coefficient bands because they get - more benefit from having one coarse value per coefficient*/ - if ((m->eBands[j+1]-m->eBands[j])<<LM==1) - trim_offset[j] -= C<<BITRES; - } - lo = 1; - hi = m->nbAllocVectors - 1; - do - { - int done = 0; - int psum = 0; - int mid = (lo+hi) >> 1; - for (j=end;j-->start;) - { - int bitsj; - int N = m->eBands[j+1]-m->eBands[j]; - bitsj = C*N*m->allocVectors[mid*len+j]<<LM>>2; - if (bitsj > 0) - bitsj = IMAX(0, bitsj + trim_offset[j]); - bitsj += offsets[j]; - if (bitsj >= thresh[j] || done) - { - done = 1; - /* Don't allocate more than we can actually use */ - psum += IMIN(bitsj, cap[j]); - } else { - if (bitsj >= C<<BITRES) - psum += C<<BITRES; - } - } - if (psum > total) - hi = mid - 1; - else - lo = mid + 1; - /*printf ("lo = %d, hi = %d\n", lo, hi);*/ - } - while (lo <= hi); - hi = lo--; - /*printf ("interp between %d and %d\n", lo, hi);*/ - for (j=start;j<end;j++) - { - int bits1j, bits2j; - int N = m->eBands[j+1]-m->eBands[j]; - bits1j = C*N*m->allocVectors[lo*len+j]<<LM>>2; - bits2j = hi>=m->nbAllocVectors ? 
- cap[j] : C*N*m->allocVectors[hi*len+j]<<LM>>2; - if (bits1j > 0) - bits1j = IMAX(0, bits1j + trim_offset[j]); - if (bits2j > 0) - bits2j = IMAX(0, bits2j + trim_offset[j]); - if (lo > 0) - bits1j += offsets[j]; - bits2j += offsets[j]; - if (offsets[j]>0) - skip_start = j; - bits2j = IMAX(0,bits2j-bits1j); - bits1[j] = bits1j; - bits2[j] = bits2j; - } - codedBands = interp_bits2pulses(m, start, end, skip_start, bits1, bits2, thresh, cap, - total, balance, skip_rsv, intensity, intensity_rsv, dual_stereo, dual_stereo_rsv, - pulses, ebits, fine_priority, C, LM, ec, encode, prev, signalBandwidth); - RESTORE_STACK; - return codedBands; -} - diff --git a/thirdparty/opus/celt/rate.h b/thirdparty/opus/celt/rate.h deleted file mode 100644 index 515f7687ce..0000000000 --- a/thirdparty/opus/celt/rate.h +++ /dev/null @@ -1,101 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef RATE_H -#define RATE_H - -#define MAX_PSEUDO 40 -#define LOG_MAX_PSEUDO 6 - -#define CELT_MAX_PULSES 128 - -#define MAX_FINE_BITS 8 - -#define FINE_OFFSET 21 -#define QTHETA_OFFSET 4 -#define QTHETA_OFFSET_TWOPHASE 16 - -#include "cwrs.h" -#include "modes.h" - -void compute_pulse_cache(CELTMode *m, int LM); - -static OPUS_INLINE int get_pulses(int i) -{ - return i<8 ? i : (8 + (i&7)) << ((i>>3)-1); -} - -static OPUS_INLINE int bits2pulses(const CELTMode *m, int band, int LM, int bits) -{ - int i; - int lo, hi; - const unsigned char *cache; - - LM++; - cache = m->cache.bits + m->cache.index[LM*m->nbEBands+band]; - - lo = 0; - hi = cache[0]; - bits--; - for (i=0;i<LOG_MAX_PSEUDO;i++) - { - int mid = (lo+hi+1)>>1; - /* OPT: Make sure this is implemented with a conditional move */ - if ((int)cache[mid] >= bits) - hi = mid; - else - lo = mid; - } - if (bits- (lo == 0 ? -1 : (int)cache[lo]) <= (int)cache[hi]-bits) - return lo; - else - return hi; -} - -static OPUS_INLINE int pulses2bits(const CELTMode *m, int band, int LM, int pulses) -{ - const unsigned char *cache; - - LM++; - cache = m->cache.bits + m->cache.index[LM*m->nbEBands+band]; - return pulses == 0 ? 0 : cache[pulses]+1; -} - -/** Compute the pulse allocation, i.e. how many pulses will go in each - * band. 
- @param m mode - @param offsets Requested increase or decrease in the number of bits for - each band - @param total Number of bands - @param pulses Number of pulses per band (returned) - @return Total number of bits allocated -*/ -int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stero, - opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth); - -#endif diff --git a/thirdparty/opus/celt/stack_alloc.h b/thirdparty/opus/celt/stack_alloc.h deleted file mode 100644 index 2b51c8d80c..0000000000 --- a/thirdparty/opus/celt/stack_alloc.h +++ /dev/null @@ -1,184 +0,0 @@ -/* Copyright (C) 2002-2003 Jean-Marc Valin - Copyright (C) 2007-2009 Xiph.Org Foundation */ -/** - @file stack_alloc.h - @brief Temporary memory allocation on stack -*/ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef STACK_ALLOC_H -#define STACK_ALLOC_H - -#include "opus_types.h" -#include "opus_defines.h" - -#if (!defined (VAR_ARRAYS) && !defined (USE_ALLOCA) && !defined (NONTHREADSAFE_PSEUDOSTACK)) -#error "Opus requires one of VAR_ARRAYS, USE_ALLOCA, or NONTHREADSAFE_PSEUDOSTACK be defined to select the temporary allocation mode." -#endif - -#ifdef USE_ALLOCA -# ifdef WIN32 -# include <malloc.h> -# else -# ifdef HAVE_ALLOCA_H -# include <alloca.h> -# else -# include <stdlib.h> -# endif -# endif -#endif - -/** - * @def ALIGN(stack, size) - * - * Aligns the stack to a 'size' boundary - * - * @param stack Stack - * @param size New size boundary - */ - -/** - * @def PUSH(stack, size, type) - * - * Allocates 'size' elements of type 'type' on the stack - * - * @param stack Stack - * @param size Number of elements - * @param type Type of element - */ - -/** - * @def VARDECL(var) - * - * Declare variable on stack - * - * @param var Variable to declare - */ - -/** - * @def ALLOC(var, size, type) - * - * Allocate 'size' elements of 'type' on stack - * - * @param var Name of variable to allocate - * @param size Number of elements - * @param type Type of element - */ - -#if defined(VAR_ARRAYS) - -#define VARDECL(type, var) -#define ALLOC(var, size, type) type var[size] -#define SAVE_STACK -#define RESTORE_STACK -#define ALLOC_STACK -/* C99 does not allow VLAs of size zero */ -#define ALLOC_NONE 1 - -#elif defined(USE_ALLOCA) - -#define VARDECL(type, var) 
type *var - -# ifdef WIN32 -# define ALLOC(var, size, type) var = ((type*)_alloca(sizeof(type)*(size))) -# else -# define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size))) -# endif - -#define SAVE_STACK -#define RESTORE_STACK -#define ALLOC_STACK -#define ALLOC_NONE 0 - -#else - -#ifdef CELT_C -char *scratch_ptr=0; -char *global_stack=0; -#else -extern char *global_stack; -extern char *scratch_ptr; -#endif /* CELT_C */ - -#ifdef ENABLE_VALGRIND - -#include <valgrind/memcheck.h> - -#ifdef CELT_C -char *global_stack_top=0; -#else -extern char *global_stack_top; -#endif /* CELT_C */ - -#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1)) -#define PUSH(stack, size, type) (VALGRIND_MAKE_MEM_NOACCESS(stack, global_stack_top-stack),ALIGN((stack),sizeof(type)/sizeof(char)),VALGRIND_MAKE_MEM_UNDEFINED(stack, ((size)*sizeof(type)/sizeof(char))),(stack)+=(2*(size)*sizeof(type)/sizeof(char)),(type*)((stack)-(2*(size)*sizeof(type)/sizeof(char)))) -#define RESTORE_STACK ((global_stack = _saved_stack),VALGRIND_MAKE_MEM_NOACCESS(global_stack, global_stack_top-global_stack)) -#define ALLOC_STACK char *_saved_stack; ((global_stack = (global_stack==0) ? 
((global_stack_top=opus_alloc_scratch(GLOBAL_STACK_SIZE*2)+(GLOBAL_STACK_SIZE*2))-(GLOBAL_STACK_SIZE*2)) : global_stack),VALGRIND_MAKE_MEM_NOACCESS(global_stack, global_stack_top-global_stack)); _saved_stack = global_stack; - -#else - -#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1)) -#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char)))) -#if 0 /* Set this to 1 to instrument pseudostack usage */ -#define RESTORE_STACK (printf("%ld %s:%d\n", global_stack-scratch_ptr, __FILE__, __LINE__),global_stack = _saved_stack) -#else -#define RESTORE_STACK (global_stack = _saved_stack) -#endif -#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? (scratch_ptr=opus_alloc_scratch(GLOBAL_STACK_SIZE)) : global_stack); _saved_stack = global_stack; - -#endif /* ENABLE_VALGRIND */ - -#include "os_support.h" -#define VARDECL(type, var) type *var -#define ALLOC(var, size, type) var = PUSH(global_stack, size, type) -#define SAVE_STACK char *_saved_stack = global_stack; -#define ALLOC_NONE 0 - -#endif /* VAR_ARRAYS */ - - -#ifdef ENABLE_VALGRIND - -#include <valgrind/memcheck.h> -#define OPUS_CHECK_ARRAY(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr)) -#define OPUS_CHECK_VALUE(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value) -#define OPUS_CHECK_ARRAY_COND(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr)) -#define OPUS_CHECK_VALUE_COND(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value) -#define OPUS_PRINT_INT(value) do {fprintf(stderr, #value " = %d at %s:%d\n", value, __FILE__, __LINE__);}while(0) -#define OPUS_FPRINTF fprintf - -#else - -static OPUS_INLINE int _opus_false(void) {return 0;} -#define OPUS_CHECK_ARRAY(ptr, len) _opus_false() -#define OPUS_CHECK_VALUE(value) _opus_false() -#define OPUS_PRINT_INT(value) do{}while(0) -#define OPUS_FPRINTF (void) - -#endif - - -#endif /* STACK_ALLOC_H */ 
diff --git a/thirdparty/opus/celt/static_modes_fixed.h b/thirdparty/opus/celt/static_modes_fixed.h deleted file mode 100644 index 8717d626cb..0000000000 --- a/thirdparty/opus/celt/static_modes_fixed.h +++ /dev/null @@ -1,892 +0,0 @@ -/* The contents of this file was automatically generated by dump_modes.c - with arguments: 48000 960 - It contains static definitions for some pre-defined modes. */ -#include "modes.h" -#include "rate.h" - -#ifdef HAVE_ARM_NE10 -#define OVERRIDE_FFT 1 -#include "static_modes_fixed_arm_ne10.h" -#endif - -#ifndef DEF_WINDOW120 -#define DEF_WINDOW120 -static const opus_val16 window120[120] = { -2, 20, 55, 108, 178, -266, 372, 494, 635, 792, -966, 1157, 1365, 1590, 1831, -2089, 2362, 2651, 2956, 3276, -3611, 3961, 4325, 4703, 5094, -5499, 5916, 6346, 6788, 7241, -7705, 8179, 8663, 9156, 9657, -10167, 10684, 11207, 11736, 12271, -12810, 13353, 13899, 14447, 14997, -15547, 16098, 16648, 17197, 17744, -18287, 18827, 19363, 19893, 20418, -20936, 21447, 21950, 22445, 22931, -23407, 23874, 24330, 24774, 25208, -25629, 26039, 26435, 26819, 27190, -27548, 27893, 28224, 28541, 28845, -29135, 29411, 29674, 29924, 30160, -30384, 30594, 30792, 30977, 31151, -31313, 31463, 31602, 31731, 31849, -31958, 32057, 32148, 32229, 32303, -32370, 32429, 32481, 32528, 32568, -32604, 32634, 32661, 32683, 32701, -32717, 32729, 32740, 32748, 32754, -32758, 32762, 32764, 32766, 32767, -32767, 32767, 32767, 32767, 32767, -}; -#endif - -#ifndef DEF_LOGN400 -#define DEF_LOGN400 -static const opus_int16 logN400[21] = { -0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 21, 21, 24, 29, 34, 36, }; -#endif - -#ifndef DEF_PULSE_CACHE50 -#define DEF_PULSE_CACHE50 -static const opus_int16 cache_index50[105] = { --1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 41, 41, 41, -82, 82, 123, 164, 200, 222, 0, 0, 0, 0, 0, 0, 0, 0, 41, -41, 41, 41, 123, 123, 123, 164, 164, 240, 266, 283, 295, 41, 41, 41, -41, 41, 41, 41, 41, 123, 123, 123, 123, 240, 240, 240, 266, 266, 305, -318, 328, 336, 
123, 123, 123, 123, 123, 123, 123, 123, 240, 240, 240, 240, -305, 305, 305, 318, 318, 343, 351, 358, 364, 240, 240, 240, 240, 240, 240, -240, 240, 305, 305, 305, 305, 343, 343, 343, 351, 351, 370, 376, 382, 387, -}; -static const unsigned char cache_bits50[392] = { -40, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 40, 15, 23, 28, -31, 34, 36, 38, 39, 41, 42, 43, 44, 45, 46, 47, 47, 49, 50, -51, 52, 53, 54, 55, 55, 57, 58, 59, 60, 61, 62, 63, 63, 65, -66, 67, 68, 69, 70, 71, 71, 40, 20, 33, 41, 48, 53, 57, 61, -64, 66, 69, 71, 73, 75, 76, 78, 80, 82, 85, 87, 89, 91, 92, -94, 96, 98, 101, 103, 105, 107, 108, 110, 112, 114, 117, 119, 121, 123, -124, 126, 128, 40, 23, 39, 51, 60, 67, 73, 79, 83, 87, 91, 94, -97, 100, 102, 105, 107, 111, 115, 118, 121, 124, 126, 129, 131, 135, 139, -142, 145, 148, 150, 153, 155, 159, 163, 166, 169, 172, 174, 177, 179, 35, -28, 49, 65, 78, 89, 99, 107, 114, 120, 126, 132, 136, 141, 145, 149, -153, 159, 165, 171, 176, 180, 185, 189, 192, 199, 205, 211, 216, 220, 225, -229, 232, 239, 245, 251, 21, 33, 58, 79, 97, 112, 125, 137, 148, 157, -166, 174, 182, 189, 195, 201, 207, 217, 227, 235, 243, 251, 17, 35, 63, -86, 106, 123, 139, 152, 165, 177, 187, 197, 206, 214, 222, 230, 237, 250, -25, 31, 55, 75, 91, 105, 117, 128, 138, 146, 154, 161, 168, 174, 180, -185, 190, 200, 208, 215, 222, 229, 235, 240, 245, 255, 16, 36, 65, 89, -110, 128, 144, 159, 173, 185, 196, 207, 217, 226, 234, 242, 250, 11, 41, -74, 103, 128, 151, 172, 191, 209, 225, 241, 255, 9, 43, 79, 110, 138, -163, 186, 207, 227, 246, 12, 39, 71, 99, 123, 144, 164, 182, 198, 214, -228, 241, 253, 9, 44, 81, 113, 142, 168, 192, 214, 235, 255, 7, 49, -90, 127, 160, 191, 220, 247, 6, 51, 95, 134, 170, 203, 234, 7, 47, -87, 123, 155, 184, 212, 237, 6, 52, 97, 137, 174, 208, 240, 5, 57, -106, 151, 192, 231, 5, 59, 111, 158, 202, 243, 5, 55, 103, 147, 187, -224, 5, 60, 113, 161, 206, 248, 4, 65, 122, 175, 224, 4, 
67, 127, -182, 234, }; -static const unsigned char cache_caps50[168] = { -224, 224, 224, 224, 224, 224, 224, 224, 160, 160, 160, 160, 185, 185, 185, -178, 178, 168, 134, 61, 37, 224, 224, 224, 224, 224, 224, 224, 224, 240, -240, 240, 240, 207, 207, 207, 198, 198, 183, 144, 66, 40, 160, 160, 160, -160, 160, 160, 160, 160, 185, 185, 185, 185, 193, 193, 193, 183, 183, 172, -138, 64, 38, 240, 240, 240, 240, 240, 240, 240, 240, 207, 207, 207, 207, -204, 204, 204, 193, 193, 180, 143, 66, 40, 185, 185, 185, 185, 185, 185, -185, 185, 193, 193, 193, 193, 193, 193, 193, 183, 183, 172, 138, 65, 39, -207, 207, 207, 207, 207, 207, 207, 207, 204, 204, 204, 204, 201, 201, 201, -188, 188, 176, 141, 66, 40, 193, 193, 193, 193, 193, 193, 193, 193, 193, -193, 193, 193, 194, 194, 194, 184, 184, 173, 139, 65, 39, 204, 204, 204, -204, 204, 204, 204, 204, 201, 201, 201, 201, 198, 198, 198, 187, 187, 175, -140, 66, 40, }; -#endif - -#ifndef FFT_TWIDDLES48000_960 -#define FFT_TWIDDLES48000_960 -static const kiss_twiddle_cpx fft_twiddles48000_960[480] = { -{32767, 0}, {32766, -429}, -{32757, -858}, {32743, -1287}, -{32724, -1715}, {32698, -2143}, -{32667, -2570}, {32631, -2998}, -{32588, -3425}, {32541, -3851}, -{32488, -4277}, {32429, -4701}, -{32364, -5125}, {32295, -5548}, -{32219, -5971}, {32138, -6393}, -{32051, -6813}, {31960, -7231}, -{31863, -7650}, {31760, -8067}, -{31652, -8481}, {31539, -8895}, -{31419, -9306}, {31294, -9716}, -{31165, -10126}, {31030, -10532}, -{30889, -10937}, {30743, -11340}, -{30592, -11741}, {30436, -12141}, -{30274, -12540}, {30107, -12935}, -{29936, -13328}, {29758, -13718}, -{29577, -14107}, {29390, -14493}, -{29197, -14875}, {29000, -15257}, -{28797, -15635}, {28590, -16010}, -{28379, -16384}, {28162, -16753}, -{27940, -17119}, {27714, -17484}, -{27482, -17845}, {27246, -18205}, -{27006, -18560}, {26760, -18911}, -{26510, -19260}, {26257, -19606}, -{25997, -19947}, {25734, -20286}, -{25466, -20621}, {25194, -20952}, -{24918, -21281}, {24637, -21605}, 
-{24353, -21926}, {24063, -22242}, -{23770, -22555}, {23473, -22865}, -{23171, -23171}, {22866, -23472}, -{22557, -23769}, {22244, -24063}, -{21927, -24352}, {21606, -24636}, -{21282, -24917}, {20954, -25194}, -{20622, -25465}, {20288, -25733}, -{19949, -25997}, {19607, -26255}, -{19261, -26509}, {18914, -26760}, -{18561, -27004}, {18205, -27246}, -{17846, -27481}, {17485, -27713}, -{17122, -27940}, {16755, -28162}, -{16385, -28378}, {16012, -28590}, -{15636, -28797}, {15258, -28999}, -{14878, -29197}, {14494, -29389}, -{14108, -29576}, {13720, -29757}, -{13329, -29934}, {12937, -30107}, -{12540, -30274}, {12142, -30435}, -{11744, -30592}, {11342, -30743}, -{10939, -30889}, {10534, -31030}, -{10127, -31164}, {9718, -31294}, -{9307, -31418}, {8895, -31537}, -{8482, -31652}, {8067, -31759}, -{7650, -31862}, {7233, -31960}, -{6815, -32051}, {6393, -32138}, -{5973, -32219}, {5549, -32294}, -{5127, -32364}, {4703, -32429}, -{4278, -32487}, {3852, -32541}, -{3426, -32588}, {2999, -32630}, -{2572, -32667}, {2144, -32698}, -{1716, -32724}, {1287, -32742}, -{860, -32757}, {430, -32766}, -{0, -32767}, {-429, -32766}, -{-858, -32757}, {-1287, -32743}, -{-1715, -32724}, {-2143, -32698}, -{-2570, -32667}, {-2998, -32631}, -{-3425, -32588}, {-3851, -32541}, -{-4277, -32488}, {-4701, -32429}, -{-5125, -32364}, {-5548, -32295}, -{-5971, -32219}, {-6393, -32138}, -{-6813, -32051}, {-7231, -31960}, -{-7650, -31863}, {-8067, -31760}, -{-8481, -31652}, {-8895, -31539}, -{-9306, -31419}, {-9716, -31294}, -{-10126, -31165}, {-10532, -31030}, -{-10937, -30889}, {-11340, -30743}, -{-11741, -30592}, {-12141, -30436}, -{-12540, -30274}, {-12935, -30107}, -{-13328, -29936}, {-13718, -29758}, -{-14107, -29577}, {-14493, -29390}, -{-14875, -29197}, {-15257, -29000}, -{-15635, -28797}, {-16010, -28590}, -{-16384, -28379}, {-16753, -28162}, -{-17119, -27940}, {-17484, -27714}, -{-17845, -27482}, {-18205, -27246}, -{-18560, -27006}, {-18911, -26760}, -{-19260, -26510}, {-19606, -26257}, -{-19947, 
-25997}, {-20286, -25734}, -{-20621, -25466}, {-20952, -25194}, -{-21281, -24918}, {-21605, -24637}, -{-21926, -24353}, {-22242, -24063}, -{-22555, -23770}, {-22865, -23473}, -{-23171, -23171}, {-23472, -22866}, -{-23769, -22557}, {-24063, -22244}, -{-24352, -21927}, {-24636, -21606}, -{-24917, -21282}, {-25194, -20954}, -{-25465, -20622}, {-25733, -20288}, -{-25997, -19949}, {-26255, -19607}, -{-26509, -19261}, {-26760, -18914}, -{-27004, -18561}, {-27246, -18205}, -{-27481, -17846}, {-27713, -17485}, -{-27940, -17122}, {-28162, -16755}, -{-28378, -16385}, {-28590, -16012}, -{-28797, -15636}, {-28999, -15258}, -{-29197, -14878}, {-29389, -14494}, -{-29576, -14108}, {-29757, -13720}, -{-29934, -13329}, {-30107, -12937}, -{-30274, -12540}, {-30435, -12142}, -{-30592, -11744}, {-30743, -11342}, -{-30889, -10939}, {-31030, -10534}, -{-31164, -10127}, {-31294, -9718}, -{-31418, -9307}, {-31537, -8895}, -{-31652, -8482}, {-31759, -8067}, -{-31862, -7650}, {-31960, -7233}, -{-32051, -6815}, {-32138, -6393}, -{-32219, -5973}, {-32294, -5549}, -{-32364, -5127}, {-32429, -4703}, -{-32487, -4278}, {-32541, -3852}, -{-32588, -3426}, {-32630, -2999}, -{-32667, -2572}, {-32698, -2144}, -{-32724, -1716}, {-32742, -1287}, -{-32757, -860}, {-32766, -430}, -{-32767, 0}, {-32766, 429}, -{-32757, 858}, {-32743, 1287}, -{-32724, 1715}, {-32698, 2143}, -{-32667, 2570}, {-32631, 2998}, -{-32588, 3425}, {-32541, 3851}, -{-32488, 4277}, {-32429, 4701}, -{-32364, 5125}, {-32295, 5548}, -{-32219, 5971}, {-32138, 6393}, -{-32051, 6813}, {-31960, 7231}, -{-31863, 7650}, {-31760, 8067}, -{-31652, 8481}, {-31539, 8895}, -{-31419, 9306}, {-31294, 9716}, -{-31165, 10126}, {-31030, 10532}, -{-30889, 10937}, {-30743, 11340}, -{-30592, 11741}, {-30436, 12141}, -{-30274, 12540}, {-30107, 12935}, -{-29936, 13328}, {-29758, 13718}, -{-29577, 14107}, {-29390, 14493}, -{-29197, 14875}, {-29000, 15257}, -{-28797, 15635}, {-28590, 16010}, -{-28379, 16384}, {-28162, 16753}, -{-27940, 17119}, {-27714, 
17484}, -{-27482, 17845}, {-27246, 18205}, -{-27006, 18560}, {-26760, 18911}, -{-26510, 19260}, {-26257, 19606}, -{-25997, 19947}, {-25734, 20286}, -{-25466, 20621}, {-25194, 20952}, -{-24918, 21281}, {-24637, 21605}, -{-24353, 21926}, {-24063, 22242}, -{-23770, 22555}, {-23473, 22865}, -{-23171, 23171}, {-22866, 23472}, -{-22557, 23769}, {-22244, 24063}, -{-21927, 24352}, {-21606, 24636}, -{-21282, 24917}, {-20954, 25194}, -{-20622, 25465}, {-20288, 25733}, -{-19949, 25997}, {-19607, 26255}, -{-19261, 26509}, {-18914, 26760}, -{-18561, 27004}, {-18205, 27246}, -{-17846, 27481}, {-17485, 27713}, -{-17122, 27940}, {-16755, 28162}, -{-16385, 28378}, {-16012, 28590}, -{-15636, 28797}, {-15258, 28999}, -{-14878, 29197}, {-14494, 29389}, -{-14108, 29576}, {-13720, 29757}, -{-13329, 29934}, {-12937, 30107}, -{-12540, 30274}, {-12142, 30435}, -{-11744, 30592}, {-11342, 30743}, -{-10939, 30889}, {-10534, 31030}, -{-10127, 31164}, {-9718, 31294}, -{-9307, 31418}, {-8895, 31537}, -{-8482, 31652}, {-8067, 31759}, -{-7650, 31862}, {-7233, 31960}, -{-6815, 32051}, {-6393, 32138}, -{-5973, 32219}, {-5549, 32294}, -{-5127, 32364}, {-4703, 32429}, -{-4278, 32487}, {-3852, 32541}, -{-3426, 32588}, {-2999, 32630}, -{-2572, 32667}, {-2144, 32698}, -{-1716, 32724}, {-1287, 32742}, -{-860, 32757}, {-430, 32766}, -{0, 32767}, {429, 32766}, -{858, 32757}, {1287, 32743}, -{1715, 32724}, {2143, 32698}, -{2570, 32667}, {2998, 32631}, -{3425, 32588}, {3851, 32541}, -{4277, 32488}, {4701, 32429}, -{5125, 32364}, {5548, 32295}, -{5971, 32219}, {6393, 32138}, -{6813, 32051}, {7231, 31960}, -{7650, 31863}, {8067, 31760}, -{8481, 31652}, {8895, 31539}, -{9306, 31419}, {9716, 31294}, -{10126, 31165}, {10532, 31030}, -{10937, 30889}, {11340, 30743}, -{11741, 30592}, {12141, 30436}, -{12540, 30274}, {12935, 30107}, -{13328, 29936}, {13718, 29758}, -{14107, 29577}, {14493, 29390}, -{14875, 29197}, {15257, 29000}, -{15635, 28797}, {16010, 28590}, -{16384, 28379}, {16753, 28162}, -{17119, 27940}, 
{17484, 27714}, -{17845, 27482}, {18205, 27246}, -{18560, 27006}, {18911, 26760}, -{19260, 26510}, {19606, 26257}, -{19947, 25997}, {20286, 25734}, -{20621, 25466}, {20952, 25194}, -{21281, 24918}, {21605, 24637}, -{21926, 24353}, {22242, 24063}, -{22555, 23770}, {22865, 23473}, -{23171, 23171}, {23472, 22866}, -{23769, 22557}, {24063, 22244}, -{24352, 21927}, {24636, 21606}, -{24917, 21282}, {25194, 20954}, -{25465, 20622}, {25733, 20288}, -{25997, 19949}, {26255, 19607}, -{26509, 19261}, {26760, 18914}, -{27004, 18561}, {27246, 18205}, -{27481, 17846}, {27713, 17485}, -{27940, 17122}, {28162, 16755}, -{28378, 16385}, {28590, 16012}, -{28797, 15636}, {28999, 15258}, -{29197, 14878}, {29389, 14494}, -{29576, 14108}, {29757, 13720}, -{29934, 13329}, {30107, 12937}, -{30274, 12540}, {30435, 12142}, -{30592, 11744}, {30743, 11342}, -{30889, 10939}, {31030, 10534}, -{31164, 10127}, {31294, 9718}, -{31418, 9307}, {31537, 8895}, -{31652, 8482}, {31759, 8067}, -{31862, 7650}, {31960, 7233}, -{32051, 6815}, {32138, 6393}, -{32219, 5973}, {32294, 5549}, -{32364, 5127}, {32429, 4703}, -{32487, 4278}, {32541, 3852}, -{32588, 3426}, {32630, 2999}, -{32667, 2572}, {32698, 2144}, -{32724, 1716}, {32742, 1287}, -{32757, 860}, {32766, 430}, -}; -#ifndef FFT_BITREV480 -#define FFT_BITREV480 -static const opus_int16 fft_bitrev480[480] = { -0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448, -8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456, -16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464, -24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472, -4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452, -12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460, -20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468, -28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476, -1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449, -9, 
105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457, -17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465, -25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473, -5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453, -13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461, -21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469, -29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477, -2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450, -10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458, -18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466, -26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474, -6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454, -14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462, -22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470, -30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478, -3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451, -11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459, -19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467, -27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475, -7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455, -15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463, -23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471, -31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479, -}; -#endif - -#ifndef FFT_BITREV240 -#define FFT_BITREV240 -static const opus_int16 fft_bitrev240[240] = { -0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224, -4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228, -8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232, -12, 60, 108, 156, 
204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236, -1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225, -5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229, -9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233, -13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237, -2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226, -6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230, -10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234, -14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238, -3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227, -7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231, -11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235, -15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239, -}; -#endif - -#ifndef FFT_BITREV120 -#define FFT_BITREV120 -static const opus_int16 fft_bitrev120[120] = { -0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112, -4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116, -1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113, -5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117, -2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114, -6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118, -3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115, -7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119, -}; -#endif - -#ifndef FFT_BITREV60 -#define FFT_BITREV60 -static const opus_int16 fft_bitrev60[60] = { -0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56, -1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57, -2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58, -3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59, -}; -#endif - -#ifndef FFT_STATE48000_960_0 -#define FFT_STATE48000_960_0 -static const kiss_fft_state fft_state48000_960_0 = { -480, /* nfft */ -17476, /* scale */ -8, /* 
scale_shift */ --1, /* shift */ -{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ -fft_bitrev480, /* bitrev */ -fft_twiddles48000_960, /* bitrev */ -#ifdef OVERRIDE_FFT -(arch_fft_state *)&cfg_arch_480, -#else -NULL, -#endif -}; -#endif - -#ifndef FFT_STATE48000_960_1 -#define FFT_STATE48000_960_1 -static const kiss_fft_state fft_state48000_960_1 = { -240, /* nfft */ -17476, /* scale */ -7, /* scale_shift */ -1, /* shift */ -{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ -fft_bitrev240, /* bitrev */ -fft_twiddles48000_960, /* bitrev */ -#ifdef OVERRIDE_FFT -(arch_fft_state *)&cfg_arch_240, -#else -NULL, -#endif -}; -#endif - -#ifndef FFT_STATE48000_960_2 -#define FFT_STATE48000_960_2 -static const kiss_fft_state fft_state48000_960_2 = { -120, /* nfft */ -17476, /* scale */ -6, /* scale_shift */ -2, /* shift */ -{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ -fft_bitrev120, /* bitrev */ -fft_twiddles48000_960, /* bitrev */ -#ifdef OVERRIDE_FFT -(arch_fft_state *)&cfg_arch_120, -#else -NULL, -#endif -}; -#endif - -#ifndef FFT_STATE48000_960_3 -#define FFT_STATE48000_960_3 -static const kiss_fft_state fft_state48000_960_3 = { -60, /* nfft */ -17476, /* scale */ -5, /* scale_shift */ -3, /* shift */ -{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ -fft_bitrev60, /* bitrev */ -fft_twiddles48000_960, /* bitrev */ -#ifdef OVERRIDE_FFT -(arch_fft_state *)&cfg_arch_60, -#else -NULL, -#endif -}; -#endif - -#endif - -#ifndef MDCT_TWIDDLES960 -#define MDCT_TWIDDLES960 -static const opus_val16 mdct_twiddles960[1800] = { -32767, 32767, 32767, 32766, 32765, -32763, 32761, 32759, 32756, 32753, -32750, 32746, 32742, 32738, 32733, -32728, 32722, 32717, 32710, 32704, -32697, 32690, 32682, 32674, 32666, -32657, 32648, 32639, 32629, 32619, -32609, 32598, 32587, 32576, 32564, -32552, 32539, 32526, 32513, 32500, -32486, 32472, 32457, 32442, 32427, -32411, 32395, 32379, 32362, 32345, -32328, 32310, 32292, 
32274, 32255, -32236, 32217, 32197, 32177, 32157, -32136, 32115, 32093, 32071, 32049, -32027, 32004, 31981, 31957, 31933, -31909, 31884, 31859, 31834, 31809, -31783, 31756, 31730, 31703, 31676, -31648, 31620, 31592, 31563, 31534, -31505, 31475, 31445, 31415, 31384, -31353, 31322, 31290, 31258, 31226, -31193, 31160, 31127, 31093, 31059, -31025, 30990, 30955, 30920, 30884, -30848, 30812, 30775, 30738, 30701, -30663, 30625, 30587, 30548, 30509, -30470, 30430, 30390, 30350, 30309, -30269, 30227, 30186, 30144, 30102, -30059, 30016, 29973, 29930, 29886, -29842, 29797, 29752, 29707, 29662, -29616, 29570, 29524, 29477, 29430, -29383, 29335, 29287, 29239, 29190, -29142, 29092, 29043, 28993, 28943, -28892, 28842, 28791, 28739, 28688, -28636, 28583, 28531, 28478, 28425, -28371, 28317, 28263, 28209, 28154, -28099, 28044, 27988, 27932, 27876, -27820, 27763, 27706, 27648, 27591, -27533, 27474, 27416, 27357, 27298, -27238, 27178, 27118, 27058, 26997, -26936, 26875, 26814, 26752, 26690, -26628, 26565, 26502, 26439, 26375, -26312, 26247, 26183, 26119, 26054, -25988, 25923, 25857, 25791, 25725, -25658, 25592, 25524, 25457, 25389, -25322, 25253, 25185, 25116, 25047, -24978, 24908, 24838, 24768, 24698, -24627, 24557, 24485, 24414, 24342, -24270, 24198, 24126, 24053, 23980, -23907, 23834, 23760, 23686, 23612, -23537, 23462, 23387, 23312, 23237, -23161, 23085, 23009, 22932, 22856, -22779, 22701, 22624, 22546, 22468, -22390, 22312, 22233, 22154, 22075, -21996, 21916, 21836, 21756, 21676, -21595, 21515, 21434, 21352, 21271, -21189, 21107, 21025, 20943, 20860, -20777, 20694, 20611, 20528, 20444, -20360, 20276, 20192, 20107, 20022, -19937, 19852, 19767, 19681, 19595, -19509, 19423, 19336, 19250, 19163, -19076, 18988, 18901, 18813, 18725, -18637, 18549, 18460, 18372, 18283, -18194, 18104, 18015, 17925, 17835, -17745, 17655, 17565, 17474, 17383, -17292, 17201, 17110, 17018, 16927, -16835, 16743, 16650, 16558, 16465, -16372, 16279, 16186, 16093, 15999, -15906, 15812, 15718, 15624, 15529, 
-15435, 15340, 15245, 15150, 15055, -14960, 14864, 14769, 14673, 14577, -14481, 14385, 14288, 14192, 14095, -13998, 13901, 13804, 13706, 13609, -13511, 13414, 13316, 13218, 13119, -13021, 12923, 12824, 12725, 12626, -12527, 12428, 12329, 12230, 12130, -12030, 11930, 11831, 11730, 11630, -11530, 11430, 11329, 11228, 11128, -11027, 10926, 10824, 10723, 10622, -10520, 10419, 10317, 10215, 10113, -10011, 9909, 9807, 9704, 9602, -9499, 9397, 9294, 9191, 9088, -8985, 8882, 8778, 8675, 8572, -8468, 8364, 8261, 8157, 8053, -7949, 7845, 7741, 7637, 7532, -7428, 7323, 7219, 7114, 7009, -6905, 6800, 6695, 6590, 6485, -6380, 6274, 6169, 6064, 5958, -5853, 5747, 5642, 5536, 5430, -5325, 5219, 5113, 5007, 4901, -4795, 4689, 4583, 4476, 4370, -4264, 4157, 4051, 3945, 3838, -3732, 3625, 3518, 3412, 3305, -3198, 3092, 2985, 2878, 2771, -2664, 2558, 2451, 2344, 2237, -2130, 2023, 1916, 1809, 1702, -1594, 1487, 1380, 1273, 1166, -1059, 952, 844, 737, 630, -523, 416, 308, 201, 94, --13, -121, -228, -335, -442, --550, -657, -764, -871, -978, --1086, -1193, -1300, -1407, -1514, --1621, -1728, -1835, -1942, -2049, --2157, -2263, -2370, -2477, -2584, --2691, -2798, -2905, -3012, -3118, --3225, -3332, -3439, -3545, -3652, --3758, -3865, -3971, -4078, -4184, --4290, -4397, -4503, -4609, -4715, --4821, -4927, -5033, -5139, -5245, --5351, -5457, -5562, -5668, -5774, --5879, -5985, -6090, -6195, -6301, --6406, -6511, -6616, -6721, -6826, --6931, -7036, -7140, -7245, -7349, --7454, -7558, -7663, -7767, -7871, --7975, -8079, -8183, -8287, -8390, --8494, -8597, -8701, -8804, -8907, --9011, -9114, -9217, -9319, -9422, --9525, -9627, -9730, -9832, -9934, --10037, -10139, -10241, -10342, -10444, --10546, -10647, -10748, -10850, -10951, --11052, -11153, -11253, -11354, -11455, --11555, -11655, -11756, -11856, -11955, --12055, -12155, -12254, -12354, -12453, --12552, -12651, -12750, -12849, -12947, --13046, -13144, -13242, -13340, -13438, --13536, -13633, -13731, -13828, -13925, --14022, -14119, 
-14216, -14312, -14409, --14505, -14601, -14697, -14793, -14888, --14984, -15079, -15174, -15269, -15364, --15459, -15553, -15647, -15741, -15835, --15929, -16023, -16116, -16210, -16303, --16396, -16488, -16581, -16673, -16766, --16858, -16949, -17041, -17133, -17224, --17315, -17406, -17497, -17587, -17678, --17768, -17858, -17948, -18037, -18127, --18216, -18305, -18394, -18483, -18571, --18659, -18747, -18835, -18923, -19010, --19098, -19185, -19271, -19358, -19444, --19531, -19617, -19702, -19788, -19873, --19959, -20043, -20128, -20213, -20297, --20381, -20465, -20549, -20632, -20715, --20798, -20881, -20963, -21046, -21128, --21210, -21291, -21373, -21454, -21535, --21616, -21696, -21776, -21856, -21936, --22016, -22095, -22174, -22253, -22331, --22410, -22488, -22566, -22643, -22721, --22798, -22875, -22951, -23028, -23104, --23180, -23256, -23331, -23406, -23481, --23556, -23630, -23704, -23778, -23852, --23925, -23998, -24071, -24144, -24216, --24288, -24360, -24432, -24503, -24574, --24645, -24716, -24786, -24856, -24926, --24995, -25064, -25133, -25202, -25270, --25339, -25406, -25474, -25541, -25608, --25675, -25742, -25808, -25874, -25939, --26005, -26070, -26135, -26199, -26264, --26327, -26391, -26455, -26518, -26581, --26643, -26705, -26767, -26829, -26891, --26952, -27013, -27073, -27133, -27193, --27253, -27312, -27372, -27430, -27489, --27547, -27605, -27663, -27720, -27777, --27834, -27890, -27946, -28002, -28058, --28113, -28168, -28223, -28277, -28331, --28385, -28438, -28491, -28544, -28596, --28649, -28701, -28752, -28803, -28854, --28905, -28955, -29006, -29055, -29105, --29154, -29203, -29251, -29299, -29347, --29395, -29442, -29489, -29535, -29582, --29628, -29673, -29719, -29764, -29808, --29853, -29897, -29941, -29984, -30027, --30070, -30112, -30154, -30196, -30238, --30279, -30320, -30360, -30400, -30440, --30480, -30519, -30558, -30596, -30635, --30672, -30710, -30747, -30784, -30821, --30857, -30893, -30929, -30964, -30999, 
--31033, -31068, -31102, -31135, -31168, --31201, -31234, -31266, -31298, -31330, --31361, -31392, -31422, -31453, -31483, --31512, -31541, -31570, -31599, -31627, --31655, -31682, -31710, -31737, -31763, --31789, -31815, -31841, -31866, -31891, --31915, -31939, -31963, -31986, -32010, --32032, -32055, -32077, -32099, -32120, --32141, -32162, -32182, -32202, -32222, --32241, -32260, -32279, -32297, -32315, --32333, -32350, -32367, -32383, -32399, --32415, -32431, -32446, -32461, -32475, --32489, -32503, -32517, -32530, -32542, --32555, -32567, -32579, -32590, -32601, --32612, -32622, -32632, -32641, -32651, --32659, -32668, -32676, -32684, -32692, --32699, -32706, -32712, -32718, -32724, --32729, -32734, -32739, -32743, -32747, --32751, -32754, -32757, -32760, -32762, --32764, -32765, -32767, -32767, -32767, -32767, 32767, 32765, 32761, 32756, -32750, 32742, 32732, 32722, 32710, -32696, 32681, 32665, 32647, 32628, -32608, 32586, 32562, 32538, 32512, -32484, 32455, 32425, 32393, 32360, -32326, 32290, 32253, 32214, 32174, -32133, 32090, 32046, 32001, 31954, -31906, 31856, 31805, 31753, 31700, -31645, 31588, 31530, 31471, 31411, -31349, 31286, 31222, 31156, 31089, -31020, 30951, 30880, 30807, 30733, -30658, 30582, 30504, 30425, 30345, -30263, 30181, 30096, 30011, 29924, -29836, 29747, 29656, 29564, 29471, -29377, 29281, 29184, 29086, 28987, -28886, 28784, 28681, 28577, 28471, -28365, 28257, 28147, 28037, 27925, -27812, 27698, 27583, 27467, 27349, -27231, 27111, 26990, 26868, 26744, -26620, 26494, 26367, 26239, 26110, -25980, 25849, 25717, 25583, 25449, -25313, 25176, 25038, 24900, 24760, -24619, 24477, 24333, 24189, 24044, -23898, 23751, 23602, 23453, 23303, -23152, 22999, 22846, 22692, 22537, -22380, 22223, 22065, 21906, 21746, -21585, 21423, 21261, 21097, 20933, -20767, 20601, 20434, 20265, 20096, -19927, 19756, 19584, 19412, 19239, -19065, 18890, 18714, 18538, 18361, -18183, 18004, 17824, 17644, 17463, -17281, 17098, 16915, 16731, 16546, -16361, 16175, 15988, 
15800, 15612, -15423, 15234, 15043, 14852, 14661, -14469, 14276, 14083, 13889, 13694, -13499, 13303, 13107, 12910, 12713, -12515, 12317, 12118, 11918, 11718, -11517, 11316, 11115, 10913, 10710, -10508, 10304, 10100, 9896, 9691, -9486, 9281, 9075, 8869, 8662, -8455, 8248, 8040, 7832, 7623, -7415, 7206, 6996, 6787, 6577, -6366, 6156, 5945, 5734, 5523, -5311, 5100, 4888, 4675, 4463, -4251, 4038, 3825, 3612, 3399, -3185, 2972, 2758, 2544, 2330, -2116, 1902, 1688, 1474, 1260, -1045, 831, 617, 402, 188, --27, -241, -456, -670, -885, --1099, -1313, -1528, -1742, -1956, --2170, -2384, -2598, -2811, -3025, --3239, -3452, -3665, -3878, -4091, --4304, -4516, -4728, -4941, -5153, --5364, -5576, -5787, -5998, -6209, --6419, -6629, -6839, -7049, -7258, --7467, -7676, -7884, -8092, -8300, --8507, -8714, -8920, -9127, -9332, --9538, -9743, -9947, -10151, -10355, --10558, -10761, -10963, -11165, -11367, --11568, -11768, -11968, -12167, -12366, --12565, -12762, -12960, -13156, -13352, --13548, -13743, -13937, -14131, -14324, --14517, -14709, -14900, -15091, -15281, --15470, -15659, -15847, -16035, -16221, --16407, -16593, -16777, -16961, -17144, --17326, -17508, -17689, -17869, -18049, --18227, -18405, -18582, -18758, -18934, --19108, -19282, -19455, -19627, -19799, --19969, -20139, -20308, -20475, -20642, --20809, -20974, -21138, -21301, -21464, --21626, -21786, -21946, -22105, -22263, --22420, -22575, -22730, -22884, -23037, --23189, -23340, -23490, -23640, -23788, --23935, -24080, -24225, -24369, -24512, --24654, -24795, -24934, -25073, -25211, --25347, -25482, -25617, -25750, -25882, --26013, -26143, -26272, -26399, -26526, --26651, -26775, -26898, -27020, -27141, --27260, -27379, -27496, -27612, -27727, --27841, -27953, -28065, -28175, -28284, --28391, -28498, -28603, -28707, -28810, --28911, -29012, -29111, -29209, -29305, --29401, -29495, -29587, -29679, -29769, --29858, -29946, -30032, -30118, -30201, --30284, -30365, -30445, -30524, -30601, --30677, -30752, -30825, -30897, 
-30968, --31038, -31106, -31172, -31238, -31302, --31365, -31426, -31486, -31545, -31602, --31658, -31713, -31766, -31818, -31869, --31918, -31966, -32012, -32058, -32101, --32144, -32185, -32224, -32262, -32299, --32335, -32369, -32401, -32433, -32463, --32491, -32518, -32544, -32568, -32591, --32613, -32633, -32652, -32669, -32685, --32700, -32713, -32724, -32735, -32744, --32751, -32757, -32762, -32766, -32767, -32767, 32764, 32755, 32741, 32720, -32694, 32663, 32626, 32583, 32535, -32481, 32421, 32356, 32286, 32209, -32128, 32041, 31948, 31850, 31747, -31638, 31523, 31403, 31278, 31148, -31012, 30871, 30724, 30572, 30415, -30253, 30086, 29913, 29736, 29553, -29365, 29172, 28974, 28771, 28564, -28351, 28134, 27911, 27684, 27452, -27216, 26975, 26729, 26478, 26223, -25964, 25700, 25432, 25159, 24882, -24601, 24315, 24026, 23732, 23434, -23133, 22827, 22517, 22204, 21886, -21565, 21240, 20912, 20580, 20244, -19905, 19563, 19217, 18868, 18516, -18160, 17802, 17440, 17075, 16708, -16338, 15964, 15588, 15210, 14829, -14445, 14059, 13670, 13279, 12886, -12490, 12093, 11693, 11291, 10888, -10482, 10075, 9666, 9255, 8843, -8429, 8014, 7597, 7180, 6760, -6340, 5919, 5496, 5073, 4649, -4224, 3798, 3372, 2945, 2517, -2090, 1661, 1233, 804, 375, --54, -483, -911, -1340, -1768, --2197, -2624, -3052, -3479, -3905, --4330, -4755, -5179, -5602, -6024, --6445, -6865, -7284, -7702, -8118, --8533, -8946, -9358, -9768, -10177, --10584, -10989, -11392, -11793, -12192, --12589, -12984, -13377, -13767, -14155, --14541, -14924, -15305, -15683, -16058, --16430, -16800, -17167, -17531, -17892, --18249, -18604, -18956, -19304, -19649, --19990, -20329, -20663, -20994, -21322, --21646, -21966, -22282, -22595, -22904, --23208, -23509, -23806, -24099, -24387, --24672, -24952, -25228, -25499, -25766, --26029, -26288, -26541, -26791, -27035, --27275, -27511, -27741, -27967, -28188, --28405, -28616, -28823, -29024, -29221, --29412, -29599, -29780, -29957, -30128, --30294, -30455, -30611, -30761, 
-30906, --31046, -31181, -31310, -31434, -31552, --31665, -31773, -31875, -31972, -32063, --32149, -32229, -32304, -32373, -32437, --32495, -32547, -32594, -32635, -32671, --32701, -32726, -32745, -32758, -32766, -32767, 32754, 32717, 32658, 32577, -32473, 32348, 32200, 32029, 31837, -31624, 31388, 31131, 30853, 30553, -30232, 29891, 29530, 29148, 28746, -28324, 27883, 27423, 26944, 26447, -25931, 25398, 24847, 24279, 23695, -23095, 22478, 21846, 21199, 20538, -19863, 19174, 18472, 17757, 17030, -16291, 15541, 14781, 14010, 13230, -12441, 11643, 10837, 10024, 9204, -8377, 7545, 6708, 5866, 5020, -4171, 3319, 2464, 1608, 751, --107, -965, -1822, -2678, -3532, --4383, -5232, -6077, -6918, -7754, --8585, -9409, -10228, -11039, -11843, --12639, -13426, -14204, -14972, -15730, --16477, -17213, -17937, -18648, -19347, --20033, -20705, -21363, -22006, -22634, --23246, -23843, -24423, -24986, -25533, --26062, -26573, -27066, -27540, -27995, --28431, -28848, -29245, -29622, -29979, --30315, -30630, -30924, -31197, -31449, --31679, -31887, -32074, -32239, -32381, --32501, -32600, -32675, -32729, -32759, -}; -#endif - -static const CELTMode mode48000_960_120 = { -48000, /* Fs */ -120, /* overlap */ -21, /* nbEBands */ -21, /* effEBands */ -{27853, 0, 4096, 8192, }, /* preemph */ -eband5ms, /* eBands */ -3, /* maxLM */ -8, /* nbShortMdcts */ -120, /* shortMdctSize */ -11, /* nbAllocVectors */ -band_allocation, /* allocVectors */ -logN400, /* logN */ -window120, /* window */ -{1920, 3, {&fft_state48000_960_0, &fft_state48000_960_1, &fft_state48000_960_2, &fft_state48000_960_3, }, mdct_twiddles960}, /* mdct */ -{392, cache_index50, cache_bits50, cache_caps50}, /* cache */ -}; - -/* List of all the available modes */ -#define TOTAL_MODES 1 -static const CELTMode * const static_mode_list[TOTAL_MODES] = { -&mode48000_960_120, -}; diff --git a/thirdparty/opus/celt/static_modes_fixed_arm_ne10.h b/thirdparty/opus/celt/static_modes_fixed_arm_ne10.h deleted file mode 100644 index 
b8ef0cee98..0000000000 --- a/thirdparty/opus/celt/static_modes_fixed_arm_ne10.h +++ /dev/null @@ -1,388 +0,0 @@ -/* The contents of this file was automatically generated by - * dump_mode_arm_ne10.c with arguments: 48000 960 - * It contains static definitions for some pre-defined modes. */ -#include <NE10_init.h> - -#ifndef NE10_FFT_PARAMS48000_960 -#define NE10_FFT_PARAMS48000_960 -static const ne10_int32_t ne10_factors_480[64] = { -4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, }; -static const ne10_int32_t ne10_factors_240[64] = { -3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, }; -static const ne10_int32_t ne10_factors_120[64] = { -3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, }; -static const ne10_int32_t ne10_factors_60[64] = { -2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, }; -static const ne10_fft_cpx_int32_t ne10_twiddles_480[480] = { -{0,0}, {2147483647,0}, {2147483647,0}, -{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394}, -{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496}, -{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096}, -{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152}, -{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313}, -{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424}, -{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496}, 
-{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268}, -{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785}, -{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172}, -{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682}, -{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313}, -{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450}, -{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067}, -{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277}, -{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424}, -{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771}, -{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994}, -{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593}, -{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968}, -{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851}, -{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394}, -{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959}, -{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516}, -{-94,-2147483647}, {-224473265,-2135719496}, {-446487060,-2100555955}, -{-663609049,-2042378281}, {-873460398,-1961823883}, {-1073741932,-1859775330}, -{-1262259116,-1737350839}, {-1436947137,-1595891268}, {-1595891628,-1436946738}, -{-1737350854,-1262259096}, {-1859775343,-1073741910}, {-1961823997,-873460141}, -{-2042378447,-663608538}, {-2100556013,-446486785}, {-2135719499,-224473240}, -{2147483647,0}, {2121044558,-335940465}, {2042378310,-663608960}, -{1913421927,-974937199}, {1737350743,-1262259248}, {1518500216,-1518500282}, -{1262259172,-1737350799}, {974937230,-1913421912}, {663608871,-2042378339}, -{335940246,-2121044593}, {-94,-2147483647}, {-335940431,-2121044564}, 
-{-663609049,-2042378281}, {-974937397,-1913421827}, {-1262259116,-1737350839}, -{-1518500258,-1518500240}, {-1737350854,-1262259096}, {-1913422071,-974936918}, -{-2042378447,-663608538}, {-2121044568,-335940406}, {-2147483647,188}, -{-2121044509,335940777}, {-2042378331,663608895}, {-1913421900,974937252}, -{-1737350633,1262259400}, {-1518499993,1518500506}, {-1262258813,1737351059}, -{-974936606,1913422229}, {-663609179,2042378239}, {-335940566,2121044542}, -{2147483647,0}, {2147299667,-28109693}, {2146747758,-56214570}, -{2145828015,-84309815}, {2144540595,-112390613}, {2142885719,-140452154}, -{2140863671,-168489630}, {2138474797,-196498235}, {2135719506,-224473172}, -{2132598271,-252409646}, {2129111626,-280302871}, {2125260168,-308148068}, -{2121044558,-335940465}, {2116465518,-363675300}, {2111523833,-391347822}, -{2106220349,-418953288}, {2100555974,-446486968}, {2094531681,-473944146}, -{2088148500,-501320115}, {2081407525,-528610186}, {2074309912,-555809682}, -{2066856885,-582913912}, {2059049696,-609918325}, {2050889698,-636818231}, -{2042378310,-663608960}, {2033516972,-690285983}, {2024307180,-716844791}, -{2014750533,-743280770}, {2004848691,-769589332}, {1994603329,-795766029}, -{1984016179,-821806435}, {1973089077,-847706028}, {1961823921,-873460313}, -{1950222618,-899064934}, {1938287127,-924515564}, {1926019520,-949807783}, -{1913421927,-974937199}, {1900496481,-999899565}, {1887245364,-1024690661}, -{1873670877,-1049306180}, {1859775377,-1073741851}, {1845561215,-1097993541}, -{1831030826,-1122057097}, {1816186632,-1145928502}, {1801031311,-1169603450}, -{1785567394,-1193077993}, {1769797456,-1216348214}, {1753724345,-1239409914}, -{1737350743,-1262259248}, {1720679456,-1284892300}, {1703713340,-1307305194}, -{1686455222,-1329494189}, {1668908218,-1351455280}, {1651075255,-1373184807}, -{1632959307,-1394679144}, {1614563642,-1415934412}, {1595891331,-1436947067}, -{1576945572,-1457713510}, {1557729613,-1478230181}, {1538246655,-1498493658}, 
-{1518500216,-1518500282}, {1498493590,-1538246721}, {1478230113,-1557729677}, -{1457713441,-1576945636}, {1436946998,-1595891394}, {1415934341,-1614563704}, -{1394679073,-1632959368}, {1373184735,-1651075315}, {1351455207,-1668908277}, -{1329494115,-1686455280}, {1307305120,-1703713397}, {1284892225,-1720679512}, -{1262259172,-1737350799}, {1239409837,-1753724400}, {1216348136,-1769797510}, -{1193077915,-1785567446}, {1169603371,-1801031362}, {1145928423,-1816186682}, -{1122057017,-1831030875}, {1097993571,-1845561197}, {1073741769,-1859775424}, -{1049305987,-1873670985}, {1024690635,-1887245378}, {999899482,-1900496524}, -{974937230,-1913421912}, {949807699,-1926019561}, {924515422,-1938287195}, -{899064965,-1950222603}, {873460227,-1961823959}, {847705824,-1973089164}, -{821806407,-1984016190}, {795765941,-1994603364}, {769589125,-2004848771}, -{743280682,-2014750566}, {716844642,-2024307233}, {690286016,-2033516961}, -{663608871,-2042378339}, {636818019,-2050889764}, {609918296,-2059049705}, -{582913822,-2066856911}, {555809715,-2074309903}, {528610126,-2081407540}, -{501319962,-2088148536}, {473944148,-2094531680}, {446486876,-2100555994}, -{418953102,-2106220386}, {391347792,-2111523838}, {363675176,-2116465540}, -{335940246,-2121044593}, {308148006,-2125260177}, {280302715,-2129111646}, -{252409648,-2132598271}, {224473078,-2135719516}, {196498046,-2138474814}, -{168489600,-2140863674}, {140452029,-2142885728}, {112390647,-2144540593}, -{84309753,-2145828017}, {56214412,-2146747762}, {28109695,-2147299667}, -{2147483647,0}, {2146747758,-56214570}, {2144540595,-112390613}, -{2140863671,-168489630}, {2135719506,-224473172}, {2129111626,-280302871}, -{2121044558,-335940465}, {2111523833,-391347822}, {2100555974,-446486968}, -{2088148500,-501320115}, {2074309912,-555809682}, {2059049696,-609918325}, -{2042378310,-663608960}, {2024307180,-716844791}, {2004848691,-769589332}, -{1984016179,-821806435}, {1961823921,-873460313}, {1938287127,-924515564}, 
-{1913421927,-974937199}, {1887245364,-1024690661}, {1859775377,-1073741851}, -{1831030826,-1122057097}, {1801031311,-1169603450}, {1769797456,-1216348214}, -{1737350743,-1262259248}, {1703713340,-1307305194}, {1668908218,-1351455280}, -{1632959307,-1394679144}, {1595891331,-1436947067}, {1557729613,-1478230181}, -{1518500216,-1518500282}, {1478230113,-1557729677}, {1436946998,-1595891394}, -{1394679073,-1632959368}, {1351455207,-1668908277}, {1307305120,-1703713397}, -{1262259172,-1737350799}, {1216348136,-1769797510}, {1169603371,-1801031362}, -{1122057017,-1831030875}, {1073741769,-1859775424}, {1024690635,-1887245378}, -{974937230,-1913421912}, {924515422,-1938287195}, {873460227,-1961823959}, -{821806407,-1984016190}, {769589125,-2004848771}, {716844642,-2024307233}, -{663608871,-2042378339}, {609918296,-2059049705}, {555809715,-2074309903}, -{501319962,-2088148536}, {446486876,-2100555994}, {391347792,-2111523838}, -{335940246,-2121044593}, {280302715,-2129111646}, {224473078,-2135719516}, -{168489600,-2140863674}, {112390647,-2144540593}, {56214412,-2146747762}, -{-94,-2147483647}, {-56214600,-2146747757}, {-112390835,-2144540584}, -{-168489787,-2140863659}, {-224473265,-2135719496}, {-280302901,-2129111622}, -{-335940431,-2121044564}, {-391347977,-2111523804}, {-446487060,-2100555955}, -{-501320144,-2088148493}, {-555809896,-2074309855}, {-609918476,-2059049651}, -{-663609049,-2042378281}, {-716844819,-2024307170}, {-769589300,-2004848703}, -{-821806581,-1984016118}, {-873460398,-1961823883}, {-924515591,-1938287114}, -{-974937397,-1913421827}, {-1024690575,-1887245411}, {-1073741932,-1859775330}, -{-1122057395,-1831030643}, {-1169603421,-1801031330}, {-1216348291,-1769797403}, -{-1262259116,-1737350839}, {-1307305268,-1703713283}, {-1351455453,-1668908078}, -{-1394679021,-1632959413}, {-1436947137,-1595891268}, {-1478230435,-1557729372}, -{-1518500258,-1518500240}, {-1557729742,-1478230045}, {-1595891628,-1436946738}, -{-1632959429,-1394679001}, 
{-1668908417,-1351455035}, {-1703713298,-1307305248}, -{-1737350854,-1262259096}, {-1769797708,-1216347848}, {-1801031344,-1169603400}, -{-1831030924,-1122056937}, {-1859775343,-1073741910}, {-1887245423,-1024690552}, -{-1913422071,-974936918}, {-1938287125,-924515568}, {-1961823997,-873460141}, -{-1984016324,-821806084}, {-2004848713,-769589276}, {-2024307264,-716844553}, -{-2042378447,-663608538}, {-2059049731,-609918206}, {-2074309994,-555809377}, -{-2088148499,-501320119}, {-2100556013,-446486785}, {-2111523902,-391347448}, -{-2121044568,-335940406}, {-2129111659,-280302621}, {-2135719499,-224473240}, -{-2140863681,-168489506}, {-2144540612,-112390298}, {-2146747758,-56214574}, -{2147483647,0}, {2145828015,-84309815}, {2140863671,-168489630}, -{2132598271,-252409646}, {2121044558,-335940465}, {2106220349,-418953288}, -{2088148500,-501320115}, {2066856885,-582913912}, {2042378310,-663608960}, -{2014750533,-743280770}, {1984016179,-821806435}, {1950222618,-899064934}, -{1913421927,-974937199}, {1873670877,-1049306180}, {1831030826,-1122057097}, -{1785567394,-1193077993}, {1737350743,-1262259248}, {1686455222,-1329494189}, -{1632959307,-1394679144}, {1576945572,-1457713510}, {1518500216,-1518500282}, -{1457713441,-1576945636}, {1394679073,-1632959368}, {1329494115,-1686455280}, -{1262259172,-1737350799}, {1193077915,-1785567446}, {1122057017,-1831030875}, -{1049305987,-1873670985}, {974937230,-1913421912}, {899064965,-1950222603}, -{821806407,-1984016190}, {743280682,-2014750566}, {663608871,-2042378339}, -{582913822,-2066856911}, {501319962,-2088148536}, {418953102,-2106220386}, -{335940246,-2121044593}, {252409648,-2132598271}, {168489600,-2140863674}, -{84309753,-2145828017}, {-94,-2147483647}, {-84309940,-2145828010}, -{-168489787,-2140863659}, {-252409834,-2132598249}, {-335940431,-2121044564}, -{-418953286,-2106220349}, {-501320144,-2088148493}, {-582914003,-2066856860}, -{-663609049,-2042378281}, {-743280858,-2014750501}, {-821806581,-1984016118}, 
-{-899065136,-1950222525}, {-974937397,-1913421827}, {-1049306374,-1873670768}, -{-1122057395,-1831030643}, {-1193078284,-1785567199}, {-1262259116,-1737350839}, -{-1329494061,-1686455323}, {-1394679021,-1632959413}, {-1457713485,-1576945595}, -{-1518500258,-1518500240}, {-1576945613,-1457713466}, {-1632959429,-1394679001}, -{-1686455338,-1329494041}, {-1737350854,-1262259096}, {-1785567498,-1193077837}, -{-1831030924,-1122056937}, {-1873671031,-1049305905}, {-1913422071,-974936918}, -{-1950222750,-899064648}, {-1984016324,-821806084}, {-2014750687,-743280354}, -{-2042378447,-663608538}, {-2066856867,-582913978}, {-2088148499,-501320119}, -{-2106220354,-418953261}, {-2121044568,-335940406}, {-2132598282,-252409555}, -{-2140863681,-168489506}, {-2145828021,-84309659}, {-2147483647,188}, -{-2145828006,84310034}, {-2140863651,168489881}, {-2132598237,252409928}, -{-2121044509,335940777}, {-2106220281,418953629}, {-2088148411,501320484}, -{-2066856765,582914339}, {-2042378331,663608895}, {-2014750557,743280706}, -{-1984016181,821806431}, {-1950222593,899064989}, {-1913421900,974937252}, -{-1873670848,1049306232}, {-1831030728,1122057257}, {-1785567289,1193078149}, -{-1737350633,1262259400}, {-1686455106,1329494336}, {-1632959185,1394679287}, -{-1576945358,1457713742}, {-1518499993,1518500506}, {-1457713209,1576945850}, -{-1394678735,1632959656}, {-1329493766,1686455555}, {-1262258813,1737351059}, -{-1193077546,1785567692}, {-1122056638,1831031107}, {-1049305599,1873671202}, -{-974936606,1913422229}, {-899064330,1950222896}, {-821805761,1984016458}, -{-743280025,2014750808}, {-663609179,2042378239}, {-582914134,2066856823}, -{-501320277,2088148461}, {-418953420,2106220322}, {-335940566,2121044542}, -{-252409716,2132598263}, {-168489668,2140863668}, {-84309821,2145828015}, -}; -static const ne10_fft_cpx_int32_t ne10_twiddles_240[240] = { -{0,0}, {2147483647,0}, {2147483647,0}, -{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394}, -{2147483647,0}, 
{1436946998,-1595891394}, {-224473265,-2135719496}, -{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096}, -{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152}, -{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968}, -{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851}, -{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394}, -{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959}, -{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516}, -{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313}, -{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424}, -{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496}, -{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268}, -{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785}, -{2147483647,0}, {2042378310,-663608960}, {1737350743,-1262259248}, -{1262259172,-1737350799}, {663608871,-2042378339}, {-94,-2147483647}, -{-663609049,-2042378281}, {-1262259116,-1737350839}, {-1737350854,-1262259096}, -{-2042378447,-663608538}, {-2147483647,188}, {-2042378331,663608895}, -{-1737350633,1262259400}, {-1262258813,1737351059}, {-663609179,2042378239}, -{2147483647,0}, {2146747758,-56214570}, {2144540595,-112390613}, -{2140863671,-168489630}, {2135719506,-224473172}, {2129111626,-280302871}, -{2121044558,-335940465}, {2111523833,-391347822}, {2100555974,-446486968}, -{2088148500,-501320115}, {2074309912,-555809682}, {2059049696,-609918325}, -{2042378310,-663608960}, {2024307180,-716844791}, {2004848691,-769589332}, -{1984016179,-821806435}, {1961823921,-873460313}, {1938287127,-924515564}, -{1913421927,-974937199}, {1887245364,-1024690661}, {1859775377,-1073741851}, -{1831030826,-1122057097}, {1801031311,-1169603450}, {1769797456,-1216348214}, -{1737350743,-1262259248}, {1703713340,-1307305194}, 
{1668908218,-1351455280}, -{1632959307,-1394679144}, {1595891331,-1436947067}, {1557729613,-1478230181}, -{1518500216,-1518500282}, {1478230113,-1557729677}, {1436946998,-1595891394}, -{1394679073,-1632959368}, {1351455207,-1668908277}, {1307305120,-1703713397}, -{1262259172,-1737350799}, {1216348136,-1769797510}, {1169603371,-1801031362}, -{1122057017,-1831030875}, {1073741769,-1859775424}, {1024690635,-1887245378}, -{974937230,-1913421912}, {924515422,-1938287195}, {873460227,-1961823959}, -{821806407,-1984016190}, {769589125,-2004848771}, {716844642,-2024307233}, -{663608871,-2042378339}, {609918296,-2059049705}, {555809715,-2074309903}, -{501319962,-2088148536}, {446486876,-2100555994}, {391347792,-2111523838}, -{335940246,-2121044593}, {280302715,-2129111646}, {224473078,-2135719516}, -{168489600,-2140863674}, {112390647,-2144540593}, {56214412,-2146747762}, -{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172}, -{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682}, -{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313}, -{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450}, -{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067}, -{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277}, -{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424}, -{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771}, -{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994}, -{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593}, -{-94,-2147483647}, {-112390835,-2144540584}, {-224473265,-2135719496}, -{-335940431,-2121044564}, {-446487060,-2100555955}, {-555809896,-2074309855}, -{-663609049,-2042378281}, {-769589300,-2004848703}, {-873460398,-1961823883}, -{-974937397,-1913421827}, {-1073741932,-1859775330}, {-1169603421,-1801031330}, -{-1262259116,-1737350839}, 
{-1351455453,-1668908078}, {-1436947137,-1595891268}, -{-1518500258,-1518500240}, {-1595891628,-1436946738}, {-1668908417,-1351455035}, -{-1737350854,-1262259096}, {-1801031344,-1169603400}, {-1859775343,-1073741910}, -{-1913422071,-974936918}, {-1961823997,-873460141}, {-2004848713,-769589276}, -{-2042378447,-663608538}, {-2074309994,-555809377}, {-2100556013,-446486785}, -{-2121044568,-335940406}, {-2135719499,-224473240}, {-2144540612,-112390298}, -{2147483647,0}, {2140863671,-168489630}, {2121044558,-335940465}, -{2088148500,-501320115}, {2042378310,-663608960}, {1984016179,-821806435}, -{1913421927,-974937199}, {1831030826,-1122057097}, {1737350743,-1262259248}, -{1632959307,-1394679144}, {1518500216,-1518500282}, {1394679073,-1632959368}, -{1262259172,-1737350799}, {1122057017,-1831030875}, {974937230,-1913421912}, -{821806407,-1984016190}, {663608871,-2042378339}, {501319962,-2088148536}, -{335940246,-2121044593}, {168489600,-2140863674}, {-94,-2147483647}, -{-168489787,-2140863659}, {-335940431,-2121044564}, {-501320144,-2088148493}, -{-663609049,-2042378281}, {-821806581,-1984016118}, {-974937397,-1913421827}, -{-1122057395,-1831030643}, {-1262259116,-1737350839}, {-1394679021,-1632959413}, -{-1518500258,-1518500240}, {-1632959429,-1394679001}, {-1737350854,-1262259096}, -{-1831030924,-1122056937}, {-1913422071,-974936918}, {-1984016324,-821806084}, -{-2042378447,-663608538}, {-2088148499,-501320119}, {-2121044568,-335940406}, -{-2140863681,-168489506}, {-2147483647,188}, {-2140863651,168489881}, -{-2121044509,335940777}, {-2088148411,501320484}, {-2042378331,663608895}, -{-1984016181,821806431}, {-1913421900,974937252}, {-1831030728,1122057257}, -{-1737350633,1262259400}, {-1632959185,1394679287}, {-1518499993,1518500506}, -{-1394678735,1632959656}, {-1262258813,1737351059}, {-1122056638,1831031107}, -{-974936606,1913422229}, {-821805761,1984016458}, {-663609179,2042378239}, -{-501320277,2088148461}, {-335940566,2121044542}, {-168489668,2140863668}, -}; 
-static const ne10_fft_cpx_int32_t ne10_twiddles_120[120] = { -{0,0}, {2147483647,0}, {2147483647,0}, -{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394}, -{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496}, -{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096}, -{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152}, -{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313}, -{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424}, -{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496}, -{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268}, -{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785}, -{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172}, -{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682}, -{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313}, -{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450}, -{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067}, -{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277}, -{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424}, -{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771}, -{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994}, -{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593}, -{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968}, -{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851}, -{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394}, -{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959}, -{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516}, -{-94,-2147483647}, {-224473265,-2135719496}, {-446487060,-2100555955}, -{-663609049,-2042378281}, 
{-873460398,-1961823883}, {-1073741932,-1859775330}, -{-1262259116,-1737350839}, {-1436947137,-1595891268}, {-1595891628,-1436946738}, -{-1737350854,-1262259096}, {-1859775343,-1073741910}, {-1961823997,-873460141}, -{-2042378447,-663608538}, {-2100556013,-446486785}, {-2135719499,-224473240}, -{2147483647,0}, {2121044558,-335940465}, {2042378310,-663608960}, -{1913421927,-974937199}, {1737350743,-1262259248}, {1518500216,-1518500282}, -{1262259172,-1737350799}, {974937230,-1913421912}, {663608871,-2042378339}, -{335940246,-2121044593}, {-94,-2147483647}, {-335940431,-2121044564}, -{-663609049,-2042378281}, {-974937397,-1913421827}, {-1262259116,-1737350839}, -{-1518500258,-1518500240}, {-1737350854,-1262259096}, {-1913422071,-974936918}, -{-2042378447,-663608538}, {-2121044568,-335940406}, {-2147483647,188}, -{-2121044509,335940777}, {-2042378331,663608895}, {-1913421900,974937252}, -{-1737350633,1262259400}, {-1518499993,1518500506}, {-1262258813,1737351059}, -{-974936606,1913422229}, {-663609179,2042378239}, {-335940566,2121044542}, -}; -static const ne10_fft_cpx_int32_t ne10_twiddles_60[60] = { -{0,0}, {2147483647,0}, {2147483647,0}, -{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394}, -{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496}, -{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096}, -{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152}, -{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968}, -{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851}, -{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394}, -{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959}, -{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516}, -{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313}, -{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424}, -{663608871,-2042378339}, 
{224473078,-2135719516}, {-224473265,-2135719496}, -{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268}, -{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785}, -{2147483647,0}, {2042378310,-663608960}, {1737350743,-1262259248}, -{1262259172,-1737350799}, {663608871,-2042378339}, {-94,-2147483647}, -{-663609049,-2042378281}, {-1262259116,-1737350839}, {-1737350854,-1262259096}, -{-2042378447,-663608538}, {-2147483647,188}, {-2042378331,663608895}, -{-1737350633,1262259400}, {-1262258813,1737351059}, {-663609179,2042378239}, -}; -static const ne10_fft_state_int32_t ne10_fft_state_int32_t_480 = { -120, -(ne10_int32_t *)ne10_factors_480, -(ne10_fft_cpx_int32_t *)ne10_twiddles_480, -NULL, -(ne10_fft_cpx_int32_t *)&ne10_twiddles_480[120], -}; -static const arch_fft_state cfg_arch_480 = { -1, -(void *)&ne10_fft_state_int32_t_480, -}; - -static const ne10_fft_state_int32_t ne10_fft_state_int32_t_240 = { -60, -(ne10_int32_t *)ne10_factors_240, -(ne10_fft_cpx_int32_t *)ne10_twiddles_240, -NULL, -(ne10_fft_cpx_int32_t *)&ne10_twiddles_240[60], -}; -static const arch_fft_state cfg_arch_240 = { -1, -(void *)&ne10_fft_state_int32_t_240, -}; - -static const ne10_fft_state_int32_t ne10_fft_state_int32_t_120 = { -30, -(ne10_int32_t *)ne10_factors_120, -(ne10_fft_cpx_int32_t *)ne10_twiddles_120, -NULL, -(ne10_fft_cpx_int32_t *)&ne10_twiddles_120[30], -}; -static const arch_fft_state cfg_arch_120 = { -1, -(void *)&ne10_fft_state_int32_t_120, -}; - -static const ne10_fft_state_int32_t ne10_fft_state_int32_t_60 = { -15, -(ne10_int32_t *)ne10_factors_60, -(ne10_fft_cpx_int32_t *)ne10_twiddles_60, -NULL, -(ne10_fft_cpx_int32_t *)&ne10_twiddles_60[15], -}; -static const arch_fft_state cfg_arch_60 = { -1, -(void *)&ne10_fft_state_int32_t_60, -}; - -#endif /* end NE10_FFT_PARAMS48000_960 */ diff --git a/thirdparty/opus/celt/static_modes_float.h b/thirdparty/opus/celt/static_modes_float.h deleted file mode 100644 index 
e102a38391..0000000000 --- a/thirdparty/opus/celt/static_modes_float.h +++ /dev/null @@ -1,888 +0,0 @@ -/* The contents of this file was automatically generated by dump_modes.c - with arguments: 48000 960 - It contains static definitions for some pre-defined modes. */ -#include "modes.h" -#include "rate.h" - -#ifdef HAVE_ARM_NE10 -#define OVERRIDE_FFT 1 -#include "static_modes_float_arm_ne10.h" -#endif - -#ifndef DEF_WINDOW120 -#define DEF_WINDOW120 -static const opus_val16 window120[120] = { -6.7286966e-05f, 0.00060551348f, 0.0016815970f, 0.0032947962f, 0.0054439943f, -0.0081276923f, 0.011344001f, 0.015090633f, 0.019364886f, 0.024163635f, -0.029483315f, 0.035319905f, 0.041668911f, 0.048525347f, 0.055883718f, -0.063737999f, 0.072081616f, 0.080907428f, 0.090207705f, 0.099974111f, -0.11019769f, 0.12086883f, 0.13197729f, 0.14351214f, 0.15546177f, -0.16781389f, 0.18055550f, 0.19367290f, 0.20715171f, 0.22097682f, -0.23513243f, 0.24960208f, 0.26436860f, 0.27941419f, 0.29472040f, -0.31026818f, 0.32603788f, 0.34200931f, 0.35816177f, 0.37447407f, -0.39092462f, 0.40749142f, 0.42415215f, 0.44088423f, 0.45766484f, -0.47447104f, 0.49127978f, 0.50806798f, 0.52481261f, 0.54149077f, -0.55807973f, 0.57455701f, 0.59090049f, 0.60708841f, 0.62309951f, -0.63891306f, 0.65450896f, 0.66986776f, 0.68497077f, 0.69980010f, -0.71433873f, 0.72857055f, 0.74248043f, 0.75605424f, 0.76927895f, -0.78214257f, 0.79463430f, 0.80674445f, 0.81846456f, 0.82978733f, -0.84070669f, 0.85121779f, 0.86131698f, 0.87100183f, 0.88027111f, -0.88912479f, 0.89756398f, 0.90559094f, 0.91320904f, 0.92042270f, -0.92723738f, 0.93365955f, 0.93969656f, 0.94535671f, 0.95064907f, -0.95558353f, 0.96017067f, 0.96442171f, 0.96834849f, 0.97196334f, -0.97527906f, 0.97830883f, 0.98106616f, 0.98356480f, 0.98581869f, -0.98784191f, 0.98964856f, 0.99125274f, 0.99266849f, 0.99390969f, -0.99499004f, 0.99592297f, 0.99672162f, 0.99739874f, 0.99796667f, -0.99843728f, 0.99882195f, 0.99913147f, 0.99937606f, 0.99956527f, -0.99970802f, 
0.99981248f, 0.99988613f, 0.99993565f, 0.99996697f, -0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.0000000f, -}; -#endif - -#ifndef DEF_LOGN400 -#define DEF_LOGN400 -static const opus_int16 logN400[21] = { -0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 21, 21, 24, 29, 34, 36, }; -#endif - -#ifndef DEF_PULSE_CACHE50 -#define DEF_PULSE_CACHE50 -static const opus_int16 cache_index50[105] = { --1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 41, 41, 41, -82, 82, 123, 164, 200, 222, 0, 0, 0, 0, 0, 0, 0, 0, 41, -41, 41, 41, 123, 123, 123, 164, 164, 240, 266, 283, 295, 41, 41, 41, -41, 41, 41, 41, 41, 123, 123, 123, 123, 240, 240, 240, 266, 266, 305, -318, 328, 336, 123, 123, 123, 123, 123, 123, 123, 123, 240, 240, 240, 240, -305, 305, 305, 318, 318, 343, 351, 358, 364, 240, 240, 240, 240, 240, 240, -240, 240, 305, 305, 305, 305, 343, 343, 343, 351, 351, 370, 376, 382, 387, -}; -static const unsigned char cache_bits50[392] = { -40, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 40, 15, 23, 28, -31, 34, 36, 38, 39, 41, 42, 43, 44, 45, 46, 47, 47, 49, 50, -51, 52, 53, 54, 55, 55, 57, 58, 59, 60, 61, 62, 63, 63, 65, -66, 67, 68, 69, 70, 71, 71, 40, 20, 33, 41, 48, 53, 57, 61, -64, 66, 69, 71, 73, 75, 76, 78, 80, 82, 85, 87, 89, 91, 92, -94, 96, 98, 101, 103, 105, 107, 108, 110, 112, 114, 117, 119, 121, 123, -124, 126, 128, 40, 23, 39, 51, 60, 67, 73, 79, 83, 87, 91, 94, -97, 100, 102, 105, 107, 111, 115, 118, 121, 124, 126, 129, 131, 135, 139, -142, 145, 148, 150, 153, 155, 159, 163, 166, 169, 172, 174, 177, 179, 35, -28, 49, 65, 78, 89, 99, 107, 114, 120, 126, 132, 136, 141, 145, 149, -153, 159, 165, 171, 176, 180, 185, 189, 192, 199, 205, 211, 216, 220, 225, -229, 232, 239, 245, 251, 21, 33, 58, 79, 97, 112, 125, 137, 148, 157, -166, 174, 182, 189, 195, 201, 207, 217, 227, 235, 243, 251, 17, 35, 63, -86, 106, 123, 139, 152, 165, 177, 187, 197, 206, 214, 222, 230, 237, 250, -25, 31, 55, 75, 91, 
105, 117, 128, 138, 146, 154, 161, 168, 174, 180, -185, 190, 200, 208, 215, 222, 229, 235, 240, 245, 255, 16, 36, 65, 89, -110, 128, 144, 159, 173, 185, 196, 207, 217, 226, 234, 242, 250, 11, 41, -74, 103, 128, 151, 172, 191, 209, 225, 241, 255, 9, 43, 79, 110, 138, -163, 186, 207, 227, 246, 12, 39, 71, 99, 123, 144, 164, 182, 198, 214, -228, 241, 253, 9, 44, 81, 113, 142, 168, 192, 214, 235, 255, 7, 49, -90, 127, 160, 191, 220, 247, 6, 51, 95, 134, 170, 203, 234, 7, 47, -87, 123, 155, 184, 212, 237, 6, 52, 97, 137, 174, 208, 240, 5, 57, -106, 151, 192, 231, 5, 59, 111, 158, 202, 243, 5, 55, 103, 147, 187, -224, 5, 60, 113, 161, 206, 248, 4, 65, 122, 175, 224, 4, 67, 127, -182, 234, }; -static const unsigned char cache_caps50[168] = { -224, 224, 224, 224, 224, 224, 224, 224, 160, 160, 160, 160, 185, 185, 185, -178, 178, 168, 134, 61, 37, 224, 224, 224, 224, 224, 224, 224, 224, 240, -240, 240, 240, 207, 207, 207, 198, 198, 183, 144, 66, 40, 160, 160, 160, -160, 160, 160, 160, 160, 185, 185, 185, 185, 193, 193, 193, 183, 183, 172, -138, 64, 38, 240, 240, 240, 240, 240, 240, 240, 240, 207, 207, 207, 207, -204, 204, 204, 193, 193, 180, 143, 66, 40, 185, 185, 185, 185, 185, 185, -185, 185, 193, 193, 193, 193, 193, 193, 193, 183, 183, 172, 138, 65, 39, -207, 207, 207, 207, 207, 207, 207, 207, 204, 204, 204, 204, 201, 201, 201, -188, 188, 176, 141, 66, 40, 193, 193, 193, 193, 193, 193, 193, 193, 193, -193, 193, 193, 194, 194, 194, 184, 184, 173, 139, 65, 39, 204, 204, 204, -204, 204, 204, 204, 204, 201, 201, 201, 201, 198, 198, 198, 187, 187, 175, -140, 66, 40, }; -#endif - -#ifndef FFT_TWIDDLES48000_960 -#define FFT_TWIDDLES48000_960 -static const kiss_twiddle_cpx fft_twiddles48000_960[480] = { -{1.0000000f, -0.0000000f}, {0.99991433f, -0.013089596f}, -{0.99965732f, -0.026176948f}, {0.99922904f, -0.039259816f}, -{0.99862953f, -0.052335956f}, {0.99785892f, -0.065403129f}, -{0.99691733f, -0.078459096f}, {0.99580493f, -0.091501619f}, -{0.99452190f, -0.10452846f}, 
{0.99306846f, -0.11753740f}, -{0.99144486f, -0.13052619f}, {0.98965139f, -0.14349262f}, -{0.98768834f, -0.15643447f}, {0.98555606f, -0.16934950f}, -{0.98325491f, -0.18223553f}, {0.98078528f, -0.19509032f}, -{0.97814760f, -0.20791169f}, {0.97534232f, -0.22069744f}, -{0.97236992f, -0.23344536f}, {0.96923091f, -0.24615329f}, -{0.96592583f, -0.25881905f}, {0.96245524f, -0.27144045f}, -{0.95881973f, -0.28401534f}, {0.95501994f, -0.29654157f}, -{0.95105652f, -0.30901699f}, {0.94693013f, -0.32143947f}, -{0.94264149f, -0.33380686f}, {0.93819134f, -0.34611706f}, -{0.93358043f, -0.35836795f}, {0.92880955f, -0.37055744f}, -{0.92387953f, -0.38268343f}, {0.91879121f, -0.39474386f}, -{0.91354546f, -0.40673664f}, {0.90814317f, -0.41865974f}, -{0.90258528f, -0.43051110f}, {0.89687274f, -0.44228869f}, -{0.89100652f, -0.45399050f}, {0.88498764f, -0.46561452f}, -{0.87881711f, -0.47715876f}, {0.87249601f, -0.48862124f}, -{0.86602540f, -0.50000000f}, {0.85940641f, -0.51129309f}, -{0.85264016f, -0.52249856f}, {0.84572782f, -0.53361452f}, -{0.83867057f, -0.54463904f}, {0.83146961f, -0.55557023f}, -{0.82412619f, -0.56640624f}, {0.81664156f, -0.57714519f}, -{0.80901699f, -0.58778525f}, {0.80125381f, -0.59832460f}, -{0.79335334f, -0.60876143f}, {0.78531693f, -0.61909395f}, -{0.77714596f, -0.62932039f}, {0.76884183f, -0.63943900f}, -{0.76040597f, -0.64944805f}, {0.75183981f, -0.65934582f}, -{0.74314483f, -0.66913061f}, {0.73432251f, -0.67880075f}, -{0.72537437f, -0.68835458f}, {0.71630194f, -0.69779046f}, -{0.70710678f, -0.70710678f}, {0.69779046f, -0.71630194f}, -{0.68835458f, -0.72537437f}, {0.67880075f, -0.73432251f}, -{0.66913061f, -0.74314483f}, {0.65934582f, -0.75183981f}, -{0.64944805f, -0.76040597f}, {0.63943900f, -0.76884183f}, -{0.62932039f, -0.77714596f}, {0.61909395f, -0.78531693f}, -{0.60876143f, -0.79335334f}, {0.59832460f, -0.80125381f}, -{0.58778525f, -0.80901699f}, {0.57714519f, -0.81664156f}, -{0.56640624f, -0.82412619f}, {0.55557023f, -0.83146961f}, -{0.54463904f, 
-0.83867057f}, {0.53361452f, -0.84572782f}, -{0.52249856f, -0.85264016f}, {0.51129309f, -0.85940641f}, -{0.50000000f, -0.86602540f}, {0.48862124f, -0.87249601f}, -{0.47715876f, -0.87881711f}, {0.46561452f, -0.88498764f}, -{0.45399050f, -0.89100652f}, {0.44228869f, -0.89687274f}, -{0.43051110f, -0.90258528f}, {0.41865974f, -0.90814317f}, -{0.40673664f, -0.91354546f}, {0.39474386f, -0.91879121f}, -{0.38268343f, -0.92387953f}, {0.37055744f, -0.92880955f}, -{0.35836795f, -0.93358043f}, {0.34611706f, -0.93819134f}, -{0.33380686f, -0.94264149f}, {0.32143947f, -0.94693013f}, -{0.30901699f, -0.95105652f}, {0.29654157f, -0.95501994f}, -{0.28401534f, -0.95881973f}, {0.27144045f, -0.96245524f}, -{0.25881905f, -0.96592583f}, {0.24615329f, -0.96923091f}, -{0.23344536f, -0.97236992f}, {0.22069744f, -0.97534232f}, -{0.20791169f, -0.97814760f}, {0.19509032f, -0.98078528f}, -{0.18223553f, -0.98325491f}, {0.16934950f, -0.98555606f}, -{0.15643447f, -0.98768834f}, {0.14349262f, -0.98965139f}, -{0.13052619f, -0.99144486f}, {0.11753740f, -0.99306846f}, -{0.10452846f, -0.99452190f}, {0.091501619f, -0.99580493f}, -{0.078459096f, -0.99691733f}, {0.065403129f, -0.99785892f}, -{0.052335956f, -0.99862953f}, {0.039259816f, -0.99922904f}, -{0.026176948f, -0.99965732f}, {0.013089596f, -0.99991433f}, -{6.1230318e-17f, -1.0000000f}, {-0.013089596f, -0.99991433f}, -{-0.026176948f, -0.99965732f}, {-0.039259816f, -0.99922904f}, -{-0.052335956f, -0.99862953f}, {-0.065403129f, -0.99785892f}, -{-0.078459096f, -0.99691733f}, {-0.091501619f, -0.99580493f}, -{-0.10452846f, -0.99452190f}, {-0.11753740f, -0.99306846f}, -{-0.13052619f, -0.99144486f}, {-0.14349262f, -0.98965139f}, -{-0.15643447f, -0.98768834f}, {-0.16934950f, -0.98555606f}, -{-0.18223553f, -0.98325491f}, {-0.19509032f, -0.98078528f}, -{-0.20791169f, -0.97814760f}, {-0.22069744f, -0.97534232f}, -{-0.23344536f, -0.97236992f}, {-0.24615329f, -0.96923091f}, -{-0.25881905f, -0.96592583f}, {-0.27144045f, -0.96245524f}, -{-0.28401534f, -0.95881973f}, 
{-0.29654157f, -0.95501994f}, -{-0.30901699f, -0.95105652f}, {-0.32143947f, -0.94693013f}, -{-0.33380686f, -0.94264149f}, {-0.34611706f, -0.93819134f}, -{-0.35836795f, -0.93358043f}, {-0.37055744f, -0.92880955f}, -{-0.38268343f, -0.92387953f}, {-0.39474386f, -0.91879121f}, -{-0.40673664f, -0.91354546f}, {-0.41865974f, -0.90814317f}, -{-0.43051110f, -0.90258528f}, {-0.44228869f, -0.89687274f}, -{-0.45399050f, -0.89100652f}, {-0.46561452f, -0.88498764f}, -{-0.47715876f, -0.87881711f}, {-0.48862124f, -0.87249601f}, -{-0.50000000f, -0.86602540f}, {-0.51129309f, -0.85940641f}, -{-0.52249856f, -0.85264016f}, {-0.53361452f, -0.84572782f}, -{-0.54463904f, -0.83867057f}, {-0.55557023f, -0.83146961f}, -{-0.56640624f, -0.82412619f}, {-0.57714519f, -0.81664156f}, -{-0.58778525f, -0.80901699f}, {-0.59832460f, -0.80125381f}, -{-0.60876143f, -0.79335334f}, {-0.61909395f, -0.78531693f}, -{-0.62932039f, -0.77714596f}, {-0.63943900f, -0.76884183f}, -{-0.64944805f, -0.76040597f}, {-0.65934582f, -0.75183981f}, -{-0.66913061f, -0.74314483f}, {-0.67880075f, -0.73432251f}, -{-0.68835458f, -0.72537437f}, {-0.69779046f, -0.71630194f}, -{-0.70710678f, -0.70710678f}, {-0.71630194f, -0.69779046f}, -{-0.72537437f, -0.68835458f}, {-0.73432251f, -0.67880075f}, -{-0.74314483f, -0.66913061f}, {-0.75183981f, -0.65934582f}, -{-0.76040597f, -0.64944805f}, {-0.76884183f, -0.63943900f}, -{-0.77714596f, -0.62932039f}, {-0.78531693f, -0.61909395f}, -{-0.79335334f, -0.60876143f}, {-0.80125381f, -0.59832460f}, -{-0.80901699f, -0.58778525f}, {-0.81664156f, -0.57714519f}, -{-0.82412619f, -0.56640624f}, {-0.83146961f, -0.55557023f}, -{-0.83867057f, -0.54463904f}, {-0.84572782f, -0.53361452f}, -{-0.85264016f, -0.52249856f}, {-0.85940641f, -0.51129309f}, -{-0.86602540f, -0.50000000f}, {-0.87249601f, -0.48862124f}, -{-0.87881711f, -0.47715876f}, {-0.88498764f, -0.46561452f}, -{-0.89100652f, -0.45399050f}, {-0.89687274f, -0.44228869f}, -{-0.90258528f, -0.43051110f}, {-0.90814317f, -0.41865974f}, -{-0.91354546f, 
-0.40673664f}, {-0.91879121f, -0.39474386f}, -{-0.92387953f, -0.38268343f}, {-0.92880955f, -0.37055744f}, -{-0.93358043f, -0.35836795f}, {-0.93819134f, -0.34611706f}, -{-0.94264149f, -0.33380686f}, {-0.94693013f, -0.32143947f}, -{-0.95105652f, -0.30901699f}, {-0.95501994f, -0.29654157f}, -{-0.95881973f, -0.28401534f}, {-0.96245524f, -0.27144045f}, -{-0.96592583f, -0.25881905f}, {-0.96923091f, -0.24615329f}, -{-0.97236992f, -0.23344536f}, {-0.97534232f, -0.22069744f}, -{-0.97814760f, -0.20791169f}, {-0.98078528f, -0.19509032f}, -{-0.98325491f, -0.18223553f}, {-0.98555606f, -0.16934950f}, -{-0.98768834f, -0.15643447f}, {-0.98965139f, -0.14349262f}, -{-0.99144486f, -0.13052619f}, {-0.99306846f, -0.11753740f}, -{-0.99452190f, -0.10452846f}, {-0.99580493f, -0.091501619f}, -{-0.99691733f, -0.078459096f}, {-0.99785892f, -0.065403129f}, -{-0.99862953f, -0.052335956f}, {-0.99922904f, -0.039259816f}, -{-0.99965732f, -0.026176948f}, {-0.99991433f, -0.013089596f}, -{-1.0000000f, -1.2246064e-16f}, {-0.99991433f, 0.013089596f}, -{-0.99965732f, 0.026176948f}, {-0.99922904f, 0.039259816f}, -{-0.99862953f, 0.052335956f}, {-0.99785892f, 0.065403129f}, -{-0.99691733f, 0.078459096f}, {-0.99580493f, 0.091501619f}, -{-0.99452190f, 0.10452846f}, {-0.99306846f, 0.11753740f}, -{-0.99144486f, 0.13052619f}, {-0.98965139f, 0.14349262f}, -{-0.98768834f, 0.15643447f}, {-0.98555606f, 0.16934950f}, -{-0.98325491f, 0.18223553f}, {-0.98078528f, 0.19509032f}, -{-0.97814760f, 0.20791169f}, {-0.97534232f, 0.22069744f}, -{-0.97236992f, 0.23344536f}, {-0.96923091f, 0.24615329f}, -{-0.96592583f, 0.25881905f}, {-0.96245524f, 0.27144045f}, -{-0.95881973f, 0.28401534f}, {-0.95501994f, 0.29654157f}, -{-0.95105652f, 0.30901699f}, {-0.94693013f, 0.32143947f}, -{-0.94264149f, 0.33380686f}, {-0.93819134f, 0.34611706f}, -{-0.93358043f, 0.35836795f}, {-0.92880955f, 0.37055744f}, -{-0.92387953f, 0.38268343f}, {-0.91879121f, 0.39474386f}, -{-0.91354546f, 0.40673664f}, {-0.90814317f, 0.41865974f}, -{-0.90258528f, 
0.43051110f}, {-0.89687274f, 0.44228869f}, -{-0.89100652f, 0.45399050f}, {-0.88498764f, 0.46561452f}, -{-0.87881711f, 0.47715876f}, {-0.87249601f, 0.48862124f}, -{-0.86602540f, 0.50000000f}, {-0.85940641f, 0.51129309f}, -{-0.85264016f, 0.52249856f}, {-0.84572782f, 0.53361452f}, -{-0.83867057f, 0.54463904f}, {-0.83146961f, 0.55557023f}, -{-0.82412619f, 0.56640624f}, {-0.81664156f, 0.57714519f}, -{-0.80901699f, 0.58778525f}, {-0.80125381f, 0.59832460f}, -{-0.79335334f, 0.60876143f}, {-0.78531693f, 0.61909395f}, -{-0.77714596f, 0.62932039f}, {-0.76884183f, 0.63943900f}, -{-0.76040597f, 0.64944805f}, {-0.75183981f, 0.65934582f}, -{-0.74314483f, 0.66913061f}, {-0.73432251f, 0.67880075f}, -{-0.72537437f, 0.68835458f}, {-0.71630194f, 0.69779046f}, -{-0.70710678f, 0.70710678f}, {-0.69779046f, 0.71630194f}, -{-0.68835458f, 0.72537437f}, {-0.67880075f, 0.73432251f}, -{-0.66913061f, 0.74314483f}, {-0.65934582f, 0.75183981f}, -{-0.64944805f, 0.76040597f}, {-0.63943900f, 0.76884183f}, -{-0.62932039f, 0.77714596f}, {-0.61909395f, 0.78531693f}, -{-0.60876143f, 0.79335334f}, {-0.59832460f, 0.80125381f}, -{-0.58778525f, 0.80901699f}, {-0.57714519f, 0.81664156f}, -{-0.56640624f, 0.82412619f}, {-0.55557023f, 0.83146961f}, -{-0.54463904f, 0.83867057f}, {-0.53361452f, 0.84572782f}, -{-0.52249856f, 0.85264016f}, {-0.51129309f, 0.85940641f}, -{-0.50000000f, 0.86602540f}, {-0.48862124f, 0.87249601f}, -{-0.47715876f, 0.87881711f}, {-0.46561452f, 0.88498764f}, -{-0.45399050f, 0.89100652f}, {-0.44228869f, 0.89687274f}, -{-0.43051110f, 0.90258528f}, {-0.41865974f, 0.90814317f}, -{-0.40673664f, 0.91354546f}, {-0.39474386f, 0.91879121f}, -{-0.38268343f, 0.92387953f}, {-0.37055744f, 0.92880955f}, -{-0.35836795f, 0.93358043f}, {-0.34611706f, 0.93819134f}, -{-0.33380686f, 0.94264149f}, {-0.32143947f, 0.94693013f}, -{-0.30901699f, 0.95105652f}, {-0.29654157f, 0.95501994f}, -{-0.28401534f, 0.95881973f}, {-0.27144045f, 0.96245524f}, -{-0.25881905f, 0.96592583f}, {-0.24615329f, 0.96923091f}, 
-{-0.23344536f, 0.97236992f}, {-0.22069744f, 0.97534232f}, -{-0.20791169f, 0.97814760f}, {-0.19509032f, 0.98078528f}, -{-0.18223553f, 0.98325491f}, {-0.16934950f, 0.98555606f}, -{-0.15643447f, 0.98768834f}, {-0.14349262f, 0.98965139f}, -{-0.13052619f, 0.99144486f}, {-0.11753740f, 0.99306846f}, -{-0.10452846f, 0.99452190f}, {-0.091501619f, 0.99580493f}, -{-0.078459096f, 0.99691733f}, {-0.065403129f, 0.99785892f}, -{-0.052335956f, 0.99862953f}, {-0.039259816f, 0.99922904f}, -{-0.026176948f, 0.99965732f}, {-0.013089596f, 0.99991433f}, -{-1.8369095e-16f, 1.0000000f}, {0.013089596f, 0.99991433f}, -{0.026176948f, 0.99965732f}, {0.039259816f, 0.99922904f}, -{0.052335956f, 0.99862953f}, {0.065403129f, 0.99785892f}, -{0.078459096f, 0.99691733f}, {0.091501619f, 0.99580493f}, -{0.10452846f, 0.99452190f}, {0.11753740f, 0.99306846f}, -{0.13052619f, 0.99144486f}, {0.14349262f, 0.98965139f}, -{0.15643447f, 0.98768834f}, {0.16934950f, 0.98555606f}, -{0.18223553f, 0.98325491f}, {0.19509032f, 0.98078528f}, -{0.20791169f, 0.97814760f}, {0.22069744f, 0.97534232f}, -{0.23344536f, 0.97236992f}, {0.24615329f, 0.96923091f}, -{0.25881905f, 0.96592583f}, {0.27144045f, 0.96245524f}, -{0.28401534f, 0.95881973f}, {0.29654157f, 0.95501994f}, -{0.30901699f, 0.95105652f}, {0.32143947f, 0.94693013f}, -{0.33380686f, 0.94264149f}, {0.34611706f, 0.93819134f}, -{0.35836795f, 0.93358043f}, {0.37055744f, 0.92880955f}, -{0.38268343f, 0.92387953f}, {0.39474386f, 0.91879121f}, -{0.40673664f, 0.91354546f}, {0.41865974f, 0.90814317f}, -{0.43051110f, 0.90258528f}, {0.44228869f, 0.89687274f}, -{0.45399050f, 0.89100652f}, {0.46561452f, 0.88498764f}, -{0.47715876f, 0.87881711f}, {0.48862124f, 0.87249601f}, -{0.50000000f, 0.86602540f}, {0.51129309f, 0.85940641f}, -{0.52249856f, 0.85264016f}, {0.53361452f, 0.84572782f}, -{0.54463904f, 0.83867057f}, {0.55557023f, 0.83146961f}, -{0.56640624f, 0.82412619f}, {0.57714519f, 0.81664156f}, -{0.58778525f, 0.80901699f}, {0.59832460f, 0.80125381f}, -{0.60876143f, 
0.79335334f}, {0.61909395f, 0.78531693f}, -{0.62932039f, 0.77714596f}, {0.63943900f, 0.76884183f}, -{0.64944805f, 0.76040597f}, {0.65934582f, 0.75183981f}, -{0.66913061f, 0.74314483f}, {0.67880075f, 0.73432251f}, -{0.68835458f, 0.72537437f}, {0.69779046f, 0.71630194f}, -{0.70710678f, 0.70710678f}, {0.71630194f, 0.69779046f}, -{0.72537437f, 0.68835458f}, {0.73432251f, 0.67880075f}, -{0.74314483f, 0.66913061f}, {0.75183981f, 0.65934582f}, -{0.76040597f, 0.64944805f}, {0.76884183f, 0.63943900f}, -{0.77714596f, 0.62932039f}, {0.78531693f, 0.61909395f}, -{0.79335334f, 0.60876143f}, {0.80125381f, 0.59832460f}, -{0.80901699f, 0.58778525f}, {0.81664156f, 0.57714519f}, -{0.82412619f, 0.56640624f}, {0.83146961f, 0.55557023f}, -{0.83867057f, 0.54463904f}, {0.84572782f, 0.53361452f}, -{0.85264016f, 0.52249856f}, {0.85940641f, 0.51129309f}, -{0.86602540f, 0.50000000f}, {0.87249601f, 0.48862124f}, -{0.87881711f, 0.47715876f}, {0.88498764f, 0.46561452f}, -{0.89100652f, 0.45399050f}, {0.89687274f, 0.44228869f}, -{0.90258528f, 0.43051110f}, {0.90814317f, 0.41865974f}, -{0.91354546f, 0.40673664f}, {0.91879121f, 0.39474386f}, -{0.92387953f, 0.38268343f}, {0.92880955f, 0.37055744f}, -{0.93358043f, 0.35836795f}, {0.93819134f, 0.34611706f}, -{0.94264149f, 0.33380686f}, {0.94693013f, 0.32143947f}, -{0.95105652f, 0.30901699f}, {0.95501994f, 0.29654157f}, -{0.95881973f, 0.28401534f}, {0.96245524f, 0.27144045f}, -{0.96592583f, 0.25881905f}, {0.96923091f, 0.24615329f}, -{0.97236992f, 0.23344536f}, {0.97534232f, 0.22069744f}, -{0.97814760f, 0.20791169f}, {0.98078528f, 0.19509032f}, -{0.98325491f, 0.18223553f}, {0.98555606f, 0.16934950f}, -{0.98768834f, 0.15643447f}, {0.98965139f, 0.14349262f}, -{0.99144486f, 0.13052619f}, {0.99306846f, 0.11753740f}, -{0.99452190f, 0.10452846f}, {0.99580493f, 0.091501619f}, -{0.99691733f, 0.078459096f}, {0.99785892f, 0.065403129f}, -{0.99862953f, 0.052335956f}, {0.99922904f, 0.039259816f}, -{0.99965732f, 0.026176948f}, {0.99991433f, 0.013089596f}, -}; -#ifndef 
FFT_BITREV480 -#define FFT_BITREV480 -static const opus_int16 fft_bitrev480[480] = { -0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448, -8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456, -16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464, -24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472, -4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452, -12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460, -20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468, -28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476, -1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449, -9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457, -17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465, -25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473, -5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453, -13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461, -21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469, -29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477, -2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450, -10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458, -18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466, -26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474, -6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454, -14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462, -22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470, -30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478, -3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451, -11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459, -19, 115, 211, 307, 403, 51, 
147, 243, 339, 435, 83, 179, 275, 371, 467, -27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475, -7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455, -15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463, -23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471, -31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479, -}; -#endif - -#ifndef FFT_BITREV240 -#define FFT_BITREV240 -static const opus_int16 fft_bitrev240[240] = { -0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224, -4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228, -8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232, -12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236, -1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225, -5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229, -9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233, -13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237, -2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226, -6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230, -10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234, -14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238, -3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227, -7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231, -11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235, -15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239, -}; -#endif - -#ifndef FFT_BITREV120 -#define FFT_BITREV120 -static const opus_int16 fft_bitrev120[120] = { -0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112, -4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116, -1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113, -5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117, -2, 26, 50, 74, 98, 
10, 34, 58, 82, 106, 18, 42, 66, 90, 114, -6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118, -3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115, -7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119, -}; -#endif - -#ifndef FFT_BITREV60 -#define FFT_BITREV60 -static const opus_int16 fft_bitrev60[60] = { -0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56, -1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57, -2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58, -3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59, -}; -#endif - -#ifndef FFT_STATE48000_960_0 -#define FFT_STATE48000_960_0 -static const kiss_fft_state fft_state48000_960_0 = { -480, /* nfft */ -0.002083333f, /* scale */ --1, /* shift */ -{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ -fft_bitrev480, /* bitrev */ -fft_twiddles48000_960, /* bitrev */ -#ifdef OVERRIDE_FFT -(arch_fft_state *)&cfg_arch_480, -#else -NULL, -#endif -}; -#endif - -#ifndef FFT_STATE48000_960_1 -#define FFT_STATE48000_960_1 -static const kiss_fft_state fft_state48000_960_1 = { -240, /* nfft */ -0.004166667f, /* scale */ -1, /* shift */ -{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ -fft_bitrev240, /* bitrev */ -fft_twiddles48000_960, /* bitrev */ -#ifdef OVERRIDE_FFT -(arch_fft_state *)&cfg_arch_240, -#else -NULL, -#endif -}; -#endif - -#ifndef FFT_STATE48000_960_2 -#define FFT_STATE48000_960_2 -static const kiss_fft_state fft_state48000_960_2 = { -120, /* nfft */ -0.008333333f, /* scale */ -2, /* shift */ -{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ -fft_bitrev120, /* bitrev */ -fft_twiddles48000_960, /* bitrev */ -#ifdef OVERRIDE_FFT -(arch_fft_state *)&cfg_arch_120, -#else -NULL, -#endif -}; -#endif - -#ifndef FFT_STATE48000_960_3 -#define FFT_STATE48000_960_3 -static const kiss_fft_state fft_state48000_960_3 = { -60, /* nfft */ -0.016666667f, /* scale */ -3, /* shift */ -{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, }, /* factors */ -fft_bitrev60, /* bitrev */ -fft_twiddles48000_960, /* bitrev */ -#ifdef OVERRIDE_FFT -(arch_fft_state *)&cfg_arch_60, -#else -NULL, -#endif -}; -#endif - -#endif - -#ifndef MDCT_TWIDDLES960 -#define MDCT_TWIDDLES960 -static const opus_val16 mdct_twiddles960[1800] = { -0.99999994f, 0.99999321f, 0.99997580f, 0.99994773f, 0.99990886f, -0.99985933f, 0.99979913f, 0.99972820f, 0.99964654f, 0.99955416f, -0.99945110f, 0.99933738f, 0.99921292f, 0.99907774f, 0.99893188f, -0.99877530f, 0.99860805f, 0.99843007f, 0.99824142f, 0.99804211f, -0.99783206f, 0.99761140f, 0.99737996f, 0.99713790f, 0.99688518f, -0.99662173f, 0.99634761f, 0.99606287f, 0.99576741f, 0.99546129f, -0.99514455f, 0.99481714f, 0.99447906f, 0.99413031f, 0.99377096f, -0.99340093f, 0.99302030f, 0.99262899f, 0.99222708f, 0.99181455f, -0.99139136f, 0.99095762f, 0.99051321f, 0.99005818f, 0.98959261f, -0.98911643f, 0.98862964f, 0.98813224f, 0.98762429f, 0.98710573f, -0.98657662f, 0.98603696f, 0.98548669f, 0.98492593f, 0.98435456f, -0.98377270f, 0.98318028f, 0.98257732f, 0.98196387f, 0.98133987f, -0.98070538f, 0.98006040f, 0.97940493f, 0.97873890f, 0.97806245f, -0.97737551f, 0.97667813f, 0.97597027f, 0.97525197f, 0.97452319f, -0.97378403f, 0.97303438f, 0.97227436f, 0.97150391f, 0.97072303f, -0.96993178f, 0.96913016f, 0.96831810f, 0.96749574f, 0.96666300f, -0.96581990f, 0.96496642f, 0.96410263f, 0.96322852f, 0.96234411f, -0.96144938f, 0.96054435f, 0.95962906f, 0.95870346f, 0.95776761f, -0.95682150f, 0.95586514f, 0.95489854f, 0.95392174f, 0.95293468f, -0.95193744f, 0.95093000f, 0.94991243f, 0.94888461f, 0.94784665f, -0.94679856f, 0.94574034f, 0.94467193f, 0.94359344f, 0.94250488f, -0.94140619f, 0.94029742f, 0.93917859f, 0.93804967f, 0.93691075f, -0.93576175f, 0.93460274f, 0.93343377f, 0.93225473f, 0.93106574f, -0.92986679f, 0.92865789f, 0.92743903f, 0.92621022f, 0.92497152f, -0.92372292f, 0.92246443f, 0.92119598f, 0.91991776f, 0.91862965f, -0.91733170f, 0.91602397f, 0.91470635f, 0.91337901f, 
0.91204184f, -0.91069490f, 0.90933824f, 0.90797186f, 0.90659571f, 0.90520984f, -0.90381432f, 0.90240908f, 0.90099424f, 0.89956969f, 0.89813554f, -0.89669174f, 0.89523834f, 0.89377540f, 0.89230281f, 0.89082074f, -0.88932908f, 0.88782793f, 0.88631725f, 0.88479710f, 0.88326746f, -0.88172835f, 0.88017982f, 0.87862182f, 0.87705445f, 0.87547767f, -0.87389153f, 0.87229604f, 0.87069118f, 0.86907703f, 0.86745358f, -0.86582077f, 0.86417878f, 0.86252749f, 0.86086690f, 0.85919720f, -0.85751826f, 0.85583007f, 0.85413277f, 0.85242635f, 0.85071075f, -0.84898609f, 0.84725231f, 0.84550947f, 0.84375757f, 0.84199661f, -0.84022665f, 0.83844769f, 0.83665979f, 0.83486289f, 0.83305705f, -0.83124226f, 0.82941860f, 0.82758605f, 0.82574469f, 0.82389444f, -0.82203537f, 0.82016748f, 0.81829083f, 0.81640542f, 0.81451124f, -0.81260836f, 0.81069672f, 0.80877650f, 0.80684757f, 0.80490994f, -0.80296379f, 0.80100900f, 0.79904562f, 0.79707366f, 0.79509324f, -0.79310423f, 0.79110676f, 0.78910083f, 0.78708643f, 0.78506362f, -0.78303236f, 0.78099275f, 0.77894479f, 0.77688843f, 0.77482378f, -0.77275085f, 0.77066964f, 0.76858020f, 0.76648247f, 0.76437658f, -0.76226246f, 0.76014024f, 0.75800985f, 0.75587130f, 0.75372469f, -0.75157005f, 0.74940729f, 0.74723655f, 0.74505776f, 0.74287105f, -0.74067634f, 0.73847371f, 0.73626316f, 0.73404479f, 0.73181850f, -0.72958434f, 0.72734243f, 0.72509271f, 0.72283524f, 0.72057003f, -0.71829706f, 0.71601641f, 0.71372813f, 0.71143216f, 0.70912862f, -0.70681745f, 0.70449871f, 0.70217246f, 0.69983864f, 0.69749737f, -0.69514859f, 0.69279242f, 0.69042879f, 0.68805778f, 0.68567938f, -0.68329364f, 0.68090063f, 0.67850029f, 0.67609268f, 0.67367786f, -0.67125577f, 0.66882652f, 0.66639012f, 0.66394657f, 0.66149592f, -0.65903819f, 0.65657341f, 0.65410155f, 0.65162271f, 0.64913690f, -0.64664418f, 0.64414448f, 0.64163786f, 0.63912445f, 0.63660413f, -0.63407701f, 0.63154310f, 0.62900239f, 0.62645501f, 0.62390089f, -0.62134010f, 0.61877263f, 0.61619854f, 0.61361790f, 0.61103064f, 
-0.60843682f, 0.60583651f, 0.60322970f, 0.60061646f, 0.59799677f, -0.59537065f, 0.59273821f, 0.59009939f, 0.58745426f, 0.58480281f, -0.58214509f, 0.57948118f, 0.57681108f, 0.57413477f, 0.57145232f, -0.56876373f, 0.56606907f, 0.56336832f, 0.56066155f, 0.55794877f, -0.55523002f, 0.55250537f, 0.54977477f, 0.54703826f, 0.54429591f, -0.54154772f, 0.53879374f, 0.53603399f, 0.53326851f, 0.53049731f, -0.52772039f, 0.52493787f, 0.52214974f, 0.51935595f, 0.51655668f, -0.51375180f, 0.51094145f, 0.50812566f, 0.50530440f, 0.50247771f, -0.49964568f, 0.49680826f, 0.49396557f, 0.49111754f, 0.48826426f, -0.48540577f, 0.48254207f, 0.47967321f, 0.47679919f, 0.47392011f, -0.47103590f, 0.46814668f, 0.46525243f, 0.46235323f, 0.45944905f, -0.45653993f, 0.45362595f, 0.45070711f, 0.44778344f, 0.44485497f, -0.44192174f, 0.43898380f, 0.43604112f, 0.43309379f, 0.43014181f, -0.42718524f, 0.42422408f, 0.42125839f, 0.41828820f, 0.41531351f, -0.41233435f, 0.40935081f, 0.40636289f, 0.40337059f, 0.40037400f, -0.39737311f, 0.39436796f, 0.39135858f, 0.38834500f, 0.38532731f, -0.38230544f, 0.37927949f, 0.37624949f, 0.37321547f, 0.37017745f, -0.36713544f, 0.36408952f, 0.36103970f, 0.35798600f, 0.35492846f, -0.35186714f, 0.34880206f, 0.34573323f, 0.34266070f, 0.33958447f, -0.33650464f, 0.33342120f, 0.33033419f, 0.32724363f, 0.32414958f, -0.32105204f, 0.31795108f, 0.31484672f, 0.31173897f, 0.30862790f, -0.30551350f, 0.30239585f, 0.29927495f, 0.29615086f, 0.29302359f, -0.28989318f, 0.28675964f, 0.28362307f, 0.28048345f, 0.27734083f, -0.27419522f, 0.27104670f, 0.26789525f, 0.26474094f, 0.26158381f, -0.25842386f, 0.25526115f, 0.25209570f, 0.24892756f, 0.24575676f, -0.24258332f, 0.23940729f, 0.23622867f, 0.23304754f, 0.22986393f, -0.22667783f, 0.22348931f, 0.22029841f, 0.21710514f, 0.21390954f, -0.21071166f, 0.20751151f, 0.20430915f, 0.20110460f, 0.19789790f, -0.19468907f, 0.19147816f, 0.18826519f, 0.18505022f, 0.18183327f, -0.17861435f, 0.17539354f, 0.17217083f, 0.16894630f, 0.16571994f, -0.16249183f, 
0.15926196f, 0.15603039f, 0.15279715f, 0.14956227f, -0.14632578f, 0.14308774f, 0.13984816f, 0.13660708f, 0.13336454f, -0.13012058f, 0.12687522f, 0.12362850f, 0.12038045f, 0.11713112f, -0.11388054f, 0.11062872f, 0.10737573f, 0.10412160f, 0.10086634f, -0.097609997f, 0.094352618f, 0.091094226f, 0.087834857f, 0.084574550f, -0.081313334f, 0.078051247f, 0.074788325f, 0.071524605f, 0.068260118f, -0.064994894f, 0.061728980f, 0.058462404f, 0.055195201f, 0.051927410f, -0.048659060f, 0.045390189f, 0.042120833f, 0.038851023f, 0.035580799f, -0.032310195f, 0.029039243f, 0.025767982f, 0.022496443f, 0.019224664f, -0.015952680f, 0.012680525f, 0.0094082337f, 0.0061358409f, 0.0028633832f, --0.00040910527f, -0.0036815894f, -0.0069540343f, -0.010226404f, -0.013498665f, --0.016770782f, -0.020042717f, -0.023314439f, -0.026585912f, -0.029857099f, --0.033127967f, -0.036398482f, -0.039668605f, -0.042938303f, -0.046207540f, --0.049476285f, -0.052744497f, -0.056012146f, -0.059279196f, -0.062545612f, --0.065811358f, -0.069076397f, -0.072340697f, -0.075604223f, -0.078866936f, --0.082128808f, -0.085389800f, -0.088649876f, -0.091909006f, -0.095167145f, --0.098424271f, -0.10168034f, -0.10493532f, -0.10818918f, -0.11144188f, --0.11469338f, -0.11794366f, -0.12119267f, -0.12444039f, -0.12768677f, --0.13093179f, -0.13417540f, -0.13741758f, -0.14065829f, -0.14389749f, --0.14713514f, -0.15037122f, -0.15360570f, -0.15683852f, -0.16006967f, --0.16329910f, -0.16652679f, -0.16975269f, -0.17297678f, -0.17619900f, --0.17941935f, -0.18263777f, -0.18585424f, -0.18906870f, -0.19228116f, --0.19549155f, -0.19869985f, -0.20190603f, -0.20511003f, -0.20831184f, --0.21151142f, -0.21470875f, -0.21790376f, -0.22109644f, -0.22428675f, --0.22747467f, -0.23066014f, -0.23384315f, -0.23702365f, -0.24020162f, --0.24337701f, -0.24654980f, -0.24971995f, -0.25288740f, -0.25605217f, --0.25921419f, -0.26237345f, -0.26552987f, -0.26868346f, -0.27183419f, --0.27498198f, -0.27812684f, -0.28126872f, -0.28440759f, -0.28754342f, 
--0.29067615f, -0.29380578f, -0.29693225f, -0.30005556f, -0.30317566f, --0.30629250f, -0.30940607f, -0.31251630f, -0.31562322f, -0.31872672f, --0.32182685f, -0.32492352f, -0.32801670f, -0.33110636f, -0.33419248f, --0.33727503f, -0.34035397f, -0.34342924f, -0.34650084f, -0.34956875f, --0.35263291f, -0.35569328f, -0.35874987f, -0.36180258f, -0.36485144f, --0.36789638f, -0.37093741f, -0.37397444f, -0.37700745f, -0.38003644f, --0.38306138f, -0.38608220f, -0.38909888f, -0.39211139f, -0.39511973f, --0.39812380f, -0.40112361f, -0.40411916f, -0.40711036f, -0.41009718f, --0.41307965f, -0.41605768f, -0.41903123f, -0.42200032f, -0.42496487f, --0.42792490f, -0.43088034f, -0.43383113f, -0.43677729f, -0.43971881f, --0.44265559f, -0.44558764f, -0.44851488f, -0.45143735f, -0.45435500f, --0.45726776f, -0.46017563f, -0.46307856f, -0.46597654f, -0.46886954f, --0.47175750f, -0.47464043f, -0.47751826f, -0.48039100f, -0.48325855f, --0.48612097f, -0.48897815f, -0.49183011f, -0.49467680f, -0.49751821f, --0.50035429f, -0.50318497f, -0.50601029f, -0.50883019f, -0.51164466f, --0.51445359f, -0.51725709f, -0.52005500f, -0.52284735f, -0.52563411f, --0.52841520f, -0.53119069f, -0.53396046f, -0.53672451f, -0.53948283f, --0.54223537f, -0.54498214f, -0.54772300f, -0.55045801f, -0.55318713f, --0.55591035f, -0.55862761f, -0.56133890f, -0.56404412f, -0.56674337f, --0.56943649f, -0.57212353f, -0.57480448f, -0.57747924f, -0.58014780f, --0.58281022f, -0.58546633f, -0.58811617f, -0.59075975f, -0.59339696f, --0.59602785f, -0.59865236f, -0.60127044f, -0.60388207f, -0.60648727f, --0.60908598f, -0.61167812f, -0.61426371f, -0.61684275f, -0.61941516f, --0.62198097f, -0.62454009f, -0.62709254f, -0.62963831f, -0.63217729f, --0.63470948f, -0.63723493f, -0.63975352f, -0.64226526f, -0.64477009f, --0.64726806f, -0.64975911f, -0.65224314f, -0.65472025f, -0.65719032f, --0.65965337f, -0.66210932f, -0.66455823f, -0.66700000f, -0.66943461f, --0.67186207f, -0.67428231f, -0.67669535f, -0.67910111f, -0.68149966f, 
--0.68389088f, -0.68627477f, -0.68865126f, -0.69102043f, -0.69338220f, --0.69573659f, -0.69808346f, -0.70042288f, -0.70275480f, -0.70507920f, --0.70739603f, -0.70970529f, -0.71200693f, -0.71430099f, -0.71658736f, --0.71886611f, -0.72113711f, -0.72340041f, -0.72565591f, -0.72790372f, --0.73014367f, -0.73237586f, -0.73460019f, -0.73681659f, -0.73902518f, --0.74122584f, -0.74341851f, -0.74560326f, -0.74778003f, -0.74994880f, --0.75210953f, -0.75426215f, -0.75640678f, -0.75854325f, -0.76067162f, --0.76279181f, -0.76490390f, -0.76700771f, -0.76910341f, -0.77119076f, --0.77326995f, -0.77534080f, -0.77740335f, -0.77945763f, -0.78150350f, --0.78354102f, -0.78557014f, -0.78759086f, -0.78960317f, -0.79160696f, --0.79360235f, -0.79558921f, -0.79756755f, -0.79953730f, -0.80149853f, --0.80345118f, -0.80539525f, -0.80733067f, -0.80925739f, -0.81117553f, --0.81308490f, -0.81498563f, -0.81687760f, -0.81876087f, -0.82063532f, --0.82250100f, -0.82435787f, -0.82620591f, -0.82804507f, -0.82987541f, --0.83169687f, -0.83350939f, -0.83531296f, -0.83710766f, -0.83889335f, --0.84067005f, -0.84243774f, -0.84419644f, -0.84594607f, -0.84768665f, --0.84941816f, -0.85114056f, -0.85285389f, -0.85455805f, -0.85625303f, --0.85793889f, -0.85961550f, -0.86128294f, -0.86294121f, -0.86459017f, --0.86622989f, -0.86786032f, -0.86948150f, -0.87109333f, -0.87269586f, --0.87428904f, -0.87587279f, -0.87744725f, -0.87901229f, -0.88056785f, --0.88211405f, -0.88365078f, -0.88517809f, -0.88669586f, -0.88820416f, --0.88970292f, -0.89119220f, -0.89267188f, -0.89414203f, -0.89560264f, --0.89705360f, -0.89849502f, -0.89992678f, -0.90134889f, -0.90276134f, --0.90416414f, -0.90555727f, -0.90694070f, -0.90831441f, -0.90967834f, --0.91103262f, -0.91237706f, -0.91371179f, -0.91503674f, -0.91635185f, --0.91765714f, -0.91895264f, -0.92023826f, -0.92151409f, -0.92277998f, --0.92403603f, -0.92528218f, -0.92651838f, -0.92774469f, -0.92896110f, --0.93016750f, -0.93136400f, -0.93255049f, -0.93372697f, -0.93489349f, 
--0.93604994f, -0.93719643f, -0.93833286f, -0.93945926f, -0.94057560f, --0.94168180f, -0.94277799f, -0.94386405f, -0.94494003f, -0.94600588f, --0.94706154f, -0.94810712f, -0.94914252f, -0.95016778f, -0.95118284f, --0.95218778f, -0.95318246f, -0.95416695f, -0.95514119f, -0.95610523f, --0.95705903f, -0.95800257f, -0.95893586f, -0.95985889f, -0.96077162f, --0.96167403f, -0.96256620f, -0.96344805f, -0.96431959f, -0.96518075f, --0.96603161f, -0.96687216f, -0.96770233f, -0.96852213f, -0.96933156f, --0.97013056f, -0.97091925f, -0.97169751f, -0.97246534f, -0.97322279f, --0.97396982f, -0.97470641f, -0.97543252f, -0.97614825f, -0.97685349f, --0.97754824f, -0.97823256f, -0.97890645f, -0.97956979f, -0.98022264f, --0.98086500f, -0.98149687f, -0.98211825f, -0.98272908f, -0.98332942f, --0.98391914f, -0.98449844f, -0.98506713f, -0.98562527f, -0.98617285f, --0.98670989f, -0.98723638f, -0.98775226f, -0.98825759f, -0.98875231f, --0.98923647f, -0.98971003f, -0.99017298f, -0.99062532f, -0.99106705f, --0.99149817f, -0.99191868f, -0.99232858f, -0.99272782f, -0.99311644f, --0.99349445f, -0.99386179f, -0.99421853f, -0.99456459f, -0.99489999f, --0.99522477f, -0.99553883f, -0.99584228f, -0.99613506f, -0.99641716f, --0.99668860f, -0.99694937f, -0.99719942f, -0.99743885f, -0.99766755f, --0.99788558f, -0.99809295f, -0.99828959f, -0.99847561f, -0.99865085f, --0.99881548f, -0.99896932f, -0.99911255f, -0.99924499f, -0.99936682f, --0.99947786f, -0.99957830f, -0.99966794f, -0.99974692f, -0.99981517f, --0.99987274f, -0.99991959f, -0.99995571f, -0.99998116f, -0.99999589f, -0.99999964f, 0.99997288f, 0.99990326f, 0.99979085f, 0.99963558f, -0.99943751f, 0.99919659f, 0.99891287f, 0.99858636f, 0.99821711f, -0.99780506f, 0.99735034f, 0.99685282f, 0.99631262f, 0.99572974f, -0.99510419f, 0.99443603f, 0.99372530f, 0.99297196f, 0.99217612f, -0.99133772f, 0.99045694f, 0.98953366f, 0.98856801f, 0.98756003f, -0.98650974f, 0.98541719f, 0.98428243f, 0.98310548f, 0.98188645f, -0.98062533f, 0.97932225f, 0.97797716f, 
0.97659022f, 0.97516143f, -0.97369087f, 0.97217858f, 0.97062469f, 0.96902919f, 0.96739221f, -0.96571374f, 0.96399397f, 0.96223283f, 0.96043050f, 0.95858705f, -0.95670253f, 0.95477700f, 0.95281059f, 0.95080340f, 0.94875544f, -0.94666684f, 0.94453770f, 0.94236809f, 0.94015813f, 0.93790787f, -0.93561745f, 0.93328691f, 0.93091643f, 0.92850608f, 0.92605597f, -0.92356616f, 0.92103678f, 0.91846794f, 0.91585976f, 0.91321236f, -0.91052586f, 0.90780038f, 0.90503591f, 0.90223277f, 0.89939094f, -0.89651060f, 0.89359182f, 0.89063478f, 0.88763964f, 0.88460642f, -0.88153529f, 0.87842643f, 0.87527996f, 0.87209594f, 0.86887461f, -0.86561602f, 0.86232042f, 0.85898781f, 0.85561842f, 0.85221243f, -0.84876984f, 0.84529096f, 0.84177583f, 0.83822471f, 0.83463764f, -0.83101481f, 0.82735640f, 0.82366252f, 0.81993335f, 0.81616908f, -0.81236988f, 0.80853581f, 0.80466717f, 0.80076402f, 0.79682660f, -0.79285502f, 0.78884947f, 0.78481019f, 0.78073722f, 0.77663082f, -0.77249116f, 0.76831841f, 0.76411277f, 0.75987434f, 0.75560343f, -0.75130010f, 0.74696463f, 0.74259710f, 0.73819780f, 0.73376691f, -0.72930455f, 0.72481096f, 0.72028631f, 0.71573079f, 0.71114463f, -0.70652801f, 0.70188117f, 0.69720417f, 0.69249737f, 0.68776089f, -0.68299496f, 0.67819971f, 0.67337549f, 0.66852236f, 0.66364062f, -0.65873051f, 0.65379208f, 0.64882571f, 0.64383155f, 0.63880974f, -0.63376063f, 0.62868434f, 0.62358117f, 0.61845124f, 0.61329484f, -0.60811216f, 0.60290343f, 0.59766883f, 0.59240872f, 0.58712316f, -0.58181250f, 0.57647687f, 0.57111657f, 0.56573176f, 0.56032276f, -0.55488980f, 0.54943299f, 0.54395270f, 0.53844911f, 0.53292239f, -0.52737290f, 0.52180082f, 0.51620632f, 0.51058978f, 0.50495136f, -0.49929130f, 0.49360985f, 0.48790723f, 0.48218375f, 0.47643960f, -0.47067502f, 0.46489030f, 0.45908567f, 0.45326138f, 0.44741765f, -0.44155475f, 0.43567297f, 0.42977250f, 0.42385364f, 0.41791660f, -0.41196167f, 0.40598908f, 0.39999911f, 0.39399201f, 0.38796803f, -0.38192743f, 0.37587047f, 0.36979741f, 0.36370850f, 
0.35760403f, -0.35148421f, 0.34534934f, 0.33919969f, 0.33303553f, 0.32685706f, -0.32066461f, 0.31445843f, 0.30823877f, 0.30200592f, 0.29576012f, -0.28950164f, 0.28323078f, 0.27694780f, 0.27065292f, 0.26434645f, -0.25802869f, 0.25169984f, 0.24536023f, 0.23901010f, 0.23264973f, -0.22627939f, 0.21989937f, 0.21350993f, 0.20711134f, 0.20070387f, -0.19428782f, 0.18786344f, 0.18143101f, 0.17499080f, 0.16854310f, -0.16208819f, 0.15562633f, 0.14915779f, 0.14268288f, 0.13620184f, -0.12971498f, 0.12322257f, 0.11672486f, 0.11022217f, 0.10371475f, -0.097202882f, 0.090686858f, 0.084166944f, 0.077643424f, 0.071116582f, -0.064586692f, 0.058054037f, 0.051518895f, 0.044981543f, 0.038442269f, -0.031901345f, 0.025359053f, 0.018815678f, 0.012271495f, 0.0057267868f, --0.00081816671f, -0.0073630852f, -0.013907688f, -0.020451695f, -0.026994826f, --0.033536803f, -0.040077340f, -0.046616159f, -0.053152986f, -0.059687532f, --0.066219524f, -0.072748676f, -0.079274714f, -0.085797355f, -0.092316322f, --0.098831341f, -0.10534211f, -0.11184838f, -0.11834986f, -0.12484626f, --0.13133731f, -0.13782275f, -0.14430228f, -0.15077563f, -0.15724251f, --0.16370267f, -0.17015581f, -0.17660165f, -0.18303993f, -0.18947038f, --0.19589271f, -0.20230664f, -0.20871192f, -0.21510825f, -0.22149536f, --0.22787298f, -0.23424086f, -0.24059868f, -0.24694622f, -0.25328314f, --0.25960925f, -0.26592422f, -0.27222782f, -0.27851975f, -0.28479972f, --0.29106751f, -0.29732284f, -0.30356544f, -0.30979502f, -0.31601134f, --0.32221413f, -0.32840309f, -0.33457801f, -0.34073856f, -0.34688455f, --0.35301566f, -0.35913166f, -0.36523229f, -0.37131724f, -0.37738630f, --0.38343921f, -0.38947567f, -0.39549544f, -0.40149832f, -0.40748394f, --0.41345215f, -0.41940263f, -0.42533514f, -0.43124944f, -0.43714526f, --0.44302234f, -0.44888046f, -0.45471936f, -0.46053877f, -0.46633846f, --0.47211814f, -0.47787762f, -0.48361665f, -0.48933494f, -0.49503228f, --0.50070840f, -0.50636309f, -0.51199609f, -0.51760709f, -0.52319598f, --0.52876246f, 
-0.53430629f, -0.53982723f, -0.54532504f, -0.55079949f, --0.55625033f, -0.56167740f, -0.56708032f, -0.57245898f, -0.57781315f, --0.58314258f, -0.58844697f, -0.59372622f, -0.59897995f, -0.60420811f, --0.60941035f, -0.61458647f, -0.61973625f, -0.62485951f, -0.62995601f, --0.63502556f, -0.64006782f, -0.64508271f, -0.65007001f, -0.65502942f, --0.65996075f, -0.66486382f, -0.66973841f, -0.67458433f, -0.67940134f, --0.68418926f, -0.68894786f, -0.69367695f, -0.69837630f, -0.70304573f, --0.70768511f, -0.71229410f, -0.71687263f, -0.72142041f, -0.72593731f, --0.73042315f, -0.73487765f, -0.73930067f, -0.74369204f, -0.74805158f, --0.75237900f, -0.75667429f, -0.76093709f, -0.76516730f, -0.76936477f, --0.77352923f, -0.77766061f, -0.78175867f, -0.78582323f, -0.78985411f, --0.79385114f, -0.79781419f, -0.80174309f, -0.80563760f, -0.80949765f, --0.81332302f, -0.81711352f, -0.82086903f, -0.82458937f, -0.82827437f, --0.83192390f, -0.83553779f, -0.83911592f, -0.84265804f, -0.84616417f, --0.84963393f, -0.85306740f, -0.85646427f, -0.85982448f, -0.86314780f, --0.86643422f, -0.86968350f, -0.87289548f, -0.87607014f, -0.87920725f, --0.88230664f, -0.88536829f, -0.88839203f, -0.89137769f, -0.89432514f, --0.89723432f, -0.90010506f, -0.90293723f, -0.90573072f, -0.90848541f, --0.91120118f, -0.91387796f, -0.91651553f, -0.91911387f, -0.92167282f, --0.92419231f, -0.92667222f, -0.92911243f, -0.93151283f, -0.93387336f, --0.93619382f, -0.93847424f, -0.94071442f, -0.94291431f, -0.94507378f, --0.94719279f, -0.94927126f, -0.95130903f, -0.95330608f, -0.95526224f, --0.95717752f, -0.95905179f, -0.96088499f, -0.96267700f, -0.96442777f, --0.96613729f, -0.96780539f, -0.96943200f, -0.97101706f, -0.97256058f, --0.97406244f, -0.97552258f, -0.97694093f, -0.97831738f, -0.97965199f, --0.98094457f, -0.98219514f, -0.98340368f, -0.98457009f, -0.98569429f, --0.98677629f, -0.98781598f, -0.98881340f, -0.98976845f, -0.99068111f, --0.99155134f, -0.99237907f, -0.99316430f, -0.99390697f, -0.99460709f, --0.99526459f, 
-0.99587947f, -0.99645168f, -0.99698120f, -0.99746799f, --0.99791211f, -0.99831343f, -0.99867201f, -0.99898779f, -0.99926084f, --0.99949104f, -0.99967843f, -0.99982297f, -0.99992472f, -0.99998361f, -0.99999869f, 0.99989158f, 0.99961317f, 0.99916345f, 0.99854255f, -0.99775058f, 0.99678761f, 0.99565387f, 0.99434954f, 0.99287480f, -0.99122995f, 0.98941529f, 0.98743105f, 0.98527765f, 0.98295540f, -0.98046476f, 0.97780609f, 0.97497988f, 0.97198665f, 0.96882683f, -0.96550101f, 0.96200979f, 0.95835376f, 0.95453346f, 0.95054960f, -0.94640291f, 0.94209403f, 0.93762374f, 0.93299282f, 0.92820197f, -0.92325211f, 0.91814411f, 0.91287869f, 0.90745693f, 0.90187967f, -0.89614785f, 0.89026248f, 0.88422459f, 0.87803519f, 0.87169534f, -0.86520612f, 0.85856867f, 0.85178405f, 0.84485358f, 0.83777827f, -0.83055943f, 0.82319832f, 0.81569612f, 0.80805415f, 0.80027372f, -0.79235619f, 0.78430289f, 0.77611518f, 0.76779449f, 0.75934225f, -0.75075996f, 0.74204898f, 0.73321080f, 0.72424710f, 0.71515924f, -0.70594883f, 0.69661748f, 0.68716675f, 0.67759830f, 0.66791373f, -0.65811473f, 0.64820296f, 0.63818014f, 0.62804794f, 0.61780810f, -0.60746247f, 0.59701276f, 0.58646071f, 0.57580817f, 0.56505698f, -0.55420899f, 0.54326600f, 0.53222996f, 0.52110273f, 0.50988621f, -0.49858227f, 0.48719296f, 0.47572014f, 0.46416581f, 0.45253196f, -0.44082057f, 0.42903364f, 0.41717321f, 0.40524128f, 0.39323992f, -0.38117120f, 0.36903715f, 0.35683987f, 0.34458145f, 0.33226398f, -0.31988961f, 0.30746040f, 0.29497850f, 0.28244606f, 0.26986524f, -0.25723818f, 0.24456702f, 0.23185398f, 0.21910121f, 0.20631088f, -0.19348522f, 0.18062639f, 0.16773662f, 0.15481812f, 0.14187308f, -0.12890373f, 0.11591230f, 0.10290100f, 0.089872077f, 0.076827750f, -0.063770257f, 0.050701842f, 0.037624735f, 0.024541186f, 0.011453429f, --0.0016362892f, -0.014725727f, -0.027812643f, -0.040894791f, -0.053969935f, --0.067035832f, -0.080090240f, -0.093130924f, -0.10615565f, -0.11916219f, --0.13214831f, -0.14511178f, -0.15805040f, -0.17096193f, 
-0.18384418f, --0.19669491f, -0.20951195f, -0.22229309f, -0.23503613f, -0.24773891f, --0.26039925f, -0.27301496f, -0.28558388f, -0.29810387f, -0.31057280f, --0.32298848f, -0.33534884f, -0.34765175f, -0.35989508f, -0.37207675f, --0.38419467f, -0.39624676f, -0.40823093f, -0.42014518f, -0.43198743f, --0.44375566f, -0.45544785f, -0.46706200f, -0.47859612f, -0.49004826f, --0.50141639f, -0.51269865f, -0.52389306f, -0.53499764f, -0.54601061f, --0.55693001f, -0.56775403f, -0.57848072f, -0.58910829f, -0.59963489f, --0.61005878f, -0.62037814f, -0.63059121f, -0.64069623f, -0.65069145f, --0.66057515f, -0.67034572f, -0.68000144f, -0.68954057f, -0.69896162f, --0.70826286f, -0.71744281f, -0.72649974f, -0.73543227f, -0.74423873f, --0.75291771f, -0.76146764f, -0.76988715f, -0.77817470f, -0.78632891f, --0.79434842f, -0.80223179f, -0.80997771f, -0.81758487f, -0.82505190f, --0.83237761f, -0.83956063f, -0.84659988f, -0.85349399f, -0.86024189f, --0.86684239f, -0.87329435f, -0.87959671f, -0.88574833f, -0.89174819f, --0.89759529f, -0.90328854f, -0.90882701f, -0.91420978f, -0.91943592f, --0.92450452f, -0.92941469f, -0.93416560f, -0.93875647f, -0.94318646f, --0.94745487f, -0.95156091f, -0.95550388f, -0.95928317f, -0.96289814f, --0.96634805f, -0.96963239f, -0.97275060f, -0.97570217f, -0.97848648f, --0.98110318f, -0.98355180f, -0.98583186f, -0.98794299f, -0.98988485f, --0.99165714f, -0.99325943f, -0.99469161f, -0.99595332f, -0.99704438f, --0.99796462f, -0.99871385f, -0.99929196f, -0.99969882f, -0.99993443f, -0.99999464f, 0.99956632f, 0.99845290f, 0.99665523f, 0.99417448f, -0.99101239f, 0.98717111f, 0.98265326f, 0.97746199f, 0.97160077f, -0.96507365f, 0.95788515f, 0.95004016f, 0.94154406f, 0.93240267f, -0.92262226f, 0.91220951f, 0.90117162f, 0.88951606f, 0.87725091f, -0.86438453f, 0.85092574f, 0.83688372f, 0.82226819f, 0.80708915f, -0.79135692f, 0.77508235f, 0.75827658f, 0.74095112f, 0.72311783f, -0.70478898f, 0.68597710f, 0.66669506f, 0.64695615f, 0.62677377f, -0.60616189f, 0.58513457f, 
0.56370622f, 0.54189157f, 0.51970547f, -0.49716324f, 0.47428027f, 0.45107225f, 0.42755505f, 0.40374488f, -0.37965798f, 0.35531086f, 0.33072025f, 0.30590299f, 0.28087607f, -0.25565663f, 0.23026201f, 0.20470956f, 0.17901683f, 0.15320139f, -0.12728097f, 0.10127331f, 0.075196236f, 0.049067631f, 0.022905400f, --0.0032725304f, -0.029448219f, -0.055603724f, -0.081721120f, -0.10778251f, --0.13377003f, -0.15966587f, -0.18545228f, -0.21111161f, -0.23662624f, --0.26197869f, -0.28715160f, -0.31212771f, -0.33688989f, -0.36142120f, --0.38570482f, -0.40972409f, -0.43346253f, -0.45690393f, -0.48003218f, --0.50283146f, -0.52528608f, -0.54738069f, -0.56910020f, -0.59042966f, --0.61135447f, -0.63186026f, -0.65193301f, -0.67155898f, -0.69072473f, --0.70941705f, -0.72762316f, -0.74533063f, -0.76252723f, -0.77920127f, --0.79534131f, -0.81093621f, -0.82597536f, -0.84044844f, -0.85434550f, --0.86765707f, -0.88037395f, -0.89248747f, -0.90398932f, -0.91487163f, --0.92512697f, -0.93474823f, -0.94372886f, -0.95206273f, -0.95974404f, --0.96676767f, -0.97312868f, -0.97882277f, -0.98384601f, -0.98819500f, --0.99186671f, -0.99485862f, -0.99716878f, -0.99879545f, -0.99973762f, -}; -#endif - -static const CELTMode mode48000_960_120 = { -48000, /* Fs */ -120, /* overlap */ -21, /* nbEBands */ -21, /* effEBands */ -{0.85000610f, 0.0000000f, 1.0000000f, 1.0000000f, }, /* preemph */ -eband5ms, /* eBands */ -3, /* maxLM */ -8, /* nbShortMdcts */ -120, /* shortMdctSize */ -11, /* nbAllocVectors */ -band_allocation, /* allocVectors */ -logN400, /* logN */ -window120, /* window */ -{1920, 3, {&fft_state48000_960_0, &fft_state48000_960_1, &fft_state48000_960_2, &fft_state48000_960_3, }, mdct_twiddles960}, /* mdct */ -{392, cache_index50, cache_bits50, cache_caps50}, /* cache */ -}; - -/* List of all the available modes */ -#define TOTAL_MODES 1 -static const CELTMode * const static_mode_list[TOTAL_MODES] = { -&mode48000_960_120, -}; diff --git a/thirdparty/opus/celt/static_modes_float_arm_ne10.h 
b/thirdparty/opus/celt/static_modes_float_arm_ne10.h deleted file mode 100644 index 934a82a420..0000000000 --- a/thirdparty/opus/celt/static_modes_float_arm_ne10.h +++ /dev/null @@ -1,404 +0,0 @@ -/* The contents of this file was automatically generated by - * dump_mode_arm_ne10.c with arguments: 48000 960 - * It contains static definitions for some pre-defined modes. */ -#include <NE10_init.h> - -#ifndef NE10_FFT_PARAMS48000_960 -#define NE10_FFT_PARAMS48000_960 -static const ne10_int32_t ne10_factors_480[64] = { -4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, }; -static const ne10_int32_t ne10_factors_240[64] = { -3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, }; -static const ne10_int32_t ne10_factors_120[64] = { -3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, }; -static const ne10_int32_t ne10_factors_60[64] = { -2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, }; -static const ne10_fft_cpx_float32_t ne10_twiddles_480[480] = { -{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, -{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, -{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, -{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, -{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, -{1.0000000f,-0.0000000f}, 
{0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, -{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, -{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, -{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, -{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, -{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f}, -{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f}, -{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f}, -{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f}, -{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f}, -{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f}, -{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f}, -{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f}, -{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f}, -{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f}, -{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, -{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, -{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, -{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, -{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, -{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f}, -{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f}, -{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f}, -{-0.80901700f,-0.58778518f}, 
{-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f}, -{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f}, -{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f}, -{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f}, -{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f}, -{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f}, -{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f}, -{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f}, -{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f}, -{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f}, -{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f}, -{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f}, -{1.0000000f,-0.0000000f}, {0.99991435f,-0.013089596f}, {0.99965733f,-0.026176950f}, -{0.99922901f,-0.039259817f}, {0.99862951f,-0.052335959f}, {0.99785894f,-0.065403134f}, -{0.99691731f,-0.078459099f}, {0.99580491f,-0.091501623f}, {0.99452192f,-0.10452846f}, -{0.99306846f,-0.11753740f}, {0.99144489f,-0.13052620f}, {0.98965138f,-0.14349262f}, -{0.98768836f,-0.15643448f}, {0.98555607f,-0.16934951f}, {0.98325491f,-0.18223552f}, -{0.98078525f,-0.19509032f}, {0.97814763f,-0.20791170f}, {0.97534233f,-0.22069745f}, -{0.97236991f,-0.23344538f}, {0.96923089f,-0.24615330f}, {0.96592581f,-0.25881904f}, -{0.96245521f,-0.27144045f}, {0.95881975f,-0.28401536f}, {0.95501995f,-0.29654160f}, -{0.95105648f,-0.30901700f}, {0.94693011f,-0.32143945f}, {0.94264150f,-0.33380687f}, -{0.93819129f,-0.34611708f}, {0.93358040f,-0.35836795f}, {0.92880952f,-0.37055743f}, -{0.92387956f,-0.38268346f}, {0.91879117f,-0.39474389f}, {0.91354543f,-0.40673664f}, -{0.90814316f,-0.41865975f}, 
{0.90258527f,-0.43051112f}, {0.89687270f,-0.44228873f}, -{0.89100653f,-0.45399052f}, {0.88498765f,-0.46561453f}, {0.87881708f,-0.47715878f}, -{0.87249601f,-0.48862126f}, {0.86602545f,-0.50000000f}, {0.85940641f,-0.51129311f}, -{0.85264015f,-0.52249855f}, {0.84572786f,-0.53361452f}, {0.83867055f,-0.54463905f}, -{0.83146960f,-0.55557024f}, {0.82412618f,-0.56640625f}, {0.81664151f,-0.57714522f}, -{0.80901700f,-0.58778524f}, {0.80125380f,-0.59832460f}, {0.79335332f,-0.60876143f}, -{0.78531694f,-0.61909395f}, {0.77714598f,-0.62932038f}, {0.76884180f,-0.63943899f}, -{0.76040596f,-0.64944810f}, {0.75183982f,-0.65934587f}, {0.74314475f,-0.66913062f}, -{0.73432249f,-0.67880076f}, {0.72537434f,-0.68835455f}, {0.71630192f,-0.69779050f}, -{0.70710677f,-0.70710683f}, {0.69779044f,-0.71630198f}, {0.68835455f,-0.72537440f}, -{0.67880070f,-0.73432255f}, {0.66913056f,-0.74314487f}, {0.65934581f,-0.75183982f}, -{0.64944804f,-0.76040596f}, {0.63943899f,-0.76884186f}, {0.62932038f,-0.77714598f}, -{0.61909395f,-0.78531694f}, {0.60876137f,-0.79335338f}, {0.59832460f,-0.80125386f}, -{0.58778524f,-0.80901700f}, {0.57714516f,-0.81664151f}, {0.56640625f,-0.82412618f}, -{0.55557019f,-0.83146960f}, {0.54463899f,-0.83867055f}, {0.53361452f,-0.84572786f}, -{0.52249849f,-0.85264015f}, {0.51129311f,-0.85940641f}, {0.49999997f,-0.86602545f}, -{0.48862118f,-0.87249601f}, {0.47715876f,-0.87881708f}, {0.46561447f,-0.88498765f}, -{0.45399052f,-0.89100653f}, {0.44228867f,-0.89687276f}, {0.43051103f,-0.90258533f}, -{0.41865975f,-0.90814316f}, {0.40673661f,-0.91354549f}, {0.39474380f,-0.91879129f}, -{0.38268343f,-0.92387956f}, {0.37055740f,-0.92880958f}, {0.35836786f,-0.93358046f}, -{0.34611705f,-0.93819135f}, {0.33380681f,-0.94264150f}, {0.32143947f,-0.94693011f}, -{0.30901697f,-0.95105654f}, {0.29654151f,-0.95501995f}, {0.28401533f,-0.95881975f}, -{0.27144039f,-0.96245527f}, {0.25881907f,-0.96592581f}, {0.24615327f,-0.96923089f}, -{0.23344530f,-0.97236991f}, {0.22069745f,-0.97534233f}, 
{0.20791166f,-0.97814763f}, -{0.19509023f,-0.98078531f}, {0.18223552f,-0.98325491f}, {0.16934945f,-0.98555607f}, -{0.15643437f,-0.98768836f}, {0.14349259f,-0.98965138f}, {0.13052613f,-0.99144489f}, -{0.11753740f,-0.99306846f}, {0.10452842f,-0.99452192f}, {0.091501534f,-0.99580491f}, -{0.078459084f,-0.99691731f}, {0.065403074f,-0.99785894f}, {0.052335974f,-0.99862951f}, -{0.039259788f,-0.99922901f}, {0.026176875f,-0.99965733f}, {0.013089597f,-0.99991435f}, -{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f}, -{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f}, -{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f}, -{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f}, -{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f}, -{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f}, -{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f}, -{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f}, -{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f}, -{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f}, -{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f}, -{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f}, -{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f}, -{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f}, -{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f}, -{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f}, -{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f}, -{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f}, 
-{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f}, -{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f}, -{-4.3711388e-08f,-1.0000000f}, {-0.026176963f,-0.99965733f}, {-0.052336060f,-0.99862951f}, -{-0.078459173f,-0.99691731f}, {-0.10452851f,-0.99452192f}, {-0.13052621f,-0.99144489f}, -{-0.15643445f,-0.98768836f}, {-0.18223560f,-0.98325491f}, {-0.20791174f,-0.97814757f}, -{-0.23344538f,-0.97236991f}, {-0.25881916f,-0.96592581f}, {-0.28401542f,-0.95881969f}, -{-0.30901703f,-0.95105648f}, {-0.33380687f,-0.94264150f}, {-0.35836795f,-0.93358040f}, -{-0.38268352f,-0.92387950f}, {-0.40673670f,-0.91354543f}, {-0.43051112f,-0.90258527f}, -{-0.45399061f,-0.89100647f}, {-0.47715873f,-0.87881708f}, {-0.50000006f,-0.86602533f}, -{-0.52249867f,-0.85264009f}, {-0.54463905f,-0.83867055f}, {-0.56640631f,-0.82412612f}, -{-0.58778518f,-0.80901700f}, {-0.60876143f,-0.79335332f}, {-0.62932050f,-0.77714586f}, -{-0.64944804f,-0.76040596f}, {-0.66913068f,-0.74314475f}, {-0.68835467f,-0.72537428f}, -{-0.70710677f,-0.70710677f}, {-0.72537446f,-0.68835449f}, {-0.74314493f,-0.66913044f}, -{-0.76040596f,-0.64944804f}, {-0.77714604f,-0.62932026f}, {-0.79335332f,-0.60876143f}, -{-0.80901700f,-0.58778518f}, {-0.82412624f,-0.56640613f}, {-0.83867055f,-0.54463899f}, -{-0.85264021f,-0.52249849f}, {-0.86602539f,-0.50000006f}, {-0.87881714f,-0.47715873f}, -{-0.89100659f,-0.45399037f}, {-0.90258527f,-0.43051112f}, {-0.91354549f,-0.40673658f}, -{-0.92387956f,-0.38268328f}, {-0.93358040f,-0.35836792f}, {-0.94264150f,-0.33380675f}, -{-0.95105654f,-0.30901679f}, {-0.95881975f,-0.28401530f}, {-0.96592587f,-0.25881892f}, -{-0.97236991f,-0.23344538f}, {-0.97814763f,-0.20791161f}, {-0.98325491f,-0.18223536f}, -{-0.98768836f,-0.15643445f}, {-0.99144489f,-0.13052608f}, {-0.99452192f,-0.10452849f}, -{-0.99691737f,-0.078459039f}, {-0.99862957f,-0.052335810f}, {-0.99965733f,-0.026176952f}, -{1.0000000f,-0.0000000f}, {0.99922901f,-0.039259817f}, 
{0.99691731f,-0.078459099f}, -{0.99306846f,-0.11753740f}, {0.98768836f,-0.15643448f}, {0.98078525f,-0.19509032f}, -{0.97236991f,-0.23344538f}, {0.96245521f,-0.27144045f}, {0.95105648f,-0.30901700f}, -{0.93819129f,-0.34611708f}, {0.92387956f,-0.38268346f}, {0.90814316f,-0.41865975f}, -{0.89100653f,-0.45399052f}, {0.87249601f,-0.48862126f}, {0.85264015f,-0.52249855f}, -{0.83146960f,-0.55557024f}, {0.80901700f,-0.58778524f}, {0.78531694f,-0.61909395f}, -{0.76040596f,-0.64944810f}, {0.73432249f,-0.67880076f}, {0.70710677f,-0.70710683f}, -{0.67880070f,-0.73432255f}, {0.64944804f,-0.76040596f}, {0.61909395f,-0.78531694f}, -{0.58778524f,-0.80901700f}, {0.55557019f,-0.83146960f}, {0.52249849f,-0.85264015f}, -{0.48862118f,-0.87249601f}, {0.45399052f,-0.89100653f}, {0.41865975f,-0.90814316f}, -{0.38268343f,-0.92387956f}, {0.34611705f,-0.93819135f}, {0.30901697f,-0.95105654f}, -{0.27144039f,-0.96245527f}, {0.23344530f,-0.97236991f}, {0.19509023f,-0.98078531f}, -{0.15643437f,-0.98768836f}, {0.11753740f,-0.99306846f}, {0.078459084f,-0.99691731f}, -{0.039259788f,-0.99922901f}, {-4.3711388e-08f,-1.0000000f}, {-0.039259877f,-0.99922901f}, -{-0.078459173f,-0.99691731f}, {-0.11753749f,-0.99306846f}, {-0.15643445f,-0.98768836f}, -{-0.19509032f,-0.98078525f}, {-0.23344538f,-0.97236991f}, {-0.27144048f,-0.96245521f}, -{-0.30901703f,-0.95105648f}, {-0.34611711f,-0.93819129f}, {-0.38268352f,-0.92387950f}, -{-0.41865984f,-0.90814310f}, {-0.45399061f,-0.89100647f}, {-0.48862135f,-0.87249595f}, -{-0.52249867f,-0.85264009f}, {-0.55557036f,-0.83146954f}, {-0.58778518f,-0.80901700f}, -{-0.61909389f,-0.78531694f}, {-0.64944804f,-0.76040596f}, {-0.67880076f,-0.73432249f}, -{-0.70710677f,-0.70710677f}, {-0.73432249f,-0.67880070f}, {-0.76040596f,-0.64944804f}, -{-0.78531694f,-0.61909389f}, {-0.80901700f,-0.58778518f}, {-0.83146966f,-0.55557019f}, -{-0.85264021f,-0.52249849f}, {-0.87249607f,-0.48862115f}, {-0.89100659f,-0.45399037f}, -{-0.90814322f,-0.41865960f}, {-0.92387956f,-0.38268328f}, 
{-0.93819135f,-0.34611690f}, -{-0.95105654f,-0.30901679f}, {-0.96245521f,-0.27144048f}, {-0.97236991f,-0.23344538f}, -{-0.98078531f,-0.19509031f}, {-0.98768836f,-0.15643445f}, {-0.99306846f,-0.11753736f}, -{-0.99691737f,-0.078459039f}, {-0.99922901f,-0.039259743f}, {-1.0000000f,8.7422777e-08f}, -{-0.99922901f,0.039259918f}, {-0.99691731f,0.078459218f}, {-0.99306846f,0.11753753f}, -{-0.98768830f,0.15643461f}, {-0.98078525f,0.19509049f}, {-0.97236985f,0.23344554f}, -{-0.96245515f,0.27144065f}, {-0.95105654f,0.30901697f}, {-0.93819135f,0.34611705f}, -{-0.92387956f,0.38268346f}, {-0.90814316f,0.41865975f}, {-0.89100653f,0.45399055f}, -{-0.87249601f,0.48862129f}, {-0.85264015f,0.52249861f}, {-0.83146960f,0.55557030f}, -{-0.80901694f,0.58778536f}, {-0.78531688f,0.61909401f}, {-0.76040590f,0.64944816f}, -{-0.73432243f,0.67880082f}, {-0.70710665f,0.70710689f}, {-0.67880058f,0.73432261f}, -{-0.64944792f,0.76040608f}, {-0.61909378f,0.78531706f}, {-0.58778507f,0.80901712f}, -{-0.55557001f,0.83146977f}, {-0.52249837f,0.85264033f}, {-0.48862100f,0.87249613f}, -{-0.45399022f,0.89100665f}, {-0.41865945f,0.90814328f}, {-0.38268313f,0.92387968f}, -{-0.34611672f,0.93819147f}, {-0.30901709f,0.95105648f}, {-0.27144054f,0.96245521f}, -{-0.23344545f,0.97236991f}, {-0.19509038f,0.98078525f}, {-0.15643452f,0.98768830f}, -{-0.11753743f,0.99306846f}, {-0.078459114f,0.99691731f}, {-0.039259821f,0.99922901f}, -}; -static const ne10_fft_cpx_float32_t ne10_twiddles_240[240] = { -{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, -{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, -{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, -{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, -{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, -{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, -{0.95105648f,-0.30901700f}, 
{0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, -{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, -{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, -{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, -{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, -{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, -{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, -{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, -{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, -{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f}, -{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f}, -{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f}, -{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f}, -{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f}, -{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f}, -{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f}, -{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f}, -{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f}, -{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f}, -{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f}, -{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f}, -{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f}, -{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f}, -{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, 
{0.72537434f,-0.68835455f}, -{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f}, -{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f}, -{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f}, -{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f}, -{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f}, -{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f}, -{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f}, -{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f}, -{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f}, -{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f}, -{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f}, -{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f}, -{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f}, -{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f}, -{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f}, -{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f}, -{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f}, -{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f}, -{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f}, -{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f}, -{-4.3711388e-08f,-1.0000000f}, {-0.052336060f,-0.99862951f}, {-0.10452851f,-0.99452192f}, -{-0.15643445f,-0.98768836f}, {-0.20791174f,-0.97814757f}, {-0.25881916f,-0.96592581f}, -{-0.30901703f,-0.95105648f}, {-0.35836795f,-0.93358040f}, {-0.40673670f,-0.91354543f}, 
-{-0.45399061f,-0.89100647f}, {-0.50000006f,-0.86602533f}, {-0.54463905f,-0.83867055f}, -{-0.58778518f,-0.80901700f}, {-0.62932050f,-0.77714586f}, {-0.66913068f,-0.74314475f}, -{-0.70710677f,-0.70710677f}, {-0.74314493f,-0.66913044f}, {-0.77714604f,-0.62932026f}, -{-0.80901700f,-0.58778518f}, {-0.83867055f,-0.54463899f}, {-0.86602539f,-0.50000006f}, -{-0.89100659f,-0.45399037f}, {-0.91354549f,-0.40673658f}, {-0.93358040f,-0.35836792f}, -{-0.95105654f,-0.30901679f}, {-0.96592587f,-0.25881892f}, {-0.97814763f,-0.20791161f}, -{-0.98768836f,-0.15643445f}, {-0.99452192f,-0.10452849f}, {-0.99862957f,-0.052335810f}, -{1.0000000f,-0.0000000f}, {0.99691731f,-0.078459099f}, {0.98768836f,-0.15643448f}, -{0.97236991f,-0.23344538f}, {0.95105648f,-0.30901700f}, {0.92387956f,-0.38268346f}, -{0.89100653f,-0.45399052f}, {0.85264015f,-0.52249855f}, {0.80901700f,-0.58778524f}, -{0.76040596f,-0.64944810f}, {0.70710677f,-0.70710683f}, {0.64944804f,-0.76040596f}, -{0.58778524f,-0.80901700f}, {0.52249849f,-0.85264015f}, {0.45399052f,-0.89100653f}, -{0.38268343f,-0.92387956f}, {0.30901697f,-0.95105654f}, {0.23344530f,-0.97236991f}, -{0.15643437f,-0.98768836f}, {0.078459084f,-0.99691731f}, {-4.3711388e-08f,-1.0000000f}, -{-0.078459173f,-0.99691731f}, {-0.15643445f,-0.98768836f}, {-0.23344538f,-0.97236991f}, -{-0.30901703f,-0.95105648f}, {-0.38268352f,-0.92387950f}, {-0.45399061f,-0.89100647f}, -{-0.52249867f,-0.85264009f}, {-0.58778518f,-0.80901700f}, {-0.64944804f,-0.76040596f}, -{-0.70710677f,-0.70710677f}, {-0.76040596f,-0.64944804f}, {-0.80901700f,-0.58778518f}, -{-0.85264021f,-0.52249849f}, {-0.89100659f,-0.45399037f}, {-0.92387956f,-0.38268328f}, -{-0.95105654f,-0.30901679f}, {-0.97236991f,-0.23344538f}, {-0.98768836f,-0.15643445f}, -{-0.99691737f,-0.078459039f}, {-1.0000000f,8.7422777e-08f}, {-0.99691731f,0.078459218f}, -{-0.98768830f,0.15643461f}, {-0.97236985f,0.23344554f}, {-0.95105654f,0.30901697f}, -{-0.92387956f,0.38268346f}, {-0.89100653f,0.45399055f}, 
{-0.85264015f,0.52249861f}, -{-0.80901694f,0.58778536f}, {-0.76040590f,0.64944816f}, {-0.70710665f,0.70710689f}, -{-0.64944792f,0.76040608f}, {-0.58778507f,0.80901712f}, {-0.52249837f,0.85264033f}, -{-0.45399022f,0.89100665f}, {-0.38268313f,0.92387968f}, {-0.30901709f,0.95105648f}, -{-0.23344545f,0.97236991f}, {-0.15643452f,0.98768830f}, {-0.078459114f,0.99691731f}, -}; -static const ne10_fft_cpx_float32_t ne10_twiddles_120[120] = { -{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f}, -{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, -{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, -{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, -{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, -{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, -{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, -{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, -{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, -{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, -{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f}, -{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f}, -{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f}, -{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f}, -{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f}, -{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f}, -{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f}, -{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f}, -{0.30901697f,-0.95105654f}, 
{0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f}, -{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f}, -{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, -{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, -{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, -{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, -{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, -{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f}, -{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f}, -{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f}, -{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f}, -{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f}, -{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f}, -{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f}, -{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f}, -{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f}, -{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f}, -{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f}, -{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f}, -{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f}, -{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f}, -{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f}, -}; -static const ne10_fft_cpx_float32_t ne10_twiddles_60[60] = { -{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, 
{1.0000000f,-0.0000000f}, -{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f}, -{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f}, -{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f}, -{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f}, -{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f}, -{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f}, -{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f}, -{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f}, -{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f}, -{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f}, -{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f}, -{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f}, -{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f}, -{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f}, -{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f}, -{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f}, -{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f}, -{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f}, -{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f}, -}; -static const ne10_fft_state_float32_t ne10_fft_state_float32_t_480 = { -120, -(ne10_int32_t *)ne10_factors_480, -(ne10_fft_cpx_float32_t *)ne10_twiddles_480, -NULL, -(ne10_fft_cpx_float32_t *)&ne10_twiddles_480[120], -/* is_forward_scaled = true */ -(ne10_int32_t) 1, -/* is_backward_scaled = false */ -(ne10_int32_t) 0, -}; -static const 
arch_fft_state cfg_arch_480 = { -1, -(void *)&ne10_fft_state_float32_t_480, -}; - -static const ne10_fft_state_float32_t ne10_fft_state_float32_t_240 = { -60, -(ne10_int32_t *)ne10_factors_240, -(ne10_fft_cpx_float32_t *)ne10_twiddles_240, -NULL, -(ne10_fft_cpx_float32_t *)&ne10_twiddles_240[60], -/* is_forward_scaled = true */ -(ne10_int32_t) 1, -/* is_backward_scaled = false */ -(ne10_int32_t) 0, -}; -static const arch_fft_state cfg_arch_240 = { -1, -(void *)&ne10_fft_state_float32_t_240, -}; - -static const ne10_fft_state_float32_t ne10_fft_state_float32_t_120 = { -30, -(ne10_int32_t *)ne10_factors_120, -(ne10_fft_cpx_float32_t *)ne10_twiddles_120, -NULL, -(ne10_fft_cpx_float32_t *)&ne10_twiddles_120[30], -/* is_forward_scaled = true */ -(ne10_int32_t) 1, -/* is_backward_scaled = false */ -(ne10_int32_t) 0, -}; -static const arch_fft_state cfg_arch_120 = { -1, -(void *)&ne10_fft_state_float32_t_120, -}; - -static const ne10_fft_state_float32_t ne10_fft_state_float32_t_60 = { -15, -(ne10_int32_t *)ne10_factors_60, -(ne10_fft_cpx_float32_t *)ne10_twiddles_60, -NULL, -(ne10_fft_cpx_float32_t *)&ne10_twiddles_60[15], -/* is_forward_scaled = true */ -(ne10_int32_t) 1, -/* is_backward_scaled = false */ -(ne10_int32_t) 0, -}; -static const arch_fft_state cfg_arch_60 = { -1, -(void *)&ne10_fft_state_float32_t_60, -}; - -#endif /* end NE10_FFT_PARAMS48000_960 */ diff --git a/thirdparty/opus/celt/tests/test_unit_cwrs32.c b/thirdparty/opus/celt/tests/test_unit_cwrs32.c deleted file mode 100644 index 36dd8af5f5..0000000000 --- a/thirdparty/opus/celt/tests/test_unit_cwrs32.c +++ /dev/null @@ -1,161 +0,0 @@ -/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation, - Gregory Maxwell - Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. 
Terriberry */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <stdio.h> -#include <string.h> - -#ifndef CUSTOM_MODES -#define CUSTOM_MODES -#else -#define TEST_CUSTOM_MODES -#endif - -#define CELT_C -#include "stack_alloc.h" -#include "entenc.c" -#include "entdec.c" -#include "entcode.c" -#include "cwrs.c" -#include "mathops.c" -#include "rate.h" - -#define NMAX (240) -#define KMAX (128) - -#ifdef TEST_CUSTOM_MODES - -#define NDIMS (44) -static const int pn[NDIMS]={ - 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 18, 20, 22, - 24, 26, 28, 30, 32, 36, 40, 44, 48, - 52, 56, 60, 64, 72, 80, 88, 96, 104, - 112, 120, 128, 144, 160, 176, 192, 208 -}; -static const int pkmax[NDIMS]={ - 128, 128, 128, 128, 88, 52, 36, 26, 22, - 18, 16, 15, 13, 12, 12, 11, 10, 9, - 9, 8, 8, 7, 7, 7, 7, 6, 6, - 6, 6, 6, 5, 5, 5, 5, 5, 5, - 4, 4, 4, 4, 4, 4, 4, 4 -}; - -#else /* TEST_CUSTOM_MODES */ - -#define NDIMS (22) -static const int pn[NDIMS]={ - 2, 3, 4, 6, 8, 9, 11, 12, 16, - 18, 22, 24, 32, 36, 44, 48, 64, 72, - 88, 96, 144, 176 -}; -static const int pkmax[NDIMS]={ - 128, 128, 128, 88, 36, 26, 18, 16, 12, - 11, 9, 9, 7, 7, 6, 6, 5, 5, - 5, 5, 4, 4 -}; - -#endif - -int main(void){ - int t; - int n; - ALLOC_STACK; - for(t=0;t<NDIMS;t++){ - int pseudo; - n=pn[t]; - for(pseudo=1;pseudo<41;pseudo++) - { - int k; -#if defined(SMALL_FOOTPRINT) - opus_uint32 uu[KMAX+2U]; -#endif - opus_uint32 inc; - opus_uint32 nc; - opus_uint32 i; - k=get_pulses(pseudo); - if (k>pkmax[t])break; - printf("Testing CWRS with N=%i, K=%i...\n",n,k); -#if defined(SMALL_FOOTPRINT) - nc=ncwrs_urow(n,k,uu); -#else - nc=CELT_PVQ_V(n,k); -#endif - inc=nc/20000; - if(inc<1)inc=1; - for(i=0;i<nc;i+=inc){ -#if defined(SMALL_FOOTPRINT) - opus_uint32 u[KMAX+2U]; -#endif - int y[NMAX]; - int sy; - opus_uint32 v; - opus_uint32 ii; - int j; -#if defined(SMALL_FOOTPRINT) - memcpy(u,uu,(k+2U)*sizeof(*u)); - cwrsi(n,k,i,y,u); -#else - cwrsi(n,k,i,y); -#endif - sy=0; - for(j=0;j<n;j++)sy+=abs(y[j]); - if(sy!=k){ - 
fprintf(stderr,"N=%d Pulse count mismatch in cwrsi (%d!=%d).\n", - n,sy,k); - return 99; - } - /*printf("%6u of %u:",i,nc); - for(j=0;j<n;j++)printf(" %+3i",y[j]); - printf(" ->");*/ -#if defined(SMALL_FOOTPRINT) - ii=icwrs(n,k,&v,y,u); -#else - ii=icwrs(n,y); - v=CELT_PVQ_V(n,k); -#endif - if(ii!=i){ - fprintf(stderr,"Combination-index mismatch (%lu!=%lu).\n", - (long)ii,(long)i); - return 1; - } - if(v!=nc){ - fprintf(stderr,"Combination count mismatch (%lu!=%lu).\n", - (long)v,(long)nc); - return 2; - } - /*printf(" %6u\n",i);*/ - } - /*printf("\n");*/ - } - } - return 0; -} diff --git a/thirdparty/opus/celt/tests/test_unit_dft.c b/thirdparty/opus/celt/tests/test_unit_dft.c deleted file mode 100644 index 6166eb0e4f..0000000000 --- a/thirdparty/opus/celt/tests/test_unit_dft.c +++ /dev/null @@ -1,189 +0,0 @@ -/* Copyright (c) 2008 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#define SKIP_CONFIG_H - -#ifndef CUSTOM_MODES -#define CUSTOM_MODES -#endif - -#include <stdio.h> - -#define CELT_C -#define TEST_UNIT_DFT_C -#include "stack_alloc.h" -#include "kiss_fft.h" -#include "kiss_fft.c" -#include "mathops.c" -#include "entcode.c" - -#if defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) -# include "x86/x86cpu.c" -#elif defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -# include "arm/armcpu.c" -# include "celt_lpc.c" -# include "pitch.c" -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -# include "arm/celt_neon_intr.c" -# if defined(HAVE_ARM_NE10) -# include "mdct.c" -# include "arm/celt_ne10_fft.c" -# include "arm/celt_ne10_mdct.c" -# endif -# endif -# include "arm/arm_celt_map.c" -#endif - -#ifndef M_PI -#define M_PI 3.141592653 -#endif - -int ret = 0; - -void check(kiss_fft_cpx * in,kiss_fft_cpx * out,int nfft,int isinverse) -{ - int bin,k; - double errpow=0,sigpow=0, snr; - - for (bin=0;bin<nfft;++bin) { - double ansr = 0; - double ansi = 0; - double difr; - double difi; - - for (k=0;k<nfft;++k) { - double phase = -2*M_PI*bin*k/nfft; - double re = cos(phase); - double im = sin(phase); - if (isinverse) - im = -im; - - if (!isinverse) - { - re /= nfft; - im /= nfft; - } - - ansr += in[k].r * re - in[k].i * im; - ansi += in[k].r * im + in[k].i * re; - } - /*printf ("%d %d ", (int)ansr, (int)ansi);*/ - difr = ansr - out[bin].r; - difi = 
ansi - out[bin].i; - errpow += difr*difr + difi*difi; - sigpow += ansr*ansr+ansi*ansi; - } - snr = 10*log10(sigpow/errpow); - printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr ); - if (snr<60) { - printf( "** poor snr: %f ** \n", snr); - ret = 1; - } -} - -void test1d(int nfft,int isinverse,int arch) -{ - size_t buflen = sizeof(kiss_fft_cpx)*nfft; - - kiss_fft_cpx * in = (kiss_fft_cpx*)malloc(buflen); - kiss_fft_cpx * out= (kiss_fft_cpx*)malloc(buflen); - kiss_fft_state *cfg = opus_fft_alloc(nfft,0,0,arch); - int k; - - for (k=0;k<nfft;++k) { - in[k].r = (rand() % 32767) - 16384; - in[k].i = (rand() % 32767) - 16384; - } - - for (k=0;k<nfft;++k) { - in[k].r *= 32768; - in[k].i *= 32768; - } - - if (isinverse) - { - for (k=0;k<nfft;++k) { - in[k].r /= nfft; - in[k].i /= nfft; - } - } - - /*for (k=0;k<nfft;++k) printf("%d %d ", in[k].r, in[k].i);printf("\n");*/ - - if (isinverse) - opus_ifft(cfg,in,out, arch); - else - opus_fft(cfg,in,out, arch); - - /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/ - - check(in,out,nfft,isinverse); - - free(in); - free(out); - opus_fft_free(cfg, arch); -} - -int main(int argc,char ** argv) -{ - ALLOC_STACK; - int arch = opus_select_arch(); - - if (argc>1) { - int k; - for (k=1;k<argc;++k) { - test1d(atoi(argv[k]),0,arch); - test1d(atoi(argv[k]),1,arch); - } - }else{ - test1d(32,0,arch); - test1d(32,1,arch); - test1d(128,0,arch); - test1d(128,1,arch); - test1d(256,0,arch); - test1d(256,1,arch); -#ifndef RADIX_TWO_ONLY - test1d(36,0,arch); - test1d(36,1,arch); - test1d(50,0,arch); - test1d(50,1,arch); - test1d(60,0,arch); - test1d(60,1,arch); - test1d(120,0,arch); - test1d(120,1,arch); - test1d(240,0,arch); - test1d(240,1,arch); - test1d(480,0,arch); - test1d(480,1,arch); -#endif - } - return ret; -} diff --git a/thirdparty/opus/celt/tests/test_unit_entropy.c b/thirdparty/opus/celt/tests/test_unit_entropy.c deleted file mode 100644 index ff9265864c..0000000000 --- 
a/thirdparty/opus/celt/tests/test_unit_entropy.c +++ /dev/null @@ -1,382 +0,0 @@ -/* Copyright (c) 2007-2011 Xiph.Org Foundation, Mozilla Corporation, - Gregory Maxwell - Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <stdlib.h> -#include <stdio.h> -#include <math.h> -#include <time.h> -#include "entcode.h" -#include "entenc.h" -#include "entdec.h" -#include <string.h> - -#include "entenc.c" -#include "entdec.c" -#include "entcode.c" - -#ifndef M_LOG2E -# define M_LOG2E 1.4426950408889634074 -#endif -#define DATA_SIZE 10000000 -#define DATA_SIZE2 10000 - -int main(int _argc,char **_argv){ - ec_enc enc; - ec_dec dec; - long nbits; - long nbits2; - double entropy; - int ft; - int ftb; - int sz; - int i; - int ret; - unsigned int sym; - unsigned int seed; - unsigned char *ptr; - const char *env_seed; - ret=0; - entropy=0; - if (_argc > 2) { - fprintf(stderr, "Usage: %s [<seed>]\n", _argv[0]); - return 1; - } - env_seed = getenv("SEED"); - if (_argc > 1) - seed = atoi(_argv[1]); - else if (env_seed) - seed = atoi(env_seed); - else - seed = time(NULL); - /*Testing encoding of raw bit values.*/ - ptr = (unsigned char *)malloc(DATA_SIZE); - ec_enc_init(&enc,ptr, DATA_SIZE); - for(ft=2;ft<1024;ft++){ - for(i=0;i<ft;i++){ - entropy+=log(ft)*M_LOG2E; - ec_enc_uint(&enc,i,ft); - } - } - /*Testing encoding of raw bit values.*/ - for(ftb=1;ftb<16;ftb++){ - for(i=0;i<(1<<ftb);i++){ - entropy+=ftb; - nbits=ec_tell(&enc); - ec_enc_bits(&enc,i,ftb); - nbits2=ec_tell(&enc); - if(nbits2-nbits!=ftb){ - fprintf(stderr,"Used %li bits to encode %i bits directly.\n", - nbits2-nbits,ftb); - ret=-1; - } - } - } - nbits=ec_tell_frac(&enc); - ec_enc_done(&enc); - fprintf(stderr, - "Encoded %0.2lf bits of entropy to %0.2lf bits (%0.3lf%% wasted).\n", - entropy,ldexp(nbits,-3),100*(nbits-ldexp(entropy,3))/nbits); - fprintf(stderr,"Packed to %li bytes.\n",(long)ec_range_bytes(&enc)); - ec_dec_init(&dec,ptr,DATA_SIZE); - for(ft=2;ft<1024;ft++){ - for(i=0;i<ft;i++){ - sym=ec_dec_uint(&dec,ft); - if(sym!=(unsigned)i){ - fprintf(stderr,"Decoded %i instead of %i with ft of %i.\n",sym,i,ft); - ret=-1; - } - } - } - for(ftb=1;ftb<16;ftb++){ - 
for(i=0;i<(1<<ftb);i++){ - sym=ec_dec_bits(&dec,ftb); - if(sym!=(unsigned)i){ - fprintf(stderr,"Decoded %i instead of %i with ftb of %i.\n",sym,i,ftb); - ret=-1; - } - } - } - nbits2=ec_tell_frac(&dec); - if(nbits!=nbits2){ - fprintf(stderr, - "Reported number of bits used was %0.2lf, should be %0.2lf.\n", - ldexp(nbits2,-3),ldexp(nbits,-3)); - ret=-1; - } - /*Testing an encoder bust prefers range coder data over raw bits. - This isn't a general guarantee, will only work for data that is buffered in - the encoder state and not yet stored in the user buffer, and should never - get used in practice. - It's mostly here for code coverage completeness.*/ - /*Start with a 16-bit buffer.*/ - ec_enc_init(&enc,ptr,2); - /*Write 7 raw bits.*/ - ec_enc_bits(&enc,0x55,7); - /*Write 12.3 bits of range coder data.*/ - ec_enc_uint(&enc,1,2); - ec_enc_uint(&enc,1,3); - ec_enc_uint(&enc,1,4); - ec_enc_uint(&enc,1,5); - ec_enc_uint(&enc,2,6); - ec_enc_uint(&enc,6,7); - ec_enc_done(&enc); - ec_dec_init(&dec,ptr,2); - if(!enc.error - /*The raw bits should have been overwritten by the range coder data.*/ - ||ec_dec_bits(&dec,7)!=0x05 - /*And all the range coder data should have been encoded correctly.*/ - ||ec_dec_uint(&dec,2)!=1 - ||ec_dec_uint(&dec,3)!=1 - ||ec_dec_uint(&dec,4)!=1 - ||ec_dec_uint(&dec,5)!=1 - ||ec_dec_uint(&dec,6)!=2 - ||ec_dec_uint(&dec,7)!=6){ - fprintf(stderr,"Encoder bust overwrote range coder data with raw bits.\n"); - ret=-1; - } - srand(seed); - fprintf(stderr,"Testing random streams... 
Random seed: %u (%.4X)\n", seed, rand() % 65536); - for(i=0;i<409600;i++){ - unsigned *data; - unsigned *tell; - unsigned tell_bits; - int j; - int zeros; - ft=rand()/((RAND_MAX>>(rand()%11U))+1U)+10; - sz=rand()/((RAND_MAX>>(rand()%9U))+1U); - data=(unsigned *)malloc(sz*sizeof(*data)); - tell=(unsigned *)malloc((sz+1)*sizeof(*tell)); - ec_enc_init(&enc,ptr,DATA_SIZE2); - zeros = rand()%13==0; - tell[0]=ec_tell_frac(&enc); - for(j=0;j<sz;j++){ - if (zeros) - data[j]=0; - else - data[j]=rand()%ft; - ec_enc_uint(&enc,data[j],ft); - tell[j+1]=ec_tell_frac(&enc); - } - if (rand()%2==0) - while(ec_tell(&enc)%8 != 0) - ec_enc_uint(&enc, rand()%2, 2); - tell_bits = ec_tell(&enc); - ec_enc_done(&enc); - if(tell_bits!=(unsigned)ec_tell(&enc)){ - fprintf(stderr,"ec_tell() changed after ec_enc_done(): %i instead of %i (Random seed: %u)\n", - ec_tell(&enc),tell_bits,seed); - ret=-1; - } - if ((tell_bits+7)/8 < ec_range_bytes(&enc)) - { - fprintf (stderr, "ec_tell() lied, there's %i bytes instead of %d (Random seed: %u)\n", - ec_range_bytes(&enc), (tell_bits+7)/8,seed); - ret=-1; - } - ec_dec_init(&dec,ptr,DATA_SIZE2); - if(ec_tell_frac(&dec)!=tell[0]){ - fprintf(stderr, - "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n", - 0,ec_tell_frac(&dec),tell[0],seed); - } - for(j=0;j<sz;j++){ - sym=ec_dec_uint(&dec,ft); - if(sym!=data[j]){ - fprintf(stderr, - "Decoded %i instead of %i with ft of %i at position %i of %i (Random seed: %u).\n", - sym,data[j],ft,j,sz,seed); - ret=-1; - } - if(ec_tell_frac(&dec)!=tell[j+1]){ - fprintf(stderr, - "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n", - j+1,ec_tell_frac(&dec),tell[j+1],seed); - } - } - free(tell); - free(data); - } - /*Test compatibility between multiple different encode/decode routines.*/ - for(i=0;i<409600;i++){ - unsigned *logp1; - unsigned *data; - unsigned *tell; - unsigned *enc_method; - int j; - 
sz=rand()/((RAND_MAX>>(rand()%9U))+1U); - logp1=(unsigned *)malloc(sz*sizeof(*logp1)); - data=(unsigned *)malloc(sz*sizeof(*data)); - tell=(unsigned *)malloc((sz+1)*sizeof(*tell)); - enc_method=(unsigned *)malloc(sz*sizeof(*enc_method)); - ec_enc_init(&enc,ptr,DATA_SIZE2); - tell[0]=ec_tell_frac(&enc); - for(j=0;j<sz;j++){ - data[j]=rand()/((RAND_MAX>>1)+1); - logp1[j]=(rand()%15)+1; - enc_method[j]=rand()/((RAND_MAX>>2)+1); - switch(enc_method[j]){ - case 0:{ - ec_encode(&enc,data[j]?(1<<logp1[j])-1:0, - (1<<logp1[j])-(data[j]?0:1),1<<logp1[j]); - }break; - case 1:{ - ec_encode_bin(&enc,data[j]?(1<<logp1[j])-1:0, - (1<<logp1[j])-(data[j]?0:1),logp1[j]); - }break; - case 2:{ - ec_enc_bit_logp(&enc,data[j],logp1[j]); - }break; - case 3:{ - unsigned char icdf[2]; - icdf[0]=1; - icdf[1]=0; - ec_enc_icdf(&enc,data[j],icdf,logp1[j]); - }break; - } - tell[j+1]=ec_tell_frac(&enc); - } - ec_enc_done(&enc); - if((ec_tell(&enc)+7U)/8U<ec_range_bytes(&enc)){ - fprintf(stderr,"tell() lied, there's %i bytes instead of %d (Random seed: %u)\n", - ec_range_bytes(&enc),(ec_tell(&enc)+7)/8,seed); - ret=-1; - } - ec_dec_init(&dec,ptr,DATA_SIZE2); - if(ec_tell_frac(&dec)!=tell[0]){ - fprintf(stderr, - "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n", - 0,ec_tell_frac(&dec),tell[0],seed); - } - for(j=0;j<sz;j++){ - int fs; - int dec_method; - dec_method=rand()/((RAND_MAX>>2)+1); - switch(dec_method){ - case 0:{ - fs=ec_decode(&dec,1<<logp1[j]); - sym=fs>=(1<<logp1[j])-1; - ec_dec_update(&dec,sym?(1<<logp1[j])-1:0, - (1<<logp1[j])-(sym?0:1),1<<logp1[j]); - }break; - case 1:{ - fs=ec_decode_bin(&dec,logp1[j]); - sym=fs>=(1<<logp1[j])-1; - ec_dec_update(&dec,sym?(1<<logp1[j])-1:0, - (1<<logp1[j])-(sym?0:1),1<<logp1[j]); - }break; - case 2:{ - sym=ec_dec_bit_logp(&dec,logp1[j]); - }break; - case 3:{ - unsigned char icdf[2]; - icdf[0]=1; - icdf[1]=0; - sym=ec_dec_icdf(&dec,icdf,logp1[j]); - }break; - } - if(sym!=data[j]){ - fprintf(stderr, - 
"Decoded %i instead of %i with logp1 of %i at position %i of %i (Random seed: %u).\n", - sym,data[j],logp1[j],j,sz,seed); - fprintf(stderr,"Encoding method: %i, decoding method: %i\n", - enc_method[j],dec_method); - ret=-1; - } - if(ec_tell_frac(&dec)!=tell[j+1]){ - fprintf(stderr, - "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n", - j+1,ec_tell_frac(&dec),tell[j+1],seed); - } - } - free(enc_method); - free(tell); - free(data); - free(logp1); - } - ec_enc_init(&enc,ptr,DATA_SIZE2); - ec_enc_bit_logp(&enc,0,1); - ec_enc_bit_logp(&enc,0,1); - ec_enc_bit_logp(&enc,0,1); - ec_enc_bit_logp(&enc,0,1); - ec_enc_bit_logp(&enc,0,2); - ec_enc_patch_initial_bits(&enc,3,2); - if(enc.error){ - fprintf(stderr,"patch_initial_bits failed"); - ret=-1; - } - ec_enc_patch_initial_bits(&enc,0,5); - if(!enc.error){ - fprintf(stderr,"patch_initial_bits didn't fail when it should have"); - ret=-1; - } - ec_enc_done(&enc); - if(ec_range_bytes(&enc)!=1||ptr[0]!=192){ - fprintf(stderr,"Got %d when expecting 192 for patch_initial_bits",ptr[0]); - ret=-1; - } - ec_enc_init(&enc,ptr,DATA_SIZE2); - ec_enc_bit_logp(&enc,0,1); - ec_enc_bit_logp(&enc,0,1); - ec_enc_bit_logp(&enc,1,6); - ec_enc_bit_logp(&enc,0,2); - ec_enc_patch_initial_bits(&enc,0,2); - if(enc.error){ - fprintf(stderr,"patch_initial_bits failed"); - ret=-1; - } - ec_enc_done(&enc); - if(ec_range_bytes(&enc)!=2||ptr[0]!=63){ - fprintf(stderr,"Got %d when expecting 63 for patch_initial_bits",ptr[0]); - ret=-1; - } - ec_enc_init(&enc,ptr,2); - ec_enc_bit_logp(&enc,0,2); - for(i=0;i<48;i++){ - ec_enc_bits(&enc,0,1); - } - ec_enc_done(&enc); - if(!enc.error){ - fprintf(stderr,"Raw bits overfill didn't fail when it should have"); - ret=-1; - } - ec_enc_init(&enc,ptr,2); - for(i=0;i<17;i++){ - ec_enc_bits(&enc,0,1); - } - ec_enc_done(&enc); - if(!enc.error){ - fprintf(stderr,"17 raw bits encoded in two bytes"); - ret=-1; - } - free(ptr); - return ret; -} diff --git 
a/thirdparty/opus/celt/tests/test_unit_laplace.c b/thirdparty/opus/celt/tests/test_unit_laplace.c deleted file mode 100644 index 22951e29ee..0000000000 --- a/thirdparty/opus/celt/tests/test_unit_laplace.c +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation - Written by Jean-Marc Valin and Timothy B. Terriberry */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <stdio.h> -#include <stdlib.h> -#include "laplace.h" -#define CELT_C -#include "stack_alloc.h" - -#include "entenc.c" -#include "entdec.c" -#include "entcode.c" -#include "laplace.c" - -#define DATA_SIZE 40000 - -int ec_laplace_get_start_freq(int decay) -{ - opus_uint32 ft = 32768 - LAPLACE_MINP*(2*LAPLACE_NMIN+1); - int fs = (ft*(16384-decay))/(16384+decay); - return fs+LAPLACE_MINP; -} - -int main(void) -{ - int i; - int ret = 0; - ec_enc enc; - ec_dec dec; - unsigned char *ptr; - int val[10000], decay[10000]; - ALLOC_STACK; - ptr = (unsigned char *)malloc(DATA_SIZE); - ec_enc_init(&enc,ptr,DATA_SIZE); - - val[0] = 3; decay[0] = 6000; - val[1] = 0; decay[1] = 5800; - val[2] = -1; decay[2] = 5600; - for (i=3;i<10000;i++) - { - val[i] = rand()%15-7; - decay[i] = rand()%11000+5000; - } - for (i=0;i<10000;i++) - ec_laplace_encode(&enc, &val[i], - ec_laplace_get_start_freq(decay[i]), decay[i]); - - ec_enc_done(&enc); - - ec_dec_init(&dec,ec_get_buffer(&enc),ec_range_bytes(&enc)); - - for (i=0;i<10000;i++) - { - int d = ec_laplace_decode(&dec, - ec_laplace_get_start_freq(decay[i]), decay[i]); - if (d != val[i]) - { - fprintf (stderr, "Got %d instead of %d\n", d, val[i]); - ret = 1; - } - } - - free(ptr); - return ret; -} diff --git a/thirdparty/opus/celt/tests/test_unit_mathops.c b/thirdparty/opus/celt/tests/test_unit_mathops.c deleted file mode 100644 index fd3319da91..0000000000 --- a/thirdparty/opus/celt/tests/test_unit_mathops.c +++ /dev/null @@ -1,304 +0,0 @@ -/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation, - Gregory Maxwell - Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. 
Terriberry */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifndef CUSTOM_MODES -#define CUSTOM_MODES -#endif - -#define CELT_C - -#include <stdio.h> -#include <math.h> -#include "mathops.c" -#include "entenc.c" -#include "entdec.c" -#include "entcode.c" -#include "bands.c" -#include "quant_bands.c" -#include "laplace.c" -#include "vq.c" -#include "cwrs.c" -#include "pitch.c" -#include "celt_lpc.c" -#include "celt.c" - -#if defined(OPUS_X86_MAY_HAVE_SSE) || defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) -# if defined(OPUS_X86_MAY_HAVE_SSE) -# include "x86/pitch_sse.c" -# endif -# if defined(OPUS_X86_MAY_HAVE_SSE2) -# include "x86/pitch_sse2.c" -# endif -# if defined(OPUS_X86_MAY_HAVE_SSE4_1) -# include "x86/pitch_sse4_1.c" -# include "x86/celt_lpc_sse.c" -# endif -# include "x86/x86_celt_map.c" -#elif defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -# include "arm/armcpu.c" -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -# include "arm/celt_neon_intr.c" -# if defined(HAVE_ARM_NE10) -# include "kiss_fft.c" -# include "mdct.c" -# include "arm/celt_ne10_fft.c" -# include "arm/celt_ne10_mdct.c" -# endif -# endif -# include "arm/arm_celt_map.c" -#endif - -#ifdef FIXED_POINT -#define WORD "%d" -#else -#define WORD "%f" -#endif - -int ret = 0; - -void testdiv(void) -{ - opus_int32 i; - for (i=1;i<=327670;i++) - { - double prod; - opus_val32 val; - val = celt_rcp(i); -#ifdef FIXED_POINT - prod = (1./32768./65526.)*val*i; -#else - prod = val*i; -#endif - if (fabs(prod-1) > .00025) - { - fprintf (stderr, "div failed: 1/%d="WORD" (product = %f)\n", i, val, prod); - ret = 1; - } - } -} - -void testsqrt(void) -{ - opus_int32 i; - for (i=1;i<=1000000000;i++) - { - double ratio; - opus_val16 val; - val = celt_sqrt(i); - ratio = val/sqrt(i); - if (fabs(ratio - 1) > .0005 && fabs(val-sqrt(i)) > 2) - { - fprintf (stderr, "sqrt failed: sqrt(%d)="WORD" (ratio = %f)\n", i, val, ratio); - ret = 1; - } - i+= i>>10; - } -} - -void testbitexactcos(void) -{ 
- int i; - opus_int32 min_d,max_d,last,chk; - chk=max_d=0; - last=min_d=32767; - for(i=64;i<=16320;i++) - { - opus_int32 d; - opus_int32 q=bitexact_cos(i); - chk ^= q*i; - d = last - q; - if (d>max_d)max_d=d; - if (d<min_d)min_d=d; - last = q; - } - if ((chk!=89408644)||(max_d!=5)||(min_d!=0)||(bitexact_cos(64)!=32767)|| - (bitexact_cos(16320)!=200)||(bitexact_cos(8192)!=23171)) - { - fprintf (stderr, "bitexact_cos failed\n"); - ret = 1; - } -} - -void testbitexactlog2tan(void) -{ - int i,fail; - opus_int32 min_d,max_d,last,chk; - fail=chk=max_d=0; - last=min_d=15059; - for(i=64;i<8193;i++) - { - opus_int32 d; - opus_int32 mid=bitexact_cos(i); - opus_int32 side=bitexact_cos(16384-i); - opus_int32 q=bitexact_log2tan(mid,side); - chk ^= q*i; - d = last - q; - if (q!=-1*bitexact_log2tan(side,mid)) - fail = 1; - if (d>max_d)max_d=d; - if (d<min_d)min_d=d; - last = q; - } - if ((chk!=15821257)||(max_d!=61)||(min_d!=-2)||fail|| - (bitexact_log2tan(32767,200)!=15059)||(bitexact_log2tan(30274,12540)!=2611)|| - (bitexact_log2tan(23171,23171)!=0)) - { - fprintf (stderr, "bitexact_log2tan failed\n"); - ret = 1; - } -} - -#ifndef FIXED_POINT -void testlog2(void) -{ - float x; - for (x=0.001;x<1677700.0;x+=(x/8.0)) - { - float error = fabs((1.442695040888963387*log(x))-celt_log2(x)); - if (error>0.0009) - { - fprintf (stderr, "celt_log2 failed: fabs((1.442695040888963387*log(x))-celt_log2(x))>0.001 (x = %f, error = %f)\n", x,error); - ret = 1; - } - } -} - -void testexp2(void) -{ - float x; - for (x=-11.0;x<24.0;x+=0.0007) - { - float error = fabs(x-(1.442695040888963387*log(celt_exp2(x)))); - if (error>0.0002) - { - fprintf (stderr, "celt_exp2 failed: fabs(x-(1.442695040888963387*log(celt_exp2(x))))>0.0005 (x = %f, error = %f)\n", x,error); - ret = 1; - } - } -} - -void testexp2log2(void) -{ - float x; - for (x=-11.0;x<24.0;x+=0.0007) - { - float error = fabs(x-(celt_log2(celt_exp2(x)))); - if (error>0.001) - { - fprintf (stderr, "celt_log2/celt_exp2 failed: 
fabs(x-(celt_log2(celt_exp2(x))))>0.001 (x = %f, error = %f)\n", x,error); - ret = 1; - } - } -} -#else -void testlog2(void) -{ - opus_val32 x; - for (x=8;x<1073741824;x+=(x>>3)) - { - float error = fabs((1.442695040888963387*log(x/16384.0))-celt_log2(x)/1024.0); - if (error>0.003) - { - fprintf (stderr, "celt_log2 failed: x = %ld, error = %f\n", (long)x,error); - ret = 1; - } - } -} - -void testexp2(void) -{ - opus_val16 x; - for (x=-32768;x<15360;x++) - { - float error1 = fabs(x/1024.0-(1.442695040888963387*log(celt_exp2(x)/65536.0))); - float error2 = fabs(exp(0.6931471805599453094*x/1024.0)-celt_exp2(x)/65536.0); - if (error1>0.0002&&error2>0.00004) - { - fprintf (stderr, "celt_exp2 failed: x = "WORD", error1 = %f, error2 = %f\n", x,error1,error2); - ret = 1; - } - } -} - -void testexp2log2(void) -{ - opus_val32 x; - for (x=8;x<65536;x+=(x>>3)) - { - float error = fabs(x-0.25*celt_exp2(celt_log2(x)))/16384; - if (error>0.004) - { - fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_exp2(celt_log2(x))))>0.001 (x = %ld, error = %f)\n", (long)x,error); - ret = 1; - } - } -} - -void testilog2(void) -{ - opus_val32 x; - for (x=1;x<=268435455;x+=127) - { - opus_val32 lg; - opus_val32 y; - - lg = celt_ilog2(x); - if (lg<0 || lg>=31) - { - printf("celt_ilog2 failed: 0<=celt_ilog2(x)<31 (x = %d, celt_ilog2(x) = %d)\n",x,lg); - ret = 1; - } - y = 1<<lg; - - if (x<y || (x>>1)>=y) - { - printf("celt_ilog2 failed: 2**celt_ilog2(x)<=x<2**(celt_ilog2(x)+1) (x = %d, 2**celt_ilog2(x) = %d)\n",x,y); - ret = 1; - } - } -} -#endif - -int main(void) -{ - testbitexactcos(); - testbitexactlog2tan(); - testdiv(); - testsqrt(); - testlog2(); - testexp2(); - testexp2log2(); -#ifdef FIXED_POINT - testilog2(); -#endif - return ret; -} diff --git a/thirdparty/opus/celt/tests/test_unit_mdct.c b/thirdparty/opus/celt/tests/test_unit_mdct.c deleted file mode 100644 index 8dbb9caa2e..0000000000 --- a/thirdparty/opus/celt/tests/test_unit_mdct.c +++ /dev/null @@ -1,230 +0,0 @@ -/* 
Copyright (c) 2008-2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#define SKIP_CONFIG_H - -#ifndef CUSTOM_MODES -#define CUSTOM_MODES -#endif - -#include <stdio.h> - -#define CELT_C -#include "mdct.h" -#include "stack_alloc.h" - -#include "kiss_fft.c" -#include "mdct.c" -#include "mathops.c" -#include "entcode.c" - -#if defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) -# include "x86/x86cpu.c" -#elif defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -# include "arm/armcpu.c" -# include "pitch.c" -# include "celt_lpc.c" -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -# include "arm/celt_neon_intr.c" -# if defined(HAVE_ARM_NE10) -# include "arm/celt_ne10_fft.c" -# include "arm/celt_ne10_mdct.c" -# endif -# endif -# include "arm/arm_celt_map.c" -#endif - -#ifndef M_PI -#define M_PI 3.141592653 -#endif - -int ret = 0; -void check(kiss_fft_scalar * in,kiss_fft_scalar * out,int nfft,int isinverse) -{ - int bin,k; - double errpow=0,sigpow=0; - double snr; - for (bin=0;bin<nfft/2;++bin) { - double ansr = 0; - double difr; - - for (k=0;k<nfft;++k) { - double phase = 2*M_PI*(k+.5+.25*nfft)*(bin+.5)/nfft; - double re = cos(phase); - - re /= nfft/4; - - ansr += in[k] * re; - } - /*printf ("%f %f\n", ansr, out[bin]);*/ - difr = ansr - out[bin]; - errpow += difr*difr; - sigpow += ansr*ansr; - } - snr = 10*log10(sigpow/errpow); - printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr ); - if (snr<60) { - printf( "** poor snr: %f **\n", snr); - ret = 1; - } -} - -void check_inv(kiss_fft_scalar * in,kiss_fft_scalar * out,int nfft,int isinverse) -{ - int bin,k; - double errpow=0,sigpow=0; - double snr; - for (bin=0;bin<nfft;++bin) { - double ansr = 0; - double difr; - - for (k=0;k<nfft/2;++k) { - double phase = 2*M_PI*(bin+.5+.25*nfft)*(k+.5)/nfft; - double re = cos(phase); - - /*re *= 2;*/ - - ansr += in[k] * re; - } - /*printf ("%f %f\n", ansr, out[bin]);*/ - difr = ansr - out[bin]; - errpow += difr*difr; - sigpow += ansr*ansr; - } - snr = 
10*log10(sigpow/errpow); - printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr ); - if (snr<60) { - printf( "** poor snr: %f **\n", snr); - ret = 1; - } -} - - -void test1d(int nfft,int isinverse,int arch) -{ - mdct_lookup cfg; - size_t buflen = sizeof(kiss_fft_scalar)*nfft; - - kiss_fft_scalar * in = (kiss_fft_scalar*)malloc(buflen); - kiss_fft_scalar * in_copy = (kiss_fft_scalar*)malloc(buflen); - kiss_fft_scalar * out= (kiss_fft_scalar*)malloc(buflen); - opus_val16 * window= (opus_val16*)malloc(sizeof(opus_val16)*nfft/2); - int k; - - clt_mdct_init(&cfg, nfft, 0, arch); - for (k=0;k<nfft;++k) { - in[k] = (rand() % 32768) - 16384; - } - - for (k=0;k<nfft/2;++k) { - window[k] = Q15ONE; - } - for (k=0;k<nfft;++k) { - in[k] *= 32768; - } - - if (isinverse) - { - for (k=0;k<nfft;++k) { - in[k] /= nfft; - } - } - - for (k=0;k<nfft;++k) - in_copy[k] = in[k]; - /*for (k=0;k<nfft;++k) printf("%d %d ", in[k].r, in[k].i);printf("\n");*/ - - if (isinverse) - { - for (k=0;k<nfft;++k) - out[k] = 0; - clt_mdct_backward(&cfg,in,out, window, nfft/2, 0, 1, arch); - /* apply TDAC because clt_mdct_backward() no longer does that */ - for (k=0;k<nfft/4;++k) - out[nfft-k-1] = out[nfft/2+k]; - check_inv(in,out,nfft,isinverse); - } else { - clt_mdct_forward(&cfg,in,out,window, nfft/2, 0, 1, arch); - check(in_copy,out,nfft,isinverse); - } - /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/ - - - free(in); - free(in_copy); - free(out); - free(window); - clt_mdct_clear(&cfg, arch); -} - -int main(int argc,char ** argv) -{ - ALLOC_STACK; - int arch = opus_select_arch(); - - if (argc>1) { - int k; - for (k=1;k<argc;++k) { - test1d(atoi(argv[k]),0,arch); - test1d(atoi(argv[k]),1,arch); - } - }else{ - test1d(32,0,arch); - test1d(32,1,arch); - test1d(256,0,arch); - test1d(256,1,arch); - test1d(512,0,arch); - test1d(512,1,arch); - test1d(1024,0,arch); - test1d(1024,1,arch); - test1d(2048,0,arch); - test1d(2048,1,arch); -#ifndef RADIX_TWO_ONLY - test1d(36,0,arch); 
- test1d(36,1,arch); - test1d(40,0,arch); - test1d(40,1,arch); - test1d(60,0,arch); - test1d(60,1,arch); - test1d(120,0,arch); - test1d(120,1,arch); - test1d(240,0,arch); - test1d(240,1,arch); - test1d(480,0,arch); - test1d(480,1,arch); - test1d(960,0,arch); - test1d(960,1,arch); - test1d(1920,0,arch); - test1d(1920,1,arch); -#endif - } - return ret; -} diff --git a/thirdparty/opus/celt/tests/test_unit_rotation.c b/thirdparty/opus/celt/tests/test_unit_rotation.c deleted file mode 100644 index 1080c2085d..0000000000 --- a/thirdparty/opus/celt/tests/test_unit_rotation.c +++ /dev/null @@ -1,120 +0,0 @@ -/* Copyright (c) 2008-2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifndef CUSTOM_MODES -#define CUSTOM_MODES -#endif - -#define CELT_C - -#include <stdio.h> -#include <stdlib.h> -#include "vq.c" -#include "cwrs.c" -#include "entcode.c" -#include "entenc.c" -#include "entdec.c" -#include "mathops.c" -#include "bands.h" -#include "pitch.c" -#include "celt_lpc.c" -#include "celt.c" -#include <math.h> - -#if defined(OPUS_X86_MAY_HAVE_SSE) || defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) -# if defined(OPUS_X86_MAY_HAVE_SSE) -# include "x86/pitch_sse.c" -# endif -# if defined(OPUS_X86_MAY_HAVE_SSE2) -# include "x86/pitch_sse2.c" -# endif -# if defined(OPUS_X86_MAY_HAVE_SSE4_1) -# include "x86/pitch_sse4_1.c" -# include "x86/celt_lpc_sse.c" -# endif -# include "x86/x86_celt_map.c" -#elif defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -# include "arm/armcpu.c" -# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -# include "arm/celt_neon_intr.c" -# if defined(HAVE_ARM_NE10) -# include "kiss_fft.c" -# include "mdct.c" -# include "arm/celt_ne10_fft.c" -# include "arm/celt_ne10_mdct.c" -# endif -# endif -# include "arm/arm_celt_map.c" -#endif - -#define MAX_SIZE 100 - -int ret=0; -void test_rotation(int N, int K) -{ - int i; - double err = 0, ener = 0, snr, snr0; - opus_val16 x0[MAX_SIZE]; - opus_val16 x1[MAX_SIZE]; - for (i=0;i<N;i++) - x1[i] = x0[i] = rand()%32767-16384; - exp_rotation(x1, N, 1, 1, K, SPREAD_NORMAL); - for (i=0;i<N;i++) - { - err += (x0[i]-(double)x1[i])*(x0[i]-(double)x1[i]); - ener += x0[i]*(double)x0[i]; - } - snr0 = 20*log10(ener/err); - err = ener = 0; - exp_rotation(x1, N, -1, 1, K, SPREAD_NORMAL); - for (i=0;i<N;i++) - { - err += (x0[i]-(double)x1[i])*(x0[i]-(double)x1[i]); - ener += x0[i]*(double)x0[i]; - } - snr = 20*log10(ener/err); - printf ("SNR for size %d (%d pulses) is %f (was %f without inverse)\n", N, K, snr, snr0); - if (snr < 60 || snr0 > 20) - { - fprintf(stderr, "FAIL!\n"); - ret = 1; - } -} - -int main(void) -{ - 
ALLOC_STACK; - test_rotation(15, 3); - test_rotation(23, 5); - test_rotation(50, 3); - test_rotation(80, 1); - return ret; -} diff --git a/thirdparty/opus/celt/tests/test_unit_types.c b/thirdparty/opus/celt/tests/test_unit_types.c deleted file mode 100644 index 67a0fb8ed3..0000000000 --- a/thirdparty/opus/celt/tests/test_unit_types.c +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright (c) 2008-2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus_types.h" -#include <stdio.h> - -int main(void) -{ - opus_int16 i = 1; - i <<= 14; - if (i>>14 != 1) - { - fprintf(stderr, "opus_int16 isn't 16 bits\n"); - return 1; - } - if (sizeof(opus_int16)*2 != sizeof(opus_int32)) - { - fprintf(stderr, "16*2 != 32\n"); - return 1; - } - return 0; -} diff --git a/thirdparty/opus/celt/vq.c b/thirdparty/opus/celt/vq.c deleted file mode 100644 index d29f38fd8e..0000000000 --- a/thirdparty/opus/celt/vq.c +++ /dev/null @@ -1,408 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "mathops.h" -#include "cwrs.h" -#include "vq.h" -#include "arch.h" -#include "os_support.h" -#include "bands.h" -#include "rate.h" -#include "pitch.h" - -#ifndef OVERRIDE_vq_exp_rotation1 -static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) -{ - int i; - opus_val16 ms; - celt_norm *Xptr; - Xptr = X; - ms = NEG16(s); - for (i=0;i<len-stride;i++) - { - celt_norm x1, x2; - x1 = Xptr[0]; - x2 = Xptr[stride]; - Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); - *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); - } - Xptr = &X[len-2*stride-1]; - for (i=len-2*stride-1;i>=0;i--) - { - celt_norm x1, x2; - x1 = Xptr[0]; - x2 = Xptr[stride]; - Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); - *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); - } -} -#endif /* OVERRIDE_vq_exp_rotation1 */ - -static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) -{ - static const int SPREAD_FACTOR[3]={15,10,5}; - int i; - opus_val16 c, s; - opus_val16 gain, theta; - int stride2=0; - int factor; - - if (2*K>=len || spread==SPREAD_NONE) - return; - factor = SPREAD_FACTOR[spread-1]; - - gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K)); - theta = HALF16(MULT16_16_Q15(gain,gain)); - - c = celt_cos_norm(EXTEND32(theta)); - s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */ - - if (len>=8*stride) - { - stride2 = 1; - /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding. - It's basically incrementing long as (stride2+0.5)^2 < len/stride. 
*/ - while ((stride2*stride2+stride2)*stride + (stride>>2) < len) - stride2++; - } - /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for - extract_collapse_mask().*/ - len = celt_udiv(len, stride); - for (i=0;i<stride;i++) - { - if (dir < 0) - { - if (stride2) - exp_rotation1(X+i*len, len, stride2, s, c); - exp_rotation1(X+i*len, len, 1, c, s); - } else { - exp_rotation1(X+i*len, len, 1, c, -s); - if (stride2) - exp_rotation1(X+i*len, len, stride2, s, -c); - } - } -} - -/** Takes the pitch vector and the decoded residual vector, computes the gain - that will give ||p+g*y||=1 and mixes the residual with the pitch. */ -static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X, - int N, opus_val32 Ryy, opus_val16 gain) -{ - int i; -#ifdef FIXED_POINT - int k; -#endif - opus_val32 t; - opus_val16 g; - -#ifdef FIXED_POINT - k = celt_ilog2(Ryy)>>1; -#endif - t = VSHR32(Ryy, 2*(k-7)); - g = MULT16_16_P15(celt_rsqrt_norm(t),gain); - - i=0; - do - X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1)); - while (++i < N); -} - -static unsigned extract_collapse_mask(int *iy, int N, int B) -{ - unsigned collapse_mask; - int N0; - int i; - if (B<=1) - return 1; - /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for - exp_rotation().*/ - N0 = celt_udiv(N, B); - collapse_mask = 0; - i=0; do { - int j; - unsigned tmp=0; - j=0; do { - tmp |= iy[i*N0+j]; - } while (++j<N0); - collapse_mask |= (tmp!=0)<<i; - } while (++i<B); - return collapse_mask; -} - -unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc -#ifdef RESYNTH - , opus_val16 gain -#endif - ) -{ - VARDECL(celt_norm, y); - VARDECL(int, iy); - VARDECL(opus_val16, signx); - int i, j; - opus_val16 s; - int pulsesLeft; - opus_val32 sum; - opus_val32 xy; - opus_val16 yy; - unsigned collapse_mask; - SAVE_STACK; - - celt_assert2(K>0, "alg_quant() needs at least one pulse"); - celt_assert2(N>1, 
"alg_quant() needs at least two dimensions"); - - ALLOC(y, N, celt_norm); - ALLOC(iy, N, int); - ALLOC(signx, N, opus_val16); - - exp_rotation(X, N, 1, B, K, spread); - - /* Get rid of the sign */ - sum = 0; - j=0; do { - if (X[j]>0) - signx[j]=1; - else { - signx[j]=-1; - X[j]=-X[j]; - } - iy[j] = 0; - y[j] = 0; - } while (++j<N); - - xy = yy = 0; - - pulsesLeft = K; - - /* Do a pre-search by projecting on the pyramid */ - if (K > (N>>1)) - { - opus_val16 rcp; - j=0; do { - sum += X[j]; - } while (++j<N); - - /* If X is too small, just replace it with a pulse at 0 */ -#ifdef FIXED_POINT - if (sum <= K) -#else - /* Prevents infinities and NaNs from causing too many pulses - to be allocated. 64 is an approximation of infinity here. */ - if (!(sum > EPSILON && sum < 64)) -#endif - { - X[0] = QCONST16(1.f,14); - j=1; do - X[j]=0; - while (++j<N); - sum = QCONST16(1.f,14); - } - rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum))); - j=0; do { -#ifdef FIXED_POINT - /* It's really important to round *towards zero* here */ - iy[j] = MULT16_16_Q15(X[j],rcp); -#else - iy[j] = (int)floor(rcp*X[j]); -#endif - y[j] = (celt_norm)iy[j]; - yy = MAC16_16(yy, y[j],y[j]); - xy = MAC16_16(xy, X[j],y[j]); - y[j] *= 2; - pulsesLeft -= iy[j]; - } while (++j<N); - } - celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass"); - - /* This should never happen, but just in case it does (e.g. on silence) - we fill the first bin with pulses. 
*/ -#ifdef FIXED_POINT_DEBUG - celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass"); -#endif - if (pulsesLeft > N+3) - { - opus_val16 tmp = (opus_val16)pulsesLeft; - yy = MAC16_16(yy, tmp, tmp); - yy = MAC16_16(yy, tmp, y[0]); - iy[0] += pulsesLeft; - pulsesLeft=0; - } - - s = 1; - for (i=0;i<pulsesLeft;i++) - { - int best_id; - opus_val32 best_num = -VERY_LARGE16; - opus_val16 best_den = 0; -#ifdef FIXED_POINT - int rshift; -#endif -#ifdef FIXED_POINT - rshift = 1+celt_ilog2(K-pulsesLeft+i+1); -#endif - best_id = 0; - /* The squared magnitude term gets added anyway, so we might as well - add it outside the loop */ - yy = ADD16(yy, 1); - j=0; - do { - opus_val16 Rxy, Ryy; - /* Temporary sums of the new pulse(s) */ - Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift)); - /* We're multiplying y[j] by two so we don't have to do it here */ - Ryy = ADD16(yy, y[j]); - - /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that - Rxy is positive because the sign is pre-computed) */ - Rxy = MULT16_16_Q15(Rxy,Rxy); - /* The idea is to check for num/den >= best_num/best_den, but that way - we can do it without any division */ - /* OPT: Make sure to use conditional moves here */ - if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)) - { - best_den = Ryy; - best_num = Rxy; - best_id = j; - } - } while (++j<N); - - /* Updating the sums of the new pulse(s) */ - xy = ADD32(xy, EXTEND32(X[best_id])); - /* We're multiplying y[j] by two so we don't have to do it here */ - yy = ADD16(yy, y[best_id]); - - /* Only now that we've made the final choice, update y/iy */ - /* Multiplying y[j] by 2 so we don't have to do it everywhere else */ - y[best_id] += 2*s; - iy[best_id]++; - } - - /* Put the original sign back */ - j=0; - do { - X[j] = MULT16_16(signx[j],X[j]); - if (signx[j] < 0) - iy[j] = -iy[j]; - } while (++j<N); - encode_pulses(iy, N, K, enc); - -#ifdef RESYNTH - normalise_residual(iy, X, N, yy, gain); - exp_rotation(X, N, -1, B, K, spread); 
-#endif - - collapse_mask = extract_collapse_mask(iy, N, B); - RESTORE_STACK; - return collapse_mask; -} - -/** Decode pulse vector and combine the result with the pitch vector to produce - the final normalised signal in the current band. */ -unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, - ec_dec *dec, opus_val16 gain) -{ - opus_val32 Ryy; - unsigned collapse_mask; - VARDECL(int, iy); - SAVE_STACK; - - celt_assert2(K>0, "alg_unquant() needs at least one pulse"); - celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); - ALLOC(iy, N, int); - Ryy = decode_pulses(iy, N, K, dec); - normalise_residual(iy, X, N, Ryy, gain); - exp_rotation(X, N, -1, B, K, spread); - collapse_mask = extract_collapse_mask(iy, N, B); - RESTORE_STACK; - return collapse_mask; -} - -#ifndef OVERRIDE_renormalise_vector -void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch) -{ - int i; -#ifdef FIXED_POINT - int k; -#endif - opus_val32 E; - opus_val16 g; - opus_val32 t; - celt_norm *xptr; - E = EPSILON + celt_inner_prod(X, X, N, arch); -#ifdef FIXED_POINT - k = celt_ilog2(E)>>1; -#endif - t = VSHR32(E, 2*(k-7)); - g = MULT16_16_P15(celt_rsqrt_norm(t),gain); - - xptr = X; - for (i=0;i<N;i++) - { - *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1)); - xptr++; - } - /*return celt_sqrt(E);*/ -} -#endif /* OVERRIDE_renormalise_vector */ - -int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch) -{ - int i; - int itheta; - opus_val16 mid, side; - opus_val32 Emid, Eside; - - Emid = Eside = EPSILON; - if (stereo) - { - for (i=0;i<N;i++) - { - celt_norm m, s; - m = ADD16(SHR16(X[i],1),SHR16(Y[i],1)); - s = SUB16(SHR16(X[i],1),SHR16(Y[i],1)); - Emid = MAC16_16(Emid, m, m); - Eside = MAC16_16(Eside, s, s); - } - } else { - Emid += celt_inner_prod(X, X, N, arch); - Eside += celt_inner_prod(Y, Y, N, arch); - } - mid = celt_sqrt(Emid); - side = celt_sqrt(Eside); -#ifdef FIXED_POINT - /* 0.63662 = 2/pi */ - itheta = 
MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid)); -#else - itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid)); -#endif - - return itheta; -} diff --git a/thirdparty/opus/celt/vq.h b/thirdparty/opus/celt/vq.h deleted file mode 100644 index 5cfcbe50ea..0000000000 --- a/thirdparty/opus/celt/vq.h +++ /dev/null @@ -1,75 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/** - @file vq.h - @brief Vector quantisation of the residual - */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifndef VQ_H -#define VQ_H - -#include "entenc.h" -#include "entdec.h" -#include "modes.h" - -#if defined(MIPSr1_ASM) -#include "mips/vq_mipsr1.h" -#endif - - -/** Algebraic pulse-vector quantiser. The signal x is replaced by the sum of - * the pitch and a combination of pulses such that its norm is still equal - * to 1. This is the function that will typically require the most CPU. - * @param X Residual signal to quantise/encode (returns quantised version) - * @param N Number of samples to encode - * @param K Number of pulses to use - * @param enc Entropy encoder state - * @ret A mask indicating which blocks in the band received pulses -*/ -unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, - ec_enc *enc -#ifdef RESYNTH - , opus_val16 gain -#endif - ); - -/** Algebraic pulse decoder - * @param X Decoded normalised spectrum (returned) - * @param N Number of samples to decode - * @param K Number of pulses to use - * @param dec Entropy decoder state - * @ret A mask indicating which blocks in the band received pulses - */ -unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, - ec_dec *dec, opus_val16 gain); - -void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch); - -int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch); - -#endif /* VQ_H */ diff --git a/thirdparty/opus/celt/x86/celt_lpc_sse.c b/thirdparty/opus/celt/x86/celt_lpc_sse.c deleted file mode 100644 index 67e5592acf..0000000000 --- a/thirdparty/opus/celt/x86/celt_lpc_sse.c +++ /dev/null @@ -1,132 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <xmmintrin.h> -#include <emmintrin.h> -#include <smmintrin.h> -#include "celt_lpc.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "pitch.h" -#include "x86cpu.h" - -#if defined(FIXED_POINT) - -void celt_fir_sse4_1(const opus_val16 *_x, - const opus_val16 *num, - opus_val16 *_y, - int N, - int ord, - opus_val16 *mem, - int arch) -{ - int i,j; - VARDECL(opus_val16, rnum); - VARDECL(opus_val16, x); - - __m128i vecNoA; - opus_int32 noA ; - SAVE_STACK; - - ALLOC(rnum, ord, opus_val16); - ALLOC(x, N+ord, opus_val16); - for(i=0;i<ord;i++) - rnum[i] = num[ord-i-1]; - for(i=0;i<ord;i++) - x[i] = mem[ord-i-1]; - - for (i=0;i<N-7;i+=8) - { - x[i+ord ]=_x[i ]; - x[i+ord+1]=_x[i+1]; - x[i+ord+2]=_x[i+2]; - x[i+ord+3]=_x[i+3]; - x[i+ord+4]=_x[i+4]; - x[i+ord+5]=_x[i+5]; - x[i+ord+6]=_x[i+6]; - x[i+ord+7]=_x[i+7]; - } - - for (;i<N-3;i+=4) - { - x[i+ord ]=_x[i ]; - x[i+ord+1]=_x[i+1]; - x[i+ord+2]=_x[i+2]; - 
x[i+ord+3]=_x[i+3]; - } - - for (;i<N;i++) - x[i+ord]=_x[i]; - - for(i=0;i<ord;i++) - mem[i] = _x[N-i-1]; -#ifdef SMALL_FOOTPRINT - for (i=0;i<N;i++) - { - opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT); - for (j=0;j<ord;j++) - { - sum = MAC16_16(sum,rnum[j],x[i+j]); - } - _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT)); - } -#else - noA = EXTEND32(1) << SIG_SHIFT >> 1; - vecNoA = _mm_set_epi32(noA, noA, noA, noA); - - for (i=0;i<N-3;i+=4) - { - opus_val32 sums[4] = {0}; - __m128i vecSum, vecX; - - xcorr_kernel(rnum, x+i, sums, ord, arch); - - vecSum = _mm_loadu_si128((__m128i *)sums); - vecSum = _mm_add_epi32(vecSum, vecNoA); - vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT); - vecX = OP_CVTEPI16_EPI32_M64(_x + i); - vecSum = _mm_add_epi32(vecSum, vecX); - vecSum = _mm_packs_epi32(vecSum, vecSum); - _mm_storel_epi64((__m128i *)(_y + i), vecSum); - } - for (;i<N;i++) - { - opus_val32 sum = 0; - for (j=0;j<ord;j++) - sum = MAC16_16(sum, rnum[j], x[i + j]); - _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT))); - } - -#endif - RESTORE_STACK; -} - -#endif diff --git a/thirdparty/opus/celt/x86/celt_lpc_sse.h b/thirdparty/opus/celt/x86/celt_lpc_sse.h deleted file mode 100644 index c5ec796ed5..0000000000 --- a/thirdparty/opus/celt/x86/celt_lpc_sse.h +++ /dev/null @@ -1,68 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef CELT_LPC_SSE_H -#define CELT_LPC_SSE_H - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT) -#define OVERRIDE_CELT_FIR - -void celt_fir_sse4_1( - const opus_val16 *x, - const opus_val16 *num, - opus_val16 *y, - int N, - int ord, - opus_val16 *mem, - int arch); - -#if defined(OPUS_X86_PRESUME_SSE4_1) -#define celt_fir(x, num, y, N, ord, mem, arch) \ - ((void)arch, celt_fir_sse4_1(x, num, y, N, ord, mem, arch)) - -#else - -extern void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *num, - opus_val16 *y, - int N, - int ord, - opus_val16 *mem, - int arch); - -# define celt_fir(x, num, y, N, ord, mem, arch) \ - ((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, mem, arch)) - -#endif -#endif - -#endif diff --git a/thirdparty/opus/celt/x86/pitch_sse.c b/thirdparty/opus/celt/x86/pitch_sse.c deleted file mode 100644 index 20e73126b6..0000000000 --- a/thirdparty/opus/celt/x86/pitch_sse.c +++ /dev/null @@ -1,185 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - 
modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "macros.h" -#include "celt_lpc.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "pitch.h" - -#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT) - -#include <xmmintrin.h> -#include "arch.h" - -void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len) -{ - int j; - __m128 xsum1, xsum2; - xsum1 = _mm_loadu_ps(sum); - xsum2 = _mm_setzero_ps(); - - for (j = 0; j < len-3; j += 4) - { - __m128 x0 = _mm_loadu_ps(x+j); - __m128 yj = _mm_loadu_ps(y+j); - __m128 y3 = _mm_loadu_ps(y+j+3); - - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj)); - xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55), - _mm_shuffle_ps(yj,y3,0x49))); - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa), - _mm_shuffle_ps(yj,y3,0x9e))); - xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3)); - } - if (j < len) - { - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); - if (++j < len) - { - xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); - if (++j < len) - { - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); - } - } - } - _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2)); -} - - -void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, - int N, opus_val32 *xy1, opus_val32 *xy2) -{ - int i; - __m128 xsum1, xsum2; - xsum1 = _mm_setzero_ps(); - xsum2 = _mm_setzero_ps(); - for (i=0;i<N-3;i+=4) - { - __m128 xi = _mm_loadu_ps(x+i); - __m128 y1i = _mm_loadu_ps(y01+i); - __m128 y2i = _mm_loadu_ps(y02+i); - xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(xi, y1i)); - xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(xi, y2i)); - } - /* Horizontal sum */ - xsum1 = _mm_add_ps(xsum1, _mm_movehl_ps(xsum1, xsum1)); - xsum1 = _mm_add_ss(xsum1, _mm_shuffle_ps(xsum1, xsum1, 0x55)); - _mm_store_ss(xy1, xsum1); - xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2)); 
- xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55)); - _mm_store_ss(xy2, xsum2); - for (;i<N;i++) - { - *xy1 = MAC16_16(*xy1, x[i], y01[i]); - *xy2 = MAC16_16(*xy2, x[i], y02[i]); - } -} - -opus_val32 celt_inner_prod_sse(const opus_val16 *x, const opus_val16 *y, - int N) -{ - int i; - float xy; - __m128 sum; - sum = _mm_setzero_ps(); - /* FIXME: We should probably go 8-way and use 2 sums. */ - for (i=0;i<N-3;i+=4) - { - __m128 xi = _mm_loadu_ps(x+i); - __m128 yi = _mm_loadu_ps(y+i); - sum = _mm_add_ps(sum,_mm_mul_ps(xi, yi)); - } - /* Horizontal sum */ - sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); - sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); - _mm_store_ss(&xy, sum); - for (;i<N;i++) - { - xy = MAC16_16(xy, x[i], y[i]); - } - return xy; -} - -void comb_filter_const_sse(opus_val32 *y, opus_val32 *x, int T, int N, - opus_val16 g10, opus_val16 g11, opus_val16 g12) -{ - int i; - __m128 x0v; - __m128 g10v, g11v, g12v; - g10v = _mm_load1_ps(&g10); - g11v = _mm_load1_ps(&g11); - g12v = _mm_load1_ps(&g12); - x0v = _mm_loadu_ps(&x[-T-2]); - for (i=0;i<N-3;i+=4) - { - __m128 yi, yi2, x1v, x2v, x3v, x4v; - const opus_val32 *xp = &x[i-T-2]; - yi = _mm_loadu_ps(x+i); - x4v = _mm_loadu_ps(xp+4); -#if 0 - /* Slower version with all loads */ - x1v = _mm_loadu_ps(xp+1); - x2v = _mm_loadu_ps(xp+2); - x3v = _mm_loadu_ps(xp+3); -#else - x2v = _mm_shuffle_ps(x0v, x4v, 0x4e); - x1v = _mm_shuffle_ps(x0v, x2v, 0x99); - x3v = _mm_shuffle_ps(x2v, x4v, 0x99); -#endif - - yi = _mm_add_ps(yi, _mm_mul_ps(g10v,x2v)); -#if 0 /* Set to 1 to make it bit-exact with the non-SSE version */ - yi = _mm_add_ps(yi, _mm_mul_ps(g11v,_mm_add_ps(x3v,x1v))); - yi = _mm_add_ps(yi, _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v))); -#else - /* Use partial sums */ - yi2 = _mm_add_ps(_mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)), - _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v))); - yi = _mm_add_ps(yi, yi2); -#endif - x0v=x4v; - _mm_storeu_ps(y+i, yi); - } -#ifdef CUSTOM_MODES - for (;i<N;i++) - { - y[i] = x[i] - + 
MULT16_32_Q15(g10,x[i-T]) - + MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1])) - + MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2])); - } -#endif -} - - -#endif diff --git a/thirdparty/opus/celt/x86/pitch_sse.h b/thirdparty/opus/celt/x86/pitch_sse.h deleted file mode 100644 index e5f87ab51a..0000000000 --- a/thirdparty/opus/celt/x86/pitch_sse.h +++ /dev/null @@ -1,192 +0,0 @@ -/* Copyright (c) 2013 Jean-Marc Valin and John Ridges - Copyright (c) 2014, Cisco Systems, INC MingXiang WeiZhou MinPeng YanWang*/ -/** - @file pitch_sse.h - @brief Pitch analysis - */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifndef PITCH_SSE_H -#define PITCH_SSE_H - -#if defined(HAVE_CONFIG_H) -#include "config.h" -#endif - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT) -void xcorr_kernel_sse4_1( - const opus_int16 *x, - const opus_int16 *y, - opus_val32 sum[4], - int len); -#endif - -#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT) -void xcorr_kernel_sse( - const opus_val16 *x, - const opus_val16 *y, - opus_val32 sum[4], - int len); -#endif - -#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT) -#define OVERRIDE_XCORR_KERNEL -#define xcorr_kernel(x, y, sum, len, arch) \ - ((void)arch, xcorr_kernel_sse4_1(x, y, sum, len)) - -#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT) -#define OVERRIDE_XCORR_KERNEL -#define xcorr_kernel(x, y, sum, len, arch) \ - ((void)arch, xcorr_kernel_sse(x, y, sum, len)) - -#elif (defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)) || (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) - -extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y, - opus_val32 sum[4], - int len); - -#define OVERRIDE_XCORR_KERNEL -#define xcorr_kernel(x, y, sum, len, arch) \ - ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len)) - -#endif - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT) -opus_val32 celt_inner_prod_sse4_1( - const opus_int16 *x, - const opus_int16 *y, - int N); -#endif - -#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT) -opus_val32 celt_inner_prod_sse2( - const opus_int16 *x, - const opus_int16 *y, - int N); -#endif - -#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT) -opus_val32 celt_inner_prod_sse( - const opus_val16 *x, - const opus_val16 *y, - int N); -#endif - - -#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT) -#define OVERRIDE_CELT_INNER_PROD -#define celt_inner_prod(x, y, N, arch) \ - ((void)arch, celt_inner_prod_sse4_1(x, y, N)) - -#elif defined(OPUS_X86_PRESUME_SSE2) && 
defined(FIXED_POINT) && !defined(OPUS_X86_MAY_HAVE_SSE4_1) -#define OVERRIDE_CELT_INNER_PROD -#define celt_inner_prod(x, y, N, arch) \ - ((void)arch, celt_inner_prod_sse2(x, y, N)) - -#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT) -#define OVERRIDE_CELT_INNER_PROD -#define celt_inner_prod(x, y, N, arch) \ - ((void)arch, celt_inner_prod_sse(x, y, N)) - - -#elif ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \ - (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) - -extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y, - int N); - -#define OVERRIDE_CELT_INNER_PROD -#define celt_inner_prod(x, y, N, arch) \ - ((*CELT_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y, N)) - -#endif - -#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT) - -#define OVERRIDE_DUAL_INNER_PROD -#define OVERRIDE_COMB_FILTER_CONST - -#undef dual_inner_prod -#undef comb_filter_const - -void dual_inner_prod_sse(const opus_val16 *x, - const opus_val16 *y01, - const opus_val16 *y02, - int N, - opus_val32 *xy1, - opus_val32 *xy2); - -void comb_filter_const_sse(opus_val32 *y, - opus_val32 *x, - int T, - int N, - opus_val16 g10, - opus_val16 g11, - opus_val16 g12); - - -#if defined(OPUS_X86_PRESUME_SSE) -# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \ - ((void)(arch),dual_inner_prod_sse(x, y01, y02, N, xy1, xy2)) - -# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ - ((void)(arch),comb_filter_const_sse(y, x, T, N, g10, g11, g12)) -#else - -extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y01, - const opus_val16 *y02, - int N, - opus_val32 *xy1, - opus_val32 *xy2); - -#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \ - ((*DUAL_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2)) - -extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])( - opus_val32 *y, - 
opus_val32 *x, - int T, - int N, - opus_val16 g10, - opus_val16 g11, - opus_val16 g12); - -#define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ - ((*COMB_FILTER_CONST_IMPL[(arch) & OPUS_ARCHMASK])(y, x, T, N, g10, g11, g12)) - -#define NON_STATIC_COMB_FILTER_CONST_C - -#endif -#endif - -#endif diff --git a/thirdparty/opus/celt/x86/pitch_sse2.c b/thirdparty/opus/celt/x86/pitch_sse2.c deleted file mode 100644 index a0e7d1beaf..0000000000 --- a/thirdparty/opus/celt/x86/pitch_sse2.c +++ /dev/null @@ -1,95 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <xmmintrin.h> -#include <emmintrin.h> - -#include "macros.h" -#include "celt_lpc.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "pitch.h" - -#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT) -opus_val32 celt_inner_prod_sse2(const opus_val16 *x, const opus_val16 *y, - int N) -{ - opus_int i, dataSize16; - opus_int32 sum; - - __m128i inVec1_76543210, inVec1_FEDCBA98, acc1; - __m128i inVec2_76543210, inVec2_FEDCBA98, acc2; - - sum = 0; - dataSize16 = N & ~15; - - acc1 = _mm_setzero_si128(); - acc2 = _mm_setzero_si128(); - - for (i=0;i<dataSize16;i+=16) - { - inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0])); - inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0])); - - inVec1_FEDCBA98 = _mm_loadu_si128((__m128i *)(&x[i + 8])); - inVec2_FEDCBA98 = _mm_loadu_si128((__m128i *)(&y[i + 8])); - - inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210); - inVec1_FEDCBA98 = _mm_madd_epi16(inVec1_FEDCBA98, inVec2_FEDCBA98); - - acc1 = _mm_add_epi32(acc1, inVec1_76543210); - acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98); - } - - acc1 = _mm_add_epi32( acc1, acc2 ); - - if (N - i >= 8) - { - inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0])); - inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0])); - - inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210); - - acc1 = _mm_add_epi32(acc1, inVec1_76543210); - i += 8; - } - - acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64( acc1, acc1)); - acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16( acc1, 0x0E)); - sum += _mm_cvtsi128_si32(acc1); - - for (;i<N;i++) { - sum = silk_SMLABB(sum, x[i], y[i]); - } - - return sum; -} -#endif diff --git a/thirdparty/opus/celt/x86/pitch_sse4_1.c b/thirdparty/opus/celt/x86/pitch_sse4_1.c deleted file mode 100644 index a092c68b24..0000000000 --- a/thirdparty/opus/celt/x86/pitch_sse4_1.c +++ /dev/null @@ -1,195 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by 
XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <xmmintrin.h> -#include <emmintrin.h> - -#include "macros.h" -#include "celt_lpc.h" -#include "stack_alloc.h" -#include "mathops.h" -#include "pitch.h" - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT) -#include <smmintrin.h> -#include "x86cpu.h" - -opus_val32 celt_inner_prod_sse4_1(const opus_val16 *x, const opus_val16 *y, - int N) -{ - opus_int i, dataSize16; - opus_int32 sum; - __m128i inVec1_76543210, inVec1_FEDCBA98, acc1; - __m128i inVec2_76543210, inVec2_FEDCBA98, acc2; - __m128i inVec1_3210, inVec2_3210; - - sum = 0; - dataSize16 = N & ~15; - - acc1 = _mm_setzero_si128(); - acc2 = _mm_setzero_si128(); - - for (i=0;i<dataSize16;i+=16) { - inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0])); - inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0])); - - inVec1_FEDCBA98 = _mm_loadu_si128((__m128i *)(&x[i + 8])); - inVec2_FEDCBA98 = _mm_loadu_si128((__m128i *)(&y[i + 8])); - - inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210); - inVec1_FEDCBA98 = _mm_madd_epi16(inVec1_FEDCBA98, inVec2_FEDCBA98); - - acc1 = _mm_add_epi32(acc1, inVec1_76543210); - acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98); - } - - acc1 = _mm_add_epi32(acc1, acc2); - - if (N - i >= 8) - { - inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0])); - inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0])); - - inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210); - - acc1 = _mm_add_epi32(acc1, inVec1_76543210); - i += 8; - } - - if (N - i >= 4) - { - inVec1_3210 = OP_CVTEPI16_EPI32_M64(&x[i + 0]); - inVec2_3210 = OP_CVTEPI16_EPI32_M64(&y[i + 0]); - - inVec1_3210 = _mm_mullo_epi32(inVec1_3210, inVec2_3210); - - acc1 = _mm_add_epi32(acc1, inVec1_3210); - i += 4; - } - - acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64(acc1, acc1)); - acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16(acc1, 0x0E)); - - sum += _mm_cvtsi128_si32(acc1); - - for (;i<N;i++) - { - sum = silk_SMLABB(sum, 
x[i], y[i]); - } - - return sum; -} - -void xcorr_kernel_sse4_1(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[ 4 ], int len) -{ - int j; - - __m128i vecX, vecX0, vecX1, vecX2, vecX3; - __m128i vecY0, vecY1, vecY2, vecY3; - __m128i sum0, sum1, sum2, sum3, vecSum; - __m128i initSum; - - celt_assert(len >= 3); - - sum0 = _mm_setzero_si128(); - sum1 = _mm_setzero_si128(); - sum2 = _mm_setzero_si128(); - sum3 = _mm_setzero_si128(); - - for (j=0;j<(len-7);j+=8) - { - vecX = _mm_loadu_si128((__m128i *)(&x[j + 0])); - vecY0 = _mm_loadu_si128((__m128i *)(&y[j + 0])); - vecY1 = _mm_loadu_si128((__m128i *)(&y[j + 1])); - vecY2 = _mm_loadu_si128((__m128i *)(&y[j + 2])); - vecY3 = _mm_loadu_si128((__m128i *)(&y[j + 3])); - - sum0 = _mm_add_epi32(sum0, _mm_madd_epi16(vecX, vecY0)); - sum1 = _mm_add_epi32(sum1, _mm_madd_epi16(vecX, vecY1)); - sum2 = _mm_add_epi32(sum2, _mm_madd_epi16(vecX, vecY2)); - sum3 = _mm_add_epi32(sum3, _mm_madd_epi16(vecX, vecY3)); - } - - sum0 = _mm_add_epi32(sum0, _mm_unpackhi_epi64( sum0, sum0)); - sum0 = _mm_add_epi32(sum0, _mm_shufflelo_epi16( sum0, 0x0E)); - - sum1 = _mm_add_epi32(sum1, _mm_unpackhi_epi64( sum1, sum1)); - sum1 = _mm_add_epi32(sum1, _mm_shufflelo_epi16( sum1, 0x0E)); - - sum2 = _mm_add_epi32(sum2, _mm_unpackhi_epi64( sum2, sum2)); - sum2 = _mm_add_epi32(sum2, _mm_shufflelo_epi16( sum2, 0x0E)); - - sum3 = _mm_add_epi32(sum3, _mm_unpackhi_epi64( sum3, sum3)); - sum3 = _mm_add_epi32(sum3, _mm_shufflelo_epi16( sum3, 0x0E)); - - vecSum = _mm_unpacklo_epi64(_mm_unpacklo_epi32(sum0, sum1), - _mm_unpacklo_epi32(sum2, sum3)); - - for (;j<(len-3);j+=4) - { - vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]); - vecX0 = _mm_shuffle_epi32(vecX, 0x00); - vecX1 = _mm_shuffle_epi32(vecX, 0x55); - vecX2 = _mm_shuffle_epi32(vecX, 0xaa); - vecX3 = _mm_shuffle_epi32(vecX, 0xff); - - vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]); - vecY1 = OP_CVTEPI16_EPI32_M64(&y[j + 1]); - vecY2 = OP_CVTEPI16_EPI32_M64(&y[j + 2]); - vecY3 = OP_CVTEPI16_EPI32_M64(&y[j + 
3]); - - sum0 = _mm_mullo_epi32(vecX0, vecY0); - sum1 = _mm_mullo_epi32(vecX1, vecY1); - sum2 = _mm_mullo_epi32(vecX2, vecY2); - sum3 = _mm_mullo_epi32(vecX3, vecY3); - - sum0 = _mm_add_epi32(sum0, sum1); - sum2 = _mm_add_epi32(sum2, sum3); - vecSum = _mm_add_epi32(vecSum, sum0); - vecSum = _mm_add_epi32(vecSum, sum2); - } - - for (;j<len;j++) - { - vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]); - vecX0 = _mm_shuffle_epi32(vecX, 0x00); - - vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]); - - sum0 = _mm_mullo_epi32(vecX0, vecY0); - vecSum = _mm_add_epi32(vecSum, sum0); - } - - initSum = _mm_loadu_si128((__m128i *)(&sum[0])); - initSum = _mm_add_epi32(initSum, vecSum); - _mm_storeu_si128((__m128i *)sum, initSum); -} -#endif diff --git a/thirdparty/opus/celt/x86/x86_celt_map.c b/thirdparty/opus/celt/x86/x86_celt_map.c deleted file mode 100644 index 47ba41b9ee..0000000000 --- a/thirdparty/opus/celt/x86/x86_celt_map.c +++ /dev/null @@ -1,155 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#if defined(HAVE_CONFIG_H) -#include "config.h" -#endif - -#include "x86/x86cpu.h" -#include "celt_lpc.h" -#include "pitch.h" -#include "pitch_sse.h" - -#if defined(OPUS_HAVE_RTCD) - -# if defined(FIXED_POINT) - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1) - -void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *num, - opus_val16 *y, - int N, - int ord, - opus_val16 *mem, - int arch -) = { - celt_fir_c, /* non-sse */ - celt_fir_c, - celt_fir_c, - MAY_HAVE_SSE4_1(celt_fir), /* sse4.1 */ - MAY_HAVE_SSE4_1(celt_fir) /* avx */ -}; - -void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y, - opus_val32 sum[4], - int len -) = { - xcorr_kernel_c, /* non-sse */ - xcorr_kernel_c, - xcorr_kernel_c, - MAY_HAVE_SSE4_1(xcorr_kernel), /* sse4.1 */ - MAY_HAVE_SSE4_1(xcorr_kernel) /* avx */ -}; - -#endif - -#if (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ - (!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) - -opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y, - int N -) = { - celt_inner_prod_c, /* non-sse */ - celt_inner_prod_c, - MAY_HAVE_SSE2(celt_inner_prod), - MAY_HAVE_SSE4_1(celt_inner_prod), /* sse4.1 */ - MAY_HAVE_SSE4_1(celt_inner_prod) /* avx */ -}; - -#endif - -# else - -#if 
defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE) - -void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y, - opus_val32 sum[4], - int len -) = { - xcorr_kernel_c, /* non-sse */ - MAY_HAVE_SSE(xcorr_kernel), - MAY_HAVE_SSE(xcorr_kernel), - MAY_HAVE_SSE(xcorr_kernel), - MAY_HAVE_SSE(xcorr_kernel) -}; - -opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y, - int N -) = { - celt_inner_prod_c, /* non-sse */ - MAY_HAVE_SSE(celt_inner_prod), - MAY_HAVE_SSE(celt_inner_prod), - MAY_HAVE_SSE(celt_inner_prod), - MAY_HAVE_SSE(celt_inner_prod) -}; - -void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( - const opus_val16 *x, - const opus_val16 *y01, - const opus_val16 *y02, - int N, - opus_val32 *xy1, - opus_val32 *xy2 -) = { - dual_inner_prod_c, /* non-sse */ - MAY_HAVE_SSE(dual_inner_prod), - MAY_HAVE_SSE(dual_inner_prod), - MAY_HAVE_SSE(dual_inner_prod), - MAY_HAVE_SSE(dual_inner_prod) -}; - -void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])( - opus_val32 *y, - opus_val32 *x, - int T, - int N, - opus_val16 g10, - opus_val16 g11, - opus_val16 g12 -) = { - comb_filter_const_c, /* non-sse */ - MAY_HAVE_SSE(comb_filter_const), - MAY_HAVE_SSE(comb_filter_const), - MAY_HAVE_SSE(comb_filter_const), - MAY_HAVE_SSE(comb_filter_const) -}; - - -#endif - -#endif -#endif diff --git a/thirdparty/opus/celt/x86/x86cpu.c b/thirdparty/opus/celt/x86/x86cpu.c deleted file mode 100644 index 080eb25e41..0000000000 --- a/thirdparty/opus/celt/x86/x86cpu.c +++ /dev/null @@ -1,157 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "cpu_support.h" -#include "macros.h" -#include "main.h" -#include "pitch.h" -#include "x86cpu.h" - -#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \ - (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \ - (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ - (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)) - - -#if defined(_MSC_VER) - -#include <intrin.h> -static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType) -{ - __cpuid((int*)CPUInfo, InfoType); -} - -#else - -#if defined(CPU_INFO_BY_C) -#include <cpuid.h> -#endif - -static void cpuid(unsigned int CPUInfo[4], unsigned int InfoType) -{ -#if defined(CPU_INFO_BY_ASM) -#if defined(__i386__) && defined(__PIC__) -/* %ebx is PIC register in 32-bit, so mustn't clobber it. 
*/ - __asm__ __volatile__ ( - "xchg %%ebx, %1\n" - "cpuid\n" - "xchg %%ebx, %1\n": - "=a" (CPUInfo[0]), - "=r" (CPUInfo[1]), - "=c" (CPUInfo[2]), - "=d" (CPUInfo[3]) : - "0" (InfoType) - ); -#else - __asm__ __volatile__ ( - "cpuid": - "=a" (CPUInfo[0]), - "=b" (CPUInfo[1]), - "=c" (CPUInfo[2]), - "=d" (CPUInfo[3]) : - "0" (InfoType) - ); -#endif -#elif defined(CPU_INFO_BY_C) - __get_cpuid(InfoType, &(CPUInfo[0]), &(CPUInfo[1]), &(CPUInfo[2]), &(CPUInfo[3])); -#endif -} - -#endif - -typedef struct CPU_Feature{ - /* SIMD: 128-bit */ - int HW_SSE; - int HW_SSE2; - int HW_SSE41; - /* SIMD: 256-bit */ - int HW_AVX; -} CPU_Feature; - -static void opus_cpu_feature_check(CPU_Feature *cpu_feature) -{ - unsigned int info[4] = {0}; - unsigned int nIds = 0; - - cpuid(info, 0); - nIds = info[0]; - - if (nIds >= 1){ - cpuid(info, 1); - cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0; - cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0; - cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0; - cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0; - } - else { - cpu_feature->HW_SSE = 0; - cpu_feature->HW_SSE2 = 0; - cpu_feature->HW_SSE41 = 0; - cpu_feature->HW_AVX = 0; - } -} - -int opus_select_arch(void) -{ - CPU_Feature cpu_feature; - int arch; - - opus_cpu_feature_check(&cpu_feature); - - arch = 0; - if (!cpu_feature.HW_SSE) - { - return arch; - } - arch++; - - if (!cpu_feature.HW_SSE2) - { - return arch; - } - arch++; - - if (!cpu_feature.HW_SSE41) - { - return arch; - } - arch++; - - if (!cpu_feature.HW_AVX) - { - return arch; - } - arch++; - - return arch; -} - -#endif diff --git a/thirdparty/opus/celt/x86/x86cpu.h b/thirdparty/opus/celt/x86/x86cpu.h deleted file mode 100644 index 04fd48aac4..0000000000 --- a/thirdparty/opus/celt/x86/x86cpu.h +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that 
the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#if !defined(X86CPU_H) -# define X86CPU_H - -# if defined(OPUS_X86_MAY_HAVE_SSE) -# define MAY_HAVE_SSE(name) name ## _sse -# else -# define MAY_HAVE_SSE(name) name ## _c -# endif - -# if defined(OPUS_X86_MAY_HAVE_SSE2) -# define MAY_HAVE_SSE2(name) name ## _sse2 -# else -# define MAY_HAVE_SSE2(name) name ## _c -# endif - -# if defined(OPUS_X86_MAY_HAVE_SSE4_1) -# define MAY_HAVE_SSE4_1(name) name ## _sse4_1 -# else -# define MAY_HAVE_SSE4_1(name) name ## _c -# endif - -# if defined(OPUS_X86_MAY_HAVE_AVX) -# define MAY_HAVE_AVX(name) name ## _avx -# else -# define MAY_HAVE_AVX(name) name ## _c -# endif - -# if defined(OPUS_HAVE_RTCD) -int opus_select_arch(void); -# endif - -/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32() - or _mm_cvtepi16_epi32() when optimizations are disabled, even though the - actual PMOVSXWD instruction takes an m32 or m64. Unlike a normal memory - reference, these require 16-byte alignment and load a full 16 bytes (instead - of 4 or 8), possibly reading out of bounds. - - We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or - _mm_loadl_epi64(), which should have the same semantics as an m32 or m64 - reference in the PMOVSXWD instruction itself, but gcc is not smart enough to - optimize this out when optimizations ARE enabled. - - Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32 - (which is fair, since technically the compiler is always allowed to do the - dereference before invoking the function implementing the intrinsic). - However, it is smart enough to eliminate the extra MOVD instruction. 
- For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize out - the extra MOVQ if it's specified explicitly */ - -# if defined(__clang__) || !defined(__OPTIMIZE__) -# define OP_CVTEPI8_EPI32_M32(x) \ - (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x)))) -# else -# define OP_CVTEPI8_EPI32_M32(x) \ - (_mm_cvtepi8_epi32(*(__m128i *)(x))) -#endif - -# if !defined(__OPTIMIZE__) -# define OP_CVTEPI16_EPI32_M64(x) \ - (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x)))) -# else -# define OP_CVTEPI16_EPI32_M64(x) \ - (_mm_cvtepi16_epi32(*(__m128i *)(x))) -# endif - -#endif diff --git a/thirdparty/opus/config.h b/thirdparty/opus/config.h deleted file mode 100644 index 7b9c92c6a8..0000000000 --- a/thirdparty/opus/config.h +++ /dev/null @@ -1,133 +0,0 @@ -/* Opus configuration header */ -/* Based on the output of libopus configure script */ - -/* Define to 1 if you have the <dlfcn.h> header file. */ -#define HAVE_DLFCN_H 1 - -/* Define to 1 if you have the <inttypes.h> header file. */ -#define HAVE_INTTYPES_H 1 - -#if (!defined( _MSC_VER ) || ( _MSC_VER >= 1800 )) - -/* Define to 1 if you have the `lrint' function. */ -#define HAVE_LRINT 1 - -/* Define to 1 if you have the `lrintf' function. */ -#define HAVE_LRINTF 1 - -#endif - -/* Define to 1 if you have the <memory.h> header file. */ -#define HAVE_MEMORY_H 1 - -/* Define to 1 if you have the <stdint.h> header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the <stdlib.h> header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the <strings.h> header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the <string.h> header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have the <sys/stat.h> header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the <sys/types.h> header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the <unistd.h> header file. 
*/ -#define HAVE_UNISTD_H 1 - -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#define LT_OBJDIR ".libs/" - -#ifdef OPUS_ARM_OPT -/* Make use of ARM asm optimization */ -#define OPUS_ARM_ASM 1 - -/* Use generic ARMv4 inline asm optimizations */ -#define OPUS_ARM_INLINE_ASM 1 - -/* Use ARMv5E inline asm optimizations */ -#define OPUS_ARM_INLINE_EDSP 1 - -/* Use ARMv6 inline asm optimizations */ -#define OPUS_ARM_INLINE_MEDIA 1 - -/* Use ARM NEON inline asm optimizations */ -#define OPUS_ARM_INLINE_NEON 1 - -/* Define if assembler supports EDSP instructions */ -#define OPUS_ARM_MAY_HAVE_EDSP 1 - -/* Define if assembler supports ARMv6 media instructions */ -#define OPUS_ARM_MAY_HAVE_MEDIA 1 - -/* Define if compiler supports NEON instructions */ -#define OPUS_ARM_MAY_HAVE_NEON 1 -#endif // OPUS_ARM_OPT - -#ifdef OPUS_ARM64_OPT -/* Make use of ARM asm optimization */ -#define OPUS_ARM_ASM 1 - -/* Use ARMv6 inline asm optimizations */ -#define OPUS_ARM_INLINE_MEDIA 1 // work - -/* Use ARM NEON inline asm optimizations */ -#define OPUS_ARM_INLINE_NEON 1 // work - -/* Define if assembler supports EDSP instructions */ -#define OPUS_ARM_MAY_HAVE_EDSP 1 // work - -/* Define if assembler supports ARMv6 media instructions */ -#define OPUS_ARM_MAY_HAVE_MEDIA 1 // work - -/* Define if compiler supports NEON instructions */ -#define OPUS_ARM_MAY_HAVE_NEON 1 - -#endif // OPUS_ARM64_OPT - -/* This is a build of OPUS */ -#define OPUS_BUILD /**/ - -#ifndef WIN32 - /* Use C99 variable-size arrays */ - #define VAR_ARRAYS 1 -#else - /* Fixes VS 2013 compile error */ - #define USE_ALLOCA 1 -#endif - -#ifndef OPUS_FIXED_POINT -#define FLOAT_APPROX 1 -#endif - - -/* Define to `__inline__' or `__inline' if that's what the C compiler - calls it, or to nothing if 'inline' is not supported under any name. */ -#ifndef __cplusplus -/* #undef inline */ -#endif - -/* Define to the equivalent of the C99 'restrict' keyword, or to - nothing if this is not supported. 
Do not define if restrict is - supported directly. */ -#if (!defined( _MSC_VER ) || ( _MSC_VER >= 1800 )) -#define restrict __restrict -#else -#undef restrict -#endif -/* Work around a bug in Sun C++: it does not support _Restrict or - __restrict__, even though the corresponding Sun C compiler ends up with - "#define restrict _Restrict" or "#define restrict __restrict__" in the - previous line. Perhaps some future version of Sun C++ will work with - restrict; if so, hopefully it defines __RESTRICT like Sun C does. */ -#if defined __SUNPRO_CC && !defined __RESTRICT -# define _Restrict -# define __restrict__ -#endif diff --git a/thirdparty/opus/info.c b/thirdparty/opus/info.c deleted file mode 100644 index c36f9a9ee1..0000000000 --- a/thirdparty/opus/info.c +++ /dev/null @@ -1,758 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ********************************************************************/ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "internal.h" -#include <limits.h> -#include <string.h> - -static unsigned op_parse_uint16le(const unsigned char *_data){ - return _data[0]|_data[1]<<8; -} - -static int op_parse_int16le(const unsigned char *_data){ - int ret; - ret=_data[0]|_data[1]<<8; - return (ret^0x8000)-0x8000; -} - -static opus_uint32 op_parse_uint32le(const unsigned char *_data){ - return _data[0]|(opus_uint32)_data[1]<<8| - (opus_uint32)_data[2]<<16|(opus_uint32)_data[3]<<24; -} - -static opus_uint32 op_parse_uint32be(const unsigned char *_data){ - return _data[3]|(opus_uint32)_data[2]<<8| - (opus_uint32)_data[1]<<16|(opus_uint32)_data[0]<<24; -} - -int opus_head_parse(OpusHead *_head,const unsigned char *_data,size_t _len){ - OpusHead head; - if(_len<8)return OP_ENOTFORMAT; - if(memcmp(_data,"OpusHead",8)!=0)return OP_ENOTFORMAT; - if(_len<9)return OP_EBADHEADER; - head.version=_data[8]; - if(head.version>15)return OP_EVERSION; - if(_len<19)return OP_EBADHEADER; - head.channel_count=_data[9]; - head.pre_skip=op_parse_uint16le(_data+10); - head.input_sample_rate=op_parse_uint32le(_data+12); - head.output_gain=op_parse_int16le(_data+16); - head.mapping_family=_data[18]; - if(head.mapping_family==0){ - if(head.channel_count<1||head.channel_count>2)return OP_EBADHEADER; - if(head.version<=1&&_len>19)return OP_EBADHEADER; - head.stream_count=1; - head.coupled_count=head.channel_count-1; - if(_head!=NULL){ - _head->mapping[0]=0; - _head->mapping[1]=1; - } - } - else if(head.mapping_family==1){ - size_t size; - int ci; - if(head.channel_count<1||head.channel_count>8)return OP_EBADHEADER; - size=21+head.channel_count; - if(_len<size||head.version<=1&&_len>size)return OP_EBADHEADER; - head.stream_count=_data[19]; - 
if(head.stream_count<1)return OP_EBADHEADER; - head.coupled_count=_data[20]; - if(head.coupled_count>head.stream_count)return OP_EBADHEADER; - for(ci=0;ci<head.channel_count;ci++){ - if(_data[21+ci]>=head.stream_count+head.coupled_count - &&_data[21+ci]!=255){ - return OP_EBADHEADER; - } - } - if(_head!=NULL)memcpy(_head->mapping,_data+21,head.channel_count); - } - /*General purpose players should not attempt to play back content with - channel mapping family 255.*/ - else if(head.mapping_family==255)return OP_EIMPL; - /*No other channel mapping families are currently defined.*/ - else return OP_EBADHEADER; - if(_head!=NULL)memcpy(_head,&head,head.mapping-(unsigned char *)&head); - return 0; -} - -void opus_tags_init(OpusTags *_tags){ - memset(_tags,0,sizeof(*_tags)); -} - -void opus_tags_clear(OpusTags *_tags){ - int ncomments; - int ci; - ncomments=_tags->comments; - if(_tags->user_comments!=NULL)ncomments++; - for(ci=ncomments;ci-->0;)_ogg_free(_tags->user_comments[ci]); - _ogg_free(_tags->user_comments); - _ogg_free(_tags->comment_lengths); - _ogg_free(_tags->vendor); -} - -/*Ensure there's room for up to _ncomments comments.*/ -static int op_tags_ensure_capacity(OpusTags *_tags,size_t _ncomments){ - char **user_comments; - int *comment_lengths; - int cur_ncomments; - char *binary_suffix_data; - int binary_suffix_len; - size_t size; - if(OP_UNLIKELY(_ncomments>=(size_t)INT_MAX))return OP_EFAULT; - size=sizeof(*_tags->comment_lengths)*(_ncomments+1); - if(size/sizeof(*_tags->comment_lengths)!=_ncomments+1)return OP_EFAULT; - cur_ncomments=_tags->comments; - comment_lengths=_tags->comment_lengths; - binary_suffix_len=comment_lengths==NULL?0:comment_lengths[cur_ncomments]; - comment_lengths=(int *)_ogg_realloc(_tags->comment_lengths,size); - if(OP_UNLIKELY(comment_lengths==NULL))return OP_EFAULT; - comment_lengths[_ncomments]=binary_suffix_len; - _tags->comment_lengths=comment_lengths; - size=sizeof(*_tags->user_comments)*(_ncomments+1); - 
if(size/sizeof(*_tags->user_comments)!=_ncomments+1)return OP_EFAULT; - user_comments=_tags->user_comments; - binary_suffix_data=user_comments==NULL?NULL:user_comments[cur_ncomments]; - user_comments=(char **)_ogg_realloc(_tags->user_comments,size); - if(OP_UNLIKELY(user_comments==NULL))return OP_EFAULT; - user_comments[_ncomments]=binary_suffix_data; - _tags->user_comments=user_comments; - return 0; -} - -/*Duplicate a (possibly non-NUL terminated) string with a known length.*/ -static char *op_strdup_with_len(const char *_s,size_t _len){ - size_t size; - char *ret; - size=sizeof(*ret)*(_len+1); - if(OP_UNLIKELY(size<_len))return NULL; - ret=(char *)_ogg_malloc(size); - if(OP_LIKELY(ret!=NULL)){ - ret=(char *)memcpy(ret,_s,sizeof(*ret)*_len); - ret[_len]='\0'; - } - return ret; -} - -/*The actual implementation of opus_tags_parse(). - Unlike the public API, this function requires _tags to already be - initialized, modifies its contents before success is guaranteed, and assumes - the caller will clear it on error.*/ -static int opus_tags_parse_impl(OpusTags *_tags, - const unsigned char *_data,size_t _len){ - opus_uint32 count; - size_t len; - int ncomments; - int ci; - len=_len; - if(len<8)return OP_ENOTFORMAT; - if(memcmp(_data,"OpusTags",8)!=0)return OP_ENOTFORMAT; - if(len<16)return OP_EBADHEADER; - _data+=8; - len-=8; - count=op_parse_uint32le(_data); - _data+=4; - len-=4; - if(count>len)return OP_EBADHEADER; - if(_tags!=NULL){ - _tags->vendor=op_strdup_with_len((char *)_data,count); - if(_tags->vendor==NULL)return OP_EFAULT; - } - _data+=count; - len-=count; - if(len<4)return OP_EBADHEADER; - count=op_parse_uint32le(_data); - _data+=4; - len-=4; - /*Check to make sure there's minimally sufficient data left in the packet.*/ - if(count>len>>2)return OP_EBADHEADER; - /*Check for overflow (the API limits this to an int).*/ - if(count>(opus_uint32)INT_MAX-1)return OP_EFAULT; - if(_tags!=NULL){ - int ret; - ret=op_tags_ensure_capacity(_tags,count); - 
if(ret<0)return ret; - } - ncomments=(int)count; - for(ci=0;ci<ncomments;ci++){ - /*Check to make sure there's minimally sufficient data left in the packet.*/ - if((size_t)(ncomments-ci)>len>>2)return OP_EBADHEADER; - count=op_parse_uint32le(_data); - _data+=4; - len-=4; - if(count>len)return OP_EBADHEADER; - /*Check for overflow (the API limits this to an int).*/ - if(count>(opus_uint32)INT_MAX)return OP_EFAULT; - if(_tags!=NULL){ - _tags->user_comments[ci]=op_strdup_with_len((char *)_data,count); - if(_tags->user_comments[ci]==NULL)return OP_EFAULT; - _tags->comment_lengths[ci]=(int)count; - _tags->comments=ci+1; - /*Needed by opus_tags_clear() if we fail before parsing the (optional) - binary metadata.*/ - _tags->user_comments[ci+1]=NULL; - } - _data+=count; - len-=count; - } - if(len>0&&(_data[0]&1)){ - if(len>(opus_uint32)INT_MAX)return OP_EFAULT; - if(_tags!=NULL){ - _tags->user_comments[ncomments]=(char *)_ogg_malloc(len); - if(OP_UNLIKELY(_tags->user_comments[ncomments]==NULL))return OP_EFAULT; - memcpy(_tags->user_comments[ncomments],_data,len); - _tags->comment_lengths[ncomments]=(int)len; - } - } - return 0; -} - -int opus_tags_parse(OpusTags *_tags,const unsigned char *_data,size_t _len){ - if(_tags!=NULL){ - OpusTags tags; - int ret; - opus_tags_init(&tags); - ret=opus_tags_parse_impl(&tags,_data,_len); - if(ret<0)opus_tags_clear(&tags); - else *_tags=*&tags; - return ret; - } - else return opus_tags_parse_impl(NULL,_data,_len); -} - -/*The actual implementation of opus_tags_copy(). 
- Unlike the public API, this function requires _dst to already be - initialized, modifies its contents before success is guaranteed, and assumes - the caller will clear it on error.*/ -static int opus_tags_copy_impl(OpusTags *_dst,const OpusTags *_src){ - char *vendor; - int ncomments; - int ret; - int ci; - vendor=_src->vendor; - _dst->vendor=op_strdup_with_len(vendor,strlen(vendor)); - if(OP_UNLIKELY(_dst->vendor==NULL))return OP_EFAULT; - ncomments=_src->comments; - ret=op_tags_ensure_capacity(_dst,ncomments); - if(OP_UNLIKELY(ret<0))return ret; - for(ci=0;ci<ncomments;ci++){ - int len; - len=_src->comment_lengths[ci]; - OP_ASSERT(len>=0); - _dst->user_comments[ci]=op_strdup_with_len(_src->user_comments[ci],len); - if(OP_UNLIKELY(_dst->user_comments[ci]==NULL))return OP_EFAULT; - _dst->comment_lengths[ci]=len; - _dst->comments=ci+1; - } - if(_src->comment_lengths!=NULL){ - int len; - len=_src->comment_lengths[ncomments]; - if(len>0){ - _dst->user_comments[ncomments]=(char *)_ogg_malloc(len); - if(OP_UNLIKELY(_dst->user_comments[ncomments]==NULL))return OP_EFAULT; - memcpy(_dst->user_comments[ncomments],_src->user_comments[ncomments],len); - _dst->comment_lengths[ncomments]=len; - } - } - return 0; -} - -int opus_tags_copy(OpusTags *_dst,const OpusTags *_src){ - OpusTags dst; - int ret; - opus_tags_init(&dst); - ret=opus_tags_copy_impl(&dst,_src); - if(OP_UNLIKELY(ret<0))opus_tags_clear(&dst); - else *_dst=*&dst; - return 0; -} - -int opus_tags_add(OpusTags *_tags,const char *_tag,const char *_value){ - char *comment; - int tag_len; - int value_len; - int ncomments; - int ret; - ncomments=_tags->comments; - ret=op_tags_ensure_capacity(_tags,ncomments+1); - if(OP_UNLIKELY(ret<0))return ret; - tag_len=strlen(_tag); - value_len=strlen(_value); - /*+2 for '=' and '\0'.*/ - comment=(char *)_ogg_malloc(sizeof(*comment)*(tag_len+value_len+2)); - if(OP_UNLIKELY(comment==NULL))return OP_EFAULT; - memcpy(comment,_tag,sizeof(*comment)*tag_len); - comment[tag_len]='='; - 
memcpy(comment+tag_len+1,_value,sizeof(*comment)*(value_len+1)); - _tags->user_comments[ncomments]=comment; - _tags->comment_lengths[ncomments]=tag_len+value_len+1; - _tags->comments=ncomments+1; - return 0; -} - -int opus_tags_add_comment(OpusTags *_tags,const char *_comment){ - char *comment; - int comment_len; - int ncomments; - int ret; - ncomments=_tags->comments; - ret=op_tags_ensure_capacity(_tags,ncomments+1); - if(OP_UNLIKELY(ret<0))return ret; - comment_len=(int)strlen(_comment); - comment=op_strdup_with_len(_comment,comment_len); - if(OP_UNLIKELY(comment==NULL))return OP_EFAULT; - _tags->user_comments[ncomments]=comment; - _tags->comment_lengths[ncomments]=comment_len; - _tags->comments=ncomments+1; - return 0; -} - -int opus_tags_set_binary_suffix(OpusTags *_tags, - const unsigned char *_data,int _len){ - unsigned char *binary_suffix_data; - int ncomments; - int ret; - if(_len<0||_len>0&&(_data==NULL||!(_data[0]&1)))return OP_EINVAL; - ncomments=_tags->comments; - ret=op_tags_ensure_capacity(_tags,ncomments); - if(OP_UNLIKELY(ret<0))return ret; - binary_suffix_data= - (unsigned char *)_ogg_realloc(_tags->user_comments[ncomments],_len); - if(OP_UNLIKELY(binary_suffix_data==NULL))return OP_EFAULT; - memcpy(binary_suffix_data,_data,_len); - _tags->user_comments[ncomments]=(char *)binary_suffix_data; - _tags->comment_lengths[ncomments]=_len; - return 0; -} - -int opus_tagcompare(const char *_tag_name,const char *_comment){ - return opus_tagncompare(_tag_name,strlen(_tag_name),_comment); -} - -int opus_tagncompare(const char *_tag_name,int _tag_len,const char *_comment){ - int ret; - OP_ASSERT(_tag_len>=0); - ret=op_strncasecmp(_tag_name,_comment,_tag_len); - return ret?ret:'='-_comment[_tag_len]; -} - -const char *opus_tags_query(const OpusTags *_tags,const char *_tag,int _count){ - char **user_comments; - int tag_len; - int found; - int ncomments; - int ci; - tag_len=strlen(_tag); - ncomments=_tags->comments; - user_comments=_tags->user_comments; - 
found=0; - for(ci=0;ci<ncomments;ci++){ - if(!opus_tagncompare(_tag,tag_len,user_comments[ci])){ - /*We return a pointer to the data, not a copy.*/ - if(_count==found++)return user_comments[ci]+tag_len+1; - } - } - /*Didn't find anything.*/ - return NULL; -} - -int opus_tags_query_count(const OpusTags *_tags,const char *_tag){ - char **user_comments; - int tag_len; - int found; - int ncomments; - int ci; - tag_len=strlen(_tag); - ncomments=_tags->comments; - user_comments=_tags->user_comments; - found=0; - for(ci=0;ci<ncomments;ci++){ - if(!opus_tagncompare(_tag,tag_len,user_comments[ci]))found++; - } - return found; -} - -const unsigned char *opus_tags_get_binary_suffix(const OpusTags *_tags, - int *_len){ - int ncomments; - int len; - ncomments=_tags->comments; - len=_tags->comment_lengths==NULL?0:_tags->comment_lengths[ncomments]; - *_len=len; - OP_ASSERT(len==0||_tags->user_comments!=NULL); - return len>0?(const unsigned char *)_tags->user_comments[ncomments]:NULL; -} - -static int opus_tags_get_gain(const OpusTags *_tags,int *_gain_q8, - const char *_tag_name,size_t _tag_len){ - char **comments; - int ncomments; - int ci; - comments=_tags->user_comments; - ncomments=_tags->comments; - /*Look for the first valid tag with the name _tag_name and use that.*/ - for(ci=0;ci<ncomments;ci++){ - if(opus_tagncompare(_tag_name,_tag_len,comments[ci])==0){ - char *p; - opus_int32 gain_q8; - int negative; - p=comments[ci]+_tag_len+1; - negative=0; - if(*p=='-'){ - negative=-1; - p++; - } - else if(*p=='+')p++; - gain_q8=0; - while(*p>='0'&&*p<='9'){ - gain_q8=10*gain_q8+*p-'0'; - if(gain_q8>32767-negative)break; - p++; - } - /*This didn't look like a signed 16-bit decimal integer. 
- Not a valid gain tag.*/ - if(*p!='\0')continue; - *_gain_q8=(int)(gain_q8+negative^negative); - return 0; - } - } - return OP_FALSE; -} - -int opus_tags_get_album_gain(const OpusTags *_tags,int *_gain_q8){ - return opus_tags_get_gain(_tags,_gain_q8,"R128_ALBUM_GAIN",15); -} - -int opus_tags_get_track_gain(const OpusTags *_tags,int *_gain_q8){ - return opus_tags_get_gain(_tags,_gain_q8,"R128_TRACK_GAIN",15); -} - -static int op_is_jpeg(const unsigned char *_buf,size_t _buf_sz){ - return _buf_sz>=11&&memcmp(_buf,"\xFF\xD8\xFF\xE0",4)==0 - &&(_buf[4]<<8|_buf[5])>=16&&memcmp(_buf+6,"JFIF",5)==0; -} - -/*Tries to extract the width, height, bits per pixel, and palette size of a - JPEG. - On failure, simply leaves its outputs unmodified.*/ -static void op_extract_jpeg_params(const unsigned char *_buf,size_t _buf_sz, - opus_uint32 *_width,opus_uint32 *_height, - opus_uint32 *_depth,opus_uint32 *_colors,int *_has_palette){ - if(op_is_jpeg(_buf,_buf_sz)){ - size_t offs; - offs=2; - for(;;){ - size_t segment_len; - int marker; - while(offs<_buf_sz&&_buf[offs]!=0xFF)offs++; - while(offs<_buf_sz&&_buf[offs]==0xFF)offs++; - marker=_buf[offs]; - offs++; - /*If we hit EOI* (end of image), or another SOI* (start of image), - or SOS (start of scan), then stop now.*/ - if(offs>=_buf_sz||(marker>=0xD8&&marker<=0xDA))break; - /*RST* (restart markers): skip (no segment length).*/ - else if(marker>=0xD0&&marker<=0xD7)continue; - /*Read the length of the marker segment.*/ - if(_buf_sz-offs<2)break; - segment_len=_buf[offs]<<8|_buf[offs+1]; - if(segment_len<2||_buf_sz-offs<segment_len)break; - if(marker==0xC0||(marker>0xC0&&marker<0xD0&&(marker&3)!=0)){ - /*Found a SOFn (start of frame) marker segment:*/ - if(segment_len>=8){ - *_height=_buf[offs+3]<<8|_buf[offs+4]; - *_width=_buf[offs+5]<<8|_buf[offs+6]; - *_depth=_buf[offs+2]*_buf[offs+7]; - *_colors=0; - *_has_palette=0; - } - break; - } - /*Other markers: skip the whole marker segment.*/ - offs+=segment_len; - } - } -} - -static int 
op_is_png(const unsigned char *_buf,size_t _buf_sz){ - return _buf_sz>=8&&memcmp(_buf,"\x89PNG\x0D\x0A\x1A\x0A",8)==0; -} - -/*Tries to extract the width, height, bits per pixel, and palette size of a - PNG. - On failure, simply leaves its outputs unmodified.*/ -static void op_extract_png_params(const unsigned char *_buf,size_t _buf_sz, - opus_uint32 *_width,opus_uint32 *_height, - opus_uint32 *_depth,opus_uint32 *_colors,int *_has_palette){ - if(op_is_png(_buf,_buf_sz)){ - size_t offs; - offs=8; - while(_buf_sz-offs>=12){ - ogg_uint32_t chunk_len; - chunk_len=op_parse_uint32be(_buf+offs); - if(chunk_len>_buf_sz-(offs+12))break; - else if(chunk_len==13&&memcmp(_buf+offs+4,"IHDR",4)==0){ - int color_type; - *_width=op_parse_uint32be(_buf+offs+8); - *_height=op_parse_uint32be(_buf+offs+12); - color_type=_buf[offs+17]; - if(color_type==3){ - *_depth=24; - *_has_palette=1; - } - else{ - int sample_depth; - sample_depth=_buf[offs+16]; - if(color_type==0)*_depth=sample_depth; - else if(color_type==2)*_depth=sample_depth*3; - else if(color_type==4)*_depth=sample_depth*2; - else if(color_type==6)*_depth=sample_depth*4; - *_colors=0; - *_has_palette=0; - break; - } - } - else if(*_has_palette>0&&memcmp(_buf+offs+4,"PLTE",4)==0){ - *_colors=chunk_len/3; - break; - } - offs+=12+chunk_len; - } - } -} - -static int op_is_gif(const unsigned char *_buf,size_t _buf_sz){ - return _buf_sz>=6&&(memcmp(_buf,"GIF87a",6)==0||memcmp(_buf,"GIF89a",6)==0); -} - -/*Tries to extract the width, height, bits per pixel, and palette size of a - GIF. 
- On failure, simply leaves its outputs unmodified.*/ -static void op_extract_gif_params(const unsigned char *_buf,size_t _buf_sz, - opus_uint32 *_width,opus_uint32 *_height, - opus_uint32 *_depth,opus_uint32 *_colors,int *_has_palette){ - if(op_is_gif(_buf,_buf_sz)&&_buf_sz>=14){ - *_width=_buf[6]|_buf[7]<<8; - *_height=_buf[8]|_buf[9]<<8; - /*libFLAC hard-codes the depth to 24.*/ - *_depth=24; - *_colors=1<<((_buf[10]&7)+1); - *_has_palette=1; - } -} - -/*The actual implementation of opus_picture_tag_parse(). - Unlike the public API, this function requires _pic to already be - initialized, modifies its contents before success is guaranteed, and assumes - the caller will clear it on error.*/ -static int opus_picture_tag_parse_impl(OpusPictureTag *_pic,const char *_tag, - unsigned char *_buf,size_t _buf_sz,size_t _base64_sz){ - opus_int32 picture_type; - opus_uint32 mime_type_length; - char *mime_type; - opus_uint32 description_length; - char *description; - opus_uint32 width; - opus_uint32 height; - opus_uint32 depth; - opus_uint32 colors; - opus_uint32 data_length; - opus_uint32 file_width; - opus_uint32 file_height; - opus_uint32 file_depth; - opus_uint32 file_colors; - int format; - int has_palette; - int colors_set; - size_t i; - /*Decode the BASE64 data.*/ - for(i=0;i<_base64_sz;i++){ - opus_uint32 value; - int j; - value=0; - for(j=0;j<4;j++){ - unsigned c; - unsigned d; - c=(unsigned char)_tag[4*i+j]; - if(c=='+')d=62; - else if(c=='/')d=63; - else if(c>='0'&&c<='9')d=52+c-'0'; - else if(c>='a'&&c<='z')d=26+c-'a'; - else if(c>='A'&&c<='Z')d=c-'A'; - else if(c=='='&&3*i+j>_buf_sz)d=0; - else return OP_ENOTFORMAT; - value=value<<6|d; - } - _buf[3*i]=(unsigned char)(value>>16); - if(3*i+1<_buf_sz){ - _buf[3*i+1]=(unsigned char)(value>>8); - if(3*i+2<_buf_sz)_buf[3*i+2]=(unsigned char)value; - } - } - i=0; - picture_type=op_parse_uint32be(_buf+i); - i+=4; - /*Extract the MIME type.*/ - mime_type_length=op_parse_uint32be(_buf+i); - i+=4; - 
if(mime_type_length>_buf_sz-32)return OP_ENOTFORMAT; - mime_type=(char *)_ogg_malloc(sizeof(*_pic->mime_type)*(mime_type_length+1)); - if(mime_type==NULL)return OP_EFAULT; - memcpy(mime_type,_buf+i,sizeof(*mime_type)*mime_type_length); - mime_type[mime_type_length]='\0'; - _pic->mime_type=mime_type; - i+=mime_type_length; - /*Extract the description string.*/ - description_length=op_parse_uint32be(_buf+i); - i+=4; - if(description_length>_buf_sz-mime_type_length-32)return OP_ENOTFORMAT; - description= - (char *)_ogg_malloc(sizeof(*_pic->mime_type)*(description_length+1)); - if(description==NULL)return OP_EFAULT; - memcpy(description,_buf+i,sizeof(*description)*description_length); - description[description_length]='\0'; - _pic->description=description; - i+=description_length; - /*Extract the remaining fields.*/ - width=op_parse_uint32be(_buf+i); - i+=4; - height=op_parse_uint32be(_buf+i); - i+=4; - depth=op_parse_uint32be(_buf+i); - i+=4; - colors=op_parse_uint32be(_buf+i); - i+=4; - /*If one of these is set, they all must be, but colors==0 is a valid value.*/ - colors_set=width!=0||height!=0||depth!=0||colors!=0; - if((width==0||height==0||depth==0)&&colors_set)return OP_ENOTFORMAT; - data_length=op_parse_uint32be(_buf+i); - i+=4; - if(data_length>_buf_sz-i)return OP_ENOTFORMAT; - /*Trim extraneous data so we don't copy it below.*/ - _buf_sz=i+data_length; - /*Attempt to determine the image format.*/ - format=OP_PIC_FORMAT_UNKNOWN; - if(mime_type_length==3&&strcmp(mime_type,"-->")==0){ - format=OP_PIC_FORMAT_URL; - /*Picture type 1 must be a 32x32 PNG.*/ - if(picture_type==1&&(width!=0||height!=0)&&(width!=32||height!=32)){ - return OP_ENOTFORMAT; - } - /*Append a terminating NUL for the convenience of our callers.*/ - _buf[_buf_sz++]='\0'; - } - else{ - if(mime_type_length==10 - &&op_strncasecmp(mime_type,"image/jpeg",mime_type_length)==0){ - if(op_is_jpeg(_buf+i,data_length))format=OP_PIC_FORMAT_JPEG; - } - else if(mime_type_length==9 - 
&&op_strncasecmp(mime_type,"image/png",mime_type_length)==0){ - if(op_is_png(_buf+i,data_length))format=OP_PIC_FORMAT_PNG; - } - else if(mime_type_length==9 - &&op_strncasecmp(mime_type,"image/gif",mime_type_length)==0){ - if(op_is_gif(_buf+i,data_length))format=OP_PIC_FORMAT_GIF; - } - else if(mime_type_length==0||(mime_type_length==6 - &&op_strncasecmp(mime_type,"image/",mime_type_length)==0)){ - if(op_is_jpeg(_buf+i,data_length))format=OP_PIC_FORMAT_JPEG; - else if(op_is_png(_buf+i,data_length))format=OP_PIC_FORMAT_PNG; - else if(op_is_gif(_buf+i,data_length))format=OP_PIC_FORMAT_GIF; - } - file_width=file_height=file_depth=file_colors=0; - has_palette=-1; - switch(format){ - case OP_PIC_FORMAT_JPEG:{ - op_extract_jpeg_params(_buf+i,data_length, - &file_width,&file_height,&file_depth,&file_colors,&has_palette); - }break; - case OP_PIC_FORMAT_PNG:{ - op_extract_png_params(_buf+i,data_length, - &file_width,&file_height,&file_depth,&file_colors,&has_palette); - }break; - case OP_PIC_FORMAT_GIF:{ - op_extract_gif_params(_buf+i,data_length, - &file_width,&file_height,&file_depth,&file_colors,&has_palette); - }break; - } - if(has_palette>=0){ - /*If we successfully extracted these parameters from the image, override - any declared values.*/ - width=file_width; - height=file_height; - depth=file_depth; - colors=file_colors; - } - /*Picture type 1 must be a 32x32 PNG.*/ - if(picture_type==1&&(format!=OP_PIC_FORMAT_PNG||width!=32||height!=32)){ - return OP_ENOTFORMAT; - } - } - /*Adjust _buf_sz instead of using data_length to capture the terminating NUL - for URLs.*/ - _buf_sz-=i; - memmove(_buf,_buf+i,sizeof(*_buf)*_buf_sz); - _buf=(unsigned char *)_ogg_realloc(_buf,_buf_sz); - if(_buf_sz>0&&_buf==NULL)return OP_EFAULT; - _pic->type=picture_type; - _pic->width=width; - _pic->height=height; - _pic->depth=depth; - _pic->colors=colors; - _pic->data_length=data_length; - _pic->data=_buf; - _pic->format=format; - return 0; -} - -int opus_picture_tag_parse(OpusPictureTag 
*_pic,const char *_tag){ - OpusPictureTag pic; - unsigned char *buf; - size_t base64_sz; - size_t buf_sz; - size_t tag_length; - int ret; - if(opus_tagncompare("METADATA_BLOCK_PICTURE",22,_tag)==0)_tag+=23; - /*Figure out how much BASE64-encoded data we have.*/ - tag_length=strlen(_tag); - if(tag_length&3)return OP_ENOTFORMAT; - base64_sz=tag_length>>2; - buf_sz=3*base64_sz; - if(buf_sz<32)return OP_ENOTFORMAT; - if(_tag[tag_length-1]=='=')buf_sz--; - if(_tag[tag_length-2]=='=')buf_sz--; - if(buf_sz<32)return OP_ENOTFORMAT; - /*Allocate an extra byte to allow appending a terminating NUL to URL data.*/ - buf=(unsigned char *)_ogg_malloc(sizeof(*buf)*(buf_sz+1)); - if(buf==NULL)return OP_EFAULT; - opus_picture_tag_init(&pic); - ret=opus_picture_tag_parse_impl(&pic,_tag,buf,buf_sz,base64_sz); - if(ret<0){ - opus_picture_tag_clear(&pic); - _ogg_free(buf); - } - else *_pic=*&pic; - return ret; -} - -void opus_picture_tag_init(OpusPictureTag *_pic){ - memset(_pic,0,sizeof(*_pic)); -} - -void opus_picture_tag_clear(OpusPictureTag *_pic){ - _ogg_free(_pic->description); - _ogg_free(_pic->mime_type); - _ogg_free(_pic->data); -} diff --git a/thirdparty/opus/internal.c b/thirdparty/opus/internal.c deleted file mode 100644 index 96c80def82..0000000000 --- a/thirdparty/opus/internal.c +++ /dev/null @@ -1,42 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ********************************************************************/ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "internal.h" - -#if defined(OP_ENABLE_ASSERTIONS) -void op_fatal_impl(const char *_str,const char *_file,int _line){ - fprintf(stderr,"Fatal (internal) error in %s, line %i: %s\n", - _file,_line,_str); - abort(); -} -#endif - -/*A version of strncasecmp() that is guaranteed to only ignore the case of - ASCII characters.*/ -int op_strncasecmp(const char *_a,const char *_b,int _n){ - int i; - for(i=0;i<_n;i++){ - int a; - int b; - int d; - a=_a[i]; - b=_b[i]; - if(a>='a'&&a<='z')a-='a'-'A'; - if(b>='a'&&b<='z')b-='a'-'A'; - d=a-b; - if(d)return d; - } - return 0; -} diff --git a/thirdparty/opus/internal.h b/thirdparty/opus/internal.h deleted file mode 100644 index ee48ea34c9..0000000000 --- a/thirdparty/opus/internal.h +++ /dev/null @@ -1,254 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ********************************************************************/ -#if !defined(_opusfile_internal_h) -# define _opusfile_internal_h (1) - -# if !defined(_REENTRANT) -# define _REENTRANT -# endif -# if !defined(_GNU_SOURCE) -# define _GNU_SOURCE -# endif -# if !defined(_LARGEFILE_SOURCE) -# define _LARGEFILE_SOURCE -# endif -# if !defined(_LARGEFILE64_SOURCE) -# define _LARGEFILE64_SOURCE -# endif -# if !defined(_FILE_OFFSET_BITS) -# define _FILE_OFFSET_BITS 64 -# endif - -# include <stdlib.h> -# include <opusfile.h> - -typedef struct OggOpusLink OggOpusLink; - -# if defined(OP_FIXED_POINT) - -typedef opus_int16 op_sample; - -# else - -typedef float op_sample; - -/*We're using this define to test for libopus 1.1 or later until libopus - provides a better mechanism.*/ -# if defined(OPUS_GET_EXPERT_FRAME_DURATION_REQUEST) -/*Enable soft clipping prevention in 16-bit decodes.*/ -# define OP_SOFT_CLIP (1) -# endif - -# endif - -# if OP_GNUC_PREREQ(4,2) -/*Disable excessive warnings about the order of operations.*/ -# pragma GCC diagnostic ignored "-Wparentheses" -# elif defined(_MSC_VER) -/*Disable excessive warnings about the order of operations.*/ -# pragma warning(disable:4554) -/*Disable warnings about "deprecated" POSIX functions.*/ -# pragma warning(disable:4996) -# endif - -# if OP_GNUC_PREREQ(3,0) -/*Another alternative is - (__builtin_constant_p(_x)?!!(_x):__builtin_expect(!!(_x),1)) - but that evaluates _x multiple times, which may be bad.*/ -# define OP_LIKELY(_x) (__builtin_expect(!!(_x),1)) -# define OP_UNLIKELY(_x) (__builtin_expect(!!(_x),0)) -# else -# define OP_LIKELY(_x) (!!(_x)) -# define OP_UNLIKELY(_x) (!!(_x)) -# endif - -# if defined(OP_ENABLE_ASSERTIONS) -# if OP_GNUC_PREREQ(2,5)||__SUNPRO_C>=0x590 -__attribute__((noreturn)) -# endif -void op_fatal_impl(const char *_str,const char *_file,int _line); - 
-# define OP_FATAL(_str) (op_fatal_impl(_str,__FILE__,__LINE__)) - -# define OP_ASSERT(_cond) \ - do{ \ - if(OP_UNLIKELY(!(_cond)))OP_FATAL("assertion failed: " #_cond); \ - } \ - while(0) -# define OP_ALWAYS_TRUE(_cond) OP_ASSERT(_cond) - -# else -# define OP_FATAL(_str) abort() -# define OP_ASSERT(_cond) -# define OP_ALWAYS_TRUE(_cond) ((void)(_cond)) -# endif - -# define OP_INT64_MAX (2*(((ogg_int64_t)1<<62)-1)|1) -# define OP_INT64_MIN (-OP_INT64_MAX-1) -# define OP_INT32_MAX (2*(((ogg_int32_t)1<<30)-1)|1) -# define OP_INT32_MIN (-OP_INT32_MAX-1) - -# define OP_MIN(_a,_b) ((_a)<(_b)?(_a):(_b)) -# define OP_MAX(_a,_b) ((_a)>(_b)?(_a):(_b)) -# define OP_CLAMP(_lo,_x,_hi) (OP_MAX(_lo,OP_MIN(_x,_hi))) - -/*Advance a file offset by the given amount, clamping against OP_INT64_MAX. - This is used to advance a known offset by things like OP_CHUNK_SIZE or - OP_PAGE_SIZE_MAX, while making sure to avoid signed overflow. - It assumes that both _offset and _amount are non-negative.*/ -#define OP_ADV_OFFSET(_offset,_amount) \ - (OP_MIN(_offset,OP_INT64_MAX-(_amount))+(_amount)) - -/*The maximum channel count for any mapping we'll actually decode.*/ -# define OP_NCHANNELS_MAX (8) - -/*Initial state.*/ -# define OP_NOTOPEN (0) -/*We've found the first Opus stream in the first link.*/ -# define OP_PARTOPEN (1) -# define OP_OPENED (2) -/*We've found the first Opus stream in the current link.*/ -# define OP_STREAMSET (3) -/*We've initialized the decoder for the chosen Opus stream in the current - link.*/ -# define OP_INITSET (4) - -/*Information cached for a single link in a chained Ogg Opus file. 
- We choose the first Opus stream encountered in each link to play back (and - require at least one).*/ -struct OggOpusLink{ - /*The byte offset of the first header page in this link.*/ - opus_int64 offset; - /*The byte offset of the first data page from the chosen Opus stream in this - link (after the headers).*/ - opus_int64 data_offset; - /*The byte offset of the last page from the chosen Opus stream in this link. - This is used when seeking to ensure we find a page before the last one, so - that end-trimming calculations work properly. - This is only valid for seekable sources.*/ - opus_int64 end_offset; - /*The granule position of the last sample. - This is only valid for seekable sources.*/ - ogg_int64_t pcm_end; - /*The granule position before the first sample.*/ - ogg_int64_t pcm_start; - /*The serial number.*/ - ogg_uint32_t serialno; - /*The contents of the info header.*/ - OpusHead head; - /*The contents of the comment header.*/ - OpusTags tags; -}; - -struct OggOpusFile{ - /*The callbacks used to access the data source.*/ - OpusFileCallbacks callbacks; - /*A FILE *, memory bufer, etc.*/ - void *source; - /*Whether or not we can seek with this data source.*/ - int seekable; - /*The number of links in this chained Ogg Opus file.*/ - int nlinks; - /*The cached information from each link in a chained Ogg Opus file. - If source isn't seekable (e.g., it's a pipe), only the current link - appears.*/ - OggOpusLink *links; - /*The number of serial numbers from a single link.*/ - int nserialnos; - /*The capacity of the list of serial numbers from a single link.*/ - int cserialnos; - /*Storage for the list of serial numbers from a single link.*/ - ogg_uint32_t *serialnos; - /*This is the current offset of the data processed by the ogg_sync_state. - After a seek, this should be set to the target offset so that we can track - the byte offsets of subsequent pages. 
- After a call to op_get_next_page(), this will point to the first byte after - that page.*/ - opus_int64 offset; - /*The total size of this data source, or -1 if it's unseekable.*/ - opus_int64 end; - /*Used to locate pages in the data source.*/ - ogg_sync_state oy; - /*One of OP_NOTOPEN, OP_PARTOPEN, OP_OPENED, OP_STREAMSET, OP_INITSET.*/ - int ready_state; - /*The current link being played back.*/ - int cur_link; - /*The number of decoded samples to discard from the start of decoding.*/ - opus_int32 cur_discard_count; - /*The granule position of the previous packet (current packet start time).*/ - ogg_int64_t prev_packet_gp; - /*The stream offset of the most recent page with completed packets, or -1. - This is only needed to recover continued packet data in the seeking logic, - when we use the current position as one of our bounds, only to later - discover it was the correct starting point.*/ - opus_int64 prev_page_offset; - /*The number of bytes read since the last bitrate query, including framing.*/ - opus_int64 bytes_tracked; - /*The number of samples decoded since the last bitrate query.*/ - ogg_int64_t samples_tracked; - /*Takes physical pages and welds them into a logical stream of packets.*/ - ogg_stream_state os; - /*Re-timestamped packets from a single page. 
- Buffering these relies on the undocumented libogg behavior that ogg_packet - pointers remain valid until the next page is submitted to the - ogg_stream_state they came from.*/ - ogg_packet op[255]; - /*The index of the next packet to return.*/ - int op_pos; - /*The total number of packets available.*/ - int op_count; - /*Central working state for the packet-to-PCM decoder.*/ - OpusMSDecoder *od; - /*The application-provided packet decode callback.*/ - op_decode_cb_func decode_cb; - /*The application-provided packet decode callback context.*/ - void *decode_cb_ctx; - /*The stream count used to initialize the decoder.*/ - int od_stream_count; - /*The coupled stream count used to initialize the decoder.*/ - int od_coupled_count; - /*The channel count used to initialize the decoder.*/ - int od_channel_count; - /*The channel mapping used to initialize the decoder.*/ - unsigned char od_mapping[OP_NCHANNELS_MAX]; - /*The buffered data for one decoded packet.*/ - op_sample *od_buffer; - /*The current position in the decoded buffer.*/ - int od_buffer_pos; - /*The number of valid samples in the decoded buffer.*/ - int od_buffer_size; - /*The type of gain offset to apply. - One of OP_HEADER_GAIN, OP_TRACK_GAIN, or OP_ABSOLUTE_GAIN.*/ - int gain_type; - /*The offset to apply to the gain.*/ - opus_int32 gain_offset_q8; - /*Internal state for soft clipping and dithering float->short output.*/ -#if !defined(OP_FIXED_POINT) -# if defined(OP_SOFT_CLIP) - float clip_state[OP_NCHANNELS_MAX]; -# endif - float dither_a[OP_NCHANNELS_MAX*4]; - float dither_b[OP_NCHANNELS_MAX*4]; - opus_uint32 dither_seed; - int dither_mute; - int dither_disabled; - /*The number of channels represented by the internal state. 
- This gets set to 0 whenever anything that would prevent state propagation - occurs (switching between the float/short APIs, or between the - stereo/multistream APIs).*/ - int state_channel_count; -#endif -}; - -int op_strncasecmp(const char *_a,const char *_b,int _n); - -#endif diff --git a/thirdparty/opus/mlp.c b/thirdparty/opus/mlp.c deleted file mode 100644 index ff9e50df47..0000000000 --- a/thirdparty/opus/mlp.c +++ /dev/null @@ -1,145 +0,0 @@ -/* Copyright (c) 2008-2011 Octasic Inc. - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus_types.h" -#include "opus_defines.h" - -#include <math.h> -#include "mlp.h" -#include "arch.h" -#include "tansig_table.h" -#define MAX_NEURONS 100 - -#if 0 -static OPUS_INLINE opus_val16 tansig_approx(opus_val32 _x) /* Q19 */ -{ - int i; - opus_val16 xx; /* Q11 */ - /*double x, y;*/ - opus_val16 dy, yy; /* Q14 */ - /*x = 1.9073e-06*_x;*/ - if (_x>=QCONST32(8,19)) - return QCONST32(1.,14); - if (_x<=-QCONST32(8,19)) - return -QCONST32(1.,14); - xx = EXTRACT16(SHR32(_x, 8)); - /*i = lrint(25*x);*/ - i = SHR32(ADD32(1024,MULT16_16(25, xx)),11); - /*x -= .04*i;*/ - xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8)); - /*x = xx*(1./2048);*/ - /*y = tansig_table[250+i];*/ - yy = tansig_table[250+i]; - /*y = yy*(1./16384);*/ - dy = 16384-MULT16_16_Q14(yy,yy); - yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx))); - return yy; -} -#else -/*extern const float tansig_table[501];*/ -static OPUS_INLINE float tansig_approx(float x) -{ - int i; - float y, dy; - float sign=1; - /* Tests are reversed to catch NaNs */ - if (!(x<8)) - return 1; - if (!(x>-8)) - return -1; -#ifndef FIXED_POINT - /* Another check in case of -ffast-math */ - if (celt_isnan(x)) - return 0; -#endif - if (x<0) - { - x=-x; - sign=-1; - } - i = (int)floor(.5f+25*x); - x -= .04f*i; - y = tansig_table[i]; - dy = 1-y*y; - y = y + x*dy*(1 - y*x); - return sign*y; -} -#endif - -#if 0 -void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out) -{ - int j; - opus_val16 hidden[MAX_NEURONS]; - const opus_val16 *W = m->weights; - /* Copy to tmp_in */ - for (j=0;j<m->topo[1];j++) - { - int k; - opus_val32 sum = SHL32(EXTEND32(*W++),8); - for (k=0;k<m->topo[0];k++) - sum = MAC16_16(sum, in[k],*W++); - hidden[j] = tansig_approx(sum); - } - for (j=0;j<m->topo[2];j++) - { - int k; - opus_val32 sum = SHL32(EXTEND32(*W++),14); - for (k=0;k<m->topo[1];k++) - sum = MAC16_16(sum, hidden[k], *W++); - out[j] = 
tansig_approx(EXTRACT16(PSHR32(sum,17))); - } -} -#else -void mlp_process(const MLP *m, const float *in, float *out) -{ - int j; - float hidden[MAX_NEURONS]; - const float *W = m->weights; - /* Copy to tmp_in */ - for (j=0;j<m->topo[1];j++) - { - int k; - float sum = *W++; - for (k=0;k<m->topo[0];k++) - sum = sum + in[k]**W++; - hidden[j] = tansig_approx(sum); - } - for (j=0;j<m->topo[2];j++) - { - int k; - float sum = *W++; - for (k=0;k<m->topo[1];k++) - sum = sum + hidden[k]**W++; - out[j] = tansig_approx(sum); - } -} -#endif diff --git a/thirdparty/opus/mlp.h b/thirdparty/opus/mlp.h deleted file mode 100644 index 618e246e2c..0000000000 --- a/thirdparty/opus/mlp.h +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright (c) 2008-2011 Octasic Inc. - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef _MLP_H_ -#define _MLP_H_ - -#include "arch.h" - -typedef struct { - int layers; - const int *topo; - const float *weights; -} MLP; - -extern const MLP net; - -void mlp_process(const MLP *m, const float *in, float *out); - -#endif /* _MLP_H_ */ diff --git a/thirdparty/opus/mlp_data.c b/thirdparty/opus/mlp_data.c deleted file mode 100644 index c2fda4e2e5..0000000000 --- a/thirdparty/opus/mlp_data.c +++ /dev/null @@ -1,109 +0,0 @@ -/* The contents of this file was automatically generated by mlp_train.c - It contains multi-layer perceptron (MLP) weights. 
*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "mlp.h" - -/* RMS error was 0.138320, seed was 1361535663 */ - -static const float weights[422] = { - -/* hidden layer */ --0.0941125f, -0.302976f, -0.603555f, -0.19393f, -0.185983f, --0.601617f, -0.0465317f, -0.114563f, -0.103599f, -0.618938f, --0.317859f, -0.169949f, -0.0702885f, 0.148065f, 0.409524f, -0.548432f, 0.367649f, -0.494393f, 0.764306f, -1.83957f, -0.170849f, 12.786f, -1.08848f, -1.27284f, -16.2606f, -24.1773f, -5.57454f, -0.17276f, -0.163388f, -0.224421f, --0.0948944f, -0.0728695f, -0.26557f, -0.100283f, -0.0515459f, --0.146142f, -0.120674f, -0.180655f, 0.12857f, 0.442138f, --0.493735f, 0.167767f, 0.206699f, -0.197567f, 0.417999f, -1.50364f, -0.773341f, -10.0401f, 0.401872f, 2.97966f, -15.2165f, -1.88905f, -1.19254f, 0.0285397f, -0.00405139f, -0.0707565f, 0.00825699f, -0.0927269f, -0.010393f, -0.00428882f, --0.00489743f, -0.0709731f, -0.00255992f, 0.0395619f, 0.226424f, -0.0325231f, 0.162175f, -0.100118f, 0.485789f, 0.12697f, -0.285937f, 0.0155637f, 0.10546f, 3.05558f, 1.15059f, --1.00904f, -1.83088f, 3.31766f, -3.42516f, -0.119135f, --0.0405654f, 0.00690068f, 0.0179877f, -0.0382487f, 0.00597941f, --0.0183611f, 0.00190395f, -0.144322f, -0.0435671f, 0.000990594f, -0.221087f, 0.142405f, 0.484066f, 0.404395f, 0.511955f, --0.237255f, 0.241742f, 0.35045f, -0.699428f, 10.3993f, -2.6507f, -2.43459f, -4.18838f, 1.05928f, 1.71067f, -0.00667811f, -0.0721335f, -0.0397346f, 0.0362704f, -0.11496f, --0.0235776f, 0.0082161f, -0.0141741f, -0.0329699f, -0.0354253f, -0.00277404f, -0.290654f, -1.14767f, -0.319157f, -0.686544f, -0.36897f, 0.478899f, 0.182579f, -0.411069f, 0.881104f, --4.60683f, 1.4697f, 0.335845f, -1.81905f, -30.1699f, -5.55225f, 0.0019508f, -0.123576f, -0.0727332f, -0.0641597f, --0.0534458f, -0.108166f, -0.0937368f, -0.0697883f, -0.0275475f, --0.192309f, -0.110074f, 0.285375f, -0.405597f, 0.0926724f, --0.287881f, -0.851193f, -0.099493f, -0.233764f, -1.2852f, -1.13611f, 3.12168f, 
-0.0699f, -1.86216f, 2.65292f, --7.31036f, 2.44776f, -0.00111802f, -0.0632786f, -0.0376296f, --0.149851f, 0.142963f, 0.184368f, 0.123433f, 0.0756158f, -0.117312f, 0.0933395f, 0.0692163f, 0.0842592f, 0.0704683f, -0.0589963f, 0.0942205f, -0.448862f, 0.0262677f, 0.270352f, --0.262317f, 0.172586f, 2.00227f, -0.159216f, 0.038422f, -10.2073f, 4.15536f, -2.3407f, -0.0550265f, 0.00964792f, --0.141336f, 0.0274501f, 0.0343921f, -0.0487428f, 0.0950172f, --0.00775017f, -0.0372492f, -0.00548121f, -0.0663695f, 0.0960506f, --0.200008f, -0.0412827f, 0.58728f, 0.0515787f, 0.337254f, -0.855024f, 0.668371f, -0.114904f, -3.62962f, -0.467477f, --0.215472f, 2.61537f, 0.406117f, -1.36373f, 0.0425394f, -0.12208f, 0.0934502f, 0.123055f, 0.0340935f, -0.142466f, -0.035037f, -0.0490666f, 0.0733208f, 0.0576672f, 0.123984f, --0.0517194f, -0.253018f, 0.590565f, 0.145849f, 0.315185f, -0.221534f, -0.149081f, 0.216161f, -0.349575f, 24.5664f, --0.994196f, 0.614289f, -18.7905f, -2.83277f, -0.716801f, --0.347201f, 0.479515f, -0.246027f, 0.0758683f, 0.137293f, --0.17781f, 0.118751f, -0.00108329f, -0.237334f, 0.355732f, --0.12991f, -0.0547627f, -0.318576f, -0.325524f, 0.180494f, --0.0625604f, 0.141219f, 0.344064f, 0.37658f, -0.591772f, -5.8427f, -0.38075f, 0.221894f, -1.41934f, -1.87943e+06f, -1.34114f, 0.0283355f, -0.0447856f, -0.0211466f, -0.0256927f, -0.0139618f, 0.0207934f, -0.0107666f, 0.0110969f, 0.0586069f, --0.0253545f, -0.0328433f, 0.11872f, -0.216943f, 0.145748f, -0.119808f, -0.0915211f, -0.120647f, -0.0787719f, -0.143644f, --0.595116f, -1.152f, -1.25335f, -1.17092f, 4.34023f, --975268.f, -1.37033f, -0.0401123f, 0.210602f, -0.136656f, -0.135962f, -0.0523293f, 0.0444604f, 0.0143928f, 0.00412666f, --0.0193003f, 0.218452f, -0.110204f, -2.02563f, 0.918238f, --2.45362f, 1.19542f, -0.061362f, -1.92243f, 0.308111f, -0.49764f, 0.912356f, 0.209272f, -2.34525f, 2.19326f, --6.47121f, 1.69771f, -0.725123f, 0.0118929f, 0.0377944f, -0.0554003f, 0.0226452f, -0.0704421f, -0.0300309f, 0.0122978f, --0.0041782f, 
-0.0686612f, 0.0313115f, 0.039111f, 0.364111f, --0.0945548f, 0.0229876f, -0.17414f, 0.329795f, 0.114714f, -0.30022f, 0.106997f, 0.132355f, 5.79932f, 0.908058f, --0.905324f, -3.3561f, 0.190647f, 0.184211f, -0.673648f, -0.231807f, -0.0586222f, 0.230752f, -0.438277f, 0.245857f, --0.17215f, 0.0876383f, -0.720512f, 0.162515f, 0.0170571f, -0.101781f, 0.388477f, 1.32931f, 1.08548f, -0.936301f, --2.36958f, -6.71988f, -3.44376f, 2.13818f, 14.2318f, -4.91459f, -3.09052f, -9.69191f, -0.768234f, 1.79604f, -0.0549653f, 0.163399f, 0.0797025f, 0.0343933f, -0.0555876f, --0.00505673f, 0.0187258f, 0.0326628f, 0.0231486f, 0.15573f, -0.0476223f, -0.254824f, 1.60155f, -0.801221f, 2.55496f, -0.737629f, -1.36249f, -0.695463f, -2.44301f, -1.73188f, -3.95279f, 1.89068f, 0.486087f, -11.3343f, 3.9416e+06f, - -/* output layer */ --0.381439f, 0.12115f, -0.906927f, 2.93878f, 1.6388f, -0.882811f, 0.874344f, 1.21726f, -0.874545f, 0.321706f, -0.785055f, 0.946558f, -0.575066f, -3.46553f, 0.884905f, -0.0924047f, -9.90712f, 0.391338f, 0.160103f, -2.04954f, -4.1455f, 0.0684029f, -0.144761f, -0.285282f, 0.379244f, --1.1584f, -0.0277241f, -9.85f, -4.82386f, 3.71333f, -3.87308f, 3.52558f}; - -static const int topo[3] = {25, 15, 2}; - -const MLP net = { - 3, - topo, - weights -}; diff --git a/thirdparty/opus/opus.c b/thirdparty/opus/opus.c deleted file mode 100644 index f76f125cfa..0000000000 --- a/thirdparty/opus/opus.c +++ /dev/null @@ -1,356 +0,0 @@ -/* Copyright (c) 2011 Xiph.Org Foundation, Skype Limited - Written by Jean-Marc Valin and Koen Vos */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus.h" -#include "opus_private.h" - -#ifndef DISABLE_FLOAT_API -OPUS_EXPORT void opus_pcm_soft_clip(float *_x, int N, int C, float *declip_mem) -{ - int c; - int i; - float *x; - - if (C<1 || N<1 || !_x || !declip_mem) return; - - /* First thing: saturate everything to +/- 2 which is the highest level our - non-linearity can handle. At the point where the signal reaches +/-2, - the derivative will be zero anyway, so this doesn't introduce any - discontinuity in the derivative. */ - for (i=0;i<N*C;i++) - _x[i] = MAX16(-2.f, MIN16(2.f, _x[i])); - for (c=0;c<C;c++) - { - float a; - float x0; - int curr; - - x = _x+c; - a = declip_mem[c]; - /* Continue applying the non-linearity from the previous frame to avoid - any discontinuity. 
*/ - for (i=0;i<N;i++) - { - if (x[i*C]*a>=0) - break; - x[i*C] = x[i*C]+a*x[i*C]*x[i*C]; - } - - curr=0; - x0 = x[0]; - while(1) - { - int start, end; - float maxval; - int special=0; - int peak_pos; - for (i=curr;i<N;i++) - { - if (x[i*C]>1 || x[i*C]<-1) - break; - } - if (i==N) - { - a=0; - break; - } - peak_pos = i; - start=end=i; - maxval=ABS16(x[i*C]); - /* Look for first zero crossing before clipping */ - while (start>0 && x[i*C]*x[(start-1)*C]>=0) - start--; - /* Look for first zero crossing after clipping */ - while (end<N && x[i*C]*x[end*C]>=0) - { - /* Look for other peaks until the next zero-crossing. */ - if (ABS16(x[end*C])>maxval) - { - maxval = ABS16(x[end*C]); - peak_pos = end; - } - end++; - } - /* Detect the special case where we clip before the first zero crossing */ - special = (start==0 && x[i*C]*x[0]>=0); - - /* Compute a such that maxval + a*maxval^2 = 1 */ - a=(maxval-1)/(maxval*maxval); - /* Slightly boost "a" by 2^-22. This is just enough to ensure -ffast-math - does not cause output values larger than +/-1, but small enough not - to matter even for 24-bit output. */ - a += a*2.4e-7; - if (x[i*C]>0) - a = -a; - /* Apply soft clipping */ - for (i=start;i<end;i++) - x[i*C] = x[i*C]+a*x[i*C]*x[i*C]; - - if (special && peak_pos>=2) - { - /* Add a linear ramp from the first sample to the signal peak. - This avoids a discontinuity at the beginning of the frame. 
*/ - float delta; - float offset = x0-x[0]; - delta = offset / peak_pos; - for (i=curr;i<peak_pos;i++) - { - offset -= delta; - x[i*C] += offset; - x[i*C] = MAX16(-1.f, MIN16(1.f, x[i*C])); - } - } - curr = end; - if (curr==N) - break; - } - declip_mem[c] = a; - } -} -#endif - -int encode_size(int size, unsigned char *data) -{ - if (size < 252) - { - data[0] = size; - return 1; - } else { - data[0] = 252+(size&0x3); - data[1] = (size-(int)data[0])>>2; - return 2; - } -} - -static int parse_size(const unsigned char *data, opus_int32 len, opus_int16 *size) -{ - if (len<1) - { - *size = -1; - return -1; - } else if (data[0]<252) - { - *size = data[0]; - return 1; - } else if (len<2) - { - *size = -1; - return -1; - } else { - *size = 4*data[1] + data[0]; - return 2; - } -} - -int opus_packet_get_samples_per_frame(const unsigned char *data, - opus_int32 Fs) -{ - int audiosize; - if (data[0]&0x80) - { - audiosize = ((data[0]>>3)&0x3); - audiosize = (Fs<<audiosize)/400; - } else if ((data[0]&0x60) == 0x60) - { - audiosize = (data[0]&0x08) ? 
Fs/50 : Fs/100; - } else { - audiosize = ((data[0]>>3)&0x3); - if (audiosize == 3) - audiosize = Fs*60/1000; - else - audiosize = (Fs<<audiosize)/100; - } - return audiosize; -} - -int opus_packet_parse_impl(const unsigned char *data, opus_int32 len, - int self_delimited, unsigned char *out_toc, - const unsigned char *frames[48], opus_int16 size[48], - int *payload_offset, opus_int32 *packet_offset) -{ - int i, bytes; - int count; - int cbr; - unsigned char ch, toc; - int framesize; - opus_int32 last_size; - opus_int32 pad = 0; - const unsigned char *data0 = data; - - if (size==NULL || len<0) - return OPUS_BAD_ARG; - if (len==0) - return OPUS_INVALID_PACKET; - - framesize = opus_packet_get_samples_per_frame(data, 48000); - - cbr = 0; - toc = *data++; - len--; - last_size = len; - switch (toc&0x3) - { - /* One frame */ - case 0: - count=1; - break; - /* Two CBR frames */ - case 1: - count=2; - cbr = 1; - if (!self_delimited) - { - if (len&0x1) - return OPUS_INVALID_PACKET; - last_size = len/2; - /* If last_size doesn't fit in size[0], we'll catch it later */ - size[0] = (opus_int16)last_size; - } - break; - /* Two VBR frames */ - case 2: - count = 2; - bytes = parse_size(data, len, size); - len -= bytes; - if (size[0]<0 || size[0] > len) - return OPUS_INVALID_PACKET; - data += bytes; - last_size = len-size[0]; - break; - /* Multiple CBR/VBR frames (from 0 to 120 ms) */ - default: /*case 3:*/ - if (len<1) - return OPUS_INVALID_PACKET; - /* Number of frames encoded in bits 0 to 5 */ - ch = *data++; - count = ch&0x3F; - if (count <= 0 || framesize*count > 5760) - return OPUS_INVALID_PACKET; - len--; - /* Padding flag is bit 6 */ - if (ch&0x40) - { - int p; - do { - int tmp; - if (len<=0) - return OPUS_INVALID_PACKET; - p = *data++; - len--; - tmp = p==255 ? 
254: p; - len -= tmp; - pad += tmp; - } while (p==255); - } - if (len<0) - return OPUS_INVALID_PACKET; - /* VBR flag is bit 7 */ - cbr = !(ch&0x80); - if (!cbr) - { - /* VBR case */ - last_size = len; - for (i=0;i<count-1;i++) - { - bytes = parse_size(data, len, size+i); - len -= bytes; - if (size[i]<0 || size[i] > len) - return OPUS_INVALID_PACKET; - data += bytes; - last_size -= bytes+size[i]; - } - if (last_size<0) - return OPUS_INVALID_PACKET; - } else if (!self_delimited) - { - /* CBR case */ - last_size = len/count; - if (last_size*count!=len) - return OPUS_INVALID_PACKET; - for (i=0;i<count-1;i++) - size[i] = (opus_int16)last_size; - } - break; - } - /* Self-delimited framing has an extra size for the last frame. */ - if (self_delimited) - { - bytes = parse_size(data, len, size+count-1); - len -= bytes; - if (size[count-1]<0 || size[count-1] > len) - return OPUS_INVALID_PACKET; - data += bytes; - /* For CBR packets, apply the size to all the frames. */ - if (cbr) - { - if (size[count-1]*count > len) - return OPUS_INVALID_PACKET; - for (i=0;i<count-1;i++) - size[i] = size[count-1]; - } else if (bytes+size[count-1] > last_size) - return OPUS_INVALID_PACKET; - } else - { - /* Because it's not encoded explicitly, it's possible the size of the - last packet (or all the packets, for the CBR case) is larger than - 1275. 
Reject them here.*/ - if (last_size > 1275) - return OPUS_INVALID_PACKET; - size[count-1] = (opus_int16)last_size; - } - - if (payload_offset) - *payload_offset = (int)(data-data0); - - for (i=0;i<count;i++) - { - if (frames) - frames[i] = data; - data += size[i]; - } - - if (packet_offset) - *packet_offset = pad+(opus_int32)(data-data0); - - if (out_toc) - *out_toc = toc; - - return count; -} - -int opus_packet_parse(const unsigned char *data, opus_int32 len, - unsigned char *out_toc, const unsigned char *frames[48], - opus_int16 size[48], int *payload_offset) -{ - return opus_packet_parse_impl(data, len, 0, out_toc, - frames, size, payload_offset, NULL); -} - diff --git a/thirdparty/opus/opus/opus.h b/thirdparty/opus/opus/opus.h deleted file mode 100644 index 5be73ddf4e..0000000000 --- a/thirdparty/opus/opus/opus.h +++ /dev/null @@ -1,981 +0,0 @@ -/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited - Written by Jean-Marc Valin and Koen Vos */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/** - * @file opus.h - * @brief Opus reference implementation API - */ - -#ifndef OPUS_H -#define OPUS_H - -#include "opus_types.h" -#include "opus_defines.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * @mainpage Opus - * - * The Opus codec is designed for interactive speech and audio transmission over the Internet. - * It is designed by the IETF Codec Working Group and incorporates technology from - * Skype's SILK codec and Xiph.Org's CELT codec. - * - * The Opus codec is designed to handle a wide range of interactive audio applications, - * including Voice over IP, videoconferencing, in-game chat, and even remote live music - * performances. It can scale from low bit-rate narrowband speech to very high quality - * stereo music. 
Its main features are: - - * @li Sampling rates from 8 to 48 kHz - * @li Bit-rates from 6 kb/s to 510 kb/s - * @li Support for both constant bit-rate (CBR) and variable bit-rate (VBR) - * @li Audio bandwidth from narrowband to full-band - * @li Support for speech and music - * @li Support for mono and stereo - * @li Support for multichannel (up to 255 channels) - * @li Frame sizes from 2.5 ms to 60 ms - * @li Good loss robustness and packet loss concealment (PLC) - * @li Floating point and fixed-point implementation - * - * Documentation sections: - * @li @ref opus_encoder - * @li @ref opus_decoder - * @li @ref opus_repacketizer - * @li @ref opus_multistream - * @li @ref opus_libinfo - * @li @ref opus_custom - */ - -/** @defgroup opus_encoder Opus Encoder - * @{ - * - * @brief This page describes the process and functions used to encode Opus. - * - * Since Opus is a stateful codec, the encoding process starts with creating an encoder - * state. This can be done with: - * - * @code - * int error; - * OpusEncoder *enc; - * enc = opus_encoder_create(Fs, channels, application, &error); - * @endcode - * - * From this point, @c enc can be used for encoding an audio stream. An encoder state - * @b must @b not be used for more than one stream at the same time. Similarly, the encoder - * state @b must @b not be re-initialized for each frame. - * - * While opus_encoder_create() allocates memory for the state, it's also possible - * to initialize pre-allocated memory: - * - * @code - * int size; - * int error; - * OpusEncoder *enc; - * size = opus_encoder_get_size(channels); - * enc = malloc(size); - * error = opus_encoder_init(enc, Fs, channels, application); - * @endcode - * - * where opus_encoder_get_size() returns the required size for the encoder state. Note that - * future versions of this code may change the size, so no assuptions should be made about it. - * - * The encoder state is always continuous in memory and only a shallow copy is sufficient - * to copy it (e.g. 
memcpy()) - * - * It is possible to change some of the encoder's settings using the opus_encoder_ctl() - * interface. All these settings already default to the recommended value, so they should - * only be changed when necessary. The most common settings one may want to change are: - * - * @code - * opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate)); - * opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(complexity)); - * opus_encoder_ctl(enc, OPUS_SET_SIGNAL(signal_type)); - * @endcode - * - * where - * - * @arg bitrate is in bits per second (b/s) - * @arg complexity is a value from 1 to 10, where 1 is the lowest complexity and 10 is the highest - * @arg signal_type is either OPUS_AUTO (default), OPUS_SIGNAL_VOICE, or OPUS_SIGNAL_MUSIC - * - * See @ref opus_encoderctls and @ref opus_genericctls for a complete list of parameters that can be set or queried. Most parameters can be set or changed at any time during a stream. - * - * To encode a frame, opus_encode() or opus_encode_float() must be called with exactly one frame (2.5, 5, 10, 20, 40 or 60 ms) of audio data: - * @code - * len = opus_encode(enc, audio_frame, frame_size, packet, max_packet); - * @endcode - * - * where - * <ul> - * <li>audio_frame is the audio data in opus_int16 (or float for opus_encode_float())</li> - * <li>frame_size is the duration of the frame in samples (per channel)</li> - * <li>packet is the byte array to which the compressed data is written</li> - * <li>max_packet is the maximum number of bytes that can be written in the packet (4000 bytes is recommended). - * Do not use max_packet to control VBR target bitrate, instead use the #OPUS_SET_BITRATE CTL.</li> - * </ul> - * - * opus_encode() and opus_encode_float() return the number of bytes actually written to the packet. - * The return value <b>can be negative</b>, which indicates that an error has occurred. If the return value - * is 2 bytes or less, then the packet does not need to be transmitted (DTX). 
- * - * Once the encoder state if no longer needed, it can be destroyed with - * - * @code - * opus_encoder_destroy(enc); - * @endcode - * - * If the encoder was created with opus_encoder_init() rather than opus_encoder_create(), - * then no action is required aside from potentially freeing the memory that was manually - * allocated for it (calling free(enc) for the example above) - * - */ - -/** Opus encoder state. - * This contains the complete state of an Opus encoder. - * It is position independent and can be freely copied. - * @see opus_encoder_create,opus_encoder_init - */ -typedef struct OpusEncoder OpusEncoder; - -/** Gets the size of an <code>OpusEncoder</code> structure. - * @param[in] channels <tt>int</tt>: Number of channels. - * This must be 1 or 2. - * @returns The size in bytes. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_encoder_get_size(int channels); - -/** - */ - -/** Allocates and initializes an encoder state. - * There are three coding modes: - * - * @ref OPUS_APPLICATION_VOIP gives best quality at a given bitrate for voice - * signals. It enhances the input signal by high-pass filtering and - * emphasizing formants and harmonics. Optionally it includes in-band - * forward error correction to protect against packet loss. Use this - * mode for typical VoIP applications. Because of the enhancement, - * even at high bitrates the output may sound different from the input. - * - * @ref OPUS_APPLICATION_AUDIO gives best quality at a given bitrate for most - * non-voice signals like music. Use this mode for music and mixed - * (music/voice) content, broadcast, and applications requiring less - * than 15 ms of coding delay. - * - * @ref OPUS_APPLICATION_RESTRICTED_LOWDELAY configures low-delay mode that - * disables the speech-optimized mode in exchange for slightly reduced delay. - * This mode can only be set on an newly initialized or freshly reset encoder - * because it changes the codec delay. 
- * - * This is useful when the caller knows that the speech-optimized modes will not be needed (use with caution). - * @param [in] Fs <tt>opus_int32</tt>: Sampling rate of input signal (Hz) - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) in input signal - * @param [in] application <tt>int</tt>: Coding mode (@ref OPUS_APPLICATION_VOIP/@ref OPUS_APPLICATION_AUDIO/@ref OPUS_APPLICATION_RESTRICTED_LOWDELAY) - * @param [out] error <tt>int*</tt>: @ref opus_errorcodes - * @note Regardless of the sampling rate and number channels selected, the Opus encoder - * can switch to a lower audio bandwidth or number of channels if the bitrate - * selected is too low. This also means that it is safe to always use 48 kHz stereo input - * and let the encoder optimize the encoding. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusEncoder *opus_encoder_create( - opus_int32 Fs, - int channels, - int application, - int *error -); - -/** Initializes a previously allocated encoder state - * The memory pointed to by st must be at least the size returned by opus_encoder_get_size(). - * This is intended for applications which use their own allocator instead of malloc. - * @see opus_encoder_create(),opus_encoder_get_size() - * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL. - * @param [in] st <tt>OpusEncoder*</tt>: Encoder state - * @param [in] Fs <tt>opus_int32</tt>: Sampling rate of input signal (Hz) - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. 
- * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) in input signal - * @param [in] application <tt>int</tt>: Coding mode (OPUS_APPLICATION_VOIP/OPUS_APPLICATION_AUDIO/OPUS_APPLICATION_RESTRICTED_LOWDELAY) - * @retval #OPUS_OK Success or @ref opus_errorcodes - */ -OPUS_EXPORT int opus_encoder_init( - OpusEncoder *st, - opus_int32 Fs, - int channels, - int application -) OPUS_ARG_NONNULL(1); - -/** Encodes an Opus frame. - * @param [in] st <tt>OpusEncoder*</tt>: Encoder state - * @param [in] pcm <tt>opus_int16*</tt>: Input signal (interleaved if 2 channels). length is frame_size*channels*sizeof(opus_int16) - * @param [in] frame_size <tt>int</tt>: Number of samples per channel in the - * input signal. - * This must be an Opus frame size for - * the encoder's sampling rate. - * For example, at 48 kHz the permitted - * values are 120, 240, 480, 960, 1920, - * and 2880. - * Passing in a duration of less than - * 10 ms (480 samples at 48 kHz) will - * prevent the encoder from using the LPC - * or hybrid modes. - * @param [out] data <tt>unsigned char*</tt>: Output payload. - * This must contain storage for at - * least \a max_data_bytes. - * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated - * memory for the output - * payload. This may be - * used to impose an upper limit on - * the instant bitrate, but should - * not be used as the only bitrate - * control. Use #OPUS_SET_BITRATE to - * control the bitrate. - * @returns The length of the encoded packet (in bytes) on success or a - * negative error code (see @ref opus_errorcodes) on failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_encode( - OpusEncoder *st, - const opus_int16 *pcm, - int frame_size, - unsigned char *data, - opus_int32 max_data_bytes -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); - -/** Encodes an Opus frame from floating point input. 
- * @param [in] st <tt>OpusEncoder*</tt>: Encoder state - * @param [in] pcm <tt>float*</tt>: Input in float format (interleaved if 2 channels), with a normal range of +/-1.0. - * Samples with a range beyond +/-1.0 are supported but will - * be clipped by decoders using the integer API and should - * only be used if it is known that the far end supports - * extended dynamic range. - * length is frame_size*channels*sizeof(float) - * @param [in] frame_size <tt>int</tt>: Number of samples per channel in the - * input signal. - * This must be an Opus frame size for - * the encoder's sampling rate. - * For example, at 48 kHz the permitted - * values are 120, 240, 480, 960, 1920, - * and 2880. - * Passing in a duration of less than - * 10 ms (480 samples at 48 kHz) will - * prevent the encoder from using the LPC - * or hybrid modes. - * @param [out] data <tt>unsigned char*</tt>: Output payload. - * This must contain storage for at - * least \a max_data_bytes. - * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated - * memory for the output - * payload. This may be - * used to impose an upper limit on - * the instant bitrate, but should - * not be used as the only bitrate - * control. Use #OPUS_SET_BITRATE to - * control the bitrate. - * @returns The length of the encoded packet (in bytes) on success or a - * negative error code (see @ref opus_errorcodes) on failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_encode_float( - OpusEncoder *st, - const float *pcm, - int frame_size, - unsigned char *data, - opus_int32 max_data_bytes -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); - -/** Frees an <code>OpusEncoder</code> allocated by opus_encoder_create(). - * @param[in] st <tt>OpusEncoder*</tt>: State to be freed. - */ -OPUS_EXPORT void opus_encoder_destroy(OpusEncoder *st); - -/** Perform a CTL function on an Opus encoder. - * - * Generally the request and subsequent arguments are generated - * by a convenience macro. 
- * @param st <tt>OpusEncoder*</tt>: Encoder state. - * @param request This and all remaining parameters should be replaced by one - * of the convenience macros in @ref opus_genericctls or - * @ref opus_encoderctls. - * @see opus_genericctls - * @see opus_encoderctls - */ -OPUS_EXPORT int opus_encoder_ctl(OpusEncoder *st, int request, ...) OPUS_ARG_NONNULL(1); -/**@}*/ - -/** @defgroup opus_decoder Opus Decoder - * @{ - * - * @brief This page describes the process and functions used to decode Opus. - * - * The decoding process also starts with creating a decoder - * state. This can be done with: - * @code - * int error; - * OpusDecoder *dec; - * dec = opus_decoder_create(Fs, channels, &error); - * @endcode - * where - * @li Fs is the sampling rate and must be 8000, 12000, 16000, 24000, or 48000 - * @li channels is the number of channels (1 or 2) - * @li error will hold the error code in case of failure (or #OPUS_OK on success) - * @li the return value is a newly created decoder state to be used for decoding - * - * While opus_decoder_create() allocates memory for the state, it's also possible - * to initialize pre-allocated memory: - * @code - * int size; - * int error; - * OpusDecoder *dec; - * size = opus_decoder_get_size(channels); - * dec = malloc(size); - * error = opus_decoder_init(dec, Fs, channels); - * @endcode - * where opus_decoder_get_size() returns the required size for the decoder state. Note that - * future versions of this code may change the size, so no assumptions should be made about it. - * - * The decoder state is always continuous in memory and only a shallow copy is sufficient - * to copy it (e.g. 
memcpy()) - * - * To decode a frame, opus_decode() or opus_decode_float() must be called with a packet of compressed audio data: - * @code - * frame_size = opus_decode(dec, packet, len, decoded, max_size, 0); - * @endcode - * where - * - * @li packet is the byte array containing the compressed data - * @li len is the exact number of bytes contained in the packet - * @li decoded is the decoded audio data in opus_int16 (or float for opus_decode_float()) - * @li max_size is the max duration of the frame in samples (per channel) that can fit into the decoded_frame array - * - * opus_decode() and opus_decode_float() return the number of samples (per channel) decoded from the packet. - * If that value is negative, then an error has occurred. This can occur if the packet is corrupted or if the audio - * buffer is too small to hold the decoded audio. - * - * Opus is a stateful codec with overlapping blocks and as a result Opus - * packets are not coded independently of each other. Packets must be - * passed into the decoder serially and in the correct order for a correct - * decode. Lost packets can be replaced with loss concealment by calling - * the decoder with a null pointer and zero length for the missing packet. - * - * A single codec state may only be accessed from a single thread at - * a time and any required locking must be performed by the caller. Separate - * streams must be decoded with separate decoder states and can be decoded - * in parallel unless the library was compiled with NONTHREADSAFE_PSEUDOSTACK - * defined. - * - */ - -/** Opus decoder state. - * This contains the complete state of an Opus decoder. - * It is position independent and can be freely copied. - * @see opus_decoder_create,opus_decoder_init - */ -typedef struct OpusDecoder OpusDecoder; - -/** Gets the size of an <code>OpusDecoder</code> structure. - * @param [in] channels <tt>int</tt>: Number of channels. - * This must be 1 or 2. - * @returns The size in bytes. 
- */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decoder_get_size(int channels); - -/** Allocates and initializes a decoder state. - * @param [in] Fs <tt>opus_int32</tt>: Sample rate to decode at (Hz). - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) to decode - * @param [out] error <tt>int*</tt>: #OPUS_OK Success or @ref opus_errorcodes - * - * Internally Opus stores data at 48000 Hz, so that should be the default - * value for Fs. However, the decoder can efficiently decode to buffers - * at 8, 12, 16, and 24 kHz so if for some reason the caller cannot use - * data at the full sample rate, or knows the compressed data doesn't - * use the full frequency range, it can request decoding at a reduced - * rate. Likewise, the decoder is capable of filling in either mono or - * interleaved stereo pcm buffers, at the caller's request. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusDecoder *opus_decoder_create( - opus_int32 Fs, - int channels, - int *error -); - -/** Initializes a previously allocated decoder state. - * The state must be at least the size returned by opus_decoder_get_size(). - * This is intended for applications which use their own allocator instead of malloc. @see opus_decoder_create,opus_decoder_get_size - * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL. - * @param [in] st <tt>OpusDecoder*</tt>: Decoder state. - * @param [in] Fs <tt>opus_int32</tt>: Sampling rate to decode to (Hz). - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) to decode - * @retval #OPUS_OK Success or @ref opus_errorcodes - */ -OPUS_EXPORT int opus_decoder_init( - OpusDecoder *st, - opus_int32 Fs, - int channels -) OPUS_ARG_NONNULL(1); - -/** Decode an Opus packet. - * @param [in] st <tt>OpusDecoder*</tt>: Decoder state - * @param [in] data <tt>char*</tt>: Input payload. 
Use a NULL pointer to indicate packet loss - * @param [in] len <tt>opus_int32</tt>: Number of bytes in payload* - * @param [out] pcm <tt>opus_int16*</tt>: Output signal (interleaved if 2 channels). length - * is frame_size*channels*sizeof(opus_int16) - * @param [in] frame_size Number of samples per channel of available space in \a pcm. - * If this is less than the maximum packet duration (120ms; 5760 for 48kHz), this function will - * not be capable of decoding some packets. In the case of PLC (data==NULL) or FEC (decode_fec=1), - * then frame_size needs to be exactly the duration of audio that is missing, otherwise the - * decoder will not be in the optimal state to decode the next incoming packet. For the PLC and - * FEC cases, frame_size <b>must</b> be a multiple of 2.5 ms. - * @param [in] decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band forward error correction data be - * decoded. If no such data is available, the frame is decoded as if it were lost. - * @returns Number of decoded samples or @ref opus_errorcodes - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decode( - OpusDecoder *st, - const unsigned char *data, - opus_int32 len, - opus_int16 *pcm, - int frame_size, - int decode_fec -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); - -/** Decode an Opus packet with floating point output. - * @param [in] st <tt>OpusDecoder*</tt>: Decoder state - * @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss - * @param [in] len <tt>opus_int32</tt>: Number of bytes in payload - * @param [out] pcm <tt>float*</tt>: Output signal (interleaved if 2 channels). length - * is frame_size*channels*sizeof(float) - * @param [in] frame_size Number of samples per channel of available space in \a pcm. - * If this is less than the maximum packet duration (120ms; 5760 for 48kHz), this function will - * not be capable of decoding some packets. 
In the case of PLC (data==NULL) or FEC (decode_fec=1), - * then frame_size needs to be exactly the duration of audio that is missing, otherwise the - * decoder will not be in the optimal state to decode the next incoming packet. For the PLC and - * FEC cases, frame_size <b>must</b> be a multiple of 2.5 ms. - * @param [in] decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band forward error correction data be - * decoded. If no such data is available the frame is decoded as if it were lost. - * @returns Number of decoded samples or @ref opus_errorcodes - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decode_float( - OpusDecoder *st, - const unsigned char *data, - opus_int32 len, - float *pcm, - int frame_size, - int decode_fec -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); - -/** Perform a CTL function on an Opus decoder. - * - * Generally the request and subsequent arguments are generated - * by a convenience macro. - * @param st <tt>OpusDecoder*</tt>: Decoder state. - * @param request This and all remaining parameters should be replaced by one - * of the convenience macros in @ref opus_genericctls or - * @ref opus_decoderctls. - * @see opus_genericctls - * @see opus_decoderctls - */ -OPUS_EXPORT int opus_decoder_ctl(OpusDecoder *st, int request, ...) OPUS_ARG_NONNULL(1); - -/** Frees an <code>OpusDecoder</code> allocated by opus_decoder_create(). - * @param[in] st <tt>OpusDecoder*</tt>: State to be freed. - */ -OPUS_EXPORT void opus_decoder_destroy(OpusDecoder *st); - -/** Parse an opus packet into one or more frames. - * Opus_decode will perform this operation internally so most applications do - * not need to use this function. - * This function does not copy the frames, the returned pointers are pointers into - * the input packet. 
- * @param [in] data <tt>char*</tt>: Opus packet to be parsed - * @param [in] len <tt>opus_int32</tt>: size of data - * @param [out] out_toc <tt>char*</tt>: TOC pointer - * @param [out] frames <tt>char*[48]</tt> encapsulated frames - * @param [out] size <tt>opus_int16[48]</tt> sizes of the encapsulated frames - * @param [out] payload_offset <tt>int*</tt>: returns the position of the payload within the packet (in bytes) - * @returns number of frames - */ -OPUS_EXPORT int opus_packet_parse( - const unsigned char *data, - opus_int32 len, - unsigned char *out_toc, - const unsigned char *frames[48], - opus_int16 size[48], - int *payload_offset -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); - -/** Gets the bandwidth of an Opus packet. - * @param [in] data <tt>char*</tt>: Opus packet - * @retval OPUS_BANDWIDTH_NARROWBAND Narrowband (4kHz bandpass) - * @retval OPUS_BANDWIDTH_MEDIUMBAND Mediumband (6kHz bandpass) - * @retval OPUS_BANDWIDTH_WIDEBAND Wideband (8kHz bandpass) - * @retval OPUS_BANDWIDTH_SUPERWIDEBAND Superwideband (12kHz bandpass) - * @retval OPUS_BANDWIDTH_FULLBAND Fullband (20kHz bandpass) - * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_bandwidth(const unsigned char *data) OPUS_ARG_NONNULL(1); - -/** Gets the number of samples per frame from an Opus packet. - * @param [in] data <tt>char*</tt>: Opus packet. - * This must contain at least one byte of - * data. - * @param [in] Fs <tt>opus_int32</tt>: Sampling rate in Hz. - * This must be a multiple of 400, or - * inaccurate results will be returned. - * @returns Number of samples per frame. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_samples_per_frame(const unsigned char *data, opus_int32 Fs) OPUS_ARG_NONNULL(1); - -/** Gets the number of channels from an Opus packet. 
- * @param [in] data <tt>char*</tt>: Opus packet - * @returns Number of channels - * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_channels(const unsigned char *data) OPUS_ARG_NONNULL(1); - -/** Gets the number of frames in an Opus packet. - * @param [in] packet <tt>char*</tt>: Opus packet - * @param [in] len <tt>opus_int32</tt>: Length of packet - * @returns Number of frames - * @retval OPUS_BAD_ARG Insufficient data was passed to the function - * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_frames(const unsigned char packet[], opus_int32 len) OPUS_ARG_NONNULL(1); - -/** Gets the number of samples of an Opus packet. - * @param [in] packet <tt>char*</tt>: Opus packet - * @param [in] len <tt>opus_int32</tt>: Length of packet - * @param [in] Fs <tt>opus_int32</tt>: Sampling rate in Hz. - * This must be a multiple of 400, or - * inaccurate results will be returned. - * @returns Number of samples - * @retval OPUS_BAD_ARG Insufficient data was passed to the function - * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_samples(const unsigned char packet[], opus_int32 len, opus_int32 Fs) OPUS_ARG_NONNULL(1); - -/** Gets the number of samples of an Opus packet. 
- * @param [in] dec <tt>OpusDecoder*</tt>: Decoder state - * @param [in] packet <tt>char*</tt>: Opus packet - * @param [in] len <tt>opus_int32</tt>: Length of packet - * @returns Number of samples - * @retval OPUS_BAD_ARG Insufficient data was passed to the function - * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decoder_get_nb_samples(const OpusDecoder *dec, const unsigned char packet[], opus_int32 len) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2); - -/** Applies soft-clipping to bring a float signal within the [-1,1] range. If - * the signal is already in that range, nothing is done. If there are values - * outside of [-1,1], then the signal is clipped as smoothly as possible to - * both fit in the range and avoid creating excessive distortion in the - * process. - * @param [in,out] pcm <tt>float*</tt>: Input PCM and modified PCM - * @param [in] frame_size <tt>int</tt> Number of samples per channel to process - * @param [in] channels <tt>int</tt>: Number of channels - * @param [in,out] softclip_mem <tt>float*</tt>: State memory for the soft clipping process (one float per channel, initialized to zero) - */ -OPUS_EXPORT void opus_pcm_soft_clip(float *pcm, int frame_size, int channels, float *softclip_mem); - - -/**@}*/ - -/** @defgroup opus_repacketizer Repacketizer - * @{ - * - * The repacketizer can be used to merge multiple Opus packets into a single - * packet or alternatively to split Opus packets that have previously been - * merged. Splitting valid Opus packets is always guaranteed to succeed, - * whereas merging valid packets only succeeds if all frames have the same - * mode, bandwidth, and frame size, and when the total duration of the merged - * packet is no more than 120 ms. The 120 ms limit comes from the - * specification and limits decoder memory requirements at a point where - * framing overhead becomes negligible. 
- * - * The repacketizer currently only operates on elementary Opus - * streams. It will not manipulate multistream packets successfully, except in - * the degenerate case where they consist of data from a single stream. - * - * The repacketizing process starts with creating a repacketizer state, either - * by calling opus_repacketizer_create() or by allocating the memory yourself, - * e.g., - * @code - * OpusRepacketizer *rp; - * rp = (OpusRepacketizer*)malloc(opus_repacketizer_get_size()); - * if (rp != NULL) - * opus_repacketizer_init(rp); - * @endcode - * - * Then the application should submit packets with opus_repacketizer_cat(), - * extract new packets with opus_repacketizer_out() or - * opus_repacketizer_out_range(), and then reset the state for the next set of - * input packets via opus_repacketizer_init(). - * - * For example, to split a sequence of packets into individual frames: - * @code - * unsigned char *data; - * int len; - * while (get_next_packet(&data, &len)) - * { - * unsigned char out[1276]; - * opus_int32 out_len; - * int nb_frames; - * int err; - * int i; - * err = opus_repacketizer_cat(rp, data, len); - * if (err != OPUS_OK) - * { - * release_packet(data); - * return err; - * } - * nb_frames = opus_repacketizer_get_nb_frames(rp); - * for (i = 0; i < nb_frames; i++) - * { - * out_len = opus_repacketizer_out_range(rp, i, i+1, out, sizeof(out)); - * if (out_len < 0) - * { - * release_packet(data); - * return (int)out_len; - * } - * output_next_packet(out, out_len); - * } - * opus_repacketizer_init(rp); - * release_packet(data); - * } - * @endcode - * - * Alternatively, to combine a sequence of frames into packets that each - * contain up to <code>TARGET_DURATION_MS</code> milliseconds of data: - * @code - * // The maximum number of packets with duration TARGET_DURATION_MS occurs - * // when the frame size is 2.5 ms, for a total of (TARGET_DURATION_MS*2/5) - * // packets. 
- * unsigned char *data[(TARGET_DURATION_MS*2/5)+1]; - * opus_int32 len[(TARGET_DURATION_MS*2/5)+1]; - * int nb_packets; - * unsigned char out[1277*(TARGET_DURATION_MS*2/2)]; - * opus_int32 out_len; - * int prev_toc; - * nb_packets = 0; - * while (get_next_packet(data+nb_packets, len+nb_packets)) - * { - * int nb_frames; - * int err; - * nb_frames = opus_packet_get_nb_frames(data[nb_packets], len[nb_packets]); - * if (nb_frames < 1) - * { - * release_packets(data, nb_packets+1); - * return nb_frames; - * } - * nb_frames += opus_repacketizer_get_nb_frames(rp); - * // If adding the next packet would exceed our target, or it has an - * // incompatible TOC sequence, output the packets we already have before - * // submitting it. - * // N.B., The nb_packets > 0 check ensures we've submitted at least one - * // packet since the last call to opus_repacketizer_init(). Otherwise a - * // single packet longer than TARGET_DURATION_MS would cause us to try to - * // output an (invalid) empty packet. It also ensures that prev_toc has - * // been set to a valid value. Additionally, len[nb_packets] > 0 is - * // guaranteed by the call to opus_packet_get_nb_frames() above, so the - * // reference to data[nb_packets][0] should be valid. 
- * if (nb_packets > 0 && ( - * ((prev_toc & 0xFC) != (data[nb_packets][0] & 0xFC)) || - * opus_packet_get_samples_per_frame(data[nb_packets], 48000)*nb_frames > - * TARGET_DURATION_MS*48)) - * { - * out_len = opus_repacketizer_out(rp, out, sizeof(out)); - * if (out_len < 0) - * { - * release_packets(data, nb_packets+1); - * return (int)out_len; - * } - * output_next_packet(out, out_len); - * opus_repacketizer_init(rp); - * release_packets(data, nb_packets); - * data[0] = data[nb_packets]; - * len[0] = len[nb_packets]; - * nb_packets = 0; - * } - * err = opus_repacketizer_cat(rp, data[nb_packets], len[nb_packets]); - * if (err != OPUS_OK) - * { - * release_packets(data, nb_packets+1); - * return err; - * } - * prev_toc = data[nb_packets][0]; - * nb_packets++; - * } - * // Output the final, partial packet. - * if (nb_packets > 0) - * { - * out_len = opus_repacketizer_out(rp, out, sizeof(out)); - * release_packets(data, nb_packets); - * if (out_len < 0) - * return (int)out_len; - * output_next_packet(out, out_len); - * } - * @endcode - * - * An alternate way of merging packets is to simply call opus_repacketizer_cat() - * unconditionally until it fails. At that point, the merged packet can be - * obtained with opus_repacketizer_out() and the input packet for which - * opus_repacketizer_cat() needs to be re-added to a newly reinitialized - * repacketizer state. - */ - -typedef struct OpusRepacketizer OpusRepacketizer; - -/** Gets the size of an <code>OpusRepacketizer</code> structure. - * @returns The size in bytes. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_repacketizer_get_size(void); - -/** (Re)initializes a previously allocated repacketizer state. - * The state must be at least the size returned by opus_repacketizer_get_size(). - * This can be used for applications which use their own allocator instead of - * malloc(). 
- * It must also be called to reset the queue of packets waiting to be - * repacketized, which is necessary if the maximum packet duration of 120 ms - * is reached or if you wish to submit packets with a different Opus - * configuration (coding mode, audio bandwidth, frame size, or channel count). - * Failure to do so will prevent a new packet from being added with - * opus_repacketizer_cat(). - * @see opus_repacketizer_create - * @see opus_repacketizer_get_size - * @see opus_repacketizer_cat - * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state to - * (re)initialize. - * @returns A pointer to the same repacketizer state that was passed in. - */ -OPUS_EXPORT OpusRepacketizer *opus_repacketizer_init(OpusRepacketizer *rp) OPUS_ARG_NONNULL(1); - -/** Allocates memory and initializes the new repacketizer with - * opus_repacketizer_init(). - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusRepacketizer *opus_repacketizer_create(void); - -/** Frees an <code>OpusRepacketizer</code> allocated by - * opus_repacketizer_create(). - * @param[in] rp <tt>OpusRepacketizer*</tt>: State to be freed. - */ -OPUS_EXPORT void opus_repacketizer_destroy(OpusRepacketizer *rp); - -/** Add a packet to the current repacketizer state. - * This packet must match the configuration of any packets already submitted - * for repacketization since the last call to opus_repacketizer_init(). - * This means that it must have the same coding mode, audio bandwidth, frame - * size, and channel count. - * This can be checked in advance by examining the top 6 bits of the first - * byte of the packet, and ensuring they match the top 6 bits of the first - * byte of any previously submitted packet. - * The total duration of audio in the repacketizer state also must not exceed - * 120 ms, the maximum duration of a single packet, after adding this packet. 
- * - * The contents of the current repacketizer state can be extracted into new - * packets using opus_repacketizer_out() or opus_repacketizer_out_range(). - * - * In order to add a packet with a different configuration or to add more - * audio beyond 120 ms, you must clear the repacketizer state by calling - * opus_repacketizer_init(). - * If a packet is too large to add to the current repacketizer state, no part - * of it is added, even if it contains multiple frames, some of which might - * fit. - * If you wish to be able to add parts of such packets, you should first use - * another repacketizer to split the packet into pieces and add them - * individually. - * @see opus_repacketizer_out_range - * @see opus_repacketizer_out - * @see opus_repacketizer_init - * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state to which to - * add the packet. - * @param[in] data <tt>const unsigned char*</tt>: The packet data. - * The application must ensure - * this pointer remains valid - * until the next call to - * opus_repacketizer_init() or - * opus_repacketizer_destroy(). - * @param len <tt>opus_int32</tt>: The number of bytes in the packet data. - * @returns An error code indicating whether or not the operation succeeded. - * @retval #OPUS_OK The packet's contents have been added to the repacketizer - * state. - * @retval #OPUS_INVALID_PACKET The packet did not have a valid TOC sequence, - * the packet's TOC sequence was not compatible - * with previously submitted packets (because - * the coding mode, audio bandwidth, frame size, - * or channel count did not match), or adding - * this packet would increase the total amount of - * audio stored in the repacketizer state to more - * than 120 ms. - */ -OPUS_EXPORT int opus_repacketizer_cat(OpusRepacketizer *rp, const unsigned char *data, opus_int32 len) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2); - - -/** Construct a new packet from data previously submitted to the repacketizer - * state via opus_repacketizer_cat(). 
- * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state from which to - * construct the new packet. - * @param begin <tt>int</tt>: The index of the first frame in the current - * repacketizer state to include in the output. - * @param end <tt>int</tt>: One past the index of the last frame in the - * current repacketizer state to include in the - * output. - * @param[out] data <tt>const unsigned char*</tt>: The buffer in which to - * store the output packet. - * @param maxlen <tt>opus_int32</tt>: The maximum number of bytes to store in - * the output buffer. In order to guarantee - * success, this should be at least - * <code>1276</code> for a single frame, - * or for multiple frames, - * <code>1277*(end-begin)</code>. - * However, <code>1*(end-begin)</code> plus - * the size of all packet data submitted to - * the repacketizer since the last call to - * opus_repacketizer_init() or - * opus_repacketizer_create() is also - * sufficient, and possibly much smaller. - * @returns The total size of the output packet on success, or an error code - * on failure. - * @retval #OPUS_BAD_ARG <code>[begin,end)</code> was an invalid range of - * frames (begin < 0, begin >= end, or end > - * opus_repacketizer_get_nb_frames()). - * @retval #OPUS_BUFFER_TOO_SMALL \a maxlen was insufficient to contain the - * complete output packet. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_repacketizer_out_range(OpusRepacketizer *rp, int begin, int end, unsigned char *data, opus_int32 maxlen) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); - -/** Return the total number of frames contained in packet data submitted to - * the repacketizer state so far via opus_repacketizer_cat() since the last - * call to opus_repacketizer_init() or opus_repacketizer_create(). - * This defines the valid range of packets that can be extracted with - * opus_repacketizer_out_range() or opus_repacketizer_out(). - * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state containing the - * frames. 
- * @returns The total number of frames contained in the packet data submitted - * to the repacketizer state. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_repacketizer_get_nb_frames(OpusRepacketizer *rp) OPUS_ARG_NONNULL(1); - -/** Construct a new packet from data previously submitted to the repacketizer - * state via opus_repacketizer_cat(). - * This is a convenience routine that returns all the data submitted so far - * in a single packet. - * It is equivalent to calling - * @code - * opus_repacketizer_out_range(rp, 0, opus_repacketizer_get_nb_frames(rp), - * data, maxlen) - * @endcode - * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state from which to - * construct the new packet. - * @param[out] data <tt>const unsigned char*</tt>: The buffer in which to - * store the output packet. - * @param maxlen <tt>opus_int32</tt>: The maximum number of bytes to store in - * the output buffer. In order to guarantee - * success, this should be at least - * <code>1277*opus_repacketizer_get_nb_frames(rp)</code>. - * However, - * <code>1*opus_repacketizer_get_nb_frames(rp)</code> - * plus the size of all packet data - * submitted to the repacketizer since the - * last call to opus_repacketizer_init() or - * opus_repacketizer_create() is also - * sufficient, and possibly much smaller. - * @returns The total size of the output packet on success, or an error code - * on failure. - * @retval #OPUS_BUFFER_TOO_SMALL \a maxlen was insufficient to contain the - * complete output packet. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_repacketizer_out(OpusRepacketizer *rp, unsigned char *data, opus_int32 maxlen) OPUS_ARG_NONNULL(1); - -/** Pads a given Opus packet to a larger size (possibly changing the TOC sequence). - * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the - * packet to pad. - * @param len <tt>opus_int32</tt>: The size of the packet. - * This must be at least 1. 
- * @param new_len <tt>opus_int32</tt>: The desired size of the packet after padding. - * This must be at least as large as len. - * @returns an error code - * @retval #OPUS_OK \a on success. - * @retval #OPUS_BAD_ARG \a len was less than 1 or new_len was less than len. - * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet. - */ -OPUS_EXPORT int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len); - -/** Remove all padding from a given Opus packet and rewrite the TOC sequence to - * minimize space usage. - * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the - * packet to strip. - * @param len <tt>opus_int32</tt>: The size of the packet. - * This must be at least 1. - * @returns The new size of the output packet on success, or an error code - * on failure. - * @retval #OPUS_BAD_ARG \a len was less than 1. - * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_packet_unpad(unsigned char *data, opus_int32 len); - -/** Pads a given Opus multi-stream packet to a larger size (possibly changing the TOC sequence). - * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the - * packet to pad. - * @param len <tt>opus_int32</tt>: The size of the packet. - * This must be at least 1. - * @param new_len <tt>opus_int32</tt>: The desired size of the packet after padding. - * This must be at least as large as len. - * @param nb_streams <tt>int</tt>: The number of streams (not channels) in the packet. - * This must be at least 1. - * @returns an error code - * @retval #OPUS_OK \a on success. - * @retval #OPUS_BAD_ARG \a len was less than 1. - * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet. 
- */ -OPUS_EXPORT int opus_multistream_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len, int nb_streams); - -/** Remove all padding from a given Opus multi-stream packet and rewrite the TOC sequence to - * minimize space usage. - * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the - * packet to strip. - * @param len <tt>opus_int32</tt>: The size of the packet. - * This must be at least 1. - * @param nb_streams <tt>int</tt>: The number of streams (not channels) in the packet. - * This must be at least 1. - * @returns The new size of the output packet on success, or an error code - * on failure. - * @retval #OPUS_BAD_ARG \a len was less than 1. - * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_packet_unpad(unsigned char *data, opus_int32 len, int nb_streams); - -/**@}*/ - -#ifdef __cplusplus -} -#endif - -#endif /* OPUS_H */ diff --git a/thirdparty/opus/opus/opus_custom.h b/thirdparty/opus/opus/opus_custom.h deleted file mode 100644 index 41f36bf2fb..0000000000 --- a/thirdparty/opus/opus/opus_custom.h +++ /dev/null @@ -1,342 +0,0 @@ -/* Copyright (c) 2007-2008 CSIRO - Copyright (c) 2007-2009 Xiph.Org Foundation - Copyright (c) 2008-2012 Gregory Maxwell - Written by Jean-Marc Valin and Gregory Maxwell */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/** - @file opus_custom.h - @brief Opus-Custom reference implementation API - */ - -#ifndef OPUS_CUSTOM_H -#define OPUS_CUSTOM_H - -#include "opus_defines.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef CUSTOM_MODES -# define OPUS_CUSTOM_EXPORT OPUS_EXPORT -# define OPUS_CUSTOM_EXPORT_STATIC OPUS_EXPORT -#else -# define OPUS_CUSTOM_EXPORT -# ifdef OPUS_BUILD -# define OPUS_CUSTOM_EXPORT_STATIC static OPUS_INLINE -# else -# define OPUS_CUSTOM_EXPORT_STATIC -# endif -#endif - -/** @defgroup opus_custom Opus Custom - * @{ - * Opus Custom is an optional part of the Opus specification and - * reference implementation which uses a distinct API from the regular - * API and supports frame sizes that are not normally supported.\ Use - * of Opus Custom is discouraged for all but very special applications - * for which a frame size different from 2.5, 5, 10, or 20 ms is needed - * (for either complexity or latency reasons) and where interoperability - * is less important. - * - * In addition to the interoperability limitations the use of Opus custom - * disables a substantial chunk of the codec and generally lowers the - * quality available at a given bitrate. 
Normally when an application needs - * a different frame size from the codec it should buffer to match the - * sizes but this adds a small amount of delay which may be important - * in some very low latency applications. Some transports (especially - * constant rate RF transports) may also work best with frames of - * particular durations. - * - * Libopus only supports custom modes if they are enabled at compile time. - * - * The Opus Custom API is similar to the regular API but the - * @ref opus_encoder_create and @ref opus_decoder_create calls take - * an additional mode parameter which is a structure produced by - * a call to @ref opus_custom_mode_create. Both the encoder and decoder - * must create a mode using the same sample rate (fs) and frame size - * (frame size) so these parameters must either be signaled out of band - * or fixed in a particular implementation. - * - * Similar to regular Opus the custom modes support on the fly frame size - * switching, but the sizes available depend on the particular frame size in - * use. For some initial frame sizes on a single on the fly size is available. - */ - -/** Contains the state of an encoder. One encoder state is needed - for each stream. It is initialized once at the beginning of the - stream. Do *not* re-initialize the state for every frame. - @brief Encoder state - */ -typedef struct OpusCustomEncoder OpusCustomEncoder; - -/** State of the decoder. One decoder state is needed for each stream. - It is initialized once at the beginning of the stream. Do *not* - re-initialize the state for every frame. - @brief Decoder state - */ -typedef struct OpusCustomDecoder OpusCustomDecoder; - -/** The mode contains all the information necessary to create an - encoder. Both the encoder and decoder need to be initialized - with exactly the same mode, otherwise the output will be - corrupted. - @brief Mode configuration - */ -typedef struct OpusCustomMode OpusCustomMode; - -/** Creates a new mode struct. 
This will be passed to an encoder or - * decoder. The mode MUST NOT BE DESTROYED until the encoders and - * decoders that use it are destroyed as well. - * @param [in] Fs <tt>int</tt>: Sampling rate (8000 to 96000 Hz) - * @param [in] frame_size <tt>int</tt>: Number of samples (per channel) to encode in each - * packet (64 - 1024, prime factorization must contain zero or more 2s, 3s, or 5s and no other primes) - * @param [out] error <tt>int*</tt>: Returned error code (if NULL, no error will be returned) - * @return A newly created mode - */ -OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error); - -/** Destroys a mode struct. Only call this after all encoders and - * decoders using this mode are destroyed as well. - * @param [in] mode <tt>OpusCustomMode*</tt>: Mode to be freed. - */ -OPUS_CUSTOM_EXPORT void opus_custom_mode_destroy(OpusCustomMode *mode); - - -#if !defined(OPUS_BUILD) || defined(CELT_ENCODER_C) - -/* Encoder */ -/** Gets the size of an OpusCustomEncoder structure. - * @param [in] mode <tt>OpusCustomMode *</tt>: Mode configuration - * @param [in] channels <tt>int</tt>: Number of channels - * @returns size - */ -OPUS_CUSTOM_EXPORT_STATIC OPUS_WARN_UNUSED_RESULT int opus_custom_encoder_get_size( - const OpusCustomMode *mode, - int channels -) OPUS_ARG_NONNULL(1); - -# ifdef CUSTOM_MODES -/** Initializes a previously allocated encoder state - * The memory pointed to by st must be the size returned by opus_custom_encoder_get_size. - * This is intended for applications which use their own allocator instead of malloc. - * @see opus_custom_encoder_create(),opus_custom_encoder_get_size() - * To reset a previously initialized state use the OPUS_RESET_STATE CTL. 
- * @param [in] st <tt>OpusCustomEncoder*</tt>: Encoder state - * @param [in] mode <tt>OpusCustomMode *</tt>: Contains all the information about the characteristics of - * the stream (must be the same characteristics as used for the - * decoder) - * @param [in] channels <tt>int</tt>: Number of channels - * @return OPUS_OK Success or @ref opus_errorcodes - */ -OPUS_CUSTOM_EXPORT int opus_custom_encoder_init( - OpusCustomEncoder *st, - const OpusCustomMode *mode, - int channels -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2); -# endif -#endif - - -/** Creates a new encoder state. Each stream needs its own encoder - * state (can't be shared across simultaneous streams). - * @param [in] mode <tt>OpusCustomMode*</tt>: Contains all the information about the characteristics of - * the stream (must be the same characteristics as used for the - * decoder) - * @param [in] channels <tt>int</tt>: Number of channels - * @param [out] error <tt>int*</tt>: Returns an error code - * @return Newly created encoder state. -*/ -OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomEncoder *opus_custom_encoder_create( - const OpusCustomMode *mode, - int channels, - int *error -) OPUS_ARG_NONNULL(1); - - -/** Destroys a an encoder state. - * @param[in] st <tt>OpusCustomEncoder*</tt>: State to be freed. - */ -OPUS_CUSTOM_EXPORT void opus_custom_encoder_destroy(OpusCustomEncoder *st); - -/** Encodes a frame of audio. - * @param [in] st <tt>OpusCustomEncoder*</tt>: Encoder state - * @param [in] pcm <tt>float*</tt>: PCM audio in float format, with a normal range of +/-1.0. - * Samples with a range beyond +/-1.0 are supported but will - * be clipped by decoders using the integer API and should - * only be used if it is known that the far end supports - * extended dynamic range. There must be exactly - * frame_size samples per channel. - * @param [in] frame_size <tt>int</tt>: Number of samples per frame of input signal - * @param [out] compressed <tt>char *</tt>: The compressed data is written here. 
This may not alias pcm and must be at least maxCompressedBytes long. - * @param [in] maxCompressedBytes <tt>int</tt>: Maximum number of bytes to use for compressing the frame - * (can change from one frame to another) - * @return Number of bytes written to "compressed". - * If negative, an error has occurred (see error codes). It is IMPORTANT that - * the length returned be somehow transmitted to the decoder. Otherwise, no - * decoding is possible. - */ -OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_encode_float( - OpusCustomEncoder *st, - const float *pcm, - int frame_size, - unsigned char *compressed, - int maxCompressedBytes -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); - -/** Encodes a frame of audio. - * @param [in] st <tt>OpusCustomEncoder*</tt>: Encoder state - * @param [in] pcm <tt>opus_int16*</tt>: PCM audio in signed 16-bit format (native endian). - * There must be exactly frame_size samples per channel. - * @param [in] frame_size <tt>int</tt>: Number of samples per frame of input signal - * @param [out] compressed <tt>char *</tt>: The compressed data is written here. This may not alias pcm and must be at least maxCompressedBytes long. - * @param [in] maxCompressedBytes <tt>int</tt>: Maximum number of bytes to use for compressing the frame - * (can change from one frame to another) - * @return Number of bytes written to "compressed". - * If negative, an error has occurred (see error codes). It is IMPORTANT that - * the length returned be somehow transmitted to the decoder. Otherwise, no - * decoding is possible. - */ -OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_encode( - OpusCustomEncoder *st, - const opus_int16 *pcm, - int frame_size, - unsigned char *compressed, - int maxCompressedBytes -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); - -/** Perform a CTL function on an Opus custom encoder. - * - * Generally the request and subsequent arguments are generated - * by a convenience macro. 
- * @see opus_encoderctls - */ -OPUS_CUSTOM_EXPORT int opus_custom_encoder_ctl(OpusCustomEncoder * OPUS_RESTRICT st, int request, ...) OPUS_ARG_NONNULL(1); - - -#if !defined(OPUS_BUILD) || defined(CELT_DECODER_C) -/* Decoder */ - -/** Gets the size of an OpusCustomDecoder structure. - * @param [in] mode <tt>OpusCustomMode *</tt>: Mode configuration - * @param [in] channels <tt>int</tt>: Number of channels - * @returns size - */ -OPUS_CUSTOM_EXPORT_STATIC OPUS_WARN_UNUSED_RESULT int opus_custom_decoder_get_size( - const OpusCustomMode *mode, - int channels -) OPUS_ARG_NONNULL(1); - -/** Initializes a previously allocated decoder state - * The memory pointed to by st must be the size returned by opus_custom_decoder_get_size. - * This is intended for applications which use their own allocator instead of malloc. - * @see opus_custom_decoder_create(),opus_custom_decoder_get_size() - * To reset a previously initialized state use the OPUS_RESET_STATE CTL. - * @param [in] st <tt>OpusCustomDecoder*</tt>: Decoder state - * @param [in] mode <tt>OpusCustomMode *</tt>: Contains all the information about the characteristics of - * the stream (must be the same characteristics as used for the - * encoder) - * @param [in] channels <tt>int</tt>: Number of channels - * @return OPUS_OK Success or @ref opus_errorcodes - */ -OPUS_CUSTOM_EXPORT_STATIC int opus_custom_decoder_init( - OpusCustomDecoder *st, - const OpusCustomMode *mode, - int channels -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2); - -#endif - - -/** Creates a new decoder state. Each stream needs its own decoder state (can't - * be shared across simultaneous streams). - * @param [in] mode <tt>OpusCustomMode</tt>: Contains all the information about the characteristics of the - * stream (must be the same characteristics as used for the encoder) - * @param [in] channels <tt>int</tt>: Number of channels - * @param [out] error <tt>int*</tt>: Returns an error code - * @return Newly created decoder state. 
- */ -OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomDecoder *opus_custom_decoder_create( - const OpusCustomMode *mode, - int channels, - int *error -) OPUS_ARG_NONNULL(1); - -/** Destroys a an decoder state. - * @param[in] st <tt>OpusCustomDecoder*</tt>: State to be freed. - */ -OPUS_CUSTOM_EXPORT void opus_custom_decoder_destroy(OpusCustomDecoder *st); - -/** Decode an opus custom frame with floating point output - * @param [in] st <tt>OpusCustomDecoder*</tt>: Decoder state - * @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss - * @param [in] len <tt>int</tt>: Number of bytes in payload - * @param [out] pcm <tt>float*</tt>: Output signal (interleaved if 2 channels). length - * is frame_size*channels*sizeof(float) - * @param [in] frame_size Number of samples per channel of available space in *pcm. - * @returns Number of decoded samples or @ref opus_errorcodes - */ -OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_decode_float( - OpusCustomDecoder *st, - const unsigned char *data, - int len, - float *pcm, - int frame_size -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); - -/** Decode an opus custom frame - * @param [in] st <tt>OpusCustomDecoder*</tt>: Decoder state - * @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss - * @param [in] len <tt>int</tt>: Number of bytes in payload - * @param [out] pcm <tt>opus_int16*</tt>: Output signal (interleaved if 2 channels). length - * is frame_size*channels*sizeof(opus_int16) - * @param [in] frame_size Number of samples per channel of available space in *pcm. - * @returns Number of decoded samples or @ref opus_errorcodes - */ -OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_decode( - OpusCustomDecoder *st, - const unsigned char *data, - int len, - opus_int16 *pcm, - int frame_size -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); - -/** Perform a CTL function on an Opus custom decoder. 
- * - * Generally the request and subsequent arguments are generated - * by a convenience macro. - * @see opus_genericctls - */ -OPUS_CUSTOM_EXPORT int opus_custom_decoder_ctl(OpusCustomDecoder * OPUS_RESTRICT st, int request, ...) OPUS_ARG_NONNULL(1); - -/**@}*/ - -#ifdef __cplusplus -} -#endif - -#endif /* OPUS_CUSTOM_H */ diff --git a/thirdparty/opus/opus/opus_defines.h b/thirdparty/opus/opus/opus_defines.h deleted file mode 100644 index 315412dd1d..0000000000 --- a/thirdparty/opus/opus/opus_defines.h +++ /dev/null @@ -1,753 +0,0 @@ -/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited - Written by Jean-Marc Valin and Koen Vos */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -/** - * @file opus_defines.h - * @brief Opus reference implementation constants - */ - -#ifndef OPUS_DEFINES_H -#define OPUS_DEFINES_H - -#include "opus_types.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** @defgroup opus_errorcodes Error codes - * @{ - */ -/** No error @hideinitializer*/ -#define OPUS_OK 0 -/** One or more invalid/out of range arguments @hideinitializer*/ -#define OPUS_BAD_ARG -1 -/** Not enough bytes allocated in the buffer @hideinitializer*/ -#define OPUS_BUFFER_TOO_SMALL -2 -/** An internal error was detected @hideinitializer*/ -#define OPUS_INTERNAL_ERROR -3 -/** The compressed data passed is corrupted @hideinitializer*/ -#define OPUS_INVALID_PACKET -4 -/** Invalid/unsupported request number @hideinitializer*/ -#define OPUS_UNIMPLEMENTED -5 -/** An encoder or decoder structure is invalid or already freed @hideinitializer*/ -#define OPUS_INVALID_STATE -6 -/** Memory allocation has failed @hideinitializer*/ -#define OPUS_ALLOC_FAIL -7 -/**@}*/ - -/** @cond OPUS_INTERNAL_DOC */ -/**Export control for opus functions */ - -#ifndef OPUS_EXPORT -# if defined(WIN32) -# if defined(OPUS_BUILD) && defined(DLL_EXPORT) -# define OPUS_EXPORT __declspec(dllexport) -# else -# define OPUS_EXPORT -# endif -# elif defined(__GNUC__) && defined(OPUS_BUILD) -# define OPUS_EXPORT __attribute__ ((visibility ("default"))) -# else -# define OPUS_EXPORT -# endif -#endif - -# if !defined(OPUS_GNUC_PREREQ) -# if defined(__GNUC__)&&defined(__GNUC_MINOR__) -# define OPUS_GNUC_PREREQ(_maj,_min) \ - ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min)) -# else -# define OPUS_GNUC_PREREQ(_maj,_min) 0 -# endif -# endif - -#if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) ) -# if OPUS_GNUC_PREREQ(3,0) -# define OPUS_RESTRICT __restrict__ -# elif (defined(_MSC_VER) && _MSC_VER >= 1400) -# define OPUS_RESTRICT __restrict -# else -# define OPUS_RESTRICT -# endif -#else -# define OPUS_RESTRICT restrict -#endif - -#if (!defined(__STDC_VERSION__) || 
(__STDC_VERSION__ < 199901L) ) -# if OPUS_GNUC_PREREQ(2,7) -# define OPUS_INLINE __inline__ -# elif (defined(_MSC_VER)) -# define OPUS_INLINE __inline -# else -# define OPUS_INLINE -# endif -#else -# define OPUS_INLINE inline -#endif - -/**Warning attributes for opus functions - * NONNULL is not used in OPUS_BUILD to avoid the compiler optimizing out - * some paranoid null checks. */ -#if defined(__GNUC__) && OPUS_GNUC_PREREQ(3, 4) -# define OPUS_WARN_UNUSED_RESULT __attribute__ ((__warn_unused_result__)) -#else -# define OPUS_WARN_UNUSED_RESULT -#endif -#if !defined(OPUS_BUILD) && defined(__GNUC__) && OPUS_GNUC_PREREQ(3, 4) -# define OPUS_ARG_NONNULL(_x) __attribute__ ((__nonnull__(_x))) -#else -# define OPUS_ARG_NONNULL(_x) -#endif - -/** These are the actual Encoder CTL ID numbers. - * They should not be used directly by applications. - * In general, SETs should be even and GETs should be odd.*/ -#define OPUS_SET_APPLICATION_REQUEST 4000 -#define OPUS_GET_APPLICATION_REQUEST 4001 -#define OPUS_SET_BITRATE_REQUEST 4002 -#define OPUS_GET_BITRATE_REQUEST 4003 -#define OPUS_SET_MAX_BANDWIDTH_REQUEST 4004 -#define OPUS_GET_MAX_BANDWIDTH_REQUEST 4005 -#define OPUS_SET_VBR_REQUEST 4006 -#define OPUS_GET_VBR_REQUEST 4007 -#define OPUS_SET_BANDWIDTH_REQUEST 4008 -#define OPUS_GET_BANDWIDTH_REQUEST 4009 -#define OPUS_SET_COMPLEXITY_REQUEST 4010 -#define OPUS_GET_COMPLEXITY_REQUEST 4011 -#define OPUS_SET_INBAND_FEC_REQUEST 4012 -#define OPUS_GET_INBAND_FEC_REQUEST 4013 -#define OPUS_SET_PACKET_LOSS_PERC_REQUEST 4014 -#define OPUS_GET_PACKET_LOSS_PERC_REQUEST 4015 -#define OPUS_SET_DTX_REQUEST 4016 -#define OPUS_GET_DTX_REQUEST 4017 -#define OPUS_SET_VBR_CONSTRAINT_REQUEST 4020 -#define OPUS_GET_VBR_CONSTRAINT_REQUEST 4021 -#define OPUS_SET_FORCE_CHANNELS_REQUEST 4022 -#define OPUS_GET_FORCE_CHANNELS_REQUEST 4023 -#define OPUS_SET_SIGNAL_REQUEST 4024 -#define OPUS_GET_SIGNAL_REQUEST 4025 -#define OPUS_GET_LOOKAHEAD_REQUEST 4027 -/* #define OPUS_RESET_STATE 4028 */ -#define 
OPUS_GET_SAMPLE_RATE_REQUEST 4029 -#define OPUS_GET_FINAL_RANGE_REQUEST 4031 -#define OPUS_GET_PITCH_REQUEST 4033 -#define OPUS_SET_GAIN_REQUEST 4034 -#define OPUS_GET_GAIN_REQUEST 4045 /* Should have been 4035 */ -#define OPUS_SET_LSB_DEPTH_REQUEST 4036 -#define OPUS_GET_LSB_DEPTH_REQUEST 4037 -#define OPUS_GET_LAST_PACKET_DURATION_REQUEST 4039 -#define OPUS_SET_EXPERT_FRAME_DURATION_REQUEST 4040 -#define OPUS_GET_EXPERT_FRAME_DURATION_REQUEST 4041 -#define OPUS_SET_PREDICTION_DISABLED_REQUEST 4042 -#define OPUS_GET_PREDICTION_DISABLED_REQUEST 4043 - -/* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */ - -/* Macros to trigger compilation errors when the wrong types are provided to a CTL */ -#define __opus_check_int(x) (((void)((x) == (opus_int32)0)), (opus_int32)(x)) -#define __opus_check_int_ptr(ptr) ((ptr) + ((ptr) - (opus_int32*)(ptr))) -#define __opus_check_uint_ptr(ptr) ((ptr) + ((ptr) - (opus_uint32*)(ptr))) -#define __opus_check_val16_ptr(ptr) ((ptr) + ((ptr) - (opus_val16*)(ptr))) -/** @endcond */ - -/** @defgroup opus_ctlvalues Pre-defined values for CTL interface - * @see opus_genericctls, opus_encoderctls - * @{ - */ -/* Values for the various encoder CTLs */ -#define OPUS_AUTO -1000 /**<Auto/default setting @hideinitializer*/ -#define OPUS_BITRATE_MAX -1 /**<Maximum bitrate @hideinitializer*/ - -/** Best for most VoIP/videoconference applications where listening quality and intelligibility matter most - * @hideinitializer */ -#define OPUS_APPLICATION_VOIP 2048 -/** Best for broadcast/high-fidelity application where the decoded audio should be as close as possible to the input - * @hideinitializer */ -#define OPUS_APPLICATION_AUDIO 2049 -/** Only use when lowest-achievable latency is what matters most. Voice-optimized modes cannot be used. 
- * @hideinitializer */ -#define OPUS_APPLICATION_RESTRICTED_LOWDELAY 2051 - -#define OPUS_SIGNAL_VOICE 3001 /**< Signal being encoded is voice */ -#define OPUS_SIGNAL_MUSIC 3002 /**< Signal being encoded is music */ -#define OPUS_BANDWIDTH_NARROWBAND 1101 /**< 4 kHz bandpass @hideinitializer*/ -#define OPUS_BANDWIDTH_MEDIUMBAND 1102 /**< 6 kHz bandpass @hideinitializer*/ -#define OPUS_BANDWIDTH_WIDEBAND 1103 /**< 8 kHz bandpass @hideinitializer*/ -#define OPUS_BANDWIDTH_SUPERWIDEBAND 1104 /**<12 kHz bandpass @hideinitializer*/ -#define OPUS_BANDWIDTH_FULLBAND 1105 /**<20 kHz bandpass @hideinitializer*/ - -#define OPUS_FRAMESIZE_ARG 5000 /**< Select frame size from the argument (default) */ -#define OPUS_FRAMESIZE_2_5_MS 5001 /**< Use 2.5 ms frames */ -#define OPUS_FRAMESIZE_5_MS 5002 /**< Use 5 ms frames */ -#define OPUS_FRAMESIZE_10_MS 5003 /**< Use 10 ms frames */ -#define OPUS_FRAMESIZE_20_MS 5004 /**< Use 20 ms frames */ -#define OPUS_FRAMESIZE_40_MS 5005 /**< Use 40 ms frames */ -#define OPUS_FRAMESIZE_60_MS 5006 /**< Use 60 ms frames */ - -/**@}*/ - - -/** @defgroup opus_encoderctls Encoder related CTLs - * - * These are convenience macros for use with the \c opus_encode_ctl - * interface. They are used to generate the appropriate series of - * arguments for that call, passing the correct type, size and so - * on as expected for each particular request. - * - * Some usage examples: - * - * @code - * int ret; - * ret = opus_encoder_ctl(enc_ctx, OPUS_SET_BANDWIDTH(OPUS_AUTO)); - * if (ret != OPUS_OK) return ret; - * - * opus_int32 rate; - * opus_encoder_ctl(enc_ctx, OPUS_GET_BANDWIDTH(&rate)); - * - * opus_encoder_ctl(enc_ctx, OPUS_RESET_STATE); - * @endcode - * - * @see opus_genericctls, opus_encoder - * @{ - */ - -/** Configures the encoder's computational complexity. - * The supported range is 0-10 inclusive with 10 representing the highest complexity. - * @see OPUS_GET_COMPLEXITY - * @param[in] x <tt>opus_int32</tt>: Allowed values: 0-10, inclusive. 
- * - * @hideinitializer */ -#define OPUS_SET_COMPLEXITY(x) OPUS_SET_COMPLEXITY_REQUEST, __opus_check_int(x) -/** Gets the encoder's complexity configuration. - * @see OPUS_SET_COMPLEXITY - * @param[out] x <tt>opus_int32 *</tt>: Returns a value in the range 0-10, - * inclusive. - * @hideinitializer */ -#define OPUS_GET_COMPLEXITY(x) OPUS_GET_COMPLEXITY_REQUEST, __opus_check_int_ptr(x) - -/** Configures the bitrate in the encoder. - * Rates from 500 to 512000 bits per second are meaningful, as well as the - * special values #OPUS_AUTO and #OPUS_BITRATE_MAX. - * The value #OPUS_BITRATE_MAX can be used to cause the codec to use as much - * rate as it can, which is useful for controlling the rate by adjusting the - * output buffer size. - * @see OPUS_GET_BITRATE - * @param[in] x <tt>opus_int32</tt>: Bitrate in bits per second. The default - * is determined based on the number of - * channels and the input sampling rate. - * @hideinitializer */ -#define OPUS_SET_BITRATE(x) OPUS_SET_BITRATE_REQUEST, __opus_check_int(x) -/** Gets the encoder's bitrate configuration. - * @see OPUS_SET_BITRATE - * @param[out] x <tt>opus_int32 *</tt>: Returns the bitrate in bits per second. - * The default is determined based on the - * number of channels and the input - * sampling rate. - * @hideinitializer */ -#define OPUS_GET_BITRATE(x) OPUS_GET_BITRATE_REQUEST, __opus_check_int_ptr(x) - -/** Enables or disables variable bitrate (VBR) in the encoder. - * The configured bitrate may not be met exactly because frames must - * be an integer number of bytes in length. - * @see OPUS_GET_VBR - * @see OPUS_SET_VBR_CONSTRAINT - * @param[in] x <tt>opus_int32</tt>: Allowed values: - * <dl> - * <dt>0</dt><dd>Hard CBR. For LPC/hybrid modes at very low bit-rate, this can - * cause noticeable quality degradation.</dd> - * <dt>1</dt><dd>VBR (default). 
The exact type of VBR is controlled by - * #OPUS_SET_VBR_CONSTRAINT.</dd> - * </dl> - * @hideinitializer */ -#define OPUS_SET_VBR(x) OPUS_SET_VBR_REQUEST, __opus_check_int(x) -/** Determine if variable bitrate (VBR) is enabled in the encoder. - * @see OPUS_SET_VBR - * @see OPUS_GET_VBR_CONSTRAINT - * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: - * <dl> - * <dt>0</dt><dd>Hard CBR.</dd> - * <dt>1</dt><dd>VBR (default). The exact type of VBR may be retrieved via - * #OPUS_GET_VBR_CONSTRAINT.</dd> - * </dl> - * @hideinitializer */ -#define OPUS_GET_VBR(x) OPUS_GET_VBR_REQUEST, __opus_check_int_ptr(x) - -/** Enables or disables constrained VBR in the encoder. - * This setting is ignored when the encoder is in CBR mode. - * @warning Only the MDCT mode of Opus currently heeds the constraint. - * Speech mode ignores it completely, hybrid mode may fail to obey it - * if the LPC layer uses more bitrate than the constraint would have - * permitted. - * @see OPUS_GET_VBR_CONSTRAINT - * @see OPUS_SET_VBR - * @param[in] x <tt>opus_int32</tt>: Allowed values: - * <dl> - * <dt>0</dt><dd>Unconstrained VBR.</dd> - * <dt>1</dt><dd>Constrained VBR (default). This creates a maximum of one - * frame of buffering delay assuming a transport with a - * serialization speed of the nominal bitrate.</dd> - * </dl> - * @hideinitializer */ -#define OPUS_SET_VBR_CONSTRAINT(x) OPUS_SET_VBR_CONSTRAINT_REQUEST, __opus_check_int(x) -/** Determine if constrained VBR is enabled in the encoder. - * @see OPUS_SET_VBR_CONSTRAINT - * @see OPUS_GET_VBR - * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: - * <dl> - * <dt>0</dt><dd>Unconstrained VBR.</dd> - * <dt>1</dt><dd>Constrained VBR (default).</dd> - * </dl> - * @hideinitializer */ -#define OPUS_GET_VBR_CONSTRAINT(x) OPUS_GET_VBR_CONSTRAINT_REQUEST, __opus_check_int_ptr(x) - -/** Configures mono/stereo forcing in the encoder. 
- * This can force the encoder to produce packets encoded as either mono or - * stereo, regardless of the format of the input audio. This is useful when - * the caller knows that the input signal is currently a mono source embedded - * in a stereo stream. - * @see OPUS_GET_FORCE_CHANNELS - * @param[in] x <tt>opus_int32</tt>: Allowed values: - * <dl> - * <dt>#OPUS_AUTO</dt><dd>Not forced (default)</dd> - * <dt>1</dt> <dd>Forced mono</dd> - * <dt>2</dt> <dd>Forced stereo</dd> - * </dl> - * @hideinitializer */ -#define OPUS_SET_FORCE_CHANNELS(x) OPUS_SET_FORCE_CHANNELS_REQUEST, __opus_check_int(x) -/** Gets the encoder's forced channel configuration. - * @see OPUS_SET_FORCE_CHANNELS - * @param[out] x <tt>opus_int32 *</tt>: - * <dl> - * <dt>#OPUS_AUTO</dt><dd>Not forced (default)</dd> - * <dt>1</dt> <dd>Forced mono</dd> - * <dt>2</dt> <dd>Forced stereo</dd> - * </dl> - * @hideinitializer */ -#define OPUS_GET_FORCE_CHANNELS(x) OPUS_GET_FORCE_CHANNELS_REQUEST, __opus_check_int_ptr(x) - -/** Configures the maximum bandpass that the encoder will select automatically. - * Applications should normally use this instead of #OPUS_SET_BANDWIDTH - * (leaving that set to the default, #OPUS_AUTO). This allows the - * application to set an upper bound based on the type of input it is - * providing, but still gives the encoder the freedom to reduce the bandpass - * when the bitrate becomes too low, for better overall quality. 
- * @see OPUS_GET_MAX_BANDWIDTH - * @param[in] x <tt>opus_int32</tt>: Allowed values: - * <dl> - * <dt>OPUS_BANDWIDTH_NARROWBAND</dt> <dd>4 kHz passband</dd> - * <dt>OPUS_BANDWIDTH_MEDIUMBAND</dt> <dd>6 kHz passband</dd> - * <dt>OPUS_BANDWIDTH_WIDEBAND</dt> <dd>8 kHz passband</dd> - * <dt>OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd> - * <dt>OPUS_BANDWIDTH_FULLBAND</dt> <dd>20 kHz passband (default)</dd> - * </dl> - * @hideinitializer */ -#define OPUS_SET_MAX_BANDWIDTH(x) OPUS_SET_MAX_BANDWIDTH_REQUEST, __opus_check_int(x) - -/** Gets the encoder's configured maximum allowed bandpass. - * @see OPUS_SET_MAX_BANDWIDTH - * @param[out] x <tt>opus_int32 *</tt>: Allowed values: - * <dl> - * <dt>#OPUS_BANDWIDTH_NARROWBAND</dt> <dd>4 kHz passband</dd> - * <dt>#OPUS_BANDWIDTH_MEDIUMBAND</dt> <dd>6 kHz passband</dd> - * <dt>#OPUS_BANDWIDTH_WIDEBAND</dt> <dd>8 kHz passband</dd> - * <dt>#OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd> - * <dt>#OPUS_BANDWIDTH_FULLBAND</dt> <dd>20 kHz passband (default)</dd> - * </dl> - * @hideinitializer */ -#define OPUS_GET_MAX_BANDWIDTH(x) OPUS_GET_MAX_BANDWIDTH_REQUEST, __opus_check_int_ptr(x) - -/** Sets the encoder's bandpass to a specific value. - * This prevents the encoder from automatically selecting the bandpass based - * on the available bitrate. If an application knows the bandpass of the input - * audio it is providing, it should normally use #OPUS_SET_MAX_BANDWIDTH - * instead, which still gives the encoder the freedom to reduce the bandpass - * when the bitrate becomes too low, for better overall quality. 
- * @see OPUS_GET_BANDWIDTH - * @param[in] x <tt>opus_int32</tt>: Allowed values: - * <dl> - * <dt>#OPUS_AUTO</dt> <dd>(default)</dd> - * <dt>#OPUS_BANDWIDTH_NARROWBAND</dt> <dd>4 kHz passband</dd> - * <dt>#OPUS_BANDWIDTH_MEDIUMBAND</dt> <dd>6 kHz passband</dd> - * <dt>#OPUS_BANDWIDTH_WIDEBAND</dt> <dd>8 kHz passband</dd> - * <dt>#OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd> - * <dt>#OPUS_BANDWIDTH_FULLBAND</dt> <dd>20 kHz passband</dd> - * </dl> - * @hideinitializer */ -#define OPUS_SET_BANDWIDTH(x) OPUS_SET_BANDWIDTH_REQUEST, __opus_check_int(x) - -/** Configures the type of signal being encoded. - * This is a hint which helps the encoder's mode selection. - * @see OPUS_GET_SIGNAL - * @param[in] x <tt>opus_int32</tt>: Allowed values: - * <dl> - * <dt>#OPUS_AUTO</dt> <dd>(default)</dd> - * <dt>#OPUS_SIGNAL_VOICE</dt><dd>Bias thresholds towards choosing LPC or Hybrid modes.</dd> - * <dt>#OPUS_SIGNAL_MUSIC</dt><dd>Bias thresholds towards choosing MDCT modes.</dd> - * </dl> - * @hideinitializer */ -#define OPUS_SET_SIGNAL(x) OPUS_SET_SIGNAL_REQUEST, __opus_check_int(x) -/** Gets the encoder's configured signal type. - * @see OPUS_SET_SIGNAL - * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: - * <dl> - * <dt>#OPUS_AUTO</dt> <dd>(default)</dd> - * <dt>#OPUS_SIGNAL_VOICE</dt><dd>Bias thresholds towards choosing LPC or Hybrid modes.</dd> - * <dt>#OPUS_SIGNAL_MUSIC</dt><dd>Bias thresholds towards choosing MDCT modes.</dd> - * </dl> - * @hideinitializer */ -#define OPUS_GET_SIGNAL(x) OPUS_GET_SIGNAL_REQUEST, __opus_check_int_ptr(x) - - -/** Configures the encoder's intended application. - * The initial value is a mandatory argument to the encoder_create function. 
- * @see OPUS_GET_APPLICATION - * @param[in] x <tt>opus_int32</tt>: Returns one of the following values: - * <dl> - * <dt>#OPUS_APPLICATION_VOIP</dt> - * <dd>Process signal for improved speech intelligibility.</dd> - * <dt>#OPUS_APPLICATION_AUDIO</dt> - * <dd>Favor faithfulness to the original input.</dd> - * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt> - * <dd>Configure the minimum possible coding delay by disabling certain modes - * of operation.</dd> - * </dl> - * @hideinitializer */ -#define OPUS_SET_APPLICATION(x) OPUS_SET_APPLICATION_REQUEST, __opus_check_int(x) -/** Gets the encoder's configured application. - * @see OPUS_SET_APPLICATION - * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: - * <dl> - * <dt>#OPUS_APPLICATION_VOIP</dt> - * <dd>Process signal for improved speech intelligibility.</dd> - * <dt>#OPUS_APPLICATION_AUDIO</dt> - * <dd>Favor faithfulness to the original input.</dd> - * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt> - * <dd>Configure the minimum possible coding delay by disabling certain modes - * of operation.</dd> - * </dl> - * @hideinitializer */ -#define OPUS_GET_APPLICATION(x) OPUS_GET_APPLICATION_REQUEST, __opus_check_int_ptr(x) - -/** Gets the total samples of delay added by the entire codec. - * This can be queried by the encoder and then the provided number of samples can be - * skipped on from the start of the decoder's output to provide time aligned input - * and output. From the perspective of a decoding application the real data begins this many - * samples late. - * - * The decoder contribution to this delay is identical for all decoders, but the - * encoder portion of the delay may vary from implementation to implementation, - * version to version, or even depend on the encoder's initial configuration. - * Applications needing delay compensation should call this CTL rather than - * hard-coding a value. 
- * @param[out] x <tt>opus_int32 *</tt>: Number of lookahead samples - * @hideinitializer */ -#define OPUS_GET_LOOKAHEAD(x) OPUS_GET_LOOKAHEAD_REQUEST, __opus_check_int_ptr(x) - -/** Configures the encoder's use of inband forward error correction (FEC). - * @note This is only applicable to the LPC layer - * @see OPUS_GET_INBAND_FEC - * @param[in] x <tt>opus_int32</tt>: Allowed values: - * <dl> - * <dt>0</dt><dd>Disable inband FEC (default).</dd> - * <dt>1</dt><dd>Enable inband FEC.</dd> - * </dl> - * @hideinitializer */ -#define OPUS_SET_INBAND_FEC(x) OPUS_SET_INBAND_FEC_REQUEST, __opus_check_int(x) -/** Gets encoder's configured use of inband forward error correction. - * @see OPUS_SET_INBAND_FEC - * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: - * <dl> - * <dt>0</dt><dd>Inband FEC disabled (default).</dd> - * <dt>1</dt><dd>Inband FEC enabled.</dd> - * </dl> - * @hideinitializer */ -#define OPUS_GET_INBAND_FEC(x) OPUS_GET_INBAND_FEC_REQUEST, __opus_check_int_ptr(x) - -/** Configures the encoder's expected packet loss percentage. - * Higher values trigger progressively more loss resistant behavior in the encoder - * at the expense of quality at a given bitrate in the absence of packet loss, but - * greater quality under loss. - * @see OPUS_GET_PACKET_LOSS_PERC - * @param[in] x <tt>opus_int32</tt>: Loss percentage in the range 0-100, inclusive (default: 0). - * @hideinitializer */ -#define OPUS_SET_PACKET_LOSS_PERC(x) OPUS_SET_PACKET_LOSS_PERC_REQUEST, __opus_check_int(x) -/** Gets the encoder's configured packet loss percentage. - * @see OPUS_SET_PACKET_LOSS_PERC - * @param[out] x <tt>opus_int32 *</tt>: Returns the configured loss percentage - * in the range 0-100, inclusive (default: 0). - * @hideinitializer */ -#define OPUS_GET_PACKET_LOSS_PERC(x) OPUS_GET_PACKET_LOSS_PERC_REQUEST, __opus_check_int_ptr(x) - -/** Configures the encoder's use of discontinuous transmission (DTX). 
- * @note This is only applicable to the LPC layer - * @see OPUS_GET_DTX - * @param[in] x <tt>opus_int32</tt>: Allowed values: - * <dl> - * <dt>0</dt><dd>Disable DTX (default).</dd> - * <dt>1</dt><dd>Enabled DTX.</dd> - * </dl> - * @hideinitializer */ -#define OPUS_SET_DTX(x) OPUS_SET_DTX_REQUEST, __opus_check_int(x) -/** Gets encoder's configured use of discontinuous transmission. - * @see OPUS_SET_DTX - * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: - * <dl> - * <dt>0</dt><dd>DTX disabled (default).</dd> - * <dt>1</dt><dd>DTX enabled.</dd> - * </dl> - * @hideinitializer */ -#define OPUS_GET_DTX(x) OPUS_GET_DTX_REQUEST, __opus_check_int_ptr(x) -/** Configures the depth of signal being encoded. - * - * This is a hint which helps the encoder identify silence and near-silence. - * It represents the number of significant bits of linear intensity below - * which the signal contains ignorable quantization or other noise. - * - * For example, OPUS_SET_LSB_DEPTH(14) would be an appropriate setting - * for G.711 u-law input. OPUS_SET_LSB_DEPTH(16) would be appropriate - * for 16-bit linear pcm input with opus_encode_float(). - * - * When using opus_encode() instead of opus_encode_float(), or when libopus - * is compiled for fixed-point, the encoder uses the minimum of the value - * set here and the value 16. - * - * @see OPUS_GET_LSB_DEPTH - * @param[in] x <tt>opus_int32</tt>: Input precision in bits, between 8 and 24 - * (default: 24). - * @hideinitializer */ -#define OPUS_SET_LSB_DEPTH(x) OPUS_SET_LSB_DEPTH_REQUEST, __opus_check_int(x) -/** Gets the encoder's configured signal depth. - * @see OPUS_SET_LSB_DEPTH - * @param[out] x <tt>opus_int32 *</tt>: Input precision in bits, between 8 and - * 24 (default: 24). - * @hideinitializer */ -#define OPUS_GET_LSB_DEPTH(x) OPUS_GET_LSB_DEPTH_REQUEST, __opus_check_int_ptr(x) - -/** Configures the encoder's use of variable duration frames. 
- * When variable duration is enabled, the encoder is free to use a shorter frame - * size than the one requested in the opus_encode*() call. - * It is then the user's responsibility - * to verify how much audio was encoded by checking the ToC byte of the encoded - * packet. The part of the audio that was not encoded needs to be resent to the - * encoder for the next call. Do not use this option unless you <b>really</b> - * know what you are doing. - * @see OPUS_GET_EXPERT_FRAME_DURATION - * @param[in] x <tt>opus_int32</tt>: Allowed values: - * <dl> - * <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd> - * <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd> - * <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 5 ms frames.</dd> - * <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd> - * <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd> - * <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd> - * <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd> - * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd> - * </dl> - * @hideinitializer */ -#define OPUS_SET_EXPERT_FRAME_DURATION(x) OPUS_SET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int(x) -/** Gets the encoder's configured use of variable duration frames. 
- * @see OPUS_SET_EXPERT_FRAME_DURATION - * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: - * <dl> - * <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd> - * <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd> - * <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 5 ms frames.</dd> - * <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd> - * <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd> - * <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd> - * <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd> - * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd> - * </dl> - * @hideinitializer */ -#define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x) - -/** If set to 1, disables almost all use of prediction, making frames almost - * completely independent. This reduces quality. - * @see OPUS_GET_PREDICTION_DISABLED - * @param[in] x <tt>opus_int32</tt>: Allowed values: - * <dl> - * <dt>0</dt><dd>Enable prediction (default).</dd> - * <dt>1</dt><dd>Disable prediction.</dd> - * </dl> - * @hideinitializer */ -#define OPUS_SET_PREDICTION_DISABLED(x) OPUS_SET_PREDICTION_DISABLED_REQUEST, __opus_check_int(x) -/** Gets the encoder's configured prediction status. - * @see OPUS_SET_PREDICTION_DISABLED - * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: - * <dl> - * <dt>0</dt><dd>Prediction enabled (default).</dd> - * <dt>1</dt><dd>Prediction disabled.</dd> - * </dl> - * @hideinitializer */ -#define OPUS_GET_PREDICTION_DISABLED(x) OPUS_GET_PREDICTION_DISABLED_REQUEST, __opus_check_int_ptr(x) - -/**@}*/ - -/** @defgroup opus_genericctls Generic CTLs - * - * These macros are used with the \c opus_decoder_ctl and - * \c opus_encoder_ctl calls to generate a particular - * request. - * - * When called on an \c OpusDecoder they apply to that - * particular decoder instance. 
When called on an - * \c OpusEncoder they apply to the corresponding setting - * on that encoder instance, if present. - * - * Some usage examples: - * - * @code - * int ret; - * opus_int32 pitch; - * ret = opus_decoder_ctl(dec_ctx, OPUS_GET_PITCH(&pitch)); - * if (ret == OPUS_OK) return ret; - * - * opus_encoder_ctl(enc_ctx, OPUS_RESET_STATE); - * opus_decoder_ctl(dec_ctx, OPUS_RESET_STATE); - * - * opus_int32 enc_bw, dec_bw; - * opus_encoder_ctl(enc_ctx, OPUS_GET_BANDWIDTH(&enc_bw)); - * opus_decoder_ctl(dec_ctx, OPUS_GET_BANDWIDTH(&dec_bw)); - * if (enc_bw != dec_bw) { - * printf("packet bandwidth mismatch!\n"); - * } - * @endcode - * - * @see opus_encoder, opus_decoder_ctl, opus_encoder_ctl, opus_decoderctls, opus_encoderctls - * @{ - */ - -/** Resets the codec state to be equivalent to a freshly initialized state. - * This should be called when switching streams in order to prevent - * the back to back decoding from giving different results from - * one at a time decoding. - * @hideinitializer */ -#define OPUS_RESET_STATE 4028 - -/** Gets the final state of the codec's entropy coder. - * This is used for testing purposes, - * The encoder and decoder state should be identical after coding a payload - * (assuming no data corruption or software bugs) - * - * @param[out] x <tt>opus_uint32 *</tt>: Entropy coder state - * - * @hideinitializer */ -#define OPUS_GET_FINAL_RANGE(x) OPUS_GET_FINAL_RANGE_REQUEST, __opus_check_uint_ptr(x) - -/** Gets the encoder's configured bandpass or the decoder's last bandpass. 
- * @see OPUS_SET_BANDWIDTH - * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: - * <dl> - * <dt>#OPUS_AUTO</dt> <dd>(default)</dd> - * <dt>#OPUS_BANDWIDTH_NARROWBAND</dt> <dd>4 kHz passband</dd> - * <dt>#OPUS_BANDWIDTH_MEDIUMBAND</dt> <dd>6 kHz passband</dd> - * <dt>#OPUS_BANDWIDTH_WIDEBAND</dt> <dd>8 kHz passband</dd> - * <dt>#OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd> - * <dt>#OPUS_BANDWIDTH_FULLBAND</dt> <dd>20 kHz passband</dd> - * </dl> - * @hideinitializer */ -#define OPUS_GET_BANDWIDTH(x) OPUS_GET_BANDWIDTH_REQUEST, __opus_check_int_ptr(x) - -/** Gets the sampling rate the encoder or decoder was initialized with. - * This simply returns the <code>Fs</code> value passed to opus_encoder_init() - * or opus_decoder_init(). - * @param[out] x <tt>opus_int32 *</tt>: Sampling rate of encoder or decoder. - * @hideinitializer - */ -#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x) - -/**@}*/ - -/** @defgroup opus_decoderctls Decoder related CTLs - * @see opus_genericctls, opus_encoderctls, opus_decoder - * @{ - */ - -/** Configures decoder gain adjustment. - * Scales the decoded output by a factor specified in Q8 dB units. - * This has a maximum range of -32768 to 32767 inclusive, and returns - * OPUS_BAD_ARG otherwise. The default is zero indicating no adjustment. - * This setting survives decoder reset. - * - * gain = pow(10, x/(20.0*256)) - * - * @param[in] x <tt>opus_int32</tt>: Amount to scale PCM signal by in Q8 dB units. - * @hideinitializer */ -#define OPUS_SET_GAIN(x) OPUS_SET_GAIN_REQUEST, __opus_check_int(x) -/** Gets the decoder's configured gain adjustment. @see OPUS_SET_GAIN - * - * @param[out] x <tt>opus_int32 *</tt>: Amount to scale PCM signal by in Q8 dB units. - * @hideinitializer */ -#define OPUS_GET_GAIN(x) OPUS_GET_GAIN_REQUEST, __opus_check_int_ptr(x) - -/** Gets the duration (in samples) of the last packet successfully decoded or concealed. 
- * @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate). - * @hideinitializer */ -#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x) - -/** Gets the pitch of the last decoded frame, if available. - * This can be used for any post-processing algorithm requiring the use of pitch, - * e.g. time stretching/shortening. If the last frame was not voiced, or if the - * pitch was not coded in the frame, then zero is returned. - * - * This CTL is only implemented for decoder instances. - * - * @param[out] x <tt>opus_int32 *</tt>: pitch period at 48 kHz (or 0 if not available) - * - * @hideinitializer */ -#define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x) - -/**@}*/ - -/** @defgroup opus_libinfo Opus library information functions - * @{ - */ - -/** Converts an opus error code into a human readable string. - * - * @param[in] error <tt>int</tt>: Error number - * @returns Error string - */ -OPUS_EXPORT const char *opus_strerror(int error); - -/** Gets the libopus version string. - * - * Applications may look for the substring "-fixed" in the version string to - * determine whether they have a fixed-point or floating-point build at - * runtime. 
- * - * @returns Version string - */ -OPUS_EXPORT const char *opus_get_version_string(void); -/**@}*/ - -#ifdef __cplusplus -} -#endif - -#endif /* OPUS_DEFINES_H */ diff --git a/thirdparty/opus/opus/opus_multistream.h b/thirdparty/opus/opus/opus_multistream.h deleted file mode 100644 index 3622e009fb..0000000000 --- a/thirdparty/opus/opus/opus_multistream.h +++ /dev/null @@ -1,660 +0,0 @@ -/* Copyright (c) 2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -/** - * @file opus_multistream.h - * @brief Opus reference implementation multistream API - */ - -#ifndef OPUS_MULTISTREAM_H -#define OPUS_MULTISTREAM_H - -#include "opus.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/** @cond OPUS_INTERNAL_DOC */ - -/** Macros to trigger compilation errors when the wrong types are provided to a - * CTL. */ -/**@{*/ -#define __opus_check_encstate_ptr(ptr) ((ptr) + ((ptr) - (OpusEncoder**)(ptr))) -#define __opus_check_decstate_ptr(ptr) ((ptr) + ((ptr) - (OpusDecoder**)(ptr))) -/**@}*/ - -/** These are the actual encoder and decoder CTL ID numbers. - * They should not be used directly by applications. - * In general, SETs should be even and GETs should be odd.*/ -/**@{*/ -#define OPUS_MULTISTREAM_GET_ENCODER_STATE_REQUEST 5120 -#define OPUS_MULTISTREAM_GET_DECODER_STATE_REQUEST 5122 -/**@}*/ - -/** @endcond */ - -/** @defgroup opus_multistream_ctls Multistream specific encoder and decoder CTLs - * - * These are convenience macros that are specific to the - * opus_multistream_encoder_ctl() and opus_multistream_decoder_ctl() - * interface. - * The CTLs from @ref opus_genericctls, @ref opus_encoderctls, and - * @ref opus_decoderctls may be applied to a multistream encoder or decoder as - * well. - * In addition, you may retrieve the encoder or decoder state for an specific - * stream via #OPUS_MULTISTREAM_GET_ENCODER_STATE or - * #OPUS_MULTISTREAM_GET_DECODER_STATE and apply CTLs to it individually. - */ -/**@{*/ - -/** Gets the encoder state for an individual stream of a multistream encoder. - * @param[in] x <tt>opus_int32</tt>: The index of the stream whose encoder you - * wish to retrieve. - * This must be non-negative and less than - * the <code>streams</code> parameter used - * to initialize the encoder. - * @param[out] y <tt>OpusEncoder**</tt>: Returns a pointer to the given - * encoder state. - * @retval OPUS_BAD_ARG The index of the requested stream was out of range. 
- * @hideinitializer - */ -#define OPUS_MULTISTREAM_GET_ENCODER_STATE(x,y) OPUS_MULTISTREAM_GET_ENCODER_STATE_REQUEST, __opus_check_int(x), __opus_check_encstate_ptr(y) - -/** Gets the decoder state for an individual stream of a multistream decoder. - * @param[in] x <tt>opus_int32</tt>: The index of the stream whose decoder you - * wish to retrieve. - * This must be non-negative and less than - * the <code>streams</code> parameter used - * to initialize the decoder. - * @param[out] y <tt>OpusDecoder**</tt>: Returns a pointer to the given - * decoder state. - * @retval OPUS_BAD_ARG The index of the requested stream was out of range. - * @hideinitializer - */ -#define OPUS_MULTISTREAM_GET_DECODER_STATE(x,y) OPUS_MULTISTREAM_GET_DECODER_STATE_REQUEST, __opus_check_int(x), __opus_check_decstate_ptr(y) - -/**@}*/ - -/** @defgroup opus_multistream Opus Multistream API - * @{ - * - * The multistream API allows individual Opus streams to be combined into a - * single packet, enabling support for up to 255 channels. Unlike an - * elementary Opus stream, the encoder and decoder must negotiate the channel - * configuration before the decoder can successfully interpret the data in the - * packets produced by the encoder. Some basic information, such as packet - * duration, can be computed without any special negotiation. - * - * The format for multistream Opus packets is defined in - * <a href="https://tools.ietf.org/html/rfc7845">RFC 7845</a> - * and is based on the self-delimited Opus framing described in Appendix B of - * <a href="https://tools.ietf.org/html/rfc6716">RFC 6716</a>. - * Normal Opus packets are just a degenerate case of multistream Opus packets, - * and can be encoded or decoded with the multistream API by setting - * <code>streams</code> to <code>1</code> when initializing the encoder or - * decoder. - * - * Multistream Opus streams can contain up to 255 elementary Opus streams. 
- * These may be either "uncoupled" or "coupled", indicating that the decoder - * is configured to decode them to either 1 or 2 channels, respectively. - * The streams are ordered so that all coupled streams appear at the - * beginning. - * - * A <code>mapping</code> table defines which decoded channel <code>i</code> - * should be used for each input/output (I/O) channel <code>j</code>. This table is - * typically provided as an unsigned char array. - * Let <code>i = mapping[j]</code> be the index for I/O channel <code>j</code>. - * If <code>i < 2*coupled_streams</code>, then I/O channel <code>j</code> is - * encoded as the left channel of stream <code>(i/2)</code> if <code>i</code> - * is even, or as the right channel of stream <code>(i/2)</code> if - * <code>i</code> is odd. Otherwise, I/O channel <code>j</code> is encoded as - * mono in stream <code>(i - coupled_streams)</code>, unless it has the special - * value 255, in which case it is omitted from the encoding entirely (the - * decoder will reproduce it as silence). Each value <code>i</code> must either - * be the special value 255 or be less than <code>streams + coupled_streams</code>. - * - * The output channels specified by the encoder - * should use the - * <a href="https://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-810004.3.9">Vorbis - * channel ordering</a>. A decoder may wish to apply an additional permutation - * to the mapping the encoder used to achieve a different output channel - * order (e.g. for outputing in WAV order). - * - * Each multistream packet contains an Opus packet for each stream, and all of - * the Opus packets in a single multistream packet must have the same - * duration. 
Therefore the duration of a multistream packet can be extracted - * from the TOC sequence of the first stream, which is located at the - * beginning of the packet, just like an elementary Opus stream: - * - * @code - * int nb_samples; - * int nb_frames; - * nb_frames = opus_packet_get_nb_frames(data, len); - * if (nb_frames < 1) - * return nb_frames; - * nb_samples = opus_packet_get_samples_per_frame(data, 48000) * nb_frames; - * @endcode - * - * The general encoding and decoding process proceeds exactly the same as in - * the normal @ref opus_encoder and @ref opus_decoder APIs. - * See their documentation for an overview of how to use the corresponding - * multistream functions. - */ - -/** Opus multistream encoder state. - * This contains the complete state of a multistream Opus encoder. - * It is position independent and can be freely copied. - * @see opus_multistream_encoder_create - * @see opus_multistream_encoder_init - */ -typedef struct OpusMSEncoder OpusMSEncoder; - -/** Opus multistream decoder state. - * This contains the complete state of a multistream Opus decoder. - * It is position independent and can be freely copied. - * @see opus_multistream_decoder_create - * @see opus_multistream_decoder_init - */ -typedef struct OpusMSDecoder OpusMSDecoder; - -/**\name Multistream encoder functions */ -/**@{*/ - -/** Gets the size of an OpusMSEncoder structure. - * @param streams <tt>int</tt>: The total number of streams to encode from the - * input. - * This must be no more than 255. - * @param coupled_streams <tt>int</tt>: Number of coupled (2 channel) streams - * to encode. - * This must be no larger than the total - * number of streams. - * Additionally, The total number of - * encoded channels (<code>streams + - * coupled_streams</code>) must be no - * more than 255. - * @returns The size in bytes on success, or a negative error code - * (see @ref opus_errorcodes) on error. 
- */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_encoder_get_size( - int streams, - int coupled_streams -); - -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_surround_encoder_get_size( - int channels, - int mapping_family -); - - -/** Allocates and initializes a multistream encoder state. - * Call opus_multistream_encoder_destroy() to release - * this object when finished. - * @param Fs <tt>opus_int32</tt>: Sampling rate of the input signal (in Hz). - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param channels <tt>int</tt>: Number of channels in the input signal. - * This must be at most 255. - * It may be greater than the number of - * coded channels (<code>streams + - * coupled_streams</code>). - * @param streams <tt>int</tt>: The total number of streams to encode from the - * input. - * This must be no more than the number of channels. - * @param coupled_streams <tt>int</tt>: Number of coupled (2 channel) streams - * to encode. - * This must be no larger than the total - * number of streams. - * Additionally, The total number of - * encoded channels (<code>streams + - * coupled_streams</code>) must be no - * more than the number of input channels. - * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from - * encoded channels to input channels, as described in - * @ref opus_multistream. As an extra constraint, the - * multistream encoder does not allow encoding coupled - * streams for which one channel is unused since this - * is never a good idea. - * @param application <tt>int</tt>: The target encoder application. 
- * This must be one of the following: - * <dl> - * <dt>#OPUS_APPLICATION_VOIP</dt> - * <dd>Process signal for improved speech intelligibility.</dd> - * <dt>#OPUS_APPLICATION_AUDIO</dt> - * <dd>Favor faithfulness to the original input.</dd> - * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt> - * <dd>Configure the minimum possible coding delay by disabling certain modes - * of operation.</dd> - * </dl> - * @param[out] error <tt>int *</tt>: Returns #OPUS_OK on success, or an error - * code (see @ref opus_errorcodes) on - * failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSEncoder *opus_multistream_encoder_create( - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - const unsigned char *mapping, - int application, - int *error -) OPUS_ARG_NONNULL(5); - -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSEncoder *opus_multistream_surround_encoder_create( - opus_int32 Fs, - int channels, - int mapping_family, - int *streams, - int *coupled_streams, - unsigned char *mapping, - int application, - int *error -) OPUS_ARG_NONNULL(5); - -/** Initialize a previously allocated multistream encoder state. - * The memory pointed to by \a st must be at least the size returned by - * opus_multistream_encoder_get_size(). - * This is intended for applications which use their own allocator instead of - * malloc. - * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL. - * @see opus_multistream_encoder_create - * @see opus_multistream_encoder_get_size - * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state to initialize. - * @param Fs <tt>opus_int32</tt>: Sampling rate of the input signal (in Hz). - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param channels <tt>int</tt>: Number of channels in the input signal. - * This must be at most 255. - * It may be greater than the number of - * coded channels (<code>streams + - * coupled_streams</code>). 
- * @param streams <tt>int</tt>: The total number of streams to encode from the - * input. - * This must be no more than the number of channels. - * @param coupled_streams <tt>int</tt>: Number of coupled (2 channel) streams - * to encode. - * This must be no larger than the total - * number of streams. - * Additionally, The total number of - * encoded channels (<code>streams + - * coupled_streams</code>) must be no - * more than the number of input channels. - * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from - * encoded channels to input channels, as described in - * @ref opus_multistream. As an extra constraint, the - * multistream encoder does not allow encoding coupled - * streams for which one channel is unused since this - * is never a good idea. - * @param application <tt>int</tt>: The target encoder application. - * This must be one of the following: - * <dl> - * <dt>#OPUS_APPLICATION_VOIP</dt> - * <dd>Process signal for improved speech intelligibility.</dd> - * <dt>#OPUS_APPLICATION_AUDIO</dt> - * <dd>Favor faithfulness to the original input.</dd> - * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt> - * <dd>Configure the minimum possible coding delay by disabling certain modes - * of operation.</dd> - * </dl> - * @returns #OPUS_OK on success, or an error code (see @ref opus_errorcodes) - * on failure. - */ -OPUS_EXPORT int opus_multistream_encoder_init( - OpusMSEncoder *st, - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - const unsigned char *mapping, - int application -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6); - -OPUS_EXPORT int opus_multistream_surround_encoder_init( - OpusMSEncoder *st, - opus_int32 Fs, - int channels, - int mapping_family, - int *streams, - int *coupled_streams, - unsigned char *mapping, - int application -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6); - -/** Encodes a multistream Opus frame. - * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state. 
- * @param[in] pcm <tt>const opus_int16*</tt>: The input signal as interleaved - * samples. - * This must contain - * <code>frame_size*channels</code> - * samples. - * @param frame_size <tt>int</tt>: Number of samples per channel in the input - * signal. - * This must be an Opus frame size for the - * encoder's sampling rate. - * For example, at 48 kHz the permitted values - * are 120, 240, 480, 960, 1920, and 2880. - * Passing in a duration of less than 10 ms - * (480 samples at 48 kHz) will prevent the - * encoder from using the LPC or hybrid modes. - * @param[out] data <tt>unsigned char*</tt>: Output payload. - * This must contain storage for at - * least \a max_data_bytes. - * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated - * memory for the output - * payload. This may be - * used to impose an upper limit on - * the instant bitrate, but should - * not be used as the only bitrate - * control. Use #OPUS_SET_BITRATE to - * control the bitrate. - * @returns The length of the encoded packet (in bytes) on success or a - * negative error code (see @ref opus_errorcodes) on failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_encode( - OpusMSEncoder *st, - const opus_int16 *pcm, - int frame_size, - unsigned char *data, - opus_int32 max_data_bytes -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); - -/** Encodes a multistream Opus frame from floating point input. - * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state. - * @param[in] pcm <tt>const float*</tt>: The input signal as interleaved - * samples with a normal range of - * +/-1.0. - * Samples with a range beyond +/-1.0 - * are supported but will be clipped by - * decoders using the integer API and - * should only be used if it is known - * that the far end supports extended - * dynamic range. - * This must contain - * <code>frame_size*channels</code> - * samples. 
- * @param frame_size <tt>int</tt>: Number of samples per channel in the input - * signal. - * This must be an Opus frame size for the - * encoder's sampling rate. - * For example, at 48 kHz the permitted values - * are 120, 240, 480, 960, 1920, and 2880. - * Passing in a duration of less than 10 ms - * (480 samples at 48 kHz) will prevent the - * encoder from using the LPC or hybrid modes. - * @param[out] data <tt>unsigned char*</tt>: Output payload. - * This must contain storage for at - * least \a max_data_bytes. - * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated - * memory for the output - * payload. This may be - * used to impose an upper limit on - * the instant bitrate, but should - * not be used as the only bitrate - * control. Use #OPUS_SET_BITRATE to - * control the bitrate. - * @returns The length of the encoded packet (in bytes) on success or a - * negative error code (see @ref opus_errorcodes) on failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_encode_float( - OpusMSEncoder *st, - const float *pcm, - int frame_size, - unsigned char *data, - opus_int32 max_data_bytes -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); - -/** Frees an <code>OpusMSEncoder</code> allocated by - * opus_multistream_encoder_create(). - * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state to be freed. - */ -OPUS_EXPORT void opus_multistream_encoder_destroy(OpusMSEncoder *st); - -/** Perform a CTL function on a multistream Opus encoder. - * - * Generally the request and subsequent arguments are generated by a - * convenience macro. - * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state. - * @param request This and all remaining parameters should be replaced by one - * of the convenience macros in @ref opus_genericctls, - * @ref opus_encoderctls, or @ref opus_multistream_ctls. 
- * @see opus_genericctls - * @see opus_encoderctls - * @see opus_multistream_ctls - */ -OPUS_EXPORT int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...) OPUS_ARG_NONNULL(1); - -/**@}*/ - -/**\name Multistream decoder functions */ -/**@{*/ - -/** Gets the size of an <code>OpusMSDecoder</code> structure. - * @param streams <tt>int</tt>: The total number of streams coded in the - * input. - * This must be no more than 255. - * @param coupled_streams <tt>int</tt>: Number streams to decode as coupled - * (2 channel) streams. - * This must be no larger than the total - * number of streams. - * Additionally, The total number of - * coded channels (<code>streams + - * coupled_streams</code>) must be no - * more than 255. - * @returns The size in bytes on success, or a negative error code - * (see @ref opus_errorcodes) on error. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_decoder_get_size( - int streams, - int coupled_streams -); - -/** Allocates and initializes a multistream decoder state. - * Call opus_multistream_decoder_destroy() to release - * this object when finished. - * @param Fs <tt>opus_int32</tt>: Sampling rate to decode at (in Hz). - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param channels <tt>int</tt>: Number of channels to output. - * This must be at most 255. - * It may be different from the number of coded - * channels (<code>streams + - * coupled_streams</code>). - * @param streams <tt>int</tt>: The total number of streams coded in the - * input. - * This must be no more than 255. - * @param coupled_streams <tt>int</tt>: Number of streams to decode as coupled - * (2 channel) streams. - * This must be no larger than the total - * number of streams. - * Additionally, The total number of - * coded channels (<code>streams + - * coupled_streams</code>) must be no - * more than 255. 
- * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from - * coded channels to output channels, as described in - * @ref opus_multistream. - * @param[out] error <tt>int *</tt>: Returns #OPUS_OK on success, or an error - * code (see @ref opus_errorcodes) on - * failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSDecoder *opus_multistream_decoder_create( - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - const unsigned char *mapping, - int *error -) OPUS_ARG_NONNULL(5); - -/** Intialize a previously allocated decoder state object. - * The memory pointed to by \a st must be at least the size returned by - * opus_multistream_encoder_get_size(). - * This is intended for applications which use their own allocator instead of - * malloc. - * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL. - * @see opus_multistream_decoder_create - * @see opus_multistream_deocder_get_size - * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state to initialize. - * @param Fs <tt>opus_int32</tt>: Sampling rate to decode at (in Hz). - * This must be one of 8000, 12000, 16000, - * 24000, or 48000. - * @param channels <tt>int</tt>: Number of channels to output. - * This must be at most 255. - * It may be different from the number of coded - * channels (<code>streams + - * coupled_streams</code>). - * @param streams <tt>int</tt>: The total number of streams coded in the - * input. - * This must be no more than 255. - * @param coupled_streams <tt>int</tt>: Number of streams to decode as coupled - * (2 channel) streams. - * This must be no larger than the total - * number of streams. - * Additionally, The total number of - * coded channels (<code>streams + - * coupled_streams</code>) must be no - * more than 255. - * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from - * coded channels to output channels, as described in - * @ref opus_multistream. 
- * @returns #OPUS_OK on success, or an error code (see @ref opus_errorcodes) - * on failure. - */ -OPUS_EXPORT int opus_multistream_decoder_init( - OpusMSDecoder *st, - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - const unsigned char *mapping -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6); - -/** Decode a multistream Opus packet. - * @param st <tt>OpusMSDecoder*</tt>: Multistream decoder state. - * @param[in] data <tt>const unsigned char*</tt>: Input payload. - * Use a <code>NULL</code> - * pointer to indicate packet - * loss. - * @param len <tt>opus_int32</tt>: Number of bytes in payload. - * @param[out] pcm <tt>opus_int16*</tt>: Output signal, with interleaved - * samples. - * This must contain room for - * <code>frame_size*channels</code> - * samples. - * @param frame_size <tt>int</tt>: The number of samples per channel of - * available space in \a pcm. - * If this is less than the maximum packet duration - * (120 ms; 5760 for 48kHz), this function will not be capable - * of decoding some packets. In the case of PLC (data==NULL) - * or FEC (decode_fec=1), then frame_size needs to be exactly - * the duration of audio that is missing, otherwise the - * decoder will not be in the optimal state to decode the - * next incoming packet. For the PLC and FEC cases, frame_size - * <b>must</b> be a multiple of 2.5 ms. - * @param decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band - * forward error correction data be decoded. - * If no such data is available, the frame is - * decoded as if it were lost. - * @returns Number of samples decoded on success or a negative error code - * (see @ref opus_errorcodes) on failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_decode( - OpusMSDecoder *st, - const unsigned char *data, - opus_int32 len, - opus_int16 *pcm, - int frame_size, - int decode_fec -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); - -/** Decode a multistream Opus packet with floating point output. 
- * @param st <tt>OpusMSDecoder*</tt>: Multistream decoder state. - * @param[in] data <tt>const unsigned char*</tt>: Input payload. - * Use a <code>NULL</code> - * pointer to indicate packet - * loss. - * @param len <tt>opus_int32</tt>: Number of bytes in payload. - * @param[out] pcm <tt>opus_int16*</tt>: Output signal, with interleaved - * samples. - * This must contain room for - * <code>frame_size*channels</code> - * samples. - * @param frame_size <tt>int</tt>: The number of samples per channel of - * available space in \a pcm. - * If this is less than the maximum packet duration - * (120 ms; 5760 for 48kHz), this function will not be capable - * of decoding some packets. In the case of PLC (data==NULL) - * or FEC (decode_fec=1), then frame_size needs to be exactly - * the duration of audio that is missing, otherwise the - * decoder will not be in the optimal state to decode the - * next incoming packet. For the PLC and FEC cases, frame_size - * <b>must</b> be a multiple of 2.5 ms. - * @param decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band - * forward error correction data be decoded. - * If no such data is available, the frame is - * decoded as if it were lost. - * @returns Number of samples decoded on success or a negative error code - * (see @ref opus_errorcodes) on failure. - */ -OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_decode_float( - OpusMSDecoder *st, - const unsigned char *data, - opus_int32 len, - float *pcm, - int frame_size, - int decode_fec -) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); - -/** Perform a CTL function on a multistream Opus decoder. - * - * Generally the request and subsequent arguments are generated by a - * convenience macro. - * @param st <tt>OpusMSDecoder*</tt>: Multistream decoder state. - * @param request This and all remaining parameters should be replaced by one - * of the convenience macros in @ref opus_genericctls, - * @ref opus_decoderctls, or @ref opus_multistream_ctls. 
- * @see opus_genericctls - * @see opus_decoderctls - * @see opus_multistream_ctls - */ -OPUS_EXPORT int opus_multistream_decoder_ctl(OpusMSDecoder *st, int request, ...) OPUS_ARG_NONNULL(1); - -/** Frees an <code>OpusMSDecoder</code> allocated by - * opus_multistream_decoder_create(). - * @param st <tt>OpusMSDecoder</tt>: Multistream decoder state to be freed. - */ -OPUS_EXPORT void opus_multistream_decoder_destroy(OpusMSDecoder *st); - -/**@}*/ - -/**@}*/ - -#ifdef __cplusplus -} -#endif - -#endif /* OPUS_MULTISTREAM_H */ diff --git a/thirdparty/opus/opus/opus_types.h b/thirdparty/opus/opus/opus_types.h deleted file mode 100644 index b28e03aea2..0000000000 --- a/thirdparty/opus/opus/opus_types.h +++ /dev/null @@ -1,159 +0,0 @@ -/* (C) COPYRIGHT 1994-2002 Xiph.Org Foundation */ -/* Modified by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -/* opus_types.h based on ogg_types.h from libogg */ - -/** - @file opus_types.h - @brief Opus reference implementation types -*/ -#ifndef OPUS_TYPES_H -#define OPUS_TYPES_H - -/* Use the real stdint.h if it's there (taken from Paul Hsieh's pstdint.h) */ -#if (defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H)) -#include <stdint.h> - - typedef int16_t opus_int16; - typedef uint16_t opus_uint16; - typedef int32_t opus_int32; - typedef uint32_t opus_uint32; -#elif defined(_WIN32) - -# if defined(__CYGWIN__) -# include <_G_config.h> - typedef _G_int32_t opus_int32; - typedef _G_uint32_t opus_uint32; - typedef _G_int16 opus_int16; - typedef _G_uint16 opus_uint16; -# elif defined(__MINGW32__) - typedef short opus_int16; - typedef unsigned short opus_uint16; - typedef int opus_int32; - typedef unsigned int opus_uint32; -# elif defined(__MWERKS__) - typedef int opus_int32; - typedef unsigned int opus_uint32; - typedef short opus_int16; - typedef unsigned short opus_uint16; -# else - /* MSVC/Borland */ - typedef __int32 opus_int32; - typedef unsigned __int32 opus_uint32; - typedef __int16 opus_int16; - typedef unsigned __int16 opus_uint16; -# endif - -#elif defined(__MACOS__) - -# include <sys/types.h> - typedef SInt16 opus_int16; - typedef UInt16 opus_uint16; - typedef SInt32 opus_int32; - typedef UInt32 opus_uint32; - -#elif (defined(__APPLE__) 
&& defined(__MACH__)) /* MacOS X Framework build */ - -# include <sys/types.h> - typedef int16_t opus_int16; - typedef u_int16_t opus_uint16; - typedef int32_t opus_int32; - typedef u_int32_t opus_uint32; - -#elif defined(__BEOS__) - - /* Be */ -# include <inttypes.h> - typedef int16 opus_int16; - typedef u_int16 opus_uint16; - typedef int32_t opus_int32; - typedef u_int32_t opus_uint32; - -#elif defined (__EMX__) - - /* OS/2 GCC */ - typedef short opus_int16; - typedef unsigned short opus_uint16; - typedef int opus_int32; - typedef unsigned int opus_uint32; - -#elif defined (DJGPP) - - /* DJGPP */ - typedef short opus_int16; - typedef unsigned short opus_uint16; - typedef int opus_int32; - typedef unsigned int opus_uint32; - -#elif defined(R5900) - - /* PS2 EE */ - typedef int opus_int32; - typedef unsigned opus_uint32; - typedef short opus_int16; - typedef unsigned short opus_uint16; - -#elif defined(__SYMBIAN32__) - - /* Symbian GCC */ - typedef signed short opus_int16; - typedef unsigned short opus_uint16; - typedef signed int opus_int32; - typedef unsigned int opus_uint32; - -#elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X) - - typedef short opus_int16; - typedef unsigned short opus_uint16; - typedef long opus_int32; - typedef unsigned long opus_uint32; - -#elif defined(CONFIG_TI_C6X) - - typedef short opus_int16; - typedef unsigned short opus_uint16; - typedef int opus_int32; - typedef unsigned int opus_uint32; - -#else - - /* Give up, take a reasonable guess */ - typedef short opus_int16; - typedef unsigned short opus_uint16; - typedef int opus_int32; - typedef unsigned int opus_uint32; - -#endif - -#define opus_int int /* used for counters etc; at least 16 bits */ -#define opus_int64 long long -#define opus_int8 signed char - -#define opus_uint unsigned int /* used for counters etc; at least 16 bits */ -#define opus_uint64 unsigned long long -#define opus_uint8 unsigned char - -#endif /* OPUS_TYPES_H */ diff --git a/thirdparty/opus/opus/opusfile.h 
b/thirdparty/opus/opus/opusfile.h deleted file mode 100644 index 4bf2fba926..0000000000 --- a/thirdparty/opus/opus/opusfile.h +++ /dev/null @@ -1,2157 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 1994-2012 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: stdio-based convenience library for opening/seeking/decoding - last mod: $Id: vorbisfile.h 17182 2010-04-29 03:48:32Z xiphmont $ - - ********************************************************************/ -#if !defined(_opusfile_h) -# define _opusfile_h (1) - -/**\mainpage - \section Introduction - - This is the documentation for the <tt>libopusfile</tt> C API. - - The <tt>libopusfile</tt> package provides a convenient high-level API for - decoding and basic manipulation of all Ogg Opus audio streams. - <tt>libopusfile</tt> is implemented as a layer on top of Xiph.Org's - reference - <tt><a href="https://www.xiph.org/ogg/doc/libogg/reference.html">libogg</a></tt> - and - <tt><a href="https://mf4.xiph.org/jenkins/view/opus/job/opus/ws/doc/html/index.html">libopus</a></tt> - libraries. - - <tt>libopusfile</tt> provides several sets of built-in routines for - file/stream access, and may also use custom stream I/O routines provided by - the embedded environment. - There are built-in I/O routines provided for ANSI-compliant - <code>stdio</code> (<code>FILE *</code>), memory buffers, and URLs - (including <file:> URLs, plus optionally <http:> and <https:> URLs). 
- - \section Organization - - The main API is divided into several sections: - - \ref stream_open_close - - \ref stream_info - - \ref stream_decoding - - \ref stream_seeking - - Several additional sections are not tied to the main API. - - \ref stream_callbacks - - \ref header_info - - \ref error_codes - - \section Overview - - The <tt>libopusfile</tt> API always decodes files to 48 kHz. - The original sample rate is not preserved by the lossy compression, though - it is stored in the header to allow you to resample to it after decoding - (the <tt>libopusfile</tt> API does not currently provide a resampler, - but the - <a href="http://www.speex.org/docs/manual/speex-manual/node7.html#SECTION00760000000000000000">the - Speex resampler</a> is a good choice if you need one). - In general, if you are playing back the audio, you should leave it at - 48 kHz, provided your audio hardware supports it. - When decoding to a file, it may be worth resampling back to the original - sample rate, so as not to surprise users who might not expect the sample - rate to change after encoding to Opus and decoding. - - Opus files can contain anywhere from 1 to 255 channels of audio. - The channel mappings for up to 8 channels are the same as the - <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis - mappings</a>. - A special stereo API can convert everything to 2 channels, making it simple - to support multichannel files in an application which only has stereo - output. - Although the <tt>libopusfile</tt> ABI provides support for the theoretical - maximum number of channels, the current implementation does not support - files with more than 8 channels, as they do not have well-defined channel - mappings. - - Like all Ogg files, Opus files may be "chained". - That is, multiple Opus files may be combined into a single, longer file just - by concatenating the original files. 
- This is commonly done in internet radio streaming, as it allows the title - and artist to be updated each time the song changes, since each link in the - chain includes its own set of metadata. - - <tt>libopusfile</tt> fully supports chained files. - It will decode the first Opus stream found in each link of a chained file - (ignoring any other streams that might be concurrently multiplexed with it, - such as a video stream). - - The channel count can also change between links. - If your application is not prepared to deal with this, it can use the stereo - API to ensure the audio from all links will always get decoded into a - common format. - Since <tt>libopusfile</tt> always decodes to 48 kHz, you do not have to - worry about the sample rate changing between links (as was possible with - Vorbis). - This makes application support for chained files with <tt>libopusfile</tt> - very easy.*/ - -# if defined(__cplusplus) -extern "C" { -# endif - -# include <stdarg.h> -# include <stdio.h> -# include <ogg/ogg.h> -# include <opus_multistream.h> - -/**@cond PRIVATE*/ - -/*Enable special features for gcc and gcc-compatible compilers.*/ -# if !defined(OP_GNUC_PREREQ) -# if defined(__GNUC__)&&defined(__GNUC_MINOR__) -# define OP_GNUC_PREREQ(_maj,_min) \ - ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min)) -# else -# define OP_GNUC_PREREQ(_maj,_min) 0 -# endif -# endif - -# if OP_GNUC_PREREQ(4,0) -# pragma GCC visibility push(default) -# endif - -typedef struct OpusHead OpusHead; -typedef struct OpusTags OpusTags; -typedef struct OpusPictureTag OpusPictureTag; -typedef struct OpusServerInfo OpusServerInfo; -typedef struct OpusFileCallbacks OpusFileCallbacks; -typedef struct OggOpusFile OggOpusFile; - -/*Warning attributes for libopusfile functions.*/ -# if OP_GNUC_PREREQ(3,4) -# define OP_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) -# else -# define OP_WARN_UNUSED_RESULT -# endif -# if OP_GNUC_PREREQ(3,4) -# define OP_ARG_NONNULL(_x) 
__attribute__((__nonnull__(_x))) -# else -# define OP_ARG_NONNULL(_x) -# endif - -/**@endcond*/ - -/**\defgroup error_codes Error Codes*/ -/*@{*/ -/**\name List of possible error codes - Many of the functions in this library return a negative error code when a - function fails. - This list provides a brief explanation of the common errors. - See each individual function for more details on what a specific error code - means in that context.*/ -/*@{*/ - -/**A request did not succeed.*/ -#define OP_FALSE (-1) -/*Currently not used externally.*/ -#define OP_EOF (-2) -/**There was a hole in the page sequence numbers (e.g., a page was corrupt or - missing).*/ -#define OP_HOLE (-3) -/**An underlying read, seek, or tell operation failed when it should have - succeeded.*/ -#define OP_EREAD (-128) -/**A <code>NULL</code> pointer was passed where one was unexpected, or an - internal memory allocation failed, or an internal library error was - encountered.*/ -#define OP_EFAULT (-129) -/**The stream used a feature that is not implemented, such as an unsupported - channel family.*/ -#define OP_EIMPL (-130) -/**One or more parameters to a function were invalid.*/ -#define OP_EINVAL (-131) -/**A purported Ogg Opus stream did not begin with an Ogg page, a purported - header packet did not start with one of the required strings, "OpusHead" or - "OpusTags", or a link in a chained file was encountered that did not - contain any logical Opus streams.*/ -#define OP_ENOTFORMAT (-132) -/**A required header packet was not properly formatted, contained illegal - values, or was missing altogether.*/ -#define OP_EBADHEADER (-133) -/**The ID header contained an unrecognized version number.*/ -#define OP_EVERSION (-134) -/*Currently not used at all.*/ -#define OP_ENOTAUDIO (-135) -/**An audio packet failed to decode properly. 
- This is usually caused by a multistream Ogg packet where the durations of - the individual Opus packets contained in it are not all the same.*/ -#define OP_EBADPACKET (-136) -/**We failed to find data we had seen before, or the bitstream structure was - sufficiently malformed that seeking to the target destination was - impossible.*/ -#define OP_EBADLINK (-137) -/**An operation that requires seeking was requested on an unseekable stream.*/ -#define OP_ENOSEEK (-138) -/**The first or last granule position of a link failed basic validity checks.*/ -#define OP_EBADTIMESTAMP (-139) - -/*@}*/ -/*@}*/ - -/**\defgroup header_info Header Information*/ -/*@{*/ - -/**The maximum number of channels in an Ogg Opus stream.*/ -#define OPUS_CHANNEL_COUNT_MAX (255) - -/**Ogg Opus bitstream information. - This contains the basic playback parameters for a stream, and corresponds to - the initial ID header packet of an Ogg Opus stream.*/ -struct OpusHead{ - /**The Ogg Opus format version, in the range 0...255. - The top 4 bits represent a "major" version, and the bottom four bits - represent backwards-compatible "minor" revisions. - The current specification describes version 1. - This library will recognize versions up through 15 as backwards compatible - with the current specification. - An earlier draft of the specification described a version 0, but the only - difference between version 1 and version 0 is that version 0 did - not specify the semantics for handling the version field.*/ - int version; - /**The number of channels, in the range 1...255.*/ - int channel_count; - /**The number of samples that should be discarded from the beginning of the - stream.*/ - unsigned pre_skip; - /**The sampling rate of the original input. - All Opus audio is coded at 48 kHz, and should also be decoded at 48 kHz - for playback (unless the target hardware does not support this sampling - rate). 
- However, this field may be used to resample the audio back to the original - sampling rate, for example, when saving the output to a file.*/ - opus_uint32 input_sample_rate; - /**The gain to apply to the decoded output, in dB, as a Q8 value in the range - -32768...32767. - The <tt>libopusfile</tt> API will automatically apply this gain to the - decoded output before returning it, scaling it by - <code>pow(10,output_gain/(20.0*256))</code>.*/ - int output_gain; - /**The channel mapping family, in the range 0...255. - Channel mapping family 0 covers mono or stereo in a single stream. - Channel mapping family 1 covers 1 to 8 channels in one or more streams, - using the Vorbis speaker assignments. - Channel mapping family 255 covers 1 to 255 channels in one or more - streams, but without any defined speaker assignment.*/ - int mapping_family; - /**The number of Opus streams in each Ogg packet, in the range 1...255.*/ - int stream_count; - /**The number of coupled Opus streams in each Ogg packet, in the range - 0...127. - This must satisfy <code>0 <= coupled_count <= stream_count</code> and - <code>coupled_count + stream_count <= 255</code>. - The coupled streams appear first, before all uncoupled streams, in an Ogg - Opus packet.*/ - int coupled_count; - /**The mapping from coded stream channels to output channels. - Let <code>index=mapping[k]</code> be the value for channel <code>k</code>. - If <code>index<2*coupled_count</code>, then it refers to the left channel - from stream <code>(index/2)</code> if even, and the right channel from - stream <code>(index/2)</code> if odd. - Otherwise, it refers to the output of the uncoupled stream - <code>(index-coupled_count)</code>.*/ - unsigned char mapping[OPUS_CHANNEL_COUNT_MAX]; -}; - -/**The metadata from an Ogg Opus stream. - - This structure holds the in-stream metadata corresponding to the 'comment' - header packet of an Ogg Opus stream. 
- The comment header is meant to be used much like someone jotting a quick - note on the label of a CD. - It should be a short, to the point text note that can be more than a couple - words, but not more than a short paragraph. - - The metadata is stored as a series of (tag, value) pairs, in length-encoded - string vectors, using the same format as Vorbis (without the final "framing - bit"), Theora, and Speex, except for the packet header. - The first occurrence of the '=' character delimits the tag and value. - A particular tag may occur more than once, and order is significant. - The character set encoding for the strings is always UTF-8, but the tag - names are limited to ASCII, and treated as case-insensitive. - See <a href="http://www.xiph.org/vorbis/doc/v-comment.html">the Vorbis - comment header specification</a> for details. - - In filling in this structure, <tt>libopusfile</tt> will null-terminate the - #user_comments strings for safety. - However, the bitstream format itself treats them as 8-bit clean vectors, - possibly containing NUL characters, so the #comment_lengths array should be - treated as their authoritative length. - - This structure is binary and source-compatible with a - <code>vorbis_comment</code>, and pointers to it may be freely cast to - <code>vorbis_comment</code> pointers, and vice versa. - It is provided as a separate type to avoid introducing a compile-time - dependency on the libvorbis headers.*/ -struct OpusTags{ - /**The array of comment string vectors.*/ - char **user_comments; - /**An array of the corresponding length of each vector, in bytes.*/ - int *comment_lengths; - /**The total number of comment streams.*/ - int comments; - /**The null-terminated vendor string. 
- This identifies the software used to encode the stream.*/ - char *vendor; -}; - -/**\name Picture tag image formats*/ -/*@{*/ - -/**The MIME type was not recognized, or the image data did not match the - declared MIME type.*/ -#define OP_PIC_FORMAT_UNKNOWN (-1) -/**The MIME type indicates the image data is really a URL.*/ -#define OP_PIC_FORMAT_URL (0) -/**The image is a JPEG.*/ -#define OP_PIC_FORMAT_JPEG (1) -/**The image is a PNG.*/ -#define OP_PIC_FORMAT_PNG (2) -/**The image is a GIF.*/ -#define OP_PIC_FORMAT_GIF (3) - -/*@}*/ - -/**The contents of a METADATA_BLOCK_PICTURE tag.*/ -struct OpusPictureTag{ - /**The picture type according to the ID3v2 APIC frame: - <ol start="0"> - <li>Other</li> - <li>32x32 pixels 'file icon' (PNG only)</li> - <li>Other file icon</li> - <li>Cover (front)</li> - <li>Cover (back)</li> - <li>Leaflet page</li> - <li>Media (e.g. label side of CD)</li> - <li>Lead artist/lead performer/soloist</li> - <li>Artist/performer</li> - <li>Conductor</li> - <li>Band/Orchestra</li> - <li>Composer</li> - <li>Lyricist/text writer</li> - <li>Recording Location</li> - <li>During recording</li> - <li>During performance</li> - <li>Movie/video screen capture</li> - <li>A bright colored fish</li> - <li>Illustration</li> - <li>Band/artist logotype</li> - <li>Publisher/Studio logotype</li> - </ol> - Others are reserved and should not be used. - There may only be one each of picture type 1 and 2 in a file.*/ - opus_int32 type; - /**The MIME type of the picture, in printable ASCII characters 0x20-0x7E. - The MIME type may also be <code>"-->"</code> to signify that the data part - is a URL pointing to the picture instead of the picture data itself. 
- In this case, a terminating NUL is appended to the URL string in #data, - but #data_length is set to the length of the string excluding that - terminating NUL.*/ - char *mime_type; - /**The description of the picture, in UTF-8.*/ - char *description; - /**The width of the picture in pixels.*/ - opus_uint32 width; - /**The height of the picture in pixels.*/ - opus_uint32 height; - /**The color depth of the picture in bits-per-pixel (<em>not</em> - bits-per-channel).*/ - opus_uint32 depth; - /**For indexed-color pictures (e.g., GIF), the number of colors used, or 0 - for non-indexed pictures.*/ - opus_uint32 colors; - /**The length of the picture data in bytes.*/ - opus_uint32 data_length; - /**The binary picture data.*/ - unsigned char *data; - /**The format of the picture data, if known. - One of - <ul> - <li>#OP_PIC_FORMAT_UNKNOWN,</li> - <li>#OP_PIC_FORMAT_URL,</li> - <li>#OP_PIC_FORMAT_JPEG,</li> - <li>#OP_PIC_FORMAT_PNG, or</li> - <li>#OP_PIC_FORMAT_GIF.</li> - </ul>*/ - int format; -}; - -/**\name Functions for manipulating header data - - These functions manipulate the #OpusHead and #OpusTags structures, - which describe the audio parameters and tag-value metadata, respectively. - These can be used to query the headers returned by <tt>libopusfile</tt>, or - to parse Opus headers from sources other than an Ogg Opus stream, provided - they use the same format.*/ -/*@{*/ - -/**Parses the contents of the ID header packet of an Ogg Opus stream. - \param[out] _head Returns the contents of the parsed packet. - The contents of this structure are untouched on error. - This may be <code>NULL</code> to merely test the header - for validity. - \param[in] _data The contents of the ID header packet. - \param _len The number of bytes of data in the ID header packet. - \return 0 on success or a negative value on error. - \retval #OP_ENOTFORMAT If the data does not start with the "OpusHead" - string. 
- \retval #OP_EVERSION If the version field signaled a version this library - does not know how to parse. - \retval #OP_EIMPL If the channel mapping family was 255, which general - purpose players should not attempt to play. - \retval #OP_EBADHEADER If the contents of the packet otherwise violate the - Ogg Opus specification: - <ul> - <li>Insufficient data,</li> - <li>Too much data for the known minor versions,</li> - <li>An unrecognized channel mapping family,</li> - <li>Zero channels or too many channels,</li> - <li>Zero coded streams,</li> - <li>Too many coupled streams, or</li> - <li>An invalid channel mapping index.</li> - </ul>*/ -OP_WARN_UNUSED_RESULT int opus_head_parse(OpusHead *_head, - const unsigned char *_data,size_t _len) OP_ARG_NONNULL(2); - -/**Converts a granule position to a sample offset for a given Ogg Opus stream. - The sample offset is simply <code>_gp-_head->pre_skip</code>. - Granule position values smaller than OpusHead#pre_skip correspond to audio - that should never be played, and thus have no associated sample offset. - This function returns -1 for such values. - This function also correctly handles extremely large granule positions, - which may have wrapped around to a negative number when stored in a signed - ogg_int64_t value. - \param _head The #OpusHead information from the ID header of the stream. - \param _gp The granule position to convert. - \return The sample offset associated with the given granule position - (counting at a 48 kHz sampling rate), or the special value -1 on - error (i.e., the granule position was smaller than the pre-skip - amount).*/ -ogg_int64_t opus_granule_sample(const OpusHead *_head,ogg_int64_t _gp) - OP_ARG_NONNULL(1); - -/**Parses the contents of the 'comment' header packet of an Ogg Opus stream. - \param[out] _tags An uninitialized #OpusTags structure. - This returns the contents of the parsed packet. - The contents of this structure are untouched on error. 
- This may be <code>NULL</code> to merely test the header - for validity. - \param[in] _data The contents of the 'comment' header packet. - \param _len The number of bytes of data in the 'info' header packet. - \retval 0 Success. - \retval #OP_ENOTFORMAT If the data does not start with the "OpusTags" - string. - \retval #OP_EBADHEADER If the contents of the packet otherwise violate the - Ogg Opus specification. - \retval #OP_EFAULT If there wasn't enough memory to store the tags.*/ -OP_WARN_UNUSED_RESULT int opus_tags_parse(OpusTags *_tags, - const unsigned char *_data,size_t _len) OP_ARG_NONNULL(2); - -/**Performs a deep copy of an #OpusTags structure. - \param _dst The #OpusTags structure to copy into. - If this function fails, the contents of this structure remain - untouched. - \param _src The #OpusTags structure to copy from. - \retval 0 Success. - \retval #OP_EFAULT If there wasn't enough memory to copy the tags.*/ -int opus_tags_copy(OpusTags *_dst,const OpusTags *_src) OP_ARG_NONNULL(1); - -/**Initializes an #OpusTags structure. - This should be called on a freshly allocated #OpusTags structure before - attempting to use it. - \param _tags The #OpusTags structure to initialize.*/ -void opus_tags_init(OpusTags *_tags) OP_ARG_NONNULL(1); - -/**Add a (tag, value) pair to an initialized #OpusTags structure. - \note Neither opus_tags_add() nor opus_tags_add_comment() support values - containing embedded NULs, although the bitstream format does support them. - To add such tags, you will need to manipulate the #OpusTags structure - directly. - \param _tags The #OpusTags structure to add the (tag, value) pair to. - \param _tag A NUL-terminated, case-insensitive, ASCII string containing - the tag to add (without an '=' character). - \param _value A NUL-terminated UTF-8 containing the corresponding value. - \return 0 on success, or a negative value on failure. 
- \retval #OP_EFAULT An internal memory allocation failed.*/ -int opus_tags_add(OpusTags *_tags,const char *_tag,const char *_value) - OP_ARG_NONNULL(1) OP_ARG_NONNULL(2) OP_ARG_NONNULL(3); - -/**Add a comment to an initialized #OpusTags structure. - \note Neither opus_tags_add_comment() nor opus_tags_add() support comments - containing embedded NULs, although the bitstream format does support them. - To add such tags, you will need to manipulate the #OpusTags structure - directly. - \param _tags The #OpusTags structure to add the comment to. - \param _comment A NUL-terminated UTF-8 string containing the comment in - "TAG=value" form. - \return 0 on success, or a negative value on failure. - \retval #OP_EFAULT An internal memory allocation failed.*/ -int opus_tags_add_comment(OpusTags *_tags,const char *_comment) - OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); - -/**Replace the binary suffix data at the end of the packet (if any). - \param _tags An initialized #OpusTags structure. - \param _data A buffer of binary data to append after the encoded user - comments. - The least significant bit of the first byte of this data must - be set (to ensure the data is preserved by other editors). - \param _len The number of bytes of binary data to append. - This may be zero to remove any existing binary suffix data. - \return 0 on success, or a negative value on error. - \retval #OP_EINVAL \a _len was negative, or \a _len was positive but - \a _data was <code>NULL</code> or the least significant - bit of the first byte was not set. - \retval #OP_EFAULT An internal memory allocation failed.*/ -int opus_tags_set_binary_suffix(OpusTags *_tags, - const unsigned char *_data,int _len) OP_ARG_NONNULL(1); - -/**Look up a comment value by its tag. - \param _tags An initialized #OpusTags structure. - \param _tag The tag to look up. - \param _count The instance of the tag. - The same tag can appear multiple times, each with a distinct - value, so an index is required to retrieve them all. 
- The order in which these values appear is significant and - should be preserved. - Use opus_tags_query_count() to get the legal range for the - \a _count parameter. - \return A pointer to the queried tag's value. - This points directly to data in the #OpusTags structure. - It should not be modified or freed by the application, and - modifications to the structure may invalidate the pointer. - \retval NULL If no matching tag is found.*/ -const char *opus_tags_query(const OpusTags *_tags,const char *_tag,int _count) - OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); - -/**Look up the number of instances of a tag. - Call this first when querying for a specific tag and then iterate over the - number of instances with separate calls to opus_tags_query() to retrieve - all the values for that tag in order. - \param _tags An initialized #OpusTags structure. - \param _tag The tag to look up. - \return The number of instances of this particular tag.*/ -int opus_tags_query_count(const OpusTags *_tags,const char *_tag) - OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); - -/**Retrieve the binary suffix data at the end of the packet (if any). - \param _tags An initialized #OpusTags structure. - \param[out] _len Returns the number of bytes of binary suffix data returned. - \return A pointer to the binary suffix data, or <code>NULL</code> if none - was present.*/ -const unsigned char *opus_tags_get_binary_suffix(const OpusTags *_tags, - int *_len) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); - -/**Get the album gain from an R128_ALBUM_GAIN tag, if one was specified. - This searches for the first R128_ALBUM_GAIN tag with a valid signed, - 16-bit decimal integer value and returns the value. - This routine is exposed merely for convenience for applications which wish - to do something special with the album gain (i.e., display it). - If you simply wish to apply the album gain instead of the header gain, you - can use op_set_gain_offset() with an #OP_ALBUM_GAIN type and no offset. 
- \param _tags An initialized #OpusTags structure. - \param[out] _gain_q8 The album gain, in 1/256ths of a dB. - This will lie in the range [-32768,32767], and should - be applied in <em>addition</em> to the header gain. - On error, no value is returned, and the previous - contents remain unchanged. - \return 0 on success, or a negative value on error. - \retval #OP_FALSE There was no album gain available in the given tags.*/ -int opus_tags_get_album_gain(const OpusTags *_tags,int *_gain_q8) - OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); - -/**Get the track gain from an R128_TRACK_GAIN tag, if one was specified. - This searches for the first R128_TRACK_GAIN tag with a valid signed, - 16-bit decimal integer value and returns the value. - This routine is exposed merely for convenience for applications which wish - to do something special with the track gain (i.e., display it). - If you simply wish to apply the track gain instead of the header gain, you - can use op_set_gain_offset() with an #OP_TRACK_GAIN type and no offset. - \param _tags An initialized #OpusTags structure. - \param[out] _gain_q8 The track gain, in 1/256ths of a dB. - This will lie in the range [-32768,32767], and should - be applied in <em>addition</em> to the header gain. - On error, no value is returned, and the previous - contents remain unchanged. - \return 0 on success, or a negative value on error. - \retval #OP_FALSE There was no track gain available in the given tags.*/ -int opus_tags_get_track_gain(const OpusTags *_tags,int *_gain_q8) - OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); - -/**Clears the #OpusTags structure. - This should be called on an #OpusTags structure after it is no longer - needed. - It will free all memory used by the structure members. - \param _tags The #OpusTags structure to clear.*/ -void opus_tags_clear(OpusTags *_tags) OP_ARG_NONNULL(1); - -/**Check if \a _comment is an instance of a \a _tag_name tag. 
- \see opus_tagncompare - \param _tag_name A NUL-terminated, case-insensitive, ASCII string containing - the name of the tag to check for (without the terminating - '=' character). - \param _comment The comment string to check. - \return An integer less than, equal to, or greater than zero if \a _comment - is found respectively, to be less than, to match, or be greater - than a "tag=value" string whose tag matches \a _tag_name.*/ -int opus_tagcompare(const char *_tag_name,const char *_comment); - -/**Check if \a _comment is an instance of a \a _tag_name tag. - This version is slightly more efficient than opus_tagcompare() if the length - of the tag name is already known (e.g., because it is a constant). - \see opus_tagcompare - \param _tag_name A case-insensitive ASCII string containing the name of the - tag to check for (without the terminating '=' character). - \param _tag_len The number of characters in the tag name. - This must be non-negative. - \param _comment The comment string to check. - \return An integer less than, equal to, or greater than zero if \a _comment - is found respectively, to be less than, to match, or be greater - than a "tag=value" string whose tag matches the first \a _tag_len - characters of \a _tag_name.*/ -int opus_tagncompare(const char *_tag_name,int _tag_len,const char *_comment); - -/**Parse a single METADATA_BLOCK_PICTURE tag. - This decodes the BASE64-encoded content of the tag and returns a structure - with the MIME type, description, image parameters (if known), and the - compressed image data. - If the MIME type indicates the presence of an image format we recognize - (JPEG, PNG, or GIF) and the actual image data contains the magic signature - associated with that format, then the OpusPictureTag::format field will be - set to the corresponding format. - This is provided as a convenience to avoid requiring applications to parse - the MIME type and/or do their own format detection for the commonly used - formats. 
- In this case, we also attempt to extract the image parameters directly from - the image data (overriding any that were present in the tag, which the - specification says applications are not meant to rely on). - The application must still provide its own support for actually decoding the - image data and, if applicable, retrieving that data from URLs. - \param[out] _pic Returns the parsed picture data. - No sanitation is done on the type, MIME type, or - description fields, so these might return invalid values. - The contents of this structure are left unmodified on - failure. - \param _tag The METADATA_BLOCK_PICTURE tag contents. - The leading "METADATA_BLOCK_PICTURE=" portion is optional, - to allow the function to be used on either directly on the - values in OpusTags::user_comments or on the return value - of opus_tags_query(). - \return 0 on success or a negative value on error. - \retval #OP_ENOTFORMAT The METADATA_BLOCK_PICTURE contents were not valid. - \retval #OP_EFAULT There was not enough memory to store the picture tag - contents.*/ -OP_WARN_UNUSED_RESULT int opus_picture_tag_parse(OpusPictureTag *_pic, - const char *_tag) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); - -/**Initializes an #OpusPictureTag structure. - This should be called on a freshly allocated #OpusPictureTag structure - before attempting to use it. - \param _pic The #OpusPictureTag structure to initialize.*/ -void opus_picture_tag_init(OpusPictureTag *_pic) OP_ARG_NONNULL(1); - -/**Clears the #OpusPictureTag structure. - This should be called on an #OpusPictureTag structure after it is no longer - needed. - It will free all memory used by the structure members. - \param _pic The #OpusPictureTag structure to clear.*/ -void opus_picture_tag_clear(OpusPictureTag *_pic) OP_ARG_NONNULL(1); - -/*@}*/ - -/*@}*/ - -/**\defgroup url_options URL Reading Options*/ -/*@{*/ -/**\name URL reading options - Options for op_url_stream_create() and associated functions. 
- These allow you to provide proxy configuration parameters, skip SSL - certificate checks, etc. - Options are processed in order, and if the same option is passed multiple - times, only the value specified by the last occurrence has an effect - (unless otherwise specified). - They may be expanded in the future.*/ -/*@{*/ - -/**@cond PRIVATE*/ - -/*These are the raw numbers used to define the request codes. - They should not be used directly.*/ -#define OP_SSL_SKIP_CERTIFICATE_CHECK_REQUEST (6464) -#define OP_HTTP_PROXY_HOST_REQUEST (6528) -#define OP_HTTP_PROXY_PORT_REQUEST (6592) -#define OP_HTTP_PROXY_USER_REQUEST (6656) -#define OP_HTTP_PROXY_PASS_REQUEST (6720) -#define OP_GET_SERVER_INFO_REQUEST (6784) - -#define OP_URL_OPT(_request) ((_request)+(char *)0) - -/*These macros trigger compilation errors or warnings if the wrong types are - provided to one of the URL options.*/ -#define OP_CHECK_INT(_x) ((void)((_x)==(opus_int32)0),(opus_int32)(_x)) -#define OP_CHECK_CONST_CHAR_PTR(_x) ((_x)+((_x)-(const char *)(_x))) -#define OP_CHECK_SERVER_INFO_PTR(_x) ((_x)+((_x)-(OpusServerInfo *)(_x))) - -/**@endcond*/ - -/**HTTP/Shoutcast/Icecast server information associated with a URL.*/ -struct OpusServerInfo{ - /**The name of the server (icy-name/ice-name). - This is <code>NULL</code> if there was no <code>icy-name</code> or - <code>ice-name</code> header.*/ - char *name; - /**A short description of the server (icy-description/ice-description). - This is <code>NULL</code> if there was no <code>icy-description</code> or - <code>ice-description</code> header.*/ - char *description; - /**The genre the server falls under (icy-genre/ice-genre). - This is <code>NULL</code> if there was no <code>icy-genre</code> or - <code>ice-genre</code> header.*/ - char *genre; - /**The homepage for the server (icy-url/ice-url). 
- This is <code>NULL</code> if there was no <code>icy-url</code> or - <code>ice-url</code> header.*/ - char *url; - /**The software used by the origin server (Server). - This is <code>NULL</code> if there was no <code>Server</code> header.*/ - char *server; - /**The media type of the entity sent to the recepient (Content-Type). - This is <code>NULL</code> if there was no <code>Content-Type</code> - header.*/ - char *content_type; - /**The nominal stream bitrate in kbps (icy-br/ice-bitrate). - This is <code>-1</code> if there was no <code>icy-br</code> or - <code>ice-bitrate</code> header.*/ - opus_int32 bitrate_kbps; - /**Flag indicating whether the server is public (<code>1</code>) or not - (<code>0</code>) (icy-pub/ice-public). - This is <code>-1</code> if there was no <code>icy-pub</code> or - <code>ice-public</code> header.*/ - int is_public; - /**Flag indicating whether the server is using HTTPS instead of HTTP. - This is <code>0</code> unless HTTPS is being used. - This may not match the protocol used in the original URL if there were - redirections.*/ - int is_ssl; -}; - -/**Initializes an #OpusServerInfo structure. - All fields are set as if the corresponding header was not available. - \param _info The #OpusServerInfo structure to initialize. - \note If you use this function, you must link against <tt>libopusurl</tt>.*/ -void opus_server_info_init(OpusServerInfo *_info) OP_ARG_NONNULL(1); - -/**Clears the #OpusServerInfo structure. - This should be called on an #OpusServerInfo structure after it is no longer - needed. - It will free all memory used by the structure members. - \param _info The #OpusServerInfo structure to clear. - \note If you use this function, you must link against <tt>libopusurl</tt>.*/ -void opus_server_info_clear(OpusServerInfo *_info) OP_ARG_NONNULL(1); - -/**Skip the certificate check when connecting via TLS/SSL (https). - \param _b <code>opus_int32</code>: Whether or not to skip the certificate - check. 
- The check will be skipped if \a _b is non-zero, and will not be - skipped if \a _b is zero. - \hideinitializer*/ -#define OP_SSL_SKIP_CERTIFICATE_CHECK(_b) \ - OP_URL_OPT(OP_SSL_SKIP_CERTIFICATE_CHECK_REQUEST),OP_CHECK_INT(_b) - -/**Proxy connections through the given host. - If no port is specified via #OP_HTTP_PROXY_PORT, the port number defaults - to 8080 (http-alt). - All proxy parameters are ignored for non-http and non-https URLs. - \param _host <code>const char *</code>: The proxy server hostname. - This may be <code>NULL</code> to disable the use of a proxy - server. - \hideinitializer*/ -#define OP_HTTP_PROXY_HOST(_host) \ - OP_URL_OPT(OP_HTTP_PROXY_HOST_REQUEST),OP_CHECK_CONST_CHAR_PTR(_host) - -/**Use the given port when proxying connections. - This option only has an effect if #OP_HTTP_PROXY_HOST is specified with a - non-<code>NULL</code> \a _host. - If this option is not provided, the proxy port number defaults to 8080 - (http-alt). - All proxy parameters are ignored for non-http and non-https URLs. - \param _port <code>opus_int32</code>: The proxy server port. - This must be in the range 0...65535 (inclusive), or the - URL function this is passed to will fail. - \hideinitializer*/ -#define OP_HTTP_PROXY_PORT(_port) \ - OP_URL_OPT(OP_HTTP_PROXY_PORT_REQUEST),OP_CHECK_INT(_port) - -/**Use the given user name for authentication when proxying connections. - All proxy parameters are ignored for non-http and non-https URLs. - \param _user const char *: The proxy server user name. - This may be <code>NULL</code> to disable proxy - authentication. - A non-<code>NULL</code> value only has an effect - if #OP_HTTP_PROXY_HOST and #OP_HTTP_PROXY_PASS - are also specified with non-<code>NULL</code> - arguments. - \hideinitializer*/ -#define OP_HTTP_PROXY_USER(_user) \ - OP_URL_OPT(OP_HTTP_PROXY_USER_REQUEST),OP_CHECK_CONST_CHAR_PTR(_user) - -/**Use the given password for authentication when proxying connections. 
- All proxy parameters are ignored for non-http and non-https URLs. - \param _pass const char *: The proxy server password. - This may be <code>NULL</code> to disable proxy - authentication. - A non-<code>NULL</code> value only has an effect - if #OP_HTTP_PROXY_HOST and #OP_HTTP_PROXY_USER - are also specified with non-<code>NULL</code> - arguments. - \hideinitializer*/ -#define OP_HTTP_PROXY_PASS(_pass) \ - OP_URL_OPT(OP_HTTP_PROXY_PASS_REQUEST),OP_CHECK_CONST_CHAR_PTR(_pass) - -/**Parse information about the streaming server (if any) and return it. - Very little validation is done. - In particular, OpusServerInfo::url may not be a valid URL, - OpusServerInfo::bitrate_kbps may not really be in kbps, and - OpusServerInfo::content_type may not be a valid MIME type. - The character set of the string fields is not specified anywhere, and should - not be assumed to be valid UTF-8. - \param _info OpusServerInfo *: Returns information about the server. - If there is any error opening the stream, the - contents of this structure remain - unmodified. - On success, fills in the structure with the - server information that was available, if - any. - After a successful return, the contents of - this structure should be freed by calling - opus_server_info_clear(). - \hideinitializer*/ -#define OP_GET_SERVER_INFO(_info) \ - OP_URL_OPT(OP_GET_SERVER_INFO_REQUEST),OP_CHECK_SERVER_INFO_PTR(_info) - -/*@}*/ -/*@}*/ - -/**\defgroup stream_callbacks Abstract Stream Reading Interface*/ -/*@{*/ -/**\name Functions for reading from streams - These functions define the interface used to read from and seek in a stream - of data. - A stream does not need to implement seeking, but the decoder will not be - able to seek if it does not do so. - These functions also include some convenience routines for working with - standard <code>FILE</code> pointers, complete streams stored in a single - block of memory, or URLs.*/ -/*@{*/ - -/**Reads up to \a _nbytes bytes of data from \a _stream. 
- \param _stream The stream to read from. - \param[out] _ptr The buffer to store the data in. - \param _nbytes The maximum number of bytes to read. - This function may return fewer, though it will not - return zero unless it reaches end-of-file. - \return The number of bytes successfully read, or a negative value on - error.*/ -typedef int (*op_read_func)(void *_stream,unsigned char *_ptr,int _nbytes); - -/**Sets the position indicator for \a _stream. - The new position, measured in bytes, is obtained by adding \a _offset - bytes to the position specified by \a _whence. - If \a _whence is set to <code>SEEK_SET</code>, <code>SEEK_CUR</code>, or - <code>SEEK_END</code>, the offset is relative to the start of the stream, - the current position indicator, or end-of-file, respectively. - \retval 0 Success. - \retval -1 Seeking is not supported or an error occurred. - <code>errno</code> need not be set.*/ -typedef int (*op_seek_func)(void *_stream,opus_int64 _offset,int _whence); - -/**Obtains the current value of the position indicator for \a _stream. - \return The current position indicator.*/ -typedef opus_int64 (*op_tell_func)(void *_stream); - -/**Closes the underlying stream. - \retval 0 Success. - \retval EOF An error occurred. - <code>errno</code> need not be set.*/ -typedef int (*op_close_func)(void *_stream); - -/**The callbacks used to access non-<code>FILE</code> stream resources. - The function prototypes are basically the same as for the stdio functions - <code>fread()</code>, <code>fseek()</code>, <code>ftell()</code>, and - <code>fclose()</code>. - The differences are that the <code>FILE *</code> arguments have been - replaced with a <code>void *</code>, which is to be used as a pointer to - whatever internal data these functions might need, that #seek and #tell - take and return 64-bit offsets, and that #seek <em>must</em> return -1 if - the stream is unseekable.*/ -struct OpusFileCallbacks{ - /**Used to read data from the stream. 
- This must not be <code>NULL</code>.*/ - op_read_func read; - /**Used to seek in the stream. - This may be <code>NULL</code> if seeking is not implemented.*/ - op_seek_func seek; - /**Used to return the current read position in the stream. - This may be <code>NULL</code> if seeking is not implemented.*/ - op_tell_func tell; - /**Used to close the stream when the decoder is freed. - This may be <code>NULL</code> to leave the stream open.*/ - op_close_func close; -}; - -/**Opens a stream with <code>fopen()</code> and fills in a set of callbacks - that can be used to access it. - This is useful to avoid writing your own portable 64-bit seeking wrappers, - and also avoids cross-module linking issues on Windows, where a - <code>FILE *</code> must be accessed by routines defined in the same module - that opened it. - \param[out] _cb The callbacks to use for this file. - If there is an error opening the file, nothing will be - filled in here. - \param _path The path to the file to open. - On Windows, this string must be UTF-8 (to allow access to - files whose names cannot be represented in the current - MBCS code page). - All other systems use the native character encoding. - \param _mode The mode to open the file in. - \return A stream handle to use with the callbacks, or <code>NULL</code> on - error.*/ -OP_WARN_UNUSED_RESULT void *op_fopen(OpusFileCallbacks *_cb, - const char *_path,const char *_mode) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2) - OP_ARG_NONNULL(3); - -/**Opens a stream with <code>fdopen()</code> and fills in a set of callbacks - that can be used to access it. - This is useful to avoid writing your own portable 64-bit seeking wrappers, - and also avoids cross-module linking issues on Windows, where a - <code>FILE *</code> must be accessed by routines defined in the same module - that opened it. - \param[out] _cb The callbacks to use for this file. - If there is an error opening the file, nothing will be - filled in here. 
- \param _fd The file descriptor to open. - \param _mode The mode to open the file in. - \return A stream handle to use with the callbacks, or <code>NULL</code> on - error.*/ -OP_WARN_UNUSED_RESULT void *op_fdopen(OpusFileCallbacks *_cb, - int _fd,const char *_mode) OP_ARG_NONNULL(1) OP_ARG_NONNULL(3); - -/**Opens a stream with <code>freopen()</code> and fills in a set of callbacks - that can be used to access it. - This is useful to avoid writing your own portable 64-bit seeking wrappers, - and also avoids cross-module linking issues on Windows, where a - <code>FILE *</code> must be accessed by routines defined in the same module - that opened it. - \param[out] _cb The callbacks to use for this file. - If there is an error opening the file, nothing will be - filled in here. - \param _path The path to the file to open. - On Windows, this string must be UTF-8 (to allow access - to files whose names cannot be represented in the - current MBCS code page). - All other systems use the native character encoding. - \param _mode The mode to open the file in. - \param _stream A stream previously returned by op_fopen(), op_fdopen(), - or op_freopen(). - \return A stream handle to use with the callbacks, or <code>NULL</code> on - error.*/ -OP_WARN_UNUSED_RESULT void *op_freopen(OpusFileCallbacks *_cb, - const char *_path,const char *_mode,void *_stream) OP_ARG_NONNULL(1) - OP_ARG_NONNULL(2) OP_ARG_NONNULL(3) OP_ARG_NONNULL(4); - -/**Creates a stream that reads from the given block of memory. - This block of memory must contain the complete stream to decode. - This is useful for caching small streams (e.g., sound effects) in RAM. - \param[out] _cb The callbacks to use for this stream. - If there is an error creating the stream, nothing will be - filled in here. - \param _data The block of memory to read from. - \param _size The size of the block of memory. 
- \return A stream handle to use with the callbacks, or <code>NULL</code> on - error.*/ -OP_WARN_UNUSED_RESULT void *op_mem_stream_create(OpusFileCallbacks *_cb, - const unsigned char *_data,size_t _size) OP_ARG_NONNULL(1); - -/**Creates a stream that reads from the given URL. - This function behaves identically to op_url_stream_create(), except that it - takes a va_list instead of a variable number of arguments. - It does not call the <code>va_end</code> macro, and because it invokes the - <code>va_arg</code> macro, the value of \a _ap is undefined after the call. - \note If you use this function, you must link against <tt>libopusurl</tt>. - \param[out] _cb The callbacks to use for this stream. - If there is an error creating the stream, nothing will - be filled in here. - \param _url The URL to read from. - Currently only the <file:>, <http:>, and <https:> - schemes are supported. - Both <http:> and <https:> may be disabled at compile - time, in which case opening such URLs will always fail. - Currently this only supports URIs. - IRIs should be converted to UTF-8 and URL-escaped, with - internationalized domain names encoded in punycode, - before passing them to this function. - \param[in,out] _ap A list of the \ref url_options "optional flags" to use. - This is a variable-length list of options terminated - with <code>NULL</code>. - \return A stream handle to use with the callbacks, or <code>NULL</code> on - error.*/ -OP_WARN_UNUSED_RESULT void *op_url_stream_vcreate(OpusFileCallbacks *_cb, - const char *_url,va_list _ap) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); - -/**Creates a stream that reads from the given URL. - \note If you use this function, you must link against <tt>libopusurl</tt>. - \param[out] _cb The callbacks to use for this stream. - If there is an error creating the stream, nothing will be - filled in here. - \param _url The URL to read from. - Currently only the <file:>, <http:>, and <https:> schemes - are supported. 
- Both <http:> and <https:> may be disabled at compile time, - in which case opening such URLs will always fail. - Currently this only supports URIs. - IRIs should be converted to UTF-8 and URL-escaped, with - internationalized domain names encoded in punycode, before - passing them to this function. - \param ... The \ref url_options "optional flags" to use. - This is a variable-length list of options terminated with - <code>NULL</code>. - \return A stream handle to use with the callbacks, or <code>NULL</code> on - error.*/ -OP_WARN_UNUSED_RESULT void *op_url_stream_create(OpusFileCallbacks *_cb, - const char *_url,...) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); - -/*@}*/ -/*@}*/ - -/**\defgroup stream_open_close Opening and Closing*/ -/*@{*/ -/**\name Functions for opening and closing streams - - These functions allow you to test a stream to see if it is Opus, open it, - and close it. - Several flavors are provided for each of the built-in stream types, plus a - more general version which takes a set of application-provided callbacks.*/ -/*@{*/ - -/**Test to see if this is an Opus stream. - For good results, you will need at least 57 bytes (for a pure Opus-only - stream). - Something like 512 bytes will give more reliable results for multiplexed - streams. - This function is meant to be a quick-rejection filter. - Its purpose is not to guarantee that a stream is a valid Opus stream, but to - ensure that it looks enough like Opus that it isn't going to be recognized - as some other format (except possibly an Opus stream that is also - multiplexed with other codecs, such as video). - \param[out] _head The parsed ID header contents. - You may pass <code>NULL</code> if you do not need - this information. - If the function fails, the contents of this structure - remain untouched. - \param _initial_data An initial buffer of data from the start of the - stream. - \param _initial_bytes The number of bytes in \a _initial_data. 
- \return 0 if the data appears to be Opus, or a negative value on error. - \retval #OP_FALSE There was not enough data to tell if this was an Opus - stream or not. - \retval #OP_EFAULT An internal memory allocation failed. - \retval #OP_EIMPL The stream used a feature that is not implemented, - such as an unsupported channel family. - \retval #OP_ENOTFORMAT If the data did not contain a recognizable ID - header for an Opus stream. - \retval #OP_EVERSION If the version field signaled a version this library - does not know how to parse. - \retval #OP_EBADHEADER The ID header was not properly formatted or contained - illegal values.*/ -int op_test(OpusHead *_head, - const unsigned char *_initial_data,size_t _initial_bytes); - -/**Open a stream from the given file path. - \param _path The path to the file to open. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in <code>NULL</code> if you don't want the - failure code. - The failure code will be #OP_EFAULT if the file could not - be opened, or one of the other failure codes from - op_open_callbacks() otherwise. - \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_open_file(const char *_path,int *_error) - OP_ARG_NONNULL(1); - -/**Open a stream from a memory buffer. - \param _data The memory buffer to open. - \param _size The number of bytes in the buffer. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in <code>NULL</code> if you don't want the - failure code. - See op_open_callbacks() for a full list of failure codes. - \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_open_memory(const unsigned char *_data, - size_t _size,int *_error); - -/**Open a stream from a URL. - This function behaves identically to op_open_url(), except that it - takes a va_list instead of a variable number of arguments. 
- It does not call the <code>va_end</code> macro, and because it invokes the - <code>va_arg</code> macro, the value of \a _ap is undefined after the call. - \note If you use this function, you must link against <tt>libopusurl</tt>. - \param _url The URL to open. - Currently only the <file:>, <http:>, and <https:> - schemes are supported. - Both <http:> and <https:> may be disabled at compile - time, in which case opening such URLs will always - fail. - Currently this only supports URIs. - IRIs should be converted to UTF-8 and URL-escaped, - with internationalized domain names encoded in - punycode, before passing them to this function. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in <code>NULL</code> if you don't want - the failure code. - See op_open_callbacks() for a full list of failure - codes. - \param[in,out] _ap A list of the \ref url_options "optional flags" to - use. - This is a variable-length list of options terminated - with <code>NULL</code>. - \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_vopen_url(const char *_url, - int *_error,va_list _ap) OP_ARG_NONNULL(1); - -/**Open a stream from a URL. - \note If you use this function, you must link against <tt>libopusurl</tt>. - \param _url The URL to open. - Currently only the <file:>, <http:>, and <https:> schemes - are supported. - Both <http:> and <https:> may be disabled at compile - time, in which case opening such URLs will always fail. - Currently this only supports URIs. - IRIs should be converted to UTF-8 and URL-escaped, with - internationalized domain names encoded in punycode, - before passing them to this function. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in <code>NULL</code> if you don't want the - failure code. - See op_open_callbacks() for a full list of failure codes. - \param ... The \ref url_options "optional flags" to use. 
- This is a variable-length list of options terminated with - <code>NULL</code>. - \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_open_url(const char *_url, - int *_error,...) OP_ARG_NONNULL(1); - -/**Open a stream using the given set of callbacks to access it. - \param _source The stream to read from (e.g., a <code>FILE *</code>). - \param _cb The callbacks with which to access the stream. - <code><a href="#op_read_func">read()</a></code> must - be implemented. - <code><a href="#op_seek_func">seek()</a></code> and - <code><a href="#op_tell_func">tell()</a></code> may - be <code>NULL</code>, or may always return -1 to - indicate a source is unseekable, but if - <code><a href="#op_seek_func">seek()</a></code> is - implemented and succeeds on a particular source, then - <code><a href="#op_tell_func">tell()</a></code> must - also. - <code><a href="#op_close_func">close()</a></code> may - be <code>NULL</code>, but if it is not, it will be - called when the \c OggOpusFile is destroyed by - op_free(). - It will not be called if op_open_callbacks() fails - with an error. - \param _initial_data An initial buffer of data from the start of the - stream. - Applications can read some number of bytes from the - start of the stream to help identify this as an Opus - stream, and then provide them here to allow the - stream to be opened, even if it is unseekable. - \param _initial_bytes The number of bytes in \a _initial_data. - If the stream is seekable, its current position (as - reported by - <code><a href="#opus_tell_func">tell()</a></code> - at the start of this function) must be equal to - \a _initial_bytes. - Otherwise, seeking to absolute positions will - generate inconsistent results. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in <code>NULL</code> if you don't want - the failure code. 
- The failure code will be one of - <dl> - <dt>#OP_EREAD</dt> - <dd>An underlying read, seek, or tell operation - failed when it should have succeeded, or we failed - to find data in the stream we had seen before.</dd> - <dt>#OP_EFAULT</dt> - <dd>There was a memory allocation failure, or an - internal library error.</dd> - <dt>#OP_EIMPL</dt> - <dd>The stream used a feature that is not - implemented, such as an unsupported channel - family.</dd> - <dt>#OP_EINVAL</dt> - <dd><code><a href="#op_seek_func">seek()</a></code> - was implemented and succeeded on this source, but - <code><a href="#op_tell_func">tell()</a></code> - did not, or the starting position indicator was - not equal to \a _initial_bytes.</dd> - <dt>#OP_ENOTFORMAT</dt> - <dd>The stream contained a link that did not have - any logical Opus streams in it.</dd> - <dt>#OP_EBADHEADER</dt> - <dd>A required header packet was not properly - formatted, contained illegal values, or was missing - altogether.</dd> - <dt>#OP_EVERSION</dt> - <dd>An ID header contained an unrecognized version - number.</dd> - <dt>#OP_EBADLINK</dt> - <dd>We failed to find data we had seen before after - seeking.</dd> - <dt>#OP_EBADTIMESTAMP</dt> - <dd>The first or last timestamp in a link failed - basic validity checks.</dd> - </dl> - \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error. - <tt>libopusfile</tt> does <em>not</em> take ownership of the source - if the call fails. - The calling application is responsible for closing the source if - this call returns an error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_open_callbacks(void *_source, - const OpusFileCallbacks *_cb,const unsigned char *_initial_data, - size_t _initial_bytes,int *_error) OP_ARG_NONNULL(2); - -/**Partially open a stream from the given file path. - \see op_test_callbacks - \param _path The path to the file to open. - \param[out] _error Returns 0 on success, or a failure code on error. 
- You may pass in <code>NULL</code> if you don't want the - failure code. - The failure code will be #OP_EFAULT if the file could not - be opened, or one of the other failure codes from - op_open_callbacks() otherwise. - \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_test_file(const char *_path,int *_error) - OP_ARG_NONNULL(1); - -/**Partially open a stream from a memory buffer. - \see op_test_callbacks - \param _data The memory buffer to open. - \param _size The number of bytes in the buffer. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in <code>NULL</code> if you don't want the - failure code. - See op_open_callbacks() for a full list of failure codes. - \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_test_memory(const unsigned char *_data, - size_t _size,int *_error); - -/**Partially open a stream from a URL. - This function behaves identically to op_test_url(), except that it - takes a va_list instead of a variable number of arguments. - It does not call the <code>va_end</code> macro, and because it invokes the - <code>va_arg</code> macro, the value of \a _ap is undefined after the call. - \note If you use this function, you must link against <tt>libopusurl</tt>. - \see op_test_url - \see op_test_callbacks - \param _url The URL to open. - Currently only the <file:>, <http:>, and <https:> - schemes are supported. - Both <http:> and <https:> may be disabled at compile - time, in which case opening such URLs will always - fail. - Currently this only supports URIs. - IRIs should be converted to UTF-8 and URL-escaped, - with internationalized domain names encoded in - punycode, before passing them to this function. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in <code>NULL</code> if you don't want - the failure code. 
- See op_open_callbacks() for a full list of failure - codes. - \param[in,out] _ap A list of the \ref url_options "optional flags" to - use. - This is a variable-length list of options terminated - with <code>NULL</code>. - \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_vtest_url(const char *_url, - int *_error,va_list _ap) OP_ARG_NONNULL(1); - -/**Partially open a stream from a URL. - \note If you use this function, you must link against <tt>libopusurl</tt>. - \see op_test_callbacks - \param _url The URL to open. - Currently only the <file:>, <http:>, and <https:> - schemes are supported. - Both <http:> and <https:> may be disabled at compile - time, in which case opening such URLs will always fail. - Currently this only supports URIs. - IRIs should be converted to UTF-8 and URL-escaped, with - internationalized domain names encoded in punycode, - before passing them to this function. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in <code>NULL</code> if you don't want the - failure code. - See op_open_callbacks() for a full list of failure - codes. - \param ... The \ref url_options "optional flags" to use. - This is a variable-length list of options terminated - with <code>NULL</code>. - \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_test_url(const char *_url, - int *_error,...) OP_ARG_NONNULL(1); - -/**Partially open a stream using the given set of callbacks to access it. - This tests for Opusness and loads the headers for the first link. - It does not seek (although it tests for seekability). - You can query a partially open stream for the few pieces of basic - information returned by op_serialno(), op_channel_count(), op_head(), and - op_tags() (but only for the first link). - You may also determine if it is seekable via a call to op_seekable(). 
- You cannot read audio from the stream, seek, get the size or duration, - get information from links other than the first one, or even get the total - number of links until you finish opening the stream with op_test_open(). - If you do not need to do any of these things, you can dispose of it with - op_free() instead. - - This function is provided mostly to simplify porting existing code that used - <tt>libvorbisfile</tt>. - For new code, you are likely better off using op_test() instead, which - is less resource-intensive, requires less data to succeed, and imposes a - hard limit on the amount of data it examines (important for unseekable - sources, where all such data must be buffered until you are sure of the - stream type). - \param _source The stream to read from (e.g., a <code>FILE *</code>). - \param _cb The callbacks with which to access the stream. - <code><a href="#op_read_func">read()</a></code> must - be implemented. - <code><a href="#op_seek_func">seek()</a></code> and - <code><a href="#op_tell_func">tell()</a></code> may - be <code>NULL</code>, or may always return -1 to - indicate a source is unseekable, but if - <code><a href="#op_seek_func">seek()</a></code> is - implemented and succeeds on a particular source, then - <code><a href="#op_tell_func">tell()</a></code> must - also. - <code><a href="#op_close_func">close()</a></code> may - be <code>NULL</code>, but if it is not, it will be - called when the \c OggOpusFile is destroyed by - op_free(). - It will not be called if op_open_callbacks() fails - with an error. - \param _initial_data An initial buffer of data from the start of the - stream. - Applications can read some number of bytes from the - start of the stream to help identify this as an Opus - stream, and then provide them here to allow the - stream to be tested more thoroughly, even if it is - unseekable. - \param _initial_bytes The number of bytes in \a _initial_data. 
- If the stream is seekable, its current position (as - reported by - <code><a href="#op_tell_func">tell()</a></code> - at the start of this function) must be equal to - \a _initial_bytes. - Otherwise, seeking to absolute positions will - generate inconsistent results. - \param[out] _error Returns 0 on success, or a failure code on error. - You may pass in <code>NULL</code> if you don't want - the failure code. - See op_open_callbacks() for a full list of failure - codes. - \return A partially opened \c OggOpusFile, or <code>NULL</code> on error. - <tt>libopusfile</tt> does <em>not</em> take ownership of the source - if the call fails. - The calling application is responsible for closing the source if - this call returns an error.*/ -OP_WARN_UNUSED_RESULT OggOpusFile *op_test_callbacks(void *_source, - const OpusFileCallbacks *_cb,const unsigned char *_initial_data, - size_t _initial_bytes,int *_error) OP_ARG_NONNULL(2); - -/**Finish opening a stream partially opened with op_test_callbacks() or one of - the associated convenience functions. - If this function fails, you are still responsible for freeing the - \c OggOpusFile with op_free(). - \param _of The \c OggOpusFile to finish opening. - \return 0 on success, or a negative value on error. - \retval #OP_EREAD An underlying read, seek, or tell operation failed - when it should have succeeded. - \retval #OP_EFAULT There was a memory allocation failure, or an - internal library error. - \retval #OP_EIMPL The stream used a feature that is not implemented, - such as an unsupported channel family. - \retval #OP_EINVAL The stream was not partially opened with - op_test_callbacks() or one of the associated - convenience functions. - \retval #OP_ENOTFORMAT The stream contained a link that did not have any - logical Opus streams in it. - \retval #OP_EBADHEADER A required header packet was not properly - formatted, contained illegal values, or was - missing altogether. 
- \retval #OP_EVERSION An ID header contained an unrecognized version - number. - \retval #OP_EBADLINK We failed to find data we had seen before after - seeking. - \retval #OP_EBADTIMESTAMP The first or last timestamp in a link failed basic - validity checks.*/ -int op_test_open(OggOpusFile *_of) OP_ARG_NONNULL(1); - -/**Release all memory used by an \c OggOpusFile. - \param _of The \c OggOpusFile to free.*/ -void op_free(OggOpusFile *_of); - -/*@}*/ -/*@}*/ - -/**\defgroup stream_info Stream Information*/ -/*@{*/ -/**\name Functions for obtaining information about streams - - These functions allow you to get basic information about a stream, including - seekability, the number of links (for chained streams), plus the size, - duration, bitrate, header parameters, and meta information for each link - (or, where available, the stream as a whole). - Some of these (size, duration) are only available for seekable streams. - You can also query the current stream position, link, and playback time, - and instantaneous bitrate during playback. - - Some of these functions may be used successfully on the partially open - streams returned by op_test_callbacks() or one of the associated - convenience functions. - Their documentation will indicate so explicitly.*/ -/*@{*/ - -/**Returns whether or not the data source being read is seekable. - This is true if - <ol> - <li>The <code><a href="#op_seek_func">seek()</a></code> and - <code><a href="#op_tell_func">tell()</a></code> callbacks are both - non-<code>NULL</code>,</li> - <li>The <code><a href="#op_seek_func">seek()</a></code> callback was - successfully executed at least once, and</li> - <li>The <code><a href="#op_tell_func">tell()</a></code> callback was - successfully able to report the position indicator afterwards.</li> - </ol> - This function may be called on partially-opened streams. - \param _of The \c OggOpusFile whose seekable status is to be returned. 
- \return A non-zero value if seekable, and 0 if unseekable.*/ -int op_seekable(const OggOpusFile *_of) OP_ARG_NONNULL(1); - -/**Returns the number of links in this chained stream. - This function may be called on partially-opened streams, but it will always - return 1. - The actual number of links is not known until the stream is fully opened. - \param _of The \c OggOpusFile from which to retrieve the link count. - \return For fully-open seekable sources, this returns the total number of - links in the whole stream, which will be at least 1. - For partially-open or unseekable sources, this always returns 1.*/ -int op_link_count(const OggOpusFile *_of) OP_ARG_NONNULL(1); - -/**Get the serial number of the given link in a (possibly-chained) Ogg Opus - stream. - This function may be called on partially-opened streams, but it will always - return the serial number of the Opus stream in the first link. - \param _of The \c OggOpusFile from which to retrieve the serial number. - \param _li The index of the link whose serial number should be retrieved. - Use a negative number to get the serial number of the current - link. - \return The serial number of the given link. - If \a _li is greater than the total number of links, this returns - the serial number of the last link. - If the source is not seekable, this always returns the serial number - of the current link.*/ -opus_uint32 op_serialno(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); - -/**Get the channel count of the given link in a (possibly-chained) Ogg Opus - stream. - This is equivalent to <code>op_head(_of,_li)->channel_count</code>, but - is provided for convenience. - This function may be called on partially-opened streams, but it will always - return the channel count of the Opus stream in the first link. - \param _of The \c OggOpusFile from which to retrieve the channel count. - \param _li The index of the link whose channel count should be retrieved. 
- Use a negative number to get the channel count of the current - link. - \return The channel count of the given link. - If \a _li is greater than the total number of links, this returns - the channel count of the last link. - If the source is not seekable, this always returns the channel count - of the current link.*/ -int op_channel_count(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); - -/**Get the total (compressed) size of the stream, or of an individual link in - a (possibly-chained) Ogg Opus stream, including all headers and Ogg muxing - overhead. - \warning If the Opus stream (or link) is concurrently multiplexed with other - logical streams (e.g., video), this returns the size of the entire stream - (or link), not just the number of bytes in the first logical Opus stream. - Returning the latter would require scanning the entire file. - \param _of The \c OggOpusFile from which to retrieve the compressed size. - \param _li The index of the link whose compressed size should be computed. - Use a negative number to get the compressed size of the entire - stream. - \return The compressed size of the entire stream if \a _li is negative, the - compressed size of link \a _li if it is non-negative, or a negative - value on error. - The compressed size of the entire stream may be smaller than that - of the underlying source if trailing garbage was detected in the - file. - \retval #OP_EINVAL The source is not seekable (so we can't know the length), - \a _li wasn't less than the total number of links in - the stream, or the stream was only partially open.*/ -opus_int64 op_raw_total(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); - -/**Get the total PCM length (number of samples at 48 kHz) of the stream, or of - an individual link in a (possibly-chained) Ogg Opus stream. - Users looking for <code>op_time_total()</code> should use op_pcm_total() - instead. 
- Because timestamps in Opus are fixed at 48 kHz, there is no need for a - separate function to convert this to seconds (and leaving it out avoids - introducing floating point to the API, for those that wish to avoid it). - \param _of The \c OggOpusFile from which to retrieve the PCM offset. - \param _li The index of the link whose PCM length should be computed. - Use a negative number to get the PCM length of the entire stream. - \return The PCM length of the entire stream if \a _li is negative, the PCM - length of link \a _li if it is non-negative, or a negative value on - error. - \retval #OP_EINVAL The source is not seekable (so we can't know the length), - \a _li wasn't less than the total number of links in - the stream, or the stream was only partially open.*/ -ogg_int64_t op_pcm_total(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); - -/**Get the ID header information for the given link in a (possibly chained) Ogg - Opus stream. - This function may be called on partially-opened streams, but it will always - return the ID header information of the Opus stream in the first link. - \param _of The \c OggOpusFile from which to retrieve the ID header - information. - \param _li The index of the link whose ID header information should be - retrieved. - Use a negative number to get the ID header information of the - current link. - For an unseekable stream, \a _li is ignored, and the ID header - information for the current link is always returned, if - available. - \return The contents of the ID header for the given link.*/ -const OpusHead *op_head(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); - -/**Get the comment header information for the given link in a (possibly - chained) Ogg Opus stream. - This function may be called on partially-opened streams, but it will always - return the tags from the Opus stream in the first link. - \param _of The \c OggOpusFile from which to retrieve the comment header - information. 
- \param _li The index of the link whose comment header information should be - retrieved. - Use a negative number to get the comment header information of - the current link. - For an unseekable stream, \a _li is ignored, and the comment - header information for the current link is always returned, if - available. - \return The contents of the comment header for the given link, or - <code>NULL</code> if this is an unseekable stream that encountered - an invalid link.*/ -const OpusTags *op_tags(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); - -/**Retrieve the index of the current link. - This is the link that produced the data most recently read by - op_read_float() or its associated functions, or, after a seek, the link - that the seek target landed in. - Reading more data may advance the link index (even on the first read after a - seek). - \param _of The \c OggOpusFile from which to retrieve the current link index. - \return The index of the current link on success, or a negative value on - failure. - For seekable streams, this is a number between 0 and the value - returned by op_link_count(). - For unseekable streams, this value starts at 0 and increments by one - each time a new link is encountered (even though op_link_count() - always returns 1). - \retval #OP_EINVAL The stream was only partially open.*/ -int op_current_link(const OggOpusFile *_of) OP_ARG_NONNULL(1); - -/**Computes the bitrate of the stream, or of an individual link in a - (possibly-chained) Ogg Opus stream. - The stream must be seekable to compute the bitrate. - For unseekable streams, use op_bitrate_instant() to get periodic estimates. - \warning If the Opus stream (or link) is concurrently multiplexed with other - logical streams (e.g., video), this uses the size of the entire stream (or - link) to compute the bitrate, not just the number of bytes in the first - logical Opus stream. 
- Returning the latter requires scanning the entire file, but this may be done - by decoding the whole file and calling op_bitrate_instant() once at the - end. - Install a trivial decoding callback with op_set_decode_callback() if you - wish to skip actual decoding during this process. - \param _of The \c OggOpusFile from which to retrieve the bitrate. - \param _li The index of the link whose bitrate should be computed. - Use a negative number to get the bitrate of the whole stream. - \return The bitrate on success, or a negative value on error. - \retval #OP_EINVAL The stream was only partially open, the stream was not - seekable, or \a _li was larger than the number of - links.*/ -opus_int32 op_bitrate(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); - -/**Compute the instantaneous bitrate, measured as the ratio of bits to playable - samples decoded since a) the last call to op_bitrate_instant(), b) the last - seek, or c) the start of playback, whichever was most recent. - This will spike somewhat after a seek or at the start/end of a chain - boundary, as pre-skip, pre-roll, and end-trimming causes samples to be - decoded but not played. - \param _of The \c OggOpusFile from which to retrieve the bitrate. - \return The bitrate, in bits per second, or a negative value on error. - \retval #OP_FALSE No data has been decoded since any of the events - described above. - \retval #OP_EINVAL The stream was only partially open.*/ -opus_int32 op_bitrate_instant(OggOpusFile *_of) OP_ARG_NONNULL(1); - -/**Obtain the current value of the position indicator for \a _of. - \param _of The \c OggOpusFile from which to retrieve the position indicator. - \return The byte position that is currently being read from. - \retval #OP_EINVAL The stream was only partially open.*/ -opus_int64 op_raw_tell(const OggOpusFile *_of) OP_ARG_NONNULL(1); - -/**Obtain the PCM offset of the next sample to be read. 
- If the stream is not properly timestamped, this might not increment by the - proper amount between reads, or even return monotonically increasing - values. - \param _of The \c OggOpusFile from which to retrieve the PCM offset. - \return The PCM offset of the next sample to be read. - \retval #OP_EINVAL The stream was only partially open.*/ -ogg_int64_t op_pcm_tell(const OggOpusFile *_of) OP_ARG_NONNULL(1); - -/*@}*/ -/*@}*/ - -/**\defgroup stream_seeking Seeking*/ -/*@{*/ -/**\name Functions for seeking in Opus streams - - These functions let you seek in Opus streams, if the underlying source - supports it. - Seeking is implemented for all built-in stream I/O routines, though some - individual sources may not be seekable (pipes, live HTTP streams, or HTTP - streams from a server that does not support <code>Range</code> requests). - - op_raw_seek() is the fastest: it is guaranteed to perform at most one - physical seek, but, since the target is a byte position, makes no guarantee - how close to a given time it will come. - op_pcm_seek() provides sample-accurate seeking. - The number of physical seeks it requires is still quite small (often 1 or - 2, even in highly variable bitrate streams). - - Seeking in Opus requires decoding some pre-roll amount before playback to - allow the internal state to converge (as if recovering from packet loss). - This is handled internally by <tt>libopusfile</tt>, but means there is - little extra overhead for decoding up to the exact position requested - (since it must decode some amount of audio anyway). - It also means that decoding after seeking may not return exactly the same - values as would be obtained by decoding the stream straight through. - However, such differences are expected to be smaller than the loss - introduced by Opus's lossy compression.*/ -/*@{*/ - -/**Seek to a byte offset relative to the <b>compressed</b> data. - This also scans packets to update the PCM cursor. 
- It will cross a logical bitstream boundary, but only if it can't get any - packets out of the tail of the link to which it seeks. - \param _of The \c OggOpusFile in which to seek. - \param _byte_offset The byte position to seek to. - \return 0 on success, or a negative error code on failure. - \retval #OP_EREAD The underlying seek operation failed. - \retval #OP_EINVAL The stream was only partially open, or the target was - outside the valid range for the stream. - \retval #OP_ENOSEEK This stream is not seekable. - \retval #OP_EBADLINK Failed to initialize a decoder for a stream for an - unknown reason.*/ -int op_raw_seek(OggOpusFile *_of,opus_int64 _byte_offset) OP_ARG_NONNULL(1); - -/**Seek to the specified PCM offset, such that decoding will begin at exactly - the requested position. - \param _of The \c OggOpusFile in which to seek. - \param _pcm_offset The PCM offset to seek to. - This is in samples at 48 kHz relative to the start of the - stream. - \return 0 on success, or a negative value on error. - \retval #OP_EREAD An underlying read or seek operation failed. - \retval #OP_EINVAL The stream was only partially open, or the target was - outside the valid range for the stream. - \retval #OP_ENOSEEK This stream is not seekable. - \retval #OP_EBADLINK We failed to find data we had seen before, or the - bitstream structure was sufficiently malformed that - seeking to the target destination was impossible.*/ -int op_pcm_seek(OggOpusFile *_of,ogg_int64_t _pcm_offset) OP_ARG_NONNULL(1); - -/*@}*/ -/*@}*/ - -/**\defgroup stream_decoding Decoding*/ -/*@{*/ -/**\name Functions for decoding audio data - - These functions retrieve actual decoded audio data from the stream. - The general functions, op_read() and op_read_float() return 16-bit or - floating-point output, both using native endian ordering. - The number of channels returned can change from link to link in a chained - stream. 
- There are special functions, op_read_stereo() and op_read_float_stereo(), - which always output two channels, to simplify applications which do not - wish to handle multichannel audio. - These downmix multichannel files to two channels, so they can always return - samples in the same format for every link in a chained file. - - If the rest of your audio processing chain can handle floating point, the - floating-point routines should be preferred, as they prevent clipping and - other issues which might be avoided entirely if, e.g., you scale down the - volume at some other stage. - However, if you intend to consume 16-bit samples directly, the conversion in - <tt>libopusfile</tt> provides noise-shaping dithering and, if compiled - against <tt>libopus</tt> 1.1 or later, soft-clipping prevention. - - <tt>libopusfile</tt> can also be configured at compile time to use the - fixed-point <tt>libopus</tt> API. - If so, <tt>libopusfile</tt>'s floating-point API may also be disabled. - In that configuration, nothing in <tt>libopusfile</tt> will use any - floating-point operations, to simplify support on devices without an - adequate FPU. - - \warning HTTPS streams may be vulnerable to truncation attacks if you do - not check the error return code from op_read_float() or its associated - functions. - If the remote peer does not close the connection gracefully (with a TLS - "close notify" message), these functions will return #OP_EREAD instead of 0 - when they reach the end of the file. 
- If you are reading from an <https:> URL (particularly if seeking is not - supported), you should make sure to check for this error and warn the user - appropriately.*/ -/*@{*/ - -/**Indicates that the decoding callback should produce signed 16-bit - native-endian output samples.*/ -#define OP_DEC_FORMAT_SHORT (7008) -/**Indicates that the decoding callback should produce 32-bit native-endian - float samples.*/ -#define OP_DEC_FORMAT_FLOAT (7040) - -/**Indicates that the decoding callback did not decode anything, and that - <tt>libopusfile</tt> should decode normally instead.*/ -#define OP_DEC_USE_DEFAULT (6720) - -/**Called to decode an Opus packet. - This should invoke the functional equivalent of opus_multistream_decode() or - opus_multistream_decode_float(), except that it returns 0 on success - instead of the number of decoded samples (which is known a priori). - \param _ctx The application-provided callback context. - \param _decoder The decoder to use to decode the packet. - \param[out] _pcm The buffer to decode into. - This will always have enough room for \a _nchannels of - \a _nsamples samples, which should be placed into this - buffer interleaved. - \param _op The packet to decode. - This will always have its granule position set to a valid - value. - \param _nsamples The number of samples expected from the packet. - \param _nchannels The number of channels expected from the packet. - \param _format The desired sample output format. - This is either #OP_DEC_FORMAT_SHORT or - #OP_DEC_FORMAT_FLOAT. - \param _li The index of the link from which this packet was decoded. - \return A non-negative value on success, or a negative value on error. - Any error codes should be the same as those returned by - opus_multistream_decode() or opus_multistream_decode_float(). - Success codes are as follows: - \retval 0 Decoding was successful. 
- The application has filled the buffer with - exactly <code>\a _nsamples*\a - _nchannels</code> samples in the requested - format. - \retval #OP_DEC_USE_DEFAULT No decoding was done. - <tt>libopusfile</tt> should do the decoding - by itself instead.*/ -typedef int (*op_decode_cb_func)(void *_ctx,OpusMSDecoder *_decoder,void *_pcm, - const ogg_packet *_op,int _nsamples,int _nchannels,int _format,int _li); - -/**Sets the packet decode callback function. - If set, this is called once for each packet that needs to be decoded. - This can be used by advanced applications to do additional processing on the - compressed or uncompressed data. - For example, an application might save the final entropy coder state for - debugging and testing purposes, or it might apply additional filters - before the downmixing, dithering, or soft-clipping performed by - <tt>libopusfile</tt>, so long as these filters do not introduce any - latency. - - A call to this function is no guarantee that the audio will eventually be - delivered to the application. - <tt>libopusfile</tt> may discard some or all of the decoded audio data - (i.e., at the beginning or end of a link, or after a seek), however the - callback is still required to provide all of it. - \param _of The \c OggOpusFile on which to set the decode callback. - \param _decode_cb The callback function to call. - This may be <code>NULL</code> to disable calling the - callback. - \param _ctx The application-provided context pointer to pass to the - callback on each call.*/ -void op_set_decode_callback(OggOpusFile *_of, - op_decode_cb_func _decode_cb,void *_ctx) OP_ARG_NONNULL(1); - -/**Gain offset type that indicates that the provided offset is relative to the - header gain. 
- This is the default.*/ -#define OP_HEADER_GAIN (0) - -/**Gain offset type that indicates that the provided offset is relative to the - R128_ALBUM_GAIN value (if any), in addition to the header gain.*/ -#define OP_ALBUM_GAIN (3007) - -/**Gain offset type that indicates that the provided offset is relative to the - R128_TRACK_GAIN value (if any), in addition to the header gain.*/ -#define OP_TRACK_GAIN (3008) - -/**Gain offset type that indicates that the provided offset should be used as - the gain directly, without applying any of the header or track gains.*/ -#define OP_ABSOLUTE_GAIN (3009) - -/**Sets the gain to be used for decoded output. - By default, the gain in the header is applied with no additional offset. - The total gain (including header gain and/or track gain, if applicable, and - this offset), will be clamped to [-32768,32767]/256 dB. - This is more than enough to saturate or underflow 16-bit PCM. - \note The new gain will not be applied to any already buffered, decoded - output. - This means you cannot change it sample-by-sample, as at best it will be - updated packet-by-packet. - It is meant for setting a target volume level, rather than applying smooth - fades, etc. - \param _of The \c OggOpusFile on which to set the gain offset. - \param _gain_type One of #OP_HEADER_GAIN, #OP_ALBUM_GAIN, - #OP_TRACK_GAIN, or #OP_ABSOLUTE_GAIN. - \param _gain_offset_q8 The gain offset to apply, in 1/256ths of a dB. - \return 0 on success or a negative value on error. - \retval #OP_EINVAL The \a _gain_type was unrecognized.*/ -int op_set_gain_offset(OggOpusFile *_of, - int _gain_type,opus_int32 _gain_offset_q8) OP_ARG_NONNULL(1); - -/**Sets whether or not dithering is enabled for 16-bit decoding. - By default, when <tt>libopusfile</tt> is compiled to use floating-point - internally, calling op_read() or op_read_stereo() will first decode to - float, and then convert to fixed-point using noise-shaping dithering. - This flag can be used to disable that dithering. 
- When the application uses op_read_float() or op_read_float_stereo(), or when - the library has been compiled to decode directly to fixed point, this flag - has no effect. - \param _of The \c OggOpusFile on which to enable or disable dithering. - \param _enabled A non-zero value to enable dithering, or 0 to disable it.*/ -void op_set_dither_enabled(OggOpusFile *_of,int _enabled) OP_ARG_NONNULL(1); - -/**Reads more samples from the stream. - \note Although \a _buf_size must indicate the total number of values that - can be stored in \a _pcm, the return value is the number of samples - <em>per channel</em>. - This is done because - <ol> - <li>The channel count cannot be known a priori (reading more samples might - advance us into the next link, with a different channel count), so - \a _buf_size cannot also be in units of samples per channel,</li> - <li>Returning the samples per channel matches the <code>libopus</code> API - as closely as we're able,</li> - <li>Returning the total number of values instead of samples per channel - would mean the caller would need a division to compute the samples per - channel, and might worry about the possibility of getting back samples - for some channels and not others, and</li> - <li>This approach is relatively fool-proof: if an application passes too - small a value to \a _buf_size, they will simply get fewer samples back, - and if they assume the return value is the total number of values, then - they will simply read too few (rather than reading too many and going - off the end of the buffer).</li> - </ol> - \param _of The \c OggOpusFile from which to read. - \param[out] _pcm A buffer in which to store the output PCM samples, as - signed native-endian 16-bit values at 48 kHz - with a nominal range of <code>[-32768,32767)</code>. - Multiple channels are interleaved using the - <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis - channel ordering</a>. 
- This must have room for at least \a _buf_size values. - \param _buf_size The number of values that can be stored in \a _pcm. - It is recommended that this be large enough for at - least 120 ms of data at 48 kHz per channel (5760 - values per channel). - Smaller buffers will simply return less data, possibly - consuming more memory to buffer the data internally. - <tt>libopusfile</tt> may return less data than - requested. - If so, there is no guarantee that the remaining data - in \a _pcm will be unmodified. - \param[out] _li The index of the link this data was decoded from. - You may pass <code>NULL</code> if you do not need this - information. - If this function fails (returning a negative value), - this parameter is left unset. - \return The number of samples read per channel on success, or a negative - value on failure. - The channel count can be retrieved on success by calling - <code>op_head(_of,*_li)</code>. - The number of samples returned may be 0 if the buffer was too small - to store even a single sample for all channels, or if end-of-file - was reached. - The list of possible failure codes follows. - Most of them can only be returned by unseekable, chained streams - that encounter a new link. - \retval #OP_HOLE There was a hole in the data, and some samples - may have been skipped. - Call this function again to continue decoding - past the hole. - \retval #OP_EREAD An underlying read operation failed. - This may signal a truncation attack from an - <https:> source. - \retval #OP_EFAULT An internal memory allocation failed. - \retval #OP_EIMPL An unseekable stream encountered a new link that - used a feature that is not implemented, such as - an unsupported channel family. - \retval #OP_EINVAL The stream was only partially open. - \retval #OP_ENOTFORMAT An unseekable stream encountered a new link that - did not have any logical Opus streams in it. 
- \retval #OP_EBADHEADER An unseekable stream encountered a new link with a - required header packet that was not properly - formatted, contained illegal values, or was - missing altogether. - \retval #OP_EVERSION An unseekable stream encountered a new link with - an ID header that contained an unrecognized - version number. - \retval #OP_EBADPACKET Failed to properly decode the next packet. - \retval #OP_EBADLINK We failed to find data we had seen before. - \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with - a starting timestamp that failed basic validity - checks.*/ -OP_WARN_UNUSED_RESULT int op_read(OggOpusFile *_of, - opus_int16 *_pcm,int _buf_size,int *_li) OP_ARG_NONNULL(1); - -/**Reads more samples from the stream. - \note Although \a _buf_size must indicate the total number of values that - can be stored in \a _pcm, the return value is the number of samples - <em>per channel</em>. - <ol> - <li>The channel count cannot be known a priori (reading more samples might - advance us into the next link, with a different channel count), so - \a _buf_size cannot also be in units of samples per channel,</li> - <li>Returning the samples per channel matches the <code>libopus</code> API - as closely as we're able,</li> - <li>Returning the total number of values instead of samples per channel - would mean the caller would need a division to compute the samples per - channel, and might worry about the possibility of getting back samples - for some channels and not others, and</li> - <li>This approach is relatively fool-proof: if an application passes too - small a value to \a _buf_size, they will simply get fewer samples back, - and if they assume the return value is the total number of values, then - they will simply read too few (rather than reading too many and going - off the end of the buffer).</li> - </ol> - \param _of The \c OggOpusFile from which to read. 
- \param[out] _pcm A buffer in which to store the output PCM samples as - signed floats at 48 kHz with a nominal range of - <code>[-1.0,1.0]</code>. - Multiple channels are interleaved using the - <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis - channel ordering</a>. - This must have room for at least \a _buf_size floats. - \param _buf_size The number of floats that can be stored in \a _pcm. - It is recommended that this be large enough for at - least 120 ms of data at 48 kHz per channel (5760 - samples per channel). - Smaller buffers will simply return less data, possibly - consuming more memory to buffer the data internally. - If less than \a _buf_size values are returned, - <tt>libopusfile</tt> makes no guarantee that the - remaining data in \a _pcm will be unmodified. - \param[out] _li The index of the link this data was decoded from. - You may pass <code>NULL</code> if you do not need this - information. - If this function fails (returning a negative value), - this parameter is left unset. - \return The number of samples read per channel on success, or a negative - value on failure. - The channel count can be retrieved on success by calling - <code>op_head(_of,*_li)</code>. - The number of samples returned may be 0 if the buffer was too small - to store even a single sample for all channels, or if end-of-file - was reached. - The list of possible failure codes follows. - Most of them can only be returned by unseekable, chained streams - that encounter a new link. - \retval #OP_HOLE There was a hole in the data, and some samples - may have been skipped. - Call this function again to continue decoding - past the hole. - \retval #OP_EREAD An underlying read operation failed. - This may signal a truncation attack from an - <https:> source. - \retval #OP_EFAULT An internal memory allocation failed. 
- \retval #OP_EIMPL An unseekable stream encountered a new link that - used a feature that is not implemented, such as - an unsupported channel family. - \retval #OP_EINVAL The stream was only partially open. - \retval #OP_ENOTFORMAT An unseekable stream encountered a new link that - did not have any logical Opus streams in it. - \retval #OP_EBADHEADER An unseekable stream encountered a new link with a - required header packet that was not properly - formatted, contained illegal values, or was - missing altogether. - \retval #OP_EVERSION An unseekable stream encountered a new link with - an ID header that contained an unrecognized - version number. - \retval #OP_EBADPACKET Failed to properly decode the next packet. - \retval #OP_EBADLINK We failed to find data we had seen before. - \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with - a starting timestamp that failed basic validity - checks.*/ -OP_WARN_UNUSED_RESULT int op_read_float(OggOpusFile *_of, - float *_pcm,int _buf_size,int *_li) OP_ARG_NONNULL(1); - -/**Reads more samples from the stream and downmixes to stereo, if necessary. - This function is intended for simple players that want a uniform output - format, even if the channel count changes between links in a chained - stream. - \note \a _buf_size indicates the total number of values that can be stored - in \a _pcm, while the return value is the number of samples <em>per - channel</em>, even though the channel count is known, for consistency with - op_read(). - \param _of The \c OggOpusFile from which to read. - \param[out] _pcm A buffer in which to store the output PCM samples, as - signed native-endian 16-bit values at 48 kHz - with a nominal range of <code>[-32768,32767)</code>. - The left and right channels are interleaved in the - buffer. - This must have room for at least \a _buf_size values. - \param _buf_size The number of values that can be stored in \a _pcm. 
- It is recommended that this be large enough for at - least 120 ms of data at 48 kHz per channel (11520 - values total). - Smaller buffers will simply return less data, possibly - consuming more memory to buffer the data internally. - If less than \a _buf_size values are returned, - <tt>libopusfile</tt> makes no guarantee that the - remaining data in \a _pcm will be unmodified. - \return The number of samples read per channel on success, or a negative - value on failure. - The number of samples returned may be 0 if the buffer was too small - to store even a single sample for both channels, or if end-of-file - was reached. - The list of possible failure codes follows. - Most of them can only be returned by unseekable, chained streams - that encounter a new link. - \retval #OP_HOLE There was a hole in the data, and some samples - may have been skipped. - Call this function again to continue decoding - past the hole. - \retval #OP_EREAD An underlying read operation failed. - This may signal a truncation attack from an - <https:> source. - \retval #OP_EFAULT An internal memory allocation failed. - \retval #OP_EIMPL An unseekable stream encountered a new link that - used a feature that is not implemented, such as - an unsupported channel family. - \retval #OP_EINVAL The stream was only partially open. - \retval #OP_ENOTFORMAT An unseekable stream encountered a new link that - did not have any logical Opus streams in it. - \retval #OP_EBADHEADER An unseekable stream encountered a new link with a - required header packet that was not properly - formatted, contained illegal values, or was - missing altogether. - \retval #OP_EVERSION An unseekable stream encountered a new link with - an ID header that contained an unrecognized - version number. - \retval #OP_EBADPACKET Failed to properly decode the next packet. - \retval #OP_EBADLINK We failed to find data we had seen before. 
- \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with - a starting timestamp that failed basic validity - checks.*/ -OP_WARN_UNUSED_RESULT int op_read_stereo(OggOpusFile *_of, - opus_int16 *_pcm,int _buf_size) OP_ARG_NONNULL(1); - -/**Reads more samples from the stream and downmixes to stereo, if necessary. - This function is intended for simple players that want a uniform output - format, even if the channel count changes between links in a chained - stream. - \note \a _buf_size indicates the total number of values that can be stored - in \a _pcm, while the return value is the number of samples <em>per - channel</em>, even though the channel count is known, for consistency with - op_read_float(). - \param _of The \c OggOpusFile from which to read. - \param[out] _pcm A buffer in which to store the output PCM samples, as - signed floats at 48 kHz with a nominal range of - <code>[-1.0,1.0]</code>. - The left and right channels are interleaved in the - buffer. - This must have room for at least \a _buf_size values. - \param _buf_size The number of values that can be stored in \a _pcm. - It is recommended that this be large enough for at - least 120 ms of data at 48 kHz per channel (11520 - values total). - Smaller buffers will simply return less data, possibly - consuming more memory to buffer the data internally. - If less than \a _buf_size values are returned, - <tt>libopusfile</tt> makes no guarantee that the - remaining data in \a _pcm will be unmodified. - \return The number of samples read per channel on success, or a negative - value on failure. - The number of samples returned may be 0 if the buffer was too small - to store even a single sample for both channels, or if end-of-file - was reached. - The list of possible failure codes follows. - Most of them can only be returned by unseekable, chained streams - that encounter a new link. - \retval #OP_HOLE There was a hole in the data, and some samples - may have been skipped. 
- Call this function again to continue decoding - past the hole. - \retval #OP_EREAD An underlying read operation failed. - This may signal a truncation attack from an - <https:> source. - \retval #OP_EFAULT An internal memory allocation failed. - \retval #OP_EIMPL An unseekable stream encountered a new link that - used a feature that is not implemented, such as - an unsupported channel family. - \retval #OP_EINVAL The stream was only partially open. - \retval #OP_ENOTFORMAT An unseekable stream encountered a new link that - that did not have any logical Opus streams in it. - \retval #OP_EBADHEADER An unseekable stream encountered a new link with a - required header packet that was not properly - formatted, contained illegal values, or was - missing altogether. - \retval #OP_EVERSION An unseekable stream encountered a new link with - an ID header that contained an unrecognized - version number. - \retval #OP_EBADPACKET Failed to properly decode the next packet. - \retval #OP_EBADLINK We failed to find data we had seen before. - \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with - a starting timestamp that failed basic validity - checks.*/ -OP_WARN_UNUSED_RESULT int op_read_float_stereo(OggOpusFile *_of, - float *_pcm,int _buf_size) OP_ARG_NONNULL(1); - -/*@}*/ -/*@}*/ - -# if OP_GNUC_PREREQ(4,0) -# pragma GCC visibility pop -# endif - -# if defined(__cplusplus) -} -# endif - -#endif diff --git a/thirdparty/opus/opus_compare.c b/thirdparty/opus/opus_compare.c deleted file mode 100644 index 06c67d752f..0000000000 --- a/thirdparty/opus/opus_compare.c +++ /dev/null @@ -1,379 +0,0 @@ -/* Copyright (c) 2011-2012 Xiph.Org Foundation, Mozilla Corporation - Written by Jean-Marc Valin and Timothy B. 
Terriberry */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include <stdio.h> -#include <stdlib.h> -#include <math.h> -#include <string.h> - -#define OPUS_PI (3.14159265F) - -#define OPUS_COSF(_x) ((float)cos(_x)) -#define OPUS_SINF(_x) ((float)sin(_x)) - -static void *check_alloc(void *_ptr){ - if(_ptr==NULL){ - fprintf(stderr,"Out of memory.\n"); - exit(EXIT_FAILURE); - } - return _ptr; -} - -static void *opus_malloc(size_t _size){ - return check_alloc(malloc(_size)); -} - -static void *opus_realloc(void *_ptr,size_t _size){ - return check_alloc(realloc(_ptr,_size)); -} - -static size_t read_pcm16(float **_samples,FILE *_fin,int _nchannels){ - unsigned char buf[1024]; - float *samples; - size_t nsamples; - size_t csamples; - size_t xi; - size_t nread; - samples=NULL; - nsamples=csamples=0; - for(;;){ - nread=fread(buf,2*_nchannels,1024/(2*_nchannels),_fin); - if(nread<=0)break; - if(nsamples+nread>csamples){ - do csamples=csamples<<1|1; - while(nsamples+nread>csamples); - samples=(float *)opus_realloc(samples, - _nchannels*csamples*sizeof(*samples)); - } - for(xi=0;xi<nread;xi++){ - int ci; - for(ci=0;ci<_nchannels;ci++){ - int s; - s=buf[2*(xi*_nchannels+ci)+1]<<8|buf[2*(xi*_nchannels+ci)]; - s=((s&0xFFFF)^0x8000)-0x8000; - samples[(nsamples+xi)*_nchannels+ci]=s; - } - } - nsamples+=nread; - } - *_samples=(float *)opus_realloc(samples, - _nchannels*nsamples*sizeof(*samples)); - return nsamples; -} - -static void band_energy(float *_out,float *_ps,const int *_bands,int _nbands, - const float *_in,int _nchannels,size_t _nframes,int _window_sz, - int _step,int _downsample){ - float *window; - float *x; - float *c; - float *s; - size_t xi; - int xj; - int ps_sz; - window=(float *)opus_malloc((3+_nchannels)*_window_sz*sizeof(*window)); - c=window+_window_sz; - s=c+_window_sz; - x=s+_window_sz; - ps_sz=_window_sz/2; - for(xj=0;xj<_window_sz;xj++){ - window[xj]=0.5F-0.5F*OPUS_COSF((2*OPUS_PI/(_window_sz-1))*xj); - } - for(xj=0;xj<_window_sz;xj++){ - c[xj]=OPUS_COSF((2*OPUS_PI/_window_sz)*xj); - } - 
for(xj=0;xj<_window_sz;xj++){ - s[xj]=OPUS_SINF((2*OPUS_PI/_window_sz)*xj); - } - for(xi=0;xi<_nframes;xi++){ - int ci; - int xk; - int bi; - for(ci=0;ci<_nchannels;ci++){ - for(xk=0;xk<_window_sz;xk++){ - x[ci*_window_sz+xk]=window[xk]*_in[(xi*_step+xk)*_nchannels+ci]; - } - } - for(bi=xj=0;bi<_nbands;bi++){ - float p[2]={0}; - for(;xj<_bands[bi+1];xj++){ - for(ci=0;ci<_nchannels;ci++){ - float re; - float im; - int ti; - ti=0; - re=im=0; - for(xk=0;xk<_window_sz;xk++){ - re+=c[ti]*x[ci*_window_sz+xk]; - im-=s[ti]*x[ci*_window_sz+xk]; - ti+=xj; - if(ti>=_window_sz)ti-=_window_sz; - } - re*=_downsample; - im*=_downsample; - _ps[(xi*ps_sz+xj)*_nchannels+ci]=re*re+im*im+100000; - p[ci]+=_ps[(xi*ps_sz+xj)*_nchannels+ci]; - } - } - if(_out){ - _out[(xi*_nbands+bi)*_nchannels]=p[0]/(_bands[bi+1]-_bands[bi]); - if(_nchannels==2){ - _out[(xi*_nbands+bi)*_nchannels+1]=p[1]/(_bands[bi+1]-_bands[bi]); - } - } - } - } - free(window); -} - -#define NBANDS (21) -#define NFREQS (240) - -/*Bands on which we compute the pseudo-NMR (Bark-derived - CELT bands).*/ -static const int BANDS[NBANDS+1]={ - 0,2,4,6,8,10,12,14,16,20,24,28,32,40,48,56,68,80,96,120,156,200 -}; - -#define TEST_WIN_SIZE (480) -#define TEST_WIN_STEP (120) - -int main(int _argc,const char **_argv){ - FILE *fin1; - FILE *fin2; - float *x; - float *y; - float *xb; - float *X; - float *Y; - double err; - float Q; - size_t xlength; - size_t ylength; - size_t nframes; - size_t xi; - int ci; - int xj; - int bi; - int nchannels; - unsigned rate; - int downsample; - int ybands; - int yfreqs; - int max_compare; - if(_argc<3||_argc>6){ - fprintf(stderr,"Usage: %s [-s] [-r rate2] <file1.sw> <file2.sw>\n", - _argv[0]); - return EXIT_FAILURE; - } - nchannels=1; - if(strcmp(_argv[1],"-s")==0){ - nchannels=2; - _argv++; - } - rate=48000; - ybands=NBANDS; - yfreqs=NFREQS; - downsample=1; - if(strcmp(_argv[1],"-r")==0){ - rate=atoi(_argv[2]); - if(rate!=8000&&rate!=12000&&rate!=16000&&rate!=24000&&rate!=48000){ - fprintf(stderr, 
- "Sampling rate must be 8000, 12000, 16000, 24000, or 48000\n"); - return EXIT_FAILURE; - } - downsample=48000/rate; - switch(rate){ - case 8000:ybands=13;break; - case 12000:ybands=15;break; - case 16000:ybands=17;break; - case 24000:ybands=19;break; - } - yfreqs=NFREQS/downsample; - _argv+=2; - } - fin1=fopen(_argv[1],"rb"); - if(fin1==NULL){ - fprintf(stderr,"Error opening '%s'.\n",_argv[1]); - return EXIT_FAILURE; - } - fin2=fopen(_argv[2],"rb"); - if(fin2==NULL){ - fprintf(stderr,"Error opening '%s'.\n",_argv[2]); - fclose(fin1); - return EXIT_FAILURE; - } - /*Read in the data and allocate scratch space.*/ - xlength=read_pcm16(&x,fin1,2); - if(nchannels==1){ - for(xi=0;xi<xlength;xi++)x[xi]=.5*(x[2*xi]+x[2*xi+1]); - } - fclose(fin1); - ylength=read_pcm16(&y,fin2,nchannels); - fclose(fin2); - if(xlength!=ylength*downsample){ - fprintf(stderr,"Sample counts do not match (%lu!=%lu).\n", - (unsigned long)xlength,(unsigned long)ylength*downsample); - return EXIT_FAILURE; - } - if(xlength<TEST_WIN_SIZE){ - fprintf(stderr,"Insufficient sample data (%lu<%i).\n", - (unsigned long)xlength,TEST_WIN_SIZE); - return EXIT_FAILURE; - } - nframes=(xlength-TEST_WIN_SIZE+TEST_WIN_STEP)/TEST_WIN_STEP; - xb=(float *)opus_malloc(nframes*NBANDS*nchannels*sizeof(*xb)); - X=(float *)opus_malloc(nframes*NFREQS*nchannels*sizeof(*X)); - Y=(float *)opus_malloc(nframes*yfreqs*nchannels*sizeof(*Y)); - /*Compute the per-band spectral energy of the original signal - and the error.*/ - band_energy(xb,X,BANDS,NBANDS,x,nchannels,nframes, - TEST_WIN_SIZE,TEST_WIN_STEP,1); - free(x); - band_energy(NULL,Y,BANDS,ybands,y,nchannels,nframes, - TEST_WIN_SIZE/downsample,TEST_WIN_STEP/downsample,downsample); - free(y); - for(xi=0;xi<nframes;xi++){ - /*Frequency masking (low to high): 10 dB/Bark slope.*/ - for(bi=1;bi<NBANDS;bi++){ - for(ci=0;ci<nchannels;ci++){ - xb[(xi*NBANDS+bi)*nchannels+ci]+= - 0.1F*xb[(xi*NBANDS+bi-1)*nchannels+ci]; - } - } - /*Frequency masking (high to low): 15 dB/Bark slope.*/ 
- for(bi=NBANDS-1;bi-->0;){ - for(ci=0;ci<nchannels;ci++){ - xb[(xi*NBANDS+bi)*nchannels+ci]+= - 0.03F*xb[(xi*NBANDS+bi+1)*nchannels+ci]; - } - } - if(xi>0){ - /*Temporal masking: -3 dB/2.5ms slope.*/ - for(bi=0;bi<NBANDS;bi++){ - for(ci=0;ci<nchannels;ci++){ - xb[(xi*NBANDS+bi)*nchannels+ci]+= - 0.5F*xb[((xi-1)*NBANDS+bi)*nchannels+ci]; - } - } - } - /* Allowing some cross-talk */ - if(nchannels==2){ - for(bi=0;bi<NBANDS;bi++){ - float l,r; - l=xb[(xi*NBANDS+bi)*nchannels+0]; - r=xb[(xi*NBANDS+bi)*nchannels+1]; - xb[(xi*NBANDS+bi)*nchannels+0]+=0.01F*r; - xb[(xi*NBANDS+bi)*nchannels+1]+=0.01F*l; - } - } - - /* Apply masking */ - for(bi=0;bi<ybands;bi++){ - for(xj=BANDS[bi];xj<BANDS[bi+1];xj++){ - for(ci=0;ci<nchannels;ci++){ - X[(xi*NFREQS+xj)*nchannels+ci]+= - 0.1F*xb[(xi*NBANDS+bi)*nchannels+ci]; - Y[(xi*yfreqs+xj)*nchannels+ci]+= - 0.1F*xb[(xi*NBANDS+bi)*nchannels+ci]; - } - } - } - } - - /* Average of consecutive frames to make comparison slightly less sensitive */ - for(bi=0;bi<ybands;bi++){ - for(xj=BANDS[bi];xj<BANDS[bi+1];xj++){ - for(ci=0;ci<nchannels;ci++){ - float xtmp; - float ytmp; - xtmp = X[xj*nchannels+ci]; - ytmp = Y[xj*nchannels+ci]; - for(xi=1;xi<nframes;xi++){ - float xtmp2; - float ytmp2; - xtmp2 = X[(xi*NFREQS+xj)*nchannels+ci]; - ytmp2 = Y[(xi*yfreqs+xj)*nchannels+ci]; - X[(xi*NFREQS+xj)*nchannels+ci] += xtmp; - Y[(xi*yfreqs+xj)*nchannels+ci] += ytmp; - xtmp = xtmp2; - ytmp = ytmp2; - } - } - } - } - - /*If working at a lower sampling rate, don't take into account the last - 300 Hz to allow for different transition bands. 
- For 12 kHz, we don't skip anything, because the last band already skips - 400 Hz.*/ - if(rate==48000)max_compare=BANDS[NBANDS]; - else if(rate==12000)max_compare=BANDS[ybands]; - else max_compare=BANDS[ybands]-3; - err=0; - for(xi=0;xi<nframes;xi++){ - double Ef; - Ef=0; - for(bi=0;bi<ybands;bi++){ - double Eb; - Eb=0; - for(xj=BANDS[bi];xj<BANDS[bi+1]&&xj<max_compare;xj++){ - for(ci=0;ci<nchannels;ci++){ - float re; - float im; - re=Y[(xi*yfreqs+xj)*nchannels+ci]/X[(xi*NFREQS+xj)*nchannels+ci]; - im=re-log(re)-1; - /*Make comparison less sensitive around the SILK/CELT cross-over to - allow for mode freedom in the filters.*/ - if(xj>=79&&xj<=81)im*=0.1F; - if(xj==80)im*=0.1F; - Eb+=im; - } - } - Eb /= (BANDS[bi+1]-BANDS[bi])*nchannels; - Ef += Eb*Eb; - } - /*Using a fixed normalization value means we're willing to accept slightly - lower quality for lower sampling rates.*/ - Ef/=NBANDS; - Ef*=Ef; - err+=Ef*Ef; - } - err=pow(err/nframes,1.0/16); - Q=100*(1-0.5*log(1+err)/log(1.13)); - if(Q<0){ - fprintf(stderr,"Test vector FAILS\n"); - fprintf(stderr,"Internal weighted error is %f\n",err); - return EXIT_FAILURE; - } - else{ - fprintf(stderr,"Test vector PASSES\n"); - fprintf(stderr, - "Opus quality metric: %.1f %% (internal weighted error is %f)\n",Q,err); - return EXIT_SUCCESS; - } -} diff --git a/thirdparty/opus/opus_decoder.c b/thirdparty/opus/opus_decoder.c deleted file mode 100644 index 080bec5072..0000000000 --- a/thirdparty/opus/opus_decoder.c +++ /dev/null @@ -1,981 +0,0 @@ -/* Copyright (c) 2010 Xiph.Org Foundation, Skype Limited - Written by Jean-Marc Valin and Koen Vos */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#ifndef OPUS_BUILD -# error "OPUS_BUILD _MUST_ be defined to build Opus. This probably means you need other defines as well, as in a config.h. See the included build files for details." -#endif - -#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__) && !defined(OPUS_WILL_BE_SLOW) -# pragma message "You appear to be compiling without optimization, if so opus will be very slow." 
-#endif - -#include <stdarg.h> -#include "celt.h" -#include "opus.h" -#include "entdec.h" -#include "modes.h" -#include "API.h" -#include "stack_alloc.h" -#include "float_cast.h" -#include "opus_private.h" -#include "os_support.h" -#include "structs.h" -#include "define.h" -#include "mathops.h" -#include "cpu_support.h" - -struct OpusDecoder { - int celt_dec_offset; - int silk_dec_offset; - int channels; - opus_int32 Fs; /** Sampling rate (at the API level) */ - silk_DecControlStruct DecControl; - int decode_gain; - int arch; - - /* Everything beyond this point gets cleared on a reset */ -#define OPUS_DECODER_RESET_START stream_channels - int stream_channels; - - int bandwidth; - int mode; - int prev_mode; - int frame_size; - int prev_redundancy; - int last_packet_duration; -#ifndef FIXED_POINT - opus_val16 softclip_mem[2]; -#endif - - opus_uint32 rangeFinal; -}; - - -int opus_decoder_get_size(int channels) -{ - int silkDecSizeBytes, celtDecSizeBytes; - int ret; - if (channels<1 || channels > 2) - return 0; - ret = silk_Get_Decoder_Size( &silkDecSizeBytes ); - if(ret) - return 0; - silkDecSizeBytes = align(silkDecSizeBytes); - celtDecSizeBytes = celt_decoder_get_size(channels); - return align(sizeof(OpusDecoder))+silkDecSizeBytes+celtDecSizeBytes; -} - -int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels) -{ - void *silk_dec; - CELTDecoder *celt_dec; - int ret, silkDecSizeBytes; - - if ((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000) - || (channels!=1&&channels!=2)) - return OPUS_BAD_ARG; - - OPUS_CLEAR((char*)st, opus_decoder_get_size(channels)); - /* Initialize SILK encoder */ - ret = silk_Get_Decoder_Size(&silkDecSizeBytes); - if (ret) - return OPUS_INTERNAL_ERROR; - - silkDecSizeBytes = align(silkDecSizeBytes); - st->silk_dec_offset = align(sizeof(OpusDecoder)); - st->celt_dec_offset = st->silk_dec_offset+silkDecSizeBytes; - silk_dec = (char*)st+st->silk_dec_offset; - celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset); - 
st->stream_channels = st->channels = channels; - - st->Fs = Fs; - st->DecControl.API_sampleRate = st->Fs; - st->DecControl.nChannelsAPI = st->channels; - - /* Reset decoder */ - ret = silk_InitDecoder( silk_dec ); - if(ret)return OPUS_INTERNAL_ERROR; - - /* Initialize CELT decoder */ - ret = celt_decoder_init(celt_dec, Fs, channels); - if(ret!=OPUS_OK)return OPUS_INTERNAL_ERROR; - - celt_decoder_ctl(celt_dec, CELT_SET_SIGNALLING(0)); - - st->prev_mode = 0; - st->frame_size = Fs/400; - st->arch = opus_select_arch(); - return OPUS_OK; -} - -OpusDecoder *opus_decoder_create(opus_int32 Fs, int channels, int *error) -{ - int ret; - OpusDecoder *st; - if ((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000) - || (channels!=1&&channels!=2)) - { - if (error) - *error = OPUS_BAD_ARG; - return NULL; - } - st = (OpusDecoder *)opus_alloc(opus_decoder_get_size(channels)); - if (st == NULL) - { - if (error) - *error = OPUS_ALLOC_FAIL; - return NULL; - } - ret = opus_decoder_init(st, Fs, channels); - if (error) - *error = ret; - if (ret != OPUS_OK) - { - opus_free(st); - st = NULL; - } - return st; -} - -static void smooth_fade(const opus_val16 *in1, const opus_val16 *in2, - opus_val16 *out, int overlap, int channels, - const opus_val16 *window, opus_int32 Fs) -{ - int i, c; - int inc = 48000/Fs; - for (c=0;c<channels;c++) - { - for (i=0;i<overlap;i++) - { - opus_val16 w = MULT16_16_Q15(window[i*inc], window[i*inc]); - out[i*channels+c] = SHR32(MAC16_16(MULT16_16(w,in2[i*channels+c]), - Q15ONE-w, in1[i*channels+c]), 15); - } - } -} - -static int opus_packet_get_mode(const unsigned char *data) -{ - int mode; - if (data[0]&0x80) - { - mode = MODE_CELT_ONLY; - } else if ((data[0]&0x60) == 0x60) - { - mode = MODE_HYBRID; - } else { - mode = MODE_SILK_ONLY; - } - return mode; -} - -static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, - opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec) -{ - void *silk_dec; - CELTDecoder *celt_dec; - int i, 
silk_ret=0, celt_ret=0; - ec_dec dec; - opus_int32 silk_frame_size; - int pcm_silk_size; - VARDECL(opus_int16, pcm_silk); - int pcm_transition_silk_size; - VARDECL(opus_val16, pcm_transition_silk); - int pcm_transition_celt_size; - VARDECL(opus_val16, pcm_transition_celt); - opus_val16 *pcm_transition=NULL; - int redundant_audio_size; - VARDECL(opus_val16, redundant_audio); - - int audiosize; - int mode; - int transition=0; - int start_band; - int redundancy=0; - int redundancy_bytes = 0; - int celt_to_silk=0; - int c; - int F2_5, F5, F10, F20; - const opus_val16 *window; - opus_uint32 redundant_rng = 0; - int celt_accum; - ALLOC_STACK; - - silk_dec = (char*)st+st->silk_dec_offset; - celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset); - F20 = st->Fs/50; - F10 = F20>>1; - F5 = F10>>1; - F2_5 = F5>>1; - if (frame_size < F2_5) - { - RESTORE_STACK; - return OPUS_BUFFER_TOO_SMALL; - } - /* Limit frame_size to avoid excessive stack allocations. */ - frame_size = IMIN(frame_size, st->Fs/25*3); - /* Payloads of 1 (2 including ToC) or 0 trigger the PLC/DTX */ - if (len<=1) - { - data = NULL; - /* In that case, don't conceal more than what the ToC says */ - frame_size = IMIN(frame_size, st->frame_size); - } - if (data != NULL) - { - audiosize = st->frame_size; - mode = st->mode; - ec_dec_init(&dec,(unsigned char*)data,len); - } else { - audiosize = frame_size; - mode = st->prev_mode; - - if (mode == 0) - { - /* If we haven't got any packet yet, all we can do is return zeros */ - for (i=0;i<audiosize*st->channels;i++) - pcm[i] = 0; - RESTORE_STACK; - return audiosize; - } - - /* Avoids trying to run the PLC on sizes other than 2.5 (CELT), 5 (CELT), - 10, or 20 (e.g. 12.5 or 30 ms). 
*/ - if (audiosize > F20) - { - do { - int ret = opus_decode_frame(st, NULL, 0, pcm, IMIN(audiosize, F20), 0); - if (ret<0) - { - RESTORE_STACK; - return ret; - } - pcm += ret*st->channels; - audiosize -= ret; - } while (audiosize > 0); - RESTORE_STACK; - return frame_size; - } else if (audiosize < F20) - { - if (audiosize > F10) - audiosize = F10; - else if (mode != MODE_SILK_ONLY && audiosize > F5 && audiosize < F10) - audiosize = F5; - } - } - - /* In fixed-point, we can tell CELT to do the accumulation on top of the - SILK PCM buffer. This saves some stack space. */ -#ifdef FIXED_POINT - celt_accum = (mode != MODE_CELT_ONLY) && (frame_size >= F10); -#else - celt_accum = 0; -#endif - - pcm_transition_silk_size = ALLOC_NONE; - pcm_transition_celt_size = ALLOC_NONE; - if (data!=NULL && st->prev_mode > 0 && ( - (mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY && !st->prev_redundancy) - || (mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) ) - ) - { - transition = 1; - /* Decide where to allocate the stack memory for pcm_transition */ - if (mode == MODE_CELT_ONLY) - pcm_transition_celt_size = F5*st->channels; - else - pcm_transition_silk_size = F5*st->channels; - } - ALLOC(pcm_transition_celt, pcm_transition_celt_size, opus_val16); - if (transition && mode == MODE_CELT_ONLY) - { - pcm_transition = pcm_transition_celt; - opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0); - } - if (audiosize > frame_size) - { - /*fprintf(stderr, "PCM buffer too small: %d vs %d (mode = %d)\n", audiosize, frame_size, mode);*/ - RESTORE_STACK; - return OPUS_BAD_ARG; - } else { - frame_size = audiosize; - } - - /* Don't allocate any memory when in CELT-only mode */ - pcm_silk_size = (mode != MODE_CELT_ONLY && !celt_accum) ? 
IMAX(F10, frame_size)*st->channels : ALLOC_NONE; - ALLOC(pcm_silk, pcm_silk_size, opus_int16); - - /* SILK processing */ - if (mode != MODE_CELT_ONLY) - { - int lost_flag, decoded_samples; - opus_int16 *pcm_ptr; -#ifdef FIXED_POINT - if (celt_accum) - pcm_ptr = pcm; - else -#endif - pcm_ptr = pcm_silk; - - if (st->prev_mode==MODE_CELT_ONLY) - silk_InitDecoder( silk_dec ); - - /* The SILK PLC cannot produce frames of less than 10 ms */ - st->DecControl.payloadSize_ms = IMAX(10, 1000 * audiosize / st->Fs); - - if (data != NULL) - { - st->DecControl.nChannelsInternal = st->stream_channels; - if( mode == MODE_SILK_ONLY ) { - if( st->bandwidth == OPUS_BANDWIDTH_NARROWBAND ) { - st->DecControl.internalSampleRate = 8000; - } else if( st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND ) { - st->DecControl.internalSampleRate = 12000; - } else if( st->bandwidth == OPUS_BANDWIDTH_WIDEBAND ) { - st->DecControl.internalSampleRate = 16000; - } else { - st->DecControl.internalSampleRate = 16000; - silk_assert( 0 ); - } - } else { - /* Hybrid mode */ - st->DecControl.internalSampleRate = 16000; - } - } - - lost_flag = data == NULL ? 
1 : 2 * decode_fec; - decoded_samples = 0; - do { - /* Call SILK decoder */ - int first_frame = decoded_samples == 0; - silk_ret = silk_Decode( silk_dec, &st->DecControl, - lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size, st->arch ); - if( silk_ret ) { - if (lost_flag) { - /* PLC failure should not be fatal */ - silk_frame_size = frame_size; - for (i=0;i<frame_size*st->channels;i++) - pcm_ptr[i] = 0; - } else { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - } - pcm_ptr += silk_frame_size * st->channels; - decoded_samples += silk_frame_size; - } while( decoded_samples < frame_size ); - } - - start_band = 0; - if (!decode_fec && mode != MODE_CELT_ONLY && data != NULL - && ec_tell(&dec)+17+20*(st->mode == MODE_HYBRID) <= 8*len) - { - /* Check if we have a redundant 0-8 kHz band */ - if (mode == MODE_HYBRID) - redundancy = ec_dec_bit_logp(&dec, 12); - else - redundancy = 1; - if (redundancy) - { - celt_to_silk = ec_dec_bit_logp(&dec, 1); - /* redundancy_bytes will be at least two, in the non-hybrid - case due to the ec_tell() check above */ - redundancy_bytes = mode==MODE_HYBRID ? - (opus_int32)ec_dec_uint(&dec, 256)+2 : - len-((ec_tell(&dec)+7)>>3); - len -= redundancy_bytes; - /* This is a sanity check. It should never happen for a valid - packet, so the exact behaviour is not normative. 
*/ - if (len*8 < ec_tell(&dec)) - { - len = 0; - redundancy_bytes = 0; - redundancy = 0; - } - /* Shrink decoder because of raw bits */ - dec.storage -= redundancy_bytes; - } - } - if (mode != MODE_CELT_ONLY) - start_band = 17; - - { - int endband=21; - - switch(st->bandwidth) - { - case OPUS_BANDWIDTH_NARROWBAND: - endband = 13; - break; - case OPUS_BANDWIDTH_MEDIUMBAND: - case OPUS_BANDWIDTH_WIDEBAND: - endband = 17; - break; - case OPUS_BANDWIDTH_SUPERWIDEBAND: - endband = 19; - break; - case OPUS_BANDWIDTH_FULLBAND: - endband = 21; - break; - } - celt_decoder_ctl(celt_dec, CELT_SET_END_BAND(endband)); - celt_decoder_ctl(celt_dec, CELT_SET_CHANNELS(st->stream_channels)); - } - - if (redundancy) - { - transition = 0; - pcm_transition_silk_size=ALLOC_NONE; - } - - ALLOC(pcm_transition_silk, pcm_transition_silk_size, opus_val16); - - if (transition && mode != MODE_CELT_ONLY) - { - pcm_transition = pcm_transition_silk; - opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0); - } - - /* Only allocation memory for redundancy if/when needed */ - redundant_audio_size = redundancy ? F5*st->channels : ALLOC_NONE; - ALLOC(redundant_audio, redundant_audio_size, opus_val16); - - /* 5 ms redundant frame for CELT->SILK*/ - if (redundancy && celt_to_silk) - { - celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); - celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, - redundant_audio, F5, NULL, 0); - celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); - } - - /* MUST be after PLC */ - celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(start_band)); - - if (mode != MODE_SILK_ONLY) - { - int celt_frame_size = IMIN(F20, frame_size); - /* Make sure to discard any previous CELT state */ - if (mode != st->prev_mode && st->prev_mode > 0 && !st->prev_redundancy) - celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); - /* Decode CELT */ - celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? 
NULL : data, - len, pcm, celt_frame_size, &dec, celt_accum); - } else { - unsigned char silence[2] = {0xFF, 0xFF}; - if (!celt_accum) - { - for (i=0;i<frame_size*st->channels;i++) - pcm[i] = 0; - } - /* For hybrid -> SILK transitions, we let the CELT MDCT - do a fade-out by decoding a silence frame */ - if (st->prev_mode == MODE_HYBRID && !(redundancy && celt_to_silk && st->prev_redundancy) ) - { - celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); - celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL, celt_accum); - } - } - - if (mode != MODE_CELT_ONLY && !celt_accum) - { -#ifdef FIXED_POINT - for (i=0;i<frame_size*st->channels;i++) - pcm[i] = SAT16(ADD32(pcm[i], pcm_silk[i])); -#else - for (i=0;i<frame_size*st->channels;i++) - pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]); -#endif - } - - { - const CELTMode *celt_mode; - celt_decoder_ctl(celt_dec, CELT_GET_MODE(&celt_mode)); - window = celt_mode->window; - } - - /* 5 ms redundant frame for SILK->CELT */ - if (redundancy && !celt_to_silk) - { - celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); - celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); - - celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL, 0); - celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); - smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5, - pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs); - } - if (redundancy && celt_to_silk) - { - for (c=0;c<st->channels;c++) - { - for (i=0;i<F2_5;i++) - pcm[st->channels*i+c] = redundant_audio[st->channels*i+c]; - } - smooth_fade(redundant_audio+st->channels*F2_5, pcm+st->channels*F2_5, - pcm+st->channels*F2_5, F2_5, st->channels, window, st->Fs); - } - if (transition) - { - if (audiosize >= F5) - { - for (i=0;i<st->channels*F2_5;i++) - pcm[i] = pcm_transition[i]; - smooth_fade(pcm_transition+st->channels*F2_5, pcm+st->channels*F2_5, - pcm+st->channels*F2_5, F2_5, - st->channels, window, 
st->Fs); - } else { - /* Not enough time to do a clean transition, but we do it anyway - This will not preserve amplitude perfectly and may introduce - a bit of temporal aliasing, but it shouldn't be too bad and - that's pretty much the best we can do. In any case, generating this - transition it pretty silly in the first place */ - smooth_fade(pcm_transition, pcm, - pcm, F2_5, - st->channels, window, st->Fs); - } - } - - if(st->decode_gain) - { - opus_val32 gain; - gain = celt_exp2(MULT16_16_P15(QCONST16(6.48814081e-4f, 25), st->decode_gain)); - for (i=0;i<frame_size*st->channels;i++) - { - opus_val32 x; - x = MULT16_32_P16(pcm[i],gain); - pcm[i] = SATURATE(x, 32767); - } - } - - if (len <= 1) - st->rangeFinal = 0; - else - st->rangeFinal = dec.rng ^ redundant_rng; - - st->prev_mode = mode; - st->prev_redundancy = redundancy && !celt_to_silk; - - if (celt_ret>=0) - { - if (OPUS_CHECK_ARRAY(pcm, audiosize*st->channels)) - OPUS_PRINT_INT(audiosize); - } - - RESTORE_STACK; - return celt_ret < 0 ? 
celt_ret : audiosize; - -} - -int opus_decode_native(OpusDecoder *st, const unsigned char *data, - opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec, - int self_delimited, opus_int32 *packet_offset, int soft_clip) -{ - int i, nb_samples; - int count, offset; - unsigned char toc; - int packet_frame_size, packet_bandwidth, packet_mode, packet_stream_channels; - /* 48 x 2.5 ms = 120 ms */ - opus_int16 size[48]; - if (decode_fec<0 || decode_fec>1) - return OPUS_BAD_ARG; - /* For FEC/PLC, frame_size has to be to have a multiple of 2.5 ms */ - if ((decode_fec || len==0 || data==NULL) && frame_size%(st->Fs/400)!=0) - return OPUS_BAD_ARG; - if (len==0 || data==NULL) - { - int pcm_count=0; - do { - int ret; - ret = opus_decode_frame(st, NULL, 0, pcm+pcm_count*st->channels, frame_size-pcm_count, 0); - if (ret<0) - return ret; - pcm_count += ret; - } while (pcm_count < frame_size); - celt_assert(pcm_count == frame_size); - if (OPUS_CHECK_ARRAY(pcm, pcm_count*st->channels)) - OPUS_PRINT_INT(pcm_count); - st->last_packet_duration = pcm_count; - return pcm_count; - } else if (len<0) - return OPUS_BAD_ARG; - - packet_mode = opus_packet_get_mode(data); - packet_bandwidth = opus_packet_get_bandwidth(data); - packet_frame_size = opus_packet_get_samples_per_frame(data, st->Fs); - packet_stream_channels = opus_packet_get_nb_channels(data); - - count = opus_packet_parse_impl(data, len, self_delimited, &toc, NULL, - size, &offset, packet_offset); - if (count<0) - return count; - - data += offset; - - if (decode_fec) - { - int duration_copy; - int ret; - /* If no FEC can be present, run the PLC (recursive call) */ - if (frame_size < packet_frame_size || packet_mode == MODE_CELT_ONLY || st->mode == MODE_CELT_ONLY) - return opus_decode_native(st, NULL, 0, pcm, frame_size, 0, 0, NULL, soft_clip); - /* Otherwise, run the PLC on everything except the size for which we might have FEC */ - duration_copy = st->last_packet_duration; - if (frame_size-packet_frame_size!=0) - { - ret = 
opus_decode_native(st, NULL, 0, pcm, frame_size-packet_frame_size, 0, 0, NULL, soft_clip); - if (ret<0) - { - st->last_packet_duration = duration_copy; - return ret; - } - celt_assert(ret==frame_size-packet_frame_size); - } - /* Complete with FEC */ - st->mode = packet_mode; - st->bandwidth = packet_bandwidth; - st->frame_size = packet_frame_size; - st->stream_channels = packet_stream_channels; - ret = opus_decode_frame(st, data, size[0], pcm+st->channels*(frame_size-packet_frame_size), - packet_frame_size, 1); - if (ret<0) - return ret; - else { - if (OPUS_CHECK_ARRAY(pcm, frame_size*st->channels)) - OPUS_PRINT_INT(frame_size); - st->last_packet_duration = frame_size; - return frame_size; - } - } - - if (count*packet_frame_size > frame_size) - return OPUS_BUFFER_TOO_SMALL; - - /* Update the state as the last step to avoid updating it on an invalid packet */ - st->mode = packet_mode; - st->bandwidth = packet_bandwidth; - st->frame_size = packet_frame_size; - st->stream_channels = packet_stream_channels; - - nb_samples=0; - for (i=0;i<count;i++) - { - int ret; - ret = opus_decode_frame(st, data, size[i], pcm+nb_samples*st->channels, frame_size-nb_samples, 0); - if (ret<0) - return ret; - celt_assert(ret==packet_frame_size); - data += size[i]; - nb_samples += ret; - } - st->last_packet_duration = nb_samples; - if (OPUS_CHECK_ARRAY(pcm, nb_samples*st->channels)) - OPUS_PRINT_INT(nb_samples); -#ifndef FIXED_POINT - if (soft_clip) - opus_pcm_soft_clip(pcm, nb_samples, st->channels, st->softclip_mem); - else - st->softclip_mem[0]=st->softclip_mem[1]=0; -#endif - return nb_samples; -} - -#ifdef FIXED_POINT - -int opus_decode(OpusDecoder *st, const unsigned char *data, - opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec) -{ - if(frame_size<=0) - return OPUS_BAD_ARG; - return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0); -} - -#ifndef DISABLE_FLOAT_API -int opus_decode_float(OpusDecoder *st, const unsigned char *data, - opus_int32 
len, float *pcm, int frame_size, int decode_fec) -{ - VARDECL(opus_int16, out); - int ret, i; - int nb_samples; - ALLOC_STACK; - - if(frame_size<=0) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - if (data != NULL && len > 0 && !decode_fec) - { - nb_samples = opus_decoder_get_nb_samples(st, data, len); - if (nb_samples>0) - frame_size = IMIN(frame_size, nb_samples); - else - return OPUS_INVALID_PACKET; - } - ALLOC(out, frame_size*st->channels, opus_int16); - - ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0); - if (ret > 0) - { - for (i=0;i<ret*st->channels;i++) - pcm[i] = (1.f/32768.f)*(out[i]); - } - RESTORE_STACK; - return ret; -} -#endif - - -#else -int opus_decode(OpusDecoder *st, const unsigned char *data, - opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec) -{ - VARDECL(float, out); - int ret, i; - int nb_samples; - ALLOC_STACK; - - if(frame_size<=0) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - - if (data != NULL && len > 0 && !decode_fec) - { - nb_samples = opus_decoder_get_nb_samples(st, data, len); - if (nb_samples>0) - frame_size = IMIN(frame_size, nb_samples); - else - return OPUS_INVALID_PACKET; - } - ALLOC(out, frame_size*st->channels, float); - - ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1); - if (ret > 0) - { - for (i=0;i<ret*st->channels;i++) - pcm[i] = FLOAT2INT16(out[i]); - } - RESTORE_STACK; - return ret; -} - -int opus_decode_float(OpusDecoder *st, const unsigned char *data, - opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec) -{ - if(frame_size<=0) - return OPUS_BAD_ARG; - return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0); -} - -#endif - -int opus_decoder_ctl(OpusDecoder *st, int request, ...) 
-{ - int ret = OPUS_OK; - va_list ap; - void *silk_dec; - CELTDecoder *celt_dec; - - silk_dec = (char*)st+st->silk_dec_offset; - celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset); - - - va_start(ap, request); - - switch (request) - { - case OPUS_GET_BANDWIDTH_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->bandwidth; - } - break; - case OPUS_GET_FINAL_RANGE_REQUEST: - { - opus_uint32 *value = va_arg(ap, opus_uint32*); - if (!value) - { - goto bad_arg; - } - *value = st->rangeFinal; - } - break; - case OPUS_RESET_STATE: - { - OPUS_CLEAR((char*)&st->OPUS_DECODER_RESET_START, - sizeof(OpusDecoder)- - ((char*)&st->OPUS_DECODER_RESET_START - (char*)st)); - - celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); - silk_InitDecoder( silk_dec ); - st->stream_channels = st->channels; - st->frame_size = st->Fs/400; - } - break; - case OPUS_GET_SAMPLE_RATE_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->Fs; - } - break; - case OPUS_GET_PITCH_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - if (st->prev_mode == MODE_CELT_ONLY) - celt_decoder_ctl(celt_dec, OPUS_GET_PITCH(value)); - else - *value = st->DecControl.prevPitchLag; - } - break; - case OPUS_GET_GAIN_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->decode_gain; - } - break; - case OPUS_SET_GAIN_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<-32768 || value>32767) - { - goto bad_arg; - } - st->decode_gain = value; - } - break; - case OPUS_GET_LAST_PACKET_DURATION_REQUEST: - { - opus_uint32 *value = va_arg(ap, opus_uint32*); - if (!value) - { - goto bad_arg; - } - *value = st->last_packet_duration; - } - break; - default: - /*fprintf(stderr, "unknown opus_decoder_ctl() request: %d", request);*/ - ret = OPUS_UNIMPLEMENTED; - break; - } - - va_end(ap); - return 
ret; -bad_arg: - va_end(ap); - return OPUS_BAD_ARG; -} - -void opus_decoder_destroy(OpusDecoder *st) -{ - opus_free(st); -} - - -int opus_packet_get_bandwidth(const unsigned char *data) -{ - int bandwidth; - if (data[0]&0x80) - { - bandwidth = OPUS_BANDWIDTH_MEDIUMBAND + ((data[0]>>5)&0x3); - if (bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) - bandwidth = OPUS_BANDWIDTH_NARROWBAND; - } else if ((data[0]&0x60) == 0x60) - { - bandwidth = (data[0]&0x10) ? OPUS_BANDWIDTH_FULLBAND : - OPUS_BANDWIDTH_SUPERWIDEBAND; - } else { - bandwidth = OPUS_BANDWIDTH_NARROWBAND + ((data[0]>>5)&0x3); - } - return bandwidth; -} - -int opus_packet_get_nb_channels(const unsigned char *data) -{ - return (data[0]&0x4) ? 2 : 1; -} - -int opus_packet_get_nb_frames(const unsigned char packet[], opus_int32 len) -{ - int count; - if (len<1) - return OPUS_BAD_ARG; - count = packet[0]&0x3; - if (count==0) - return 1; - else if (count!=3) - return 2; - else if (len<2) - return OPUS_INVALID_PACKET; - else - return packet[1]&0x3F; -} - -int opus_packet_get_nb_samples(const unsigned char packet[], opus_int32 len, - opus_int32 Fs) -{ - int samples; - int count = opus_packet_get_nb_frames(packet, len); - - if (count<0) - return count; - - samples = count*opus_packet_get_samples_per_frame(packet, Fs); - /* Can't have more than 120 ms */ - if (samples*25 > Fs*3) - return OPUS_INVALID_PACKET; - else - return samples; -} - -int opus_decoder_get_nb_samples(const OpusDecoder *dec, - const unsigned char packet[], opus_int32 len) -{ - return opus_packet_get_nb_samples(packet, len, dec->Fs); -} diff --git a/thirdparty/opus/opus_encoder.c b/thirdparty/opus/opus_encoder.c deleted file mode 100644 index 9a516a884a..0000000000 --- a/thirdparty/opus/opus_encoder.c +++ /dev/null @@ -1,2536 +0,0 @@ -/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited - Written by Jean-Marc Valin and Koen Vos */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the 
following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <stdarg.h> -#include "celt.h" -#include "entenc.h" -#include "modes.h" -#include "API.h" -#include "stack_alloc.h" -#include "float_cast.h" -#include "opus.h" -#include "arch.h" -#include "pitch.h" -#include "opus_private.h" -#include "os_support.h" -#include "cpu_support.h" -#include "analysis.h" -#include "mathops.h" -#include "tuning_parameters.h" -#ifdef FIXED_POINT -#include "fixed/structs_FIX.h" -#else -#include "float/structs_FLP.h" -#endif - -#define MAX_ENCODER_BUFFER 480 - -typedef struct { - opus_val32 XX, XY, YY; - opus_val16 smoothed_width; - opus_val16 max_follower; -} StereoWidthState; - -struct OpusEncoder { - int celt_enc_offset; - int silk_enc_offset; - silk_EncControlStruct silk_mode; - int application; - int channels; - int delay_compensation; - int force_channels; - int signal_type; - int user_bandwidth; - int max_bandwidth; - int user_forced_mode; - int voice_ratio; - opus_int32 Fs; - int use_vbr; - int vbr_constraint; - int variable_duration; - opus_int32 bitrate_bps; - opus_int32 user_bitrate_bps; - int lsb_depth; - int encoder_buffer; - int lfe; - int arch; -#ifndef DISABLE_FLOAT_API - TonalityAnalysisState analysis; -#endif - -#define OPUS_ENCODER_RESET_START stream_channels - int stream_channels; - opus_int16 hybrid_stereo_width_Q14; - opus_int32 variable_HP_smth2_Q15; - opus_val16 prev_HB_gain; - opus_val32 hp_mem[4]; - int mode; - int prev_mode; - int prev_channels; - int prev_framesize; - int bandwidth; - int silk_bw_switch; - /* Sampling rate (at the API level) */ - int first; - opus_val16 * energy_masking; - StereoWidthState width_mem; - opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; -#ifndef DISABLE_FLOAT_API - int detected_bandwidth; -#endif - opus_uint32 rangeFinal; -}; - -/* Transition tables for the voice and music. First column is the - middle (memoriless) threshold. 
The second column is the hysteresis - (difference with the middle) */ -static const opus_int32 mono_voice_bandwidth_thresholds[8] = { - 11000, 1000, /* NB<->MB */ - 14000, 1000, /* MB<->WB */ - 17000, 1000, /* WB<->SWB */ - 21000, 2000, /* SWB<->FB */ -}; -static const opus_int32 mono_music_bandwidth_thresholds[8] = { - 12000, 1000, /* NB<->MB */ - 15000, 1000, /* MB<->WB */ - 18000, 2000, /* WB<->SWB */ - 22000, 2000, /* SWB<->FB */ -}; -static const opus_int32 stereo_voice_bandwidth_thresholds[8] = { - 11000, 1000, /* NB<->MB */ - 14000, 1000, /* MB<->WB */ - 21000, 2000, /* WB<->SWB */ - 28000, 2000, /* SWB<->FB */ -}; -static const opus_int32 stereo_music_bandwidth_thresholds[8] = { - 12000, 1000, /* NB<->MB */ - 18000, 2000, /* MB<->WB */ - 21000, 2000, /* WB<->SWB */ - 30000, 2000, /* SWB<->FB */ -}; -/* Threshold bit-rates for switching between mono and stereo */ -static const opus_int32 stereo_voice_threshold = 30000; -static const opus_int32 stereo_music_threshold = 30000; - -/* Threshold bit-rate for switching between SILK/hybrid and CELT-only */ -static const opus_int32 mode_thresholds[2][2] = { - /* voice */ /* music */ - { 64000, 16000}, /* mono */ - { 36000, 16000}, /* stereo */ -}; - -int opus_encoder_get_size(int channels) -{ - int silkEncSizeBytes, celtEncSizeBytes; - int ret; - if (channels<1 || channels > 2) - return 0; - ret = silk_Get_Encoder_Size( &silkEncSizeBytes ); - if (ret) - return 0; - silkEncSizeBytes = align(silkEncSizeBytes); - celtEncSizeBytes = celt_encoder_get_size(channels); - return align(sizeof(OpusEncoder))+silkEncSizeBytes+celtEncSizeBytes; -} - -int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int application) -{ - void *silk_enc; - CELTEncoder *celt_enc; - int err; - int ret, silkEncSizeBytes; - - if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)|| - (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO - && application != 
OPUS_APPLICATION_RESTRICTED_LOWDELAY)) - return OPUS_BAD_ARG; - - OPUS_CLEAR((char*)st, opus_encoder_get_size(channels)); - /* Create SILK encoder */ - ret = silk_Get_Encoder_Size( &silkEncSizeBytes ); - if (ret) - return OPUS_BAD_ARG; - silkEncSizeBytes = align(silkEncSizeBytes); - st->silk_enc_offset = align(sizeof(OpusEncoder)); - st->celt_enc_offset = st->silk_enc_offset+silkEncSizeBytes; - silk_enc = (char*)st+st->silk_enc_offset; - celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); - - st->stream_channels = st->channels = channels; - - st->Fs = Fs; - - st->arch = opus_select_arch(); - - ret = silk_InitEncoder( silk_enc, st->arch, &st->silk_mode ); - if(ret)return OPUS_INTERNAL_ERROR; - - /* default SILK parameters */ - st->silk_mode.nChannelsAPI = channels; - st->silk_mode.nChannelsInternal = channels; - st->silk_mode.API_sampleRate = st->Fs; - st->silk_mode.maxInternalSampleRate = 16000; - st->silk_mode.minInternalSampleRate = 8000; - st->silk_mode.desiredInternalSampleRate = 16000; - st->silk_mode.payloadSize_ms = 20; - st->silk_mode.bitRate = 25000; - st->silk_mode.packetLossPercentage = 0; - st->silk_mode.complexity = 9; - st->silk_mode.useInBandFEC = 0; - st->silk_mode.useDTX = 0; - st->silk_mode.useCBR = 0; - st->silk_mode.reducedDependency = 0; - - /* Create CELT encoder */ - /* Initialize CELT encoder */ - err = celt_encoder_init(celt_enc, Fs, channels, st->arch); - if(err!=OPUS_OK)return OPUS_INTERNAL_ERROR; - - celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0)); - celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity)); - - st->use_vbr = 1; - /* Makes constrained VBR the default (safer for real-time use) */ - st->vbr_constraint = 1; - st->user_bitrate_bps = OPUS_AUTO; - st->bitrate_bps = 3000+Fs*channels; - st->application = application; - st->signal_type = OPUS_AUTO; - st->user_bandwidth = OPUS_AUTO; - st->max_bandwidth = OPUS_BANDWIDTH_FULLBAND; - st->force_channels = OPUS_AUTO; - st->user_forced_mode = OPUS_AUTO; - 
st->voice_ratio = -1; - st->encoder_buffer = st->Fs/100; - st->lsb_depth = 24; - st->variable_duration = OPUS_FRAMESIZE_ARG; - - /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead - + 1.5 ms for SILK resamplers and stereo prediction) */ - st->delay_compensation = st->Fs/250; - - st->hybrid_stereo_width_Q14 = 1 << 14; - st->prev_HB_gain = Q15ONE; - st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); - st->first = 1; - st->mode = MODE_HYBRID; - st->bandwidth = OPUS_BANDWIDTH_FULLBAND; - -#ifndef DISABLE_FLOAT_API - tonality_analysis_init(&st->analysis); -#endif - - return OPUS_OK; -} - -static unsigned char gen_toc(int mode, int framerate, int bandwidth, int channels) -{ - int period; - unsigned char toc; - period = 0; - while (framerate < 400) - { - framerate <<= 1; - period++; - } - if (mode == MODE_SILK_ONLY) - { - toc = (bandwidth-OPUS_BANDWIDTH_NARROWBAND)<<5; - toc |= (period-2)<<3; - } else if (mode == MODE_CELT_ONLY) - { - int tmp = bandwidth-OPUS_BANDWIDTH_MEDIUMBAND; - if (tmp < 0) - tmp = 0; - toc = 0x80; - toc |= tmp << 5; - toc |= period<<3; - } else /* Hybrid */ - { - toc = 0x60; - toc |= (bandwidth-OPUS_BANDWIDTH_SUPERWIDEBAND)<<4; - toc |= (period-2)<<3; - } - toc |= (channels==2)<<2; - return toc; -} - -#ifndef FIXED_POINT -static void silk_biquad_float( - const opus_val16 *in, /* I: Input signal */ - const opus_int32 *B_Q28, /* I: MA coefficients [3] */ - const opus_int32 *A_Q28, /* I: AR coefficients [2] */ - opus_val32 *S, /* I/O: State vector [2] */ - opus_val16 *out, /* O: Output signal */ - const opus_int32 len, /* I: Signal length (must be even) */ - int stride -) -{ - /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */ - opus_int k; - opus_val32 vout; - opus_val32 inval; - opus_val32 A[2], B[3]; - - A[0] = (opus_val32)(A_Q28[0] * (1.f/((opus_int32)1<<28))); - A[1] = (opus_val32)(A_Q28[1] * (1.f/((opus_int32)1<<28))); - B[0] = (opus_val32)(B_Q28[0] * (1.f/((opus_int32)1<<28))); - 
B[1] = (opus_val32)(B_Q28[1] * (1.f/((opus_int32)1<<28))); - B[2] = (opus_val32)(B_Q28[2] * (1.f/((opus_int32)1<<28))); - - /* Negate A_Q28 values and split in two parts */ - - for( k = 0; k < len; k++ ) { - /* S[ 0 ], S[ 1 ]: Q12 */ - inval = in[ k*stride ]; - vout = S[ 0 ] + B[0]*inval; - - S[ 0 ] = S[1] - vout*A[0] + B[1]*inval; - - S[ 1 ] = - vout*A[1] + B[2]*inval + VERY_SMALL; - - /* Scale back to Q0 and saturate */ - out[ k*stride ] = vout; - } -} -#endif - -static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) -{ - opus_int32 B_Q28[ 3 ], A_Q28[ 2 ]; - opus_int32 Fc_Q19, r_Q28, r_Q22; - - silk_assert( cutoff_Hz <= silk_int32_MAX / SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ) ); - Fc_Q19 = silk_DIV32_16( silk_SMULBB( SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ), cutoff_Hz ), Fs/1000 ); - silk_assert( Fc_Q19 > 0 && Fc_Q19 < 32768 ); - - r_Q28 = SILK_FIX_CONST( 1.0, 28 ) - silk_MUL( SILK_FIX_CONST( 0.92, 9 ), Fc_Q19 ); - - /* b = r * [ 1; -2; 1 ]; */ - /* a = [ 1; -2 * r * ( 1 - 0.5 * Fc^2 ); r^2 ]; */ - B_Q28[ 0 ] = r_Q28; - B_Q28[ 1 ] = silk_LSHIFT( -r_Q28, 1 ); - B_Q28[ 2 ] = r_Q28; - - /* -r * ( 2 - Fc * Fc ); */ - r_Q22 = silk_RSHIFT( r_Q28, 6 ); - A_Q28[ 0 ] = silk_SMULWW( r_Q22, silk_SMULWW( Fc_Q19, Fc_Q19 ) - SILK_FIX_CONST( 2.0, 22 ) ); - A_Q28[ 1 ] = silk_SMULWW( r_Q22, r_Q22 ); - -#ifdef FIXED_POINT - silk_biquad_alt( in, B_Q28, A_Q28, hp_mem, out, len, channels ); - if( channels == 2 ) { - silk_biquad_alt( in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels ); - } -#else - silk_biquad_float( in, B_Q28, A_Q28, hp_mem, out, len, channels ); - if( channels == 2 ) { - silk_biquad_float( in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels ); - } -#endif -} - -#ifdef FIXED_POINT -static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) -{ - int c, i; - int shift; - - /* Approximates 
-round(log2(4.*cutoff_Hz/Fs)) */ - shift=celt_ilog2(Fs/(cutoff_Hz*3)); - for (c=0;c<channels;c++) - { - for (i=0;i<len;i++) - { - opus_val32 x, tmp, y; - x = SHL32(EXTEND32(in[channels*i+c]), 15); - /* First stage */ - tmp = x-hp_mem[2*c]; - hp_mem[2*c] = hp_mem[2*c] + PSHR32(x - hp_mem[2*c], shift); - /* Second stage */ - y = tmp - hp_mem[2*c+1]; - hp_mem[2*c+1] = hp_mem[2*c+1] + PSHR32(tmp - hp_mem[2*c+1], shift); - out[channels*i+c] = EXTRACT16(SATURATE(PSHR32(y, 15), 32767)); - } - } -} - -#else -static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) -{ - int c, i; - float coef; - - coef = 4.0f*cutoff_Hz/Fs; - for (c=0;c<channels;c++) - { - for (i=0;i<len;i++) - { - opus_val32 x, tmp, y; - x = in[channels*i+c]; - /* First stage */ - tmp = x-hp_mem[2*c]; - hp_mem[2*c] = hp_mem[2*c] + coef*(x - hp_mem[2*c]) + VERY_SMALL; - /* Second stage */ - y = tmp - hp_mem[2*c+1]; - hp_mem[2*c+1] = hp_mem[2*c+1] + coef*(tmp - hp_mem[2*c+1]) + VERY_SMALL; - out[channels*i+c] = y; - } - } -} -#endif - -static void stereo_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2, - int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs) -{ - int i; - int overlap; - int inc; - inc = 48000/Fs; - overlap=overlap48/inc; - g1 = Q15ONE-g1; - g2 = Q15ONE-g2; - for (i=0;i<overlap;i++) - { - opus_val32 diff; - opus_val16 g, w; - w = MULT16_16_Q15(window[i*inc], window[i*inc]); - g = SHR32(MAC16_16(MULT16_16(w,g2), - Q15ONE-w, g1), 15); - diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1])); - diff = MULT16_16_Q15(g, diff); - out[i*channels] = out[i*channels] - diff; - out[i*channels+1] = out[i*channels+1] + diff; - } - for (;i<frame_size;i++) - { - opus_val32 diff; - diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1])); - diff = MULT16_16_Q15(g2, diff); - out[i*channels] = out[i*channels] - diff; - 
out[i*channels+1] = out[i*channels+1] + diff; - } -} - -static void gain_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2, - int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs) -{ - int i; - int inc; - int overlap; - int c; - inc = 48000/Fs; - overlap=overlap48/inc; - if (channels==1) - { - for (i=0;i<overlap;i++) - { - opus_val16 g, w; - w = MULT16_16_Q15(window[i*inc], window[i*inc]); - g = SHR32(MAC16_16(MULT16_16(w,g2), - Q15ONE-w, g1), 15); - out[i] = MULT16_16_Q15(g, in[i]); - } - } else { - for (i=0;i<overlap;i++) - { - opus_val16 g, w; - w = MULT16_16_Q15(window[i*inc], window[i*inc]); - g = SHR32(MAC16_16(MULT16_16(w,g2), - Q15ONE-w, g1), 15); - out[i*2] = MULT16_16_Q15(g, in[i*2]); - out[i*2+1] = MULT16_16_Q15(g, in[i*2+1]); - } - } - c=0;do { - for (i=overlap;i<frame_size;i++) - { - out[i*channels+c] = MULT16_16_Q15(g2, in[i*channels+c]); - } - } - while (++c<channels); -} - -OpusEncoder *opus_encoder_create(opus_int32 Fs, int channels, int application, int *error) -{ - int ret; - OpusEncoder *st; - if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)|| - (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO - && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)) - { - if (error) - *error = OPUS_BAD_ARG; - return NULL; - } - st = (OpusEncoder *)opus_alloc(opus_encoder_get_size(channels)); - if (st == NULL) - { - if (error) - *error = OPUS_ALLOC_FAIL; - return NULL; - } - ret = opus_encoder_init(st, Fs, channels, application); - if (error) - *error = ret; - if (ret != OPUS_OK) - { - opus_free(st); - st = NULL; - } - return st; -} - -static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int max_data_bytes) -{ - if(!frame_size)frame_size=st->Fs/400; - if (st->user_bitrate_bps==OPUS_AUTO) - return 60*st->Fs/frame_size + st->Fs*st->channels; - else if (st->user_bitrate_bps==OPUS_BITRATE_MAX) - return 
max_data_bytes*8*st->Fs/frame_size; - else - return st->user_bitrate_bps; -} - -#ifndef DISABLE_FLOAT_API -/* Don't use more than 60 ms for the frame size analysis */ -#define MAX_DYNAMIC_FRAMESIZE 24 -/* Estimates how much the bitrate will be boosted based on the sub-frame energy */ -static float transient_boost(const float *E, const float *E_1, int LM, int maxM) -{ - int i; - int M; - float sumE=0, sumE_1=0; - float metric; - - M = IMIN(maxM, (1<<LM)+1); - for (i=0;i<M;i++) - { - sumE += E[i]; - sumE_1 += E_1[i]; - } - metric = sumE*sumE_1/(M*M); - /*if (LM==3) - printf("%f\n", metric);*/ - /*return metric>10 ? 1 : 0;*/ - /*return MAX16(0,1-exp(-.25*(metric-2.)));*/ - return MIN16(1,(float)sqrt(MAX16(0,.05f*(metric-2)))); -} - -/* Viterbi decoding trying to find the best frame size combination using look-ahead - - State numbering: - 0: unused - 1: 2.5 ms - 2: 5 ms (#1) - 3: 5 ms (#2) - 4: 10 ms (#1) - 5: 10 ms (#2) - 6: 10 ms (#3) - 7: 10 ms (#4) - 8: 20 ms (#1) - 9: 20 ms (#2) - 10: 20 ms (#3) - 11: 20 ms (#4) - 12: 20 ms (#5) - 13: 20 ms (#6) - 14: 20 ms (#7) - 15: 20 ms (#8) -*/ -static int transient_viterbi(const float *E, const float *E_1, int N, int frame_cost, int rate) -{ - int i; - float cost[MAX_DYNAMIC_FRAMESIZE][16]; - int states[MAX_DYNAMIC_FRAMESIZE][16]; - float best_cost; - int best_state; - float factor; - /* Take into account that we damp VBR in the 32 kb/s to 64 kb/s range. 
*/ - if (rate<80) - factor=0; - else if (rate>160) - factor=1; - else - factor = (rate-80.f)/80.f; - /* Makes variable framesize less aggressive at lower bitrates, but I can't - find any valid theoretical justification for this (other than it seems - to help) */ - for (i=0;i<16;i++) - { - /* Impossible state */ - states[0][i] = -1; - cost[0][i] = 1e10; - } - for (i=0;i<4;i++) - { - cost[0][1<<i] = (frame_cost + rate*(1<<i))*(1+factor*transient_boost(E, E_1, i, N+1)); - states[0][1<<i] = i; - } - for (i=1;i<N;i++) - { - int j; - - /* Follow continuations */ - for (j=2;j<16;j++) - { - cost[i][j] = cost[i-1][j-1]; - states[i][j] = j-1; - } - - /* New frames */ - for(j=0;j<4;j++) - { - int k; - float min_cost; - float curr_cost; - states[i][1<<j] = 1; - min_cost = cost[i-1][1]; - for(k=1;k<4;k++) - { - float tmp = cost[i-1][(1<<(k+1))-1]; - if (tmp < min_cost) - { - states[i][1<<j] = (1<<(k+1))-1; - min_cost = tmp; - } - } - curr_cost = (frame_cost + rate*(1<<j))*(1+factor*transient_boost(E+i, E_1+i, j, N-i+1)); - cost[i][1<<j] = min_cost; - /* If part of the frame is outside the analysis window, only count part of the cost */ - if (N-i < (1<<j)) - cost[i][1<<j] += curr_cost*(float)(N-i)/(1<<j); - else - cost[i][1<<j] += curr_cost; - } - } - - best_state=1; - best_cost = cost[N-1][1]; - /* Find best end state (doesn't force a frame to end at N-1) */ - for (i=2;i<16;i++) - { - if (cost[N-1][i]<best_cost) - { - best_cost = cost[N-1][i]; - best_state = i; - } - } - - /* Follow transitions back */ - for (i=N-1;i>=0;i--) - { - /*printf("%d ", best_state);*/ - best_state = states[i][best_state]; - } - /*printf("%d\n", best_state);*/ - return best_state; -} - -static int optimize_framesize(const void *x, int len, int C, opus_int32 Fs, - int bitrate, opus_val16 tonality, float *mem, int buffering, - downmix_func downmix) -{ - int N; - int i; - float e[MAX_DYNAMIC_FRAMESIZE+4]; - float e_1[MAX_DYNAMIC_FRAMESIZE+3]; - opus_val32 memx; - int bestLM=0; - int subframe; - int pos; - 
int offset; - VARDECL(opus_val32, sub); - - subframe = Fs/400; - ALLOC(sub, subframe, opus_val32); - e[0]=mem[0]; - e_1[0]=1.f/(EPSILON+mem[0]); - if (buffering) - { - /* Consider the CELT delay when not in restricted-lowdelay */ - /* We assume the buffering is between 2.5 and 5 ms */ - offset = 2*subframe - buffering; - celt_assert(offset>=0 && offset <= subframe); - len -= offset; - e[1]=mem[1]; - e_1[1]=1.f/(EPSILON+mem[1]); - e[2]=mem[2]; - e_1[2]=1.f/(EPSILON+mem[2]); - pos = 3; - } else { - pos=1; - offset=0; - } - N=IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE); - /* Just silencing a warning, it's really initialized later */ - memx = 0; - for (i=0;i<N;i++) - { - float tmp; - opus_val32 tmpx; - int j; - tmp=EPSILON; - - downmix(x, sub, subframe, i*subframe+offset, 0, -2, C); - if (i==0) - memx = sub[0]; - for (j=0;j<subframe;j++) - { - tmpx = sub[j]; - tmp += (tmpx-memx)*(float)(tmpx-memx); - memx = tmpx; - } - e[i+pos] = tmp; - e_1[i+pos] = 1.f/tmp; - } - /* Hack to get 20 ms working with APPLICATION_AUDIO - The real problem is that the corresponding memory needs to use 1.5 ms - from this frame and 1 ms from the next frame */ - e[i+pos] = e[i+pos-1]; - if (buffering) - N=IMIN(MAX_DYNAMIC_FRAMESIZE, N+2); - bestLM = transient_viterbi(e, e_1, N, (int)((1.f+.5f*tonality)*(60*C+40)), bitrate/400); - mem[0] = e[1<<bestLM]; - if (buffering) - { - mem[1] = e[(1<<bestLM)+1]; - mem[2] = e[(1<<bestLM)+2]; - } - return bestLM; -} - -#endif - -#ifndef DISABLE_FLOAT_API -#ifdef FIXED_POINT -#define PCM2VAL(x) FLOAT2INT16(x) -#else -#define PCM2VAL(x) SCALEIN(x) -#endif -void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C) -{ - const float *x; - opus_val32 scale; - int j; - x = (const float *)_x; - for (j=0;j<subframe;j++) - sub[j] = PCM2VAL(x[(j+offset)*C+c1]); - if (c2>-1) - { - for (j=0;j<subframe;j++) - sub[j] += PCM2VAL(x[(j+offset)*C+c2]); - } else if (c2==-2) - { - int c; - for (c=1;c<C;c++) - { - for (j=0;j<subframe;j++) 
- sub[j] += PCM2VAL(x[(j+offset)*C+c]); - } - } -#ifdef FIXED_POINT - scale = (1<<SIG_SHIFT); -#else - scale = 1.f; -#endif - if (C==-2) - scale /= C; - else - scale /= 2; - for (j=0;j<subframe;j++) - sub[j] *= scale; -} -#endif - -void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C) -{ - const opus_int16 *x; - opus_val32 scale; - int j; - x = (const opus_int16 *)_x; - for (j=0;j<subframe;j++) - sub[j] = x[(j+offset)*C+c1]; - if (c2>-1) - { - for (j=0;j<subframe;j++) - sub[j] += x[(j+offset)*C+c2]; - } else if (c2==-2) - { - int c; - for (c=1;c<C;c++) - { - for (j=0;j<subframe;j++) - sub[j] += x[(j+offset)*C+c]; - } - } -#ifdef FIXED_POINT - scale = (1<<SIG_SHIFT); -#else - scale = 1.f/32768; -#endif - if (C==-2) - scale /= C; - else - scale /= 2; - for (j=0;j<subframe;j++) - sub[j] *= scale; -} - -opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs) -{ - int new_size; - if (frame_size<Fs/400) - return -1; - if (variable_duration == OPUS_FRAMESIZE_ARG) - new_size = frame_size; - else if (variable_duration == OPUS_FRAMESIZE_VARIABLE) - new_size = Fs/50; - else if (variable_duration >= OPUS_FRAMESIZE_2_5_MS && variable_duration <= OPUS_FRAMESIZE_60_MS) - new_size = IMIN(3*Fs/50, (Fs/400)<<(variable_duration-OPUS_FRAMESIZE_2_5_MS)); - else - return -1; - if (new_size>frame_size) - return -1; - if (400*new_size!=Fs && 200*new_size!=Fs && 100*new_size!=Fs && - 50*new_size!=Fs && 25*new_size!=Fs && 50*new_size!=3*Fs) - return -1; - return new_size; -} - -opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, - int variable_duration, int C, opus_int32 Fs, int bitrate_bps, - int delay_compensation, downmix_func downmix -#ifndef DISABLE_FLOAT_API - , float *subframe_mem -#endif - ) -{ -#ifndef DISABLE_FLOAT_API - if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200) - { - int LM = 3; - LM = optimize_framesize(analysis_pcm, frame_size, C, Fs, bitrate_bps, - 
0, subframe_mem, delay_compensation, downmix); - while ((Fs/400<<LM)>frame_size) - LM--; - frame_size = (Fs/400<<LM); - } else -#else - (void)analysis_pcm; - (void)C; - (void)bitrate_bps; - (void)delay_compensation; - (void)downmix; -#endif - { - frame_size = frame_size_select(frame_size, variable_duration, Fs); - } - if (frame_size<0) - return -1; - return frame_size; -} - -opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem) -{ - opus_val32 xx, xy, yy; - opus_val16 sqrt_xx, sqrt_yy; - opus_val16 qrrt_xx, qrrt_yy; - int frame_rate; - int i; - opus_val16 short_alpha; - - frame_rate = Fs/frame_size; - short_alpha = Q15ONE - MULT16_16(25, Q15ONE)/IMAX(50,frame_rate); - xx=xy=yy=0; - /* Unroll by 4. The frame size is always a multiple of 4 *except* for - 2.5 ms frames at 12 kHz. Since this setting is very rare (and very - stupid), we just discard the last two samples. */ - for (i=0;i<frame_size-3;i+=4) - { - opus_val32 pxx=0; - opus_val32 pxy=0; - opus_val32 pyy=0; - opus_val16 x, y; - x = pcm[2*i]; - y = pcm[2*i+1]; - pxx = SHR32(MULT16_16(x,x),2); - pxy = SHR32(MULT16_16(x,y),2); - pyy = SHR32(MULT16_16(y,y),2); - x = pcm[2*i+2]; - y = pcm[2*i+3]; - pxx += SHR32(MULT16_16(x,x),2); - pxy += SHR32(MULT16_16(x,y),2); - pyy += SHR32(MULT16_16(y,y),2); - x = pcm[2*i+4]; - y = pcm[2*i+5]; - pxx += SHR32(MULT16_16(x,x),2); - pxy += SHR32(MULT16_16(x,y),2); - pyy += SHR32(MULT16_16(y,y),2); - x = pcm[2*i+6]; - y = pcm[2*i+7]; - pxx += SHR32(MULT16_16(x,x),2); - pxy += SHR32(MULT16_16(x,y),2); - pyy += SHR32(MULT16_16(y,y),2); - - xx += SHR32(pxx, 10); - xy += SHR32(pxy, 10); - yy += SHR32(pyy, 10); - } - mem->XX += MULT16_32_Q15(short_alpha, xx-mem->XX); - mem->XY += MULT16_32_Q15(short_alpha, xy-mem->XY); - mem->YY += MULT16_32_Q15(short_alpha, yy-mem->YY); - mem->XX = MAX32(0, mem->XX); - mem->XY = MAX32(0, mem->XY); - mem->YY = MAX32(0, mem->YY); - if (MAX32(mem->XX, mem->YY)>QCONST16(8e-4f, 18)) - { - opus_val16 
corr; - opus_val16 ldiff; - opus_val16 width; - sqrt_xx = celt_sqrt(mem->XX); - sqrt_yy = celt_sqrt(mem->YY); - qrrt_xx = celt_sqrt(sqrt_xx); - qrrt_yy = celt_sqrt(sqrt_yy); - /* Inter-channel correlation */ - mem->XY = MIN32(mem->XY, sqrt_xx*sqrt_yy); - corr = SHR32(frac_div32(mem->XY,EPSILON+MULT16_16(sqrt_xx,sqrt_yy)),16); - /* Approximate loudness difference */ - ldiff = MULT16_16(Q15ONE, ABS16(qrrt_xx-qrrt_yy))/(EPSILON+qrrt_xx+qrrt_yy); - width = MULT16_16_Q15(celt_sqrt(QCONST32(1.f,30)-MULT16_16(corr,corr)), ldiff); - /* Smoothing over one second */ - mem->smoothed_width += (width-mem->smoothed_width)/frame_rate; - /* Peak follower */ - mem->max_follower = MAX16(mem->max_follower-QCONST16(.02f,15)/frame_rate, mem->smoothed_width); - } - /*printf("%f %f %f %f %f ", corr/(float)Q15ONE, ldiff/(float)Q15ONE, width/(float)Q15ONE, mem->smoothed_width/(float)Q15ONE, mem->max_follower/(float)Q15ONE);*/ - return EXTRACT16(MIN32(Q15ONE, MULT16_16(20, mem->max_follower))); -} - -opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, - unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, - const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, - int analysis_channels, downmix_func downmix, int float_api) -{ - void *silk_enc; - CELTEncoder *celt_enc; - int i; - int ret=0; - opus_int32 nBytes; - ec_enc enc; - int bytes_target; - int prefill=0; - int start_band = 0; - int redundancy = 0; - int redundancy_bytes = 0; /* Number of bytes to use for redundancy frame */ - int celt_to_silk = 0; - VARDECL(opus_val16, pcm_buf); - int nb_compr_bytes; - int to_celt = 0; - opus_uint32 redundant_rng = 0; - int cutoff_Hz, hp_freq_smth1; - int voice_est; /* Probability of voice in Q7 */ - opus_int32 equiv_rate; - int delay_compensation; - int frame_rate; - opus_int32 max_rate; /* Max bitrate we're allowed to use */ - int curr_bandwidth; - opus_val16 HB_gain; - opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */ - 
int total_buffer; - opus_val16 stereo_width; - const CELTMode *celt_mode; -#ifndef DISABLE_FLOAT_API - AnalysisInfo analysis_info; - int analysis_read_pos_bak=-1; - int analysis_read_subframe_bak=-1; -#endif - VARDECL(opus_val16, tmp_prefill); - - ALLOC_STACK; - - max_data_bytes = IMIN(1276, out_data_bytes); - - st->rangeFinal = 0; - if ((!st->variable_duration && 400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs && - 50*frame_size != st->Fs && 25*frame_size != st->Fs && 50*frame_size != 3*st->Fs) - || (400*frame_size < st->Fs) - || max_data_bytes<=0 - ) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - silk_enc = (char*)st+st->silk_enc_offset; - celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); - if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) - delay_compensation = 0; - else - delay_compensation = st->delay_compensation; - - lsb_depth = IMIN(lsb_depth, st->lsb_depth); - - celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); -#ifndef DISABLE_FLOAT_API - analysis_info.valid = 0; -#ifdef FIXED_POINT - if (st->silk_mode.complexity >= 10 && st->Fs==48000) -#else - if (st->silk_mode.complexity >= 7 && st->Fs==48000) -#endif - { - analysis_read_pos_bak = st->analysis.read_pos; - analysis_read_subframe_bak = st->analysis.read_subframe; - run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size, - c1, c2, analysis_channels, st->Fs, - lsb_depth, downmix, &analysis_info); - } -#else - (void)analysis_pcm; - (void)analysis_size; -#endif - - st->voice_ratio = -1; - -#ifndef DISABLE_FLOAT_API - st->detected_bandwidth = 0; - if (analysis_info.valid) - { - int analysis_bandwidth; - if (st->signal_type == OPUS_AUTO) - st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob)); - - analysis_bandwidth = analysis_info.bandwidth; - if (analysis_bandwidth<=12) - st->detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND; - else if (analysis_bandwidth<=14) - st->detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; - 
else if (analysis_bandwidth<=16) - st->detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND; - else if (analysis_bandwidth<=18) - st->detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; - else - st->detected_bandwidth = OPUS_BANDWIDTH_FULLBAND; - } -#endif - - if (st->channels==2 && st->force_channels!=1) - stereo_width = compute_stereo_width(pcm, frame_size, st->Fs, &st->width_mem); - else - stereo_width = 0; - total_buffer = delay_compensation; - st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes); - - frame_rate = st->Fs/frame_size; - if (!st->use_vbr) - { - int cbrBytes; - /* Multiply by 3 to make sure the division is exact. */ - int frame_rate3 = 3*st->Fs/frame_size; - /* We need to make sure that "int" values always fit in 16 bits. */ - cbrBytes = IMIN( (3*st->bitrate_bps/8 + frame_rate3/2)/frame_rate3, max_data_bytes); - st->bitrate_bps = cbrBytes*(opus_int32)frame_rate3*8/3; - max_data_bytes = cbrBytes; - } - if (max_data_bytes<3 || st->bitrate_bps < 3*frame_rate*8 - || (frame_rate<50 && (max_data_bytes*frame_rate<300 || st->bitrate_bps < 2400))) - { - /*If the space is too low to do something useful, emit 'PLC' frames.*/ - int tocmode = st->mode; - int bw = st->bandwidth == 0 ? 
OPUS_BANDWIDTH_NARROWBAND : st->bandwidth; - if (tocmode==0) - tocmode = MODE_SILK_ONLY; - if (frame_rate>100) - tocmode = MODE_CELT_ONLY; - if (frame_rate < 50) - tocmode = MODE_SILK_ONLY; - if(tocmode==MODE_SILK_ONLY&&bw>OPUS_BANDWIDTH_WIDEBAND) - bw=OPUS_BANDWIDTH_WIDEBAND; - else if (tocmode==MODE_CELT_ONLY&&bw==OPUS_BANDWIDTH_MEDIUMBAND) - bw=OPUS_BANDWIDTH_NARROWBAND; - else if (tocmode==MODE_HYBRID&&bw<=OPUS_BANDWIDTH_SUPERWIDEBAND) - bw=OPUS_BANDWIDTH_SUPERWIDEBAND; - data[0] = gen_toc(tocmode, frame_rate, bw, st->stream_channels); - ret = 1; - if (!st->use_vbr) - { - ret = opus_packet_pad(data, ret, max_data_bytes); - if (ret == OPUS_OK) - ret = max_data_bytes; - } - RESTORE_STACK; - return ret; - } - max_rate = frame_rate*max_data_bytes*8; - - /* Equivalent 20-ms rate for mode/channel/bandwidth decisions */ - equiv_rate = st->bitrate_bps - (40*st->channels+20)*(st->Fs/frame_size - 50); - - if (st->signal_type == OPUS_SIGNAL_VOICE) - voice_est = 127; - else if (st->signal_type == OPUS_SIGNAL_MUSIC) - voice_est = 0; - else if (st->voice_ratio >= 0) - { - voice_est = st->voice_ratio*327>>8; - /* For AUDIO, never be more than 90% confident of having speech */ - if (st->application == OPUS_APPLICATION_AUDIO) - voice_est = IMIN(voice_est, 115); - } else if (st->application == OPUS_APPLICATION_VOIP) - voice_est = 115; - else - voice_est = 48; - - if (st->force_channels!=OPUS_AUTO && st->channels == 2) - { - st->stream_channels = st->force_channels; - } else { -#ifdef FUZZING - /* Random mono/stereo decision */ - if (st->channels == 2 && (rand()&0x1F)==0) - st->stream_channels = 3-st->stream_channels; -#else - /* Rate-dependent mono-stereo decision */ - if (st->channels == 2) - { - opus_int32 stereo_threshold; - stereo_threshold = stereo_music_threshold + ((voice_est*voice_est*(stereo_voice_threshold-stereo_music_threshold))>>14); - if (st->stream_channels == 2) - stereo_threshold -= 1000; - else - stereo_threshold += 1000; - st->stream_channels = (equiv_rate > 
stereo_threshold) ? 2 : 1; - } else { - st->stream_channels = st->channels; - } -#endif - } - equiv_rate = st->bitrate_bps - (40*st->stream_channels+20)*(st->Fs/frame_size - 50); - - /* Mode selection depending on application and signal type */ - if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) - { - st->mode = MODE_CELT_ONLY; - } else if (st->user_forced_mode == OPUS_AUTO) - { -#ifdef FUZZING - /* Random mode switching */ - if ((rand()&0xF)==0) - { - if ((rand()&0x1)==0) - st->mode = MODE_CELT_ONLY; - else - st->mode = MODE_SILK_ONLY; - } else { - if (st->prev_mode==MODE_CELT_ONLY) - st->mode = MODE_CELT_ONLY; - else - st->mode = MODE_SILK_ONLY; - } -#else - opus_int32 mode_voice, mode_music; - opus_int32 threshold; - - /* Interpolate based on stereo width */ - mode_voice = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[0][0]) - + MULT16_32_Q15(stereo_width,mode_thresholds[1][0])); - mode_music = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[1][1]) - + MULT16_32_Q15(stereo_width,mode_thresholds[1][1])); - /* Interpolate based on speech/music probability */ - threshold = mode_music + ((voice_est*voice_est*(mode_voice-mode_music))>>14); - /* Bias towards SILK for VoIP because of some useful features */ - if (st->application == OPUS_APPLICATION_VOIP) - threshold += 8000; - - /*printf("%f %d\n", stereo_width/(float)Q15ONE, threshold);*/ - /* Hysteresis */ - if (st->prev_mode == MODE_CELT_ONLY) - threshold -= 4000; - else if (st->prev_mode>0) - threshold += 4000; - - st->mode = (equiv_rate >= threshold) ? 
MODE_CELT_ONLY: MODE_SILK_ONLY; - - /* When FEC is enabled and there's enough packet loss, use SILK */ - if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > (128-voice_est)>>4) - st->mode = MODE_SILK_ONLY; - /* When encoding voice and DTX is enabled, set the encoder to SILK mode (at least for now) */ - if (st->silk_mode.useDTX && voice_est > 100) - st->mode = MODE_SILK_ONLY; -#endif - } else { - st->mode = st->user_forced_mode; - } - - /* Override the chosen mode to make sure we meet the requested frame size */ - if (st->mode != MODE_CELT_ONLY && frame_size < st->Fs/100) - st->mode = MODE_CELT_ONLY; - if (st->lfe) - st->mode = MODE_CELT_ONLY; - /* If max_data_bytes represents less than 8 kb/s, switch to CELT-only mode */ - if (max_data_bytes < (frame_rate > 50 ? 12000 : 8000)*frame_size / (st->Fs * 8)) - st->mode = MODE_CELT_ONLY; - - if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0 - && st->mode != MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY) - { - /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */ - st->silk_mode.toMono = 1; - st->stream_channels = 2; - } else { - st->silk_mode.toMono = 0; - } - - if (st->prev_mode > 0 && - ((st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) || - (st->mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY))) - { - redundancy = 1; - celt_to_silk = (st->mode != MODE_CELT_ONLY); - if (!celt_to_silk) - { - /* Switch to SILK/hybrid if frame size is 10 ms or more*/ - if (frame_size >= st->Fs/100) - { - st->mode = st->prev_mode; - to_celt = 1; - } else { - redundancy=0; - } - } - } - /* For the first frame at a new SILK bandwidth */ - if (st->silk_bw_switch) - { - redundancy = 1; - celt_to_silk = 1; - st->silk_bw_switch = 0; - prefill=1; - } - - if (redundancy) - { - /* Fair share of the max size allowed */ - redundancy_bytes = IMIN(257, max_data_bytes*(opus_int32)(st->Fs/200)/(frame_size+st->Fs/200)); - /* For VBR, target the 
actual bitrate (subject to the limit above) */ - if (st->use_vbr) - redundancy_bytes = IMIN(redundancy_bytes, st->bitrate_bps/1600); - } - - if (st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) - { - silk_EncControlStruct dummy; - silk_InitEncoder( silk_enc, st->arch, &dummy); - prefill=1; - } - - /* Automatic (rate-dependent) bandwidth selection */ - if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch) - { - const opus_int32 *voice_bandwidth_thresholds, *music_bandwidth_thresholds; - opus_int32 bandwidth_thresholds[8]; - int bandwidth = OPUS_BANDWIDTH_FULLBAND; - opus_int32 equiv_rate2; - - equiv_rate2 = equiv_rate; - if (st->mode != MODE_CELT_ONLY) - { - /* Adjust the threshold +/- 10% depending on complexity */ - equiv_rate2 = equiv_rate2 * (45+st->silk_mode.complexity)/50; - /* CBR is less efficient by ~1 kb/s */ - if (!st->use_vbr) - equiv_rate2 -= 1000; - } - if (st->channels==2 && st->force_channels!=1) - { - voice_bandwidth_thresholds = stereo_voice_bandwidth_thresholds; - music_bandwidth_thresholds = stereo_music_bandwidth_thresholds; - } else { - voice_bandwidth_thresholds = mono_voice_bandwidth_thresholds; - music_bandwidth_thresholds = mono_music_bandwidth_thresholds; - } - /* Interpolate bandwidth thresholds depending on voice estimation */ - for (i=0;i<8;i++) - { - bandwidth_thresholds[i] = music_bandwidth_thresholds[i] - + ((voice_est*voice_est*(voice_bandwidth_thresholds[i]-music_bandwidth_thresholds[i]))>>14); - } - do { - int threshold, hysteresis; - threshold = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)]; - hysteresis = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)+1]; - if (!st->first) - { - if (st->bandwidth >= bandwidth) - threshold -= hysteresis; - else - threshold += hysteresis; - } - if (equiv_rate2 >= threshold) - break; - } while (--bandwidth>OPUS_BANDWIDTH_NARROWBAND); - st->bandwidth = bandwidth; - /* Prevents any transition to SWB/FB until the SILK layer has 
fully - switched to WB mode and turned the variable LP filter off */ - if (!st->first && st->mode != MODE_CELT_ONLY && !st->silk_mode.inWBmodeWithoutVariableLP && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND) - st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; - } - - if (st->bandwidth>st->max_bandwidth) - st->bandwidth = st->max_bandwidth; - - if (st->user_bandwidth != OPUS_AUTO) - st->bandwidth = st->user_bandwidth; - - /* This prevents us from using hybrid at unsafe CBR/max rates */ - if (st->mode != MODE_CELT_ONLY && max_rate < 15000) - { - st->bandwidth = IMIN(st->bandwidth, OPUS_BANDWIDTH_WIDEBAND); - } - - /* Prevents Opus from wasting bits on frequencies that are above - the Nyquist rate of the input signal */ - if (st->Fs <= 24000 && st->bandwidth > OPUS_BANDWIDTH_SUPERWIDEBAND) - st->bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; - if (st->Fs <= 16000 && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND) - st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; - if (st->Fs <= 12000 && st->bandwidth > OPUS_BANDWIDTH_MEDIUMBAND) - st->bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; - if (st->Fs <= 8000 && st->bandwidth > OPUS_BANDWIDTH_NARROWBAND) - st->bandwidth = OPUS_BANDWIDTH_NARROWBAND; -#ifndef DISABLE_FLOAT_API - /* Use detected bandwidth to reduce the encoded bandwidth. */ - if (st->detected_bandwidth && st->user_bandwidth == OPUS_AUTO) - { - int min_detected_bandwidth; - /* Makes bandwidth detection more conservative just in case the detector - gets it wrong when we could have coded a high bandwidth transparently. - When operating in SILK/hybrid mode, we don't go below wideband to avoid - more complicated switches that require redundancy. 
*/ - if (equiv_rate <= 18000*st->stream_channels && st->mode == MODE_CELT_ONLY) - min_detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND; - else if (equiv_rate <= 24000*st->stream_channels && st->mode == MODE_CELT_ONLY) - min_detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; - else if (equiv_rate <= 30000*st->stream_channels) - min_detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND; - else if (equiv_rate <= 44000*st->stream_channels) - min_detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; - else - min_detected_bandwidth = OPUS_BANDWIDTH_FULLBAND; - - st->detected_bandwidth = IMAX(st->detected_bandwidth, min_detected_bandwidth); - st->bandwidth = IMIN(st->bandwidth, st->detected_bandwidth); - } -#endif - celt_encoder_ctl(celt_enc, OPUS_SET_LSB_DEPTH(lsb_depth)); - - /* CELT mode doesn't support mediumband, use wideband instead */ - if (st->mode == MODE_CELT_ONLY && st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) - st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; - if (st->lfe) - st->bandwidth = OPUS_BANDWIDTH_NARROWBAND; - - /* Can't support higher than wideband for >20 ms frames */ - if (frame_size > st->Fs/50 && (st->mode == MODE_CELT_ONLY || st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)) - { - VARDECL(unsigned char, tmp_data); - int nb_frames; - int bak_mode, bak_bandwidth, bak_channels, bak_to_mono; - VARDECL(OpusRepacketizer, rp); - opus_int32 bytes_per_frame; - opus_int32 repacketize_len; - -#ifndef DISABLE_FLOAT_API - if (analysis_read_pos_bak!= -1) - { - st->analysis.read_pos = analysis_read_pos_bak; - st->analysis.read_subframe = analysis_read_subframe_bak; - } -#endif - - nb_frames = frame_size > st->Fs/25 ? 
3 : 2; - bytes_per_frame = IMIN(1276,(out_data_bytes-3)/nb_frames); - - ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char); - - ALLOC(rp, 1, OpusRepacketizer); - opus_repacketizer_init(rp); - - bak_mode = st->user_forced_mode; - bak_bandwidth = st->user_bandwidth; - bak_channels = st->force_channels; - - st->user_forced_mode = st->mode; - st->user_bandwidth = st->bandwidth; - st->force_channels = st->stream_channels; - bak_to_mono = st->silk_mode.toMono; - - if (bak_to_mono) - st->force_channels = 1; - else - st->prev_channels = st->stream_channels; - for (i=0;i<nb_frames;i++) - { - int tmp_len; - st->silk_mode.toMono = 0; - /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */ - if (to_celt && i==nb_frames-1) - st->user_forced_mode = MODE_CELT_ONLY; - tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, - tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth, - NULL, 0, c1, c2, analysis_channels, downmix, float_api); - if (tmp_len<0) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - ret = opus_repacketizer_cat(rp, tmp_data+i*bytes_per_frame, tmp_len); - if (ret<0) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - } - if (st->use_vbr) - repacketize_len = out_data_bytes; - else - repacketize_len = IMIN(3*st->bitrate_bps/(3*8*50/nb_frames), out_data_bytes); - ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr); - if (ret<0) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - st->user_forced_mode = bak_mode; - st->user_bandwidth = bak_bandwidth; - st->force_channels = bak_channels; - st->silk_mode.toMono = bak_to_mono; - RESTORE_STACK; - return ret; - } - curr_bandwidth = st->bandwidth; - - /* Chooses the appropriate mode for speech - *NEVER* switch to/from CELT-only mode here as this will invalidate some assumptions */ - if (st->mode == MODE_SILK_ONLY && curr_bandwidth > OPUS_BANDWIDTH_WIDEBAND) - st->mode = MODE_HYBRID; - if (st->mode == 
MODE_HYBRID && curr_bandwidth <= OPUS_BANDWIDTH_WIDEBAND) - st->mode = MODE_SILK_ONLY; - - /* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, curr_bandwidth); */ - bytes_target = IMIN(max_data_bytes-redundancy_bytes, st->bitrate_bps * frame_size / (st->Fs * 8)) - 1; - - data += 1; - - ec_enc_init(&enc, data, max_data_bytes-1); - - ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16); - OPUS_COPY(pcm_buf, &st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels], total_buffer*st->channels); - - if (st->mode == MODE_CELT_ONLY) - hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); - else - hp_freq_smth1 = ((silk_encoder*)silk_enc)->state_Fxx[0].sCmn.variable_HP_smth1_Q15; - - st->variable_HP_smth2_Q15 = silk_SMLAWB( st->variable_HP_smth2_Q15, - hp_freq_smth1 - st->variable_HP_smth2_Q15, SILK_FIX_CONST( VARIABLE_HP_SMTH_COEF2, 16 ) ); - - /* convert from log scale to Hertz */ - cutoff_Hz = silk_log2lin( silk_RSHIFT( st->variable_HP_smth2_Q15, 8 ) ); - - if (st->application == OPUS_APPLICATION_VOIP) - { - hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); - } else { - dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); - } -#ifndef FIXED_POINT - if (float_api) - { - opus_val32 sum; - sum = celt_inner_prod(&pcm_buf[total_buffer*st->channels], &pcm_buf[total_buffer*st->channels], frame_size*st->channels, st->arch); - /* This should filter out both NaNs and ridiculous signals that could - cause NaNs further down. 
*/ - if (!(sum < 1e9f) || celt_isnan(sum)) - { - OPUS_CLEAR(&pcm_buf[total_buffer*st->channels], frame_size*st->channels); - st->hp_mem[0] = st->hp_mem[1] = st->hp_mem[2] = st->hp_mem[3] = 0; - } - } -#endif - - - /* SILK processing */ - HB_gain = Q15ONE; - if (st->mode != MODE_CELT_ONLY) - { - opus_int32 total_bitRate, celt_rate; -#ifdef FIXED_POINT - const opus_int16 *pcm_silk; -#else - VARDECL(opus_int16, pcm_silk); - ALLOC(pcm_silk, st->channels*frame_size, opus_int16); -#endif - - /* Distribute bits between SILK and CELT */ - total_bitRate = 8 * bytes_target * frame_rate; - if( st->mode == MODE_HYBRID ) { - int HB_gain_ref; - /* Base rate for SILK */ - st->silk_mode.bitRate = st->stream_channels * ( 5000 + 1000 * ( st->Fs == 100 * frame_size ) ); - if( curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND ) { - /* SILK gets 2/3 of the remaining bits */ - st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 2 / 3; - } else { /* FULLBAND */ - /* SILK gets 3/5 of the remaining bits */ - st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 3 / 5; - } - /* Don't let SILK use more than 80% */ - if( st->silk_mode.bitRate > total_bitRate * 4/5 ) { - st->silk_mode.bitRate = total_bitRate * 4/5; - } - if (!st->energy_masking) - { - /* Increasingly attenuate high band when it gets allocated fewer bits */ - celt_rate = total_bitRate - st->silk_mode.bitRate; - HB_gain_ref = (curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND) ? 3000 : 3600; - HB_gain = SHL32((opus_val32)celt_rate, 9) / SHR32((opus_val32)celt_rate + st->stream_channels * HB_gain_ref, 6); - HB_gain = HB_gain < (opus_val32)Q15ONE*6/7 ? 
HB_gain + Q15ONE/7 : Q15ONE; - } - } else { - /* SILK gets all bits */ - st->silk_mode.bitRate = total_bitRate; - } - - /* Surround masking for SILK */ - if (st->energy_masking && st->use_vbr && !st->lfe) - { - opus_val32 mask_sum=0; - opus_val16 masking_depth; - opus_int32 rate_offset; - int c; - int end = 17; - opus_int16 srate = 16000; - if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND) - { - end = 13; - srate = 8000; - } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) - { - end = 15; - srate = 12000; - } - for (c=0;c<st->channels;c++) - { - for(i=0;i<end;i++) - { - opus_val16 mask; - mask = MAX16(MIN16(st->energy_masking[21*c+i], - QCONST16(.5f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT)); - if (mask > 0) - mask = HALF16(mask); - mask_sum += mask; - } - } - /* Conservative rate reduction, we cut the masking in half */ - masking_depth = mask_sum / end*st->channels; - masking_depth += QCONST16(.2f, DB_SHIFT); - rate_offset = (opus_int32)PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT); - rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3); - /* Split the rate change between the SILK and CELT part for hybrid. 
*/ - if (st->bandwidth==OPUS_BANDWIDTH_SUPERWIDEBAND || st->bandwidth==OPUS_BANDWIDTH_FULLBAND) - st->silk_mode.bitRate += 3*rate_offset/5; - else - st->silk_mode.bitRate += rate_offset; - bytes_target += rate_offset * frame_size / (8 * st->Fs); - } - - st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs; - st->silk_mode.nChannelsAPI = st->channels; - st->silk_mode.nChannelsInternal = st->stream_channels; - if (curr_bandwidth == OPUS_BANDWIDTH_NARROWBAND) { - st->silk_mode.desiredInternalSampleRate = 8000; - } else if (curr_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { - st->silk_mode.desiredInternalSampleRate = 12000; - } else { - silk_assert( st->mode == MODE_HYBRID || curr_bandwidth == OPUS_BANDWIDTH_WIDEBAND ); - st->silk_mode.desiredInternalSampleRate = 16000; - } - if( st->mode == MODE_HYBRID ) { - /* Don't allow bandwidth reduction at lowest bitrates in hybrid mode */ - st->silk_mode.minInternalSampleRate = 16000; - } else { - st->silk_mode.minInternalSampleRate = 8000; - } - - if (st->mode == MODE_SILK_ONLY) - { - opus_int32 effective_max_rate = max_rate; - st->silk_mode.maxInternalSampleRate = 16000; - if (frame_rate > 50) - effective_max_rate = effective_max_rate*2/3; - if (effective_max_rate < 13000) - { - st->silk_mode.maxInternalSampleRate = 12000; - st->silk_mode.desiredInternalSampleRate = IMIN(12000, st->silk_mode.desiredInternalSampleRate); - } - if (effective_max_rate < 9600) - { - st->silk_mode.maxInternalSampleRate = 8000; - st->silk_mode.desiredInternalSampleRate = IMIN(8000, st->silk_mode.desiredInternalSampleRate); - } - } else { - st->silk_mode.maxInternalSampleRate = 16000; - } - - st->silk_mode.useCBR = !st->use_vbr; - - /* Call SILK encoder for the low band */ - nBytes = IMIN(1275, max_data_bytes-1-redundancy_bytes); - - st->silk_mode.maxBits = nBytes*8; - /* Only allow up to 90% of the bits for hybrid mode*/ - if (st->mode == MODE_HYBRID) - st->silk_mode.maxBits = (opus_int32)st->silk_mode.maxBits*9/10; - if (st->silk_mode.useCBR) - { 
- st->silk_mode.maxBits = (st->silk_mode.bitRate * frame_size / (st->Fs * 8))*8; - /* Reduce the initial target to make it easier to reach the CBR rate */ - st->silk_mode.bitRate = IMAX(1, st->silk_mode.bitRate-2000); - } - - if (prefill) - { - opus_int32 zero=0; - int prefill_offset; - /* Use a smooth onset for the SILK prefill to avoid the encoder trying to encode - a discontinuity. The exact location is what we need to avoid leaving any "gap" - in the audio when mixing with the redundant CELT frame. Here we can afford to - overwrite st->delay_buffer because the only thing that uses it before it gets - rewritten is tmp_prefill[] and even then only the part after the ramp really - gets used (rather than sent to the encoder and discarded) */ - prefill_offset = st->channels*(st->encoder_buffer-st->delay_compensation-st->Fs/400); - gain_fade(st->delay_buffer+prefill_offset, st->delay_buffer+prefill_offset, - 0, Q15ONE, celt_mode->overlap, st->Fs/400, st->channels, celt_mode->window, st->Fs); - OPUS_CLEAR(st->delay_buffer, prefill_offset); -#ifdef FIXED_POINT - pcm_silk = st->delay_buffer; -#else - for (i=0;i<st->encoder_buffer*st->channels;i++) - pcm_silk[i] = FLOAT2INT16(st->delay_buffer[i]); -#endif - silk_Encode( silk_enc, &st->silk_mode, pcm_silk, st->encoder_buffer, NULL, &zero, 1 ); - } - -#ifdef FIXED_POINT - pcm_silk = pcm_buf+total_buffer*st->channels; -#else - for (i=0;i<frame_size*st->channels;i++) - pcm_silk[i] = FLOAT2INT16(pcm_buf[total_buffer*st->channels + i]); -#endif - ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0 ); - if( ret ) { - /*fprintf (stderr, "SILK encode error: %d\n", ret);*/ - /* Handle error */ - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - if (nBytes==0) - { - st->rangeFinal = 0; - data[-1] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); - RESTORE_STACK; - return 1; - } - /* Extract SILK internal bandwidth for signaling in first byte */ - if( st->mode == 
MODE_SILK_ONLY ) { - if( st->silk_mode.internalSampleRate == 8000 ) { - curr_bandwidth = OPUS_BANDWIDTH_NARROWBAND; - } else if( st->silk_mode.internalSampleRate == 12000 ) { - curr_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; - } else if( st->silk_mode.internalSampleRate == 16000 ) { - curr_bandwidth = OPUS_BANDWIDTH_WIDEBAND; - } - } else { - silk_assert( st->silk_mode.internalSampleRate == 16000 ); - } - - st->silk_mode.opusCanSwitch = st->silk_mode.switchReady; - /* FIXME: How do we allocate the redundancy for CBR? */ - if (st->silk_mode.opusCanSwitch) - { - redundancy = 1; - celt_to_silk = 0; - st->silk_bw_switch = 1; - } - } - - /* CELT processing */ - { - int endband=21; - - switch(curr_bandwidth) - { - case OPUS_BANDWIDTH_NARROWBAND: - endband = 13; - break; - case OPUS_BANDWIDTH_MEDIUMBAND: - case OPUS_BANDWIDTH_WIDEBAND: - endband = 17; - break; - case OPUS_BANDWIDTH_SUPERWIDEBAND: - endband = 19; - break; - case OPUS_BANDWIDTH_FULLBAND: - endband = 21; - break; - } - celt_encoder_ctl(celt_enc, CELT_SET_END_BAND(endband)); - celt_encoder_ctl(celt_enc, CELT_SET_CHANNELS(st->stream_channels)); - } - celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(OPUS_BITRATE_MAX)); - if (st->mode != MODE_SILK_ONLY) - { - opus_val32 celt_pred=2; - celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0)); - /* We may still decide to disable prediction later */ - if (st->silk_mode.reducedDependency) - celt_pred = 0; - celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(celt_pred)); - - if (st->mode == MODE_HYBRID) - { - int len; - - len = (ec_tell(&enc)+7)>>3; - if (redundancy) - len += st->mode == MODE_HYBRID ? 3 : 1; - if( st->use_vbr ) { - nb_compr_bytes = len + bytes_target - (st->silk_mode.bitRate * frame_size) / (8 * st->Fs); - } else { - /* check if SILK used up too much */ - nb_compr_bytes = len > bytes_target ? 
len : bytes_target; - } - } else { - if (st->use_vbr) - { - opus_int32 bonus=0; -#ifndef DISABLE_FLOAT_API - if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50) - { - bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50); - if (analysis_info.valid) - bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info.tonality)); - } -#endif - celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1)); - celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint)); - celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps+bonus)); - nb_compr_bytes = max_data_bytes-1-redundancy_bytes; - } else { - nb_compr_bytes = bytes_target; - } - } - - } else { - nb_compr_bytes = 0; - } - - ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16); - if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0) - { - OPUS_COPY(tmp_prefill, &st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels], st->channels*st->Fs/400); - } - - if (st->channels*(st->encoder_buffer-(frame_size+total_buffer)) > 0) - { - OPUS_MOVE(st->delay_buffer, &st->delay_buffer[st->channels*frame_size], st->channels*(st->encoder_buffer-frame_size-total_buffer)); - OPUS_COPY(&st->delay_buffer[st->channels*(st->encoder_buffer-frame_size-total_buffer)], - &pcm_buf[0], - (frame_size+total_buffer)*st->channels); - } else { - OPUS_COPY(st->delay_buffer, &pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels], st->encoder_buffer*st->channels); - } - /* gain_fade() and stereo_fade() need to be after the buffer copying - because we don't want any of this to affect the SILK part */ - if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) { - gain_fade(pcm_buf, pcm_buf, - st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs); - } - st->prev_HB_gain = HB_gain; - if (st->mode != MODE_HYBRID || st->stream_channels==1) - st->silk_mode.stereoWidth_Q14 = IMIN((1<<14),2*IMAX(0,equiv_rate-30000)); - if( !st->energy_masking 
&& st->channels == 2 ) { - /* Apply stereo width reduction (at low bitrates) */ - if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) { - opus_val16 g1, g2; - g1 = st->hybrid_stereo_width_Q14; - g2 = (opus_val16)(st->silk_mode.stereoWidth_Q14); -#ifdef FIXED_POINT - g1 = g1==16384 ? Q15ONE : SHL16(g1,1); - g2 = g2==16384 ? Q15ONE : SHL16(g2,1); -#else - g1 *= (1.f/16384); - g2 *= (1.f/16384); -#endif - stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap, - frame_size, st->channels, celt_mode->window, st->Fs); - st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14; - } - } - - if ( st->mode != MODE_CELT_ONLY && ec_tell(&enc)+17+20*(st->mode == MODE_HYBRID) <= 8*(max_data_bytes-1)) - { - /* For SILK mode, the redundancy is inferred from the length */ - if (st->mode == MODE_HYBRID && (redundancy || ec_tell(&enc)+37 <= 8*nb_compr_bytes)) - ec_enc_bit_logp(&enc, redundancy, 12); - if (redundancy) - { - int max_redundancy; - ec_enc_bit_logp(&enc, celt_to_silk, 1); - if (st->mode == MODE_HYBRID) - max_redundancy = (max_data_bytes-1)-nb_compr_bytes; - else - max_redundancy = (max_data_bytes-1)-((ec_tell(&enc)+7)>>3); - /* Target the same bit-rate for redundancy as for the rest, - up to a max of 257 bytes */ - redundancy_bytes = IMIN(max_redundancy, st->bitrate_bps/1600); - redundancy_bytes = IMIN(257, IMAX(2, redundancy_bytes)); - if (st->mode == MODE_HYBRID) - ec_enc_uint(&enc, redundancy_bytes-2, 256); - } - } else { - redundancy = 0; - } - - if (!redundancy) - { - st->silk_bw_switch = 0; - redundancy_bytes = 0; - } - if (st->mode != MODE_CELT_ONLY)start_band=17; - - if (st->mode == MODE_SILK_ONLY) - { - ret = (ec_tell(&enc)+7)>>3; - ec_enc_done(&enc); - nb_compr_bytes = ret; - } else { - nb_compr_bytes = IMIN((max_data_bytes-1)-redundancy_bytes, nb_compr_bytes); - ec_enc_shrink(&enc, nb_compr_bytes); - } - -#ifndef DISABLE_FLOAT_API - if (redundancy || st->mode != MODE_SILK_ONLY) - celt_encoder_ctl(celt_enc, 
CELT_SET_ANALYSIS(&analysis_info)); -#endif - - /* 5 ms redundant frame for CELT->SILK */ - if (redundancy && celt_to_silk) - { - int err; - celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0)); - celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0)); - err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL); - if (err < 0) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng)); - celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); - } - - celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(start_band)); - - if (st->mode != MODE_SILK_ONLY) - { - if (st->mode != st->prev_mode && st->prev_mode > 0) - { - unsigned char dummy[2]; - celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); - - /* Prefilling */ - celt_encode_with_ec(celt_enc, tmp_prefill, st->Fs/400, dummy, 2, NULL); - celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0)); - } - /* If false, we already busted the budget and we'll end up with a "PLC packet" */ - if (ec_tell(&enc) <= 8*nb_compr_bytes) - { - ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc); - if (ret < 0) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - } - } - - /* 5 ms redundant frame for SILK->CELT */ - if (redundancy && !celt_to_silk) - { - int err; - unsigned char dummy[2]; - int N2, N4; - N2 = st->Fs/200; - N4 = st->Fs/400; - - celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); - celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0)); - celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0)); - - /* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */ - celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL); - - err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL); - if (err < 0) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - 
celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng)); - } - - - - /* Signalling the mode in the first byte */ - data--; - data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); - - st->rangeFinal = enc.rng ^ redundant_rng; - - if (to_celt) - st->prev_mode = MODE_CELT_ONLY; - else - st->prev_mode = st->mode; - st->prev_channels = st->stream_channels; - st->prev_framesize = frame_size; - - st->first = 0; - - /* In the unlikely case that the SILK encoder busted its target, tell - the decoder to call the PLC */ - if (ec_tell(&enc) > (max_data_bytes-1)*8) - { - if (max_data_bytes < 2) - { - RESTORE_STACK; - return OPUS_BUFFER_TOO_SMALL; - } - data[1] = 0; - ret = 1; - st->rangeFinal = 0; - } else if (st->mode==MODE_SILK_ONLY&&!redundancy) - { - /*When in LPC only mode it's perfectly - reasonable to strip off trailing zero bytes as - the required range decoder behavior is to - fill these in. This can't be done when the MDCT - modes are used because the decoder needs to know - the actual length for allocation purposes.*/ - while(ret>2&&data[ret]==0)ret--; - } - /* Count ToC and redundancy */ - ret += 1+redundancy_bytes; - if (!st->use_vbr) - { - if (opus_packet_pad(data, ret, max_data_bytes) != OPUS_OK) - - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - ret = max_data_bytes; - } - RESTORE_STACK; - return ret; -} - -#ifdef FIXED_POINT - -#ifndef DISABLE_FLOAT_API -opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size, - unsigned char *data, opus_int32 max_data_bytes) -{ - int i, ret; - int frame_size; - int delay_compensation; - VARDECL(opus_int16, in); - ALLOC_STACK; - - if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) - delay_compensation = 0; - else - delay_compensation = st->delay_compensation; - frame_size = compute_frame_size(pcm, analysis_frame_size, - st->variable_duration, st->channels, st->Fs, st->bitrate_bps, - delay_compensation, downmix_float, 
st->analysis.subframe_mem); - - ALLOC(in, frame_size*st->channels, opus_int16); - - for (i=0;i<frame_size*st->channels;i++) - in[i] = FLOAT2INT16(pcm[i]); - ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, - pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1); - RESTORE_STACK; - return ret; -} -#endif - -opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size, - unsigned char *data, opus_int32 out_data_bytes) -{ - int frame_size; - int delay_compensation; - if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) - delay_compensation = 0; - else - delay_compensation = st->delay_compensation; - frame_size = compute_frame_size(pcm, analysis_frame_size, - st->variable_duration, st->channels, st->Fs, st->bitrate_bps, - delay_compensation, downmix_int -#ifndef DISABLE_FLOAT_API - , st->analysis.subframe_mem -#endif - ); - return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, - pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0); -} - -#else -opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size, - unsigned char *data, opus_int32 max_data_bytes) -{ - int i, ret; - int frame_size; - int delay_compensation; - VARDECL(float, in); - ALLOC_STACK; - - if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) - delay_compensation = 0; - else - delay_compensation = st->delay_compensation; - frame_size = compute_frame_size(pcm, analysis_frame_size, - st->variable_duration, st->channels, st->Fs, st->bitrate_bps, - delay_compensation, downmix_int, st->analysis.subframe_mem); - - ALLOC(in, frame_size*st->channels, float); - - for (i=0;i<frame_size*st->channels;i++) - in[i] = (1.0f/32768)*pcm[i]; - ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, - pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0); - RESTORE_STACK; - return ret; -} -opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int 
analysis_frame_size, - unsigned char *data, opus_int32 out_data_bytes) -{ - int frame_size; - int delay_compensation; - if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) - delay_compensation = 0; - else - delay_compensation = st->delay_compensation; - frame_size = compute_frame_size(pcm, analysis_frame_size, - st->variable_duration, st->channels, st->Fs, st->bitrate_bps, - delay_compensation, downmix_float, st->analysis.subframe_mem); - return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, - pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1); -} -#endif - - -int opus_encoder_ctl(OpusEncoder *st, int request, ...) -{ - int ret; - CELTEncoder *celt_enc; - va_list ap; - - ret = OPUS_OK; - va_start(ap, request); - - celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); - - switch (request) - { - case OPUS_SET_APPLICATION_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if ( (value != OPUS_APPLICATION_VOIP && value != OPUS_APPLICATION_AUDIO - && value != OPUS_APPLICATION_RESTRICTED_LOWDELAY) - || (!st->first && st->application != value)) - { - ret = OPUS_BAD_ARG; - break; - } - st->application = value; - } - break; - case OPUS_GET_APPLICATION_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->application; - } - break; - case OPUS_SET_BITRATE_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value != OPUS_AUTO && value != OPUS_BITRATE_MAX) - { - if (value <= 0) - goto bad_arg; - else if (value <= 500) - value = 500; - else if (value > (opus_int32)300000*st->channels) - value = (opus_int32)300000*st->channels; - } - st->user_bitrate_bps = value; - } - break; - case OPUS_GET_BITRATE_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = user_bitrate_to_bitrate(st, st->prev_framesize, 1276); - } - break; - case OPUS_SET_FORCE_CHANNELS_REQUEST: - { - opus_int32 value = va_arg(ap, 
opus_int32); - if((value<1 || value>st->channels) && value != OPUS_AUTO) - { - goto bad_arg; - } - st->force_channels = value; - } - break; - case OPUS_GET_FORCE_CHANNELS_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->force_channels; - } - break; - case OPUS_SET_MAX_BANDWIDTH_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) - { - goto bad_arg; - } - st->max_bandwidth = value; - if (st->max_bandwidth == OPUS_BANDWIDTH_NARROWBAND) { - st->silk_mode.maxInternalSampleRate = 8000; - } else if (st->max_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { - st->silk_mode.maxInternalSampleRate = 12000; - } else { - st->silk_mode.maxInternalSampleRate = 16000; - } - } - break; - case OPUS_GET_MAX_BANDWIDTH_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->max_bandwidth; - } - break; - case OPUS_SET_BANDWIDTH_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if ((value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) && value != OPUS_AUTO) - { - goto bad_arg; - } - st->user_bandwidth = value; - if (st->user_bandwidth == OPUS_BANDWIDTH_NARROWBAND) { - st->silk_mode.maxInternalSampleRate = 8000; - } else if (st->user_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { - st->silk_mode.maxInternalSampleRate = 12000; - } else { - st->silk_mode.maxInternalSampleRate = 16000; - } - } - break; - case OPUS_GET_BANDWIDTH_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->bandwidth; - } - break; - case OPUS_SET_DTX_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if(value<0 || value>1) - { - goto bad_arg; - } - st->silk_mode.useDTX = value; - } - break; - case OPUS_GET_DTX_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = 
st->silk_mode.useDTX; - } - break; - case OPUS_SET_COMPLEXITY_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if(value<0 || value>10) - { - goto bad_arg; - } - st->silk_mode.complexity = value; - celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(value)); - } - break; - case OPUS_GET_COMPLEXITY_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->silk_mode.complexity; - } - break; - case OPUS_SET_INBAND_FEC_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if(value<0 || value>1) - { - goto bad_arg; - } - st->silk_mode.useInBandFEC = value; - } - break; - case OPUS_GET_INBAND_FEC_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->silk_mode.useInBandFEC; - } - break; - case OPUS_SET_PACKET_LOSS_PERC_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value < 0 || value > 100) - { - goto bad_arg; - } - st->silk_mode.packetLossPercentage = value; - celt_encoder_ctl(celt_enc, OPUS_SET_PACKET_LOSS_PERC(value)); - } - break; - case OPUS_GET_PACKET_LOSS_PERC_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->silk_mode.packetLossPercentage; - } - break; - case OPUS_SET_VBR_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if(value<0 || value>1) - { - goto bad_arg; - } - st->use_vbr = value; - st->silk_mode.useCBR = 1-value; - } - break; - case OPUS_GET_VBR_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->use_vbr; - } - break; - case OPUS_SET_VOICE_RATIO_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<-1 || value>100) - { - goto bad_arg; - } - st->voice_ratio = value; - } - break; - case OPUS_GET_VOICE_RATIO_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->voice_ratio; - } - break; - case 
OPUS_SET_VBR_CONSTRAINT_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if(value<0 || value>1) - { - goto bad_arg; - } - st->vbr_constraint = value; - } - break; - case OPUS_GET_VBR_CONSTRAINT_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->vbr_constraint; - } - break; - case OPUS_SET_SIGNAL_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if(value!=OPUS_AUTO && value!=OPUS_SIGNAL_VOICE && value!=OPUS_SIGNAL_MUSIC) - { - goto bad_arg; - } - st->signal_type = value; - } - break; - case OPUS_GET_SIGNAL_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->signal_type; - } - break; - case OPUS_GET_LOOKAHEAD_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->Fs/400; - if (st->application != OPUS_APPLICATION_RESTRICTED_LOWDELAY) - *value += st->delay_compensation; - } - break; - case OPUS_GET_SAMPLE_RATE_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->Fs; - } - break; - case OPUS_GET_FINAL_RANGE_REQUEST: - { - opus_uint32 *value = va_arg(ap, opus_uint32*); - if (!value) - { - goto bad_arg; - } - *value = st->rangeFinal; - } - break; - case OPUS_SET_LSB_DEPTH_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<8 || value>24) - { - goto bad_arg; - } - st->lsb_depth=value; - } - break; - case OPUS_GET_LSB_DEPTH_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->lsb_depth; - } - break; - case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value != OPUS_FRAMESIZE_ARG && value != OPUS_FRAMESIZE_2_5_MS && - value != OPUS_FRAMESIZE_5_MS && value != OPUS_FRAMESIZE_10_MS && - value != OPUS_FRAMESIZE_20_MS && value != OPUS_FRAMESIZE_40_MS && - value != OPUS_FRAMESIZE_60_MS && value != 
OPUS_FRAMESIZE_VARIABLE) - { - goto bad_arg; - } - st->variable_duration = value; - celt_encoder_ctl(celt_enc, OPUS_SET_EXPERT_FRAME_DURATION(value)); - } - break; - case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->variable_duration; - } - break; - case OPUS_SET_PREDICTION_DISABLED_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value > 1 || value < 0) - goto bad_arg; - st->silk_mode.reducedDependency = value; - } - break; - case OPUS_GET_PREDICTION_DISABLED_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - goto bad_arg; - *value = st->silk_mode.reducedDependency; - } - break; - case OPUS_RESET_STATE: - { - void *silk_enc; - silk_EncControlStruct dummy; - char *start; - silk_enc = (char*)st+st->silk_enc_offset; -#ifndef DISABLE_FLOAT_API - tonality_analysis_reset(&st->analysis); -#endif - - start = (char*)&st->OPUS_ENCODER_RESET_START; - OPUS_CLEAR(start, sizeof(OpusEncoder) - (start - (char*)st)); - - celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); - silk_InitEncoder( silk_enc, st->arch, &dummy ); - st->stream_channels = st->channels; - st->hybrid_stereo_width_Q14 = 1 << 14; - st->prev_HB_gain = Q15ONE; - st->first = 1; - st->mode = MODE_HYBRID; - st->bandwidth = OPUS_BANDWIDTH_FULLBAND; - st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); - } - break; - case OPUS_SET_FORCE_MODE_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if ((value < MODE_SILK_ONLY || value > MODE_CELT_ONLY) && value != OPUS_AUTO) - { - goto bad_arg; - } - st->user_forced_mode = value; - } - break; - case OPUS_SET_LFE_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->lfe = value; - ret = celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value)); - } - break; - case OPUS_SET_ENERGY_MASK_REQUEST: - { - opus_val16 *value = va_arg(ap, opus_val16*); - st->energy_masking = value; - ret = 
celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value)); - } - break; - - case CELT_GET_MODE_REQUEST: - { - const CELTMode ** value = va_arg(ap, const CELTMode**); - if (!value) - { - goto bad_arg; - } - ret = celt_encoder_ctl(celt_enc, CELT_GET_MODE(value)); - } - break; - default: - /* fprintf(stderr, "unknown opus_encoder_ctl() request: %d", request);*/ - ret = OPUS_UNIMPLEMENTED; - break; - } - va_end(ap); - return ret; -bad_arg: - va_end(ap); - return OPUS_BAD_ARG; -} - -void opus_encoder_destroy(OpusEncoder *st) -{ - opus_free(st); -} diff --git a/thirdparty/opus/opus_multistream.c b/thirdparty/opus/opus_multistream.c deleted file mode 100644 index 09c3639b7f..0000000000 --- a/thirdparty/opus/opus_multistream.c +++ /dev/null @@ -1,92 +0,0 @@ -/* Copyright (c) 2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus_multistream.h" -#include "opus.h" -#include "opus_private.h" -#include "stack_alloc.h" -#include <stdarg.h> -#include "float_cast.h" -#include "os_support.h" - - -int validate_layout(const ChannelLayout *layout) -{ - int i, max_channel; - - max_channel = layout->nb_streams+layout->nb_coupled_streams; - if (max_channel>255) - return 0; - for (i=0;i<layout->nb_channels;i++) - { - if (layout->mapping[i] >= max_channel && layout->mapping[i] != 255) - return 0; - } - return 1; -} - - -int get_left_channel(const ChannelLayout *layout, int stream_id, int prev) -{ - int i; - i = (prev<0) ? 0 : prev+1; - for (;i<layout->nb_channels;i++) - { - if (layout->mapping[i]==stream_id*2) - return i; - } - return -1; -} - -int get_right_channel(const ChannelLayout *layout, int stream_id, int prev) -{ - int i; - i = (prev<0) ? 0 : prev+1; - for (;i<layout->nb_channels;i++) - { - if (layout->mapping[i]==stream_id*2+1) - return i; - } - return -1; -} - -int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev) -{ - int i; - i = (prev<0) ? 
0 : prev+1; - for (;i<layout->nb_channels;i++) - { - if (layout->mapping[i]==stream_id+layout->nb_coupled_streams) - return i; - } - return -1; -} - diff --git a/thirdparty/opus/opus_multistream_decoder.c b/thirdparty/opus/opus_multistream_decoder.c deleted file mode 100644 index b95eaa6eac..0000000000 --- a/thirdparty/opus/opus_multistream_decoder.c +++ /dev/null @@ -1,537 +0,0 @@ -/* Copyright (c) 2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus_multistream.h" -#include "opus.h" -#include "opus_private.h" -#include "stack_alloc.h" -#include <stdarg.h> -#include "float_cast.h" -#include "os_support.h" - -struct OpusMSDecoder { - ChannelLayout layout; - /* Decoder states go here */ -}; - - - - -/* DECODER */ - -opus_int32 opus_multistream_decoder_get_size(int nb_streams, int nb_coupled_streams) -{ - int coupled_size; - int mono_size; - - if(nb_streams<1||nb_coupled_streams>nb_streams||nb_coupled_streams<0)return 0; - coupled_size = opus_decoder_get_size(2); - mono_size = opus_decoder_get_size(1); - return align(sizeof(OpusMSDecoder)) - + nb_coupled_streams * align(coupled_size) - + (nb_streams-nb_coupled_streams) * align(mono_size); -} - -int opus_multistream_decoder_init( - OpusMSDecoder *st, - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - const unsigned char *mapping -) -{ - int coupled_size; - int mono_size; - int i, ret; - char *ptr; - - if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) - return OPUS_BAD_ARG; - - st->layout.nb_channels = channels; - st->layout.nb_streams = streams; - st->layout.nb_coupled_streams = coupled_streams; - - for (i=0;i<st->layout.nb_channels;i++) - st->layout.mapping[i] = mapping[i]; - if (!validate_layout(&st->layout)) - return OPUS_BAD_ARG; - - ptr = (char*)st + align(sizeof(OpusMSDecoder)); - coupled_size = opus_decoder_get_size(2); - mono_size = opus_decoder_get_size(1); - - for (i=0;i<st->layout.nb_coupled_streams;i++) - { - ret=opus_decoder_init((OpusDecoder*)ptr, Fs, 2); - if(ret!=OPUS_OK)return ret; - ptr += align(coupled_size); - } - for (;i<st->layout.nb_streams;i++) - { - ret=opus_decoder_init((OpusDecoder*)ptr, Fs, 1); - if(ret!=OPUS_OK)return ret; - ptr += align(mono_size); - } - return OPUS_OK; -} - - -OpusMSDecoder *opus_multistream_decoder_create( - opus_int32 Fs, - int 
channels, - int streams, - int coupled_streams, - const unsigned char *mapping, - int *error -) -{ - int ret; - OpusMSDecoder *st; - if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) - { - if (error) - *error = OPUS_BAD_ARG; - return NULL; - } - st = (OpusMSDecoder *)opus_alloc(opus_multistream_decoder_get_size(streams, coupled_streams)); - if (st==NULL) - { - if (error) - *error = OPUS_ALLOC_FAIL; - return NULL; - } - ret = opus_multistream_decoder_init(st, Fs, channels, streams, coupled_streams, mapping); - if (error) - *error = ret; - if (ret != OPUS_OK) - { - opus_free(st); - st = NULL; - } - return st; -} - -typedef void (*opus_copy_channel_out_func)( - void *dst, - int dst_stride, - int dst_channel, - const opus_val16 *src, - int src_stride, - int frame_size -); - -static int opus_multistream_packet_validate(const unsigned char *data, - opus_int32 len, int nb_streams, opus_int32 Fs) -{ - int s; - int count; - unsigned char toc; - opus_int16 size[48]; - int samples=0; - opus_int32 packet_offset; - - for (s=0;s<nb_streams;s++) - { - int tmp_samples; - if (len<=0) - return OPUS_INVALID_PACKET; - count = opus_packet_parse_impl(data, len, s!=nb_streams-1, &toc, NULL, - size, NULL, &packet_offset); - if (count<0) - return count; - tmp_samples = opus_packet_get_nb_samples(data, packet_offset, Fs); - if (s!=0 && samples != tmp_samples) - return OPUS_INVALID_PACKET; - samples = tmp_samples; - data += packet_offset; - len -= packet_offset; - } - return samples; -} - -static int opus_multistream_decode_native( - OpusMSDecoder *st, - const unsigned char *data, - opus_int32 len, - void *pcm, - opus_copy_channel_out_func copy_channel_out, - int frame_size, - int decode_fec, - int soft_clip -) -{ - opus_int32 Fs; - int coupled_size; - int mono_size; - int s, c; - char *ptr; - int do_plc=0; - VARDECL(opus_val16, buf); - ALLOC_STACK; - - /* Limit frame_size to avoid excessive stack 
allocations. */ - opus_multistream_decoder_ctl(st, OPUS_GET_SAMPLE_RATE(&Fs)); - frame_size = IMIN(frame_size, Fs/25*3); - ALLOC(buf, 2*frame_size, opus_val16); - ptr = (char*)st + align(sizeof(OpusMSDecoder)); - coupled_size = opus_decoder_get_size(2); - mono_size = opus_decoder_get_size(1); - - if (len==0) - do_plc = 1; - if (len < 0) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - if (!do_plc && len < 2*st->layout.nb_streams-1) - { - RESTORE_STACK; - return OPUS_INVALID_PACKET; - } - if (!do_plc) - { - int ret = opus_multistream_packet_validate(data, len, st->layout.nb_streams, Fs); - if (ret < 0) - { - RESTORE_STACK; - return ret; - } else if (ret > frame_size) - { - RESTORE_STACK; - return OPUS_BUFFER_TOO_SMALL; - } - } - for (s=0;s<st->layout.nb_streams;s++) - { - OpusDecoder *dec; - int packet_offset, ret; - - dec = (OpusDecoder*)ptr; - ptr += (s < st->layout.nb_coupled_streams) ? align(coupled_size) : align(mono_size); - - if (!do_plc && len<=0) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - packet_offset = 0; - ret = opus_decode_native(dec, data, len, buf, frame_size, decode_fec, s!=st->layout.nb_streams-1, &packet_offset, soft_clip); - data += packet_offset; - len -= packet_offset; - if (ret <= 0) - { - RESTORE_STACK; - return ret; - } - frame_size = ret; - if (s < st->layout.nb_coupled_streams) - { - int chan, prev; - prev = -1; - /* Copy "left" audio to the channel(s) where it belongs */ - while ( (chan = get_left_channel(&st->layout, s, prev)) != -1) - { - (*copy_channel_out)(pcm, st->layout.nb_channels, chan, - buf, 2, frame_size); - prev = chan; - } - prev = -1; - /* Copy "right" audio to the channel(s) where it belongs */ - while ( (chan = get_right_channel(&st->layout, s, prev)) != -1) - { - (*copy_channel_out)(pcm, st->layout.nb_channels, chan, - buf+1, 2, frame_size); - prev = chan; - } - } else { - int chan, prev; - prev = -1; - /* Copy audio to the channel(s) where it belongs */ - while ( (chan = get_mono_channel(&st->layout, s, 
prev)) != -1) - { - (*copy_channel_out)(pcm, st->layout.nb_channels, chan, - buf, 1, frame_size); - prev = chan; - } - } - } - /* Handle muted channels */ - for (c=0;c<st->layout.nb_channels;c++) - { - if (st->layout.mapping[c] == 255) - { - (*copy_channel_out)(pcm, st->layout.nb_channels, c, - NULL, 0, frame_size); - } - } - RESTORE_STACK; - return frame_size; -} - -#if !defined(DISABLE_FLOAT_API) -static void opus_copy_channel_out_float( - void *dst, - int dst_stride, - int dst_channel, - const opus_val16 *src, - int src_stride, - int frame_size -) -{ - float *float_dst; - opus_int32 i; - float_dst = (float*)dst; - if (src != NULL) - { - for (i=0;i<frame_size;i++) -#if defined(FIXED_POINT) - float_dst[i*dst_stride+dst_channel] = (1/32768.f)*src[i*src_stride]; -#else - float_dst[i*dst_stride+dst_channel] = src[i*src_stride]; -#endif - } - else - { - for (i=0;i<frame_size;i++) - float_dst[i*dst_stride+dst_channel] = 0; - } -} -#endif - -static void opus_copy_channel_out_short( - void *dst, - int dst_stride, - int dst_channel, - const opus_val16 *src, - int src_stride, - int frame_size -) -{ - opus_int16 *short_dst; - opus_int32 i; - short_dst = (opus_int16*)dst; - if (src != NULL) - { - for (i=0;i<frame_size;i++) -#if defined(FIXED_POINT) - short_dst[i*dst_stride+dst_channel] = src[i*src_stride]; -#else - short_dst[i*dst_stride+dst_channel] = FLOAT2INT16(src[i*src_stride]); -#endif - } - else - { - for (i=0;i<frame_size;i++) - short_dst[i*dst_stride+dst_channel] = 0; - } -} - - - -#ifdef FIXED_POINT -int opus_multistream_decode( - OpusMSDecoder *st, - const unsigned char *data, - opus_int32 len, - opus_int16 *pcm, - int frame_size, - int decode_fec -) -{ - return opus_multistream_decode_native(st, data, len, - pcm, opus_copy_channel_out_short, frame_size, decode_fec, 0); -} - -#ifndef DISABLE_FLOAT_API -int opus_multistream_decode_float(OpusMSDecoder *st, const unsigned char *data, - opus_int32 len, float *pcm, int frame_size, int decode_fec) -{ - return 
opus_multistream_decode_native(st, data, len, - pcm, opus_copy_channel_out_float, frame_size, decode_fec, 0); -} -#endif - -#else - -int opus_multistream_decode(OpusMSDecoder *st, const unsigned char *data, - opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec) -{ - return opus_multistream_decode_native(st, data, len, - pcm, opus_copy_channel_out_short, frame_size, decode_fec, 1); -} - -int opus_multistream_decode_float( - OpusMSDecoder *st, - const unsigned char *data, - opus_int32 len, - float *pcm, - int frame_size, - int decode_fec -) -{ - return opus_multistream_decode_native(st, data, len, - pcm, opus_copy_channel_out_float, frame_size, decode_fec, 0); -} -#endif - -int opus_multistream_decoder_ctl(OpusMSDecoder *st, int request, ...) -{ - va_list ap; - int coupled_size, mono_size; - char *ptr; - int ret = OPUS_OK; - - va_start(ap, request); - - coupled_size = opus_decoder_get_size(2); - mono_size = opus_decoder_get_size(1); - ptr = (char*)st + align(sizeof(OpusMSDecoder)); - switch (request) - { - case OPUS_GET_BANDWIDTH_REQUEST: - case OPUS_GET_SAMPLE_RATE_REQUEST: - case OPUS_GET_GAIN_REQUEST: - case OPUS_GET_LAST_PACKET_DURATION_REQUEST: - { - OpusDecoder *dec; - /* For int32* GET params, just query the first stream */ - opus_int32 *value = va_arg(ap, opus_int32*); - dec = (OpusDecoder*)ptr; - ret = opus_decoder_ctl(dec, request, value); - } - break; - case OPUS_GET_FINAL_RANGE_REQUEST: - { - int s; - opus_uint32 *value = va_arg(ap, opus_uint32*); - opus_uint32 tmp; - if (!value) - { - goto bad_arg; - } - *value = 0; - for (s=0;s<st->layout.nb_streams;s++) - { - OpusDecoder *dec; - dec = (OpusDecoder*)ptr; - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - ret = opus_decoder_ctl(dec, request, &tmp); - if (ret != OPUS_OK) break; - *value ^= tmp; - } - } - break; - case OPUS_RESET_STATE: - { - int s; - for (s=0;s<st->layout.nb_streams;s++) - { - OpusDecoder *dec; - - dec = 
(OpusDecoder*)ptr; - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - ret = opus_decoder_ctl(dec, OPUS_RESET_STATE); - if (ret != OPUS_OK) - break; - } - } - break; - case OPUS_MULTISTREAM_GET_DECODER_STATE_REQUEST: - { - int s; - opus_int32 stream_id; - OpusDecoder **value; - stream_id = va_arg(ap, opus_int32); - if (stream_id<0 || stream_id >= st->layout.nb_streams) - ret = OPUS_BAD_ARG; - value = va_arg(ap, OpusDecoder**); - if (!value) - { - goto bad_arg; - } - for (s=0;s<stream_id;s++) - { - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - } - *value = (OpusDecoder*)ptr; - } - break; - case OPUS_SET_GAIN_REQUEST: - { - int s; - /* This works for int32 params */ - opus_int32 value = va_arg(ap, opus_int32); - for (s=0;s<st->layout.nb_streams;s++) - { - OpusDecoder *dec; - - dec = (OpusDecoder*)ptr; - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - ret = opus_decoder_ctl(dec, request, value); - if (ret != OPUS_OK) - break; - } - } - break; - default: - ret = OPUS_UNIMPLEMENTED; - break; - } - - va_end(ap); - return ret; -bad_arg: - va_end(ap); - return OPUS_BAD_ARG; -} - - -void opus_multistream_decoder_destroy(OpusMSDecoder *st) -{ - opus_free(st); -} diff --git a/thirdparty/opus/opus_multistream_encoder.c b/thirdparty/opus/opus_multistream_encoder.c deleted file mode 100644 index 1698223a16..0000000000 --- a/thirdparty/opus/opus_multistream_encoder.c +++ /dev/null @@ -1,1351 +0,0 @@ -/* Copyright (c) 2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus_multistream.h" -#include "opus.h" -#include "opus_private.h" -#include "stack_alloc.h" -#include <stdarg.h> -#include "float_cast.h" -#include "os_support.h" -#include "mathops.h" -#include "mdct.h" -#include "modes.h" -#include "bands.h" -#include "quant_bands.h" -#include "pitch.h" - -typedef struct { - int nb_streams; - int nb_coupled_streams; - unsigned char mapping[8]; -} VorbisLayout; - -/* Index is nb_channel-1*/ -static const VorbisLayout vorbis_mappings[8] = { - {1, 0, {0}}, /* 1: mono */ - {1, 1, {0, 1}}, /* 2: stereo */ - {2, 1, {0, 2, 1}}, /* 3: 1-d surround */ - {2, 2, {0, 1, 2, 3}}, /* 4: quadraphonic surround */ - {3, 2, {0, 4, 1, 2, 3}}, /* 5: 5-channel surround */ - {4, 2, {0, 4, 1, 2, 3, 5}}, /* 6: 5.1 surround */ - {4, 3, {0, 4, 1, 2, 3, 5, 6}}, /* 7: 6.1 surround */ - {5, 3, {0, 6, 1, 2, 3, 4, 5, 7}}, /* 8: 7.1 surround */ -}; - -typedef void (*opus_copy_channel_in_func)( - opus_val16 *dst, - int 
dst_stride, - const void *src, - int src_stride, - int src_channel, - int frame_size -); - -typedef enum { - MAPPING_TYPE_NONE, - MAPPING_TYPE_SURROUND -#ifdef ENABLE_EXPERIMENTAL_AMBISONICS - , /* Do not include comma at end of enumerator list */ - MAPPING_TYPE_AMBISONICS -#endif -} MappingType; - -struct OpusMSEncoder { - ChannelLayout layout; - int arch; - int lfe_stream; - int application; - int variable_duration; - MappingType mapping_type; - opus_int32 bitrate_bps; - float subframe_mem[3]; - /* Encoder states go here */ - /* then opus_val32 window_mem[channels*120]; */ - /* then opus_val32 preemph_mem[channels]; */ -}; - -static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st) -{ - int s; - char *ptr; - int coupled_size, mono_size; - - coupled_size = opus_encoder_get_size(2); - mono_size = opus_encoder_get_size(1); - ptr = (char*)st + align(sizeof(OpusMSEncoder)); - for (s=0;s<st->layout.nb_streams;s++) - { - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - } - /* void* cast avoids clang -Wcast-align warning */ - return (opus_val32*)(void*)(ptr+st->layout.nb_channels*120*sizeof(opus_val32)); -} - -static opus_val32 *ms_get_window_mem(OpusMSEncoder *st) -{ - int s; - char *ptr; - int coupled_size, mono_size; - - coupled_size = opus_encoder_get_size(2); - mono_size = opus_encoder_get_size(1); - ptr = (char*)st + align(sizeof(OpusMSEncoder)); - for (s=0;s<st->layout.nb_streams;s++) - { - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - } - /* void* cast avoids clang -Wcast-align warning */ - return (opus_val32*)(void*)ptr; -} - -static int validate_encoder_layout(const ChannelLayout *layout) -{ - int s; - for (s=0;s<layout->nb_streams;s++) - { - if (s < layout->nb_coupled_streams) - { - if (get_left_channel(layout, s, -1)==-1) - return 0; - if (get_right_channel(layout, s, -1)==-1) - return 0; - } else { - if (get_mono_channel(layout, s, -1)==-1) - 
return 0; - } - } - return 1; -} - -static void channel_pos(int channels, int pos[8]) -{ - /* Position in the mix: 0 don't mix, 1: left, 2: center, 3:right */ - if (channels==4) - { - pos[0]=1; - pos[1]=3; - pos[2]=1; - pos[3]=3; - } else if (channels==3||channels==5||channels==6) - { - pos[0]=1; - pos[1]=2; - pos[2]=3; - pos[3]=1; - pos[4]=3; - pos[5]=0; - } else if (channels==7) - { - pos[0]=1; - pos[1]=2; - pos[2]=3; - pos[3]=1; - pos[4]=3; - pos[5]=2; - pos[6]=0; - } else if (channels==8) - { - pos[0]=1; - pos[1]=2; - pos[2]=3; - pos[3]=1; - pos[4]=3; - pos[5]=1; - pos[6]=3; - pos[7]=0; - } -} - -#if 1 -/* Computes a rough approximation of log2(2^a + 2^b) */ -static opus_val16 logSum(opus_val16 a, opus_val16 b) -{ - opus_val16 max; - opus_val32 diff; - opus_val16 frac; - static const opus_val16 diff_table[17] = { - QCONST16(0.5000000f, DB_SHIFT), QCONST16(0.2924813f, DB_SHIFT), QCONST16(0.1609640f, DB_SHIFT), QCONST16(0.0849625f, DB_SHIFT), - QCONST16(0.0437314f, DB_SHIFT), QCONST16(0.0221971f, DB_SHIFT), QCONST16(0.0111839f, DB_SHIFT), QCONST16(0.0056136f, DB_SHIFT), - QCONST16(0.0028123f, DB_SHIFT) - }; - int low; - if (a>b) - { - max = a; - diff = SUB32(EXTEND32(a),EXTEND32(b)); - } else { - max = b; - diff = SUB32(EXTEND32(b),EXTEND32(a)); - } - if (!(diff < QCONST16(8.f, DB_SHIFT))) /* inverted to catch NaNs */ - return max; -#ifdef FIXED_POINT - low = SHR32(diff, DB_SHIFT-1); - frac = SHL16(diff - SHL16(low, DB_SHIFT-1), 16-DB_SHIFT); -#else - low = (int)floor(2*diff); - frac = 2*diff - low; -#endif - return max + diff_table[low] + MULT16_16_Q15(frac, SUB16(diff_table[low+1], diff_table[low])); -} -#else -opus_val16 logSum(opus_val16 a, opus_val16 b) -{ - return log2(pow(4, a)+ pow(4, b))/2; -} -#endif - -void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem, - int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in, int arch -) -{ - int c; - int i; 
- int LM; - int pos[8] = {0}; - int upsample; - int frame_size; - opus_val16 channel_offset; - opus_val32 bandE[21]; - opus_val16 maskLogE[3][21]; - VARDECL(opus_val32, in); - VARDECL(opus_val16, x); - VARDECL(opus_val32, freq); - SAVE_STACK; - - upsample = resampling_factor(rate); - frame_size = len*upsample; - - /* LM = log2(frame_size / 120) */ - for (LM=0;LM<celt_mode->maxLM;LM++) - if (celt_mode->shortMdctSize<<LM==frame_size) - break; - - ALLOC(in, frame_size+overlap, opus_val32); - ALLOC(x, len, opus_val16); - ALLOC(freq, frame_size, opus_val32); - - channel_pos(channels, pos); - - for (c=0;c<3;c++) - for (i=0;i<21;i++) - maskLogE[c][i] = -QCONST16(28.f, DB_SHIFT); - - for (c=0;c<channels;c++) - { - OPUS_COPY(in, mem+c*overlap, overlap); - (*copy_channel_in)(x, 1, pcm, channels, c, len); - celt_preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0); -#ifndef FIXED_POINT - { - opus_val32 sum; - sum = celt_inner_prod(in, in, frame_size+overlap, 0); - /* This should filter out both NaNs and ridiculous signals that could - cause NaNs further down. */ - if (!(sum < 1e18f) || celt_isnan(sum)) - { - OPUS_CLEAR(in, frame_size+overlap); - preemph_mem[c] = 0; - } - } -#endif - clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, - overlap, celt_mode->maxLM-LM, 1, arch); - if (upsample != 1) - { - int bound = len; - for (i=0;i<bound;i++) - freq[i] *= upsample; - for (;i<frame_size;i++) - freq[i] = 0; - } - - compute_band_energies(celt_mode, freq, bandE, 21, 1, LM); - amp2Log2(celt_mode, 21, 21, bandE, bandLogE+21*c, 1); - /* Apply spreading function with -6 dB/band going up and -12 dB/band going down. 
*/ - for (i=1;i<21;i++) - bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i-1]-QCONST16(1.f, DB_SHIFT)); - for (i=19;i>=0;i--) - bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i+1]-QCONST16(2.f, DB_SHIFT)); - if (pos[c]==1) - { - for (i=0;i<21;i++) - maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]); - } else if (pos[c]==3) - { - for (i=0;i<21;i++) - maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]); - } else if (pos[c]==2) - { - for (i=0;i<21;i++) - { - maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT)); - maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT)); - } - } -#if 0 - for (i=0;i<21;i++) - printf("%f ", bandLogE[21*c+i]); - float sum=0; - for (i=0;i<21;i++) - sum += bandLogE[21*c+i]; - printf("%f ", sum/21); -#endif - OPUS_COPY(mem+c*overlap, in+frame_size, overlap); - } - for (i=0;i<21;i++) - maskLogE[1][i] = MIN32(maskLogE[0][i],maskLogE[2][i]); - channel_offset = HALF16(celt_log2(QCONST32(2.f,14)/(channels-1))); - for (c=0;c<3;c++) - for (i=0;i<21;i++) - maskLogE[c][i] += channel_offset; -#if 0 - for (c=0;c<3;c++) - { - for (i=0;i<21;i++) - printf("%f ", maskLogE[c][i]); - } -#endif - for (c=0;c<channels;c++) - { - opus_val16 *mask; - if (pos[c]!=0) - { - mask = &maskLogE[pos[c]-1][0]; - for (i=0;i<21;i++) - bandLogE[21*c+i] = bandLogE[21*c+i] - mask[i]; - } else { - for (i=0;i<21;i++) - bandLogE[21*c+i] = 0; - } -#if 0 - for (i=0;i<21;i++) - printf("%f ", bandLogE[21*c+i]); - printf("\n"); -#endif -#if 0 - float sum=0; - for (i=0;i<21;i++) - sum += bandLogE[21*c+i]; - printf("%f ", sum/(float)QCONST32(21.f, DB_SHIFT)); - printf("\n"); -#endif - } - RESTORE_STACK; -} - -opus_int32 opus_multistream_encoder_get_size(int nb_streams, int nb_coupled_streams) -{ - int coupled_size; - int mono_size; - - if(nb_streams<1||nb_coupled_streams>nb_streams||nb_coupled_streams<0)return 0; - coupled_size = opus_encoder_get_size(2); - mono_size = opus_encoder_get_size(1); - 
return align(sizeof(OpusMSEncoder)) - + nb_coupled_streams * align(coupled_size) - + (nb_streams-nb_coupled_streams) * align(mono_size); -} - -opus_int32 opus_multistream_surround_encoder_get_size(int channels, int mapping_family) -{ - int nb_streams; - int nb_coupled_streams; - opus_int32 size; - - if (mapping_family==0) - { - if (channels==1) - { - nb_streams=1; - nb_coupled_streams=0; - } else if (channels==2) - { - nb_streams=1; - nb_coupled_streams=1; - } else - return 0; - } else if (mapping_family==1 && channels<=8 && channels>=1) - { - nb_streams=vorbis_mappings[channels-1].nb_streams; - nb_coupled_streams=vorbis_mappings[channels-1].nb_coupled_streams; - } else if (mapping_family==255) - { - nb_streams=channels; - nb_coupled_streams=0; -#ifdef ENABLE_EXPERIMENTAL_AMBISONICS - } else if (mapping_family==254) - { - nb_streams=channels; - nb_coupled_streams=0; -#endif - } else - return 0; - size = opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams); - if (channels>2) - { - size += channels*(120*sizeof(opus_val32) + sizeof(opus_val32)); - } - return size; -} - -static int opus_multistream_encoder_init_impl( - OpusMSEncoder *st, - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - const unsigned char *mapping, - int application, - MappingType mapping_type -) -{ - int coupled_size; - int mono_size; - int i, ret; - char *ptr; - - if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) - return OPUS_BAD_ARG; - - st->arch = opus_select_arch(); - st->layout.nb_channels = channels; - st->layout.nb_streams = streams; - st->layout.nb_coupled_streams = coupled_streams; - st->subframe_mem[0]=st->subframe_mem[1]=st->subframe_mem[2]=0; - if (mapping_type != MAPPING_TYPE_SURROUND) - st->lfe_stream = -1; - st->bitrate_bps = OPUS_AUTO; - st->application = application; - st->variable_duration = OPUS_FRAMESIZE_ARG; - for (i=0;i<st->layout.nb_channels;i++) - 
st->layout.mapping[i] = mapping[i]; - if (!validate_layout(&st->layout) || !validate_encoder_layout(&st->layout)) - return OPUS_BAD_ARG; - ptr = (char*)st + align(sizeof(OpusMSEncoder)); - coupled_size = opus_encoder_get_size(2); - mono_size = opus_encoder_get_size(1); - - for (i=0;i<st->layout.nb_coupled_streams;i++) - { - ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 2, application); - if(ret!=OPUS_OK)return ret; - if (i==st->lfe_stream) - opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1)); - ptr += align(coupled_size); - } - for (;i<st->layout.nb_streams;i++) - { - ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 1, application); - if (i==st->lfe_stream) - opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1)); - if(ret!=OPUS_OK)return ret; - ptr += align(mono_size); - } - if (mapping_type == MAPPING_TYPE_SURROUND) - { - OPUS_CLEAR(ms_get_preemph_mem(st), channels); - OPUS_CLEAR(ms_get_window_mem(st), channels*120); - } - st->mapping_type = mapping_type; - return OPUS_OK; -} - -int opus_multistream_encoder_init( - OpusMSEncoder *st, - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - const unsigned char *mapping, - int application -) -{ - return opus_multistream_encoder_init_impl(st, Fs, channels, streams, - coupled_streams, mapping, - application, MAPPING_TYPE_NONE); -} - -int opus_multistream_surround_encoder_init( - OpusMSEncoder *st, - opus_int32 Fs, - int channels, - int mapping_family, - int *streams, - int *coupled_streams, - unsigned char *mapping, - int application -) -{ - MappingType mapping_type; - - if ((channels>255) || (channels<1)) - return OPUS_BAD_ARG; - st->lfe_stream = -1; - if (mapping_family==0) - { - if (channels==1) - { - *streams=1; - *coupled_streams=0; - mapping[0]=0; - } else if (channels==2) - { - *streams=1; - *coupled_streams=1; - mapping[0]=0; - mapping[1]=1; - } else - return OPUS_UNIMPLEMENTED; - } else if (mapping_family==1 && channels<=8 && channels>=1) - { - int i; - 
*streams=vorbis_mappings[channels-1].nb_streams; - *coupled_streams=vorbis_mappings[channels-1].nb_coupled_streams; - for (i=0;i<channels;i++) - mapping[i] = vorbis_mappings[channels-1].mapping[i]; - if (channels>=6) - st->lfe_stream = *streams-1; - } else if (mapping_family==255) - { - int i; - *streams=channels; - *coupled_streams=0; - for(i=0;i<channels;i++) - mapping[i] = i; -#ifdef ENABLE_EXPERIMENTAL_AMBISONICS - } else if (mapping_family==254) - { - int i; - *streams=channels; - *coupled_streams=0; - for(i=0;i<channels;i++) - mapping[i] = i; -#endif - } else - return OPUS_UNIMPLEMENTED; - - if (channels>2 && mapping_family==1) { - mapping_type = MAPPING_TYPE_SURROUND; -#ifdef ENABLE_EXPERIMENTAL_AMBISONICS - } else if (mapping_family==254) - { - mapping_type = MAPPING_TYPE_AMBISONICS; -#endif - } else - { - mapping_type = MAPPING_TYPE_NONE; - } - return opus_multistream_encoder_init_impl(st, Fs, channels, *streams, - *coupled_streams, mapping, - application, mapping_type); -} - -OpusMSEncoder *opus_multistream_encoder_create( - opus_int32 Fs, - int channels, - int streams, - int coupled_streams, - const unsigned char *mapping, - int application, - int *error -) -{ - int ret; - OpusMSEncoder *st; - if ((channels>255) || (channels<1) || (coupled_streams>streams) || - (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams)) - { - if (error) - *error = OPUS_BAD_ARG; - return NULL; - } - st = (OpusMSEncoder *)opus_alloc(opus_multistream_encoder_get_size(streams, coupled_streams)); - if (st==NULL) - { - if (error) - *error = OPUS_ALLOC_FAIL; - return NULL; - } - ret = opus_multistream_encoder_init(st, Fs, channels, streams, coupled_streams, mapping, application); - if (ret != OPUS_OK) - { - opus_free(st); - st = NULL; - } - if (error) - *error = ret; - return st; -} - -OpusMSEncoder *opus_multistream_surround_encoder_create( - opus_int32 Fs, - int channels, - int mapping_family, - int *streams, - int *coupled_streams, - unsigned char *mapping, - int 
application, - int *error -) -{ - int ret; - opus_int32 size; - OpusMSEncoder *st; - if ((channels>255) || (channels<1)) - { - if (error) - *error = OPUS_BAD_ARG; - return NULL; - } - size = opus_multistream_surround_encoder_get_size(channels, mapping_family); - if (!size) - { - if (error) - *error = OPUS_UNIMPLEMENTED; - return NULL; - } - st = (OpusMSEncoder *)opus_alloc(size); - if (st==NULL) - { - if (error) - *error = OPUS_ALLOC_FAIL; - return NULL; - } - ret = opus_multistream_surround_encoder_init(st, Fs, channels, mapping_family, streams, coupled_streams, mapping, application); - if (ret != OPUS_OK) - { - opus_free(st); - st = NULL; - } - if (error) - *error = ret; - return st; -} - -static void surround_rate_allocation( - OpusMSEncoder *st, - opus_int32 *rate, - int frame_size, - opus_int32 Fs - ) -{ - int i; - opus_int32 channel_rate; - int stream_offset; - int lfe_offset; - int coupled_ratio; /* Q8 */ - int lfe_ratio; /* Q8 */ - - if (st->bitrate_bps > st->layout.nb_channels*40000) - stream_offset = 20000; - else - stream_offset = st->bitrate_bps/st->layout.nb_channels/2; - stream_offset += 60*(Fs/frame_size-50); - /* We start by giving each stream (coupled or uncoupled) the same bitrate. - This models the main saving of coupled channels over uncoupled. */ - /* The LFE stream is an exception to the above and gets fewer bits. */ - lfe_offset = 3500 + 60*(Fs/frame_size-50); - /* Coupled streams get twice the mono rate after the first 20 kb/s. 
*/ - coupled_ratio = 512; - /* Should depend on the bitrate, for now we assume LFE gets 1/8 the bits of mono */ - lfe_ratio = 32; - - /* Compute bitrate allocation between streams */ - if (st->bitrate_bps==OPUS_AUTO) - { - channel_rate = Fs+60*Fs/frame_size; - } else if (st->bitrate_bps==OPUS_BITRATE_MAX) - { - channel_rate = 300000; - } else { - int nb_lfe; - int nb_uncoupled; - int nb_coupled; - int total; - nb_lfe = (st->lfe_stream!=-1); - nb_coupled = st->layout.nb_coupled_streams; - nb_uncoupled = st->layout.nb_streams-nb_coupled-nb_lfe; - total = (nb_uncoupled<<8) /* mono */ - + coupled_ratio*nb_coupled /* stereo */ - + nb_lfe*lfe_ratio; - channel_rate = 256*(st->bitrate_bps-lfe_offset*nb_lfe-stream_offset*(nb_coupled+nb_uncoupled))/total; - } -#ifndef FIXED_POINT - if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50) - { - opus_int32 bonus; - bonus = 60*(Fs/frame_size-50); - channel_rate += bonus; - } -#endif - - for (i=0;i<st->layout.nb_streams;i++) - { - if (i<st->layout.nb_coupled_streams) - rate[i] = stream_offset+(channel_rate*coupled_ratio>>8); - else if (i!=st->lfe_stream) - rate[i] = stream_offset+channel_rate; - else - rate[i] = lfe_offset+(channel_rate*lfe_ratio>>8); - } -} - -#ifdef ENABLE_EXPERIMENTAL_AMBISONICS -static void ambisonics_rate_allocation( - OpusMSEncoder *st, - opus_int32 *rate, - int frame_size, - opus_int32 Fs - ) -{ - int i; - int non_mono_rate; - int total_rate; - - /* The mono channel gets (rate_ratio_num / rate_ratio_den) times as many bits - * as all other channels */ - const int rate_ratio_num = 4; - const int rate_ratio_den = 3; - const int num_channels = st->layout.nb_streams; - - if (st->bitrate_bps==OPUS_AUTO) - { - total_rate = num_channels * (20000 + st->layout.nb_streams*(Fs+60*Fs/frame_size)); - } else if (st->bitrate_bps==OPUS_BITRATE_MAX) - { - total_rate = num_channels * 320000; - } else { - total_rate = st->bitrate_bps; - } - - /* Let y be the non-mono rate and let p, q be integers such that 
the mono - * channel rate is (p/q) * y. - * Also let T be the total bitrate to allocate. Then - * (n - 1) y + (p/q) y = T - * y = (T q) / (qn - q + p) - */ - non_mono_rate = - total_rate * rate_ratio_den - / (rate_ratio_den*num_channels + rate_ratio_num - rate_ratio_den); - -#ifndef FIXED_POINT - if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50) - { - opus_int32 bonus = 60*(Fs/frame_size-50); - non_mono_rate += bonus; - } -#endif - - rate[0] = total_rate - (num_channels - 1) * non_mono_rate; - for (i=1;i<st->layout.nb_streams;i++) - { - rate[i] = non_mono_rate; - } -} -#endif /* ENABLE_EXPERIMENTAL_AMBISONICS */ - -static opus_int32 rate_allocation( - OpusMSEncoder *st, - opus_int32 *rate, - int frame_size - ) -{ - int i; - opus_int32 rate_sum=0; - opus_int32 Fs; - char *ptr; - - ptr = (char*)st + align(sizeof(OpusMSEncoder)); - opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs)); - -#ifdef ENABLE_EXPERIMENTAL_AMBISONICS - if (st->mapping_type == MAPPING_TYPE_AMBISONICS) { - ambisonics_rate_allocation(st, rate, frame_size, Fs); - } else -#endif - { - surround_rate_allocation(st, rate, frame_size, Fs); - } - - for (i=0;i<st->layout.nb_streams;i++) - { - rate[i] = IMAX(rate[i], 500); - rate_sum += rate[i]; - } - return rate_sum; -} - -/* Max size in case the encoder decides to return three frames */ -#define MS_FRAME_TMP (3*1275+7) -static int opus_multistream_encode_native -( - OpusMSEncoder *st, - opus_copy_channel_in_func copy_channel_in, - const void *pcm, - int analysis_frame_size, - unsigned char *data, - opus_int32 max_data_bytes, - int lsb_depth, - downmix_func downmix, - int float_api -) -{ - opus_int32 Fs; - int coupled_size; - int mono_size; - int s; - char *ptr; - int tot_size; - VARDECL(opus_val16, buf); - VARDECL(opus_val16, bandSMR); - unsigned char tmp_data[MS_FRAME_TMP]; - OpusRepacketizer rp; - opus_int32 vbr; - const CELTMode *celt_mode; - opus_int32 bitrates[256]; - opus_val16 bandLogE[42]; - opus_val32 *mem = 
NULL; - opus_val32 *preemph_mem=NULL; - int frame_size; - opus_int32 rate_sum; - opus_int32 smallest_packet; - ALLOC_STACK; - - if (st->mapping_type == MAPPING_TYPE_SURROUND) - { - preemph_mem = ms_get_preemph_mem(st); - mem = ms_get_window_mem(st); - } - - ptr = (char*)st + align(sizeof(OpusMSEncoder)); - opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs)); - opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_VBR(&vbr)); - opus_encoder_ctl((OpusEncoder*)ptr, CELT_GET_MODE(&celt_mode)); - - { - opus_int32 delay_compensation; - int channels; - - channels = st->layout.nb_streams + st->layout.nb_coupled_streams; - opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_LOOKAHEAD(&delay_compensation)); - delay_compensation -= Fs/400; - frame_size = compute_frame_size(pcm, analysis_frame_size, - st->variable_duration, channels, Fs, st->bitrate_bps, - delay_compensation, downmix -#ifndef DISABLE_FLOAT_API - , st->subframe_mem -#endif - ); - } - - if (400*frame_size < Fs) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - /* Validate frame_size before using it to allocate stack space. - This mirrors the checks in opus_encode[_float](). */ - if (400*frame_size != Fs && 200*frame_size != Fs && - 100*frame_size != Fs && 50*frame_size != Fs && - 25*frame_size != Fs && 50*frame_size != 3*Fs) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } - - /* Smallest packet the encoder can produce. 
*/ - smallest_packet = st->layout.nb_streams*2-1; - if (max_data_bytes < smallest_packet) - { - RESTORE_STACK; - return OPUS_BUFFER_TOO_SMALL; - } - ALLOC(buf, 2*frame_size, opus_val16); - coupled_size = opus_encoder_get_size(2); - mono_size = opus_encoder_get_size(1); - - ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16); - if (st->mapping_type == MAPPING_TYPE_SURROUND) - { - surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in, st->arch); - } - - /* Compute bitrate allocation between streams (this could be a lot better) */ - rate_sum = rate_allocation(st, bitrates, frame_size); - - if (!vbr) - { - if (st->bitrate_bps == OPUS_AUTO) - { - max_data_bytes = IMIN(max_data_bytes, 3*rate_sum/(3*8*Fs/frame_size)); - } else if (st->bitrate_bps != OPUS_BITRATE_MAX) - { - max_data_bytes = IMIN(max_data_bytes, IMAX(smallest_packet, - 3*st->bitrate_bps/(3*8*Fs/frame_size))); - } - } - ptr = (char*)st + align(sizeof(OpusMSEncoder)); - for (s=0;s<st->layout.nb_streams;s++) - { - OpusEncoder *enc; - enc = (OpusEncoder*)ptr; - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrates[s])); - if (st->mapping_type == MAPPING_TYPE_SURROUND) - { - opus_int32 equiv_rate; - equiv_rate = st->bitrate_bps; - if (frame_size*50 < Fs) - equiv_rate -= 60*(Fs/frame_size - 50)*st->layout.nb_channels; - if (equiv_rate > 10000*st->layout.nb_channels) - opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); - else if (equiv_rate > 7000*st->layout.nb_channels) - opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_SUPERWIDEBAND)); - else if (equiv_rate > 5000*st->layout.nb_channels) - opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_WIDEBAND)); - else - opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND)); - if (s < st->layout.nb_coupled_streams) - { - /* To preserve the spatial image, force 
stereo CELT on coupled streams */ - opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY)); - opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(2)); - } - } -#ifdef ENABLE_EXPERIMENTAL_AMBISONICS - else if (st->mapping_type == MAPPING_TYPE_AMBISONICS) { - opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY)); - } -#endif - } - - ptr = (char*)st + align(sizeof(OpusMSEncoder)); - /* Counting ToC */ - tot_size = 0; - for (s=0;s<st->layout.nb_streams;s++) - { - OpusEncoder *enc; - int len; - int curr_max; - int c1, c2; - int ret; - - opus_repacketizer_init(&rp); - enc = (OpusEncoder*)ptr; - if (s < st->layout.nb_coupled_streams) - { - int i; - int left, right; - left = get_left_channel(&st->layout, s, -1); - right = get_right_channel(&st->layout, s, -1); - (*copy_channel_in)(buf, 2, - pcm, st->layout.nb_channels, left, frame_size); - (*copy_channel_in)(buf+1, 2, - pcm, st->layout.nb_channels, right, frame_size); - ptr += align(coupled_size); - if (st->mapping_type == MAPPING_TYPE_SURROUND) - { - for (i=0;i<21;i++) - { - bandLogE[i] = bandSMR[21*left+i]; - bandLogE[21+i] = bandSMR[21*right+i]; - } - } - c1 = left; - c2 = right; - } else { - int i; - int chan = get_mono_channel(&st->layout, s, -1); - (*copy_channel_in)(buf, 1, - pcm, st->layout.nb_channels, chan, frame_size); - ptr += align(mono_size); - if (st->mapping_type == MAPPING_TYPE_SURROUND) - { - for (i=0;i<21;i++) - bandLogE[i] = bandSMR[21*chan+i]; - } - c1 = chan; - c2 = -1; - } - if (st->mapping_type == MAPPING_TYPE_SURROUND) - opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE)); - /* number of bytes left (+Toc) */ - curr_max = max_data_bytes - tot_size; - /* Reserve one byte for the last stream and two for the others */ - curr_max -= IMAX(0,2*(st->layout.nb_streams-s-1)-1); - curr_max = IMIN(curr_max,MS_FRAME_TMP); - /* Repacketizer will add one or two bytes for self-delimited frames */ - if (s != st->layout.nb_streams-1) curr_max -= curr_max>253 ? 
2 : 1; - if (!vbr && s == st->layout.nb_streams-1) - opus_encoder_ctl(enc, OPUS_SET_BITRATE(curr_max*(8*Fs/frame_size))); - len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth, - pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix, float_api); - if (len<0) - { - RESTORE_STACK; - return len; - } - /* We need to use the repacketizer to add the self-delimiting lengths - while taking into account the fact that the encoder can now return - more than one frame at a time (e.g. 60 ms CELT-only) */ - ret = opus_repacketizer_cat(&rp, tmp_data, len); - /* If the opus_repacketizer_cat() fails, then something's seriously wrong - with the encoder. */ - if (ret != OPUS_OK) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - len = opus_repacketizer_out_range_impl(&rp, 0, opus_repacketizer_get_nb_frames(&rp), - data, max_data_bytes-tot_size, s != st->layout.nb_streams-1, !vbr && s == st->layout.nb_streams-1); - data += len; - tot_size += len; - } - /*printf("\n");*/ - RESTORE_STACK; - return tot_size; -} - -#if !defined(DISABLE_FLOAT_API) -static void opus_copy_channel_in_float( - opus_val16 *dst, - int dst_stride, - const void *src, - int src_stride, - int src_channel, - int frame_size -) -{ - const float *float_src; - opus_int32 i; - float_src = (const float *)src; - for (i=0;i<frame_size;i++) -#if defined(FIXED_POINT) - dst[i*dst_stride] = FLOAT2INT16(float_src[i*src_stride+src_channel]); -#else - dst[i*dst_stride] = float_src[i*src_stride+src_channel]; -#endif -} -#endif - -static void opus_copy_channel_in_short( - opus_val16 *dst, - int dst_stride, - const void *src, - int src_stride, - int src_channel, - int frame_size -) -{ - const opus_int16 *short_src; - opus_int32 i; - short_src = (const opus_int16 *)src; - for (i=0;i<frame_size;i++) -#if defined(FIXED_POINT) - dst[i*dst_stride] = short_src[i*src_stride+src_channel]; -#else - dst[i*dst_stride] = (1/32768.f)*short_src[i*src_stride+src_channel]; -#endif -} - - -#ifdef FIXED_POINT 
-int opus_multistream_encode( - OpusMSEncoder *st, - const opus_val16 *pcm, - int frame_size, - unsigned char *data, - opus_int32 max_data_bytes -) -{ - return opus_multistream_encode_native(st, opus_copy_channel_in_short, - pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0); -} - -#ifndef DISABLE_FLOAT_API -int opus_multistream_encode_float( - OpusMSEncoder *st, - const float *pcm, - int frame_size, - unsigned char *data, - opus_int32 max_data_bytes -) -{ - return opus_multistream_encode_native(st, opus_copy_channel_in_float, - pcm, frame_size, data, max_data_bytes, 16, downmix_float, 1); -} -#endif - -#else - -int opus_multistream_encode_float -( - OpusMSEncoder *st, - const opus_val16 *pcm, - int frame_size, - unsigned char *data, - opus_int32 max_data_bytes -) -{ - return opus_multistream_encode_native(st, opus_copy_channel_in_float, - pcm, frame_size, data, max_data_bytes, 24, downmix_float, 1); -} - -int opus_multistream_encode( - OpusMSEncoder *st, - const opus_int16 *pcm, - int frame_size, - unsigned char *data, - opus_int32 max_data_bytes -) -{ - return opus_multistream_encode_native(st, opus_copy_channel_in_short, - pcm, frame_size, data, max_data_bytes, 16, downmix_int, 0); -} -#endif - -int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...) 
-{ - va_list ap; - int coupled_size, mono_size; - char *ptr; - int ret = OPUS_OK; - - va_start(ap, request); - - coupled_size = opus_encoder_get_size(2); - mono_size = opus_encoder_get_size(1); - ptr = (char*)st + align(sizeof(OpusMSEncoder)); - switch (request) - { - case OPUS_SET_BITRATE_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - if (value<0 && value!=OPUS_AUTO && value!=OPUS_BITRATE_MAX) - { - goto bad_arg; - } - st->bitrate_bps = value; - } - break; - case OPUS_GET_BITRATE_REQUEST: - { - int s; - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = 0; - for (s=0;s<st->layout.nb_streams;s++) - { - opus_int32 rate; - OpusEncoder *enc; - enc = (OpusEncoder*)ptr; - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - opus_encoder_ctl(enc, request, &rate); - *value += rate; - } - } - break; - case OPUS_GET_LSB_DEPTH_REQUEST: - case OPUS_GET_VBR_REQUEST: - case OPUS_GET_APPLICATION_REQUEST: - case OPUS_GET_BANDWIDTH_REQUEST: - case OPUS_GET_COMPLEXITY_REQUEST: - case OPUS_GET_PACKET_LOSS_PERC_REQUEST: - case OPUS_GET_DTX_REQUEST: - case OPUS_GET_VOICE_RATIO_REQUEST: - case OPUS_GET_VBR_CONSTRAINT_REQUEST: - case OPUS_GET_SIGNAL_REQUEST: - case OPUS_GET_LOOKAHEAD_REQUEST: - case OPUS_GET_SAMPLE_RATE_REQUEST: - case OPUS_GET_INBAND_FEC_REQUEST: - case OPUS_GET_FORCE_CHANNELS_REQUEST: - case OPUS_GET_PREDICTION_DISABLED_REQUEST: - { - OpusEncoder *enc; - /* For int32* GET params, just query the first stream */ - opus_int32 *value = va_arg(ap, opus_int32*); - enc = (OpusEncoder*)ptr; - ret = opus_encoder_ctl(enc, request, value); - } - break; - case OPUS_GET_FINAL_RANGE_REQUEST: - { - int s; - opus_uint32 *value = va_arg(ap, opus_uint32*); - opus_uint32 tmp; - if (!value) - { - goto bad_arg; - } - *value=0; - for (s=0;s<st->layout.nb_streams;s++) - { - OpusEncoder *enc; - enc = (OpusEncoder*)ptr; - if (s < st->layout.nb_coupled_streams) - ptr += 
align(coupled_size); - else - ptr += align(mono_size); - ret = opus_encoder_ctl(enc, request, &tmp); - if (ret != OPUS_OK) break; - *value ^= tmp; - } - } - break; - case OPUS_SET_LSB_DEPTH_REQUEST: - case OPUS_SET_COMPLEXITY_REQUEST: - case OPUS_SET_VBR_REQUEST: - case OPUS_SET_VBR_CONSTRAINT_REQUEST: - case OPUS_SET_MAX_BANDWIDTH_REQUEST: - case OPUS_SET_BANDWIDTH_REQUEST: - case OPUS_SET_SIGNAL_REQUEST: - case OPUS_SET_APPLICATION_REQUEST: - case OPUS_SET_INBAND_FEC_REQUEST: - case OPUS_SET_PACKET_LOSS_PERC_REQUEST: - case OPUS_SET_DTX_REQUEST: - case OPUS_SET_FORCE_MODE_REQUEST: - case OPUS_SET_FORCE_CHANNELS_REQUEST: - case OPUS_SET_PREDICTION_DISABLED_REQUEST: - { - int s; - /* This works for int32 params */ - opus_int32 value = va_arg(ap, opus_int32); - for (s=0;s<st->layout.nb_streams;s++) - { - OpusEncoder *enc; - - enc = (OpusEncoder*)ptr; - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - ret = opus_encoder_ctl(enc, request, value); - if (ret != OPUS_OK) - break; - } - } - break; - case OPUS_MULTISTREAM_GET_ENCODER_STATE_REQUEST: - { - int s; - opus_int32 stream_id; - OpusEncoder **value; - stream_id = va_arg(ap, opus_int32); - if (stream_id<0 || stream_id >= st->layout.nb_streams) - ret = OPUS_BAD_ARG; - value = va_arg(ap, OpusEncoder**); - if (!value) - { - goto bad_arg; - } - for (s=0;s<stream_id;s++) - { - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - } - *value = (OpusEncoder*)ptr; - } - break; - case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->variable_duration = value; - } - break; - case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST: - { - opus_int32 *value = va_arg(ap, opus_int32*); - if (!value) - { - goto bad_arg; - } - *value = st->variable_duration; - } - break; - case OPUS_RESET_STATE: - { - int s; - st->subframe_mem[0] = st->subframe_mem[1] = st->subframe_mem[2] = 0; - if 
(st->mapping_type == MAPPING_TYPE_SURROUND) - { - OPUS_CLEAR(ms_get_preemph_mem(st), st->layout.nb_channels); - OPUS_CLEAR(ms_get_window_mem(st), st->layout.nb_channels*120); - } - for (s=0;s<st->layout.nb_streams;s++) - { - OpusEncoder *enc; - enc = (OpusEncoder*)ptr; - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - ret = opus_encoder_ctl(enc, OPUS_RESET_STATE); - if (ret != OPUS_OK) - break; - } - } - break; - default: - ret = OPUS_UNIMPLEMENTED; - break; - } - - va_end(ap); - return ret; -bad_arg: - va_end(ap); - return OPUS_BAD_ARG; -} - -void opus_multistream_encoder_destroy(OpusMSEncoder *st) -{ - opus_free(st); -} diff --git a/thirdparty/opus/opus_private.h b/thirdparty/opus/opus_private.h deleted file mode 100644 index 3b62eed096..0000000000 --- a/thirdparty/opus/opus_private.h +++ /dev/null @@ -1,134 +0,0 @@ -/* Copyright (c) 2012 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -#ifndef OPUS_PRIVATE_H -#define OPUS_PRIVATE_H - -#include "arch.h" -#include "opus.h" -#include "celt.h" - -#include <stddef.h> /* offsetof */ - -struct OpusRepacketizer { - unsigned char toc; - int nb_frames; - const unsigned char *frames[48]; - opus_int16 len[48]; - int framesize; -}; - -typedef struct ChannelLayout { - int nb_channels; - int nb_streams; - int nb_coupled_streams; - unsigned char mapping[256]; -} ChannelLayout; - -int validate_layout(const ChannelLayout *layout); -int get_left_channel(const ChannelLayout *layout, int stream_id, int prev); -int get_right_channel(const ChannelLayout *layout, int stream_id, int prev); -int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev); - - - -#define MODE_SILK_ONLY 1000 -#define MODE_HYBRID 1001 -#define MODE_CELT_ONLY 1002 - -#define OPUS_SET_VOICE_RATIO_REQUEST 11018 -#define OPUS_GET_VOICE_RATIO_REQUEST 11019 - -/** Configures the encoder's expected percentage of voice - * opposed to music or other signals. - * - * @note This interface is currently more aspiration than actuality. It's - * ultimately expected to bias an automatic signal classifier, but it currently - * just shifts the static bitrate to mode mapping around a little bit. - * - * @param[in] x <tt>int</tt>: Voice percentage in the range 0-100, inclusive. 
- * @hideinitializer */ -#define OPUS_SET_VOICE_RATIO(x) OPUS_SET_VOICE_RATIO_REQUEST, __opus_check_int(x) -/** Gets the encoder's configured voice ratio value, @see OPUS_SET_VOICE_RATIO - * - * @param[out] x <tt>int*</tt>: Voice percentage in the range 0-100, inclusive. - * @hideinitializer */ -#define OPUS_GET_VOICE_RATIO(x) OPUS_GET_VOICE_RATIO_REQUEST, __opus_check_int_ptr(x) - - -#define OPUS_SET_FORCE_MODE_REQUEST 11002 -#define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x) - -typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int); -void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); -void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); - -int encode_size(int size, unsigned char *data); - -opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs); - -opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, - int variable_duration, int C, opus_int32 Fs, int bitrate_bps, - int delay_compensation, downmix_func downmix -#ifndef DISABLE_FLOAT_API - , float *subframe_mem -#endif - ); - -opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, - unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, - const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, - int analysis_channels, downmix_func downmix, int float_api); - -int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len, - opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited, - opus_int32 *packet_offset, int soft_clip); - -/* Make sure everything is properly aligned. 
*/ -static OPUS_INLINE int align(int i) -{ - struct foo {char c; union { void* p; opus_int32 i; opus_val32 v; } u;}; - - unsigned int alignment = offsetof(struct foo, u); - - /* Optimizing compilers should optimize div and multiply into and - for all sensible alignment values. */ - return ((i + alignment - 1) / alignment) * alignment; -} - -int opus_packet_parse_impl(const unsigned char *data, opus_int32 len, - int self_delimited, unsigned char *out_toc, - const unsigned char *frames[48], opus_int16 size[48], - int *payload_offset, opus_int32 *packet_offset); - -opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int end, - unsigned char *data, opus_int32 maxlen, int self_delimited, int pad); - -int pad_frame(unsigned char *data, opus_int32 len, opus_int32 new_len); - -#endif /* OPUS_PRIVATE_H */ diff --git a/thirdparty/opus/opusfile.c b/thirdparty/opus/opusfile.c deleted file mode 100644 index b8b3a354cf..0000000000 --- a/thirdparty/opus/opusfile.c +++ /dev/null @@ -1,3266 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 1994-2012 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: stdio-based convenience library for opening/seeking/decoding - last mod: $Id: vorbisfile.c 17573 2010-10-27 14:53:59Z xiphmont $ - - ********************************************************************/ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "internal.h" -#include <stdio.h> -#include <stdlib.h> -#include <errno.h> -#include <limits.h> -#include <string.h> -#include <math.h> - -#include "opusfile.h" - -/*This implementation is largely based off of libvorbisfile. - All of the Ogg bits work roughly the same, though I have made some - "improvements" that have not been folded back there, yet.*/ - -/*A 'chained bitstream' is an Ogg Opus bitstream that contains more than one - logical bitstream arranged end to end (the only form of Ogg multiplexing - supported by this library. - Grouping (parallel multiplexing) is not supported, except to the extent that - if there are multiple logical Ogg streams in a single link of the chain, we - will ignore all but the first Opus stream we find.*/ - -/*An Ogg Opus file can be played beginning to end (streamed) without worrying - ahead of time about chaining (see opusdec from the opus-tools package). - If we have the whole file, however, and want random access - (seeking/scrubbing) or desire to know the total length/time of a file, we - need to account for the possibility of chaining.*/ - -/*We can handle things a number of ways. - We can determine the entire bitstream structure right off the bat, or find - pieces on demand. - This library determines and caches structure for the entire bitstream, but - builds a virtual decoder on the fly when moving between links in the chain.*/ - -/*There are also different ways to implement seeking. 
- Enough information exists in an Ogg bitstream to seek to sample-granularity - positions in the output. - Or, one can seek by picking some portion of the stream roughly in the desired - area if we only want coarse navigation through the stream. - We implement and expose both strategies.*/ - -/*The maximum number of bytes in a page (including the page headers).*/ -#define OP_PAGE_SIZE_MAX (65307) -/*The default amount to seek backwards per step when trying to find the - previous page. - This must be at least as large as the maximum size of a page.*/ -#define OP_CHUNK_SIZE (65536) -/*The maximum amount to seek backwards per step when trying to find the - previous page.*/ -#define OP_CHUNK_SIZE_MAX (1024*(opus_int32)1024) -/*A smaller read size is needed for low-rate streaming.*/ -#define OP_READ_SIZE (2048) - -int op_test(OpusHead *_head, - const unsigned char *_initial_data,size_t _initial_bytes){ - ogg_sync_state oy; - char *data; - int err; - /*The first page of a normal Opus file will be at most 57 bytes (27 Ogg - page header bytes + 1 lacing value + 21 Opus header bytes + 8 channel - mapping bytes). - It will be at least 47 bytes (27 Ogg page header bytes + 1 lacing value + - 19 Opus header bytes using channel mapping family 0). - If we don't have at least that much data, give up now.*/ - if(_initial_bytes<47)return OP_FALSE; - /*Only proceed if we start with the magic OggS string. 
- This is to prevent us spending a lot of time allocating memory and looking - for Ogg pages in non-Ogg files.*/ - if(memcmp(_initial_data,"OggS",4)!=0)return OP_ENOTFORMAT; - ogg_sync_init(&oy); - data=ogg_sync_buffer(&oy,_initial_bytes); - if(data!=NULL){ - ogg_stream_state os; - ogg_page og; - int ret; - memcpy(data,_initial_data,_initial_bytes); - ogg_sync_wrote(&oy,_initial_bytes); - ogg_stream_init(&os,-1); - err=OP_FALSE; - do{ - ogg_packet op; - ret=ogg_sync_pageout(&oy,&og); - /*Ignore holes.*/ - if(ret<0)continue; - /*Stop if we run out of data.*/ - if(!ret)break; - ogg_stream_reset_serialno(&os,ogg_page_serialno(&og)); - ogg_stream_pagein(&os,&og); - /*Only process the first packet on this page (if it's a BOS packet, - it's required to be the only one).*/ - if(ogg_stream_packetout(&os,&op)==1){ - if(op.b_o_s){ - ret=opus_head_parse(_head,op.packet,op.bytes); - /*If this didn't look like Opus, keep going.*/ - if(ret==OP_ENOTFORMAT)continue; - /*Otherwise we're done, one way or another.*/ - err=ret; - } - /*We finished parsing the headers. - There is no Opus to be found.*/ - else err=OP_ENOTFORMAT; - } - } - while(err==OP_FALSE); - ogg_stream_clear(&os); - } - else err=OP_EFAULT; - ogg_sync_clear(&oy); - return err; -} - -/*Many, many internal helpers. - The intention is not to be confusing. - Rampant duplication and monolithic function implementation (though we do have - some large, omnibus functions still) would be harder to understand anyway. - The high level functions are last. - Begin grokking near the end of the file if you prefer to read things - top-down.*/ - -/*The read/seek functions track absolute position within the stream.*/ - -/*Read a little more data from the file/pipe into the ogg_sync framer. - _nbytes: The maximum number of bytes to read. 
- Return: A positive number of bytes read on success, 0 on end-of-file, or a - negative value on failure.*/ -static int op_get_data(OggOpusFile *_of,int _nbytes){ - unsigned char *buffer; - int nbytes; - OP_ASSERT(_nbytes>0); - buffer=(unsigned char *)ogg_sync_buffer(&_of->oy,_nbytes); - nbytes=(int)(*_of->callbacks.read)(_of->source,buffer,_nbytes); - OP_ASSERT(nbytes<=_nbytes); - if(OP_LIKELY(nbytes>0))ogg_sync_wrote(&_of->oy,nbytes); - return nbytes; -} - -/*Save a tiny smidge of verbosity to make the code more readable.*/ -static int op_seek_helper(OggOpusFile *_of,opus_int64 _offset){ - if(_offset==_of->offset)return 0; - if(_of->callbacks.seek==NULL - ||(*_of->callbacks.seek)(_of->source,_offset,SEEK_SET)){ - return OP_EREAD; - } - _of->offset=_offset; - ogg_sync_reset(&_of->oy); - return 0; -} - -/*Get the current position indicator of the underlying source. - This should be the same as the value reported by tell().*/ -static opus_int64 op_position(const OggOpusFile *_of){ - /*The current position indicator is _not_ simply offset. - We may also have unprocessed, buffered data in the sync state.*/ - return _of->offset+_of->oy.fill-_of->oy.returned; -} - -/*From the head of the stream, get the next page. - _boundary specifies if the function is allowed to fetch more data from the - stream (and how much) or only use internally buffered data. - _boundary: -1: Unbounded search. - 0: Read no additional data. - Use only cached data. - n: Search for the start of a new page up to file position n. - Return: n>=0: Found a page at absolute offset n. - OP_FALSE: Hit the _boundary limit. - OP_EREAD: An underlying read operation failed. 
- OP_BADLINK: We hit end-of-file before reaching _boundary.*/ -static opus_int64 op_get_next_page(OggOpusFile *_of,ogg_page *_og, - opus_int64 _boundary){ - while(_boundary<=0||_of->offset<_boundary){ - int more; - more=ogg_sync_pageseek(&_of->oy,_og); - /*Skipped (-more) bytes.*/ - if(OP_UNLIKELY(more<0))_of->offset-=more; - else if(more==0){ - int read_nbytes; - int ret; - /*Send more paramedics.*/ - if(!_boundary)return OP_FALSE; - if(_boundary<0)read_nbytes=OP_READ_SIZE; - else{ - opus_int64 position; - position=op_position(_of); - if(position>=_boundary)return OP_FALSE; - read_nbytes=(int)OP_MIN(_boundary-position,OP_READ_SIZE); - } - ret=op_get_data(_of,read_nbytes); - if(OP_UNLIKELY(ret<0))return OP_EREAD; - if(OP_UNLIKELY(ret==0)){ - /*Only fail cleanly on EOF if we didn't have a known boundary. - Otherwise, we should have been able to reach that boundary, and this - is a fatal error.*/ - return OP_UNLIKELY(_boundary<0)?OP_FALSE:OP_EBADLINK; - } - } - else{ - /*Got a page. - Return the page start offset and advance the internal offset past the - page end.*/ - opus_int64 page_offset; - page_offset=_of->offset; - _of->offset+=more; - OP_ASSERT(page_offset>=0); - return page_offset; - } - } - return OP_FALSE; -} - -static int op_add_serialno(const ogg_page *_og, - ogg_uint32_t **_serialnos,int *_nserialnos,int *_cserialnos){ - ogg_uint32_t *serialnos; - int nserialnos; - int cserialnos; - ogg_uint32_t s; - s=ogg_page_serialno(_og); - serialnos=*_serialnos; - nserialnos=*_nserialnos; - cserialnos=*_cserialnos; - if(OP_UNLIKELY(nserialnos>=cserialnos)){ - if(OP_UNLIKELY(cserialnos>INT_MAX/(int)sizeof(*serialnos)-1>>1)){ - return OP_EFAULT; - } - cserialnos=2*cserialnos+1; - OP_ASSERT(nserialnos<cserialnos); - serialnos=(ogg_uint32_t *)_ogg_realloc(serialnos, - sizeof(*serialnos)*cserialnos); - if(OP_UNLIKELY(serialnos==NULL))return OP_EFAULT; - } - serialnos[nserialnos++]=s; - *_serialnos=serialnos; - *_nserialnos=nserialnos; - *_cserialnos=cserialnos; - return 
0; -} - -/*Returns nonzero if found.*/ -static int op_lookup_serialno(ogg_uint32_t _s, - const ogg_uint32_t *_serialnos,int _nserialnos){ - int i; - for(i=0;i<_nserialnos&&_serialnos[i]!=_s;i++); - return i<_nserialnos; -} - -static int op_lookup_page_serialno(const ogg_page *_og, - const ogg_uint32_t *_serialnos,int _nserialnos){ - return op_lookup_serialno(ogg_page_serialno(_og),_serialnos,_nserialnos); -} - -typedef struct OpusSeekRecord OpusSeekRecord; - -/*We use this to remember the pages we found while enumerating the links of a - chained stream. - We keep track of the starting and ending offsets, as well as the point we - started searching from, so we know where to bisect. - We also keep the serial number, so we can tell if the page belonged to the - current link or not, as well as the granule position, to aid in estimating - the start of the link.*/ -struct OpusSeekRecord{ - /*The earliest byte we know of such that reading forward from it causes - capture to be regained at this page.*/ - opus_int64 search_start; - /*The offset of this page.*/ - opus_int64 offset; - /*The size of this page.*/ - opus_int32 size; - /*The serial number of this page.*/ - ogg_uint32_t serialno; - /*The granule position of this page.*/ - ogg_int64_t gp; -}; - -/*Find the last page beginning before _offset with a valid granule position. - There is no '_boundary' parameter as it will always have to read more data. - This is much dirtier than the above, as Ogg doesn't have any backward search - linkage. - This search prefers pages of the specified serial number. - If a page of the specified serial number is spotted during the - seek-back-and-read-forward, it will return the info of last page of the - matching serial number, instead of the very last page, unless the very last - page belongs to a different link than preferred serial number. - If no page of the specified serial number is seen, it will return the info of - the last page. 
- [out] _sr: Returns information about the page that was found on success. - _offset: The _offset before which to find a page. - Any page returned will consist of data entirely before _offset. - _serialno: The preferred serial number. - If a page with this serial number is found, it will be returned - even if another page in the same link is found closer to - _offset. - This is purely opportunistic: there is no guarantee such a page - will be found if it exists. - _serialnos: The list of serial numbers in the link that contains the - preferred serial number. - _nserialnos: The number of serial numbers in the current link. - Return: 0 on success, or a negative value on failure. - OP_EREAD: Failed to read more data (error or EOF). - OP_EBADLINK: We couldn't find a page even after seeking back to the - start of the stream.*/ -static int op_get_prev_page_serial(OggOpusFile *_of,OpusSeekRecord *_sr, - opus_int64 _offset,ogg_uint32_t _serialno, - const ogg_uint32_t *_serialnos,int _nserialnos){ - OpusSeekRecord preferred_sr; - ogg_page og; - opus_int64 begin; - opus_int64 end; - opus_int64 original_end; - opus_int32 chunk_size; - int preferred_found; - original_end=end=begin=_offset; - preferred_found=0; - _offset=-1; - chunk_size=OP_CHUNK_SIZE; - do{ - opus_int64 search_start; - int ret; - OP_ASSERT(chunk_size>=OP_PAGE_SIZE_MAX); - begin=OP_MAX(begin-chunk_size,0); - ret=op_seek_helper(_of,begin); - if(OP_UNLIKELY(ret<0))return ret; - search_start=begin; - while(_of->offset<end){ - opus_int64 llret; - ogg_uint32_t serialno; - llret=op_get_next_page(_of,&og,end); - if(OP_UNLIKELY(llret<OP_FALSE))return (int)llret; - else if(llret==OP_FALSE)break; - serialno=ogg_page_serialno(&og); - /*Save the information for this page. - We're not interested in the page itself... 
just the serial number, byte - offset, page size, and granule position.*/ - _sr->search_start=search_start; - _sr->offset=_offset=llret; - _sr->serialno=serialno; - OP_ASSERT(_of->offset-_offset>=0); - OP_ASSERT(_of->offset-_offset<=OP_PAGE_SIZE_MAX); - _sr->size=(opus_int32)(_of->offset-_offset); - _sr->gp=ogg_page_granulepos(&og); - /*If this page is from the stream we're looking for, remember it.*/ - if(serialno==_serialno){ - preferred_found=1; - *&preferred_sr=*_sr; - } - if(!op_lookup_serialno(serialno,_serialnos,_nserialnos)){ - /*We fell off the end of the link, which means we seeked back too far - and shouldn't have been looking in that link to begin with. - If we found the preferred serial number, forget that we saw it.*/ - preferred_found=0; - } - search_start=llret+1; - } - /*We started from the beginning of the stream and found nothing. - This should be impossible unless the contents of the source changed out - from under us after we read from it.*/ - if(OP_UNLIKELY(!begin)&&OP_UNLIKELY(_offset<0))return OP_EBADLINK; - /*Bump up the chunk size. - This is mildly helpful when seeks are very expensive (http).*/ - chunk_size=OP_MIN(2*chunk_size,OP_CHUNK_SIZE_MAX); - /*Avoid quadratic complexity if we hit an invalid patch of the file.*/ - end=OP_MIN(begin+OP_PAGE_SIZE_MAX-1,original_end); - } - while(_offset<0); - if(preferred_found)*_sr=*&preferred_sr; - return 0; -} - -/*Find the last page beginning before _offset with the given serial number and - a valid granule position. - Unlike the above search, this continues until it finds such a page, but does - not stray outside the current link. - We could implement it (inefficiently) by calling op_get_prev_page_serial() - repeatedly until it returned a page that had both our preferred serial - number and a valid granule position, but doing it with a separate function - allows us to avoid repeatedly re-scanning valid pages from other streams as - we seek-back-and-read-forward. 
- [out] _gp: Returns the granule position of the page that was found on - success. - _offset: The _offset before which to find a page. - Any page returned will consist of data entirely before _offset. - _serialno: The target serial number. - _serialnos: The list of serial numbers in the link that contains the - preferred serial number. - _nserialnos: The number of serial numbers in the current link. - Return: The offset of the page on success, or a negative value on failure. - OP_EREAD: Failed to read more data (error or EOF). - OP_EBADLINK: We couldn't find a page even after seeking back past the - beginning of the link.*/ -static opus_int64 op_get_last_page(OggOpusFile *_of,ogg_int64_t *_gp, - opus_int64 _offset,ogg_uint32_t _serialno, - const ogg_uint32_t *_serialnos,int _nserialnos){ - ogg_page og; - ogg_int64_t gp; - opus_int64 begin; - opus_int64 end; - opus_int64 original_end; - opus_int32 chunk_size; - /*The target serial number must belong to the current link.*/ - OP_ASSERT(op_lookup_serialno(_serialno,_serialnos,_nserialnos)); - original_end=end=begin=_offset; - _offset=-1; - /*We shouldn't have to initialize gp, but gcc is too dumb to figure out that - ret>=0 implies we entered the if(page_gp!=-1) block at least once.*/ - gp=-1; - chunk_size=OP_CHUNK_SIZE; - do{ - int left_link; - int ret; - OP_ASSERT(chunk_size>=OP_PAGE_SIZE_MAX); - begin=OP_MAX(begin-chunk_size,0); - ret=op_seek_helper(_of,begin); - if(OP_UNLIKELY(ret<0))return ret; - left_link=0; - while(_of->offset<end){ - opus_int64 llret; - ogg_uint32_t serialno; - llret=op_get_next_page(_of,&og,end); - if(OP_UNLIKELY(llret<OP_FALSE))return llret; - else if(llret==OP_FALSE)break; - serialno=ogg_page_serialno(&og); - if(serialno==_serialno){ - ogg_int64_t page_gp; - /*The page is from the right stream...*/ - page_gp=ogg_page_granulepos(&og); - if(page_gp!=-1){ - /*And has a valid granule position. 
- Let's remember it.*/ - _offset=llret; - gp=page_gp; - } - } - else if(OP_UNLIKELY(!op_lookup_serialno(serialno, - _serialnos,_nserialnos))){ - /*We fell off the start of the link, which means we don't need to keep - seeking any farther back.*/ - left_link=1; - } - } - /*We started from at or before the beginning of the link and found nothing. - This should be impossible unless the contents of the source changed out - from under us after we read from it.*/ - if((OP_UNLIKELY(left_link)||OP_UNLIKELY(!begin))&&OP_UNLIKELY(_offset<0)){ - return OP_EBADLINK; - } - /*Bump up the chunk size. - This is mildly helpful when seeks are very expensive (http).*/ - chunk_size=OP_MIN(2*chunk_size,OP_CHUNK_SIZE_MAX); - /*Avoid quadratic complexity if we hit an invalid patch of the file.*/ - end=OP_MIN(begin+OP_PAGE_SIZE_MAX-1,original_end); - } - while(_offset<0); - *_gp=gp; - return _offset; -} - -/*Uses the local ogg_stream storage in _of. - This is important for non-streaming input sources.*/ -static int op_fetch_headers_impl(OggOpusFile *_of,OpusHead *_head, - OpusTags *_tags,ogg_uint32_t **_serialnos,int *_nserialnos, - int *_cserialnos,ogg_page *_og){ - ogg_packet op; - int ret; - if(_serialnos!=NULL)*_nserialnos=0; - /*Extract the serialnos of all BOS pages plus the first set of Opus headers - we see in the link.*/ - while(ogg_page_bos(_og)){ - if(_serialnos!=NULL){ - if(OP_UNLIKELY(op_lookup_page_serialno(_og,*_serialnos,*_nserialnos))){ - /*A dupe serialnumber in an initial header packet set==invalid stream.*/ - return OP_EBADHEADER; - } - ret=op_add_serialno(_og,_serialnos,_nserialnos,_cserialnos); - if(OP_UNLIKELY(ret<0))return ret; - } - if(_of->ready_state<OP_STREAMSET){ - /*We don't have an Opus stream in this link yet, so begin prospective - stream setup. 
- We need a stream to get packets.*/ - ogg_stream_reset_serialno(&_of->os,ogg_page_serialno(_og)); - ogg_stream_pagein(&_of->os,_og); - if(OP_LIKELY(ogg_stream_packetout(&_of->os,&op)>0)){ - ret=opus_head_parse(_head,op.packet,op.bytes); - /*Found a valid Opus header. - Continue setup.*/ - if(OP_LIKELY(ret>=0))_of->ready_state=OP_STREAMSET; - /*If it's just a stream type we don't recognize, ignore it. - Everything else is fatal.*/ - else if(ret!=OP_ENOTFORMAT)return ret; - } - /*TODO: Should a BOS page with no packets be an error?*/ - } - /*Get the next page. - No need to clamp the boundary offset against _of->end, as all errors - become OP_ENOTFORMAT or OP_EBADHEADER.*/ - if(OP_UNLIKELY(op_get_next_page(_of,_og, - OP_ADV_OFFSET(_of->offset,OP_CHUNK_SIZE))<0)){ - return _of->ready_state<OP_STREAMSET?OP_ENOTFORMAT:OP_EBADHEADER; - } - } - if(OP_UNLIKELY(_of->ready_state!=OP_STREAMSET))return OP_ENOTFORMAT; - /*If the first non-header page belonged to our Opus stream, submit it.*/ - if(_of->os.serialno==ogg_page_serialno(_og))ogg_stream_pagein(&_of->os,_og); - /*Loop getting packets.*/ - for(;;){ - switch(ogg_stream_packetout(&_of->os,&op)){ - case 0:{ - /*Loop getting pages.*/ - for(;;){ - /*No need to clamp the boundary offset against _of->end, as all - errors become OP_EBADHEADER.*/ - if(OP_UNLIKELY(op_get_next_page(_of,_og, - OP_ADV_OFFSET(_of->offset,OP_CHUNK_SIZE))<0)){ - return OP_EBADHEADER; - } - /*If this page belongs to the correct stream, go parse it.*/ - if(_of->os.serialno==ogg_page_serialno(_og)){ - ogg_stream_pagein(&_of->os,_og); - break; - } - /*If the link ends before we see the Opus comment header, abort.*/ - if(OP_UNLIKELY(ogg_page_bos(_og)))return OP_EBADHEADER; - /*Otherwise, keep looking.*/ - } - }break; - /*We shouldn't get a hole in the headers!*/ - case -1:return OP_EBADHEADER; - default:{ - /*Got a packet. 
- It should be the comment header.*/ - ret=opus_tags_parse(_tags,op.packet,op.bytes); - if(OP_UNLIKELY(ret<0))return ret; - /*Make sure the page terminated at the end of the comment header. - If there is another packet on the page, or part of a packet, then - reject the stream. - Otherwise seekable sources won't be able to seek back to the start - properly.*/ - ret=ogg_stream_packetout(&_of->os,&op); - if(OP_UNLIKELY(ret!=0) - ||OP_UNLIKELY(_og->header[_og->header_len-1]==255)){ - /*If we fail, the caller assumes our tags are uninitialized.*/ - opus_tags_clear(_tags); - return OP_EBADHEADER; - } - return 0; - } - } - } -} - -static int op_fetch_headers(OggOpusFile *_of,OpusHead *_head, - OpusTags *_tags,ogg_uint32_t **_serialnos,int *_nserialnos, - int *_cserialnos,ogg_page *_og){ - ogg_page og; - int ret; - if(!_og){ - /*No need to clamp the boundary offset against _of->end, as all errors - become OP_ENOTFORMAT.*/ - if(OP_UNLIKELY(op_get_next_page(_of,&og, - OP_ADV_OFFSET(_of->offset,OP_CHUNK_SIZE))<0)){ - return OP_ENOTFORMAT; - } - _og=&og; - } - _of->ready_state=OP_OPENED; - ret=op_fetch_headers_impl(_of,_head,_tags,_serialnos,_nserialnos, - _cserialnos,_og); - /*Revert back from OP_STREAMSET to OP_OPENED on failure, to prevent - double-free of the tags in an unseekable stream.*/ - if(OP_UNLIKELY(ret<0))_of->ready_state=OP_OPENED; - return ret; -} - -/*Granule position manipulation routines. - A granule position is defined to be an unsigned 64-bit integer, with the - special value -1 in two's complement indicating an unset or invalid granule - position. - We are not guaranteed to have an unsigned 64-bit type, so we construct the - following routines that - a) Properly order negative numbers as larger than positive numbers, and - b) Check for underflow or overflow past the special -1 value. - This lets us operate on the full, valid range of granule positions in a - consistent and safe manner. 
- This full range is organized into distinct regions: - [ -1 (invalid) ][ 0 ... OP_INT64_MAX ][ OP_INT64_MIN ... -2 ][-1 (invalid) ] - - No one should actually use granule positions so large that they're negative, - even if they are technically valid, as very little software handles them - correctly (including most of Xiph.Org's). - This library also refuses to support durations so large they won't fit in a - signed 64-bit integer (to avoid exposing this mess to the application, and - to simplify a good deal of internal arithmetic), so the only way to use them - successfully is if pcm_start is very large. - This means there isn't anything you can do with negative granule positions - that you couldn't have done with purely non-negative ones. - The main purpose of these routines is to allow us to think very explicitly - about the possible failure cases of all granule position manipulations.*/ - -/*Safely adds a small signed integer to a valid (not -1) granule position. - The result can use the full 64-bit range of values (both positive and - negative), but will fail on overflow (wrapping past -1; wrapping past - OP_INT64_MAX is explicitly okay). - [out] _dst_gp: The resulting granule position. - Only modified on success. - _src_gp: The granule position to add to. - This must not be -1. - _delta: The amount to add. - This is allowed to be up to 32 bits to support the maximum - duration of a single Ogg page (255 packets * 120 ms per - packet == 1,468,800 samples at 48 kHz). 
- Return: 0 on success, or OP_EINVAL if the result would wrap around past -1.*/ -static int op_granpos_add(ogg_int64_t *_dst_gp,ogg_int64_t _src_gp, - opus_int32 _delta){ - /*The code below handles this case correctly, but there's no reason we - should ever be called with these values, so make sure we aren't.*/ - OP_ASSERT(_src_gp!=-1); - if(_delta>0){ - /*Adding this amount to the granule position would overflow its 64-bit - range.*/ - if(OP_UNLIKELY(_src_gp<0)&&OP_UNLIKELY(_src_gp>=-1-_delta))return OP_EINVAL; - if(OP_UNLIKELY(_src_gp>OP_INT64_MAX-_delta)){ - /*Adding this amount to the granule position would overflow the positive - half of its 64-bit range. - Since signed overflow is undefined in C, do it in a way the compiler - isn't allowed to screw up.*/ - _delta-=(opus_int32)(OP_INT64_MAX-_src_gp)+1; - _src_gp=OP_INT64_MIN; - } - } - else if(_delta<0){ - /*Subtracting this amount from the granule position would underflow its - 64-bit range.*/ - if(_src_gp>=0&&OP_UNLIKELY(_src_gp<-_delta))return OP_EINVAL; - if(OP_UNLIKELY(_src_gp<OP_INT64_MIN-_delta)){ - /*Subtracting this amount from the granule position would underflow the - negative half of its 64-bit range. - Since signed underflow is undefined in C, do it in a way the compiler - isn't allowed to screw up.*/ - _delta+=(opus_int32)(_src_gp-OP_INT64_MIN)+1; - _src_gp=OP_INT64_MAX; - } - } - *_dst_gp=_src_gp+_delta; - return 0; -} - -/*Safely computes the difference between two granule positions. - The difference must fit in a signed 64-bit integer, or the function fails. - It correctly handles the case where the granule position has wrapped around - from positive values to negative ones. - [out] _delta: The difference between the granule positions. - Only modified on success. - _gp_a: The granule position to subtract from. - This must not be -1. - _gp_b: The granule position to subtract. - This must not be -1. 
- Return: 0 on success, or OP_EINVAL if the result would not fit in a signed - 64-bit integer.*/ -static int op_granpos_diff(ogg_int64_t *_delta, - ogg_int64_t _gp_a,ogg_int64_t _gp_b){ - int gp_a_negative; - int gp_b_negative; - /*The code below handles these cases correctly, but there's no reason we - should ever be called with these values, so make sure we aren't.*/ - OP_ASSERT(_gp_a!=-1); - OP_ASSERT(_gp_b!=-1); - gp_a_negative=OP_UNLIKELY(_gp_a<0); - gp_b_negative=OP_UNLIKELY(_gp_b<0); - if(OP_UNLIKELY(gp_a_negative^gp_b_negative)){ - ogg_int64_t da; - ogg_int64_t db; - if(gp_a_negative){ - /*_gp_a has wrapped to a negative value but _gp_b hasn't: the difference - should be positive.*/ - /*Step 1: Handle wrapping.*/ - /*_gp_a < 0 => da < 0.*/ - da=(OP_INT64_MIN-_gp_a)-1; - /*_gp_b >= 0 => db >= 0.*/ - db=OP_INT64_MAX-_gp_b; - /*Step 2: Check for overflow.*/ - if(OP_UNLIKELY(OP_INT64_MAX+da<db))return OP_EINVAL; - *_delta=db-da; - } - else{ - /*_gp_b has wrapped to a negative value but _gp_a hasn't: the difference - should be negative.*/ - /*Step 1: Handle wrapping.*/ - /*_gp_a >= 0 => da <= 0*/ - da=_gp_a+OP_INT64_MIN; - /*_gp_b < 0 => db <= 0*/ - db=OP_INT64_MIN-_gp_b; - /*Step 2: Check for overflow.*/ - if(OP_UNLIKELY(da<OP_INT64_MIN-db))return OP_EINVAL; - *_delta=da+db; - } - } - else *_delta=_gp_a-_gp_b; - return 0; -} - -static int op_granpos_cmp(ogg_int64_t _gp_a,ogg_int64_t _gp_b){ - /*The invalid granule position -1 should behave like NaN: neither greater - than nor less than any other granule position, nor equal to any other - granule position, including itself. 
- However, that means there isn't anything we could sensibly return from this - function for it.*/ - OP_ASSERT(_gp_a!=-1); - OP_ASSERT(_gp_b!=-1); - /*Handle the wrapping cases.*/ - if(OP_UNLIKELY(_gp_a<0)){ - if(_gp_b>=0)return 1; - /*Else fall through.*/ - } - else if(OP_UNLIKELY(_gp_b<0))return -1; - /*No wrapping case.*/ - return (_gp_a>_gp_b)-(_gp_b>_gp_a); -} - -/*Returns the duration of the packet (in samples at 48 kHz), or a negative - value on error.*/ -static int op_get_packet_duration(const unsigned char *_data,int _len){ - int nframes; - int frame_size; - int nsamples; - nframes=opus_packet_get_nb_frames(_data,_len); - if(OP_UNLIKELY(nframes<0))return OP_EBADPACKET; - frame_size=opus_packet_get_samples_per_frame(_data,48000); - nsamples=nframes*frame_size; - if(OP_UNLIKELY(nsamples>120*48))return OP_EBADPACKET; - return nsamples; -} - -/*This function more properly belongs in info.c, but we define it here to allow - the static granule position manipulation functions to remain static.*/ -ogg_int64_t opus_granule_sample(const OpusHead *_head,ogg_int64_t _gp){ - opus_int32 pre_skip; - pre_skip=_head->pre_skip; - if(_gp!=-1&&op_granpos_add(&_gp,_gp,-pre_skip))_gp=-1; - return _gp; -} - -/*Grab all the packets currently in the stream state, and compute their - durations. - _of->op_count is set to the number of packets collected. - [out] _durations: Returns the durations of the individual packets. - Return: The total duration of all packets, or OP_HOLE if there was a hole.*/ -static opus_int32 op_collect_audio_packets(OggOpusFile *_of, - int _durations[255]){ - opus_int32 total_duration; - int op_count; - /*Count the durations of all packets in the page.*/ - op_count=0; - total_duration=0; - for(;;){ - int ret; - /*This takes advantage of undocumented libogg behavior that returned - ogg_packet buffers are valid at least until the next page is - submitted. 
- Relying on this is not too terrible, as _none_ of the Ogg memory - ownership/lifetime rules are well-documented. - But I can read its code and know this will work.*/ - ret=ogg_stream_packetout(&_of->os,_of->op+op_count); - if(!ret)break; - if(OP_UNLIKELY(ret<0)){ - /*We shouldn't get holes in the middle of pages.*/ - OP_ASSERT(op_count==0); - /*Set the return value and break out of the loop. - We want to make sure op_count gets set to 0, because we've ingested a - page, so any previously loaded packets are now invalid.*/ - total_duration=OP_HOLE; - break; - } - /*Unless libogg is broken, we can't get more than 255 packets from a - single page.*/ - OP_ASSERT(op_count<255); - _durations[op_count]=op_get_packet_duration(_of->op[op_count].packet, - _of->op[op_count].bytes); - if(OP_LIKELY(_durations[op_count]>0)){ - /*With at most 255 packets on a page, this can't overflow.*/ - total_duration+=_durations[op_count++]; - } - /*Ignore packets with an invalid TOC sequence.*/ - else if(op_count>0){ - /*But save the granule position, if there was one.*/ - _of->op[op_count-1].granulepos=_of->op[op_count].granulepos; - } - } - _of->op_pos=0; - _of->op_count=op_count; - return total_duration; -} - -/*Starting from current cursor position, get the initial PCM offset of the next - page. - This also validates the granule position on the first page with a completed - audio data packet, as required by the spec. - If this link is completely empty (no pages with completed packets), then this - function sets pcm_start=pcm_end=0 and returns the BOS page of the next link - (if any). - In the seekable case, we initialize pcm_end=-1 before calling this function, - so that later we can detect that the link was empty before calling - op_find_final_pcm_offset(). - [inout] _link: The link for which to find pcm_start. - [out] _og: Returns the BOS page of the next link if this link was empty. - In the unseekable case, we can then feed this to - op_fetch_headers() to start the next link. 
- The caller may pass NULL (e.g., for seekable streams), in - which case this page will be discarded. - Return: 0 on success, 1 if there is a buffered BOS page available, or a - negative value on unrecoverable error.*/ -static int op_find_initial_pcm_offset(OggOpusFile *_of, - OggOpusLink *_link,ogg_page *_og){ - ogg_page og; - opus_int64 page_offset; - ogg_int64_t pcm_start; - ogg_int64_t prev_packet_gp; - ogg_int64_t cur_page_gp; - ogg_uint32_t serialno; - opus_int32 total_duration; - int durations[255]; - int cur_page_eos; - int op_count; - int pi; - if(_og==NULL)_og=&og; - serialno=_of->os.serialno; - op_count=0; - /*We shouldn't have to initialize total_duration, but gcc is too dumb to - figure out that op_count>0 implies we've been through the whole loop at - least once.*/ - total_duration=0; - do{ - page_offset=op_get_next_page(_of,_og,_of->end); - /*We should get a page unless the file is truncated or mangled. - Otherwise there are no audio data packets in the whole logical stream.*/ - if(OP_UNLIKELY(page_offset<0)){ - /*Fail if there was a read error.*/ - if(page_offset<OP_FALSE)return (int)page_offset; - /*Fail if the pre-skip is non-zero, since it's asking us to skip more - samples than exist.*/ - if(_link->head.pre_skip>0)return OP_EBADTIMESTAMP; - /*Set pcm_end and end_offset so we can skip the call to - op_find_final_pcm_offset().*/ - _link->pcm_start=_link->pcm_end=0; - _link->end_offset=_link->data_offset; - return 0; - } - /*Similarly, if we hit the next link in the chain, we've gone too far.*/ - if(OP_UNLIKELY(ogg_page_bos(_og))){ - if(_link->head.pre_skip>0)return OP_EBADTIMESTAMP; - /*Set pcm_end and end_offset so we can skip the call to - op_find_final_pcm_offset().*/ - _link->pcm_end=_link->pcm_start=0; - _link->end_offset=_link->data_offset; - /*Tell the caller we've got a buffered page for them.*/ - return 1; - } - /*Ignore pages from other streams (not strictly necessary, because of the - checks in ogg_stream_pagein(), but saves some 
work).*/ - if(serialno!=(ogg_uint32_t)ogg_page_serialno(_og))continue; - ogg_stream_pagein(&_of->os,_og); - /*Bitrate tracking: add the header's bytes here. - The body bytes are counted when we consume the packets.*/ - _of->bytes_tracked+=_og->header_len; - /*Count the durations of all packets in the page.*/ - do total_duration=op_collect_audio_packets(_of,durations); - /*Ignore holes.*/ - while(OP_UNLIKELY(total_duration<0)); - op_count=_of->op_count; - } - while(op_count<=0); - /*We found the first page with a completed audio data packet: actually look - at the granule position. - RFC 3533 says, "A special value of -1 (in two's complement) indicates that - no packets finish on this page," which does not say that a granule - position that is NOT -1 indicates that some packets DO finish on that page - (even though this was the intention, libogg itself violated this intention - for years before we fixed it). - The Ogg Opus specification only imposes its start-time requirements - on the granule position of the first page with completed packets, - so we ignore any set granule positions until then.*/ - cur_page_gp=_of->op[op_count-1].granulepos; - /*But getting a packet without a valid granule position on the page is not - okay.*/ - if(cur_page_gp==-1)return OP_EBADTIMESTAMP; - cur_page_eos=_of->op[op_count-1].e_o_s; - if(OP_LIKELY(!cur_page_eos)){ - /*The EOS flag wasn't set. 
- Work backwards from the provided granule position to get the starting PCM - offset.*/ - if(OP_UNLIKELY(op_granpos_add(&pcm_start,cur_page_gp,-total_duration)<0)){ - /*The starting granule position MUST not be smaller than the amount of - audio on the first page with completed packets.*/ - return OP_EBADTIMESTAMP; - } - } - else{ - /*The first page with completed packets was also the last.*/ - if(OP_LIKELY(op_granpos_add(&pcm_start,cur_page_gp,-total_duration)<0)){ - /*If there's less audio on the page than indicated by the granule - position, then we're doing end-trimming, and the starting PCM offset - is zero by spec mandate.*/ - pcm_start=0; - /*However, the end-trimming MUST not ask us to trim more samples than - exist after applying the pre-skip.*/ - if(OP_UNLIKELY(op_granpos_cmp(cur_page_gp,_link->head.pre_skip)<0)){ - return OP_EBADTIMESTAMP; - } - } - } - /*Timestamp the individual packets.*/ - prev_packet_gp=pcm_start; - for(pi=0;pi<op_count;pi++){ - if(cur_page_eos){ - ogg_int64_t diff; - OP_ALWAYS_TRUE(!op_granpos_diff(&diff,cur_page_gp,prev_packet_gp)); - diff=durations[pi]-diff; - /*If we have samples to trim...*/ - if(diff>0){ - /*If we trimmed the entire packet, stop (the spec says encoders - shouldn't do this, but we support it anyway).*/ - if(OP_UNLIKELY(diff>durations[pi]))break; - _of->op[pi].granulepos=prev_packet_gp=cur_page_gp; - /*Move the EOS flag to this packet, if necessary, so we'll trim the - samples.*/ - _of->op[pi].e_o_s=1; - continue; - } - } - /*Update the granule position as normal.*/ - OP_ALWAYS_TRUE(!op_granpos_add(&_of->op[pi].granulepos, - prev_packet_gp,durations[pi])); - prev_packet_gp=_of->op[pi].granulepos; - } - /*Update the packet count after end-trimming.*/ - _of->op_count=pi; - _of->cur_discard_count=_link->head.pre_skip; - _of->prev_packet_gp=_link->pcm_start=pcm_start; - _of->prev_page_offset=page_offset; - return 0; -} - -/*Starting from current cursor position, get the final PCM offset of the - previous page. 
- This also validates the duration of the link, which, while not strictly - required by the spec, we need to ensure duration calculations don't - overflow. - This is only done for seekable sources. - We must validate that op_find_initial_pcm_offset() succeeded for this link - before calling this function, otherwise it will scan the entire stream - backwards until it reaches the start, and then fail.*/ -static int op_find_final_pcm_offset(OggOpusFile *_of, - const ogg_uint32_t *_serialnos,int _nserialnos,OggOpusLink *_link, - opus_int64 _offset,ogg_uint32_t _end_serialno,ogg_int64_t _end_gp, - ogg_int64_t *_total_duration){ - ogg_int64_t total_duration; - ogg_int64_t duration; - ogg_uint32_t cur_serialno; - /*For the time being, fetch end PCM offset the simple way.*/ - cur_serialno=_link->serialno; - if(_end_serialno!=cur_serialno||_end_gp==-1){ - _offset=op_get_last_page(_of,&_end_gp,_offset, - cur_serialno,_serialnos,_nserialnos); - if(OP_UNLIKELY(_offset<0))return (int)_offset; - } - /*At worst we should have found the first page with completed packets.*/ - if(OP_UNLIKELY(_offset<_link->data_offset))return OP_EBADLINK; - /*This implementation requires that the difference between the first and last - granule positions in each link be representable in a signed, 64-bit - number, and that each link also have at least as many samples as the - pre-skip requires.*/ - if(OP_UNLIKELY(op_granpos_diff(&duration,_end_gp,_link->pcm_start)<0) - ||OP_UNLIKELY(duration<_link->head.pre_skip)){ - return OP_EBADTIMESTAMP; - } - /*We also require that the total duration be representable in a signed, - 64-bit number.*/ - duration-=_link->head.pre_skip; - total_duration=*_total_duration; - if(OP_UNLIKELY(OP_INT64_MAX-duration<total_duration))return OP_EBADTIMESTAMP; - *_total_duration=total_duration+duration; - _link->pcm_end=_end_gp; - _link->end_offset=_offset; - return 0; -} - -/*Rescale the number _x from the range [0,_from] to [0,_to]. 
- _from and _to must be positive.*/ -static opus_int64 op_rescale64(opus_int64 _x,opus_int64 _from,opus_int64 _to){ - opus_int64 frac; - opus_int64 ret; - int i; - if(_x>=_from)return _to; - if(_x<=0)return 0; - frac=0; - for(i=0;i<63;i++){ - frac<<=1; - OP_ASSERT(_x<=_from); - if(_x>=_from>>1){ - _x-=_from-_x; - frac|=1; - } - else _x<<=1; - } - ret=0; - for(i=0;i<63;i++){ - if(frac&1)ret=(ret&_to&1)+(ret>>1)+(_to>>1); - else ret>>=1; - frac>>=1; - } - return ret; -} - -/*The minimum granule position spacing allowed for making predictions. - This corresponds to about 1 second of audio at 48 kHz for both Opus and - Vorbis, or one keyframe interval in Theora with the default keyframe spacing - of 256.*/ -#define OP_GP_SPACING_MIN (48000) - -/*Try to estimate the location of the next link using the current seek - records, assuming the initial granule position of any streams we've found is - 0.*/ -static opus_int64 op_predict_link_start(const OpusSeekRecord *_sr,int _nsr, - opus_int64 _searched,opus_int64 _end_searched,opus_int32 _bias){ - opus_int64 bisect; - int sri; - int srj; - /*Require that we be at least OP_CHUNK_SIZE from the end. - We don't require that we be at least OP_CHUNK_SIZE from the beginning, - because if we are we'll just scan forward without seeking.*/ - _end_searched-=OP_CHUNK_SIZE; - if(_searched>=_end_searched)return -1; - bisect=_end_searched; - for(sri=0;sri<_nsr;sri++){ - ogg_int64_t gp1; - ogg_int64_t gp2_min; - ogg_uint32_t serialno1; - opus_int64 offset1; - /*If the granule position is negative, either it's invalid or we'd cause - overflow.*/ - gp1=_sr[sri].gp; - if(gp1<0)continue; - /*We require some minimum distance between granule positions to make an - estimate. - We don't actually know what granule position scheme is being used, - because we have no idea what kind of stream these came from. 
- Therefore we require a minimum spacing between them, with the - expectation that while bitrates and granule position increments might - vary locally in quite complex ways, they are globally smooth.*/ - if(OP_UNLIKELY(op_granpos_add(&gp2_min,gp1,OP_GP_SPACING_MIN)<0)){ - /*No granule position would satisfy us.*/ - continue; - } - offset1=_sr[sri].offset; - serialno1=_sr[sri].serialno; - for(srj=sri;srj-->0;){ - ogg_int64_t gp2; - opus_int64 offset2; - opus_int64 num; - ogg_int64_t den; - ogg_int64_t ipart; - gp2=_sr[srj].gp; - if(gp2<gp2_min)continue; - /*Oh, and also make sure these came from the same stream.*/ - if(_sr[srj].serialno!=serialno1)continue; - offset2=_sr[srj].offset; - /*For once, we can subtract with impunity.*/ - den=gp2-gp1; - ipart=gp2/den; - num=offset2-offset1; - OP_ASSERT(num>0); - if(ipart>0&&(offset2-_searched)/ipart<num)continue; - offset2-=ipart*num; - gp2-=ipart*den; - offset2-=op_rescale64(gp2,den,num)-_bias; - if(offset2<_searched)continue; - bisect=OP_MIN(bisect,offset2); - break; - } - } - return bisect>=_end_searched?-1:bisect; -} - -/*Finds each bitstream link, one at a time, using a bisection search. - This has to begin by knowing the offset of the first link's initial page.*/ -static int op_bisect_forward_serialno(OggOpusFile *_of, - opus_int64 _searched,OpusSeekRecord *_sr,int _csr, - ogg_uint32_t **_serialnos,int *_nserialnos,int *_cserialnos){ - ogg_page og; - OggOpusLink *links; - int nlinks; - int clinks; - ogg_uint32_t *serialnos; - int nserialnos; - ogg_int64_t total_duration; - int nsr; - int ret; - links=_of->links; - nlinks=clinks=_of->nlinks; - total_duration=0; - /*We start with one seek record, for the last page in the file. - We build up a list of records for places we seek to during link - enumeration. - This list is kept sorted in reverse order. 
- We only care about seek locations that were _not_ in the current link, - therefore we can add them one at a time to the end of the list as we - improve the lower bound on the location where the next link starts.*/ - nsr=1; - for(;;){ - opus_int64 end_searched; - opus_int64 bisect; - opus_int64 next; - opus_int64 last; - ogg_int64_t end_offset; - ogg_int64_t end_gp; - int sri; - serialnos=*_serialnos; - nserialnos=*_nserialnos; - if(OP_UNLIKELY(nlinks>=clinks)){ - if(OP_UNLIKELY(clinks>INT_MAX-1>>1))return OP_EFAULT; - clinks=2*clinks+1; - OP_ASSERT(nlinks<clinks); - links=(OggOpusLink *)_ogg_realloc(links,sizeof(*links)*clinks); - if(OP_UNLIKELY(links==NULL))return OP_EFAULT; - _of->links=links; - } - /*Invariants: - We have the headers and serial numbers for the link beginning at 'begin'. - We have the offset and granule position of the last page in the file - (potentially not a page we care about).*/ - /*Scan the seek records we already have to save us some bisection.*/ - for(sri=0;sri<nsr;sri++){ - if(op_lookup_serialno(_sr[sri].serialno,serialnos,nserialnos))break; - } - /*Is the last page in our current list of serial numbers?*/ - if(sri<=0)break; - /*Last page wasn't found. - We have at least one more link.*/ - last=-1; - end_searched=_sr[sri-1].search_start; - next=_sr[sri-1].offset; - end_gp=-1; - if(sri<nsr){ - _searched=_sr[sri].offset+_sr[sri].size; - if(_sr[sri].serialno==links[nlinks-1].serialno){ - end_gp=_sr[sri].gp; - end_offset=_sr[sri].offset; - } - } - nsr=sri; - bisect=-1; - /*If we've already found the end of at least one link, try to pick the - first bisection point at twice the average link size. 
- This is a good choice for files with lots of links that are all about the - same size.*/ - if(nlinks>1){ - opus_int64 last_offset; - opus_int64 avg_link_size; - opus_int64 upper_limit; - last_offset=links[nlinks-1].offset; - avg_link_size=last_offset/(nlinks-1); - upper_limit=end_searched-OP_CHUNK_SIZE-avg_link_size; - if(OP_LIKELY(last_offset>_searched-avg_link_size) - &&OP_LIKELY(last_offset<upper_limit)){ - bisect=last_offset+avg_link_size; - if(OP_LIKELY(bisect<upper_limit))bisect+=avg_link_size; - } - } - /*We guard against garbage separating the last and first pages of two - links below.*/ - while(_searched<end_searched){ - opus_int32 next_bias; - /*If we don't have a better estimate, use simple bisection.*/ - if(bisect==-1)bisect=_searched+(end_searched-_searched>>1); - /*If we're within OP_CHUNK_SIZE of the start, scan forward.*/ - if(bisect-_searched<OP_CHUNK_SIZE)bisect=_searched; - /*Otherwise we're skipping data. - Forget the end page, if we saw one, as we might miss a later one.*/ - else end_gp=-1; - ret=op_seek_helper(_of,bisect); - if(OP_UNLIKELY(ret<0))return ret; - last=op_get_next_page(_of,&og,_sr[nsr-1].offset); - if(OP_UNLIKELY(last<OP_FALSE))return (int)last; - next_bias=0; - if(last==OP_FALSE)end_searched=bisect; - else{ - ogg_uint32_t serialno; - ogg_int64_t gp; - serialno=ogg_page_serialno(&og); - gp=ogg_page_granulepos(&og); - if(!op_lookup_serialno(serialno,serialnos,nserialnos)){ - end_searched=bisect; - next=last; - /*In reality we should always have enough room, but be paranoid.*/ - if(OP_LIKELY(nsr<_csr)){ - _sr[nsr].search_start=bisect; - _sr[nsr].offset=last; - OP_ASSERT(_of->offset-last>=0); - OP_ASSERT(_of->offset-last<=OP_PAGE_SIZE_MAX); - _sr[nsr].size=(opus_int32)(_of->offset-last); - _sr[nsr].serialno=serialno; - _sr[nsr].gp=gp; - nsr++; - } - } - else{ - _searched=_of->offset; - next_bias=OP_CHUNK_SIZE; - if(serialno==links[nlinks-1].serialno){ - /*This page was from the stream we want, remember it. 
- If it's the last such page in the link, we won't have to go back - looking for it later.*/ - end_gp=gp; - end_offset=last; - } - } - } - bisect=op_predict_link_start(_sr,nsr,_searched,end_searched,next_bias); - } - /*Bisection point found. - Get the final granule position of the previous link, assuming - op_find_initial_pcm_offset() didn't already determine the link was - empty.*/ - if(OP_LIKELY(links[nlinks-1].pcm_end==-1)){ - if(end_gp==-1){ - /*If we don't know where the end page is, we'll have to seek back and - look for it, starting from the end of the link.*/ - end_offset=next; - /*Also forget the last page we read. - It won't be available after the seek.*/ - last=-1; - } - ret=op_find_final_pcm_offset(_of,serialnos,nserialnos, - links+nlinks-1,end_offset,links[nlinks-1].serialno,end_gp, - &total_duration); - if(OP_UNLIKELY(ret<0))return ret; - } - if(last!=next){ - /*The last page we read was not the first page the next link. - Move the cursor position to the offset of that first page. - This only performs an actual seek if the first page of the next link - does not start at the end of the last page from the current Opus - stream with a valid granule position.*/ - ret=op_seek_helper(_of,next); - if(OP_UNLIKELY(ret<0))return ret; - } - ret=op_fetch_headers(_of,&links[nlinks].head,&links[nlinks].tags, - _serialnos,_nserialnos,_cserialnos,last!=next?NULL:&og); - if(OP_UNLIKELY(ret<0))return ret; - links[nlinks].offset=next; - links[nlinks].data_offset=_of->offset; - links[nlinks].serialno=_of->os.serialno; - links[nlinks].pcm_end=-1; - /*This might consume a page from the next link, however the next bisection - always starts with a seek.*/ - ret=op_find_initial_pcm_offset(_of,links+nlinks,NULL); - if(OP_UNLIKELY(ret<0))return ret; - _searched=_of->offset; - /*Mark the current link count so it can be cleaned up on error.*/ - _of->nlinks=++nlinks; - } - /*Last page is in the starting serialno list, so we've reached the last link. 
- Now find the last granule position for it (if we didn't the first time we - looked at the end of the stream, and if op_find_initial_pcm_offset() - didn't already determine the link was empty).*/ - if(OP_LIKELY(links[nlinks-1].pcm_end==-1)){ - ret=op_find_final_pcm_offset(_of,serialnos,nserialnos, - links+nlinks-1,_sr[0].offset,_sr[0].serialno,_sr[0].gp,&total_duration); - if(OP_UNLIKELY(ret<0))return ret; - } - /*Trim back the links array if necessary.*/ - links=(OggOpusLink *)_ogg_realloc(links,sizeof(*links)*nlinks); - if(OP_LIKELY(links!=NULL))_of->links=links; - /*We also don't need these anymore.*/ - _ogg_free(*_serialnos); - *_serialnos=NULL; - *_cserialnos=*_nserialnos=0; - return 0; -} - -static void op_update_gain(OggOpusFile *_of){ - OpusHead *head; - opus_int32 gain_q8; - int li; - /*If decode isn't ready, then we'll apply the gain when we initialize the - decoder.*/ - if(_of->ready_state<OP_INITSET)return; - gain_q8=_of->gain_offset_q8; - li=_of->seekable?_of->cur_link:0; - head=&_of->links[li].head; - /*We don't have to worry about overflow here because the header gain and - track gain must lie in the range [-32768,32767], and the user-supplied - offset has been pre-clamped to [-98302,98303].*/ - switch(_of->gain_type){ - case OP_ALBUM_GAIN:{ - int album_gain_q8; - album_gain_q8=0; - opus_tags_get_album_gain(&_of->links[li].tags,&album_gain_q8); - gain_q8+=album_gain_q8; - gain_q8+=head->output_gain; - }break; - case OP_TRACK_GAIN:{ - int track_gain_q8; - track_gain_q8=0; - opus_tags_get_track_gain(&_of->links[li].tags,&track_gain_q8); - gain_q8+=track_gain_q8; - gain_q8+=head->output_gain; - }break; - case OP_HEADER_GAIN:gain_q8+=head->output_gain;break; - case OP_ABSOLUTE_GAIN:break; - default:OP_ASSERT(0); - } - gain_q8=OP_CLAMP(-32768,gain_q8,32767); - OP_ASSERT(_of->od!=NULL); -#if defined(OPUS_SET_GAIN) - opus_multistream_decoder_ctl(_of->od,OPUS_SET_GAIN(gain_q8)); -#else -/*A fallback that works with both float and fixed-point is a bunch of 
work, - so just force people to use a sufficiently new version. - This is deployed well enough at this point that this shouldn't be a burden.*/ -# error "libopus 1.0.1 or later required" -#endif -} - -static int op_make_decode_ready(OggOpusFile *_of){ - const OpusHead *head; - int li; - int stream_count; - int coupled_count; - int channel_count; - if(_of->ready_state>OP_STREAMSET)return 0; - if(OP_UNLIKELY(_of->ready_state<OP_STREAMSET))return OP_EFAULT; - li=_of->seekable?_of->cur_link:0; - head=&_of->links[li].head; - stream_count=head->stream_count; - coupled_count=head->coupled_count; - channel_count=head->channel_count; - /*Check to see if the current decoder is compatible with the current link.*/ - if(_of->od!=NULL&&_of->od_stream_count==stream_count - &&_of->od_coupled_count==coupled_count&&_of->od_channel_count==channel_count - &&memcmp(_of->od_mapping,head->mapping, - sizeof(*head->mapping)*channel_count)==0){ - opus_multistream_decoder_ctl(_of->od,OPUS_RESET_STATE); - } - else{ - int err; - opus_multistream_decoder_destroy(_of->od); - _of->od=opus_multistream_decoder_create(48000,channel_count, - stream_count,coupled_count,head->mapping,&err); - if(_of->od==NULL)return OP_EFAULT; - _of->od_stream_count=stream_count; - _of->od_coupled_count=coupled_count; - _of->od_channel_count=channel_count; - memcpy(_of->od_mapping,head->mapping,sizeof(*head->mapping)*channel_count); - } - _of->ready_state=OP_INITSET; - _of->bytes_tracked=0; - _of->samples_tracked=0; -#if !defined(OP_FIXED_POINT) - _of->state_channel_count=0; - /*Use the serial number for the PRNG seed to get repeatable output for - straight play-throughs.*/ - _of->dither_seed=_of->links[li].serialno; -#endif - op_update_gain(_of); - return 0; -} - -static int op_open_seekable2_impl(OggOpusFile *_of){ - /*64 seek records should be enough for anybody. 
- Actually, with a bisection search in a 63-bit range down to OP_CHUNK_SIZE - granularity, much more than enough.*/ - OpusSeekRecord sr[64]; - opus_int64 data_offset; - int ret; - /*We can seek, so set out learning all about this file.*/ - (*_of->callbacks.seek)(_of->source,0,SEEK_END); - _of->offset=_of->end=(*_of->callbacks.tell)(_of->source); - if(OP_UNLIKELY(_of->end<0))return OP_EREAD; - data_offset=_of->links[0].data_offset; - if(OP_UNLIKELY(_of->end<data_offset))return OP_EBADLINK; - /*Get the offset of the last page of the physical bitstream, or, if we're - lucky, the last Opus page of the first link, as most Ogg Opus files will - contain a single logical bitstream.*/ - ret=op_get_prev_page_serial(_of,sr,_of->end, - _of->links[0].serialno,_of->serialnos,_of->nserialnos); - if(OP_UNLIKELY(ret<0))return ret; - /*If there's any trailing junk, forget about it.*/ - _of->end=sr[0].offset+sr[0].size; - if(OP_UNLIKELY(_of->end<data_offset))return OP_EBADLINK; - /*Now enumerate the bitstream structure.*/ - return op_bisect_forward_serialno(_of,data_offset,sr,sizeof(sr)/sizeof(*sr), - &_of->serialnos,&_of->nserialnos,&_of->cserialnos); -} - -static int op_open_seekable2(OggOpusFile *_of){ - ogg_sync_state oy_start; - ogg_stream_state os_start; - ogg_packet *op_start; - opus_int64 prev_page_offset; - opus_int64 start_offset; - int start_op_count; - int ret; - /*We're partially open and have a first link header state in storage in _of. - Save off that stream state so we can come back to it. - It would be simpler to just dump all this state and seek back to - links[0].data_offset when we're done. - But we do the extra work to allow us to seek back to _exactly_ the same - stream position we're at now. - This allows, e.g., the HTTP backend to continue reading from the original - connection (if it's still available), instead of opening a new one. 
- This means we can open and start playing a normal Opus file with a single - link and reasonable packet sizes using only two HTTP requests.*/ - start_op_count=_of->op_count; - /*This is a bit too large to put on the stack unconditionally.*/ - op_start=(ogg_packet *)_ogg_malloc(sizeof(*op_start)*start_op_count); - if(op_start==NULL)return OP_EFAULT; - *&oy_start=_of->oy; - *&os_start=_of->os; - prev_page_offset=_of->prev_page_offset; - start_offset=_of->offset; - memcpy(op_start,_of->op,sizeof(*op_start)*start_op_count); - OP_ASSERT((*_of->callbacks.tell)(_of->source)==op_position(_of)); - ogg_sync_init(&_of->oy); - ogg_stream_init(&_of->os,-1); - ret=op_open_seekable2_impl(_of); - /*Restore the old stream state.*/ - ogg_stream_clear(&_of->os); - ogg_sync_clear(&_of->oy); - *&_of->oy=*&oy_start; - *&_of->os=*&os_start; - _of->offset=start_offset; - _of->op_count=start_op_count; - memcpy(_of->op,op_start,sizeof(*_of->op)*start_op_count); - _ogg_free(op_start); - _of->prev_packet_gp=_of->links[0].pcm_start; - _of->prev_page_offset=prev_page_offset; - _of->cur_discard_count=_of->links[0].head.pre_skip; - if(OP_UNLIKELY(ret<0))return ret; - /*And restore the position indicator.*/ - ret=(*_of->callbacks.seek)(_of->source,op_position(_of),SEEK_SET); - return OP_UNLIKELY(ret<0)?OP_EREAD:0; -} - -/*Clear out the current logical bitstream decoder.*/ -static void op_decode_clear(OggOpusFile *_of){ - /*We don't actually free the decoder. 
- We might be able to re-use it for the next link.*/ - _of->op_count=0; - _of->od_buffer_size=0; - _of->prev_packet_gp=-1; - _of->prev_page_offset=-1; - if(!_of->seekable){ - OP_ASSERT(_of->ready_state>=OP_INITSET); - opus_tags_clear(&_of->links[0].tags); - } - _of->ready_state=OP_OPENED; -} - -static void op_clear(OggOpusFile *_of){ - OggOpusLink *links; - _ogg_free(_of->od_buffer); - if(_of->od!=NULL)opus_multistream_decoder_destroy(_of->od); - links=_of->links; - if(!_of->seekable){ - if(_of->ready_state>OP_OPENED||_of->ready_state==OP_PARTOPEN){ - opus_tags_clear(&links[0].tags); - } - } - else if(OP_LIKELY(links!=NULL)){ - int nlinks; - int link; - nlinks=_of->nlinks; - for(link=0;link<nlinks;link++)opus_tags_clear(&links[link].tags); - } - _ogg_free(links); - _ogg_free(_of->serialnos); - ogg_stream_clear(&_of->os); - ogg_sync_clear(&_of->oy); - if(_of->callbacks.close!=NULL)(*_of->callbacks.close)(_of->source); -} - -static int op_open1(OggOpusFile *_of, - void *_source,const OpusFileCallbacks *_cb, - const unsigned char *_initial_data,size_t _initial_bytes){ - ogg_page og; - ogg_page *pog; - int seekable; - int ret; - memset(_of,0,sizeof(*_of)); - _of->end=-1; - _of->source=_source; - *&_of->callbacks=*_cb; - /*At a minimum, we need to be able to read data.*/ - if(OP_UNLIKELY(_of->callbacks.read==NULL))return OP_EREAD; - /*Initialize the framing state.*/ - ogg_sync_init(&_of->oy); - /*Perhaps some data was previously read into a buffer for testing against - other stream types. - Allow initialization from this previously read data (especially as we may - be reading from a non-seekable stream). 
- This requires copying it into a buffer allocated by ogg_sync_buffer() and - doesn't support seeking, so this is not a good mechanism to use for - decoding entire files from RAM.*/ - if(_initial_bytes>0){ - char *buffer; - buffer=ogg_sync_buffer(&_of->oy,_initial_bytes); - memcpy(buffer,_initial_data,_initial_bytes*sizeof(*buffer)); - ogg_sync_wrote(&_of->oy,_initial_bytes); - } - /*Can we seek? - Stevens suggests the seek test is portable.*/ - seekable=_cb->seek!=NULL&&(*_cb->seek)(_source,0,SEEK_CUR)!=-1; - /*If seek is implemented, tell must also be implemented.*/ - if(seekable){ - opus_int64 pos; - if(OP_UNLIKELY(_of->callbacks.tell==NULL))return OP_EINVAL; - pos=(*_of->callbacks.tell)(_of->source); - /*If the current position is not equal to the initial bytes consumed, - absolute seeking will not work.*/ - if(OP_UNLIKELY(pos!=(opus_int64)_initial_bytes))return OP_EINVAL; - } - _of->seekable=seekable; - /*Don't seek yet. - Set up a 'single' (current) logical bitstream entry for partial open.*/ - _of->links=(OggOpusLink *)_ogg_malloc(sizeof(*_of->links)); - /*The serialno gets filled in later by op_fetch_headers().*/ - ogg_stream_init(&_of->os,-1); - pog=NULL; - for(;;){ - /*Fetch all BOS pages, store the Opus header and all seen serial numbers, - and load subsequent Opus setup headers.*/ - ret=op_fetch_headers(_of,&_of->links[0].head,&_of->links[0].tags, - &_of->serialnos,&_of->nserialnos,&_of->cserialnos,pog); - if(OP_UNLIKELY(ret<0))break; - _of->nlinks=1; - _of->links[0].offset=0; - _of->links[0].data_offset=_of->offset; - _of->links[0].pcm_end=-1; - _of->links[0].serialno=_of->os.serialno; - /*Fetch the initial PCM offset.*/ - ret=op_find_initial_pcm_offset(_of,_of->links,&og); - if(seekable||OP_LIKELY(ret<=0))break; - /*This link was empty, but we already have the BOS page for the next one in - og. 
- We can't seek, so start processing the next link right now.*/ - opus_tags_clear(&_of->links[0].tags); - _of->nlinks=0; - if(!seekable)_of->cur_link++; - pog=&og; - } - if(OP_LIKELY(ret>=0))_of->ready_state=OP_PARTOPEN; - return ret; -} - -static int op_open2(OggOpusFile *_of){ - int ret; - OP_ASSERT(_of->ready_state==OP_PARTOPEN); - if(_of->seekable){ - _of->ready_state=OP_OPENED; - ret=op_open_seekable2(_of); - } - else ret=0; - if(OP_LIKELY(ret>=0)){ - /*We have buffered packets from op_find_initial_pcm_offset(). - Move to OP_INITSET so we can use them.*/ - _of->ready_state=OP_STREAMSET; - ret=op_make_decode_ready(_of); - if(OP_LIKELY(ret>=0))return 0; - } - /*Don't auto-close the stream on failure.*/ - _of->callbacks.close=NULL; - op_clear(_of); - return ret; -} - -OggOpusFile *op_test_callbacks(void *_source,const OpusFileCallbacks *_cb, - const unsigned char *_initial_data,size_t _initial_bytes,int *_error){ - OggOpusFile *of; - int ret; - of=(OggOpusFile *)_ogg_malloc(sizeof(*of)); - ret=OP_EFAULT; - if(OP_LIKELY(of!=NULL)){ - ret=op_open1(of,_source,_cb,_initial_data,_initial_bytes); - if(OP_LIKELY(ret>=0)){ - if(_error!=NULL)*_error=0; - return of; - } - /*Don't auto-close the stream on failure.*/ - of->callbacks.close=NULL; - op_clear(of); - _ogg_free(of); - } - if(_error!=NULL)*_error=ret; - return NULL; -} - -OggOpusFile *op_open_callbacks(void *_source,const OpusFileCallbacks *_cb, - const unsigned char *_initial_data,size_t _initial_bytes,int *_error){ - OggOpusFile *of; - of=op_test_callbacks(_source,_cb,_initial_data,_initial_bytes,_error); - if(OP_LIKELY(of!=NULL)){ - int ret; - ret=op_open2(of); - if(OP_LIKELY(ret>=0))return of; - if(_error!=NULL)*_error=ret; - _ogg_free(of); - } - return NULL; -} - -/*Convenience routine to clean up from failure for the open functions that - create their own streams.*/ -static OggOpusFile *op_open_close_on_failure(void *_source, - const OpusFileCallbacks *_cb,int *_error){ - OggOpusFile *of; - 
if(OP_UNLIKELY(_source==NULL)){ - if(_error!=NULL)*_error=OP_EFAULT; - return NULL; - } - of=op_open_callbacks(_source,_cb,NULL,0,_error); - if(OP_UNLIKELY(of==NULL))(*_cb->close)(_source); - return of; -} - -OggOpusFile *op_open_file(const char *_path,int *_error){ - OpusFileCallbacks cb; - return op_open_close_on_failure(op_fopen(&cb,_path,"rb"),&cb,_error); -} - -OggOpusFile *op_open_memory(const unsigned char *_data,size_t _size, - int *_error){ - OpusFileCallbacks cb; - return op_open_close_on_failure(op_mem_stream_create(&cb,_data,_size),&cb, - _error); -} - -/*Convenience routine to clean up from failure for the open functions that - create their own streams.*/ -static OggOpusFile *op_test_close_on_failure(void *_source, - const OpusFileCallbacks *_cb,int *_error){ - OggOpusFile *of; - if(OP_UNLIKELY(_source==NULL)){ - if(_error!=NULL)*_error=OP_EFAULT; - return NULL; - } - of=op_test_callbacks(_source,_cb,NULL,0,_error); - if(OP_UNLIKELY(of==NULL))(*_cb->close)(_source); - return of; -} - -OggOpusFile *op_test_file(const char *_path,int *_error){ - OpusFileCallbacks cb; - return op_test_close_on_failure(op_fopen(&cb,_path,"rb"),&cb,_error); -} - -OggOpusFile *op_test_memory(const unsigned char *_data,size_t _size, - int *_error){ - OpusFileCallbacks cb; - return op_test_close_on_failure(op_mem_stream_create(&cb,_data,_size),&cb, - _error); -} - -int op_test_open(OggOpusFile *_of){ - int ret; - if(OP_UNLIKELY(_of->ready_state!=OP_PARTOPEN))return OP_EINVAL; - ret=op_open2(_of); - /*op_open2() will clear this structure on failure. 
- Reset its contents to prevent double-frees in op_free().*/ - if(OP_UNLIKELY(ret<0))memset(_of,0,sizeof(*_of)); - return ret; -} - -void op_free(OggOpusFile *_of){ - if(OP_LIKELY(_of!=NULL)){ - op_clear(_of); - _ogg_free(_of); - } -} - -int op_seekable(const OggOpusFile *_of){ - return _of->seekable; -} - -int op_link_count(const OggOpusFile *_of){ - return _of->nlinks; -} - -ogg_uint32_t op_serialno(const OggOpusFile *_of,int _li){ - if(OP_UNLIKELY(_li>=_of->nlinks))_li=_of->nlinks-1; - if(!_of->seekable)_li=0; - return _of->links[_li<0?_of->cur_link:_li].serialno; -} - -int op_channel_count(const OggOpusFile *_of,int _li){ - return op_head(_of,_li)->channel_count; -} - -opus_int64 op_raw_total(const OggOpusFile *_of,int _li){ - if(OP_UNLIKELY(_of->ready_state<OP_OPENED) - ||OP_UNLIKELY(!_of->seekable) - ||OP_UNLIKELY(_li>=_of->nlinks)){ - return OP_EINVAL; - } - if(_li<0)return _of->end-_of->links[0].offset; - return (_li+1>=_of->nlinks?_of->end:_of->links[_li+1].offset) - -_of->links[_li].offset; -} - -ogg_int64_t op_pcm_total(const OggOpusFile *_of,int _li){ - OggOpusLink *links; - ogg_int64_t diff; - int nlinks; - nlinks=_of->nlinks; - if(OP_UNLIKELY(_of->ready_state<OP_OPENED) - ||OP_UNLIKELY(!_of->seekable) - ||OP_UNLIKELY(_li>=nlinks)){ - return OP_EINVAL; - } - links=_of->links; - /*We verify that the granule position differences are larger than the - pre-skip and that the total duration does not overflow during link - enumeration, so we don't have to check here.*/ - if(_li<0){ - ogg_int64_t pcm_total; - int li; - pcm_total=0; - for(li=0;li<nlinks;li++){ - OP_ALWAYS_TRUE(!op_granpos_diff(&diff, - links[li].pcm_end,links[li].pcm_start)); - pcm_total+=diff-links[li].head.pre_skip; - } - return pcm_total; - } - OP_ALWAYS_TRUE(!op_granpos_diff(&diff, - links[_li].pcm_end,links[_li].pcm_start)); - return diff-links[_li].head.pre_skip; -} - -const OpusHead *op_head(const OggOpusFile *_of,int _li){ - if(OP_UNLIKELY(_li>=_of->nlinks))_li=_of->nlinks-1; - 
if(!_of->seekable)_li=0; - return &_of->links[_li<0?_of->cur_link:_li].head; -} - -const OpusTags *op_tags(const OggOpusFile *_of,int _li){ - if(OP_UNLIKELY(_li>=_of->nlinks))_li=_of->nlinks-1; - if(!_of->seekable){ - if(_of->ready_state<OP_STREAMSET&&_of->ready_state!=OP_PARTOPEN){ - return NULL; - } - _li=0; - } - else if(_li<0)_li=_of->ready_state>=OP_STREAMSET?_of->cur_link:0; - return &_of->links[_li].tags; -} - -int op_current_link(const OggOpusFile *_of){ - if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL; - return _of->cur_link; -} - -/*Compute an average bitrate given a byte and sample count. - Return: The bitrate in bits per second.*/ -static opus_int32 op_calc_bitrate(opus_int64 _bytes,ogg_int64_t _samples){ - /*These rates are absurd, but let's handle them anyway.*/ - if(OP_UNLIKELY(_bytes>(OP_INT64_MAX-(_samples>>1))/(48000*8))){ - ogg_int64_t den; - if(OP_UNLIKELY(_bytes/(OP_INT32_MAX/(48000*8))>=_samples)){ - return OP_INT32_MAX; - } - den=_samples/(48000*8); - return (opus_int32)((_bytes+(den>>1))/den); - } - if(OP_UNLIKELY(_samples<=0))return OP_INT32_MAX; - /*This can't actually overflow in normal operation: even with a pre-skip of - 545 2.5 ms frames with 8 streams running at 1282*8+1 bytes per packet - (1275 byte frames + Opus framing overhead + Ogg lacing values), that all - produce a single sample of decoded output, we still don't top 45 Mbps. 
- The only way to get bitrates larger than that is with excessive Opus - padding, more encoded streams than output channels, or lots and lots of - Ogg pages with no packets on them.*/ - return (opus_int32)OP_MIN((_bytes*48000*8+(_samples>>1))/_samples, - OP_INT32_MAX); -} - -opus_int32 op_bitrate(const OggOpusFile *_of,int _li){ - if(OP_UNLIKELY(_of->ready_state<OP_OPENED)||OP_UNLIKELY(!_of->seekable) - ||OP_UNLIKELY(_li>=_of->nlinks)){ - return OP_EINVAL; - } - return op_calc_bitrate(op_raw_total(_of,_li),op_pcm_total(_of,_li)); -} - -opus_int32 op_bitrate_instant(OggOpusFile *_of){ - ogg_int64_t samples_tracked; - opus_int32 ret; - if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL; - samples_tracked=_of->samples_tracked; - if(OP_UNLIKELY(samples_tracked==0))return OP_FALSE; - ret=op_calc_bitrate(_of->bytes_tracked,samples_tracked); - _of->bytes_tracked=0; - _of->samples_tracked=0; - return ret; -} - -/*Fetch and process a page. - This handles the case where we're at a bitstream boundary and dumps the - decoding machine. - If the decoding machine is unloaded, it loads it. - It also keeps prev_packet_gp up to date (seek and read both use this). - Return: <0) Error, OP_HOLE (lost packet), or OP_EOF. 
- 0) Got at least one audio data packet.*/ -static int op_fetch_and_process_page(OggOpusFile *_of, - ogg_page *_og,opus_int64 _page_offset,int _spanp,int _ignore_holes){ - OggOpusLink *links; - ogg_uint32_t cur_serialno; - int seekable; - int cur_link; - int ret; - /*We shouldn't get here if we have unprocessed packets.*/ - OP_ASSERT(_of->ready_state<OP_INITSET||_of->op_pos>=_of->op_count); - seekable=_of->seekable; - links=_of->links; - cur_link=seekable?_of->cur_link:0; - cur_serialno=links[cur_link].serialno; - /*Handle one page.*/ - for(;;){ - ogg_page og; - OP_ASSERT(_of->ready_state>=OP_OPENED); - /*If we were given a page to use, use it.*/ - if(_og!=NULL){ - *&og=*_og; - _og=NULL; - } - /*Keep reading until we get a page with the correct serialno.*/ - else _page_offset=op_get_next_page(_of,&og,_of->end); - /*EOF: Leave uninitialized.*/ - if(_page_offset<0)return _page_offset<OP_FALSE?(int)_page_offset:OP_EOF; - if(OP_LIKELY(_of->ready_state>=OP_STREAMSET) - &&cur_serialno!=(ogg_uint32_t)ogg_page_serialno(&og)){ - /*Two possibilities: - 1) Another stream is multiplexed into this logical section, or*/ - if(OP_LIKELY(!ogg_page_bos(&og)))continue; - /* 2) Our decoding just traversed a bitstream boundary.*/ - if(!_spanp)return OP_EOF; - if(OP_LIKELY(_of->ready_state>=OP_INITSET))op_decode_clear(_of); - } - /*Bitrate tracking: add the header's bytes here. - The body bytes are counted when we consume the packets.*/ - else _of->bytes_tracked+=og.header_len; - /*Do we need to load a new machine before submitting the page? - This is different in the seekable and non-seekable cases. - In the seekable case, we already have all the header information loaded - and cached. - We just initialize the machine with it and continue on our merry way. 
- In the non-seekable (streaming) case, we'll only be at a boundary if we - just left the previous logical bitstream, and we're now nominally at the - header of the next bitstream.*/ - if(OP_UNLIKELY(_of->ready_state<OP_STREAMSET)){ - if(seekable){ - ogg_uint32_t serialno; - int nlinks; - int li; - serialno=ogg_page_serialno(&og); - /*Match the serialno to bitstream section. - We use this rather than offset positions to avoid problems near - logical bitstream boundaries.*/ - nlinks=_of->nlinks; - for(li=0;li<nlinks&&links[li].serialno!=serialno;li++); - /*Not a desired Opus bitstream section. - Keep trying.*/ - if(li>=nlinks)continue; - cur_serialno=serialno; - _of->cur_link=cur_link=li; - ogg_stream_reset_serialno(&_of->os,serialno); - _of->ready_state=OP_STREAMSET; - /*If we're at the start of this link, initialize the granule position - and pre-skip tracking.*/ - if(_page_offset<=links[cur_link].data_offset){ - _of->prev_packet_gp=links[cur_link].pcm_start; - _of->prev_page_offset=-1; - _of->cur_discard_count=links[cur_link].head.pre_skip; - /*Ignore a hole at the start of a new link (this is common for - streams joined in the middle) or after seeking.*/ - _ignore_holes=1; - } - } - else{ - do{ - /*We're streaming. 
- Fetch the two header packets, build the info struct.*/ - ret=op_fetch_headers(_of,&links[0].head,&links[0].tags, - NULL,NULL,NULL,&og); - if(OP_UNLIKELY(ret<0))return ret; - /*op_find_initial_pcm_offset() will suppress any initial hole for us, - so no need to set _ignore_holes.*/ - ret=op_find_initial_pcm_offset(_of,links,&og); - if(OP_UNLIKELY(ret<0))return ret; - _of->links[0].serialno=cur_serialno=_of->os.serialno; - _of->cur_link++; - } - /*If the link was empty, keep going, because we already have the - BOS page of the next one in og.*/ - while(OP_UNLIKELY(ret>0)); - /*If we didn't get any packets out of op_find_initial_pcm_offset(), - keep going (this is possible if end-trimming trimmed them all).*/ - if(_of->op_count<=0)continue; - /*Otherwise, we're done. - TODO: This resets bytes_tracked, which misses the header bytes - already processed by op_find_initial_pcm_offset().*/ - ret=op_make_decode_ready(_of); - if(OP_UNLIKELY(ret<0))return ret; - return 0; - } - } - /*The buffered page is the data we want, and we're ready for it. 
- Add it to the stream state.*/ - if(OP_UNLIKELY(_of->ready_state==OP_STREAMSET)){ - ret=op_make_decode_ready(_of); - if(OP_UNLIKELY(ret<0))return ret; - } - /*Extract all the packets from the current page.*/ - ogg_stream_pagein(&_of->os,&og); - if(OP_LIKELY(_of->ready_state>=OP_INITSET)){ - opus_int32 total_duration; - int durations[255]; - int op_count; - total_duration=op_collect_audio_packets(_of,durations); - if(OP_UNLIKELY(total_duration<0)){ - /*Drain the packets from the page anyway.*/ - total_duration=op_collect_audio_packets(_of,durations); - OP_ASSERT(total_duration>=0); - /*Report holes to the caller.*/ - if(!_ignore_holes)return OP_HOLE; - } - op_count=_of->op_count; - /*If we found at least one audio data packet, compute per-packet granule - positions for them.*/ - if(op_count>0){ - ogg_int64_t diff; - ogg_int64_t prev_packet_gp; - ogg_int64_t cur_packet_gp; - ogg_int64_t cur_page_gp; - int cur_page_eos; - int pi; - cur_page_gp=_of->op[op_count-1].granulepos; - cur_page_eos=_of->op[op_count-1].e_o_s; - prev_packet_gp=_of->prev_packet_gp; - if(OP_UNLIKELY(prev_packet_gp==-1)){ - opus_int32 cur_discard_count; - /*This is the first call after a raw seek. - Try to reconstruct prev_packet_gp from scratch.*/ - OP_ASSERT(seekable); - if(OP_UNLIKELY(cur_page_eos)){ - /*If the first page we hit after our seek was the EOS page, and - we didn't start from data_offset or before, we don't have - enough information to do end-trimming. - Proceed to the next link, rather than risk playing back some - samples that shouldn't have been played.*/ - _of->op_count=0; - continue; - } - /*By default discard 80 ms of data after a seek, unless we seek - into the pre-skip region.*/ - cur_discard_count=80*48; - cur_page_gp=_of->op[op_count-1].granulepos; - /*Try to initialize prev_packet_gp. 
- If the current page had packets but didn't have a granule - position, or the granule position it had was too small (both - illegal), just use the starting granule position for the link.*/ - prev_packet_gp=links[cur_link].pcm_start; - if(OP_LIKELY(cur_page_gp!=-1)){ - op_granpos_add(&prev_packet_gp,cur_page_gp,-total_duration); - } - if(OP_LIKELY(!op_granpos_diff(&diff, - prev_packet_gp,links[cur_link].pcm_start))){ - opus_int32 pre_skip; - /*If we start at the beginning of the pre-skip region, or we're - at least 80 ms from the end of the pre-skip region, we discard - to the end of the pre-skip region. - Otherwise, we still use the 80 ms default, which will discard - past the end of the pre-skip region.*/ - pre_skip=links[cur_link].head.pre_skip; - if(diff>=0&&diff<=OP_MAX(0,pre_skip-80*48)){ - cur_discard_count=pre_skip-(int)diff; - } - } - _of->cur_discard_count=cur_discard_count; - } - if(OP_UNLIKELY(cur_page_gp==-1)){ - /*This page had completed packets but didn't have a valid granule - position. 
- This is illegal, but we'll try to handle it by continuing to count - forwards from the previous page.*/ - if(op_granpos_add(&cur_page_gp,prev_packet_gp,total_duration)<0){ - /*The timestamp for this page overflowed.*/ - cur_page_gp=links[cur_link].pcm_end; - } - } - /*If we hit the last page, handle end-trimming.*/ - if(OP_UNLIKELY(cur_page_eos) - &&OP_LIKELY(!op_granpos_diff(&diff,cur_page_gp,prev_packet_gp)) - &&OP_LIKELY(diff<total_duration)){ - cur_packet_gp=prev_packet_gp; - for(pi=0;pi<op_count;pi++){ - diff=durations[pi]-diff; - /*If we have samples to trim...*/ - if(diff>0){ - /*If we trimmed the entire packet, stop (the spec says encoders - shouldn't do this, but we support it anyway).*/ - if(OP_UNLIKELY(diff>durations[pi]))break; - cur_packet_gp=cur_page_gp; - /*Move the EOS flag to this packet, if necessary, so we'll trim - the samples during decode.*/ - _of->op[pi].e_o_s=1; - } - else{ - /*Update the granule position as normal.*/ - OP_ALWAYS_TRUE(!op_granpos_add(&cur_packet_gp, - cur_packet_gp,durations[pi])); - } - _of->op[pi].granulepos=cur_packet_gp; - OP_ALWAYS_TRUE(!op_granpos_diff(&diff,cur_page_gp,cur_packet_gp)); - } - } - else{ - /*Propagate timestamps to earlier packets. - op_granpos_add(&prev_packet_gp,prev_packet_gp,total_duration) - should succeed and give prev_packet_gp==cur_page_gp. - But we don't bother to check that, as there isn't much we can do - if it's not true, and it actually will not be true on the first - page after a seek, if there was a continued packet. - The only thing we guarantee is that the start and end granule - positions of the packets are valid, and that they are monotonic - within a page. - They might be completely out of range for this link (we'll check - that elsewhere), or non-monotonic between pages.*/ - if(OP_UNLIKELY(op_granpos_add(&prev_packet_gp, - cur_page_gp,-total_duration)<0)){ - /*The starting timestamp for the first packet on this page - underflowed. 
- This is illegal, but we ignore it.*/ - prev_packet_gp=0; - } - for(pi=0;pi<op_count;pi++){ - if(OP_UNLIKELY(op_granpos_add(&cur_packet_gp, - cur_page_gp,-total_duration)<0)){ - /*The start timestamp for this packet underflowed. - This is illegal, but we ignore it.*/ - cur_packet_gp=0; - } - total_duration-=durations[pi]; - OP_ASSERT(total_duration>=0); - OP_ALWAYS_TRUE(!op_granpos_add(&cur_packet_gp, - cur_packet_gp,durations[pi])); - _of->op[pi].granulepos=cur_packet_gp; - } - OP_ASSERT(total_duration==0); - } - _of->prev_packet_gp=prev_packet_gp; - _of->prev_page_offset=_page_offset; - _of->op_count=pi; - /*If end-trimming didn't trim all the packets, we're done.*/ - if(OP_LIKELY(pi>0))return 0; - } - } - } -} - -int op_raw_seek(OggOpusFile *_of,opus_int64 _pos){ - int ret; - if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL; - /*Don't dump the decoder state if we can't seek.*/ - if(OP_UNLIKELY(!_of->seekable))return OP_ENOSEEK; - if(OP_UNLIKELY(_pos<0)||OP_UNLIKELY(_pos>_of->end))return OP_EINVAL; - /*Clear out any buffered, decoded data.*/ - op_decode_clear(_of); - _of->bytes_tracked=0; - _of->samples_tracked=0; - ret=op_seek_helper(_of,_pos); - if(OP_UNLIKELY(ret<0))return OP_EREAD; - ret=op_fetch_and_process_page(_of,NULL,-1,1,1); - /*If we hit EOF, op_fetch_and_process_page() leaves us uninitialized. 
- Instead, jump to the end.*/ - if(ret==OP_EOF){ - int cur_link; - op_decode_clear(_of); - cur_link=_of->nlinks-1; - _of->cur_link=cur_link; - _of->prev_packet_gp=_of->links[cur_link].pcm_end; - _of->cur_discard_count=0; - ret=0; - } - return ret; -} - -/*Convert a PCM offset relative to the start of the whole stream to a granule - position in an individual link.*/ -static ogg_int64_t op_get_granulepos(const OggOpusFile *_of, - ogg_int64_t _pcm_offset,int *_li){ - const OggOpusLink *links; - ogg_int64_t duration; - int nlinks; - int li; - OP_ASSERT(_pcm_offset>=0); - nlinks=_of->nlinks; - links=_of->links; - for(li=0;OP_LIKELY(li<nlinks);li++){ - ogg_int64_t pcm_start; - opus_int32 pre_skip; - pcm_start=links[li].pcm_start; - pre_skip=links[li].head.pre_skip; - OP_ALWAYS_TRUE(!op_granpos_diff(&duration,links[li].pcm_end,pcm_start)); - duration-=pre_skip; - if(_pcm_offset<duration){ - _pcm_offset+=pre_skip; - if(OP_UNLIKELY(pcm_start>OP_INT64_MAX-_pcm_offset)){ - /*Adding this amount to the granule position would overflow the positive - half of its 64-bit range. 
- Since signed overflow is undefined in C, do it in a way the compiler - isn't allowed to screw up.*/ - _pcm_offset-=OP_INT64_MAX-pcm_start+1; - pcm_start=OP_INT64_MIN; - } - pcm_start+=_pcm_offset; - *_li=li; - return pcm_start; - } - _pcm_offset-=duration; - } - return -1; -} - -/*A small helper to determine if an Ogg page contains data that continues onto - a subsequent page.*/ -static int op_page_continues(const ogg_page *_og){ - int nlacing; - OP_ASSERT(_og->header_len>=27); - nlacing=_og->header[26]; - OP_ASSERT(_og->header_len>=27+nlacing); - /*This also correctly handles the (unlikely) case of nlacing==0, because - 0!=255.*/ - return _og->header[27+nlacing-1]==255; -} - -/*A small helper to buffer the continued packet data from a page.*/ -static void op_buffer_continued_data(OggOpusFile *_of,ogg_page *_og){ - ogg_packet op; - ogg_stream_pagein(&_of->os,_og); - /*Drain any packets that did end on this page (and ignore holes). - We only care about the continued packet data.*/ - while(ogg_stream_packetout(&_of->os,&op)); -} - -/*This controls how close the target has to be to use the current stream - position to subdivide the initial range. - Two minutes seems to be a good default.*/ -#define OP_CUR_TIME_THRESH (120*48*(opus_int32)1000) - -/*Note: The OP_SMALL_FOOTPRINT #define doesn't (currently) save much code size, - but it's meant to serve as documentation for portions of the seeking - algorithm that are purely optional, to aid others learning from/porting this - code to other contexts.*/ -/*#define OP_SMALL_FOOTPRINT (1)*/ - -/*Search within link _li for the page with the highest granule position - preceding (or equal to) _target_gp. - There is a danger here: missing pages or incorrect frame number information - in the bitstream could make our task impossible. 
- Account for that (and report it as an error condition).*/ -static int op_pcm_seek_page(OggOpusFile *_of, - ogg_int64_t _target_gp,int _li){ - const OggOpusLink *link; - ogg_page og; - ogg_int64_t pcm_pre_skip; - ogg_int64_t pcm_start; - ogg_int64_t pcm_end; - ogg_int64_t best_gp; - ogg_int64_t diff; - ogg_uint32_t serialno; - opus_int32 pre_skip; - opus_int64 begin; - opus_int64 end; - opus_int64 boundary; - opus_int64 best; - opus_int64 best_start; - opus_int64 page_offset; - opus_int64 d0; - opus_int64 d1; - opus_int64 d2; - int force_bisect; - int buffering; - int ret; - _of->bytes_tracked=0; - _of->samples_tracked=0; - link=_of->links+_li; - best_gp=pcm_start=link->pcm_start; - pcm_end=link->pcm_end; - serialno=link->serialno; - best=best_start=begin=link->data_offset; - page_offset=-1; - buffering=0; - /*We discard the first 80 ms of data after a seek, so seek back that much - farther. - If we can't, simply seek to the beginning of the link.*/ - if(OP_UNLIKELY(op_granpos_add(&_target_gp,_target_gp,-80*48)<0) - ||OP_UNLIKELY(op_granpos_cmp(_target_gp,pcm_start)<0)){ - _target_gp=pcm_start; - } - /*Special case seeking to the start of the link.*/ - pre_skip=link->head.pre_skip; - OP_ALWAYS_TRUE(!op_granpos_add(&pcm_pre_skip,pcm_start,pre_skip)); - if(op_granpos_cmp(_target_gp,pcm_pre_skip)<0)end=boundary=begin; - else{ - end=boundary=link->end_offset; -#if !defined(OP_SMALL_FOOTPRINT) - /*If we were decoding from this link, we can narrow the range a bit.*/ - if(_li==_of->cur_link&&_of->ready_state>=OP_INITSET){ - opus_int64 offset; - int op_count; - op_count=_of->op_count; - /*The only way the offset can be invalid _and_ we can fail the granule - position checks below is if someone changed the contents of the last - page since we read it. 
- We'd be within our rights to just return OP_EBADLINK in that case, but - we'll simply ignore the current position instead.*/ - offset=_of->offset; - if(op_count>0&&OP_LIKELY(offset<=end)){ - ogg_int64_t gp; - /*Make sure the timestamp is valid. - The granule position might be -1 if we collected the packets from a - page without a granule position after reporting a hole.*/ - gp=_of->op[op_count-1].granulepos; - if(OP_LIKELY(gp!=-1)&&OP_LIKELY(op_granpos_cmp(pcm_start,gp)<0) - &&OP_LIKELY(op_granpos_cmp(pcm_end,gp)>0)){ - OP_ALWAYS_TRUE(!op_granpos_diff(&diff,gp,_target_gp)); - /*We only actually use the current time if either - a) We can cut off at least half the range, or - b) We're seeking sufficiently close to the current position that - it's likely to be informative. - Otherwise it appears using the whole link range to estimate the - first seek location gives better results, on average.*/ - if(diff<0){ - OP_ASSERT(offset>=begin); - if(offset-begin>=end-begin>>1||diff>-OP_CUR_TIME_THRESH){ - best=begin=offset; - best_gp=pcm_start=gp; - /*If we have buffered data from a continued packet, remember the - offset of the previous page's start, so that if we do wind up - having to seek back here later, we can prime the stream with - the continued packet data. - With no continued packet, we remember the end of the page.*/ - best_start=_of->os.body_returned<_of->os.body_fill? - _of->prev_page_offset:best; - /*If there's completed packets and data in the stream state, - prev_page_offset should always be set.*/ - OP_ASSERT(best_start>=0); - /*Buffer any continued packet data starting from here.*/ - buffering=1; - } - } - else{ - ogg_int64_t prev_page_gp; - /*We might get lucky and already have the packet with the target - buffered. - Worth checking. 
- For very small files (with all of the data in a single page, - generally 1 second or less), we can loop them continuously - without seeking at all.*/ - OP_ALWAYS_TRUE(!op_granpos_add(&prev_page_gp,_of->op[0].granulepos, - -op_get_packet_duration(_of->op[0].packet,_of->op[0].bytes))); - if(op_granpos_cmp(prev_page_gp,_target_gp)<=0){ - /*Don't call op_decode_clear(), because it will dump our - packets.*/ - _of->op_pos=0; - _of->od_buffer_size=0; - _of->prev_packet_gp=prev_page_gp; - /*_of->prev_page_offset already points to the right place.*/ - _of->ready_state=OP_STREAMSET; - return op_make_decode_ready(_of); - } - /*No such luck. - Check if we can cut off at least half the range, though.*/ - if(offset-begin<=end-begin>>1||diff<OP_CUR_TIME_THRESH){ - /*We really want the page start here, but this will do.*/ - end=boundary=offset; - pcm_end=gp; - } - } - } - } - } -#endif - } - /*This code was originally based on the "new search algorithm by HB (Nicholas - Vinen)" from libvorbisfile. - It has been modified substantially since.*/ - op_decode_clear(_of); - if(!buffering)ogg_stream_reset_serialno(&_of->os,serialno); - _of->cur_link=_li; - _of->ready_state=OP_STREAMSET; - /*Initialize the interval size history.*/ - d2=d1=d0=end-begin; - force_bisect=0; - while(begin<end){ - opus_int64 bisect; - opus_int64 next_boundary; - opus_int32 chunk_size; - if(end-begin<OP_CHUNK_SIZE)bisect=begin; - else{ - /*Update the interval size history.*/ - d0=d1>>1; - d1=d2>>1; - d2=end-begin>>1; - if(force_bisect)bisect=begin+(end-begin>>1); - else{ - ogg_int64_t diff2; - OP_ALWAYS_TRUE(!op_granpos_diff(&diff,_target_gp,pcm_start)); - OP_ALWAYS_TRUE(!op_granpos_diff(&diff2,pcm_end,pcm_start)); - /*Take a (pretty decent) guess.*/ - bisect=begin+op_rescale64(diff,diff2,end-begin)-OP_CHUNK_SIZE; - } - if(bisect-OP_CHUNK_SIZE<begin)bisect=begin; - force_bisect=0; - } - if(bisect!=_of->offset){ - /*Discard any buffered continued packet data.*/ - if(buffering)ogg_stream_reset(&_of->os); - 
buffering=0; - page_offset=-1; - ret=op_seek_helper(_of,bisect); - if(OP_UNLIKELY(ret<0))return ret; - } - chunk_size=OP_CHUNK_SIZE; - next_boundary=boundary; - /*Now scan forward and figure out where we landed. - In the ideal case, we will see a page with a granule position at or - before our target, followed by a page with a granule position after our - target (or the end of the search interval). - Then we can just drop out and will have all of the data we need with no - additional seeking. - If we landed too far before, or after, we'll break out and do another - bisection.*/ - while(begin<end){ - page_offset=op_get_next_page(_of,&og,boundary); - if(page_offset<0){ - if(page_offset<OP_FALSE)return (int)page_offset; - /*There are no more pages in our interval from our stream with a valid - timestamp that start at position bisect or later.*/ - /*If we scanned the whole interval, we're done.*/ - if(bisect<=begin+1)end=begin; - else{ - /*Otherwise, back up one chunk. - First, discard any data from a continued packet.*/ - if(buffering)ogg_stream_reset(&_of->os); - buffering=0; - bisect=OP_MAX(bisect-chunk_size,begin); - ret=op_seek_helper(_of,bisect); - if(OP_UNLIKELY(ret<0))return ret; - /*Bump up the chunk size.*/ - chunk_size=OP_MIN(2*chunk_size,OP_CHUNK_SIZE_MAX); - /*If we did find a page from another stream or without a timestamp, - don't read past it.*/ - boundary=next_boundary; - } - } - else{ - ogg_int64_t gp; - int has_packets; - /*Save the offset of the first page we found after the seek, regardless - of the stream it came from or whether or not it has a timestamp.*/ - next_boundary=OP_MIN(page_offset,next_boundary); - if(serialno!=(ogg_uint32_t)ogg_page_serialno(&og))continue; - has_packets=ogg_page_packets(&og)>0; - /*Force the gp to -1 (as it should be per spec) if no packets end on - this page. 
- Otherwise we might get confused when we try to pull out a packet - with that timestamp and can't find it.*/ - gp=has_packets?ogg_page_granulepos(&og):-1; - if(gp==-1){ - if(buffering){ - if(OP_LIKELY(!has_packets))ogg_stream_pagein(&_of->os,&og); - else{ - /*If packets did end on this page, but we still didn't have a - valid granule position (in violation of the spec!), stop - buffering continued packet data. - Otherwise we might continue past the packet we actually - wanted.*/ - ogg_stream_reset(&_of->os); - buffering=0; - } - } - continue; - } - if(op_granpos_cmp(gp,_target_gp)<0){ - /*We found a page that ends before our target. - Advance to the raw offset of the next page.*/ - begin=_of->offset; - if(OP_UNLIKELY(op_granpos_cmp(pcm_start,gp)>0) - ||OP_UNLIKELY(op_granpos_cmp(pcm_end,gp)<0)){ - /*Don't let pcm_start get out of range! - That could happen with an invalid timestamp.*/ - break; - } - /*Save the byte offset of the end of the page with this granule - position.*/ - best=best_start=begin; - /*Buffer any data from a continued packet, if necessary. - This avoids the need to seek back here if the next timestamp we - encounter while scanning forward lies after our target.*/ - if(buffering)ogg_stream_reset(&_of->os); - if(op_page_continues(&og)){ - op_buffer_continued_data(_of,&og); - /*If we have a continued packet, remember the offset of this - page's start, so that if we do wind up having to seek back here - later, we can prime the stream with the continued packet data. 
- With no continued packet, we remember the end of the page.*/ - best_start=page_offset; - } - /*Then force buffering on, so that if a packet starts (but does not - end) on the next page, we still avoid the extra seek back.*/ - buffering=1; - best_gp=pcm_start=gp; - OP_ALWAYS_TRUE(!op_granpos_diff(&diff,_target_gp,pcm_start)); - /*If we're more than a second away from our target, break out and - do another bisection.*/ - if(diff>48000)break; - /*Otherwise, keep scanning forward (do NOT use begin+1).*/ - bisect=begin; - } - else{ - /*We found a page that ends after our target.*/ - /*If we scanned the whole interval before we found it, we're done.*/ - if(bisect<=begin+1)end=begin; - else{ - end=bisect; - /*In later iterations, don't read past the first page we found.*/ - boundary=next_boundary; - /*If we're not making much progress shrinking the interval size, - start forcing straight bisection to limit the worst case.*/ - force_bisect=end-begin>d0*2; - /*Don't let pcm_end get out of range! - That could happen with an invalid timestamp.*/ - if(OP_LIKELY(op_granpos_cmp(pcm_end,gp)>0) - &&OP_LIKELY(op_granpos_cmp(pcm_start,gp)<=0)){ - pcm_end=gp; - } - break; - } - } - } - } - } - /*Found our page.*/ - OP_ASSERT(op_granpos_cmp(best_gp,pcm_start)>=0); - /*Seek, if necessary. - If we were buffering data from a continued packet, we should be able to - continue to scan forward to get the rest of the data (even if - page_offset==-1). 
- Otherwise, we need to seek back to best_start.*/ - if(!buffering){ - if(best_start!=page_offset){ - page_offset=-1; - ret=op_seek_helper(_of,best_start); - if(OP_UNLIKELY(ret<0))return ret; - } - if(best_start<best){ - /*Retrieve the page at best_start, if we do not already have it.*/ - if(page_offset<0){ - page_offset=op_get_next_page(_of,&og,link->end_offset); - if(OP_UNLIKELY(page_offset<OP_FALSE))return (int)page_offset; - if(OP_UNLIKELY(page_offset!=best_start))return OP_EBADLINK; - } - op_buffer_continued_data(_of,&og); - page_offset=-1; - } - } - /*Update prev_packet_gp to allow per-packet granule position assignment.*/ - _of->prev_packet_gp=best_gp; - _of->prev_page_offset=best_start; - ret=op_fetch_and_process_page(_of,page_offset<0?NULL:&og,page_offset,0,1); - if(OP_UNLIKELY(ret<0))return OP_EBADLINK; - /*Verify result.*/ - if(OP_UNLIKELY(op_granpos_cmp(_of->prev_packet_gp,_target_gp)>0)){ - return OP_EBADLINK; - } - /*Our caller will set cur_discard_count to handle pre-roll.*/ - return 0; -} - -int op_pcm_seek(OggOpusFile *_of,ogg_int64_t _pcm_offset){ - const OggOpusLink *link; - ogg_int64_t pcm_start; - ogg_int64_t target_gp; - ogg_int64_t prev_packet_gp; - ogg_int64_t skip; - ogg_int64_t diff; - int op_count; - int op_pos; - int ret; - int li; - if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL; - if(OP_UNLIKELY(!_of->seekable))return OP_ENOSEEK; - if(OP_UNLIKELY(_pcm_offset<0))return OP_EINVAL; - target_gp=op_get_granulepos(_of,_pcm_offset,&li); - if(OP_UNLIKELY(target_gp==-1))return OP_EINVAL; - link=_of->links+li; - pcm_start=link->pcm_start; - OP_ALWAYS_TRUE(!op_granpos_diff(&_pcm_offset,target_gp,pcm_start)); -#if !defined(OP_SMALL_FOOTPRINT) - /*For small (90 ms or less) forward seeks within the same link, just decode - forward. 
- This also optimizes the case of seeking to the current position.*/ - if(li==_of->cur_link&&_of->ready_state>=OP_INITSET){ - ogg_int64_t gp; - gp=_of->prev_packet_gp; - if(OP_LIKELY(gp!=-1)){ - int nbuffered; - nbuffered=OP_MAX(_of->od_buffer_size-_of->od_buffer_pos,0); - OP_ALWAYS_TRUE(!op_granpos_add(&gp,gp,-nbuffered)); - /*We do _not_ add cur_discard_count to gp. - Otherwise the total amount to discard could grow without bound, and it - would be better just to do a full seek.*/ - if(OP_LIKELY(!op_granpos_diff(&diff,gp,pcm_start))){ - ogg_int64_t discard_count; - discard_count=_pcm_offset-diff; - /*We use a threshold of 90 ms instead of 80, since 80 ms is the - _minimum_ we would have discarded after a full seek. - Assuming 20 ms frames (the default), we'd discard 90 ms on average.*/ - if(discard_count>=0&&OP_UNLIKELY(discard_count<90*48)){ - _of->cur_discard_count=(opus_int32)discard_count; - return 0; - } - } - } - } -#endif - ret=op_pcm_seek_page(_of,target_gp,li); - if(OP_UNLIKELY(ret<0))return ret; - /*Now skip samples until we actually get to our target.*/ - /*Figure out where we should skip to.*/ - if(_pcm_offset<=link->head.pre_skip)skip=0; - else skip=OP_MAX(_pcm_offset-80*48,0); - OP_ASSERT(_pcm_offset-skip>=0); - OP_ASSERT(_pcm_offset-skip<OP_INT32_MAX-120*48); - /*Skip packets until we find one with samples past our skip target.*/ - for(;;){ - op_count=_of->op_count; - prev_packet_gp=_of->prev_packet_gp; - for(op_pos=_of->op_pos;op_pos<op_count;op_pos++){ - ogg_int64_t cur_packet_gp; - cur_packet_gp=_of->op[op_pos].granulepos; - if(OP_LIKELY(!op_granpos_diff(&diff,cur_packet_gp,pcm_start)) - &&diff>skip){ - break; - } - prev_packet_gp=cur_packet_gp; - } - _of->prev_packet_gp=prev_packet_gp; - _of->op_pos=op_pos; - if(op_pos<op_count)break; - /*We skipped all the packets on this page. 
- Fetch another.*/ - ret=op_fetch_and_process_page(_of,NULL,-1,0,1); - if(OP_UNLIKELY(ret<0))return OP_EBADLINK; - } - OP_ALWAYS_TRUE(!op_granpos_diff(&diff,prev_packet_gp,pcm_start)); - /*We skipped too far. - Either the timestamps were illegal or there was a hole in the data.*/ - if(diff>skip)return OP_EBADLINK; - OP_ASSERT(_pcm_offset-diff<OP_INT32_MAX); - /*TODO: If there are further holes/illegal timestamps, we still won't decode - to the correct sample. - However, at least op_pcm_tell() will report the correct value immediately - after returning.*/ - _of->cur_discard_count=(opus_int32)(_pcm_offset-diff); - return 0; -} - -opus_int64 op_raw_tell(const OggOpusFile *_of){ - if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL; - return _of->offset; -} - -/*Convert a granule position from a given link to a PCM offset relative to the - start of the whole stream. - For unseekable sources, this gets reset to 0 at the beginning of each link.*/ -static ogg_int64_t op_get_pcm_offset(const OggOpusFile *_of, - ogg_int64_t _gp,int _li){ - const OggOpusLink *links; - ogg_int64_t pcm_offset; - ogg_int64_t delta; - int li; - links=_of->links; - pcm_offset=0; - OP_ASSERT(_li<_of->nlinks); - for(li=0;li<_li;li++){ - OP_ALWAYS_TRUE(!op_granpos_diff(&delta, - links[li].pcm_end,links[li].pcm_start)); - delta-=links[li].head.pre_skip; - pcm_offset+=delta; - } - OP_ASSERT(_li>=0); - if(_of->seekable&&OP_UNLIKELY(op_granpos_cmp(_gp,links[_li].pcm_end)>0)){ - _gp=links[_li].pcm_end; - } - if(OP_LIKELY(op_granpos_cmp(_gp,links[_li].pcm_start)>0)){ - if(OP_UNLIKELY(op_granpos_diff(&delta,_gp,links[_li].pcm_start)<0)){ - /*This means an unseekable stream claimed to have a page from more than - 2 billion days after we joined.*/ - OP_ASSERT(!_of->seekable); - return OP_INT64_MAX; - } - if(delta<links[_li].head.pre_skip)delta=0; - else delta-=links[_li].head.pre_skip; - /*In the seekable case, _gp was limited by pcm_end. 
- In the unseekable case, pcm_offset should be 0.*/ - OP_ASSERT(pcm_offset<=OP_INT64_MAX-delta); - pcm_offset+=delta; - } - return pcm_offset; -} - -ogg_int64_t op_pcm_tell(const OggOpusFile *_of){ - ogg_int64_t gp; - int nbuffered; - int li; - if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL; - gp=_of->prev_packet_gp; - if(gp==-1)return 0; - nbuffered=OP_MAX(_of->od_buffer_size-_of->od_buffer_pos,0); - OP_ALWAYS_TRUE(!op_granpos_add(&gp,gp,-nbuffered)); - li=_of->seekable?_of->cur_link:0; - if(op_granpos_add(&gp,gp,_of->cur_discard_count)<0){ - gp=_of->links[li].pcm_end; - } - return op_get_pcm_offset(_of,gp,li); -} - -void op_set_decode_callback(OggOpusFile *_of, - op_decode_cb_func _decode_cb,void *_ctx){ - _of->decode_cb=_decode_cb; - _of->decode_cb_ctx=_ctx; -} - -int op_set_gain_offset(OggOpusFile *_of, - int _gain_type,opus_int32 _gain_offset_q8){ - if(_gain_type!=OP_HEADER_GAIN&&_gain_type!=OP_ALBUM_GAIN - &&_gain_type!=OP_TRACK_GAIN&&_gain_type!=OP_ABSOLUTE_GAIN){ - return OP_EINVAL; - } - _of->gain_type=_gain_type; - /*The sum of header gain and track gain lies in the range [-65536,65534]. - These bounds allow the offset to set the final value to anywhere in the - range [-32768,32767], which is what we'll clamp it to before applying.*/ - _of->gain_offset_q8=OP_CLAMP(-98302,_gain_offset_q8,98303); - op_update_gain(_of); - return 0; -} - -void op_set_dither_enabled(OggOpusFile *_of,int _enabled){ -#if !defined(OP_FIXED_POINT) - _of->dither_disabled=!_enabled; - if(!_enabled)_of->dither_mute=65; -#endif -} - -/*Allocate the decoder scratch buffer. 
- This is done lazily, since if the user provides large enough buffers, we'll - never need it.*/ -static int op_init_buffer(OggOpusFile *_of){ - int nchannels_max; - if(_of->seekable){ - const OggOpusLink *links; - int nlinks; - int li; - links=_of->links; - nlinks=_of->nlinks; - nchannels_max=1; - for(li=0;li<nlinks;li++){ - nchannels_max=OP_MAX(nchannels_max,links[li].head.channel_count); - } - } - else nchannels_max=OP_NCHANNELS_MAX; - _of->od_buffer=(op_sample *)_ogg_malloc( - sizeof(*_of->od_buffer)*nchannels_max*120*48); - if(_of->od_buffer==NULL)return OP_EFAULT; - return 0; -} - -/*Decode a single packet into the target buffer.*/ -static int op_decode(OggOpusFile *_of,op_sample *_pcm, - const ogg_packet *_op,int _nsamples,int _nchannels){ - int ret; - /*First we try using the application-provided decode callback.*/ - if(_of->decode_cb!=NULL){ -#if defined(OP_FIXED_POINT) - ret=(*_of->decode_cb)(_of->decode_cb_ctx,_of->od,_pcm,_op, - _nsamples,_nchannels,OP_DEC_FORMAT_SHORT,_of->cur_link); -#else - ret=(*_of->decode_cb)(_of->decode_cb_ctx,_of->od,_pcm,_op, - _nsamples,_nchannels,OP_DEC_FORMAT_FLOAT,_of->cur_link); -#endif - } - else ret=OP_DEC_USE_DEFAULT; - /*If the application didn't want to handle decoding, do it ourselves.*/ - if(ret==OP_DEC_USE_DEFAULT){ -#if defined(OP_FIXED_POINT) - ret=opus_multistream_decode(_of->od, - _op->packet,_op->bytes,_pcm,_nsamples,0); -#else - ret=opus_multistream_decode_float(_of->od, - _op->packet,_op->bytes,_pcm,_nsamples,0); -#endif - OP_ASSERT(ret<0||ret==_nsamples); - } - /*If the application returned a positive value other than 0 or - OP_DEC_USE_DEFAULT, fail.*/ - else if(OP_UNLIKELY(ret>0))return OP_EBADPACKET; - if(OP_UNLIKELY(ret<0))return OP_EBADPACKET; - return ret; -} - -/*Read more samples from the stream, using the same API as op_read() or - op_read_float().*/ -static int op_read_native(OggOpusFile *_of, - op_sample *_pcm,int _buf_size,int *_li){ - if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL; 
- for(;;){ - int ret; - if(OP_LIKELY(_of->ready_state>=OP_INITSET)){ - int nchannels; - int od_buffer_pos; - int nsamples; - int op_pos; - nchannels=_of->links[_of->seekable?_of->cur_link:0].head.channel_count; - od_buffer_pos=_of->od_buffer_pos; - nsamples=_of->od_buffer_size-od_buffer_pos; - /*If we have buffered samples, return them.*/ - if(nsamples>0){ - if(nsamples*nchannels>_buf_size)nsamples=_buf_size/nchannels; - memcpy(_pcm,_of->od_buffer+nchannels*od_buffer_pos, - sizeof(*_pcm)*nchannels*nsamples); - od_buffer_pos+=nsamples; - _of->od_buffer_pos=od_buffer_pos; - if(_li!=NULL)*_li=_of->cur_link; - return nsamples; - } - /*If we have buffered packets, decode one.*/ - op_pos=_of->op_pos; - if(OP_LIKELY(op_pos<_of->op_count)){ - const ogg_packet *pop; - ogg_int64_t diff; - opus_int32 cur_discard_count; - int duration; - int trimmed_duration; - pop=_of->op+op_pos++; - _of->op_pos=op_pos; - cur_discard_count=_of->cur_discard_count; - duration=op_get_packet_duration(pop->packet,pop->bytes); - /*We don't buffer packets with an invalid TOC sequence.*/ - OP_ASSERT(duration>0); - trimmed_duration=duration; - /*Perform end-trimming.*/ - if(OP_UNLIKELY(pop->e_o_s)){ - if(OP_UNLIKELY(op_granpos_cmp(pop->granulepos, - _of->prev_packet_gp)<=0)){ - trimmed_duration=0; - } - else if(OP_LIKELY(!op_granpos_diff(&diff, - pop->granulepos,_of->prev_packet_gp))){ - trimmed_duration=(int)OP_MIN(diff,trimmed_duration); - } - } - _of->prev_packet_gp=pop->granulepos; - if(OP_UNLIKELY(duration*nchannels>_buf_size)){ - op_sample *buf; - /*If the user's buffer is too small, decode into a scratch buffer.*/ - buf=_of->od_buffer; - if(OP_UNLIKELY(buf==NULL)){ - ret=op_init_buffer(_of); - if(OP_UNLIKELY(ret<0))return ret; - buf=_of->od_buffer; - } - ret=op_decode(_of,buf,pop,duration,nchannels); - if(OP_UNLIKELY(ret<0))return ret; - /*Perform pre-skip/pre-roll.*/ - od_buffer_pos=(int)OP_MIN(trimmed_duration,cur_discard_count); - cur_discard_count-=od_buffer_pos; - 
_of->cur_discard_count=cur_discard_count; - _of->od_buffer_pos=od_buffer_pos; - _of->od_buffer_size=trimmed_duration; - /*Update bitrate tracking based on the actual samples we used from - what was decoded.*/ - _of->bytes_tracked+=pop->bytes; - _of->samples_tracked+=trimmed_duration-od_buffer_pos; - } - else{ - /*Otherwise decode directly into the user's buffer.*/ - ret=op_decode(_of,_pcm,pop,duration,nchannels); - if(OP_UNLIKELY(ret<0))return ret; - if(OP_LIKELY(trimmed_duration>0)){ - /*Perform pre-skip/pre-roll.*/ - od_buffer_pos=(int)OP_MIN(trimmed_duration,cur_discard_count); - cur_discard_count-=od_buffer_pos; - _of->cur_discard_count=cur_discard_count; - trimmed_duration-=od_buffer_pos; - if(OP_LIKELY(trimmed_duration>0) - &&OP_UNLIKELY(od_buffer_pos>0)){ - memmove(_pcm,_pcm+od_buffer_pos*nchannels, - sizeof(*_pcm)*trimmed_duration*nchannels); - } - /*Update bitrate tracking based on the actual samples we used from - what was decoded.*/ - _of->bytes_tracked+=pop->bytes; - _of->samples_tracked+=trimmed_duration; - if(OP_LIKELY(trimmed_duration>0)){ - if(_li!=NULL)*_li=_of->cur_link; - return trimmed_duration; - } - } - } - /*Don't grab another page yet. - This one might have more packets, or might have buffered data now.*/ - continue; - } - } - /*Suck in another page.*/ - ret=op_fetch_and_process_page(_of,NULL,-1,1,0); - if(OP_UNLIKELY(ret==OP_EOF)){ - if(_li!=NULL)*_li=_of->cur_link; - return 0; - } - if(OP_UNLIKELY(ret<0))return ret; - } -} - -/*A generic filter to apply to the decoded audio data. - _src is non-const because we will destructively modify the contents of the - source buffer that we consume in some cases.*/ -typedef int (*op_read_filter_func)(OggOpusFile *_of,void *_dst,int _dst_sz, - op_sample *_src,int _nsamples,int _nchannels); - -/*Decode some samples and then apply a custom filter to them. 
- This is used to convert to different output formats.*/ -static int op_filter_read_native(OggOpusFile *_of,void *_dst,int _dst_sz, - op_read_filter_func _filter,int *_li){ - int ret; - /*Ensure we have some decoded samples in our buffer.*/ - ret=op_read_native(_of,NULL,0,_li); - /*Now apply the filter to them.*/ - if(OP_LIKELY(ret>=0)&&OP_LIKELY(_of->ready_state>=OP_INITSET)){ - int od_buffer_pos; - od_buffer_pos=_of->od_buffer_pos; - ret=_of->od_buffer_size-od_buffer_pos; - if(OP_LIKELY(ret>0)){ - int nchannels; - nchannels=_of->links[_of->seekable?_of->cur_link:0].head.channel_count; - ret=(*_filter)(_of,_dst,_dst_sz, - _of->od_buffer+nchannels*od_buffer_pos,ret,nchannels); - OP_ASSERT(ret>=0); - OP_ASSERT(ret<=_of->od_buffer_size-od_buffer_pos); - od_buffer_pos+=ret; - _of->od_buffer_pos=od_buffer_pos; - } - } - return ret; -} - -#if !defined(OP_FIXED_POINT)||!defined(OP_DISABLE_FLOAT_API) - -/*Matrices for downmixing from the supported channel counts to stereo. - The matrices with 5 or more channels are normalized to a total volume of 2.0, - since most mixes sound too quiet if normalized to 1.0 (as there is generally - little volume in the side/rear channels).*/ -static const float OP_STEREO_DOWNMIX[OP_NCHANNELS_MAX-2][OP_NCHANNELS_MAX][2]={ - /*3.0*/ - { - {0.5858F,0.0F},{0.4142F,0.4142F},{0.0F,0.5858F} - }, - /*quadrophonic*/ - { - {0.4226F,0.0F},{0.0F,0.4226F},{0.366F,0.2114F},{0.2114F,0.336F} - }, - /*5.0*/ - { - {0.651F,0.0F},{0.46F,0.46F},{0.0F,0.651F},{0.5636F,0.3254F}, - {0.3254F,0.5636F} - }, - /*5.1*/ - { - {0.529F,0.0F},{0.3741F,0.3741F},{0.0F,0.529F},{0.4582F,0.2645F}, - {0.2645F,0.4582F},{0.3741F,0.3741F} - }, - /*6.1*/ - { - {0.4553F,0.0F},{0.322F,0.322F},{0.0F,0.4553F},{0.3943F,0.2277F}, - {0.2277F,0.3943F},{0.2788F,0.2788F},{0.322F,0.322F} - }, - /*7.1*/ - { - {0.3886F,0.0F},{0.2748F,0.2748F},{0.0F,0.3886F},{0.3366F,0.1943F}, - {0.1943F,0.3366F},{0.3366F,0.1943F},{0.1943F,0.3366F},{0.2748F,0.2748F} - } -}; - -#endif - -#if 
defined(OP_FIXED_POINT) - -/*Matrices for downmixing from the supported channel counts to stereo. - The matrices with 5 or more channels are normalized to a total volume of 2.0, - since most mixes sound too quiet if normalized to 1.0 (as there is generally - little volume in the side/rear channels). - Hence we keep the coefficients in Q14, so the downmix values won't overflow a - 32-bit number.*/ -static const opus_int16 OP_STEREO_DOWNMIX_Q14 - [OP_NCHANNELS_MAX-2][OP_NCHANNELS_MAX][2]={ - /*3.0*/ - { - {9598,0},{6786,6786},{0,9598} - }, - /*quadrophonic*/ - { - {6924,0},{0,6924},{5996,3464},{3464,5996} - }, - /*5.0*/ - { - {10666,0},{7537,7537},{0,10666},{9234,5331},{5331,9234} - }, - /*5.1*/ - { - {8668,0},{6129,6129},{0,8668},{7507,4335},{4335,7507},{6129,6129} - }, - /*6.1*/ - { - {7459,0},{5275,5275},{0,7459},{6460,3731},{3731,6460},{4568,4568}, - {5275,5275} - }, - /*7.1*/ - { - {6368,0},{4502,4502},{0,6368},{5515,3183},{3183,5515},{5515,3183}, - {3183,5515},{4502,4502} - } -}; - -int op_read(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size,int *_li){ - return op_read_native(_of,_pcm,_buf_size,_li); -} - -static int op_stereo_filter(OggOpusFile *_of,void *_dst,int _dst_sz, - op_sample *_src,int _nsamples,int _nchannels){ - (void)_of; - _nsamples=OP_MIN(_nsamples,_dst_sz>>1); - if(_nchannels==2)memcpy(_dst,_src,_nsamples*2*sizeof(*_src)); - else{ - opus_int16 *dst; - int i; - dst=(opus_int16 *)_dst; - if(_nchannels==1){ - for(i=0;i<_nsamples;i++)dst[2*i+0]=dst[2*i+1]=_src[i]; - } - else{ - for(i=0;i<_nsamples;i++){ - opus_int32 l; - opus_int32 r; - int ci; - l=r=0; - for(ci=0;ci<_nchannels;ci++){ - opus_int32 s; - s=_src[_nchannels*i+ci]; - l+=OP_STEREO_DOWNMIX_Q14[_nchannels-3][ci][0]*s; - r+=OP_STEREO_DOWNMIX_Q14[_nchannels-3][ci][1]*s; - } - /*TODO: For 5 or more channels, we should do soft clipping here.*/ - dst[2*i+0]=(opus_int16)OP_CLAMP(-32768,l+8192>>14,32767); - dst[2*i+1]=(opus_int16)OP_CLAMP(-32768,r+8192>>14,32767); - } - } - } - return _nsamples; 
-} - -int op_read_stereo(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size){ - return op_filter_read_native(_of,_pcm,_buf_size,op_stereo_filter,NULL); -} - -# if !defined(OP_DISABLE_FLOAT_API) - -static int op_short2float_filter(OggOpusFile *_of,void *_dst,int _dst_sz, - op_sample *_src,int _nsamples,int _nchannels){ - float *dst; - int i; - (void)_of; - dst=(float *)_dst; - if(OP_UNLIKELY(_nsamples*_nchannels>_dst_sz))_nsamples=_dst_sz/_nchannels; - _dst_sz=_nsamples*_nchannels; - for(i=0;i<_dst_sz;i++)dst[i]=(1.0F/32768)*_src[i]; - return _nsamples; -} - -int op_read_float(OggOpusFile *_of,float *_pcm,int _buf_size,int *_li){ - return op_filter_read_native(_of,_pcm,_buf_size,op_short2float_filter,_li); -} - -static int op_short2float_stereo_filter(OggOpusFile *_of, - void *_dst,int _dst_sz,op_sample *_src,int _nsamples,int _nchannels){ - float *dst; - int i; - dst=(float *)_dst; - _nsamples=OP_MIN(_nsamples,_dst_sz>>1); - if(_nchannels==1){ - _nsamples=op_short2float_filter(_of,dst,_nsamples,_src,_nsamples,1); - for(i=_nsamples;i-->0;)dst[2*i+0]=dst[2*i+1]=dst[i]; - } - else if(_nchannels<5){ - /*For 3 or 4 channels, we can downmix in fixed point without risk of - clipping.*/ - if(_nchannels>2){ - _nsamples=op_stereo_filter(_of,_src,_nsamples*2, - _src,_nsamples,_nchannels); - } - return op_short2float_filter(_of,dst,_dst_sz,_src,_nsamples,2); - } - else{ - /*For 5 or more channels, we convert to floats and then downmix (so that we - don't risk clipping).*/ - for(i=0;i<_nsamples;i++){ - float l; - float r; - int ci; - l=r=0; - for(ci=0;ci<_nchannels;ci++){ - float s; - s=(1.0F/32768)*_src[_nchannels*i+ci]; - l+=OP_STEREO_DOWNMIX[_nchannels-3][ci][0]*s; - r+=OP_STEREO_DOWNMIX[_nchannels-3][ci][1]*s; - } - dst[2*i+0]=l; - dst[2*i+1]=r; - } - } - return _nsamples; -} - -int op_read_float_stereo(OggOpusFile *_of,float *_pcm,int _buf_size){ - return op_filter_read_native(_of,_pcm,_buf_size, - op_short2float_stereo_filter,NULL); -} - -# endif - -#else - -# if 
defined(OP_HAVE_LRINTF) -# include <math.h> -# define op_float2int(_x) (lrintf(_x)) -# else -# define op_float2int(_x) ((int)((_x)+((_x)<0?-0.5F:0.5F))) -# endif - -/*The dithering code here is adapted from opusdec, part of opus-tools. - It was originally written by Greg Maxwell.*/ - -static opus_uint32 op_rand(opus_uint32 _seed){ - return _seed*96314165+907633515&0xFFFFFFFFU; -} - -/*This implements 16-bit quantization with full triangular dither and IIR noise - shaping. - The noise shaping filters were designed by Sebastian Gesemann, and are based - on the LAME ATH curves with flattening to limit their peak gain to 20 dB. - Everyone else's noise shaping filters are mildly crazy. - The 48 kHz version of this filter is just a warped version of the 44.1 kHz - filter and probably could be improved by shifting the HF shelf up in - frequency a little bit, since 48 kHz has a bit more room and being more - conservative against bat-ears is probably more important than more noise - suppression. - This process can increase the peak level of the signal (in theory by the peak - error of 1.5 +20 dB, though that is unobservably rare). - To avoid clipping, the signal is attenuated by a couple thousandths of a dB. - Initially, the approach taken here was to only attenuate by the 99.9th - percentile, making clipping rare but not impossible (like SoX), but the - limited gain of the filter means that the worst case was only two - thousandths of a dB more, so this just uses the worst case. 
- The attenuation is probably also helpful to prevent clipping in the DAC - reconstruction filters or downstream resampling, in any case.*/ - -# define OP_GAIN (32753.0F) - -# define OP_PRNG_GAIN (1.0F/0xFFFFFFFF) - -/*48 kHz noise shaping filter, sd=2.34.*/ - -static const float OP_FCOEF_B[4]={ - 2.2374F,-0.7339F,-0.1251F,-0.6033F -}; - -static const float OP_FCOEF_A[4]={ - 0.9030F,0.0116F,-0.5853F,-0.2571F -}; - -static int op_float2short_filter(OggOpusFile *_of,void *_dst,int _dst_sz, - float *_src,int _nsamples,int _nchannels){ - opus_int16 *dst; - int ci; - int i; - dst=(opus_int16 *)_dst; - if(OP_UNLIKELY(_nsamples*_nchannels>_dst_sz))_nsamples=_dst_sz/_nchannels; -# if defined(OP_SOFT_CLIP) - if(_of->state_channel_count!=_nchannels){ - for(ci=0;ci<_nchannels;ci++)_of->clip_state[ci]=0; - } - opus_pcm_soft_clip(_src,_nsamples,_nchannels,_of->clip_state); -# endif - if(_of->dither_disabled){ - for(i=0;i<_nchannels*_nsamples;i++){ - dst[i]=op_float2int(OP_CLAMP(-32768,32768.0F*_src[i],32767)); - } - } - else{ - opus_uint32 seed; - int mute; - seed=_of->dither_seed; - mute=_of->dither_mute; - if(_of->state_channel_count!=_nchannels)mute=65; - /*In order to avoid replacing digital silence with quiet dither noise, we - mute if the output has been silent for a while.*/ - if(mute>64)memset(_of->dither_a,0,sizeof(*_of->dither_a)*4*_nchannels); - for(i=0;i<_nsamples;i++){ - int silent; - silent=1; - for(ci=0;ci<_nchannels;ci++){ - float r; - float s; - float err; - int si; - int j; - s=_src[_nchannels*i+ci]; - silent&=s==0; - s*=OP_GAIN; - err=0; - for(j=0;j<4;j++){ - err+=OP_FCOEF_B[j]*_of->dither_b[ci*4+j] - -OP_FCOEF_A[j]*_of->dither_a[ci*4+j]; - } - for(j=3;j-->0;)_of->dither_a[ci*4+j+1]=_of->dither_a[ci*4+j]; - for(j=3;j-->0;)_of->dither_b[ci*4+j+1]=_of->dither_b[ci*4+j]; - _of->dither_a[ci*4]=err; - s-=err; - if(mute>16)r=0; - else{ - seed=op_rand(seed); - r=seed*OP_PRNG_GAIN; - seed=op_rand(seed); - r-=seed*OP_PRNG_GAIN; - } - /*Clamp in float out of paranoia 
that the input will be > 96 dBFS and - wrap if the integer is clamped.*/ - si=op_float2int(OP_CLAMP(-32768,s+r,32767)); - dst[_nchannels*i+ci]=(opus_int16)si; - /*Including clipping in the noise shaping is generally disastrous: the - futile effort to restore the clipped energy results in more clipping. - However, small amounts---at the level which could normally be created - by dither and rounding---are harmless and can even reduce clipping - somewhat due to the clipping sometimes reducing the dither + rounding - error.*/ - _of->dither_b[ci*4]=mute>16?0:OP_CLAMP(-1.5F,si-s,1.5F); - } - mute++; - if(!silent)mute=0; - } - _of->dither_mute=OP_MIN(mute,65); - _of->dither_seed=seed; - } - _of->state_channel_count=_nchannels; - return _nsamples; -} - -int op_read(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size,int *_li){ - return op_filter_read_native(_of,_pcm,_buf_size,op_float2short_filter,_li); -} - -int op_read_float(OggOpusFile *_of,float *_pcm,int _buf_size,int *_li){ - _of->state_channel_count=0; - return op_read_native(_of,_pcm,_buf_size,_li); -} - -static int op_stereo_filter(OggOpusFile *_of,void *_dst,int _dst_sz, - op_sample *_src,int _nsamples,int _nchannels){ - (void)_of; - _nsamples=OP_MIN(_nsamples,_dst_sz>>1); - if(_nchannels==2)memcpy(_dst,_src,_nsamples*2*sizeof(*_src)); - else{ - float *dst; - int i; - dst=(float *)_dst; - if(_nchannels==1){ - for(i=0;i<_nsamples;i++)dst[2*i+0]=dst[2*i+1]=_src[i]; - } - else{ - for(i=0;i<_nsamples;i++){ - float l; - float r; - int ci; - l=r=0; - for(ci=0;ci<_nchannels;ci++){ - l+=OP_STEREO_DOWNMIX[_nchannels-3][ci][0]*_src[_nchannels*i+ci]; - r+=OP_STEREO_DOWNMIX[_nchannels-3][ci][1]*_src[_nchannels*i+ci]; - } - dst[2*i+0]=l; - dst[2*i+1]=r; - } - } - } - return _nsamples; -} - -static int op_float2short_stereo_filter(OggOpusFile *_of, - void *_dst,int _dst_sz,op_sample *_src,int _nsamples,int _nchannels){ - opus_int16 *dst; - dst=(opus_int16 *)_dst; - if(_nchannels==1){ - int i; - 
_nsamples=op_float2short_filter(_of,dst,_dst_sz>>1,_src,_nsamples,1); - for(i=_nsamples;i-->0;)dst[2*i+0]=dst[2*i+1]=dst[i]; - } - else{ - if(_nchannels>2){ - _nsamples=OP_MIN(_nsamples,_dst_sz>>1); - _nsamples=op_stereo_filter(_of,_src,_nsamples*2, - _src,_nsamples,_nchannels); - } - _nsamples=op_float2short_filter(_of,dst,_dst_sz,_src,_nsamples,2); - } - return _nsamples; -} - -int op_read_stereo(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size){ - return op_filter_read_native(_of,_pcm,_buf_size, - op_float2short_stereo_filter,NULL); -} - -int op_read_float_stereo(OggOpusFile *_of,float *_pcm,int _buf_size){ - _of->state_channel_count=0; - return op_filter_read_native(_of,_pcm,_buf_size,op_stereo_filter,NULL); -} - -#endif diff --git a/thirdparty/opus/repacketizer.c b/thirdparty/opus/repacketizer.c deleted file mode 100644 index c80ee7f001..0000000000 --- a/thirdparty/opus/repacketizer.c +++ /dev/null @@ -1,348 +0,0 @@ -/* Copyright (c) 2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus.h" -#include "opus_private.h" -#include "os_support.h" - - -int opus_repacketizer_get_size(void) -{ - return sizeof(OpusRepacketizer); -} - -OpusRepacketizer *opus_repacketizer_init(OpusRepacketizer *rp) -{ - rp->nb_frames = 0; - return rp; -} - -OpusRepacketizer *opus_repacketizer_create(void) -{ - OpusRepacketizer *rp; - rp=(OpusRepacketizer *)opus_alloc(opus_repacketizer_get_size()); - if(rp==NULL)return NULL; - return opus_repacketizer_init(rp); -} - -void opus_repacketizer_destroy(OpusRepacketizer *rp) -{ - opus_free(rp); -} - -static int opus_repacketizer_cat_impl(OpusRepacketizer *rp, const unsigned char *data, opus_int32 len, int self_delimited) -{ - unsigned char tmp_toc; - int curr_nb_frames,ret; - /* Set of check ToC */ - if (len<1) return OPUS_INVALID_PACKET; - if (rp->nb_frames == 0) - { - rp->toc = data[0]; - rp->framesize = opus_packet_get_samples_per_frame(data, 8000); - } else if ((rp->toc&0xFC) != (data[0]&0xFC)) - { - /*fprintf(stderr, "toc mismatch: 0x%x vs 0x%x\n", rp->toc, data[0]);*/ - return OPUS_INVALID_PACKET; - } - curr_nb_frames = opus_packet_get_nb_frames(data, len); - if(curr_nb_frames<1) return OPUS_INVALID_PACKET; - - /* Check the 120 ms maximum packet size */ - if ((curr_nb_frames+rp->nb_frames)*rp->framesize > 960) - { - return OPUS_INVALID_PACKET; - } - - ret=opus_packet_parse_impl(data, len, self_delimited, &tmp_toc, 
&rp->frames[rp->nb_frames], &rp->len[rp->nb_frames], NULL, NULL); - if(ret<1)return ret; - - rp->nb_frames += curr_nb_frames; - return OPUS_OK; -} - -int opus_repacketizer_cat(OpusRepacketizer *rp, const unsigned char *data, opus_int32 len) -{ - return opus_repacketizer_cat_impl(rp, data, len, 0); -} - -int opus_repacketizer_get_nb_frames(OpusRepacketizer *rp) -{ - return rp->nb_frames; -} - -opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int end, - unsigned char *data, opus_int32 maxlen, int self_delimited, int pad) -{ - int i, count; - opus_int32 tot_size; - opus_int16 *len; - const unsigned char **frames; - unsigned char * ptr; - - if (begin<0 || begin>=end || end>rp->nb_frames) - { - /*fprintf(stderr, "%d %d %d\n", begin, end, rp->nb_frames);*/ - return OPUS_BAD_ARG; - } - count = end-begin; - - len = rp->len+begin; - frames = rp->frames+begin; - if (self_delimited) - tot_size = 1 + (len[count-1]>=252); - else - tot_size = 0; - - ptr = data; - if (count==1) - { - /* Code 0 */ - tot_size += len[0]+1; - if (tot_size > maxlen) - return OPUS_BUFFER_TOO_SMALL; - *ptr++ = rp->toc&0xFC; - } else if (count==2) - { - if (len[1] == len[0]) - { - /* Code 1 */ - tot_size += 2*len[0]+1; - if (tot_size > maxlen) - return OPUS_BUFFER_TOO_SMALL; - *ptr++ = (rp->toc&0xFC) | 0x1; - } else { - /* Code 2 */ - tot_size += len[0]+len[1]+2+(len[0]>=252); - if (tot_size > maxlen) - return OPUS_BUFFER_TOO_SMALL; - *ptr++ = (rp->toc&0xFC) | 0x2; - ptr += encode_size(len[0], ptr); - } - } - if (count > 2 || (pad && tot_size < maxlen)) - { - /* Code 3 */ - int vbr; - int pad_amount=0; - - /* Restart the process for the padding case */ - ptr = data; - if (self_delimited) - tot_size = 1 + (len[count-1]>=252); - else - tot_size = 0; - vbr = 0; - for (i=1;i<count;i++) - { - if (len[i] != len[0]) - { - vbr=1; - break; - } - } - if (vbr) - { - tot_size += 2; - for (i=0;i<count-1;i++) - tot_size += 1 + (len[i]>=252) + len[i]; - tot_size += len[count-1]; - - if 
(tot_size > maxlen) - return OPUS_BUFFER_TOO_SMALL; - *ptr++ = (rp->toc&0xFC) | 0x3; - *ptr++ = count | 0x80; - } else { - tot_size += count*len[0]+2; - if (tot_size > maxlen) - return OPUS_BUFFER_TOO_SMALL; - *ptr++ = (rp->toc&0xFC) | 0x3; - *ptr++ = count; - } - pad_amount = pad ? (maxlen-tot_size) : 0; - if (pad_amount != 0) - { - int nb_255s; - data[1] |= 0x40; - nb_255s = (pad_amount-1)/255; - for (i=0;i<nb_255s;i++) - *ptr++ = 255; - *ptr++ = pad_amount-255*nb_255s-1; - tot_size += pad_amount; - } - if (vbr) - { - for (i=0;i<count-1;i++) - ptr += encode_size(len[i], ptr); - } - } - if (self_delimited) { - int sdlen = encode_size(len[count-1], ptr); - ptr += sdlen; - } - /* Copy the actual data */ - for (i=0;i<count;i++) - { - /* Using OPUS_MOVE() instead of OPUS_COPY() in case we're doing in-place - padding from opus_packet_pad or opus_packet_unpad(). */ - celt_assert(frames[i] + len[i] <= data || ptr <= frames[i]); - OPUS_MOVE(ptr, frames[i], len[i]); - ptr += len[i]; - } - if (pad) - { - /* Fill padding with zeros. 
*/ - while (ptr<data+maxlen) - *ptr++=0; - } - return tot_size; -} - -opus_int32 opus_repacketizer_out_range(OpusRepacketizer *rp, int begin, int end, unsigned char *data, opus_int32 maxlen) -{ - return opus_repacketizer_out_range_impl(rp, begin, end, data, maxlen, 0, 0); -} - -opus_int32 opus_repacketizer_out(OpusRepacketizer *rp, unsigned char *data, opus_int32 maxlen) -{ - return opus_repacketizer_out_range_impl(rp, 0, rp->nb_frames, data, maxlen, 0, 0); -} - -int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len) -{ - OpusRepacketizer rp; - opus_int32 ret; - if (len < 1) - return OPUS_BAD_ARG; - if (len==new_len) - return OPUS_OK; - else if (len > new_len) - return OPUS_BAD_ARG; - opus_repacketizer_init(&rp); - /* Moving payload to the end of the packet so we can do in-place padding */ - OPUS_MOVE(data+new_len-len, data, len); - ret = opus_repacketizer_cat(&rp, data+new_len-len, len); - if (ret != OPUS_OK) - return ret; - ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, data, new_len, 0, 1); - if (ret > 0) - return OPUS_OK; - else - return ret; -} - -opus_int32 opus_packet_unpad(unsigned char *data, opus_int32 len) -{ - OpusRepacketizer rp; - opus_int32 ret; - if (len < 1) - return OPUS_BAD_ARG; - opus_repacketizer_init(&rp); - ret = opus_repacketizer_cat(&rp, data, len); - if (ret < 0) - return ret; - ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, data, len, 0, 0); - celt_assert(ret > 0 && ret <= len); - return ret; -} - -int opus_multistream_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len, int nb_streams) -{ - int s; - int count; - unsigned char toc; - opus_int16 size[48]; - opus_int32 packet_offset; - opus_int32 amount; - - if (len < 1) - return OPUS_BAD_ARG; - if (len==new_len) - return OPUS_OK; - else if (len > new_len) - return OPUS_BAD_ARG; - amount = new_len - len; - /* Seek to last stream */ - for (s=0;s<nb_streams-1;s++) - { - if (len<=0) - return OPUS_INVALID_PACKET; - count = 
opus_packet_parse_impl(data, len, 1, &toc, NULL, - size, NULL, &packet_offset); - if (count<0) - return count; - data += packet_offset; - len -= packet_offset; - } - return opus_packet_pad(data, len, len+amount); -} - -opus_int32 opus_multistream_packet_unpad(unsigned char *data, opus_int32 len, int nb_streams) -{ - int s; - unsigned char toc; - opus_int16 size[48]; - opus_int32 packet_offset; - OpusRepacketizer rp; - unsigned char *dst; - opus_int32 dst_len; - - if (len < 1) - return OPUS_BAD_ARG; - dst = data; - dst_len = 0; - /* Unpad all frames */ - for (s=0;s<nb_streams;s++) - { - opus_int32 ret; - int self_delimited = s!=nb_streams-1; - if (len<=0) - return OPUS_INVALID_PACKET; - opus_repacketizer_init(&rp); - ret = opus_packet_parse_impl(data, len, self_delimited, &toc, NULL, - size, NULL, &packet_offset); - if (ret<0) - return ret; - ret = opus_repacketizer_cat_impl(&rp, data, packet_offset, self_delimited); - if (ret < 0) - return ret; - ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, dst, len, self_delimited, 0); - if (ret < 0) - return ret; - else - dst_len += ret; - dst += ret; - data += packet_offset; - len -= packet_offset; - } - return dst_len; -} - diff --git a/thirdparty/opus/repacketizer_demo.c b/thirdparty/opus/repacketizer_demo.c deleted file mode 100644 index dc05c1b359..0000000000 --- a/thirdparty/opus/repacketizer_demo.c +++ /dev/null @@ -1,217 +0,0 @@ -/* Copyright (c) 2011 Xiph.Org Foundation - Written by Jean-Marc Valin */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#define MAX_PACKETOUT 32000 - -void usage(char *argv0) -{ - fprintf(stderr, "usage: %s [options] input_file output_file\n", argv0); -} - -static void int_to_char(opus_uint32 i, unsigned char ch[4]) -{ - ch[0] = i>>24; - ch[1] = (i>>16)&0xFF; - ch[2] = (i>>8)&0xFF; - ch[3] = i&0xFF; -} - -static opus_uint32 char_to_int(unsigned char ch[4]) -{ - return ((opus_uint32)ch[0]<<24) | ((opus_uint32)ch[1]<<16) - | ((opus_uint32)ch[2]<< 8) | (opus_uint32)ch[3]; -} - -int main(int argc, char *argv[]) -{ - int i, eof=0; - FILE *fin, *fout; - unsigned char packets[48][1500]; - int len[48]; - int rng[48]; - OpusRepacketizer *rp; - unsigned char output_packet[MAX_PACKETOUT]; - int merge = 1, split=0; - - if (argc < 3) - { - usage(argv[0]); - return EXIT_FAILURE; - } - for (i=1;i<argc-2;i++) - { - if (strcmp(argv[i], "-merge")==0) - { - merge = atoi(argv[i+1]); - if(merge<1) - { - fprintf(stderr, "-merge parameter must be at least 1.\n"); - return EXIT_FAILURE; - } - if(merge>48) - { - fprintf(stderr, "-merge parameter must be less than 48.\n"); - return EXIT_FAILURE; - } - 
i++; - } else if (strcmp(argv[i], "-split")==0) - split = 1; - else - { - fprintf(stderr, "Unknown option: %s\n", argv[i]); - usage(argv[0]); - return EXIT_FAILURE; - } - } - fin = fopen(argv[argc-2], "r"); - if(fin==NULL) - { - fprintf(stderr, "Error opening input file: %s\n", argv[argc-2]); - return EXIT_FAILURE; - } - fout = fopen(argv[argc-1], "w"); - if(fout==NULL) - { - fprintf(stderr, "Error opening output file: %s\n", argv[argc-1]); - fclose(fin); - return EXIT_FAILURE; - } - - rp = opus_repacketizer_create(); - while (!eof) - { - int err; - int nb_packets=merge; - opus_repacketizer_init(rp); - for (i=0;i<nb_packets;i++) - { - unsigned char ch[4]; - err = fread(ch, 1, 4, fin); - len[i] = char_to_int(ch); - /*fprintf(stderr, "in len = %d\n", len[i]);*/ - if (len[i]>1500 || len[i]<0) - { - if (feof(fin)) - { - eof = 1; - } else { - fprintf(stderr, "Invalid payload length\n"); - fclose(fin); - fclose(fout); - return EXIT_FAILURE; - } - break; - } - err = fread(ch, 1, 4, fin); - rng[i] = char_to_int(ch); - err = fread(packets[i], 1, len[i], fin); - if (feof(fin)) - { - eof = 1; - break; - } - err = opus_repacketizer_cat(rp, packets[i], len[i]); - if (err!=OPUS_OK) - { - fprintf(stderr, "opus_repacketizer_cat() failed: %s\n", opus_strerror(err)); - break; - } - } - nb_packets = i; - - if (eof) - break; - - if (!split) - { - err = opus_repacketizer_out(rp, output_packet, MAX_PACKETOUT); - if (err>0) { - unsigned char int_field[4]; - int_to_char(err, int_field); - if(fwrite(int_field, 1, 4, fout)!=4){ - fprintf(stderr, "Error writing.\n"); - return EXIT_FAILURE; - } - int_to_char(rng[nb_packets-1], int_field); - if (fwrite(int_field, 1, 4, fout)!=4) { - fprintf(stderr, "Error writing.\n"); - return EXIT_FAILURE; - } - if (fwrite(output_packet, 1, err, fout)!=(unsigned)err) { - fprintf(stderr, "Error writing.\n"); - return EXIT_FAILURE; - } - /*fprintf(stderr, "out len = %d\n", err);*/ - } else { - fprintf(stderr, "opus_repacketizer_out() failed: %s\n", 
opus_strerror(err)); - } - } else { - int nb_frames = opus_repacketizer_get_nb_frames(rp); - for (i=0;i<nb_frames;i++) - { - err = opus_repacketizer_out_range(rp, i, i+1, output_packet, MAX_PACKETOUT); - if (err>0) { - unsigned char int_field[4]; - int_to_char(err, int_field); - if (fwrite(int_field, 1, 4, fout)!=4) { - fprintf(stderr, "Error writing.\n"); - return EXIT_FAILURE; - } - if (i==nb_frames-1) - int_to_char(rng[nb_packets-1], int_field); - else - int_to_char(0, int_field); - if (fwrite(int_field, 1, 4, fout)!=4) { - fprintf(stderr, "Error writing.\n"); - return EXIT_FAILURE; - } - if (fwrite(output_packet, 1, err, fout)!=(unsigned)err) { - fprintf(stderr, "Error writing.\n"); - return EXIT_FAILURE; - } - /*fprintf(stderr, "out len = %d\n", err);*/ - } else { - fprintf(stderr, "opus_repacketizer_out() failed: %s\n", opus_strerror(err)); - } - - } - } - } - - fclose(fin); - fclose(fout); - return EXIT_SUCCESS; -} diff --git a/thirdparty/opus/silk/A2NLSF.c b/thirdparty/opus/silk/A2NLSF.c deleted file mode 100644 index b6e9e5ffcc..0000000000 --- a/thirdparty/opus/silk/A2NLSF.c +++ /dev/null @@ -1,267 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -/* Conversion between prediction filter coefficients and NLSFs */ -/* Requires the order to be an even number */ -/* A piecewise linear approximation maps LSF <-> cos(LSF) */ -/* Therefore the result is not accurate NLSFs, but the two */ -/* functions are accurate inverses of each other */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "tables.h" - -/* Number of binary divisions, when not in low complexity mode */ -#define BIN_DIV_STEPS_A2NLSF_FIX 3 /* must be no higher than 16 - log2( LSF_COS_TAB_SZ_FIX ) */ -#define MAX_ITERATIONS_A2NLSF_FIX 30 - -/* Helper function for A2NLSF(..) */ -/* Transforms polynomials from cos(n*f) to cos(f)^n */ -static OPUS_INLINE void silk_A2NLSF_trans_poly( - opus_int32 *p, /* I/O Polynomial */ - const opus_int dd /* I Polynomial order (= filter order / 2 ) */ -) -{ - opus_int k, n; - - for( k = 2; k <= dd; k++ ) { - for( n = dd; n > k; n-- ) { - p[ n - 2 ] -= p[ n ]; - } - p[ k - 2 ] -= silk_LSHIFT( p[ k ], 1 ); - } -} -/* Helper function for A2NLSF(..) 
*/ -/* Polynomial evaluation */ -static OPUS_INLINE opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial evaluation, in Q16 */ - opus_int32 *p, /* I Polynomial, Q16 */ - const opus_int32 x, /* I Evaluation point, Q12 */ - const opus_int dd /* I Order */ -) -{ - opus_int n; - opus_int32 x_Q16, y32; - - y32 = p[ dd ]; /* Q16 */ - x_Q16 = silk_LSHIFT( x, 4 ); - - if ( opus_likely( 8 == dd ) ) - { - y32 = silk_SMLAWW( p[ 7 ], y32, x_Q16 ); - y32 = silk_SMLAWW( p[ 6 ], y32, x_Q16 ); - y32 = silk_SMLAWW( p[ 5 ], y32, x_Q16 ); - y32 = silk_SMLAWW( p[ 4 ], y32, x_Q16 ); - y32 = silk_SMLAWW( p[ 3 ], y32, x_Q16 ); - y32 = silk_SMLAWW( p[ 2 ], y32, x_Q16 ); - y32 = silk_SMLAWW( p[ 1 ], y32, x_Q16 ); - y32 = silk_SMLAWW( p[ 0 ], y32, x_Q16 ); - } - else - { - for( n = dd - 1; n >= 0; n-- ) { - y32 = silk_SMLAWW( p[ n ], y32, x_Q16 ); /* Q16 */ - } - } - return y32; -} - -static OPUS_INLINE void silk_A2NLSF_init( - const opus_int32 *a_Q16, - opus_int32 *P, - opus_int32 *Q, - const opus_int dd -) -{ - opus_int k; - - /* Convert filter coefs to even and odd polynomials */ - P[dd] = silk_LSHIFT( 1, 16 ); - Q[dd] = silk_LSHIFT( 1, 16 ); - for( k = 0; k < dd; k++ ) { - P[ k ] = -a_Q16[ dd - k - 1 ] - a_Q16[ dd + k ]; /* Q16 */ - Q[ k ] = -a_Q16[ dd - k - 1 ] + a_Q16[ dd + k ]; /* Q16 */ - } - - /* Divide out zeros as we have that for even filter orders, */ - /* z = 1 is always a root in Q, and */ - /* z = -1 is always a root in P */ - for( k = dd; k > 0; k-- ) { - P[ k - 1 ] -= P[ k ]; - Q[ k - 1 ] += Q[ k ]; - } - - /* Transform polynomials from cos(n*f) to cos(f)^n */ - silk_A2NLSF_trans_poly( P, dd ); - silk_A2NLSF_trans_poly( Q, dd ); -} - -/* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients */ -/* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence. 
*/ -void silk_A2NLSF( - opus_int16 *NLSF, /* O Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */ - opus_int32 *a_Q16, /* I/O Monic whitening filter coefficients in Q16 [d] */ - const opus_int d /* I Filter order (must be even) */ -) -{ - opus_int i, k, m, dd, root_ix, ffrac; - opus_int32 xlo, xhi, xmid; - opus_int32 ylo, yhi, ymid, thr; - opus_int32 nom, den; - opus_int32 P[ SILK_MAX_ORDER_LPC / 2 + 1 ]; - opus_int32 Q[ SILK_MAX_ORDER_LPC / 2 + 1 ]; - opus_int32 *PQ[ 2 ]; - opus_int32 *p; - - /* Store pointers to array */ - PQ[ 0 ] = P; - PQ[ 1 ] = Q; - - dd = silk_RSHIFT( d, 1 ); - - silk_A2NLSF_init( a_Q16, P, Q, dd ); - - /* Find roots, alternating between P and Q */ - p = P; /* Pointer to polynomial */ - - xlo = silk_LSFCosTab_FIX_Q12[ 0 ]; /* Q12*/ - ylo = silk_A2NLSF_eval_poly( p, xlo, dd ); - - if( ylo < 0 ) { - /* Set the first NLSF to zero and move on to the next */ - NLSF[ 0 ] = 0; - p = Q; /* Pointer to polynomial */ - ylo = silk_A2NLSF_eval_poly( p, xlo, dd ); - root_ix = 1; /* Index of current root */ - } else { - root_ix = 0; /* Index of current root */ - } - k = 1; /* Loop counter */ - i = 0; /* Counter for bandwidth expansions applied */ - thr = 0; - while( 1 ) { - /* Evaluate polynomial */ - xhi = silk_LSFCosTab_FIX_Q12[ k ]; /* Q12 */ - yhi = silk_A2NLSF_eval_poly( p, xhi, dd ); - - /* Detect zero crossing */ - if( ( ylo <= 0 && yhi >= thr ) || ( ylo >= 0 && yhi <= -thr ) ) { - if( yhi == 0 ) { - /* If the root lies exactly at the end of the current */ - /* interval, look for the next root in the next interval */ - thr = 1; - } else { - thr = 0; - } - /* Binary division */ - ffrac = -256; - for( m = 0; m < BIN_DIV_STEPS_A2NLSF_FIX; m++ ) { - /* Evaluate polynomial */ - xmid = silk_RSHIFT_ROUND( xlo + xhi, 1 ); - ymid = silk_A2NLSF_eval_poly( p, xmid, dd ); - - /* Detect zero crossing */ - if( ( ylo <= 0 && ymid >= 0 ) || ( ylo >= 0 && ymid <= 0 ) ) { - /* Reduce frequency */ - xhi = xmid; - yhi = ymid; - } else { - /* Increase 
frequency */ - xlo = xmid; - ylo = ymid; - ffrac = silk_ADD_RSHIFT( ffrac, 128, m ); - } - } - - /* Interpolate */ - if( silk_abs( ylo ) < 65536 ) { - /* Avoid dividing by zero */ - den = ylo - yhi; - nom = silk_LSHIFT( ylo, 8 - BIN_DIV_STEPS_A2NLSF_FIX ) + silk_RSHIFT( den, 1 ); - if( den != 0 ) { - ffrac += silk_DIV32( nom, den ); - } - } else { - /* No risk of dividing by zero because abs(ylo - yhi) >= abs(ylo) >= 65536 */ - ffrac += silk_DIV32( ylo, silk_RSHIFT( ylo - yhi, 8 - BIN_DIV_STEPS_A2NLSF_FIX ) ); - } - NLSF[ root_ix ] = (opus_int16)silk_min_32( silk_LSHIFT( (opus_int32)k, 8 ) + ffrac, silk_int16_MAX ); - - silk_assert( NLSF[ root_ix ] >= 0 ); - - root_ix++; /* Next root */ - if( root_ix >= d ) { - /* Found all roots */ - break; - } - /* Alternate pointer to polynomial */ - p = PQ[ root_ix & 1 ]; - - /* Evaluate polynomial */ - xlo = silk_LSFCosTab_FIX_Q12[ k - 1 ]; /* Q12*/ - ylo = silk_LSHIFT( 1 - ( root_ix & 2 ), 12 ); - } else { - /* Increment loop counter */ - k++; - xlo = xhi; - ylo = yhi; - thr = 0; - - if( k > LSF_COS_TAB_SZ_FIX ) { - i++; - if( i > MAX_ITERATIONS_A2NLSF_FIX ) { - /* Set NLSFs to white spectrum and exit */ - NLSF[ 0 ] = (opus_int16)silk_DIV32_16( 1 << 15, d + 1 ); - for( k = 1; k < d; k++ ) { - NLSF[ k ] = (opus_int16)silk_SMULBB( k + 1, NLSF[ 0 ] ); - } - return; - } - - /* Error: Apply progressively more bandwidth expansion and run again */ - silk_bwexpander_32( a_Q16, d, 65536 - silk_SMULBB( 10 + i, i ) ); /* 10_Q16 = 0.00015*/ - - silk_A2NLSF_init( a_Q16, P, Q, dd ); - p = P; /* Pointer to polynomial */ - xlo = silk_LSFCosTab_FIX_Q12[ 0 ]; /* Q12*/ - ylo = silk_A2NLSF_eval_poly( p, xlo, dd ); - if( ylo < 0 ) { - /* Set the first NLSF to zero and move on to the next */ - NLSF[ 0 ] = 0; - p = Q; /* Pointer to polynomial */ - ylo = silk_A2NLSF_eval_poly( p, xlo, dd ); - root_ix = 1; /* Index of current root */ - } else { - root_ix = 0; /* Index of current root */ - } - k = 1; /* Reset loop counter */ - } - } - } -} diff --git 
a/thirdparty/opus/silk/API.h b/thirdparty/opus/silk/API.h deleted file mode 100644 index 0131acbb08..0000000000 --- a/thirdparty/opus/silk/API.h +++ /dev/null @@ -1,134 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifndef SILK_API_H -#define SILK_API_H - -#include "control.h" -#include "typedef.h" -#include "errors.h" -#include "entenc.h" -#include "entdec.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -#define SILK_MAX_FRAMES_PER_PACKET 3 - -/* Struct for TOC (Table of Contents) */ -typedef struct { - opus_int VADFlag; /* Voice activity for packet */ - opus_int VADFlags[ SILK_MAX_FRAMES_PER_PACKET ]; /* Voice activity for each frame in packet */ - opus_int inbandFECFlag; /* Flag indicating if packet contains in-band FEC */ -} silk_TOC_struct; - -/****************************************/ -/* Encoder functions */ -/****************************************/ - -/***********************************************/ -/* Get size in bytes of the Silk encoder state */ -/***********************************************/ -opus_int silk_Get_Encoder_Size( /* O Returns error code */ - opus_int *encSizeBytes /* O Number of bytes in SILK encoder state */ -); - -/*************************/ -/* Init or reset encoder */ -/*************************/ -opus_int silk_InitEncoder( /* O Returns error code */ - void *encState, /* I/O State */ - int arch, /* I Run-time architecture */ - silk_EncControlStruct *encStatus /* O Encoder Status */ -); - -/**************************/ -/* Encode frame with Silk */ -/**************************/ -/* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what */ -/* encControl->payloadSize_ms is set to */ -opus_int silk_Encode( /* O Returns error code */ - void *encState, /* I/O State */ - silk_EncControlStruct *encControl, /* I Control status */ - const opus_int16 *samplesIn, /* I Speech sample input vector */ - opus_int nSamplesIn, /* I Number of samples in input vector */ - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */ - const opus_int prefillFlag /* I Flag to 
indicate prefilling buffers no coding */ -); - -/****************************************/ -/* Decoder functions */ -/****************************************/ - -/***********************************************/ -/* Get size in bytes of the Silk decoder state */ -/***********************************************/ -opus_int silk_Get_Decoder_Size( /* O Returns error code */ - opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */ -); - -/*************************/ -/* Init or Reset decoder */ -/*************************/ -opus_int silk_InitDecoder( /* O Returns error code */ - void *decState /* I/O State */ -); - -/******************/ -/* Decode a frame */ -/******************/ -opus_int silk_Decode( /* O Returns error code */ - void* decState, /* I/O State */ - silk_DecControlStruct* decControl, /* I/O Control Structure */ - opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ - opus_int newPacketFlag, /* I Indicates first decoder call for this packet */ - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int16 *samplesOut, /* O Decoded output speech vector */ - opus_int32 *nSamplesOut, /* O Number of samples decoded */ - int arch /* I Run-time architecture */ -); - -#if 0 -/**************************************/ -/* Get table of contents for a packet */ -/**************************************/ -opus_int silk_get_TOC( - const opus_uint8 *payload, /* I Payload data */ - const opus_int nBytesIn, /* I Number of input bytes */ - const opus_int nFramesPerPayload, /* I Number of SILK frames per payload */ - silk_TOC_struct *Silk_TOC /* O Type of content */ -); -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/silk/CNG.c b/thirdparty/opus/silk/CNG.c deleted file mode 100644 index 8443ad63bb..0000000000 --- a/thirdparty/opus/silk/CNG.c +++ /dev/null @@ -1,184 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" - -/* Generates excitation for CNG LPC synthesis */ -static OPUS_INLINE void silk_CNG_exc( - opus_int32 exc_Q14[], /* O CNG excitation signal Q10 */ - opus_int32 exc_buf_Q14[], /* I Random samples buffer Q10 */ - opus_int length, /* I Length */ - opus_int32 *rand_seed /* I/O Seed to random index generator */ -) -{ - opus_int32 seed; - opus_int i, idx, exc_mask; - - exc_mask = CNG_BUF_MASK_MAX; - while( exc_mask > length ) { - exc_mask = silk_RSHIFT( exc_mask, 1 ); - } - - seed = *rand_seed; - for( i = 0; i < length; i++ ) { - seed = silk_RAND( seed ); - idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask ); - silk_assert( idx >= 0 ); - silk_assert( idx <= CNG_BUF_MASK_MAX ); - exc_Q14[ i ] = exc_buf_Q14[ idx ]; - } - *rand_seed = seed; -} - -void silk_CNG_Reset( - silk_decoder_state *psDec /* I/O Decoder state */ -) -{ - opus_int i, NLSF_step_Q15, NLSF_acc_Q15; - - NLSF_step_Q15 = silk_DIV32_16( silk_int16_MAX, psDec->LPC_order + 1 ); - NLSF_acc_Q15 = 0; - for( i = 0; i < psDec->LPC_order; i++ ) { - NLSF_acc_Q15 += NLSF_step_Q15; - psDec->sCNG.CNG_smth_NLSF_Q15[ i ] = NLSF_acc_Q15; - } - psDec->sCNG.CNG_smth_Gain_Q16 = 0; - psDec->sCNG.rand_seed = 3176576; -} - -/* Updates CNG estimate, and applies the CNG when packet was lost */ -void silk_CNG( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl, /* I/O Decoder control */ - opus_int16 frame[], /* I/O Signal */ - opus_int length /* I Length of residual */ -) -{ - opus_int i, subfr; - opus_int32 LPC_pred_Q10, max_Gain_Q16, gain_Q16, gain_Q10; - opus_int16 A_Q12[ MAX_LPC_ORDER ]; - silk_CNG_struct *psCNG = &psDec->sCNG; - SAVE_STACK; - - if( psDec->fs_kHz != psCNG->fs_kHz ) { - /* Reset state */ - silk_CNG_Reset( psDec ); - - psCNG->fs_kHz = psDec->fs_kHz; - } - if( psDec->lossCnt == 0 && psDec->prevSignalType 
== TYPE_NO_VOICE_ACTIVITY ) { - /* Update CNG parameters */ - - /* Smoothing of LSF's */ - for( i = 0; i < psDec->LPC_order; i++ ) { - psCNG->CNG_smth_NLSF_Q15[ i ] += silk_SMULWB( (opus_int32)psDec->prevNLSF_Q15[ i ] - (opus_int32)psCNG->CNG_smth_NLSF_Q15[ i ], CNG_NLSF_SMTH_Q16 ); - } - /* Find the subframe with the highest gain */ - max_Gain_Q16 = 0; - subfr = 0; - for( i = 0; i < psDec->nb_subfr; i++ ) { - if( psDecCtrl->Gains_Q16[ i ] > max_Gain_Q16 ) { - max_Gain_Q16 = psDecCtrl->Gains_Q16[ i ]; - subfr = i; - } - } - /* Update CNG excitation buffer with excitation from this subframe */ - silk_memmove( &psCNG->CNG_exc_buf_Q14[ psDec->subfr_length ], psCNG->CNG_exc_buf_Q14, ( psDec->nb_subfr - 1 ) * psDec->subfr_length * sizeof( opus_int32 ) ); - silk_memcpy( psCNG->CNG_exc_buf_Q14, &psDec->exc_Q14[ subfr * psDec->subfr_length ], psDec->subfr_length * sizeof( opus_int32 ) ); - - /* Smooth gains */ - for( i = 0; i < psDec->nb_subfr; i++ ) { - psCNG->CNG_smth_Gain_Q16 += silk_SMULWB( psDecCtrl->Gains_Q16[ i ] - psCNG->CNG_smth_Gain_Q16, CNG_GAIN_SMTH_Q16 ); - } - } - - /* Add CNG when packet is lost or during DTX */ - if( psDec->lossCnt ) { - VARDECL( opus_int32, CNG_sig_Q14 ); - ALLOC( CNG_sig_Q14, length + MAX_LPC_ORDER, opus_int32 ); - - /* Generate CNG excitation */ - gain_Q16 = silk_SMULWW( psDec->sPLC.randScale_Q14, psDec->sPLC.prevGain_Q16[1] ); - if( gain_Q16 >= (1 << 21) || psCNG->CNG_smth_Gain_Q16 > (1 << 23) ) { - gain_Q16 = silk_SMULTT( gain_Q16, gain_Q16 ); - gain_Q16 = silk_SUB_LSHIFT32(silk_SMULTT( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 ); - gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 16 ); - } else { - gain_Q16 = silk_SMULWW( gain_Q16, gain_Q16 ); - gain_Q16 = silk_SUB_LSHIFT32(silk_SMULWW( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 ); - gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 ); - } - gain_Q10 = silk_RSHIFT( gain_Q16, 6 ); - - silk_CNG_exc( CNG_sig_Q14 + 
MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, length, &psCNG->rand_seed ); - - /* Convert CNG NLSF to filter representation */ - silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order ); - - /* Generate CNG signal, by synthesis filtering */ - silk_memcpy( CNG_sig_Q14, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) ); - for( i = 0; i < length; i++ ) { - silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 ); - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 2 ], A_Q12[ 1 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 3 ], A_Q12[ 2 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 4 ], A_Q12[ 3 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 5 ], A_Q12[ 4 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 6 ], A_Q12[ 5 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 7 ], A_Q12[ 6 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 8 ], A_Q12[ 7 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 9 ], A_Q12[ 8 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] ); - if( psDec->LPC_order == 16 ) { - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, 
CNG_sig_Q14[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] ); - } - - /* Update states */ - CNG_sig_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q14[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 ); - - /* Scale with Gain and add to input signal */ - frame[ i ] = (opus_int16)silk_ADD_SAT16( frame[ i ], silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( CNG_sig_Q14[ MAX_LPC_ORDER + i ], gain_Q10 ), 8 ) ) ); - - } - silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q14[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); - } else { - silk_memset( psCNG->CNG_synth_state, 0, psDec->LPC_order * sizeof( opus_int32 ) ); - } - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/HP_variable_cutoff.c b/thirdparty/opus/silk/HP_variable_cutoff.c deleted file mode 100644 index bbe10f04ce..0000000000 --- a/thirdparty/opus/silk/HP_variable_cutoff.c +++ /dev/null @@ -1,77 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#ifdef FIXED_POINT -#include "main_FIX.h" -#else -#include "main_FLP.h" -#endif -#include "tuning_parameters.h" - -/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */ -void silk_HP_variable_cutoff( - silk_encoder_state_Fxx state_Fxx[] /* I/O Encoder states */ -) -{ - opus_int quality_Q15; - opus_int32 pitch_freq_Hz_Q16, pitch_freq_log_Q7, delta_freq_Q7; - silk_encoder_state *psEncC1 = &state_Fxx[ 0 ].sCmn; - - /* Adaptive cutoff frequency: estimate low end of pitch frequency range */ - if( psEncC1->prevSignalType == TYPE_VOICED ) { - /* difference, in log domain */ - pitch_freq_Hz_Q16 = silk_DIV32_16( silk_LSHIFT( silk_MUL( psEncC1->fs_kHz, 1000 ), 16 ), psEncC1->prevLag ); - pitch_freq_log_Q7 = silk_lin2log( pitch_freq_Hz_Q16 ) - ( 16 << 7 ); - - /* adjustment based on quality */ - quality_Q15 = psEncC1->input_quality_bands_Q15[ 0 ]; - pitch_freq_log_Q7 = silk_SMLAWB( pitch_freq_log_Q7, silk_SMULWB( silk_LSHIFT( -quality_Q15, 2 ), quality_Q15 ), - pitch_freq_log_Q7 - ( silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) ) - ( 16 << 7 ) ) ); - - 
/* delta_freq = pitch_freq_log - psEnc->variable_HP_smth1; */ - delta_freq_Q7 = pitch_freq_log_Q7 - silk_RSHIFT( psEncC1->variable_HP_smth1_Q15, 8 ); - if( delta_freq_Q7 < 0 ) { - /* less smoothing for decreasing pitch frequency, to track something close to the minimum */ - delta_freq_Q7 = silk_MUL( delta_freq_Q7, 3 ); - } - - /* limit delta, to reduce impact of outliers in pitch estimation */ - delta_freq_Q7 = silk_LIMIT_32( delta_freq_Q7, -SILK_FIX_CONST( VARIABLE_HP_MAX_DELTA_FREQ, 7 ), SILK_FIX_CONST( VARIABLE_HP_MAX_DELTA_FREQ, 7 ) ); - - /* update smoother */ - psEncC1->variable_HP_smth1_Q15 = silk_SMLAWB( psEncC1->variable_HP_smth1_Q15, - silk_SMULBB( psEncC1->speech_activity_Q8, delta_freq_Q7 ), SILK_FIX_CONST( VARIABLE_HP_SMTH_COEF1, 16 ) ); - - /* limit frequency range */ - psEncC1->variable_HP_smth1_Q15 = silk_LIMIT_32( psEncC1->variable_HP_smth1_Q15, - silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ), - silk_LSHIFT( silk_lin2log( VARIABLE_HP_MAX_CUTOFF_HZ ), 8 ) ); - } -} diff --git a/thirdparty/opus/silk/Inlines.h b/thirdparty/opus/silk/Inlines.h deleted file mode 100644 index ec986cdfdd..0000000000 --- a/thirdparty/opus/silk/Inlines.h +++ /dev/null @@ -1,188 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -/*! \file silk_Inlines.h - * \brief silk_Inlines.h defines OPUS_INLINE signal processing functions. 
- */ - -#ifndef SILK_FIX_INLINES_H -#define SILK_FIX_INLINES_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -/* count leading zeros of opus_int64 */ -static OPUS_INLINE opus_int32 silk_CLZ64( opus_int64 in ) -{ - opus_int32 in_upper; - - in_upper = (opus_int32)silk_RSHIFT64(in, 32); - if (in_upper == 0) { - /* Search in the lower 32 bits */ - return 32 + silk_CLZ32( (opus_int32) in ); - } else { - /* Search in the upper 32 bits */ - return silk_CLZ32( in_upper ); - } -} - -/* get number of leading zeros and fractional part (the bits right after the leading one */ -static OPUS_INLINE void silk_CLZ_FRAC( - opus_int32 in, /* I input */ - opus_int32 *lz, /* O number of leading zeros */ - opus_int32 *frac_Q7 /* O the 7 bits right after the leading one */ -) -{ - opus_int32 lzeros = silk_CLZ32(in); - - * lz = lzeros; - * frac_Q7 = silk_ROR32(in, 24 - lzeros) & 0x7f; -} - -/* Approximation of square root */ -/* Accuracy: < +/- 10% for output values > 15 */ -/* < +/- 2.5% for output values > 120 */ -static OPUS_INLINE opus_int32 silk_SQRT_APPROX( opus_int32 x ) -{ - opus_int32 y, lz, frac_Q7; - - if( x <= 0 ) { - return 0; - } - - silk_CLZ_FRAC(x, &lz, &frac_Q7); - - if( lz & 1 ) { - y = 32768; - } else { - y = 46214; /* 46214 = sqrt(2) * 32768 */ - } - - /* get scaling right */ - y >>= silk_RSHIFT(lz, 1); - - /* increment using fractional part of input */ - y = silk_SMLAWB(y, y, silk_SMULBB(213, frac_Q7)); - - return y; -} - -/* Divide two int32 values and return result as int32 in a given Q-domain */ -static OPUS_INLINE opus_int32 silk_DIV32_varQ( /* O returns a good approximation of "(a32 << Qres) / b32" */ - const opus_int32 a32, /* I numerator (Q0) */ - const opus_int32 b32, /* I denominator (Q0) */ - const opus_int Qres /* I Q-domain of result (>= 0) */ -) -{ - opus_int a_headrm, b_headrm, lshift; - opus_int32 b32_inv, a32_nrm, b32_nrm, result; - - silk_assert( b32 != 0 ); - silk_assert( Qres >= 0 ); - - /* Compute number of bits head room and normalize inputs */ - 
a_headrm = silk_CLZ32( silk_abs(a32) ) - 1; - a32_nrm = silk_LSHIFT(a32, a_headrm); /* Q: a_headrm */ - b_headrm = silk_CLZ32( silk_abs(b32) ) - 1; - b32_nrm = silk_LSHIFT(b32, b_headrm); /* Q: b_headrm */ - - /* Inverse of b32, with 14 bits of precision */ - b32_inv = silk_DIV32_16( silk_int32_MAX >> 2, silk_RSHIFT(b32_nrm, 16) ); /* Q: 29 + 16 - b_headrm */ - - /* First approximation */ - result = silk_SMULWB(a32_nrm, b32_inv); /* Q: 29 + a_headrm - b_headrm */ - - /* Compute residual by subtracting product of denominator and first approximation */ - /* It's OK to overflow because the final value of a32_nrm should always be small */ - a32_nrm = silk_SUB32_ovflw(a32_nrm, silk_LSHIFT_ovflw( silk_SMMUL(b32_nrm, result), 3 )); /* Q: a_headrm */ - - /* Refinement */ - result = silk_SMLAWB(result, a32_nrm, b32_inv); /* Q: 29 + a_headrm - b_headrm */ - - /* Convert to Qres domain */ - lshift = 29 + a_headrm - b_headrm - Qres; - if( lshift < 0 ) { - return silk_LSHIFT_SAT32(result, -lshift); - } else { - if( lshift < 32){ - return silk_RSHIFT(result, lshift); - } else { - /* Avoid undefined result */ - return 0; - } - } -} - -/* Invert int32 value and return result as int32 in a given Q-domain */ -static OPUS_INLINE opus_int32 silk_INVERSE32_varQ( /* O returns a good approximation of "(1 << Qres) / b32" */ - const opus_int32 b32, /* I denominator (Q0) */ - const opus_int Qres /* I Q-domain of result (> 0) */ -) -{ - opus_int b_headrm, lshift; - opus_int32 b32_inv, b32_nrm, err_Q32, result; - - silk_assert( b32 != 0 ); - silk_assert( Qres > 0 ); - - /* Compute number of bits head room and normalize input */ - b_headrm = silk_CLZ32( silk_abs(b32) ) - 1; - b32_nrm = silk_LSHIFT(b32, b_headrm); /* Q: b_headrm */ - - /* Inverse of b32, with 14 bits of precision */ - b32_inv = silk_DIV32_16( silk_int32_MAX >> 2, silk_RSHIFT(b32_nrm, 16) ); /* Q: 29 + 16 - b_headrm */ - - /* First approximation */ - result = silk_LSHIFT(b32_inv, 16); /* Q: 61 - b_headrm */ - - /* Compute 
residual by subtracting product of denominator and first approximation from one */ - err_Q32 = silk_LSHIFT( ((opus_int32)1<<29) - silk_SMULWB(b32_nrm, b32_inv), 3 ); /* Q32 */ - - /* Refinement */ - result = silk_SMLAWW(result, err_Q32, b32_inv); /* Q: 61 - b_headrm */ - - /* Convert to Qres domain */ - lshift = 61 - b_headrm - Qres; - if( lshift <= 0 ) { - return silk_LSHIFT_SAT32(result, -lshift); - } else { - if( lshift < 32){ - return silk_RSHIFT(result, lshift); - }else{ - /* Avoid undefined result */ - return 0; - } - } -} - -#ifdef __cplusplus -} -#endif - -#endif /* SILK_FIX_INLINES_H */ diff --git a/thirdparty/opus/silk/LPC_analysis_filter.c b/thirdparty/opus/silk/LPC_analysis_filter.c deleted file mode 100644 index 20906673ff..0000000000 --- a/thirdparty/opus/silk/LPC_analysis_filter.c +++ /dev/null @@ -1,108 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "celt_lpc.h" - -/*******************************************/ -/* LPC analysis filter */ -/* NB! State is kept internally and the */ -/* filter always starts with zero state */ -/* first d output samples are set to zero */ -/*******************************************/ - -void silk_LPC_analysis_filter( - opus_int16 *out, /* O Output signal */ - const opus_int16 *in, /* I Input signal */ - const opus_int16 *B, /* I MA prediction coefficients, Q12 [order] */ - const opus_int32 len, /* I Signal length */ - const opus_int32 d, /* I Filter order */ - int arch /* I Run-time architecture */ -) -{ - opus_int j; -#ifdef FIXED_POINT - opus_int16 mem[SILK_MAX_ORDER_LPC]; - opus_int16 num[SILK_MAX_ORDER_LPC]; -#else - int ix; - opus_int32 out32_Q12, out32; - const opus_int16 *in_ptr; -#endif - - silk_assert( d >= 6 ); - silk_assert( (d & 1) == 0 ); - silk_assert( d <= len ); - -#ifdef FIXED_POINT - silk_assert( d <= SILK_MAX_ORDER_LPC ); - for ( j = 0; j < d; j++ ) { - num[ j ] = -B[ j ]; - } - for (j=0;j<d;j++) { - mem[ j ] = in[ d - j - 1 ]; - } - celt_fir( in + d, num, out + d, len - d, d, mem, arch ); - for ( j = 0; j < d; j++ ) { - out[ j ] = 0; - } -#else - (void)arch; - for( ix = d; ix < len; ix++ ) { - in_ptr = &in[ ix - 1 ]; - - out32_Q12 = silk_SMULBB( in_ptr[ 0 ], B[ 0 ] ); - /* Allowing wrap 
around so that two wraps can cancel each other. The rare - cases where the result wraps around can only be triggered by invalid streams*/ - out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -1 ], B[ 1 ] ); - out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -2 ], B[ 2 ] ); - out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -3 ], B[ 3 ] ); - out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -4 ], B[ 4 ] ); - out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -5 ], B[ 5 ] ); - for( j = 6; j < d; j += 2 ) { - out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -j ], B[ j ] ); - out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -j - 1 ], B[ j + 1 ] ); - } - - /* Subtract prediction */ - out32_Q12 = silk_SUB32_ovflw( silk_LSHIFT( (opus_int32)in_ptr[ 1 ], 12 ), out32_Q12 ); - - /* Scale to Q0 */ - out32 = silk_RSHIFT_ROUND( out32_Q12, 12 ); - - /* Saturate output */ - out[ ix ] = (opus_int16)silk_SAT16( out32 ); - } - - /* Set first d output samples to zero */ - silk_memset( out, 0, d * sizeof( opus_int16 ) ); -#endif -} diff --git a/thirdparty/opus/silk/LPC_inv_pred_gain.c b/thirdparty/opus/silk/LPC_inv_pred_gain.c deleted file mode 100644 index 4af89aa5fa..0000000000 --- a/thirdparty/opus/silk/LPC_inv_pred_gain.c +++ /dev/null @@ -1,154 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -#define QA 24 -#define A_LIMIT SILK_FIX_CONST( 0.99975, QA ) - -#define MUL32_FRAC_Q(a32, b32, Q) ((opus_int32)(silk_RSHIFT_ROUND64(silk_SMULL(a32, b32), Q))) - -/* Compute inverse of LPC prediction gain, and */ -/* test if LPC coefficients are stable (all poles within unit circle) */ -static opus_int32 LPC_inverse_pred_gain_QA( /* O Returns inverse prediction gain in energy domain, Q30 */ - opus_int32 A_QA[ 2 ][ SILK_MAX_ORDER_LPC ], /* I Prediction coefficients */ - const opus_int order /* I Prediction order */ -) -{ - opus_int k, n, mult2Q; - opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp_QA; - opus_int32 *Aold_QA, *Anew_QA; - - Anew_QA = A_QA[ order & 1 ]; - - invGain_Q30 = (opus_int32)1 << 30; - for( k = order - 1; k > 0; k-- ) { - /* Check for stability */ - if( ( Anew_QA[ k ] > A_LIMIT ) || ( Anew_QA[ k ] < -A_LIMIT ) ) { - 
return 0; - } - - /* Set RC equal to negated AR coef */ - rc_Q31 = -silk_LSHIFT( Anew_QA[ k ], 31 - QA ); - - /* rc_mult1_Q30 range: [ 1 : 2^30 ] */ - rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 ); - silk_assert( rc_mult1_Q30 > ( 1 << 15 ) ); /* reduce A_LIMIT if fails */ - silk_assert( rc_mult1_Q30 <= ( 1 << 30 ) ); - - /* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */ - mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) ); - rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 ); - - /* Update inverse gain */ - /* invGain_Q30 range: [ 0 : 2^30 ] */ - invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 ); - silk_assert( invGain_Q30 >= 0 ); - silk_assert( invGain_Q30 <= ( 1 << 30 ) ); - - /* Swap pointers */ - Aold_QA = Anew_QA; - Anew_QA = A_QA[ k & 1 ]; - - /* Update AR coefficient */ - for( n = 0; n < k; n++ ) { - tmp_QA = Aold_QA[ n ] - MUL32_FRAC_Q( Aold_QA[ k - n - 1 ], rc_Q31, 31 ); - Anew_QA[ n ] = MUL32_FRAC_Q( tmp_QA, rc_mult2 , mult2Q ); - } - } - - /* Check for stability */ - if( ( Anew_QA[ 0 ] > A_LIMIT ) || ( Anew_QA[ 0 ] < -A_LIMIT ) ) { - return 0; - } - - /* Set RC equal to negated AR coef */ - rc_Q31 = -silk_LSHIFT( Anew_QA[ 0 ], 31 - QA ); - - /* Range: [ 1 : 2^30 ] */ - rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 ); - - /* Update inverse gain */ - /* Range: [ 0 : 2^30 ] */ - invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 ); - silk_assert( invGain_Q30 >= 0 ); - silk_assert( invGain_Q30 <= 1<<30 ); - - return invGain_Q30; -} - -/* For input in Q12 domain */ -opus_int32 silk_LPC_inverse_pred_gain( /* O Returns inverse prediction gain in energy domain, Q30 */ - const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */ - const opus_int order /* I Prediction order */ -) -{ - opus_int k; - opus_int32 Atmp_QA[ 2 ][ SILK_MAX_ORDER_LPC ]; - opus_int32 *Anew_QA; - opus_int32 DC_resp = 0; - - Anew_QA = Atmp_QA[ order & 1 ]; - - /* Increase Q domain of the 
AR coefficients */ - for( k = 0; k < order; k++ ) { - DC_resp += (opus_int32)A_Q12[ k ]; - Anew_QA[ k ] = silk_LSHIFT32( (opus_int32)A_Q12[ k ], QA - 12 ); - } - /* If the DC is unstable, we don't even need to do the full calculations */ - if( DC_resp >= 4096 ) { - return 0; - } - return LPC_inverse_pred_gain_QA( Atmp_QA, order ); -} - -#ifdef FIXED_POINT - -/* For input in Q24 domain */ -opus_int32 silk_LPC_inverse_pred_gain_Q24( /* O Returns inverse prediction gain in energy domain, Q30 */ - const opus_int32 *A_Q24, /* I Prediction coefficients [order] */ - const opus_int order /* I Prediction order */ -) -{ - opus_int k; - opus_int32 Atmp_QA[ 2 ][ SILK_MAX_ORDER_LPC ]; - opus_int32 *Anew_QA; - - Anew_QA = Atmp_QA[ order & 1 ]; - - /* Increase Q domain of the AR coefficients */ - for( k = 0; k < order; k++ ) { - Anew_QA[ k ] = silk_RSHIFT32( A_Q24[ k ], 24 - QA ); - } - - return LPC_inverse_pred_gain_QA( Atmp_QA, order ); -} -#endif diff --git a/thirdparty/opus/silk/LP_variable_cutoff.c b/thirdparty/opus/silk/LP_variable_cutoff.c deleted file mode 100644 index f639e1f899..0000000000 --- a/thirdparty/opus/silk/LP_variable_cutoff.c +++ /dev/null @@ -1,135 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* - Elliptic/Cauer filters designed with 0.1 dB passband ripple, - 80 dB minimum stopband attenuation, and - [0.95 : 0.15 : 0.35] normalized cut off frequencies. 
-*/ - -#include "main.h" - -/* Helper function, interpolates the filter taps */ -static OPUS_INLINE void silk_LP_interpolate_filter_taps( - opus_int32 B_Q28[ TRANSITION_NB ], - opus_int32 A_Q28[ TRANSITION_NA ], - const opus_int ind, - const opus_int32 fac_Q16 -) -{ - opus_int nb, na; - - if( ind < TRANSITION_INT_NUM - 1 ) { - if( fac_Q16 > 0 ) { - if( fac_Q16 < 32768 ) { /* fac_Q16 is in range of a 16-bit int */ - /* Piece-wise linear interpolation of B and A */ - for( nb = 0; nb < TRANSITION_NB; nb++ ) { - B_Q28[ nb ] = silk_SMLAWB( - silk_Transition_LP_B_Q28[ ind ][ nb ], - silk_Transition_LP_B_Q28[ ind + 1 ][ nb ] - - silk_Transition_LP_B_Q28[ ind ][ nb ], - fac_Q16 ); - } - for( na = 0; na < TRANSITION_NA; na++ ) { - A_Q28[ na ] = silk_SMLAWB( - silk_Transition_LP_A_Q28[ ind ][ na ], - silk_Transition_LP_A_Q28[ ind + 1 ][ na ] - - silk_Transition_LP_A_Q28[ ind ][ na ], - fac_Q16 ); - } - } else { /* ( fac_Q16 - ( 1 << 16 ) ) is in range of a 16-bit int */ - silk_assert( fac_Q16 - ( 1 << 16 ) == silk_SAT16( fac_Q16 - ( 1 << 16 ) ) ); - /* Piece-wise linear interpolation of B and A */ - for( nb = 0; nb < TRANSITION_NB; nb++ ) { - B_Q28[ nb ] = silk_SMLAWB( - silk_Transition_LP_B_Q28[ ind + 1 ][ nb ], - silk_Transition_LP_B_Q28[ ind + 1 ][ nb ] - - silk_Transition_LP_B_Q28[ ind ][ nb ], - fac_Q16 - ( (opus_int32)1 << 16 ) ); - } - for( na = 0; na < TRANSITION_NA; na++ ) { - A_Q28[ na ] = silk_SMLAWB( - silk_Transition_LP_A_Q28[ ind + 1 ][ na ], - silk_Transition_LP_A_Q28[ ind + 1 ][ na ] - - silk_Transition_LP_A_Q28[ ind ][ na ], - fac_Q16 - ( (opus_int32)1 << 16 ) ); - } - } - } else { - silk_memcpy( B_Q28, silk_Transition_LP_B_Q28[ ind ], TRANSITION_NB * sizeof( opus_int32 ) ); - silk_memcpy( A_Q28, silk_Transition_LP_A_Q28[ ind ], TRANSITION_NA * sizeof( opus_int32 ) ); - } - } else { - silk_memcpy( B_Q28, silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM - 1 ], TRANSITION_NB * sizeof( opus_int32 ) ); - silk_memcpy( A_Q28, silk_Transition_LP_A_Q28[ 
TRANSITION_INT_NUM - 1 ], TRANSITION_NA * sizeof( opus_int32 ) ); - } -} - -/* Low-pass filter with variable cutoff frequency based on */ -/* piece-wise linear interpolation between elliptic filters */ -/* Start by setting psEncC->mode <> 0; */ -/* Deactivate by setting psEncC->mode = 0; */ -void silk_LP_variable_cutoff( - silk_LP_state *psLP, /* I/O LP filter state */ - opus_int16 *frame, /* I/O Low-pass filtered output signal */ - const opus_int frame_length /* I Frame length */ -) -{ - opus_int32 B_Q28[ TRANSITION_NB ], A_Q28[ TRANSITION_NA ], fac_Q16 = 0; - opus_int ind = 0; - - silk_assert( psLP->transition_frame_no >= 0 && psLP->transition_frame_no <= TRANSITION_FRAMES ); - - /* Run filter if needed */ - if( psLP->mode != 0 ) { - /* Calculate index and interpolation factor for interpolation */ -#if( TRANSITION_INT_STEPS == 64 ) - fac_Q16 = silk_LSHIFT( TRANSITION_FRAMES - psLP->transition_frame_no, 16 - 6 ); -#else - fac_Q16 = silk_DIV32_16( silk_LSHIFT( TRANSITION_FRAMES - psLP->transition_frame_no, 16 ), TRANSITION_FRAMES ); -#endif - ind = silk_RSHIFT( fac_Q16, 16 ); - fac_Q16 -= silk_LSHIFT( ind, 16 ); - - silk_assert( ind >= 0 ); - silk_assert( ind < TRANSITION_INT_NUM ); - - /* Interpolate filter coefficients */ - silk_LP_interpolate_filter_taps( B_Q28, A_Q28, ind, fac_Q16 ); - - /* Update transition frame number for next frame */ - psLP->transition_frame_no = silk_LIMIT( psLP->transition_frame_no + psLP->mode, 0, TRANSITION_FRAMES ); - - /* ARMA low-pass filtering */ - silk_assert( TRANSITION_NB == 3 && TRANSITION_NA == 2 ); - silk_biquad_alt( frame, B_Q28, A_Q28, psLP->In_LP_State, frame, frame_length, 1); - } -} diff --git a/thirdparty/opus/silk/MacroCount.h b/thirdparty/opus/silk/MacroCount.h deleted file mode 100644 index 834817d058..0000000000 --- a/thirdparty/opus/silk/MacroCount.h +++ /dev/null @@ -1,718 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifndef SIGPROCFIX_API_MACROCOUNT_H -#define SIGPROCFIX_API_MACROCOUNT_H -#include <stdio.h> - -#ifdef silk_MACRO_COUNT -#define varDefine opus_int64 ops_count = 0; - -extern opus_int64 ops_count; - -static OPUS_INLINE opus_int64 silk_SaveCount(){ - return(ops_count); -} - -static OPUS_INLINE opus_int64 silk_SaveResetCount(){ - opus_int64 ret; - - ret = ops_count; - ops_count = 0; - return(ret); -} - -static OPUS_INLINE silk_PrintCount(){ - printf("ops_count = %d \n ", (opus_int32)ops_count); -} - -#undef silk_MUL -static OPUS_INLINE opus_int32 silk_MUL(opus_int32 a32, opus_int32 b32){ - opus_int32 ret; - ops_count += 4; - ret = a32 * b32; - return ret; -} - -#undef silk_MUL_uint -static OPUS_INLINE opus_uint32 silk_MUL_uint(opus_uint32 a32, opus_uint32 b32){ - opus_uint32 ret; - ops_count += 4; - ret = a32 * b32; - return ret; -} -#undef silk_MLA -static OPUS_INLINE opus_int32 silk_MLA(opus_int32 a32, opus_int32 b32, opus_int32 c32){ - opus_int32 ret; - ops_count += 4; - ret = a32 + b32 * c32; - return ret; -} - -#undef silk_MLA_uint -static OPUS_INLINE opus_int32 silk_MLA_uint(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32){ - opus_uint32 ret; - ops_count += 4; - ret = a32 + b32 * c32; - return ret; -} - -#undef silk_SMULWB -static OPUS_INLINE opus_int32 silk_SMULWB(opus_int32 a32, opus_int32 b32){ - opus_int32 ret; - ops_count += 5; - ret = (a32 >> 16) * (opus_int32)((opus_int16)b32) + (((a32 & 0x0000FFFF) * (opus_int32)((opus_int16)b32)) >> 16); - return ret; -} -#undef silk_SMLAWB -static OPUS_INLINE opus_int32 silk_SMLAWB(opus_int32 a32, opus_int32 b32, opus_int32 c32){ - opus_int32 ret; - ops_count += 5; - ret = ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16))); - return ret; -} - -#undef silk_SMULWT -static OPUS_INLINE opus_int32 silk_SMULWT(opus_int32 a32, opus_int32 b32){ - opus_int32 ret; - 
ops_count += 4; - ret = (a32 >> 16) * (b32 >> 16) + (((a32 & 0x0000FFFF) * (b32 >> 16)) >> 16); - return ret; -} -#undef silk_SMLAWT -static OPUS_INLINE opus_int32 silk_SMLAWT(opus_int32 a32, opus_int32 b32, opus_int32 c32){ - opus_int32 ret; - ops_count += 4; - ret = a32 + ((b32 >> 16) * (c32 >> 16)) + (((b32 & 0x0000FFFF) * ((c32 >> 16)) >> 16)); - return ret; -} - -#undef silk_SMULBB -static OPUS_INLINE opus_int32 silk_SMULBB(opus_int32 a32, opus_int32 b32){ - opus_int32 ret; - ops_count += 1; - ret = (opus_int32)((opus_int16)a32) * (opus_int32)((opus_int16)b32); - return ret; -} -#undef silk_SMLABB -static OPUS_INLINE opus_int32 silk_SMLABB(opus_int32 a32, opus_int32 b32, opus_int32 c32){ - opus_int32 ret; - ops_count += 1; - ret = a32 + (opus_int32)((opus_int16)b32) * (opus_int32)((opus_int16)c32); - return ret; -} - -#undef silk_SMULBT -static OPUS_INLINE opus_int32 silk_SMULBT(opus_int32 a32, opus_int32 b32 ){ - opus_int32 ret; - ops_count += 4; - ret = ((opus_int32)((opus_int16)a32)) * (b32 >> 16); - return ret; -} - -#undef silk_SMLABT -static OPUS_INLINE opus_int32 silk_SMLABT(opus_int32 a32, opus_int32 b32, opus_int32 c32){ - opus_int32 ret; - ops_count += 1; - ret = a32 + ((opus_int32)((opus_int16)b32)) * (c32 >> 16); - return ret; -} - -#undef silk_SMULTT -static OPUS_INLINE opus_int32 silk_SMULTT(opus_int32 a32, opus_int32 b32){ - opus_int32 ret; - ops_count += 1; - ret = (a32 >> 16) * (b32 >> 16); - return ret; -} - -#undef silk_SMLATT -static OPUS_INLINE opus_int32 silk_SMLATT(opus_int32 a32, opus_int32 b32, opus_int32 c32){ - opus_int32 ret; - ops_count += 1; - ret = a32 + (b32 >> 16) * (c32 >> 16); - return ret; -} - - -/* multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode)*/ -#undef silk_MLA_ovflw -#define silk_MLA_ovflw silk_MLA - -#undef silk_SMLABB_ovflw -#define silk_SMLABB_ovflw silk_SMLABB - -#undef silk_SMLABT_ovflw -#define silk_SMLABT_ovflw silk_SMLABT - -#undef silk_SMLATT_ovflw -#define 
silk_SMLATT_ovflw silk_SMLATT - -#undef silk_SMLAWB_ovflw -#define silk_SMLAWB_ovflw silk_SMLAWB - -#undef silk_SMLAWT_ovflw -#define silk_SMLAWT_ovflw silk_SMLAWT - -#undef silk_SMULL -static OPUS_INLINE opus_int64 silk_SMULL(opus_int32 a32, opus_int32 b32){ - opus_int64 ret; - ops_count += 8; - ret = ((opus_int64)(a32) * /*(opus_int64)*/(b32)); - return ret; -} - -#undef silk_SMLAL -static OPUS_INLINE opus_int64 silk_SMLAL(opus_int64 a64, opus_int32 b32, opus_int32 c32){ - opus_int64 ret; - ops_count += 8; - ret = a64 + ((opus_int64)(b32) * /*(opus_int64)*/(c32)); - return ret; -} -#undef silk_SMLALBB -static OPUS_INLINE opus_int64 silk_SMLALBB(opus_int64 a64, opus_int16 b16, opus_int16 c16){ - opus_int64 ret; - ops_count += 4; - ret = a64 + ((opus_int64)(b16) * /*(opus_int64)*/(c16)); - return ret; -} - -#undef SigProcFIX_CLZ16 -static OPUS_INLINE opus_int32 SigProcFIX_CLZ16(opus_int16 in16) -{ - opus_int32 out32 = 0; - ops_count += 10; - if( in16 == 0 ) { - return 16; - } - /* test nibbles */ - if( in16 & 0xFF00 ) { - if( in16 & 0xF000 ) { - in16 >>= 12; - } else { - out32 += 4; - in16 >>= 8; - } - } else { - if( in16 & 0xFFF0 ) { - out32 += 8; - in16 >>= 4; - } else { - out32 += 12; - } - } - /* test bits and return */ - if( in16 & 0xC ) { - if( in16 & 0x8 ) - return out32 + 0; - else - return out32 + 1; - } else { - if( in16 & 0xE ) - return out32 + 2; - else - return out32 + 3; - } -} - -#undef SigProcFIX_CLZ32 -static OPUS_INLINE opus_int32 SigProcFIX_CLZ32(opus_int32 in32) -{ - /* test highest 16 bits and convert to opus_int16 */ - ops_count += 2; - if( in32 & 0xFFFF0000 ) { - return SigProcFIX_CLZ16((opus_int16)(in32 >> 16)); - } else { - return SigProcFIX_CLZ16((opus_int16)in32) + 16; - } -} - -#undef silk_DIV32 -static OPUS_INLINE opus_int32 silk_DIV32(opus_int32 a32, opus_int32 b32){ - ops_count += 64; - return a32 / b32; -} - -#undef silk_DIV32_16 -static OPUS_INLINE opus_int32 silk_DIV32_16(opus_int32 a32, opus_int32 b32){ - ops_count += 32; - return 
a32 / b32; -} - -#undef silk_SAT8 -static OPUS_INLINE opus_int8 silk_SAT8(opus_int64 a){ - opus_int8 tmp; - ops_count += 1; - tmp = (opus_int8)((a) > silk_int8_MAX ? silk_int8_MAX : \ - ((a) < silk_int8_MIN ? silk_int8_MIN : (a))); - return(tmp); -} - -#undef silk_SAT16 -static OPUS_INLINE opus_int16 silk_SAT16(opus_int64 a){ - opus_int16 tmp; - ops_count += 1; - tmp = (opus_int16)((a) > silk_int16_MAX ? silk_int16_MAX : \ - ((a) < silk_int16_MIN ? silk_int16_MIN : (a))); - return(tmp); -} -#undef silk_SAT32 -static OPUS_INLINE opus_int32 silk_SAT32(opus_int64 a){ - opus_int32 tmp; - ops_count += 1; - tmp = (opus_int32)((a) > silk_int32_MAX ? silk_int32_MAX : \ - ((a) < silk_int32_MIN ? silk_int32_MIN : (a))); - return(tmp); -} -#undef silk_POS_SAT32 -static OPUS_INLINE opus_int32 silk_POS_SAT32(opus_int64 a){ - opus_int32 tmp; - ops_count += 1; - tmp = (opus_int32)((a) > silk_int32_MAX ? silk_int32_MAX : (a)); - return(tmp); -} - -#undef silk_ADD_POS_SAT8 -static OPUS_INLINE opus_int8 silk_ADD_POS_SAT8(opus_int64 a, opus_int64 b){ - opus_int8 tmp; - ops_count += 1; - tmp = (opus_int8)((((a)+(b)) & 0x80) ? silk_int8_MAX : ((a)+(b))); - return(tmp); -} -#undef silk_ADD_POS_SAT16 -static OPUS_INLINE opus_int16 silk_ADD_POS_SAT16(opus_int64 a, opus_int64 b){ - opus_int16 tmp; - ops_count += 1; - tmp = (opus_int16)((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b))); - return(tmp); -} - -#undef silk_ADD_POS_SAT32 -static OPUS_INLINE opus_int32 silk_ADD_POS_SAT32(opus_int64 a, opus_int64 b){ - opus_int32 tmp; - ops_count += 1; - tmp = (opus_int32)((((a)+(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b))); - return(tmp); -} - -#undef silk_ADD_POS_SAT64 -static OPUS_INLINE opus_int64 silk_ADD_POS_SAT64(opus_int64 a, opus_int64 b){ - opus_int64 tmp; - ops_count += 1; - tmp = ((((a)+(b)) & 0x8000000000000000LL) ? 
silk_int64_MAX : ((a)+(b))); - return(tmp); -} - -#undef silk_LSHIFT8 -static OPUS_INLINE opus_int8 silk_LSHIFT8(opus_int8 a, opus_int32 shift){ - opus_int8 ret; - ops_count += 1; - ret = a << shift; - return ret; -} -#undef silk_LSHIFT16 -static OPUS_INLINE opus_int16 silk_LSHIFT16(opus_int16 a, opus_int32 shift){ - opus_int16 ret; - ops_count += 1; - ret = a << shift; - return ret; -} -#undef silk_LSHIFT32 -static OPUS_INLINE opus_int32 silk_LSHIFT32(opus_int32 a, opus_int32 shift){ - opus_int32 ret; - ops_count += 1; - ret = a << shift; - return ret; -} -#undef silk_LSHIFT64 -static OPUS_INLINE opus_int64 silk_LSHIFT64(opus_int64 a, opus_int shift){ - ops_count += 1; - return a << shift; -} - -#undef silk_LSHIFT_ovflw -static OPUS_INLINE opus_int32 silk_LSHIFT_ovflw(opus_int32 a, opus_int32 shift){ - ops_count += 1; - return a << shift; -} - -#undef silk_LSHIFT_uint -static OPUS_INLINE opus_uint32 silk_LSHIFT_uint(opus_uint32 a, opus_int32 shift){ - opus_uint32 ret; - ops_count += 1; - ret = a << shift; - return ret; -} - -#undef silk_RSHIFT8 -static OPUS_INLINE opus_int8 silk_RSHIFT8(opus_int8 a, opus_int32 shift){ - ops_count += 1; - return a >> shift; -} -#undef silk_RSHIFT16 -static OPUS_INLINE opus_int16 silk_RSHIFT16(opus_int16 a, opus_int32 shift){ - ops_count += 1; - return a >> shift; -} -#undef silk_RSHIFT32 -static OPUS_INLINE opus_int32 silk_RSHIFT32(opus_int32 a, opus_int32 shift){ - ops_count += 1; - return a >> shift; -} -#undef silk_RSHIFT64 -static OPUS_INLINE opus_int64 silk_RSHIFT64(opus_int64 a, opus_int64 shift){ - ops_count += 1; - return a >> shift; -} - -#undef silk_RSHIFT_uint -static OPUS_INLINE opus_uint32 silk_RSHIFT_uint(opus_uint32 a, opus_int32 shift){ - ops_count += 1; - return a >> shift; -} - -#undef silk_ADD_LSHIFT -static OPUS_INLINE opus_int32 silk_ADD_LSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){ - opus_int32 ret; - ops_count += 1; - ret = a + (b << shift); - return ret; /* shift >= 0*/ -} -#undef silk_ADD_LSHIFT32 
-static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ - opus_int32 ret; - ops_count += 1; - ret = a + (b << shift); - return ret; /* shift >= 0*/ -} -#undef silk_ADD_LSHIFT_uint -static OPUS_INLINE opus_uint32 silk_ADD_LSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){ - opus_uint32 ret; - ops_count += 1; - ret = a + (b << shift); - return ret; /* shift >= 0*/ -} -#undef silk_ADD_RSHIFT -static OPUS_INLINE opus_int32 silk_ADD_RSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){ - opus_int32 ret; - ops_count += 1; - ret = a + (b >> shift); - return ret; /* shift > 0*/ -} -#undef silk_ADD_RSHIFT32 -static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ - opus_int32 ret; - ops_count += 1; - ret = a + (b >> shift); - return ret; /* shift > 0*/ -} -#undef silk_ADD_RSHIFT_uint -static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){ - opus_uint32 ret; - ops_count += 1; - ret = a + (b >> shift); - return ret; /* shift > 0*/ -} -#undef silk_SUB_LSHIFT32 -static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ - opus_int32 ret; - ops_count += 1; - ret = a - (b << shift); - return ret; /* shift >= 0*/ -} -#undef silk_SUB_RSHIFT32 -static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ - opus_int32 ret; - ops_count += 1; - ret = a - (b >> shift); - return ret; /* shift > 0*/ -} - -#undef silk_RSHIFT_ROUND -static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND(opus_int32 a, opus_int32 shift){ - opus_int32 ret; - ops_count += 3; - ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1; - return ret; -} - -#undef silk_RSHIFT_ROUND64 -static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64(opus_int64 a, opus_int32 shift){ - opus_int64 ret; - ops_count += 6; - ret = shift == 1 ? 
(a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1; - return ret; -} - -#undef silk_abs_int64 -static OPUS_INLINE opus_int64 silk_abs_int64(opus_int64 a){ - ops_count += 1; - return (((a) > 0) ? (a) : -(a)); /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN*/ -} - -#undef silk_abs_int32 -static OPUS_INLINE opus_int32 silk_abs_int32(opus_int32 a){ - ops_count += 1; - return silk_abs(a); -} - - -#undef silk_min -static silk_min(a, b){ - ops_count += 1; - return (((a) < (b)) ? (a) : (b)); -} -#undef silk_max -static silk_max(a, b){ - ops_count += 1; - return (((a) > (b)) ? (a) : (b)); -} -#undef silk_sign -static silk_sign(a){ - ops_count += 1; - return ((a) > 0 ? 1 : ( (a) < 0 ? -1 : 0 )); -} - -#undef silk_ADD16 -static OPUS_INLINE opus_int16 silk_ADD16(opus_int16 a, opus_int16 b){ - opus_int16 ret; - ops_count += 1; - ret = a + b; - return ret; -} - -#undef silk_ADD32 -static OPUS_INLINE opus_int32 silk_ADD32(opus_int32 a, opus_int32 b){ - opus_int32 ret; - ops_count += 1; - ret = a + b; - return ret; -} - -#undef silk_ADD64 -static OPUS_INLINE opus_int64 silk_ADD64(opus_int64 a, opus_int64 b){ - opus_int64 ret; - ops_count += 2; - ret = a + b; - return ret; -} - -#undef silk_SUB16 -static OPUS_INLINE opus_int16 silk_SUB16(opus_int16 a, opus_int16 b){ - opus_int16 ret; - ops_count += 1; - ret = a - b; - return ret; -} - -#undef silk_SUB32 -static OPUS_INLINE opus_int32 silk_SUB32(opus_int32 a, opus_int32 b){ - opus_int32 ret; - ops_count += 1; - ret = a - b; - return ret; -} - -#undef silk_SUB64 -static OPUS_INLINE opus_int64 silk_SUB64(opus_int64 a, opus_int64 b){ - opus_int64 ret; - ops_count += 2; - ret = a - b; - return ret; -} - -#undef silk_ADD_SAT16 -static OPUS_INLINE opus_int16 silk_ADD_SAT16( opus_int16 a16, opus_int16 b16 ) { - opus_int16 res; - /* Nb will be counted in AKP_add32 and silk_SAT16*/ - res = (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a16), (b16) ) ); - return res; -} - -#undef silk_ADD_SAT32 -static OPUS_INLINE 
opus_int32 silk_ADD_SAT32(opus_int32 a32, opus_int32 b32){ - opus_int32 res; - ops_count += 1; - res = ((((a32) + (b32)) & 0x80000000) == 0 ? \ - ((((a32) & (b32)) & 0x80000000) != 0 ? silk_int32_MIN : (a32)+(b32)) : \ - ((((a32) | (b32)) & 0x80000000) == 0 ? silk_int32_MAX : (a32)+(b32)) ); - return res; -} - -#undef silk_ADD_SAT64 -static OPUS_INLINE opus_int64 silk_ADD_SAT64( opus_int64 a64, opus_int64 b64 ) { - opus_int64 res; - ops_count += 1; - res = ((((a64) + (b64)) & 0x8000000000000000LL) == 0 ? \ - ((((a64) & (b64)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a64)+(b64)) : \ - ((((a64) | (b64)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a64)+(b64)) ); - return res; -} - -#undef silk_SUB_SAT16 -static OPUS_INLINE opus_int16 silk_SUB_SAT16( opus_int16 a16, opus_int16 b16 ) { - opus_int16 res; - silk_assert(0); - /* Nb will be counted in sub-macros*/ - res = (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a16), (b16) ) ); - return res; -} - -#undef silk_SUB_SAT32 -static OPUS_INLINE opus_int32 silk_SUB_SAT32( opus_int32 a32, opus_int32 b32 ) { - opus_int32 res; - ops_count += 1; - res = ((((a32)-(b32)) & 0x80000000) == 0 ? \ - (( (a32) & ((b32)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a32)-(b32)) : \ - ((((a32)^0x80000000) & (b32) & 0x80000000) ? silk_int32_MAX : (a32)-(b32)) ); - return res; -} - -#undef silk_SUB_SAT64 -static OPUS_INLINE opus_int64 silk_SUB_SAT64( opus_int64 a64, opus_int64 b64 ) { - opus_int64 res; - ops_count += 1; - res = ((((a64)-(b64)) & 0x8000000000000000LL) == 0 ? \ - (( (a64) & ((b64)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a64)-(b64)) : \ - ((((a64)^0x8000000000000000LL) & (b64) & 0x8000000000000000LL) ? 
silk_int64_MAX : (a64)-(b64)) ); - - return res; -} - -#undef silk_SMULWW -static OPUS_INLINE opus_int32 silk_SMULWW(opus_int32 a32, opus_int32 b32){ - opus_int32 ret; - /* Nb will be counted in sub-macros*/ - ret = silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)); - return ret; -} - -#undef silk_SMLAWW -static OPUS_INLINE opus_int32 silk_SMLAWW(opus_int32 a32, opus_int32 b32, opus_int32 c32){ - opus_int32 ret; - /* Nb will be counted in sub-macros*/ - ret = silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16)); - return ret; -} - -#undef silk_min_int -static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b) -{ - ops_count += 1; - return (((a) < (b)) ? (a) : (b)); -} - -#undef silk_min_16 -static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b) -{ - ops_count += 1; - return (((a) < (b)) ? (a) : (b)); -} -#undef silk_min_32 -static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b) -{ - ops_count += 1; - return (((a) < (b)) ? (a) : (b)); -} -#undef silk_min_64 -static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b) -{ - ops_count += 1; - return (((a) < (b)) ? (a) : (b)); -} - -/* silk_min() versions with typecast in the function call */ -#undef silk_max_int -static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b) -{ - ops_count += 1; - return (((a) > (b)) ? (a) : (b)); -} -#undef silk_max_16 -static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b) -{ - ops_count += 1; - return (((a) > (b)) ? (a) : (b)); -} -#undef silk_max_32 -static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b) -{ - ops_count += 1; - return (((a) > (b)) ? (a) : (b)); -} - -#undef silk_max_64 -static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b) -{ - ops_count += 1; - return (((a) > (b)) ? 
(a) : (b)); -} - - -#undef silk_LIMIT_int -static OPUS_INLINE opus_int silk_LIMIT_int(opus_int a, opus_int limit1, opus_int limit2) -{ - opus_int ret; - ops_count += 6; - - ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \ - : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a)))); - - return(ret); -} - -#undef silk_LIMIT_16 -static OPUS_INLINE opus_int16 silk_LIMIT_16(opus_int16 a, opus_int16 limit1, opus_int16 limit2) -{ - opus_int16 ret; - ops_count += 6; - - ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \ - : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a)))); - -return(ret); -} - - -#undef silk_LIMIT_32 -static OPUS_INLINE opus_int silk_LIMIT_32(opus_int32 a, opus_int32 limit1, opus_int32 limit2) -{ - opus_int32 ret; - ops_count += 6; - - ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \ - : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a)))); - return(ret); -} - -#else -#define varDefine -#define silk_SaveCount() - -#endif -#endif - diff --git a/thirdparty/opus/silk/MacroDebug.h b/thirdparty/opus/silk/MacroDebug.h deleted file mode 100644 index 35aedc5c5f..0000000000 --- a/thirdparty/opus/silk/MacroDebug.h +++ /dev/null @@ -1,952 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Copyright (C) 2012 Xiph.Org Foundation -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef MACRO_DEBUG_H -#define MACRO_DEBUG_H - -/* Redefine macro functions with extensive assertion in DEBUG mode. 
- As functions can't be undefined, this file can't work with SigProcFIX_MacroCount.h */ - -#if ( defined (FIXED_DEBUG) || ( 0 && defined (_DEBUG) ) ) && !defined (silk_MACRO_COUNT) - -#undef silk_ADD16 -#define silk_ADD16(a,b) silk_ADD16_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int16 silk_ADD16_(opus_int16 a, opus_int16 b, char *file, int line){ - opus_int16 ret; - - ret = a + b; - if ( ret != silk_ADD_SAT16( a, b ) ) - { - fprintf (stderr, "silk_ADD16(%d, %d) in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_ADD32 -#define silk_ADD32(a,b) silk_ADD32_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_ADD32_(opus_int32 a, opus_int32 b, char *file, int line){ - opus_int32 ret; - - ret = a + b; - if ( ret != silk_ADD_SAT32( a, b ) ) - { - fprintf (stderr, "silk_ADD32(%d, %d) in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_ADD64 -#define silk_ADD64(a,b) silk_ADD64_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int64 silk_ADD64_(opus_int64 a, opus_int64 b, char *file, int line){ - opus_int64 ret; - - ret = a + b; - if ( ret != silk_ADD_SAT64( a, b ) ) - { - fprintf (stderr, "silk_ADD64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_SUB16 -#define silk_SUB16(a,b) silk_SUB16_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int16 silk_SUB16_(opus_int16 a, opus_int16 b, char *file, int line){ - opus_int16 ret; - - ret = a - b; - if ( ret != silk_SUB_SAT16( a, b ) ) - { - fprintf (stderr, "silk_SUB16(%d, %d) in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_SUB32 -#define silk_SUB32(a,b) silk_SUB32_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 
silk_SUB32_(opus_int32 a, opus_int32 b, char *file, int line){ - opus_int32 ret; - - ret = a - b; - if ( ret != silk_SUB_SAT32( a, b ) ) - { - fprintf (stderr, "silk_SUB32(%d, %d) in %s: line %d\n", a, b, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_SUB64 -#define silk_SUB64(a,b) silk_SUB64_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int64 silk_SUB64_(opus_int64 a, opus_int64 b, char *file, int line){ - opus_int64 ret; - - ret = a - b; - if ( ret != silk_SUB_SAT64( a, b ) ) - { - fprintf (stderr, "silk_SUB64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)b, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_ADD_SAT16 -#define silk_ADD_SAT16(a,b) silk_ADD_SAT16_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int16 silk_ADD_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line) { - opus_int16 res; - res = (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a16), (b16) ) ); - if ( res != silk_SAT16( (opus_int32)a16 + (opus_int32)b16 ) ) - { - fprintf (stderr, "silk_ADD_SAT16(%d, %d) in %s: line %d\n", a16, b16, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return res; -} - -#undef silk_ADD_SAT32 -#define silk_ADD_SAT32(a,b) silk_ADD_SAT32_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_ADD_SAT32_(opus_int32 a32, opus_int32 b32, char *file, int line){ - opus_int32 res; - res = ((((opus_uint32)(a32) + (opus_uint32)(b32)) & 0x80000000) == 0 ? \ - ((((a32) & (b32)) & 0x80000000) != 0 ? silk_int32_MIN : (a32)+(b32)) : \ - ((((a32) | (b32)) & 0x80000000) == 0 ? 
silk_int32_MAX : (a32)+(b32)) ); - if ( res != silk_SAT32( (opus_int64)a32 + (opus_int64)b32 ) ) - { - fprintf (stderr, "silk_ADD_SAT32(%d, %d) in %s: line %d\n", a32, b32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return res; -} - -#undef silk_ADD_SAT64 -#define silk_ADD_SAT64(a,b) silk_ADD_SAT64_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int64 silk_ADD_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line) { - opus_int64 res; - int fail = 0; - res = ((((a64) + (b64)) & 0x8000000000000000LL) == 0 ? \ - ((((a64) & (b64)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a64)+(b64)) : \ - ((((a64) | (b64)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a64)+(b64)) ); - if( res != a64 + b64 ) { - /* Check that we saturated to the correct extreme value */ - if ( !(( res == silk_int64_MAX && ( ( a64 >> 1 ) + ( b64 >> 1 ) > ( silk_int64_MAX >> 3 ) ) ) || - ( res == silk_int64_MIN && ( ( a64 >> 1 ) + ( b64 >> 1 ) < ( silk_int64_MIN >> 3 ) ) ) ) ) - { - fail = 1; - } - } else { - /* Saturation not necessary */ - fail = res != a64 + b64; - } - if ( fail ) - { - fprintf (stderr, "silk_ADD_SAT64(%lld, %lld) in %s: line %d\n", (long long)a64, (long long)b64, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return res; -} - -#undef silk_SUB_SAT16 -#define silk_SUB_SAT16(a,b) silk_SUB_SAT16_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int16 silk_SUB_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line ) { - opus_int16 res; - res = (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a16), (b16) ) ); - if ( res != silk_SAT16( (opus_int32)a16 - (opus_int32)b16 ) ) - { - fprintf (stderr, "silk_SUB_SAT16(%d, %d) in %s: line %d\n", a16, b16, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return res; -} - -#undef silk_SUB_SAT32 -#define silk_SUB_SAT32(a,b) silk_SUB_SAT32_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SUB_SAT32_( opus_int32 a32, 
opus_int32 b32, char *file, int line ) { - opus_int32 res; - res = ((((opus_uint32)(a32)-(opus_uint32)(b32)) & 0x80000000) == 0 ? \ - (( (a32) & ((b32)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a32)-(b32)) : \ - ((((a32)^0x80000000) & (b32) & 0x80000000) ? silk_int32_MAX : (a32)-(b32)) ); - if ( res != silk_SAT32( (opus_int64)a32 - (opus_int64)b32 ) ) - { - fprintf (stderr, "silk_SUB_SAT32(%d, %d) in %s: line %d\n", a32, b32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return res; -} - -#undef silk_SUB_SAT64 -#define silk_SUB_SAT64(a,b) silk_SUB_SAT64_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int64 silk_SUB_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line ) { - opus_int64 res; - int fail = 0; - res = ((((a64)-(b64)) & 0x8000000000000000LL) == 0 ? \ - (( (a64) & ((b64)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a64)-(b64)) : \ - ((((a64)^0x8000000000000000LL) & (b64) & 0x8000000000000000LL) ? silk_int64_MAX : (a64)-(b64)) ); - if( res != a64 - b64 ) { - /* Check that we saturated to the correct extreme value */ - if( !(( res == silk_int64_MAX && ( ( a64 >> 1 ) + ( b64 >> 1 ) > ( silk_int64_MAX >> 3 ) ) ) || - ( res == silk_int64_MIN && ( ( a64 >> 1 ) + ( b64 >> 1 ) < ( silk_int64_MIN >> 3 ) ) ) )) - { - fail = 1; - } - } else { - /* Saturation not necessary */ - fail = res != a64 - b64; - } - if ( fail ) - { - fprintf (stderr, "silk_SUB_SAT64(%lld, %lld) in %s: line %d\n", (long long)a64, (long long)b64, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return res; -} - -#undef silk_MUL -#define silk_MUL(a,b) silk_MUL_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_MUL_(opus_int32 a32, opus_int32 b32, char *file, int line){ - opus_int32 ret; - opus_int64 ret64; - ret = a32 * b32; - ret64 = (opus_int64)a32 * (opus_int64)b32; - if ( (opus_int64)ret != ret64 ) - { - fprintf (stderr, "silk_MUL(%d, %d) in %s: line %d\n", a32, b32, file, line); 
-#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_MUL_uint -#define silk_MUL_uint(a,b) silk_MUL_uint_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_uint32 silk_MUL_uint_(opus_uint32 a32, opus_uint32 b32, char *file, int line){ - opus_uint32 ret; - ret = a32 * b32; - if ( (opus_uint64)ret != (opus_uint64)a32 * (opus_uint64)b32 ) - { - fprintf (stderr, "silk_MUL_uint(%u, %u) in %s: line %d\n", a32, b32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_MLA -#define silk_MLA(a,b,c) silk_MLA_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_MLA_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ - opus_int32 ret; - ret = a32 + b32 * c32; - if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int64)c32 ) - { - fprintf (stderr, "silk_MLA(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_MLA_uint -#define silk_MLA_uint(a,b,c) silk_MLA_uint_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_MLA_uint_(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32, char *file, int line){ - opus_uint32 ret; - ret = a32 + b32 * c32; - if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int64)c32 ) - { - fprintf (stderr, "silk_MLA_uint(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_SMULWB -#define silk_SMULWB(a,b) silk_SMULWB_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SMULWB_(opus_int32 a32, opus_int32 b32, char *file, int line){ - opus_int32 ret; - ret = (a32 >> 16) * (opus_int32)((opus_int16)b32) + (((a32 & 0x0000FFFF) * (opus_int32)((opus_int16)b32)) >> 16); - if ( (opus_int64)ret != ((opus_int64)a32 * (opus_int16)b32) >> 16 ) - { - fprintf (stderr, "silk_SMULWB(%d, %d) in 
%s: line %d\n", a32, b32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_SMLAWB -#define silk_SMLAWB(a,b,c) silk_SMLAWB_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SMLAWB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ - opus_int32 ret; - ret = silk_ADD32( a32, silk_SMULWB( b32, c32 ) ); - if ( silk_ADD32( a32, silk_SMULWB( b32, c32 ) ) != silk_ADD_SAT32( a32, silk_SMULWB( b32, c32 ) ) ) - { - fprintf (stderr, "silk_SMLAWB(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_SMULWT -#define silk_SMULWT(a,b) silk_SMULWT_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SMULWT_(opus_int32 a32, opus_int32 b32, char *file, int line){ - opus_int32 ret; - ret = (a32 >> 16) * (b32 >> 16) + (((a32 & 0x0000FFFF) * (b32 >> 16)) >> 16); - if ( (opus_int64)ret != ((opus_int64)a32 * (b32 >> 16)) >> 16 ) - { - fprintf (stderr, "silk_SMULWT(%d, %d) in %s: line %d\n", a32, b32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_SMLAWT -#define silk_SMLAWT(a,b,c) silk_SMLAWT_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SMLAWT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ - opus_int32 ret; - ret = a32 + ((b32 >> 16) * (c32 >> 16)) + (((b32 & 0x0000FFFF) * ((c32 >> 16)) >> 16)); - if ( (opus_int64)ret != (opus_int64)a32 + (((opus_int64)b32 * (c32 >> 16)) >> 16) ) - { - fprintf (stderr, "silk_SMLAWT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_SMULL -#define silk_SMULL(a,b) silk_SMULL_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int64 silk_SMULL_(opus_int64 a64, opus_int64 b64, char *file, int line){ - opus_int64 ret64; - int fail = 0; - ret64 = 
a64 * b64; - if( b64 != 0 ) { - fail = a64 != (ret64 / b64); - } else if( a64 != 0 ) { - fail = b64 != (ret64 / a64); - } - if ( fail ) - { - fprintf (stderr, "silk_SMULL(%lld, %lld) in %s: line %d\n", (long long)a64, (long long)b64, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret64; -} - -/* no checking needed for silk_SMULBB */ -#undef silk_SMLABB -#define silk_SMLABB(a,b,c) silk_SMLABB_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SMLABB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ - opus_int32 ret; - ret = a32 + (opus_int32)((opus_int16)b32) * (opus_int32)((opus_int16)c32); - if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int16)c32 ) - { - fprintf (stderr, "silk_SMLABB(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -/* no checking needed for silk_SMULBT */ -#undef silk_SMLABT -#define silk_SMLABT(a,b,c) silk_SMLABT_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SMLABT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ - opus_int32 ret; - ret = a32 + ((opus_int32)((opus_int16)b32)) * (c32 >> 16); - if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (c32 >> 16) ) - { - fprintf (stderr, "silk_SMLABT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -/* no checking needed for silk_SMULTT */ -#undef silk_SMLATT -#define silk_SMLATT(a,b,c) silk_SMLATT_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SMLATT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ - opus_int32 ret; - ret = a32 + (b32 >> 16) * (c32 >> 16); - if ( (opus_int64)ret != (opus_int64)a32 + (b32 >> 16) * (c32 >> 16) ) - { - fprintf (stderr, "silk_SMLATT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); -#ifdef 
FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_SMULWW -#define silk_SMULWW(a,b) silk_SMULWW_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SMULWW_(opus_int32 a32, opus_int32 b32, char *file, int line){ - opus_int32 ret, tmp1, tmp2; - opus_int64 ret64; - int fail = 0; - - ret = silk_SMULWB( a32, b32 ); - tmp1 = silk_RSHIFT_ROUND( b32, 16 ); - tmp2 = silk_MUL( a32, tmp1 ); - - fail |= (opus_int64)tmp2 != (opus_int64) a32 * (opus_int64) tmp1; - - tmp1 = ret; - ret = silk_ADD32( tmp1, tmp2 ); - fail |= silk_ADD32( tmp1, tmp2 ) != silk_ADD_SAT32( tmp1, tmp2 ); - - ret64 = silk_RSHIFT64( silk_SMULL( a32, b32 ), 16 ); - fail |= (opus_int64)ret != ret64; - - if ( fail ) - { - fprintf (stderr, "silk_SMULWT(%d, %d) in %s: line %d\n", a32, b32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - - return ret; -} - -#undef silk_SMLAWW -#define silk_SMLAWW(a,b,c) silk_SMLAWW_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SMLAWW_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ - opus_int32 ret, tmp; - - tmp = silk_SMULWW( b32, c32 ); - ret = silk_ADD32( a32, tmp ); - if ( ret != silk_ADD_SAT32( a32, tmp ) ) - { - fprintf (stderr, "silk_SMLAWW(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -/* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */ -#undef silk_MLA_ovflw -#define silk_MLA_ovflw(a32, b32, c32) ((a32) + ((b32) * (c32))) -#undef silk_SMLABB_ovflw -#define silk_SMLABB_ovflw(a32, b32, c32) ((a32) + ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32))) - -/* no checking needed for silk_SMULL - no checking needed for silk_SMLAL - no checking needed for silk_SMLALBB - no checking needed for SigProcFIX_CLZ16 - no checking needed for SigProcFIX_CLZ32*/ - -#undef silk_DIV32 -#define silk_DIV32(a,b) 
silk_DIV32_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_DIV32_(opus_int32 a32, opus_int32 b32, char *file, int line){ - if ( b32 == 0 ) - { - fprintf (stderr, "silk_DIV32(%d, %d) in %s: line %d\n", a32, b32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return a32 / b32; -} - -#undef silk_DIV32_16 -#define silk_DIV32_16(a,b) silk_DIV32_16_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_DIV32_16_(opus_int32 a32, opus_int32 b32, char *file, int line){ - int fail = 0; - fail |= b32 == 0; - fail |= b32 > silk_int16_MAX; - fail |= b32 < silk_int16_MIN; - if ( fail ) - { - fprintf (stderr, "silk_DIV32_16(%d, %d) in %s: line %d\n", a32, b32, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return a32 / b32; -} - -/* no checking needed for silk_SAT8 - no checking needed for silk_SAT16 - no checking needed for silk_SAT32 - no checking needed for silk_POS_SAT32 - no checking needed for silk_ADD_POS_SAT8 - no checking needed for silk_ADD_POS_SAT16 - no checking needed for silk_ADD_POS_SAT32 - no checking needed for silk_ADD_POS_SAT64 */ - -#undef silk_LSHIFT8 -#define silk_LSHIFT8(a,b) silk_LSHIFT8_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int8 silk_LSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){ - opus_int8 ret; - int fail = 0; - ret = a << shift; - fail |= shift < 0; - fail |= shift >= 8; - fail |= (opus_int64)ret != ((opus_int64)a) << shift; - if ( fail ) - { - fprintf (stderr, "silk_LSHIFT8(%d, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_LSHIFT16 -#define silk_LSHIFT16(a,b) silk_LSHIFT16_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int16 silk_LSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){ - opus_int16 ret; - int fail = 0; - ret = a << shift; - fail |= shift < 0; - fail |= shift >= 16; - fail |= (opus_int64)ret != ((opus_int64)a) << 
shift; - if ( fail ) - { - fprintf (stderr, "silk_LSHIFT16(%d, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_LSHIFT32 -#define silk_LSHIFT32(a,b) silk_LSHIFT32_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_LSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){ - opus_int32 ret; - int fail = 0; - ret = a << shift; - fail |= shift < 0; - fail |= shift >= 32; - fail |= (opus_int64)ret != ((opus_int64)a) << shift; - if ( fail ) - { - fprintf (stderr, "silk_LSHIFT32(%d, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_LSHIFT64 -#define silk_LSHIFT64(a,b) silk_LSHIFT64_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int64 silk_LSHIFT64_(opus_int64 a, opus_int shift, char *file, int line){ - opus_int64 ret; - int fail = 0; - ret = a << shift; - fail |= shift < 0; - fail |= shift >= 64; - fail |= (ret>>shift) != ((opus_int64)a); - if ( fail ) - { - fprintf (stderr, "silk_LSHIFT64(%lld, %d) in %s: line %d\n", (long long)a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_LSHIFT_ovflw -#define silk_LSHIFT_ovflw(a,b) silk_LSHIFT_ovflw_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_LSHIFT_ovflw_(opus_int32 a, opus_int32 shift, char *file, int line){ - if ( (shift < 0) || (shift >= 32) ) /* no check for overflow */ - { - fprintf (stderr, "silk_LSHIFT_ovflw(%d, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return a << shift; -} - -#undef silk_LSHIFT_uint -#define silk_LSHIFT_uint(a,b) silk_LSHIFT_uint_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_uint32 silk_LSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){ - opus_uint32 ret; - ret = a << shift; - if ( (shift < 0) || ((opus_int64)ret != 
((opus_int64)a) << shift)) - { - fprintf (stderr, "silk_LSHIFT_uint(%u, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_RSHIFT8 -#define silk_RSHITF8(a,b) silk_RSHIFT8_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int8 silk_RSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){ - if ( (shift < 0) || (shift>=8) ) - { - fprintf (stderr, "silk_RSHITF8(%d, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return a >> shift; -} - -#undef silk_RSHIFT16 -#define silk_RSHITF16(a,b) silk_RSHIFT16_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int16 silk_RSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){ - if ( (shift < 0) || (shift>=16) ) - { - fprintf (stderr, "silk_RSHITF16(%d, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return a >> shift; -} - -#undef silk_RSHIFT32 -#define silk_RSHIFT32(a,b) silk_RSHIFT32_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_RSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){ - if ( (shift < 0) || (shift>=32) ) - { - fprintf (stderr, "silk_RSHITF32(%d, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return a >> shift; -} - -#undef silk_RSHIFT64 -#define silk_RSHIFT64(a,b) silk_RSHIFT64_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int64 silk_RSHIFT64_(opus_int64 a, opus_int64 shift, char *file, int line){ - if ( (shift < 0) || (shift>=64) ) - { - fprintf (stderr, "silk_RSHITF64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return a >> shift; -} - -#undef silk_RSHIFT_uint -#define silk_RSHIFT_uint(a,b) silk_RSHIFT_uint_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_uint32 silk_RSHIFT_uint_(opus_uint32 
a, opus_int32 shift, char *file, int line){ - if ( (shift < 0) || (shift>32) ) - { - fprintf (stderr, "silk_RSHIFT_uint(%u, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return a >> shift; -} - -#undef silk_ADD_LSHIFT -#define silk_ADD_LSHIFT(a,b,c) silk_ADD_LSHIFT_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE int silk_ADD_LSHIFT_(int a, int b, int shift, char *file, int line){ - opus_int16 ret; - ret = a + (b << shift); - if ( (shift < 0) || (shift>15) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) ) - { - fprintf (stderr, "silk_ADD_LSHIFT(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; /* shift >= 0 */ -} - -#undef silk_ADD_LSHIFT32 -#define silk_ADD_LSHIFT32(a,b,c) silk_ADD_LSHIFT32_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ - opus_int32 ret; - ret = a + (b << shift); - if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) ) - { - fprintf (stderr, "silk_ADD_LSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; /* shift >= 0 */ -} - -#undef silk_ADD_LSHIFT_uint -#define silk_ADD_LSHIFT_uint(a,b,c) silk_ADD_LSHIFT_uint_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_uint32 silk_ADD_LSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){ - opus_uint32 ret; - ret = a + (b << shift); - if ( (shift < 0) || (shift>32) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) ) - { - fprintf (stderr, "silk_ADD_LSHIFT_uint(%u, %u, %d) in %s: line %d\n", a, b, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; /* shift >= 0 */ -} - -#undef silk_ADD_RSHIFT -#define 
silk_ADD_RSHIFT(a,b,c) silk_ADD_RSHIFT_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE int silk_ADD_RSHIFT_(int a, int b, int shift, char *file, int line){ - opus_int16 ret; - ret = a + (b >> shift); - if ( (shift < 0) || (shift>15) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) ) - { - fprintf (stderr, "silk_ADD_RSHIFT(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; /* shift > 0 */ -} - -#undef silk_ADD_RSHIFT32 -#define silk_ADD_RSHIFT32(a,b,c) silk_ADD_RSHIFT32_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ - opus_int32 ret; - ret = a + (b >> shift); - if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) ) - { - fprintf (stderr, "silk_ADD_RSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; /* shift > 0 */ -} - -#undef silk_ADD_RSHIFT_uint -#define silk_ADD_RSHIFT_uint(a,b,c) silk_ADD_RSHIFT_uint_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){ - opus_uint32 ret; - ret = a + (b >> shift); - if ( (shift < 0) || (shift>32) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) ) - { - fprintf (stderr, "silk_ADD_RSHIFT_uint(%u, %u, %d) in %s: line %d\n", a, b, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; /* shift > 0 */ -} - -#undef silk_SUB_LSHIFT32 -#define silk_SUB_LSHIFT32(a,b,c) silk_SUB_LSHIFT32_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ - opus_int32 ret; - ret = a - (b << shift); - if ( (shift < 0) || (shift>31) || ((opus_int64)ret != 
(opus_int64)a - (((opus_int64)b) << shift)) ) - { - fprintf (stderr, "silk_SUB_LSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; /* shift >= 0 */ -} - -#undef silk_SUB_RSHIFT32 -#define silk_SUB_RSHIFT32(a,b,c) silk_SUB_RSHIFT32_((a), (b), (c), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ - opus_int32 ret; - ret = a - (b >> shift); - if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a - (((opus_int64)b) >> shift)) ) - { - fprintf (stderr, "silk_SUB_RSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; /* shift > 0 */ -} - -#undef silk_RSHIFT_ROUND -#define silk_RSHIFT_ROUND(a,b) silk_RSHIFT_ROUND_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND_(opus_int32 a, opus_int32 shift, char *file, int line){ - opus_int32 ret; - ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1; - /* the marco definition can't handle a shift of zero */ - if ( (shift <= 0) || (shift>31) || ((opus_int64)ret != ((opus_int64)a + ((opus_int64)1 << (shift - 1))) >> shift) ) - { - fprintf (stderr, "silk_RSHIFT_ROUND(%d, %d) in %s: line %d\n", a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return ret; -} - -#undef silk_RSHIFT_ROUND64 -#define silk_RSHIFT_ROUND64(a,b) silk_RSHIFT_ROUND64_((a), (b), __FILE__, __LINE__) -static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64_(opus_int64 a, opus_int32 shift, char *file, int line){ - opus_int64 ret; - /* the marco definition can't handle a shift of zero */ - if ( (shift <= 0) || (shift>=64) ) - { - fprintf (stderr, "silk_RSHIFT_ROUND64(%lld, %d) in %s: line %d\n", (long long)a, shift, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - ret = shift == 1 ? 
(a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1; - return ret; -} - -/* silk_abs is used on floats also, so doesn't work... */ -/*#undef silk_abs -static OPUS_INLINE opus_int32 silk_abs(opus_int32 a){ - silk_assert(a != 0x80000000); - return (((a) > 0) ? (a) : -(a)); // Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN -}*/ - -#undef silk_abs_int64 -#define silk_abs_int64(a) silk_abs_int64_((a), __FILE__, __LINE__) -static OPUS_INLINE opus_int64 silk_abs_int64_(opus_int64 a, char *file, int line){ - if ( a == silk_int64_MIN ) - { - fprintf (stderr, "silk_abs_int64(%lld) in %s: line %d\n", (long long)a, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return (((a) > 0) ? (a) : -(a)); /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN */ -} - -#undef silk_abs_int32 -#define silk_abs_int32(a) silk_abs_int32_((a), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_abs_int32_(opus_int32 a, char *file, int line){ - if ( a == silk_int32_MIN ) - { - fprintf (stderr, "silk_abs_int32(%d) in %s: line %d\n", a, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return silk_abs(a); -} - -#undef silk_CHECK_FIT8 -#define silk_CHECK_FIT8(a) silk_CHECK_FIT8_((a), __FILE__, __LINE__) -static OPUS_INLINE opus_int8 silk_CHECK_FIT8_( opus_int64 a, char *file, int line ){ - opus_int8 ret; - ret = (opus_int8)a; - if ( (opus_int64)ret != a ) - { - fprintf (stderr, "silk_CHECK_FIT8(%lld) in %s: line %d\n", (long long)a, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return( ret ); -} - -#undef silk_CHECK_FIT16 -#define silk_CHECK_FIT16(a) silk_CHECK_FIT16_((a), __FILE__, __LINE__) -static OPUS_INLINE opus_int16 silk_CHECK_FIT16_( opus_int64 a, char *file, int line ){ - opus_int16 ret; - ret = (opus_int16)a; - if ( (opus_int64)ret != a ) - { - fprintf (stderr, "silk_CHECK_FIT16(%lld) in %s: line %d\n", (long long)a, file, line); -#ifdef FIXED_DEBUG_ASSERT - 
silk_assert( 0 ); -#endif - } - return( ret ); -} - -#undef silk_CHECK_FIT32 -#define silk_CHECK_FIT32(a) silk_CHECK_FIT32_((a), __FILE__, __LINE__) -static OPUS_INLINE opus_int32 silk_CHECK_FIT32_( opus_int64 a, char *file, int line ){ - opus_int32 ret; - ret = (opus_int32)a; - if ( (opus_int64)ret != a ) - { - fprintf (stderr, "silk_CHECK_FIT32(%lld) in %s: line %d\n", (long long)a, file, line); -#ifdef FIXED_DEBUG_ASSERT - silk_assert( 0 ); -#endif - } - return( ret ); -} - -/* no checking for silk_NSHIFT_MUL_32_32 - no checking for silk_NSHIFT_MUL_16_16 - no checking needed for silk_min - no checking needed for silk_max - no checking needed for silk_sign -*/ - -#endif -#endif /* MACRO_DEBUG_H */ diff --git a/thirdparty/opus/silk/NLSF2A.c b/thirdparty/opus/silk/NLSF2A.c deleted file mode 100644 index b1c559ea68..0000000000 --- a/thirdparty/opus/silk/NLSF2A.c +++ /dev/null @@ -1,178 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* conversion between prediction filter coefficients and LSFs */ -/* order should be even */ -/* a piecewise linear approximation maps LSF <-> cos(LSF) */ -/* therefore the result is not accurate LSFs, but the two */ -/* functions are accurate inverses of each other */ - -#include "SigProc_FIX.h" -#include "tables.h" - -#define QA 16 - -/* helper function for NLSF2A(..) 
*/ -static OPUS_INLINE void silk_NLSF2A_find_poly( - opus_int32 *out, /* O intermediate polynomial, QA [dd+1] */ - const opus_int32 *cLSF, /* I vector of interleaved 2*cos(LSFs), QA [d] */ - opus_int dd /* I polynomial order (= 1/2 * filter order) */ -) -{ - opus_int k, n; - opus_int32 ftmp; - - out[0] = silk_LSHIFT( 1, QA ); - out[1] = -cLSF[0]; - for( k = 1; k < dd; k++ ) { - ftmp = cLSF[2*k]; /* QA*/ - out[k+1] = silk_LSHIFT( out[k-1], 1 ) - (opus_int32)silk_RSHIFT_ROUND64( silk_SMULL( ftmp, out[k] ), QA ); - for( n = k; n > 1; n-- ) { - out[n] += out[n-2] - (opus_int32)silk_RSHIFT_ROUND64( silk_SMULL( ftmp, out[n-1] ), QA ); - } - out[1] -= ftmp; - } -} - -/* compute whitening filter coefficients from normalized line spectral frequencies */ -void silk_NLSF2A( - opus_int16 *a_Q12, /* O monic whitening filter coefficients in Q12, [ d ] */ - const opus_int16 *NLSF, /* I normalized line spectral frequencies in Q15, [ d ] */ - const opus_int d /* I filter order (should be even) */ -) -{ - /* This ordering was found to maximize quality. It improves numerical accuracy of - silk_NLSF2A_find_poly() compared to "standard" ordering. */ - static const unsigned char ordering16[16] = { - 0, 15, 8, 7, 4, 11, 12, 3, 2, 13, 10, 5, 6, 9, 14, 1 - }; - static const unsigned char ordering10[10] = { - 0, 9, 6, 3, 4, 5, 8, 1, 2, 7 - }; - const unsigned char *ordering; - opus_int k, i, dd; - opus_int32 cos_LSF_QA[ SILK_MAX_ORDER_LPC ]; - opus_int32 P[ SILK_MAX_ORDER_LPC / 2 + 1 ], Q[ SILK_MAX_ORDER_LPC / 2 + 1 ]; - opus_int32 Ptmp, Qtmp, f_int, f_frac, cos_val, delta; - opus_int32 a32_QA1[ SILK_MAX_ORDER_LPC ]; - opus_int32 maxabs, absval, idx=0, sc_Q16; - - silk_assert( LSF_COS_TAB_SZ_FIX == 128 ); - silk_assert( d==10||d==16 ); - - /* convert LSFs to 2*cos(LSF), using piecewise linear curve from table */ - ordering = d == 16 ? 
ordering16 : ordering10; - for( k = 0; k < d; k++ ) { - silk_assert(NLSF[k] >= 0 ); - - /* f_int on a scale 0-127 (rounded down) */ - f_int = silk_RSHIFT( NLSF[k], 15 - 7 ); - - /* f_frac, range: 0..255 */ - f_frac = NLSF[k] - silk_LSHIFT( f_int, 15 - 7 ); - - silk_assert(f_int >= 0); - silk_assert(f_int < LSF_COS_TAB_SZ_FIX ); - - /* Read start and end value from table */ - cos_val = silk_LSFCosTab_FIX_Q12[ f_int ]; /* Q12 */ - delta = silk_LSFCosTab_FIX_Q12[ f_int + 1 ] - cos_val; /* Q12, with a range of 0..200 */ - - /* Linear interpolation */ - cos_LSF_QA[ordering[k]] = silk_RSHIFT_ROUND( silk_LSHIFT( cos_val, 8 ) + silk_MUL( delta, f_frac ), 20 - QA ); /* QA */ - } - - dd = silk_RSHIFT( d, 1 ); - - /* generate even and odd polynomials using convolution */ - silk_NLSF2A_find_poly( P, &cos_LSF_QA[ 0 ], dd ); - silk_NLSF2A_find_poly( Q, &cos_LSF_QA[ 1 ], dd ); - - /* convert even and odd polynomials to opus_int32 Q12 filter coefs */ - for( k = 0; k < dd; k++ ) { - Ptmp = P[ k+1 ] + P[ k ]; - Qtmp = Q[ k+1 ] - Q[ k ]; - - /* the Ptmp and Qtmp values at this stage need to fit in int32 */ - a32_QA1[ k ] = -Qtmp - Ptmp; /* QA+1 */ - a32_QA1[ d-k-1 ] = Qtmp - Ptmp; /* QA+1 */ - } - - /* Limit the maximum absolute value of the prediction coefficients, so that they'll fit in int16 */ - for( i = 0; i < 10; i++ ) { - /* Find maximum absolute value and its index */ - maxabs = 0; - for( k = 0; k < d; k++ ) { - absval = silk_abs( a32_QA1[k] ); - if( absval > maxabs ) { - maxabs = absval; - idx = k; - } - } - maxabs = silk_RSHIFT_ROUND( maxabs, QA + 1 - 12 ); /* QA+1 -> Q12 */ - - if( maxabs > silk_int16_MAX ) { - /* Reduce magnitude of prediction coefficients */ - maxabs = silk_min( maxabs, 163838 ); /* ( silk_int32_MAX >> 14 ) + silk_int16_MAX = 163838 */ - sc_Q16 = SILK_FIX_CONST( 0.999, 16 ) - silk_DIV32( silk_LSHIFT( maxabs - silk_int16_MAX, 14 ), - silk_RSHIFT32( silk_MUL( maxabs, idx + 1), 2 ) ); - silk_bwexpander_32( a32_QA1, d, sc_Q16 ); - } else { - break; - } - } - 
- if( i == 10 ) { - /* Reached the last iteration, clip the coefficients */ - for( k = 0; k < d; k++ ) { - a_Q12[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ) ); /* QA+1 -> Q12 */ - a32_QA1[ k ] = silk_LSHIFT( (opus_int32)a_Q12[ k ], QA + 1 - 12 ); - } - } else { - for( k = 0; k < d; k++ ) { - a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */ - } - } - - for( i = 0; i < MAX_LPC_STABILIZE_ITERATIONS; i++ ) { - if( silk_LPC_inverse_pred_gain( a_Q12, d ) < SILK_FIX_CONST( 1.0 / MAX_PREDICTION_POWER_GAIN, 30 ) ) { - /* Prediction coefficients are (too close to) unstable; apply bandwidth expansion */ - /* on the unscaled coefficients, convert to Q12 and measure again */ - silk_bwexpander_32( a32_QA1, d, 65536 - silk_LSHIFT( 2, i ) ); - for( k = 0; k < d; k++ ) { - a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */ - } - } else { - break; - } - } -} - diff --git a/thirdparty/opus/silk/NLSF_VQ.c b/thirdparty/opus/silk/NLSF_VQ.c deleted file mode 100644 index 69b6e22e18..0000000000 --- a/thirdparty/opus/silk/NLSF_VQ.c +++ /dev/null @@ -1,68 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Compute quantization errors for an LPC_order element input vector for a VQ codebook */ -void silk_NLSF_VQ( - opus_int32 err_Q26[], /* O Quantization errors [K] */ - const opus_int16 in_Q15[], /* I Input vectors to be quantized [LPC_order] */ - const opus_uint8 pCB_Q8[], /* I Codebook vectors [K*LPC_order] */ - const opus_int K, /* I Number of codebook vectors */ - const opus_int LPC_order /* I Number of LPCs */ -) -{ - opus_int i, m; - opus_int32 diff_Q15, sum_error_Q30, sum_error_Q26; - - silk_assert( LPC_order <= 16 ); - silk_assert( ( LPC_order & 1 ) == 0 ); - - /* Loop over codebook */ - for( i = 0; i < K; i++ ) { - sum_error_Q26 = 0; - for( m = 0; m < LPC_order; m += 2 ) { - /* Compute weighted squared quantization error for index m */ - diff_Q15 = silk_SUB_LSHIFT32( in_Q15[ m ], (opus_int32)*pCB_Q8++, 7 ); /* range: [ -32767 : 32767 ]*/ - 
sum_error_Q30 = silk_SMULBB( diff_Q15, diff_Q15 ); - - /* Compute weighted squared quantization error for index m + 1 */ - diff_Q15 = silk_SUB_LSHIFT32( in_Q15[m + 1], (opus_int32)*pCB_Q8++, 7 ); /* range: [ -32767 : 32767 ]*/ - sum_error_Q30 = silk_SMLABB( sum_error_Q30, diff_Q15, diff_Q15 ); - - sum_error_Q26 = silk_ADD_RSHIFT32( sum_error_Q26, sum_error_Q30, 4 ); - - silk_assert( sum_error_Q26 >= 0 ); - silk_assert( sum_error_Q30 >= 0 ); - } - err_Q26[ i ] = sum_error_Q26; - } -} diff --git a/thirdparty/opus/silk/NLSF_VQ_weights_laroia.c b/thirdparty/opus/silk/NLSF_VQ_weights_laroia.c deleted file mode 100644 index 04894c59ab..0000000000 --- a/thirdparty/opus/silk/NLSF_VQ_weights_laroia.c +++ /dev/null @@ -1,80 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "define.h" -#include "SigProc_FIX.h" - -/* -R. Laroia, N. Phamdo and N. Farvardin, "Robust and Efficient Quantization of Speech LSP -Parameters Using Structured Vector Quantization", Proc. IEEE Int. Conf. Acoust., Speech, -Signal Processing, pp. 641-644, 1991. -*/ - -/* Laroia low complexity NLSF weights */ -void silk_NLSF_VQ_weights_laroia( - opus_int16 *pNLSFW_Q_OUT, /* O Pointer to input vector weights [D] */ - const opus_int16 *pNLSF_Q15, /* I Pointer to input vector [D] */ - const opus_int D /* I Input vector dimension (even) */ -) -{ - opus_int k; - opus_int32 tmp1_int, tmp2_int; - - silk_assert( D > 0 ); - silk_assert( ( D & 1 ) == 0 ); - - /* First value */ - tmp1_int = silk_max_int( pNLSF_Q15[ 0 ], 1 ); - tmp1_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp1_int ); - tmp2_int = silk_max_int( pNLSF_Q15[ 1 ] - pNLSF_Q15[ 0 ], 1 ); - tmp2_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp2_int ); - pNLSFW_Q_OUT[ 0 ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX ); - silk_assert( pNLSFW_Q_OUT[ 0 ] > 0 ); - - /* Main loop */ - for( k = 1; k < D - 1; k += 2 ) { - tmp1_int = silk_max_int( pNLSF_Q15[ k + 1 ] - pNLSF_Q15[ k ], 1 ); - tmp1_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp1_int ); - pNLSFW_Q_OUT[ k ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, 
silk_int16_MAX ); - silk_assert( pNLSFW_Q_OUT[ k ] > 0 ); - - tmp2_int = silk_max_int( pNLSF_Q15[ k + 2 ] - pNLSF_Q15[ k + 1 ], 1 ); - tmp2_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp2_int ); - pNLSFW_Q_OUT[ k + 1 ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX ); - silk_assert( pNLSFW_Q_OUT[ k + 1 ] > 0 ); - } - - /* Last value */ - tmp1_int = silk_max_int( ( 1 << 15 ) - pNLSF_Q15[ D - 1 ], 1 ); - tmp1_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp1_int ); - pNLSFW_Q_OUT[ D - 1 ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX ); - silk_assert( pNLSFW_Q_OUT[ D - 1 ] > 0 ); -} diff --git a/thirdparty/opus/silk/NLSF_decode.c b/thirdparty/opus/silk/NLSF_decode.c deleted file mode 100644 index 9f715060b8..0000000000 --- a/thirdparty/opus/silk/NLSF_decode.c +++ /dev/null @@ -1,101 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Predictive dequantizer for NLSF residuals */ -static OPUS_INLINE void silk_NLSF_residual_dequant( /* O Returns RD value in Q30 */ - opus_int16 x_Q10[], /* O Output [ order ] */ - const opus_int8 indices[], /* I Quantization indices [ order ] */ - const opus_uint8 pred_coef_Q8[], /* I Backward predictor coefs [ order ] */ - const opus_int quant_step_size_Q16, /* I Quantization step size */ - const opus_int16 order /* I Number of input values */ -) -{ - opus_int i, out_Q10, pred_Q10; - - out_Q10 = 0; - for( i = order-1; i >= 0; i-- ) { - pred_Q10 = silk_RSHIFT( silk_SMULBB( out_Q10, (opus_int16)pred_coef_Q8[ i ] ), 8 ); - out_Q10 = silk_LSHIFT( indices[ i ], 10 ); - if( out_Q10 > 0 ) { - out_Q10 = silk_SUB16( out_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - } else if( out_Q10 < 0 ) { - out_Q10 = silk_ADD16( out_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - } - out_Q10 = silk_SMLAWB( pred_Q10, (opus_int32)out_Q10, quant_step_size_Q16 ); - x_Q10[ i ] = out_Q10; - } -} - - -/***********************/ -/* NLSF vector decoder */ -/***********************/ -void silk_NLSF_decode( - opus_int16 *pNLSF_Q15, /* O Quantized NLSF vector [ LPC_ORDER ] */ - opus_int8 *NLSFIndices, /* I Codebook path vector [ LPC_ORDER + 1 ] */ - const silk_NLSF_CB_struct *psNLSF_CB /* I Codebook object */ -) -{ - opus_int 
i; - opus_uint8 pred_Q8[ MAX_LPC_ORDER ]; - opus_int16 ec_ix[ MAX_LPC_ORDER ]; - opus_int16 res_Q10[ MAX_LPC_ORDER ]; - opus_int16 W_tmp_QW[ MAX_LPC_ORDER ]; - opus_int32 W_tmp_Q9, NLSF_Q15_tmp; - const opus_uint8 *pCB_element; - - /* Decode first stage */ - pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ NLSFIndices[ 0 ] * psNLSF_CB->order ]; - for( i = 0; i < psNLSF_CB->order; i++ ) { - pNLSF_Q15[ i ] = silk_LSHIFT( (opus_int16)pCB_element[ i ], 7 ); - } - - /* Unpack entropy table indices and predictor for current CB1 index */ - silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, NLSFIndices[ 0 ] ); - - /* Predictive residual dequantizer */ - silk_NLSF_residual_dequant( res_Q10, &NLSFIndices[ 1 ], pred_Q8, psNLSF_CB->quantStepSize_Q16, psNLSF_CB->order ); - - /* Weights from codebook vector */ - silk_NLSF_VQ_weights_laroia( W_tmp_QW, pNLSF_Q15, psNLSF_CB->order ); - - /* Apply inverse square-rooted weights and add to output */ - for( i = 0; i < psNLSF_CB->order; i++ ) { - W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) ); - NLSF_Q15_tmp = silk_ADD32( pNLSF_Q15[ i ], silk_DIV32_16( silk_LSHIFT( (opus_int32)res_Q10[ i ], 14 ), W_tmp_Q9 ) ); - pNLSF_Q15[ i ] = (opus_int16)silk_LIMIT( NLSF_Q15_tmp, 0, 32767 ); - } - - /* NLSF stabilization */ - silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order ); -} diff --git a/thirdparty/opus/silk/NLSF_del_dec_quant.c b/thirdparty/opus/silk/NLSF_del_dec_quant.c deleted file mode 100644 index de88fee060..0000000000 --- a/thirdparty/opus/silk/NLSF_del_dec_quant.c +++ /dev/null @@ -1,217 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Delayed-decision quantizer for NLSF residuals */ -opus_int32 silk_NLSF_del_dec_quant( /* O Returns RD value in Q25 */ - opus_int8 indices[], /* O Quantization indices [ order ] */ - const opus_int16 x_Q10[], /* I Input [ order ] */ - const opus_int16 w_Q5[], /* I Weights [ order ] */ - const opus_uint8 pred_coef_Q8[], /* I Backward predictor coefs [ order ] */ - const opus_int16 ec_ix[], /* I Indices to entropy coding tables [ order ] */ - const opus_uint8 ec_rates_Q5[], /* I Rates [] */ - const opus_int quant_step_size_Q16, /* I Quantization step size */ - const opus_int16 inv_quant_step_size_Q6, /* I Inverse quantization step size */ - const opus_int32 mu_Q20, /* I R/D tradeoff */ - const opus_int16 order /* I Number of input values */ -) -{ - opus_int i, j, nStates, ind_tmp, ind_min_max, ind_max_min, in_Q10, res_Q10; - opus_int pred_Q10, diff_Q10, rate0_Q5, rate1_Q5; - opus_int16 out0_Q10, out1_Q10; - opus_int32 RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25; - opus_int ind_sort[ NLSF_QUANT_DEL_DEC_STATES ]; - opus_int8 ind[ NLSF_QUANT_DEL_DEC_STATES ][ MAX_LPC_ORDER ]; - opus_int16 prev_out_Q10[ 2 * NLSF_QUANT_DEL_DEC_STATES ]; - opus_int32 RD_Q25[ 2 * NLSF_QUANT_DEL_DEC_STATES ]; - opus_int32 RD_min_Q25[ NLSF_QUANT_DEL_DEC_STATES ]; - opus_int32 RD_max_Q25[ NLSF_QUANT_DEL_DEC_STATES ]; - const opus_uint8 *rates_Q5; - - opus_int out0_Q10_table[2 * NLSF_QUANT_MAX_AMPLITUDE_EXT]; - opus_int out1_Q10_table[2 * NLSF_QUANT_MAX_AMPLITUDE_EXT]; - - for (i = -NLSF_QUANT_MAX_AMPLITUDE_EXT; i <= NLSF_QUANT_MAX_AMPLITUDE_EXT-1; i++) - { - out0_Q10 = silk_LSHIFT( i, 10 ); - out1_Q10 = silk_ADD16( out0_Q10, 1024 ); - if( i > 0 ) { - out0_Q10 = silk_SUB16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - } else if( i == 0 ) { - out1_Q10 
= silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - } else if( i == -1 ) { - out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - } else { - out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); - } - out0_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_RSHIFT( silk_SMULBB( out0_Q10, quant_step_size_Q16 ), 16 ); - out1_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_RSHIFT( silk_SMULBB( out1_Q10, quant_step_size_Q16 ), 16 ); - } - - silk_assert( (NLSF_QUANT_DEL_DEC_STATES & (NLSF_QUANT_DEL_DEC_STATES-1)) == 0 ); /* must be power of two */ - - nStates = 1; - RD_Q25[ 0 ] = 0; - prev_out_Q10[ 0 ] = 0; - for( i = order - 1; ; i-- ) { - rates_Q5 = &ec_rates_Q5[ ec_ix[ i ] ]; - in_Q10 = x_Q10[ i ]; - for( j = 0; j < nStates; j++ ) { - pred_Q10 = silk_RSHIFT( silk_SMULBB( (opus_int16)pred_coef_Q8[ i ], prev_out_Q10[ j ] ), 8 ); - res_Q10 = silk_SUB16( in_Q10, pred_Q10 ); - ind_tmp = silk_RSHIFT( silk_SMULBB( inv_quant_step_size_Q6, res_Q10 ), 16 ); - ind_tmp = silk_LIMIT( ind_tmp, -NLSF_QUANT_MAX_AMPLITUDE_EXT, NLSF_QUANT_MAX_AMPLITUDE_EXT-1 ); - ind[ j ][ i ] = (opus_int8)ind_tmp; - - /* compute outputs for ind_tmp and ind_tmp + 1 */ - out0_Q10 = out0_Q10_table[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE_EXT ]; - out1_Q10 = out1_Q10_table[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE_EXT ]; - - out0_Q10 = silk_ADD16( out0_Q10, pred_Q10 ); - out1_Q10 = silk_ADD16( out1_Q10, pred_Q10 ); - prev_out_Q10[ j ] = out0_Q10; - prev_out_Q10[ j + nStates ] = out1_Q10; - - /* compute RD for ind_tmp and ind_tmp + 1 */ - if( ind_tmp + 1 >= NLSF_QUANT_MAX_AMPLITUDE ) { - if( ind_tmp + 1 == NLSF_QUANT_MAX_AMPLITUDE ) { - rate0_Q5 = rates_Q5[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE ]; - rate1_Q5 = 280; - } else { - rate0_Q5 = silk_SMLABB( 280 - 43 * NLSF_QUANT_MAX_AMPLITUDE, 43, ind_tmp ); - rate1_Q5 = silk_ADD16( rate0_Q5, 43 ); - } - } else if( 
ind_tmp <= -NLSF_QUANT_MAX_AMPLITUDE ) { - if( ind_tmp == -NLSF_QUANT_MAX_AMPLITUDE ) { - rate0_Q5 = 280; - rate1_Q5 = rates_Q5[ ind_tmp + 1 + NLSF_QUANT_MAX_AMPLITUDE ]; - } else { - rate0_Q5 = silk_SMLABB( 280 - 43 * NLSF_QUANT_MAX_AMPLITUDE, -43, ind_tmp ); - rate1_Q5 = silk_SUB16( rate0_Q5, 43 ); - } - } else { - rate0_Q5 = rates_Q5[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE ]; - rate1_Q5 = rates_Q5[ ind_tmp + 1 + NLSF_QUANT_MAX_AMPLITUDE ]; - } - RD_tmp_Q25 = RD_Q25[ j ]; - diff_Q10 = silk_SUB16( in_Q10, out0_Q10 ); - RD_Q25[ j ] = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate0_Q5 ); - diff_Q10 = silk_SUB16( in_Q10, out1_Q10 ); - RD_Q25[ j + nStates ] = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate1_Q5 ); - } - - if( nStates <= ( NLSF_QUANT_DEL_DEC_STATES >> 1 ) ) { - /* double number of states and copy */ - for( j = 0; j < nStates; j++ ) { - ind[ j + nStates ][ i ] = ind[ j ][ i ] + 1; - } - nStates = silk_LSHIFT( nStates, 1 ); - for( j = nStates; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) { - ind[ j ][ i ] = ind[ j - nStates ][ i ]; - } - } else if( i > 0 ) { - /* sort lower and upper half of RD_Q25, pairwise */ - for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) { - if( RD_Q25[ j ] > RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ] ) { - RD_max_Q25[ j ] = RD_Q25[ j ]; - RD_min_Q25[ j ] = RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ]; - RD_Q25[ j ] = RD_min_Q25[ j ]; - RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ] = RD_max_Q25[ j ]; - /* swap prev_out values */ - out0_Q10 = prev_out_Q10[ j ]; - prev_out_Q10[ j ] = prev_out_Q10[ j + NLSF_QUANT_DEL_DEC_STATES ]; - prev_out_Q10[ j + NLSF_QUANT_DEL_DEC_STATES ] = out0_Q10; - ind_sort[ j ] = j + NLSF_QUANT_DEL_DEC_STATES; - } else { - RD_min_Q25[ j ] = RD_Q25[ j ]; - RD_max_Q25[ j ] = RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ]; - ind_sort[ j ] = j; - } - } - /* compare the highest RD values of the winning half with the lowest one in the losing half, 
and copy if necessary */ - /* afterwards ind_sort[] will contain the indices of the NLSF_QUANT_DEL_DEC_STATES winning RD values */ - while( 1 ) { - min_max_Q25 = silk_int32_MAX; - max_min_Q25 = 0; - ind_min_max = 0; - ind_max_min = 0; - for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) { - if( min_max_Q25 > RD_max_Q25[ j ] ) { - min_max_Q25 = RD_max_Q25[ j ]; - ind_min_max = j; - } - if( max_min_Q25 < RD_min_Q25[ j ] ) { - max_min_Q25 = RD_min_Q25[ j ]; - ind_max_min = j; - } - } - if( min_max_Q25 >= max_min_Q25 ) { - break; - } - /* copy ind_min_max to ind_max_min */ - ind_sort[ ind_max_min ] = ind_sort[ ind_min_max ] ^ NLSF_QUANT_DEL_DEC_STATES; - RD_Q25[ ind_max_min ] = RD_Q25[ ind_min_max + NLSF_QUANT_DEL_DEC_STATES ]; - prev_out_Q10[ ind_max_min ] = prev_out_Q10[ ind_min_max + NLSF_QUANT_DEL_DEC_STATES ]; - RD_min_Q25[ ind_max_min ] = 0; - RD_max_Q25[ ind_min_max ] = silk_int32_MAX; - silk_memcpy( ind[ ind_max_min ], ind[ ind_min_max ], MAX_LPC_ORDER * sizeof( opus_int8 ) ); - } - /* increment index if it comes from the upper half */ - for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) { - ind[ j ][ i ] += silk_RSHIFT( ind_sort[ j ], NLSF_QUANT_DEL_DEC_STATES_LOG2 ); - } - } else { /* i == 0 */ - break; - } - } - - /* last sample: find winner, copy indices and return RD value */ - ind_tmp = 0; - min_Q25 = silk_int32_MAX; - for( j = 0; j < 2 * NLSF_QUANT_DEL_DEC_STATES; j++ ) { - if( min_Q25 > RD_Q25[ j ] ) { - min_Q25 = RD_Q25[ j ]; - ind_tmp = j; - } - } - for( j = 0; j < order; j++ ) { - indices[ j ] = ind[ ind_tmp & ( NLSF_QUANT_DEL_DEC_STATES - 1 ) ][ j ]; - silk_assert( indices[ j ] >= -NLSF_QUANT_MAX_AMPLITUDE_EXT ); - silk_assert( indices[ j ] <= NLSF_QUANT_MAX_AMPLITUDE_EXT ); - } - indices[ 0 ] += silk_RSHIFT( ind_tmp, NLSF_QUANT_DEL_DEC_STATES_LOG2 ); - silk_assert( indices[ 0 ] <= NLSF_QUANT_MAX_AMPLITUDE_EXT ); - silk_assert( min_Q25 >= 0 ); - return min_Q25; -} diff --git a/thirdparty/opus/silk/NLSF_encode.c b/thirdparty/opus/silk/NLSF_encode.c 
deleted file mode 100644 index f03c3f1c35..0000000000 --- a/thirdparty/opus/silk/NLSF_encode.c +++ /dev/null @@ -1,137 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" - -/***********************/ -/* NLSF vector encoder */ -/***********************/ -opus_int32 silk_NLSF_encode( /* O Returns RD value in Q25 */ - opus_int8 *NLSFIndices, /* I Codebook path vector [ LPC_ORDER + 1 ] */ - opus_int16 *pNLSF_Q15, /* I/O Quantized NLSF vector [ LPC_ORDER ] */ - const silk_NLSF_CB_struct *psNLSF_CB, /* I Codebook object */ - const opus_int16 *pW_QW, /* I NLSF weight vector [ LPC_ORDER ] */ - const opus_int NLSF_mu_Q20, /* I Rate weight for the RD optimization */ - const opus_int nSurvivors, /* I Max survivors after first stage */ - const opus_int signalType /* I Signal type: 0/1/2 */ -) -{ - opus_int i, s, ind1, bestIndex, prob_Q8, bits_q7; - opus_int32 W_tmp_Q9, ret; - VARDECL( opus_int32, err_Q26 ); - VARDECL( opus_int32, RD_Q25 ); - VARDECL( opus_int, tempIndices1 ); - VARDECL( opus_int8, tempIndices2 ); - opus_int16 res_Q15[ MAX_LPC_ORDER ]; - opus_int16 res_Q10[ MAX_LPC_ORDER ]; - opus_int16 NLSF_tmp_Q15[ MAX_LPC_ORDER ]; - opus_int16 W_tmp_QW[ MAX_LPC_ORDER ]; - opus_int16 W_adj_Q5[ MAX_LPC_ORDER ]; - opus_uint8 pred_Q8[ MAX_LPC_ORDER ]; - opus_int16 ec_ix[ MAX_LPC_ORDER ]; - const opus_uint8 *pCB_element, *iCDF_ptr; - SAVE_STACK; - - silk_assert( nSurvivors <= NLSF_VQ_MAX_SURVIVORS ); - silk_assert( signalType >= 0 && signalType <= 2 ); - silk_assert( NLSF_mu_Q20 <= 32767 && NLSF_mu_Q20 >= 0 ); - - /* NLSF stabilization */ - silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order ); - - /* First stage: VQ */ - ALLOC( err_Q26, psNLSF_CB->nVectors, opus_int32 ); - silk_NLSF_VQ( err_Q26, pNLSF_Q15, psNLSF_CB->CB1_NLSF_Q8, psNLSF_CB->nVectors, psNLSF_CB->order ); - - /* Sort the quantization errors */ - ALLOC( tempIndices1, nSurvivors, opus_int ); - silk_insertion_sort_increasing( err_Q26, tempIndices1, psNLSF_CB->nVectors, nSurvivors ); - - 
ALLOC( RD_Q25, nSurvivors, opus_int32 ); - ALLOC( tempIndices2, nSurvivors * MAX_LPC_ORDER, opus_int8 ); - - /* Loop over survivors */ - for( s = 0; s < nSurvivors; s++ ) { - ind1 = tempIndices1[ s ]; - - /* Residual after first stage */ - pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ ind1 * psNLSF_CB->order ]; - for( i = 0; i < psNLSF_CB->order; i++ ) { - NLSF_tmp_Q15[ i ] = silk_LSHIFT16( (opus_int16)pCB_element[ i ], 7 ); - res_Q15[ i ] = pNLSF_Q15[ i ] - NLSF_tmp_Q15[ i ]; - } - - /* Weights from codebook vector */ - silk_NLSF_VQ_weights_laroia( W_tmp_QW, NLSF_tmp_Q15, psNLSF_CB->order ); - - /* Apply square-rooted weights */ - for( i = 0; i < psNLSF_CB->order; i++ ) { - W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) ); - res_Q10[ i ] = (opus_int16)silk_RSHIFT( silk_SMULBB( res_Q15[ i ], W_tmp_Q9 ), 14 ); - } - - /* Modify input weights accordingly */ - for( i = 0; i < psNLSF_CB->order; i++ ) { - W_adj_Q5[ i ] = silk_DIV32_16( silk_LSHIFT( (opus_int32)pW_QW[ i ], 5 ), W_tmp_QW[ i ] ); - } - - /* Unpack entropy table indices and predictor for current CB1 index */ - silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, ind1 ); - - /* Trellis quantizer */ - RD_Q25[ s ] = silk_NLSF_del_dec_quant( &tempIndices2[ s * MAX_LPC_ORDER ], res_Q10, W_adj_Q5, pred_Q8, ec_ix, - psNLSF_CB->ec_Rates_Q5, psNLSF_CB->quantStepSize_Q16, psNLSF_CB->invQuantStepSize_Q6, NLSF_mu_Q20, psNLSF_CB->order ); - - /* Add rate for first stage */ - iCDF_ptr = &psNLSF_CB->CB1_iCDF[ ( signalType >> 1 ) * psNLSF_CB->nVectors ]; - if( ind1 == 0 ) { - prob_Q8 = 256 - iCDF_ptr[ ind1 ]; - } else { - prob_Q8 = iCDF_ptr[ ind1 - 1 ] - iCDF_ptr[ ind1 ]; - } - bits_q7 = ( 8 << 7 ) - silk_lin2log( prob_Q8 ); - RD_Q25[ s ] = silk_SMLABB( RD_Q25[ s ], bits_q7, silk_RSHIFT( NLSF_mu_Q20, 2 ) ); - } - - /* Find the lowest rate-distortion error */ - silk_insertion_sort_increasing( RD_Q25, &bestIndex, nSurvivors, 1 ); - - NLSFIndices[ 0 ] = (opus_int8)tempIndices1[ bestIndex ]; - 
silk_memcpy( &NLSFIndices[ 1 ], &tempIndices2[ bestIndex * MAX_LPC_ORDER ], psNLSF_CB->order * sizeof( opus_int8 ) ); - - /* Decode */ - silk_NLSF_decode( pNLSF_Q15, NLSFIndices, psNLSF_CB ); - - ret = RD_Q25[ 0 ]; - RESTORE_STACK; - return ret; -} diff --git a/thirdparty/opus/silk/NLSF_stabilize.c b/thirdparty/opus/silk/NLSF_stabilize.c deleted file mode 100644 index 8f3426b91e..0000000000 --- a/thirdparty/opus/silk/NLSF_stabilize.c +++ /dev/null @@ -1,142 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* NLSF stabilizer: */ -/* */ -/* - Moves NLSFs further apart if they are too close */ -/* - Moves NLSFs away from borders if they are too close */ -/* - High effort to achieve a modification with minimum */ -/* Euclidean distance to input vector */ -/* - Output are sorted NLSF coefficients */ -/* */ - -#include "SigProc_FIX.h" - -/* Constant Definitions */ -#define MAX_LOOPS 20 - -/* NLSF stabilizer, for a single input data vector */ -void silk_NLSF_stabilize( - opus_int16 *NLSF_Q15, /* I/O Unstable/stabilized normalized LSF vector in Q15 [L] */ - const opus_int16 *NDeltaMin_Q15, /* I Min distance vector, NDeltaMin_Q15[L] must be >= 1 [L+1] */ - const opus_int L /* I Number of NLSF parameters in the input vector */ -) -{ - opus_int i, I=0, k, loops; - opus_int16 center_freq_Q15; - opus_int32 diff_Q15, min_diff_Q15, min_center_Q15, max_center_Q15; - - /* This is necessary to ensure an output within range of a opus_int16 */ - silk_assert( NDeltaMin_Q15[L] >= 1 ); - - for( loops = 0; loops < MAX_LOOPS; loops++ ) { - /**************************/ - /* Find smallest distance */ - /**************************/ - /* First element */ - min_diff_Q15 = NLSF_Q15[0] - NDeltaMin_Q15[0]; - I = 0; - /* Middle elements */ - for( i = 1; i <= L-1; i++ ) { - diff_Q15 = NLSF_Q15[i] - ( NLSF_Q15[i-1] + NDeltaMin_Q15[i] ); - if( diff_Q15 < 
min_diff_Q15 ) { - min_diff_Q15 = diff_Q15; - I = i; - } - } - /* Last element */ - diff_Q15 = ( 1 << 15 ) - ( NLSF_Q15[L-1] + NDeltaMin_Q15[L] ); - if( diff_Q15 < min_diff_Q15 ) { - min_diff_Q15 = diff_Q15; - I = L; - } - - /***************************************************/ - /* Now check if the smallest distance non-negative */ - /***************************************************/ - if( min_diff_Q15 >= 0 ) { - return; - } - - if( I == 0 ) { - /* Move away from lower limit */ - NLSF_Q15[0] = NDeltaMin_Q15[0]; - - } else if( I == L) { - /* Move away from higher limit */ - NLSF_Q15[L-1] = ( 1 << 15 ) - NDeltaMin_Q15[L]; - - } else { - /* Find the lower extreme for the location of the current center frequency */ - min_center_Q15 = 0; - for( k = 0; k < I; k++ ) { - min_center_Q15 += NDeltaMin_Q15[k]; - } - min_center_Q15 += silk_RSHIFT( NDeltaMin_Q15[I], 1 ); - - /* Find the upper extreme for the location of the current center frequency */ - max_center_Q15 = 1 << 15; - for( k = L; k > I; k-- ) { - max_center_Q15 -= NDeltaMin_Q15[k]; - } - max_center_Q15 -= silk_RSHIFT( NDeltaMin_Q15[I], 1 ); - - /* Move apart, sorted by value, keeping the same center frequency */ - center_freq_Q15 = (opus_int16)silk_LIMIT_32( silk_RSHIFT_ROUND( (opus_int32)NLSF_Q15[I-1] + (opus_int32)NLSF_Q15[I], 1 ), - min_center_Q15, max_center_Q15 ); - NLSF_Q15[I-1] = center_freq_Q15 - silk_RSHIFT( NDeltaMin_Q15[I], 1 ); - NLSF_Q15[I] = NLSF_Q15[I-1] + NDeltaMin_Q15[I]; - } - } - - /* Safe and simple fall back method, which is less ideal than the above */ - if( loops == MAX_LOOPS ) - { - /* Insertion sort (fast for already almost sorted arrays): */ - /* Best case: O(n) for an already sorted array */ - /* Worst case: O(n^2) for an inversely sorted array */ - silk_insertion_sort_increasing_all_values_int16( &NLSF_Q15[0], L ); - - /* First NLSF should be no less than NDeltaMin[0] */ - NLSF_Q15[0] = silk_max_int( NLSF_Q15[0], NDeltaMin_Q15[0] ); - - /* Keep delta_min distance between the NLSFs */ 
- for( i = 1; i < L; i++ ) - NLSF_Q15[i] = silk_max_int( NLSF_Q15[i], silk_ADD_SAT16( NLSF_Q15[i-1], NDeltaMin_Q15[i] ) ); - - /* Last NLSF should be no higher than 1 - NDeltaMin[L] */ - NLSF_Q15[L-1] = silk_min_int( NLSF_Q15[L-1], (1<<15) - NDeltaMin_Q15[L] ); - - /* Keep NDeltaMin distance between the NLSFs */ - for( i = L-2; i >= 0; i-- ) - NLSF_Q15[i] = silk_min_int( NLSF_Q15[i], NLSF_Q15[i+1] - NDeltaMin_Q15[i+1] ); - } -} diff --git a/thirdparty/opus/silk/NLSF_unpack.c b/thirdparty/opus/silk/NLSF_unpack.c deleted file mode 100644 index 17bd23f752..0000000000 --- a/thirdparty/opus/silk/NLSF_unpack.c +++ /dev/null @@ -1,55 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Unpack predictor values and indices for entropy coding tables */ -void silk_NLSF_unpack( - opus_int16 ec_ix[], /* O Indices to entropy tables [ LPC_ORDER ] */ - opus_uint8 pred_Q8[], /* O LSF predictor [ LPC_ORDER ] */ - const silk_NLSF_CB_struct *psNLSF_CB, /* I Codebook object */ - const opus_int CB1_index /* I Index of vector in first LSF codebook */ -) -{ - opus_int i; - opus_uint8 entry; - const opus_uint8 *ec_sel_ptr; - - ec_sel_ptr = &psNLSF_CB->ec_sel[ CB1_index * psNLSF_CB->order / 2 ]; - for( i = 0; i < psNLSF_CB->order; i += 2 ) { - entry = *ec_sel_ptr++; - ec_ix [ i ] = silk_SMULBB( silk_RSHIFT( entry, 1 ) & 7, 2 * NLSF_QUANT_MAX_AMPLITUDE + 1 ); - pred_Q8[ i ] = psNLSF_CB->pred_Q8[ i + ( entry & 1 ) * ( psNLSF_CB->order - 1 ) ]; - ec_ix [ i + 1 ] = silk_SMULBB( silk_RSHIFT( entry, 5 ) & 7, 2 * NLSF_QUANT_MAX_AMPLITUDE + 1 ); - pred_Q8[ i + 1 ] = psNLSF_CB->pred_Q8[ i + ( silk_RSHIFT( entry, 4 ) & 1 ) * ( psNLSF_CB->order - 1 ) + 1 ]; - } -} - diff --git a/thirdparty/opus/silk/NSQ.c b/thirdparty/opus/silk/NSQ.c deleted file mode 100644 index 43e3fee7e0..0000000000 --- a/thirdparty/opus/silk/NSQ.c +++ /dev/null @@ -1,429 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" -#include "NSQ.h" - - -static OPUS_INLINE void silk_nsq_scale_states( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - const opus_int32 x_Q3[], /* I input in Q3 */ - opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ - const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I subframe number */ - const opus_int LTP_scale_Q14, /* I */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type /* I Signal type */ -); - -#if !defined(OPUS_X86_MAY_HAVE_SSE4_1) -static OPUS_INLINE void silk_noise_shape_quantizer( - silk_nsq_state *NSQ, /* I/O NSQ state */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_sc_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP state */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping AR coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int shapingLPCOrder, /* I Noise shaping AR filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - int arch /* I Architecture */ -); -#endif - -void silk_NSQ_c -( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const 
opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -) -{ - opus_int k, lag, start_idx, LSF_interpolation_flag; - const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13; - opus_int16 *pxq; - VARDECL( opus_int32, sLTP_Q15 ); - VARDECL( opus_int16, sLTP ); - opus_int32 HarmShapeFIRPacked_Q14; - opus_int offset_Q10; - VARDECL( opus_int32, x_sc_Q10 ); - SAVE_STACK; - - NSQ->rand_seed = psIndices->Seed; - - /* Set unvoiced lag to the previous one, overwrite later for voiced */ - lag = NSQ->lagPrev; - - silk_assert( NSQ->prev_gain_Q16 != 0 ); - - offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ]; - - if( psIndices->NLSFInterpCoef_Q2 == 4 ) { - LSF_interpolation_flag = 0; - } else { - LSF_interpolation_flag = 1; - } - - ALLOC( sLTP_Q15, - psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); - ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); - ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); - /* Set up pointers to start of sub frame */ - NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length; - NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; - pxq = &NSQ->xq[ psEncC->ltp_mem_length ]; - for( k = 0; k 
< psEncC->nb_subfr; k++ ) { - A_Q12 = &PredCoef_Q12[ (( k >> 1 ) | ( 1 - LSF_interpolation_flag )) * MAX_LPC_ORDER ]; - B_Q14 = <PCoef_Q14[ k * LTP_ORDER ]; - AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ]; - - /* Noise shape parameters */ - silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); - HarmShapeFIRPacked_Q14 = silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 ); - HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 ); - - NSQ->rewhite_flag = 0; - if( psIndices->signalType == TYPE_VOICED ) { - /* Voiced */ - lag = pitchL[ k ]; - - /* Re-whitening */ - if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) { - /* Rewhiten with new A coefs */ - start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; - silk_assert( start_idx > 0 ); - - silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], - A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); - - NSQ->rewhite_flag = 1; - NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; - } - } - - silk_nsq_scale_states( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType ); - - silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14, - AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10, - offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch ); - - x_Q3 += psEncC->subfr_length; - pulses += psEncC->subfr_length; - pxq += psEncC->subfr_length; - } - - /* Update lagPrev for next frame */ - NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; - - /* Save quantized speech and noise shaping signals */ - /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[ psEncC->ltp_mem_length ], psEncC->frame_length * sizeof( opus_int16 ) ) */ - silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) 
); - silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); - RESTORE_STACK; -} - -/***********************************/ -/* silk_noise_shape_quantizer */ -/***********************************/ - -#if !defined(OPUS_X86_MAY_HAVE_SSE4_1) -static OPUS_INLINE -#endif -void silk_noise_shape_quantizer( - silk_nsq_state *NSQ, /* I/O NSQ state */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_sc_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP state */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping AR coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int shapingLPCOrder, /* I Noise shaping AR filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - int arch /* I Architecture */ -) -{ - opus_int i; - opus_int32 LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13; - opus_int32 n_LF_Q12, r_Q10, rr_Q10, q1_Q0, q1_Q10, q2_Q10, rd1_Q20, rd2_Q20; - opus_int32 exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; - opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; - opus_int32 *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr; -#ifdef silk_short_prediction_create_arch_coef - opus_int32 a_Q12_arch[MAX_LPC_ORDER]; -#endif - - shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; - pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; - Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); - - /* Set up short term AR state */ - psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ]; - -#ifdef silk_short_prediction_create_arch_coef - 
silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder); -#endif - - for( i = 0; i < length; i++ ) { - /* Generate dither */ - NSQ->rand_seed = silk_RAND( NSQ->rand_seed ); - - /* Short-term prediction */ - LPC_pred_Q10 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch); - - /* Long-term prediction */ - if( signalType == TYPE_VOICED ) { - /* Unrolled loop */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LTP_pred_Q13 = 2; - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ 0 ], b_Q14[ 0 ] ); - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -1 ], b_Q14[ 1 ] ); - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -2 ], b_Q14[ 2 ] ); - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -3 ], b_Q14[ 3 ] ); - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], b_Q14[ 4 ] ); - pred_lag_ptr++; - } else { - LTP_pred_Q13 = 0; - } - - /* Noise shape feedback */ - silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ - n_AR_Q12 = silk_NSQ_noise_shape_feedback_loop(psLPC_Q14, NSQ->sAR2_Q14, AR_shp_Q13, shapingLPCOrder, arch); - - n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 ); - - n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 ); - n_LF_Q12 = silk_SMLAWT( n_LF_Q12, NSQ->sLF_AR_shp_Q14, LF_shp_Q14 ); - - silk_assert( lag > 0 || signalType != TYPE_VOICED ); - - /* Combine prediction and noise shaping signals */ - tmp1 = silk_SUB32( silk_LSHIFT32( LPC_pred_Q10, 2 ), n_AR_Q12 ); /* Q12 */ - tmp1 = silk_SUB32( tmp1, n_LF_Q12 ); /* Q12 */ - if( lag > 0 ) { - /* Symmetric, packed FIR coefficients */ - n_LTP_Q13 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); - n_LTP_Q13 = silk_SMLAWT( n_LTP_Q13, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); - n_LTP_Q13 = silk_LSHIFT( n_LTP_Q13, 1 ); - shp_lag_ptr++; - - tmp2 = silk_SUB32( 
LTP_pred_Q13, n_LTP_Q13 ); /* Q13 */ - tmp1 = silk_ADD_LSHIFT32( tmp2, tmp1, 1 ); /* Q13 */ - tmp1 = silk_RSHIFT_ROUND( tmp1, 3 ); /* Q10 */ - } else { - tmp1 = silk_RSHIFT_ROUND( tmp1, 2 ); /* Q10 */ - } - - r_Q10 = silk_SUB32( x_sc_Q10[ i ], tmp1 ); /* residual error Q10 */ - - /* Flip sign depending on dither */ - if ( NSQ->rand_seed < 0 ) { - r_Q10 = -r_Q10; - } - r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); - - /* Find two quantization level candidates and measure their rate-distortion */ - q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); - q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); - if( q1_Q0 > 0 ) { - q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q20 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q20 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == 0 ) { - q1_Q10 = offset_Q10; - q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q20 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q20 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == -1 ) { - q2_Q10 = offset_Q10; - q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q20 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q20 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else { /* Q1_Q0 < -1 */ - q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q20 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q20 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); - } - rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); - rd1_Q20 = silk_SMLABB( rd1_Q20, rr_Q10, rr_Q10 ); - rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); - rd2_Q20 = silk_SMLABB( rd2_Q20, rr_Q10, rr_Q10 ); - - if( rd2_Q20 < rd1_Q20 ) { - q1_Q10 = q2_Q10; - } - - pulses[ i ] = (opus_int8)silk_RSHIFT_ROUND( q1_Q10, 10 ); - - /* Excitation */ - exc_Q14 = silk_LSHIFT( q1_Q10, 4 ); - if ( NSQ->rand_seed < 0 ) { - exc_Q14 = -exc_Q14; - } - - /* Add 
predictions */ - LPC_exc_Q14 = silk_ADD_LSHIFT32( exc_Q14, LTP_pred_Q13, 1 ); - xq_Q14 = silk_ADD_LSHIFT32( LPC_exc_Q14, LPC_pred_Q10, 4 ); - - /* Scale XQ back to normal level before saving */ - xq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( xq_Q14, Gain_Q10 ), 8 ) ); - - /* Update states */ - psLPC_Q14++; - *psLPC_Q14 = xq_Q14; - sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, n_AR_Q12, 2 ); - NSQ->sLF_AR_shp_Q14 = sLF_AR_shp_Q14; - - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx ] = silk_SUB_LSHIFT32( sLF_AR_shp_Q14, n_LF_Q12, 2 ); - sLTP_Q15[ NSQ->sLTP_buf_idx ] = silk_LSHIFT( LPC_exc_Q14, 1 ); - NSQ->sLTP_shp_buf_idx++; - NSQ->sLTP_buf_idx++; - - /* Make dither dependent on quantized signal */ - NSQ->rand_seed = silk_ADD32_ovflw( NSQ->rand_seed, pulses[ i ] ); - } - - /* Update LPC synth buffer */ - silk_memcpy( NSQ->sLPC_Q14, &NSQ->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); -} - -static OPUS_INLINE void silk_nsq_scale_states( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - const opus_int32 x_Q3[], /* I input in Q3 */ - opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ - const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I subframe number */ - const opus_int LTP_scale_Q14, /* I */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type /* I Signal type */ -) -{ - opus_int i, lag; - opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; - - lag = pitchL[ subfr ]; - inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); - silk_assert( inv_gain_Q31 != 0 ); - - /* Calculate gain adjustment factor */ - if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { - gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); - } else { - gain_adj_Q16 = (opus_int32)1 << 
16; - } - - /* Scale input */ - inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); - for( i = 0; i < psEncC->subfr_length; i++ ) { - x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); - } - - /* Save inverse gain */ - NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; - - /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ - if( NSQ->rewhite_flag ) { - if( subfr == 0 ) { - /* Do LTP downscaling */ - inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 ); - } - for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { - silk_assert( i < MAX_FRAME_LENGTH ); - sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] ); - } - } - - /* Adjust for changing gain */ - if( gain_adj_Q16 != (opus_int32)1 << 16 ) { - /* Scale long-term shaping state */ - for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) { - NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] ); - } - - /* Scale long-term prediction state */ - if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { - for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { - sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); - } - } - - NSQ->sLF_AR_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sLF_AR_shp_Q14 ); - - /* Scale short-term prediction and shaping states */ - for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { - NSQ->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLPC_Q14[ i ] ); - } - for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { - NSQ->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sAR2_Q14[ i ] ); - } - } -} diff --git a/thirdparty/opus/silk/NSQ.h b/thirdparty/opus/silk/NSQ.h deleted file mode 100644 index 971832f660..0000000000 --- a/thirdparty/opus/silk/NSQ.h +++ /dev/null @@ -1,101 +0,0 @@ -/*********************************************************************** -Copyright (c) 2014 Vidyo. -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ -#ifndef SILK_NSQ_H -#define SILK_NSQ_H - -#include "SigProc_FIX.h" - -#undef silk_short_prediction_create_arch_coef - -static OPUS_INLINE opus_int32 silk_noise_shape_quantizer_short_prediction_c(const opus_int32 *buf32, const opus_int16 *coef16, opus_int order) -{ - opus_int32 out; - silk_assert( order == 10 || order == 16 ); - - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - out = silk_RSHIFT( order, 1 ); - out = silk_SMLAWB( out, buf32[ 0 ], coef16[ 0 ] ); - out = silk_SMLAWB( out, buf32[ -1 ], coef16[ 1 ] ); - out = silk_SMLAWB( out, buf32[ -2 ], coef16[ 2 ] ); - out = silk_SMLAWB( out, buf32[ -3 ], coef16[ 3 ] ); - out = silk_SMLAWB( out, buf32[ -4 ], coef16[ 4 ] ); - out = silk_SMLAWB( out, buf32[ -5 ], coef16[ 5 ] ); - out = silk_SMLAWB( out, buf32[ -6 ], coef16[ 6 ] ); - out = silk_SMLAWB( out, buf32[ -7 ], coef16[ 7 ] ); - out = silk_SMLAWB( out, buf32[ -8 ], coef16[ 8 ] ); - out = silk_SMLAWB( out, buf32[ -9 ], coef16[ 9 ] ); - - if( order == 16 ) - { - out = silk_SMLAWB( out, buf32[ -10 ], coef16[ 10 ] ); - out = silk_SMLAWB( out, buf32[ -11 ], coef16[ 11 ] ); - out = silk_SMLAWB( out, buf32[ -12 ], coef16[ 12 ] ); - out = silk_SMLAWB( out, buf32[ -13 ], coef16[ 13 ] ); - out = silk_SMLAWB( out, buf32[ -14 ], coef16[ 14 ] ); - out = silk_SMLAWB( out, buf32[ -15 ], coef16[ 15 ] ); - } - return out; -} - -#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch) ((void)arch,silk_noise_shape_quantizer_short_prediction_c(in, coef, order)) - -static OPUS_INLINE opus_int32 silk_NSQ_noise_shape_feedback_loop_c(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order) -{ - opus_int32 out; - opus_int32 tmp1, tmp2; - opus_int j; - - tmp2 = data0[0]; - tmp1 = data1[0]; - data1[0] = tmp2; - - out = silk_RSHIFT(order, 1); - out = silk_SMLAWB(out, tmp2, coef[0]); - - for (j = 2; j < order; j += 2) { - tmp2 = 
data1[j - 1]; - data1[j - 1] = tmp1; - out = silk_SMLAWB(out, tmp1, coef[j - 1]); - tmp1 = data1[j + 0]; - data1[j + 0] = tmp2; - out = silk_SMLAWB(out, tmp2, coef[j]); - } - data1[order - 1] = tmp1; - out = silk_SMLAWB(out, tmp1, coef[order - 1]); - /* Q11 -> Q12 */ - out = silk_LSHIFT32( out, 1 ); - return out; -} - -#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch) ((void)arch,silk_NSQ_noise_shape_feedback_loop_c(data0, data1, coef, order)) - -#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) -#include "arm/NSQ_neon.h" -#endif - -#endif /* SILK_NSQ_H */ diff --git a/thirdparty/opus/silk/NSQ_del_dec.c b/thirdparty/opus/silk/NSQ_del_dec.c deleted file mode 100644 index ab6feeac98..0000000000 --- a/thirdparty/opus/silk/NSQ_del_dec.c +++ /dev/null @@ -1,716 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" -#include "NSQ.h" - - -typedef struct { - opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; - opus_int32 RandState[ DECISION_DELAY ]; - opus_int32 Q_Q10[ DECISION_DELAY ]; - opus_int32 Xq_Q14[ DECISION_DELAY ]; - opus_int32 Pred_Q15[ DECISION_DELAY ]; - opus_int32 Shape_Q14[ DECISION_DELAY ]; - opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 LF_AR_Q14; - opus_int32 Seed; - opus_int32 SeedInit; - opus_int32 RD_Q10; -} NSQ_del_dec_struct; - -typedef struct { - opus_int32 Q_Q10; - opus_int32 RD_Q10; - opus_int32 xq_Q14; - opus_int32 LF_AR_Q14; - opus_int32 sLTP_shp_Q14; - opus_int32 LPC_exc_Q14; -} NSQ_sample_struct; - -typedef NSQ_sample_struct NSQ_sample_pair[ 2 ]; - -#if defined(MIPSr1_ASM) -#include "mips/NSQ_del_dec_mipsr1.h" -#endif -static OPUS_INLINE void silk_nsq_del_dec_scale_states( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - const opus_int32 x_Q3[], /* I Input in Q3 */ - opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ - const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I Subframe number */ - opus_int 
nStatesDelayedDecision, /* I Number of del dec states */ - const opus_int LTP_scale_Q14, /* I LTP state scaling */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type, /* I Signal type */ - const opus_int decisionDelay /* I Decision delay */ -); - -/******************************************/ -/* Noise shape quantizer for one subframe */ -/******************************************/ -static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP filter state */ - opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int subfr, /* I Subframe number */ - opus_int shapingLPCOrder, /* I Shaping LPC filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - opus_int warping_Q16, /* I */ - opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ - opus_int decisionDelay, /* I */ - int arch /* I */ -); - -void silk_NSQ_del_dec_c( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I 
Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -) -{ - opus_int i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr; - opus_int last_smple_idx, smpl_buf_idx, decisionDelay; - const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13; - opus_int16 *pxq; - VARDECL( opus_int32, sLTP_Q15 ); - VARDECL( opus_int16, sLTP ); - opus_int32 HarmShapeFIRPacked_Q14; - opus_int offset_Q10; - opus_int32 RDmin_Q10, Gain_Q10; - VARDECL( opus_int32, x_sc_Q10 ); - VARDECL( opus_int32, delayedGain_Q10 ); - VARDECL( NSQ_del_dec_struct, psDelDec ); - NSQ_del_dec_struct *psDD; - SAVE_STACK; - - /* Set unvoiced lag to the previous one, overwrite later for voiced */ - lag = NSQ->lagPrev; - - silk_assert( NSQ->prev_gain_Q16 != 0 ); - - /* Initialize delayed decision states */ - ALLOC( psDelDec, psEncC->nStatesDelayedDecision, NSQ_del_dec_struct ); - silk_memset( psDelDec, 0, psEncC->nStatesDelayedDecision * sizeof( NSQ_del_dec_struct ) ); - for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - psDD->Seed = ( k + psIndices->Seed ) & 3; - psDD->SeedInit = psDD->Seed; - psDD->RD_Q10 = 0; - psDD->LF_AR_Q14 = NSQ->sLF_AR_shp_Q14; - psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ 
psEncC->ltp_mem_length - 1 ]; - silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); - silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) ); - } - - offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ]; - smpl_buf_idx = 0; /* index of oldest samples */ - - decisionDelay = silk_min_int( DECISION_DELAY, psEncC->subfr_length ); - - /* For voiced frames limit the decision delay to lower than the pitch lag */ - if( psIndices->signalType == TYPE_VOICED ) { - for( k = 0; k < psEncC->nb_subfr; k++ ) { - decisionDelay = silk_min_int( decisionDelay, pitchL[ k ] - LTP_ORDER / 2 - 1 ); - } - } else { - if( lag > 0 ) { - decisionDelay = silk_min_int( decisionDelay, lag - LTP_ORDER / 2 - 1 ); - } - } - - if( psIndices->NLSFInterpCoef_Q2 == 4 ) { - LSF_interpolation_flag = 0; - } else { - LSF_interpolation_flag = 1; - } - - ALLOC( sLTP_Q15, - psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); - ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); - ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); - ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 ); - /* Set up pointers to start of sub frame */ - pxq = &NSQ->xq[ psEncC->ltp_mem_length ]; - NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length; - NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; - subfr = 0; - for( k = 0; k < psEncC->nb_subfr; k++ ) { - A_Q12 = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ]; - B_Q14 = <PCoef_Q14[ k * LTP_ORDER ]; - AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ]; - - /* Noise shape parameters */ - silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); - HarmShapeFIRPacked_Q14 = silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 ); - HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 ); - - NSQ->rewhite_flag = 0; - if( psIndices->signalType == TYPE_VOICED ) { - /* Voiced */ - lag = pitchL[ k ]; - - /* Re-whitening */ 
- if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) { - if( k == 2 ) { - /* RESET DELAYED DECISIONS */ - /* Find winner */ - RDmin_Q10 = psDelDec[ 0 ].RD_Q10; - Winner_ind = 0; - for( i = 1; i < psEncC->nStatesDelayedDecision; i++ ) { - if( psDelDec[ i ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psDelDec[ i ].RD_Q10; - Winner_ind = i; - } - } - for( i = 0; i < psEncC->nStatesDelayedDecision; i++ ) { - if( i != Winner_ind ) { - psDelDec[ i ].RD_Q10 += ( silk_int32_MAX >> 4 ); - silk_assert( psDelDec[ i ].RD_Q10 >= 0 ); - } - } - - /* Copy final part of signals from winner state to output and long-term filter states */ - psDD = &psDelDec[ Winner_ind ]; - last_smple_idx = smpl_buf_idx + decisionDelay; - for( i = 0; i < decisionDelay; i++ ) { - last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); - pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( - silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ]; - } - - subfr = 0; - } - - /* Rewhiten with new A coefs */ - start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; - silk_assert( start_idx > 0 ); - - silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], - A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); - - NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; - NSQ->rewhite_flag = 1; - } - } - - silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, - psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay ); - - silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, - delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, 
HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], - Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder, - psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay, psEncC->arch ); - - x_Q3 += psEncC->subfr_length; - pulses += psEncC->subfr_length; - pxq += psEncC->subfr_length; - } - - /* Find winner */ - RDmin_Q10 = psDelDec[ 0 ].RD_Q10; - Winner_ind = 0; - for( k = 1; k < psEncC->nStatesDelayedDecision; k++ ) { - if( psDelDec[ k ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psDelDec[ k ].RD_Q10; - Winner_ind = k; - } - } - - /* Copy final part of signals from winner state to output and long-term filter states */ - psDD = &psDelDec[ Winner_ind ]; - psIndices->Seed = psDD->SeedInit; - last_smple_idx = smpl_buf_idx + decisionDelay; - Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 ); - for( i = 0; i < decisionDelay; i++ ) { - last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); - pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( - silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ]; - } - silk_memcpy( NSQ->sLPC_Q14, &psDD->sLPC_Q14[ psEncC->subfr_length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); - silk_memcpy( NSQ->sAR2_Q14, psDD->sAR2_Q14, sizeof( psDD->sAR2_Q14 ) ); - - /* Update states */ - NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14; - NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; - - /* Save quantized speech signal */ - /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */ - silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); - silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], 
psEncC->ltp_mem_length * sizeof( opus_int32 ) ); - RESTORE_STACK; -} - -/******************************************/ -/* Noise shape quantizer for one subframe */ -/******************************************/ -#ifndef OVERRIDE_silk_noise_shape_quantizer_del_dec -static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP filter state */ - opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int subfr, /* I Subframe number */ - opus_int shapingLPCOrder, /* I Shaping LPC filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - opus_int warping_Q16, /* I */ - opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ - opus_int decisionDelay, /* I */ - int arch /* I */ -) -{ - opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; - opus_int32 Winner_rand_state; - opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14; - opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10; - opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; - opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; - opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; -#ifdef 
silk_short_prediction_create_arch_coef - opus_int32 a_Q12_arch[MAX_LPC_ORDER]; -#endif - - VARDECL( NSQ_sample_pair, psSampleState ); - NSQ_del_dec_struct *psDD; - NSQ_sample_struct *psSS; - SAVE_STACK; - - silk_assert( nStatesDelayedDecision > 0 ); - ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair ); - - shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; - pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; - Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); - -#ifdef silk_short_prediction_create_arch_coef - silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder); -#endif - - for( i = 0; i < length; i++ ) { - /* Perform common calculations used in all states */ - - /* Long-term prediction */ - if( signalType == TYPE_VOICED ) { - /* Unrolled loop */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LTP_pred_Q14 = 2; - LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ 0 ], b_Q14[ 0 ] ); - LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -1 ], b_Q14[ 1 ] ); - LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -2 ], b_Q14[ 2 ] ); - LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -3 ], b_Q14[ 3 ] ); - LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] ); - LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 ); /* Q13 -> Q14 */ - pred_lag_ptr++; - } else { - LTP_pred_Q14 = 0; - } - - /* Long-term shaping */ - if( lag > 0 ) { - /* Symmetric, packed FIR coefficients */ - n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */ - shp_lag_ptr++; - } else { - n_LTP_Q14 = 0; - } - - for( k = 0; k < nStatesDelayedDecision; k++ ) { - /* Delayed decision state */ - psDD = &psDelDec[ k ]; - - /* Sample state */ - psSS = 
psSampleState[ k ]; - - /* Generate dither */ - psDD->Seed = silk_RAND( psDD->Seed ); - - /* Pointer used in short term prediction and shaping */ - psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; - /* Short-term prediction */ - LPC_pred_Q14 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch); - LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */ - - /* Noise shape feedback */ - silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ - /* Output of lowpass section */ - tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 ); - psDD->sAR2_Q14[ 0 ] = tmp2; - n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 ); - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] ); - /* Loop over allpass sections */ - for( j = 2; j < shapingLPCOrder; j += 2 ) { - /* Output of allpass section */ - tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j + 0 ] - tmp1, warping_Q16 ); - psDD->sAR2_Q14[ j - 1 ] = tmp1; - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ] ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j + 1 ] - tmp2, warping_Q16 ); - psDD->sAR2_Q14[ j + 0 ] = tmp2; - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] ); - } - psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1; - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] ); - - n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 ); /* Q11 -> Q12 */ - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 ); /* Q12 */ - n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 ); /* Q12 -> Q14 */ - - n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 ); /* Q12 */ - n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 ); /* Q12 */ - n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 ); /* Q12 -> Q14 */ - - /* Input minus prediction 
plus noise feedback */ - /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */ - tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */ - tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 ); /* Q13 */ - tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */ - tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */ - - r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */ - - /* Flip sign depending on dither */ - if ( psDD->Seed < 0 ) { - r_Q10 = -r_Q10; - } - r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); - - /* Find two quantization level candidates and measure their rate-distortion */ - q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); - q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); - if( q1_Q0 > 0 ) { - q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == 0 ) { - q1_Q10 = offset_Q10; - q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == -1 ) { - q2_Q10 = offset_Q10; - q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else { /* q1_Q0 < -1 */ - q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); - } - rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); - rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 ); - rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); - rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 ); - - if( rd1_Q10 < rd2_Q10 ) { - psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); - psSS[ 1 ].RD_Q10 = 
silk_ADD32( psDD->RD_Q10, rd2_Q10 ); - psSS[ 0 ].Q_Q10 = q1_Q10; - psSS[ 1 ].Q_Q10 = q2_Q10; - } else { - psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); - psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); - psSS[ 0 ].Q_Q10 = q2_Q10; - psSS[ 1 ].Q_Q10 = q1_Q10; - } - - /* Update states for best quantization */ - - /* Quantized excitation */ - exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 ); - if ( psDD->Seed < 0 ) { - exc_Q14 = -exc_Q14; - } - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); - xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); - - /* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); - psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; - psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; - psSS[ 0 ].xq_Q14 = xq_Q14; - - /* Update states for second best quantization */ - - /* Quantized excitation */ - exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 ); - if ( psDD->Seed < 0 ) { - exc_Q14 = -exc_Q14; - } - - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); - xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); - - /* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); - psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; - psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; - psSS[ 1 ].xq_Q14 = xq_Q14; - } - - *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */ - last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */ - - /* Find winner */ - RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; - Winner_ind = 0; - for( k = 1; k < nStatesDelayedDecision; k++ ) { - if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10; - Winner_ind = k; - } - } - - /* Increase RD values of expired states */ - Winner_rand_state = psDelDec[ Winner_ind ].RandState[ 
last_smple_idx ]; - for( k = 0; k < nStatesDelayedDecision; k++ ) { - if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) { - psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 ); - psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 ); - silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 ); - } - } - - /* Find worst in first set and best in second set */ - RDmax_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; - RDmin_Q10 = psSampleState[ 0 ][ 1 ].RD_Q10; - RDmax_ind = 0; - RDmin_ind = 0; - for( k = 1; k < nStatesDelayedDecision; k++ ) { - /* find worst in first set */ - if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) { - RDmax_Q10 = psSampleState[ k ][ 0 ].RD_Q10; - RDmax_ind = k; - } - /* find best in second set */ - if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psSampleState[ k ][ 1 ].RD_Q10; - RDmin_ind = k; - } - } - - /* Replace a state if best from second set outperforms worst in first set */ - if( RDmin_Q10 < RDmax_Q10 ) { - silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i, - ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) ); - silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) ); - } - - /* Write samples from winner to output and long-term filter states */ - psDD = &psDelDec[ Winner_ind ]; - if( subfr > 0 || i >= decisionDelay ) { - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); - xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( - silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ]; - sLTP_Q15[ NSQ->sLTP_buf_idx - decisionDelay ] = psDD->Pred_Q15[ last_smple_idx ]; - } - NSQ->sLTP_shp_buf_idx++; - NSQ->sLTP_buf_idx++; - - /* 
Update states */ - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - psSS = &psSampleState[ k ][ 0 ]; - psDD->LF_AR_Q14 = psSS->LF_AR_Q14; - psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14; - psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14; - psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10; - psDD->Pred_Q15[ *smpl_buf_idx ] = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 ); - psDD->Shape_Q14[ *smpl_buf_idx ] = psSS->sLTP_shp_Q14; - psDD->Seed = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) ); - psDD->RandState[ *smpl_buf_idx ] = psDD->Seed; - psDD->RD_Q10 = psSS->RD_Q10; - } - delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10; - } - /* Update LPC states */ - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); - } - RESTORE_STACK; -} -#endif /* OVERRIDE_silk_noise_shape_quantizer_del_dec */ - -static OPUS_INLINE void silk_nsq_del_dec_scale_states( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - const opus_int32 x_Q3[], /* I Input in Q3 */ - opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ - const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I Subframe number */ - opus_int nStatesDelayedDecision, /* I Number of del dec states */ - const opus_int LTP_scale_Q14, /* I LTP state scaling */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type, /* I Signal type */ - const opus_int decisionDelay /* I Decision delay */ -) -{ - opus_int i, k, lag; - opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; - NSQ_del_dec_struct *psDD; - - lag = pitchL[ subfr ]; - inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( 
Gains_Q16[ subfr ], 1 ), 47 ); - silk_assert( inv_gain_Q31 != 0 ); - - /* Calculate gain adjustment factor */ - if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { - gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); - } else { - gain_adj_Q16 = (opus_int32)1 << 16; - } - - /* Scale input */ - inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); - for( i = 0; i < psEncC->subfr_length; i++ ) { - x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); - } - - /* Save inverse gain */ - NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; - - /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ - if( NSQ->rewhite_flag ) { - if( subfr == 0 ) { - /* Do LTP downscaling */ - inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 ); - } - for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { - silk_assert( i < MAX_FRAME_LENGTH ); - sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] ); - } - } - - /* Adjust for changing gain */ - if( gain_adj_Q16 != (opus_int32)1 << 16 ) { - /* Scale long-term shaping state */ - for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) { - NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] ); - } - - /* Scale long-term prediction state */ - if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { - for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx - decisionDelay; i++ ) { - sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); - } - } - - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - - /* Scale scalar states */ - psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 ); - - /* Scale short-term prediction and shaping states */ - for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { - psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_Q14[ i ] ); - } - for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { - psDD->sAR2_Q14[ i ] = silk_SMULWW( 
gain_adj_Q16, psDD->sAR2_Q14[ i ] ); - } - for( i = 0; i < DECISION_DELAY; i++ ) { - psDD->Pred_Q15[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred_Q15[ i ] ); - psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q14[ i ] ); - } - } - } -} diff --git a/thirdparty/opus/silk/PLC.c b/thirdparty/opus/silk/PLC.c deleted file mode 100644 index fb6ea887b7..0000000000 --- a/thirdparty/opus/silk/PLC.c +++ /dev/null @@ -1,446 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" -#include "PLC.h" - -#define NB_ATT 2 -static const opus_int16 HARM_ATT_Q15[NB_ATT] = { 32440, 31130 }; /* 0.99, 0.95 */ -static const opus_int16 PLC_RAND_ATTENUATE_V_Q15[NB_ATT] = { 31130, 26214 }; /* 0.95, 0.8 */ -static const opus_int16 PLC_RAND_ATTENUATE_UV_Q15[NB_ATT] = { 32440, 29491 }; /* 0.99, 0.9 */ - -static OPUS_INLINE void silk_PLC_update( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl /* I/O Decoder control */ -); - -static OPUS_INLINE void silk_PLC_conceal( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl, /* I/O Decoder control */ - opus_int16 frame[], /* O LPC residual signal */ - int arch /* I Run-time architecture */ -); - - -void silk_PLC_Reset( - silk_decoder_state *psDec /* I/O Decoder state */ -) -{ - psDec->sPLC.pitchL_Q8 = silk_LSHIFT( psDec->frame_length, 8 - 1 ); - psDec->sPLC.prevGain_Q16[ 0 ] = SILK_FIX_CONST( 1, 16 ); - psDec->sPLC.prevGain_Q16[ 1 ] = SILK_FIX_CONST( 1, 16 ); - psDec->sPLC.subfr_length = 20; - psDec->sPLC.nb_subfr = 2; -} - -void silk_PLC( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl, /* I/O Decoder control */ - opus_int16 frame[], /* I/O signal */ - opus_int lost, /* I Loss flag */ - int arch /* I Run-time architecture 
*/ -) -{ - /* PLC control function */ - if( psDec->fs_kHz != psDec->sPLC.fs_kHz ) { - silk_PLC_Reset( psDec ); - psDec->sPLC.fs_kHz = psDec->fs_kHz; - } - - if( lost ) { - /****************************/ - /* Generate Signal */ - /****************************/ - silk_PLC_conceal( psDec, psDecCtrl, frame, arch ); - - psDec->lossCnt++; - } else { - /****************************/ - /* Update state */ - /****************************/ - silk_PLC_update( psDec, psDecCtrl ); - } -} - -/**************************************************/ -/* Update state of PLC */ -/**************************************************/ -static OPUS_INLINE void silk_PLC_update( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl /* I/O Decoder control */ -) -{ - opus_int32 LTP_Gain_Q14, temp_LTP_Gain_Q14; - opus_int i, j; - silk_PLC_struct *psPLC; - - psPLC = &psDec->sPLC; - - /* Update parameters used in case of packet loss */ - psDec->prevSignalType = psDec->indices.signalType; - LTP_Gain_Q14 = 0; - if( psDec->indices.signalType == TYPE_VOICED ) { - /* Find the parameters for the last subframe which contains a pitch pulse */ - for( j = 0; j * psDec->subfr_length < psDecCtrl->pitchL[ psDec->nb_subfr - 1 ]; j++ ) { - if( j == psDec->nb_subfr ) { - break; - } - temp_LTP_Gain_Q14 = 0; - for( i = 0; i < LTP_ORDER; i++ ) { - temp_LTP_Gain_Q14 += psDecCtrl->LTPCoef_Q14[ ( psDec->nb_subfr - 1 - j ) * LTP_ORDER + i ]; - } - if( temp_LTP_Gain_Q14 > LTP_Gain_Q14 ) { - LTP_Gain_Q14 = temp_LTP_Gain_Q14; - silk_memcpy( psPLC->LTPCoef_Q14, - &psDecCtrl->LTPCoef_Q14[ silk_SMULBB( psDec->nb_subfr - 1 - j, LTP_ORDER ) ], - LTP_ORDER * sizeof( opus_int16 ) ); - - psPLC->pitchL_Q8 = silk_LSHIFT( psDecCtrl->pitchL[ psDec->nb_subfr - 1 - j ], 8 ); - } - } - - silk_memset( psPLC->LTPCoef_Q14, 0, LTP_ORDER * sizeof( opus_int16 ) ); - psPLC->LTPCoef_Q14[ LTP_ORDER / 2 ] = LTP_Gain_Q14; - - /* Limit LT coefs */ - if( LTP_Gain_Q14 < V_PITCH_GAIN_START_MIN_Q14 ) { - opus_int 
scale_Q10; - opus_int32 tmp; - - tmp = silk_LSHIFT( V_PITCH_GAIN_START_MIN_Q14, 10 ); - scale_Q10 = silk_DIV32( tmp, silk_max( LTP_Gain_Q14, 1 ) ); - for( i = 0; i < LTP_ORDER; i++ ) { - psPLC->LTPCoef_Q14[ i ] = silk_RSHIFT( silk_SMULBB( psPLC->LTPCoef_Q14[ i ], scale_Q10 ), 10 ); - } - } else if( LTP_Gain_Q14 > V_PITCH_GAIN_START_MAX_Q14 ) { - opus_int scale_Q14; - opus_int32 tmp; - - tmp = silk_LSHIFT( V_PITCH_GAIN_START_MAX_Q14, 14 ); - scale_Q14 = silk_DIV32( tmp, silk_max( LTP_Gain_Q14, 1 ) ); - for( i = 0; i < LTP_ORDER; i++ ) { - psPLC->LTPCoef_Q14[ i ] = silk_RSHIFT( silk_SMULBB( psPLC->LTPCoef_Q14[ i ], scale_Q14 ), 14 ); - } - } - } else { - psPLC->pitchL_Q8 = silk_LSHIFT( silk_SMULBB( psDec->fs_kHz, 18 ), 8 ); - silk_memset( psPLC->LTPCoef_Q14, 0, LTP_ORDER * sizeof( opus_int16 )); - } - - /* Save LPC coeficients */ - silk_memcpy( psPLC->prevLPC_Q12, psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order * sizeof( opus_int16 ) ); - psPLC->prevLTP_scale_Q14 = psDecCtrl->LTP_scale_Q14; - - /* Save last two gains */ - silk_memcpy( psPLC->prevGain_Q16, &psDecCtrl->Gains_Q16[ psDec->nb_subfr - 2 ], 2 * sizeof( opus_int32 ) ); - - psPLC->subfr_length = psDec->subfr_length; - psPLC->nb_subfr = psDec->nb_subfr; -} - -static OPUS_INLINE void silk_PLC_energy(opus_int32 *energy1, opus_int *shift1, opus_int32 *energy2, opus_int *shift2, - const opus_int32 *exc_Q14, const opus_int32 *prevGain_Q10, int subfr_length, int nb_subfr) -{ - int i, k; - VARDECL( opus_int16, exc_buf ); - opus_int16 *exc_buf_ptr; - SAVE_STACK; - ALLOC( exc_buf, 2*subfr_length, opus_int16 ); - /* Find random noise component */ - /* Scale previous excitation signal */ - exc_buf_ptr = exc_buf; - for( k = 0; k < 2; k++ ) { - for( i = 0; i < subfr_length; i++ ) { - exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT( - silk_SMULWW( exc_Q14[ i + ( k + nb_subfr - 2 ) * subfr_length ], prevGain_Q10[ k ] ), 8 ) ); - } - exc_buf_ptr += subfr_length; - } - /* Find the subframe with lowest energy of the last 
two and use that as random noise generator */ - silk_sum_sqr_shift( energy1, shift1, exc_buf, subfr_length ); - silk_sum_sqr_shift( energy2, shift2, &exc_buf[ subfr_length ], subfr_length ); - RESTORE_STACK; -} - -static OPUS_INLINE void silk_PLC_conceal( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl, /* I/O Decoder control */ - opus_int16 frame[], /* O LPC residual signal */ - int arch /* I Run-time architecture */ -) -{ - opus_int i, j, k; - opus_int lag, idx, sLTP_buf_idx, shift1, shift2; - opus_int32 rand_seed, harm_Gain_Q15, rand_Gain_Q15, inv_gain_Q30; - opus_int32 energy1, energy2, *rand_ptr, *pred_lag_ptr; - opus_int32 LPC_pred_Q10, LTP_pred_Q12; - opus_int16 rand_scale_Q14; - opus_int16 *B_Q14; - opus_int32 *sLPC_Q14_ptr; - opus_int16 A_Q12[ MAX_LPC_ORDER ]; -#ifdef SMALL_FOOTPRINT - opus_int16 *sLTP; -#else - VARDECL( opus_int16, sLTP ); -#endif - VARDECL( opus_int32, sLTP_Q14 ); - silk_PLC_struct *psPLC = &psDec->sPLC; - opus_int32 prevGain_Q10[2]; - SAVE_STACK; - - ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 ); -#ifdef SMALL_FOOTPRINT - /* Ugly hack that breaks aliasing rules to save stack: put sLTP at the very end of sLTP_Q14. 
*/ - sLTP = ((opus_int16*)&sLTP_Q14[psDec->ltp_mem_length + psDec->frame_length])-psDec->ltp_mem_length; -#else - ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 ); -#endif - - prevGain_Q10[0] = silk_RSHIFT( psPLC->prevGain_Q16[ 0 ], 6); - prevGain_Q10[1] = silk_RSHIFT( psPLC->prevGain_Q16[ 1 ], 6); - - if( psDec->first_frame_after_reset ) { - silk_memset( psPLC->prevLPC_Q12, 0, sizeof( psPLC->prevLPC_Q12 ) ); - } - - silk_PLC_energy(&energy1, &shift1, &energy2, &shift2, psDec->exc_Q14, prevGain_Q10, psDec->subfr_length, psDec->nb_subfr); - - if( silk_RSHIFT( energy1, shift2 ) < silk_RSHIFT( energy2, shift1 ) ) { - /* First sub-frame has lowest energy */ - rand_ptr = &psDec->exc_Q14[ silk_max_int( 0, ( psPLC->nb_subfr - 1 ) * psPLC->subfr_length - RAND_BUF_SIZE ) ]; - } else { - /* Second sub-frame has lowest energy */ - rand_ptr = &psDec->exc_Q14[ silk_max_int( 0, psPLC->nb_subfr * psPLC->subfr_length - RAND_BUF_SIZE ) ]; - } - - /* Set up Gain to random noise component */ - B_Q14 = psPLC->LTPCoef_Q14; - rand_scale_Q14 = psPLC->randScale_Q14; - - /* Set up attenuation gains */ - harm_Gain_Q15 = HARM_ATT_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ]; - if( psDec->prevSignalType == TYPE_VOICED ) { - rand_Gain_Q15 = PLC_RAND_ATTENUATE_V_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ]; - } else { - rand_Gain_Q15 = PLC_RAND_ATTENUATE_UV_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ]; - } - - /* LPC concealment. Apply BWE to previous LPC */ - silk_bwexpander( psPLC->prevLPC_Q12, psDec->LPC_order, SILK_FIX_CONST( BWE_COEF, 16 ) ); - - /* Preload LPC coeficients to array on stack. 
Gives small performance gain */ - silk_memcpy( A_Q12, psPLC->prevLPC_Q12, psDec->LPC_order * sizeof( opus_int16 ) ); - - /* First Lost frame */ - if( psDec->lossCnt == 0 ) { - rand_scale_Q14 = 1 << 14; - - /* Reduce random noise Gain for voiced frames */ - if( psDec->prevSignalType == TYPE_VOICED ) { - for( i = 0; i < LTP_ORDER; i++ ) { - rand_scale_Q14 -= B_Q14[ i ]; - } - rand_scale_Q14 = silk_max_16( 3277, rand_scale_Q14 ); /* 0.2 */ - rand_scale_Q14 = (opus_int16)silk_RSHIFT( silk_SMULBB( rand_scale_Q14, psPLC->prevLTP_scale_Q14 ), 14 ); - } else { - /* Reduce random noise for unvoiced frames with high LPC gain */ - opus_int32 invGain_Q30, down_scale_Q30; - - invGain_Q30 = silk_LPC_inverse_pred_gain( psPLC->prevLPC_Q12, psDec->LPC_order ); - - down_scale_Q30 = silk_min_32( silk_RSHIFT( (opus_int32)1 << 30, LOG2_INV_LPC_GAIN_HIGH_THRES ), invGain_Q30 ); - down_scale_Q30 = silk_max_32( silk_RSHIFT( (opus_int32)1 << 30, LOG2_INV_LPC_GAIN_LOW_THRES ), down_scale_Q30 ); - down_scale_Q30 = silk_LSHIFT( down_scale_Q30, LOG2_INV_LPC_GAIN_HIGH_THRES ); - - rand_Gain_Q15 = silk_RSHIFT( silk_SMULWB( down_scale_Q30, rand_Gain_Q15 ), 14 ); - } - } - - rand_seed = psPLC->rand_seed; - lag = silk_RSHIFT_ROUND( psPLC->pitchL_Q8, 8 ); - sLTP_buf_idx = psDec->ltp_mem_length; - - /* Rewhiten LTP state */ - idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2; - silk_assert( idx > 0 ); - silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order, arch ); - /* Scale LTP state */ - inv_gain_Q30 = silk_INVERSE32_varQ( psPLC->prevGain_Q16[ 1 ], 46 ); - inv_gain_Q30 = silk_min( inv_gain_Q30, silk_int32_MAX >> 1 ); - for( i = idx + psDec->LPC_order; i < psDec->ltp_mem_length; i++ ) { - sLTP_Q14[ i ] = silk_SMULWB( inv_gain_Q30, sLTP[ i ] ); - } - - /***************************/ - /* LTP synthesis filtering */ - /***************************/ - for( k = 0; k < psDec->nb_subfr; k++ ) { - /* Set up pointer */ - 
pred_lag_ptr = &sLTP_Q14[ sLTP_buf_idx - lag + LTP_ORDER / 2 ]; - for( i = 0; i < psDec->subfr_length; i++ ) { - /* Unrolled loop */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LTP_pred_Q12 = 2; - LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ 0 ], B_Q14[ 0 ] ); - LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -1 ], B_Q14[ 1 ] ); - LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -2 ], B_Q14[ 2 ] ); - LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -3 ], B_Q14[ 3 ] ); - LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -4 ], B_Q14[ 4 ] ); - pred_lag_ptr++; - - /* Generate LPC excitation */ - rand_seed = silk_RAND( rand_seed ); - idx = silk_RSHIFT( rand_seed, 25 ) & RAND_BUF_MASK; - sLTP_Q14[ sLTP_buf_idx ] = silk_LSHIFT32( silk_SMLAWB( LTP_pred_Q12, rand_ptr[ idx ], rand_scale_Q14 ), 2 ); - sLTP_buf_idx++; - } - - /* Gradually reduce LTP gain */ - for( j = 0; j < LTP_ORDER; j++ ) { - B_Q14[ j ] = silk_RSHIFT( silk_SMULBB( harm_Gain_Q15, B_Q14[ j ] ), 15 ); - } - /* Gradually reduce excitation gain */ - rand_scale_Q14 = silk_RSHIFT( silk_SMULBB( rand_scale_Q14, rand_Gain_Q15 ), 15 ); - - /* Slowly increase pitch lag */ - psPLC->pitchL_Q8 = silk_SMLAWB( psPLC->pitchL_Q8, psPLC->pitchL_Q8, PITCH_DRIFT_FAC_Q16 ); - psPLC->pitchL_Q8 = silk_min_32( psPLC->pitchL_Q8, silk_LSHIFT( silk_SMULBB( MAX_PITCH_LAG_MS, psDec->fs_kHz ), 8 ) ); - lag = silk_RSHIFT_ROUND( psPLC->pitchL_Q8, 8 ); - } - - /***************************/ - /* LPC synthesis filtering */ - /***************************/ - sLPC_Q14_ptr = &sLTP_Q14[ psDec->ltp_mem_length - MAX_LPC_ORDER ]; - - /* Copy LPC state */ - silk_memcpy( sLPC_Q14_ptr, psDec->sLPC_Q14_buf, MAX_LPC_ORDER * sizeof( opus_int32 ) ); - - silk_assert( psDec->LPC_order >= 10 ); /* check that unrolling works */ - for( i = 0; i < psDec->frame_length; i++ ) { - /* partly unrolled */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LPC_pred_Q10 
= silk_RSHIFT( psDec->LPC_order, 1 ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 2 ], A_Q12[ 1 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 3 ], A_Q12[ 2 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 4 ], A_Q12[ 3 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 5 ], A_Q12[ 4 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 6 ], A_Q12[ 5 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 7 ], A_Q12[ 6 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 8 ], A_Q12[ 7 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 9 ], A_Q12[ 8 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] ); - for( j = 10; j < psDec->LPC_order; j++ ) { - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - j - 1 ], A_Q12[ j ] ); - } - - /* Add prediction to LPC excitation */ - sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], - silk_LSHIFT_SAT32( LPC_pred_Q10, 4 )); - - /* Scale with Gain */ - frame[ i ] = (opus_int16)silk_SAT16( silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], prevGain_Q10[ 1 ] ), 8 ) ) ); - } - - /* Save LPC state */ - silk_memcpy( psDec->sLPC_Q14_buf, &sLPC_Q14_ptr[ psDec->frame_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); - - /**************************************/ - /* Update states */ - /**************************************/ - psPLC->rand_seed = rand_seed; - psPLC->randScale_Q14 = rand_scale_Q14; - for( i = 0; i < MAX_NB_SUBFR; i++ ) { - psDecCtrl->pitchL[ i ] = lag; - } - RESTORE_STACK; -} - -/* Glues concealed frames with new good received frames */ -void 
silk_PLC_glue_frames( - silk_decoder_state *psDec, /* I/O decoder state */ - opus_int16 frame[], /* I/O signal */ - opus_int length /* I length of signal */ -) -{ - opus_int i, energy_shift; - opus_int32 energy; - silk_PLC_struct *psPLC; - psPLC = &psDec->sPLC; - - if( psDec->lossCnt ) { - /* Calculate energy in concealed residual */ - silk_sum_sqr_shift( &psPLC->conc_energy, &psPLC->conc_energy_shift, frame, length ); - - psPLC->last_frame_lost = 1; - } else { - if( psDec->sPLC.last_frame_lost ) { - /* Calculate residual in decoded signal if last frame was lost */ - silk_sum_sqr_shift( &energy, &energy_shift, frame, length ); - - /* Normalize energies */ - if( energy_shift > psPLC->conc_energy_shift ) { - psPLC->conc_energy = silk_RSHIFT( psPLC->conc_energy, energy_shift - psPLC->conc_energy_shift ); - } else if( energy_shift < psPLC->conc_energy_shift ) { - energy = silk_RSHIFT( energy, psPLC->conc_energy_shift - energy_shift ); - } - - /* Fade in the energy difference */ - if( energy > psPLC->conc_energy ) { - opus_int32 frac_Q24, LZ; - opus_int32 gain_Q16, slope_Q16; - - LZ = silk_CLZ32( psPLC->conc_energy ); - LZ = LZ - 1; - psPLC->conc_energy = silk_LSHIFT( psPLC->conc_energy, LZ ); - energy = silk_RSHIFT( energy, silk_max_32( 24 - LZ, 0 ) ); - - frac_Q24 = silk_DIV32( psPLC->conc_energy, silk_max( energy, 1 ) ); - - gain_Q16 = silk_LSHIFT( silk_SQRT_APPROX( frac_Q24 ), 4 ); - slope_Q16 = silk_DIV32_16( ( (opus_int32)1 << 16 ) - gain_Q16, length ); - /* Make slope 4x steeper to avoid missing onsets after DTX */ - slope_Q16 = silk_LSHIFT( slope_Q16, 2 ); - - for( i = 0; i < length; i++ ) { - frame[ i ] = silk_SMULWB( gain_Q16, frame[ i ] ); - gain_Q16 += slope_Q16; - if( gain_Q16 > (opus_int32)1 << 16 ) { - break; - } - } - } - } - psPLC->last_frame_lost = 0; - } -} diff --git a/thirdparty/opus/silk/PLC.h b/thirdparty/opus/silk/PLC.h deleted file mode 100644 index 6438f51633..0000000000 --- a/thirdparty/opus/silk/PLC.h +++ /dev/null @@ -1,62 +0,0 @@ 
-/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifndef SILK_PLC_H -#define SILK_PLC_H - -#include "main.h" - -#define BWE_COEF 0.99 -#define V_PITCH_GAIN_START_MIN_Q14 11469 /* 0.7 in Q14 */ -#define V_PITCH_GAIN_START_MAX_Q14 15565 /* 0.95 in Q14 */ -#define MAX_PITCH_LAG_MS 18 -#define RAND_BUF_SIZE 128 -#define RAND_BUF_MASK ( RAND_BUF_SIZE - 1 ) -#define LOG2_INV_LPC_GAIN_HIGH_THRES 3 /* 2^3 = 8 dB LPC gain */ -#define LOG2_INV_LPC_GAIN_LOW_THRES 8 /* 2^8 = 24 dB LPC gain */ -#define PITCH_DRIFT_FAC_Q16 655 /* 0.01 in Q16 */ - -void silk_PLC_Reset( - silk_decoder_state *psDec /* I/O Decoder state */ -); - -void silk_PLC( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl, /* I/O Decoder control */ - opus_int16 frame[], /* I/O signal */ - opus_int lost, /* I Loss flag */ - int arch /* I Run-time architecture */ -); - -void silk_PLC_glue_frames( - silk_decoder_state *psDec, /* I/O decoder state */ - opus_int16 frame[], /* I/O signal */ - opus_int length /* I length of signal */ -); - -#endif - diff --git a/thirdparty/opus/silk/SigProc_FIX.h b/thirdparty/opus/silk/SigProc_FIX.h deleted file mode 100644 index b63299441e..0000000000 --- a/thirdparty/opus/silk/SigProc_FIX.h +++ /dev/null @@ -1,615 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_SIGPROC_FIX_H -#define SILK_SIGPROC_FIX_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -/*#define silk_MACRO_COUNT */ /* Used to enable WMOPS counting */ - -#define SILK_MAX_ORDER_LPC 16 /* max order of the LPC analysis in schur() and k2a() */ - -#include <string.h> /* for memset(), memcpy(), memmove() */ -#include "typedef.h" -#include "resampler_structs.h" -#include "macros.h" -#include "cpu_support.h" - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) -#include "x86/SigProc_FIX_sse.h" -#endif - -/********************************************************************/ -/* SIGNAL PROCESSING FUNCTIONS */ -/********************************************************************/ - -/*! 
- * Initialize/reset the resampler state for a given pair of input/output sampling rates -*/ -opus_int silk_resampler_init( - silk_resampler_state_struct *S, /* I/O Resampler state */ - opus_int32 Fs_Hz_in, /* I Input sampling rate (Hz) */ - opus_int32 Fs_Hz_out, /* I Output sampling rate (Hz) */ - opus_int forEnc /* I If 1: encoder; if 0: decoder */ -); - -/*! - * Resampler: convert from one sampling rate to another - */ -opus_int silk_resampler( - silk_resampler_state_struct *S, /* I/O Resampler state */ - opus_int16 out[], /* O Output signal */ - const opus_int16 in[], /* I Input signal */ - opus_int32 inLen /* I Number of input samples */ -); - -/*! -* Downsample 2x, mediocre quality -*/ -void silk_resampler_down2( - opus_int32 *S, /* I/O State vector [ 2 ] */ - opus_int16 *out, /* O Output signal [ len ] */ - const opus_int16 *in, /* I Input signal [ floor(len/2) ] */ - opus_int32 inLen /* I Number of input samples */ -); - -/*! - * Downsample by a factor 2/3, low quality -*/ -void silk_resampler_down2_3( - opus_int32 *S, /* I/O State vector [ 6 ] */ - opus_int16 *out, /* O Output signal [ floor(2*inLen/3) ] */ - const opus_int16 *in, /* I Input signal [ inLen ] */ - opus_int32 inLen /* I Number of input samples */ -); - -/*! - * second order ARMA filter; - * slower than biquad() but uses more precise coefficients - * can handle (slowly) varying coefficients - */ -void silk_biquad_alt( - const opus_int16 *in, /* I input signal */ - const opus_int32 *B_Q28, /* I MA coefficients [3] */ - const opus_int32 *A_Q28, /* I AR coefficients [2] */ - opus_int32 *S, /* I/O State vector [2] */ - opus_int16 *out, /* O output signal */ - const opus_int32 len, /* I signal length (must be even) */ - opus_int stride /* I Operate on interleaved signal if > 1 */ -); - -/* Variable order MA prediction error filter. 
*/ -void silk_LPC_analysis_filter( - opus_int16 *out, /* O Output signal */ - const opus_int16 *in, /* I Input signal */ - const opus_int16 *B, /* I MA prediction coefficients, Q12 [order] */ - const opus_int32 len, /* I Signal length */ - const opus_int32 d, /* I Filter order */ - int arch /* I Run-time architecture */ -); - -/* Chirp (bandwidth expand) LP AR filter */ -void silk_bwexpander( - opus_int16 *ar, /* I/O AR filter to be expanded (without leading 1) */ - const opus_int d, /* I Length of ar */ - opus_int32 chirp_Q16 /* I Chirp factor (typically in the range 0 to 1) */ -); - -/* Chirp (bandwidth expand) LP AR filter */ -void silk_bwexpander_32( - opus_int32 *ar, /* I/O AR filter to be expanded (without leading 1) */ - const opus_int d, /* I Length of ar */ - opus_int32 chirp_Q16 /* I Chirp factor in Q16 */ -); - -/* Compute inverse of LPC prediction gain, and */ -/* test if LPC coefficients are stable (all poles within unit circle) */ -opus_int32 silk_LPC_inverse_pred_gain( /* O Returns inverse prediction gain in energy domain, Q30 */ - const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */ - const opus_int order /* I Prediction order */ -); - -/* For input in Q24 domain */ -opus_int32 silk_LPC_inverse_pred_gain_Q24( /* O Returns inverse prediction gain in energy domain, Q30 */ - const opus_int32 *A_Q24, /* I Prediction coefficients [order] */ - const opus_int order /* I Prediction order */ -); - -/* Split signal in two decimated bands using first-order allpass filters */ -void silk_ana_filt_bank_1( - const opus_int16 *in, /* I Input signal [N] */ - opus_int32 *S, /* I/O State vector [2] */ - opus_int16 *outL, /* O Low band [N/2] */ - opus_int16 *outH, /* O High band [N/2] */ - const opus_int32 N /* I Number of input samples */ -); - -/********************************************************************/ -/* SCALAR FUNCTIONS */ -/********************************************************************/ - -/* Approximation of 128 * log2() (exact 
inverse of approx 2^() below) */ -/* Convert input to a log scale */ -opus_int32 silk_lin2log( - const opus_int32 inLin /* I input in linear scale */ -); - -/* Approximation of a sigmoid function */ -opus_int silk_sigm_Q15( - opus_int in_Q5 /* I */ -); - -/* Approximation of 2^() (exact inverse of approx log2() above) */ -/* Convert input to a linear scale */ -opus_int32 silk_log2lin( - const opus_int32 inLog_Q7 /* I input on log scale */ -); - -/* Compute number of bits to right shift the sum of squares of a vector */ -/* of int16s to make it fit in an int32 */ -void silk_sum_sqr_shift( - opus_int32 *energy, /* O Energy of x, after shifting to the right */ - opus_int *shift, /* O Number of bits right shift applied to energy */ - const opus_int16 *x, /* I Input vector */ - opus_int len /* I Length of input vector */ -); - -/* Calculates the reflection coefficients from the correlation sequence */ -/* Faster than schur64(), but much less accurate. */ -/* uses SMLAWB(), requiring armv5E and higher. */ -opus_int32 silk_schur( /* O Returns residual energy */ - opus_int16 *rc_Q15, /* O reflection coefficients [order] Q15 */ - const opus_int32 *c, /* I correlations [order+1] */ - const opus_int32 order /* I prediction order */ -); - -/* Calculates the reflection coefficients from the correlation sequence */ -/* Slower than schur(), but more accurate. 
*/ -/* Uses SMULL(), available on armv4 */ -opus_int32 silk_schur64( /* O returns residual energy */ - opus_int32 rc_Q16[], /* O Reflection coefficients [order] Q16 */ - const opus_int32 c[], /* I Correlations [order+1] */ - opus_int32 order /* I Prediction order */ -); - -/* Step up function, converts reflection coefficients to prediction coefficients */ -void silk_k2a( - opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */ - const opus_int16 *rc_Q15, /* I Reflection coefficients [order] Q15 */ - const opus_int32 order /* I Prediction order */ -); - -/* Step up function, converts reflection coefficients to prediction coefficients */ -void silk_k2a_Q16( - opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */ - const opus_int32 *rc_Q16, /* I Reflection coefficients [order] Q16 */ - const opus_int32 order /* I Prediction order */ -); - -/* Apply sine window to signal vector. */ -/* Window types: */ -/* 1 -> sine window from 0 to pi/2 */ -/* 2 -> sine window from pi/2 to pi */ -/* every other sample of window is linearly interpolated, for speed */ -void silk_apply_sine_window( - opus_int16 px_win[], /* O Pointer to windowed signal */ - const opus_int16 px[], /* I Pointer to input signal */ - const opus_int win_type, /* I Selects a window type */ - const opus_int length /* I Window length, multiple of 4 */ -); - -/* Compute autocorrelation */ -void silk_autocorr( - opus_int32 *results, /* O Result (length correlationCount) */ - opus_int *scale, /* O Scaling of the correlation vector */ - const opus_int16 *inputData, /* I Input data to correlate */ - const opus_int inputDataSize, /* I Length of input */ - const opus_int correlationCount, /* I Number of correlation taps to compute */ - int arch /* I Run-time architecture */ -); - -void silk_decode_pitch( - opus_int16 lagIndex, /* I */ - opus_int8 contourIndex, /* O */ - opus_int pitch_lags[], /* O 4 pitch values */ - const opus_int Fs_kHz, /* I sampling frequency (kHz) */ - const opus_int nb_subfr /* I 
number of sub frames */ -); - -opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */ - const opus_int16 *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */ - opus_int *pitch_out, /* O 4 pitch lag values */ - opus_int16 *lagIndex, /* O Lag Index */ - opus_int8 *contourIndex, /* O Pitch contour Index */ - opus_int *LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */ - opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */ - const opus_int32 search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */ - const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */ - const opus_int Fs_kHz, /* I Sample frequency (kHz) */ - const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ - const opus_int nb_subfr, /* I number of 5 ms subframes */ - int arch /* I Run-time architecture */ -); - -/* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients */ -/* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence. 
*/ -void silk_A2NLSF( - opus_int16 *NLSF, /* O Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */ - opus_int32 *a_Q16, /* I/O Monic whitening filter coefficients in Q16 [d] */ - const opus_int d /* I Filter order (must be even) */ -); - -/* compute whitening filter coefficients from normalized line spectral frequencies */ -void silk_NLSF2A( - opus_int16 *a_Q12, /* O monic whitening filter coefficients in Q12, [ d ] */ - const opus_int16 *NLSF, /* I normalized line spectral frequencies in Q15, [ d ] */ - const opus_int d /* I filter order (should be even) */ -); - -void silk_insertion_sort_increasing( - opus_int32 *a, /* I/O Unsorted / Sorted vector */ - opus_int *idx, /* O Index vector for the sorted elements */ - const opus_int L, /* I Vector length */ - const opus_int K /* I Number of correctly sorted positions */ -); - -void silk_insertion_sort_decreasing_int16( - opus_int16 *a, /* I/O Unsorted / Sorted vector */ - opus_int *idx, /* O Index vector for the sorted elements */ - const opus_int L, /* I Vector length */ - const opus_int K /* I Number of correctly sorted positions */ -); - -void silk_insertion_sort_increasing_all_values_int16( - opus_int16 *a, /* I/O Unsorted / Sorted vector */ - const opus_int L /* I Vector length */ -); - -/* NLSF stabilizer, for a single input data vector */ -void silk_NLSF_stabilize( - opus_int16 *NLSF_Q15, /* I/O Unstable/stabilized normalized LSF vector in Q15 [L] */ - const opus_int16 *NDeltaMin_Q15, /* I Min distance vector, NDeltaMin_Q15[L] must be >= 1 [L+1] */ - const opus_int L /* I Number of NLSF parameters in the input vector */ -); - -/* Laroia low complexity NLSF weights */ -void silk_NLSF_VQ_weights_laroia( - opus_int16 *pNLSFW_Q_OUT, /* O Pointer to input vector weights [D] */ - const opus_int16 *pNLSF_Q15, /* I Pointer to input vector [D] */ - const opus_int D /* I Input vector dimension (even) */ -); - -/* Compute reflection coefficients from input signal */ -void silk_burg_modified_c( - opus_int32 
*res_nrg, /* O Residual energy */ - opus_int *res_nrg_Q, /* O Residual energy Q value */ - opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ - const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ - const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ - const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ - const opus_int nb_subfr, /* I Number of subframes stacked in x */ - const opus_int D, /* I Order */ - int arch /* I Run-time architecture */ -); - -/* Copy and multiply a vector by a constant */ -void silk_scale_copy_vector16( - opus_int16 *data_out, - const opus_int16 *data_in, - opus_int32 gain_Q16, /* I Gain in Q16 */ - const opus_int dataSize /* I Length */ -); - -/* Some for the LTP related function requires Q26 to work.*/ -void silk_scale_vector32_Q26_lshift_18( - opus_int32 *data1, /* I/O Q0/Q18 */ - opus_int32 gain_Q26, /* I Q26 */ - opus_int dataSize /* I length */ -); - -/********************************************************************/ -/* INLINE ARM MATH */ -/********************************************************************/ - -/* return sum( inVec1[i] * inVec2[i] ) */ - -opus_int32 silk_inner_prod_aligned( - const opus_int16 *const inVec1, /* I input vector 1 */ - const opus_int16 *const inVec2, /* I input vector 2 */ - const opus_int len, /* I vector lengths */ - int arch /* I Run-time architecture */ -); - - -opus_int32 silk_inner_prod_aligned_scale( - const opus_int16 *const inVec1, /* I input vector 1 */ - const opus_int16 *const inVec2, /* I input vector 2 */ - const opus_int scale, /* I number of bits to shift */ - const opus_int len /* I vector lengths */ -); - -opus_int64 silk_inner_prod16_aligned_64_c( - const opus_int16 *inVec1, /* I input vector 1 */ - const opus_int16 *inVec2, /* I input vector 2 */ - const opus_int len /* I vector lengths */ -); - -/********************************************************************/ -/* 
MACROS */ -/********************************************************************/ - -/* Rotate a32 right by 'rot' bits. Negative rot values result in rotating - left. Output is 32bit int. - Note: contemporary compilers recognize the C expression below and - compile it into a 'ror' instruction if available. No need for OPUS_INLINE ASM! */ -static OPUS_INLINE opus_int32 silk_ROR32( opus_int32 a32, opus_int rot ) -{ - opus_uint32 x = (opus_uint32) a32; - opus_uint32 r = (opus_uint32) rot; - opus_uint32 m = (opus_uint32) -rot; - if( rot == 0 ) { - return a32; - } else if( rot < 0 ) { - return (opus_int32) ((x << m) | (x >> (32 - m))); - } else { - return (opus_int32) ((x << (32 - r)) | (x >> r)); - } -} - -/* Allocate opus_int16 aligned to 4-byte memory address */ -#if EMBEDDED_ARM -#define silk_DWORD_ALIGN __attribute__((aligned(4))) -#else -#define silk_DWORD_ALIGN -#endif - -/* Useful Macros that can be adjusted to other platforms */ -#define silk_memcpy(dest, src, size) memcpy((dest), (src), (size)) -#define silk_memset(dest, src, size) memset((dest), (src), (size)) -#define silk_memmove(dest, src, size) memmove((dest), (src), (size)) - -/* Fixed point macros */ - -/* (a32 * b32) output have to be 32bit int */ -#define silk_MUL(a32, b32) ((a32) * (b32)) - -/* (a32 * b32) output have to be 32bit uint */ -#define silk_MUL_uint(a32, b32) silk_MUL(a32, b32) - -/* a32 + (b32 * c32) output have to be 32bit int */ -#define silk_MLA(a32, b32, c32) silk_ADD32((a32),((b32) * (c32))) - -/* a32 + (b32 * c32) output have to be 32bit uint */ -#define silk_MLA_uint(a32, b32, c32) silk_MLA(a32, b32, c32) - -/* ((a32 >> 16) * (b32 >> 16)) output have to be 32bit int */ -#define silk_SMULTT(a32, b32) (((a32) >> 16) * ((b32) >> 16)) - -/* a32 + ((a32 >> 16) * (b32 >> 16)) output have to be 32bit int */ -#define silk_SMLATT(a32, b32, c32) silk_ADD32((a32),((b32) >> 16) * ((c32) >> 16)) - -#define silk_SMLALBB(a64, b16, c16) silk_ADD64((a64),(opus_int64)((opus_int32)(b16) * 
(opus_int32)(c16))) - -/* (a32 * b32) */ -#define silk_SMULL(a32, b32) ((opus_int64)(a32) * /*(opus_int64)*/(b32)) - -/* Adds two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour - (just standard two's complement implementation-specific behaviour) */ -#define silk_ADD32_ovflw(a, b) ((opus_int32)((opus_uint32)(a) + (opus_uint32)(b))) -/* Subtractss two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour - (just standard two's complement implementation-specific behaviour) */ -#define silk_SUB32_ovflw(a, b) ((opus_int32)((opus_uint32)(a) - (opus_uint32)(b))) - -/* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */ -#define silk_MLA_ovflw(a32, b32, c32) silk_ADD32_ovflw((a32), (opus_uint32)(b32) * (opus_uint32)(c32)) -#define silk_SMLABB_ovflw(a32, b32, c32) (silk_ADD32_ovflw((a32) , ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32)))) - -#define silk_DIV32_16(a32, b16) ((opus_int32)((a32) / (b16))) -#define silk_DIV32(a32, b32) ((opus_int32)((a32) / (b32))) - -/* These macros enables checking for overflow in silk_API_Debug.h*/ -#define silk_ADD16(a, b) ((a) + (b)) -#define silk_ADD32(a, b) ((a) + (b)) -#define silk_ADD64(a, b) ((a) + (b)) - -#define silk_SUB16(a, b) ((a) - (b)) -#define silk_SUB32(a, b) ((a) - (b)) -#define silk_SUB64(a, b) ((a) - (b)) - -#define silk_SAT8(a) ((a) > silk_int8_MAX ? silk_int8_MAX : \ - ((a) < silk_int8_MIN ? silk_int8_MIN : (a))) -#define silk_SAT16(a) ((a) > silk_int16_MAX ? silk_int16_MAX : \ - ((a) < silk_int16_MIN ? silk_int16_MIN : (a))) -#define silk_SAT32(a) ((a) > silk_int32_MAX ? silk_int32_MAX : \ - ((a) < silk_int32_MIN ? 
silk_int32_MIN : (a))) - -#define silk_CHECK_FIT8(a) (a) -#define silk_CHECK_FIT16(a) (a) -#define silk_CHECK_FIT32(a) (a) - -#define silk_ADD_SAT16(a, b) (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a), (b) ) ) -#define silk_ADD_SAT64(a, b) ((((a) + (b)) & 0x8000000000000000LL) == 0 ? \ - ((((a) & (b)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a)+(b)) : \ - ((((a) | (b)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a)+(b)) ) - -#define silk_SUB_SAT16(a, b) (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a), (b) ) ) -#define silk_SUB_SAT64(a, b) ((((a)-(b)) & 0x8000000000000000LL) == 0 ? \ - (( (a) & ((b)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a)-(b)) : \ - ((((a)^0x8000000000000000LL) & (b) & 0x8000000000000000LL) ? silk_int64_MAX : (a)-(b)) ) - -/* Saturation for positive input values */ -#define silk_POS_SAT32(a) ((a) > silk_int32_MAX ? silk_int32_MAX : (a)) - -/* Add with saturation for positive input values */ -#define silk_ADD_POS_SAT8(a, b) ((((a)+(b)) & 0x80) ? silk_int8_MAX : ((a)+(b))) -#define silk_ADD_POS_SAT16(a, b) ((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b))) -#define silk_ADD_POS_SAT32(a, b) ((((a)+(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b))) -#define silk_ADD_POS_SAT64(a, b) ((((a)+(b)) & 0x8000000000000000LL) ? 
silk_int64_MAX : ((a)+(b))) - -#define silk_LSHIFT8(a, shift) ((opus_int8)((opus_uint8)(a)<<(shift))) /* shift >= 0, shift < 8 */ -#define silk_LSHIFT16(a, shift) ((opus_int16)((opus_uint16)(a)<<(shift))) /* shift >= 0, shift < 16 */ -#define silk_LSHIFT32(a, shift) ((opus_int32)((opus_uint32)(a)<<(shift))) /* shift >= 0, shift < 32 */ -#define silk_LSHIFT64(a, shift) ((opus_int64)((opus_uint64)(a)<<(shift))) /* shift >= 0, shift < 64 */ -#define silk_LSHIFT(a, shift) silk_LSHIFT32(a, shift) /* shift >= 0, shift < 32 */ - -#define silk_RSHIFT8(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 8 */ -#define silk_RSHIFT16(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 16 */ -#define silk_RSHIFT32(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 32 */ -#define silk_RSHIFT64(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 64 */ -#define silk_RSHIFT(a, shift) silk_RSHIFT32(a, shift) /* shift >= 0, shift < 32 */ - -/* saturates before shifting */ -#define silk_LSHIFT_SAT32(a, shift) (silk_LSHIFT32( silk_LIMIT( (a), silk_RSHIFT32( silk_int32_MIN, (shift) ), \ - silk_RSHIFT32( silk_int32_MAX, (shift) ) ), (shift) )) - -#define silk_LSHIFT_ovflw(a, shift) ((opus_int32)((opus_uint32)(a) << (shift))) /* shift >= 0, allowed to overflow */ -#define silk_LSHIFT_uint(a, shift) ((a) << (shift)) /* shift >= 0 */ -#define silk_RSHIFT_uint(a, shift) ((a) >> (shift)) /* shift >= 0 */ - -#define silk_ADD_LSHIFT(a, b, shift) ((a) + silk_LSHIFT((b), (shift))) /* shift >= 0 */ -#define silk_ADD_LSHIFT32(a, b, shift) silk_ADD32((a), silk_LSHIFT32((b), (shift))) /* shift >= 0 */ -#define silk_ADD_LSHIFT_uint(a, b, shift) ((a) + silk_LSHIFT_uint((b), (shift))) /* shift >= 0 */ -#define silk_ADD_RSHIFT(a, b, shift) ((a) + silk_RSHIFT((b), (shift))) /* shift >= 0 */ -#define silk_ADD_RSHIFT32(a, b, shift) silk_ADD32((a), silk_RSHIFT32((b), (shift))) /* shift >= 0 */ -#define silk_ADD_RSHIFT_uint(a, b, shift) ((a) + silk_RSHIFT_uint((b), (shift))) /* shift >= 0 */ -#define silk_SUB_LSHIFT32(a, b, 
shift) silk_SUB32((a), silk_LSHIFT32((b), (shift))) /* shift >= 0 */ -#define silk_SUB_RSHIFT32(a, b, shift) silk_SUB32((a), silk_RSHIFT32((b), (shift))) /* shift >= 0 */ - -/* Requires that shift > 0 */ -#define silk_RSHIFT_ROUND(a, shift) ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1) -#define silk_RSHIFT_ROUND64(a, shift) ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1) - -/* Number of rightshift required to fit the multiplication */ -#define silk_NSHIFT_MUL_32_32(a, b) ( -(31- (32-silk_CLZ32(silk_abs(a)) + (32-silk_CLZ32(silk_abs(b))))) ) -#define silk_NSHIFT_MUL_16_16(a, b) ( -(15- (16-silk_CLZ16(silk_abs(a)) + (16-silk_CLZ16(silk_abs(b))))) ) - - -#define silk_min(a, b) (((a) < (b)) ? (a) : (b)) -#define silk_max(a, b) (((a) > (b)) ? (a) : (b)) - -/* Macro to convert floating-point constants to fixed-point */ -#define SILK_FIX_CONST( C, Q ) ((opus_int32)((C) * ((opus_int64)1 << (Q)) + 0.5)) - -/* silk_min() versions with typecast in the function call */ -static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b) -{ - return (((a) < (b)) ? (a) : (b)); -} -static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b) -{ - return (((a) < (b)) ? (a) : (b)); -} -static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b) -{ - return (((a) < (b)) ? (a) : (b)); -} -static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b) -{ - return (((a) < (b)) ? (a) : (b)); -} - -/* silk_min() versions with typecast in the function call */ -static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b) -{ - return (((a) > (b)) ? (a) : (b)); -} -static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b) -{ - return (((a) > (b)) ? (a) : (b)); -} -static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b) -{ - return (((a) > (b)) ? (a) : (b)); -} -static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b) -{ - return (((a) > (b)) ? 
(a) : (b)); -} - -#define silk_LIMIT( a, limit1, limit2) ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \ - : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a)))) - -#define silk_LIMIT_int silk_LIMIT -#define silk_LIMIT_16 silk_LIMIT -#define silk_LIMIT_32 silk_LIMIT - -#define silk_abs(a) (((a) > 0) ? (a) : -(a)) /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN */ -#define silk_abs_int(a) (((a) ^ ((a) >> (8 * sizeof(a) - 1))) - ((a) >> (8 * sizeof(a) - 1))) -#define silk_abs_int32(a) (((a) ^ ((a) >> 31)) - ((a) >> 31)) -#define silk_abs_int64(a) (((a) > 0) ? (a) : -(a)) - -#define silk_sign(a) ((a) > 0 ? 1 : ( (a) < 0 ? -1 : 0 )) - -/* PSEUDO-RANDOM GENERATOR */ -/* Make sure to store the result as the seed for the next call (also in between */ -/* frames), otherwise result won't be random at all. When only using some of the */ -/* bits, take the most significant bits by right-shifting. */ -#define silk_RAND(seed) (silk_MLA_ovflw(907633515, (seed), 196314165)) - -/* Add some multiplication functions that can be easily mapped to ARM. */ - -/* silk_SMMUL: Signed top word multiply. - ARMv6 2 instruction cycles. - ARMv3M+ 3 instruction cycles. 
use SMULL and ignore LSB registers.(except xM)*/ -/*#define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT(silk_SMLAL(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)), 16)*/ -/* the following seems faster on x86 */ -#define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32) - -#if !defined(OPUS_X86_MAY_HAVE_SSE4_1) -#define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \ - ((void)(arch), silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch)) - -#define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \ - ((void)(arch),silk_inner_prod16_aligned_64_c(inVec1, inVec2, len)) -#endif - -#include "Inlines.h" -#include "MacroCount.h" -#include "MacroDebug.h" - -#ifdef OPUS_ARM_INLINE_ASM -#include "arm/SigProc_FIX_armv4.h" -#endif - -#ifdef OPUS_ARM_INLINE_EDSP -#include "arm/SigProc_FIX_armv5e.h" -#endif - -#if defined(MIPSr1_ASM) -#include "mips/sigproc_fix_mipsr1.h" -#endif - - -#ifdef __cplusplus -} -#endif - -#endif /* SILK_SIGPROC_FIX_H */ diff --git a/thirdparty/opus/silk/VAD.c b/thirdparty/opus/silk/VAD.c deleted file mode 100644 index 0a782af2f1..0000000000 --- a/thirdparty/opus/silk/VAD.c +++ /dev/null @@ -1,362 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" - -/* Silk VAD noise level estimation */ -# if !defined(OPUS_X86_MAY_HAVE_SSE4_1) -static OPUS_INLINE void silk_VAD_GetNoiseLevels( - const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */ - silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ -); -#endif - -/**********************************/ -/* Initialization of the Silk VAD */ -/**********************************/ -opus_int silk_VAD_Init( /* O Return value, 0 if success */ - silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ -) -{ - opus_int b, ret = 0; - - /* reset state memory */ - silk_memset( psSilk_VAD, 0, sizeof( silk_VAD_state ) ); - - /* init noise levels */ - /* Initialize array with approx pink noise levels (psd proportional to inverse of frequency) */ - for( b = 0; b < VAD_N_BANDS; b++ ) { - psSilk_VAD->NoiseLevelBias[ b ] = silk_max_32( 
silk_DIV32_16( VAD_NOISE_LEVELS_BIAS, b + 1 ), 1 ); - } - - /* Initialize state */ - for( b = 0; b < VAD_N_BANDS; b++ ) { - psSilk_VAD->NL[ b ] = silk_MUL( 100, psSilk_VAD->NoiseLevelBias[ b ] ); - psSilk_VAD->inv_NL[ b ] = silk_DIV32( silk_int32_MAX, psSilk_VAD->NL[ b ] ); - } - psSilk_VAD->counter = 15; - - /* init smoothed energy-to-noise ratio*/ - for( b = 0; b < VAD_N_BANDS; b++ ) { - psSilk_VAD->NrgRatioSmth_Q8[ b ] = 100 * 256; /* 100 * 256 --> 20 dB SNR */ - } - - return( ret ); -} - -/* Weighting factors for tilt measure */ -static const opus_int32 tiltWeights[ VAD_N_BANDS ] = { 30000, 6000, -12000, -12000 }; - -/***************************************/ -/* Get the speech activity level in Q8 */ -/***************************************/ -opus_int silk_VAD_GetSA_Q8_c( /* O Return value, 0 if success */ - silk_encoder_state *psEncC, /* I/O Encoder state */ - const opus_int16 pIn[] /* I PCM input */ -) -{ - opus_int SA_Q15, pSNR_dB_Q7, input_tilt; - opus_int decimated_framelength1, decimated_framelength2; - opus_int decimated_framelength; - opus_int dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s; - opus_int32 sumSquared, smooth_coef_Q16; - opus_int16 HPstateTmp; - VARDECL( opus_int16, X ); - opus_int32 Xnrg[ VAD_N_BANDS ]; - opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ]; - opus_int32 speech_nrg, x_tmp; - opus_int X_offset[ VAD_N_BANDS ]; - opus_int ret = 0; - silk_VAD_state *psSilk_VAD = &psEncC->sVAD; - SAVE_STACK; - - /* Safety checks */ - silk_assert( VAD_N_BANDS == 4 ); - silk_assert( MAX_FRAME_LENGTH >= psEncC->frame_length ); - silk_assert( psEncC->frame_length <= 512 ); - silk_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) ); - - /***********************/ - /* Filter and Decimate */ - /***********************/ - decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 ); - decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 ); - decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 ); - /* 
Decimate into 4 bands: - 0 L 3L L 3L 5L - - -- - -- -- - 8 8 2 4 4 - - [0-1 kHz| temp. |1-2 kHz| 2-4 kHz | 4-8 kHz | - - They're arranged to allow the minimal ( frame_length / 4 ) extra - scratch space during the downsampling process */ - X_offset[ 0 ] = 0; - X_offset[ 1 ] = decimated_framelength + decimated_framelength2; - X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength; - X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2; - ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 ); - - /* 0-8 kHz to 0-4 kHz and 4-8 kHz */ - silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[ 0 ], - X, &X[ X_offset[ 3 ] ], psEncC->frame_length ); - - /* 0-4 kHz to 0-2 kHz and 2-4 kHz */ - silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ], - X, &X[ X_offset[ 2 ] ], decimated_framelength1 ); - - /* 0-2 kHz to 0-1 kHz and 1-2 kHz */ - silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ], - X, &X[ X_offset[ 1 ] ], decimated_framelength2 ); - - /*********************************************/ - /* HP filter on lowest band (differentiator) */ - /*********************************************/ - X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ], 1 ); - HPstateTmp = X[ decimated_framelength - 1 ]; - for( i = decimated_framelength - 1; i > 0; i-- ) { - X[ i - 1 ] = silk_RSHIFT( X[ i - 1 ], 1 ); - X[ i ] -= X[ i - 1 ]; - } - X[ 0 ] -= psSilk_VAD->HPstate; - psSilk_VAD->HPstate = HPstateTmp; - - /*************************************/ - /* Calculate the energy in each band */ - /*************************************/ - for( b = 0; b < VAD_N_BANDS; b++ ) { - /* Find the decimated framelength in the non-uniformly divided bands */ - decimated_framelength = silk_RSHIFT( psEncC->frame_length, silk_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) ); - - /* Split length into subframe lengths */ - dec_subframe_length = silk_RSHIFT( decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2 ); - dec_subframe_offset = 0; - - /* Compute energy per sub-frame */ - /* 
initialize with summed energy of last subframe */ - Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ]; - for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) { - sumSquared = 0; - for( i = 0; i < dec_subframe_length; i++ ) { - /* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2. */ - /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128) */ - x_tmp = silk_RSHIFT( - X[ X_offset[ b ] + i + dec_subframe_offset ], 3 ); - sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp ); - - /* Safety check */ - silk_assert( sumSquared >= 0 ); - } - - /* Add/saturate summed energy of current subframe */ - if( s < VAD_INTERNAL_SUBFRAMES - 1 ) { - Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], sumSquared ); - } else { - /* Look-ahead subframe */ - Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], silk_RSHIFT( sumSquared, 1 ) ); - } - - dec_subframe_offset += dec_subframe_length; - } - psSilk_VAD->XnrgSubfr[ b ] = sumSquared; - } - - /********************/ - /* Noise estimation */ - /********************/ - silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD ); - - /***********************************************/ - /* Signal-plus-noise to noise ratio estimation */ - /***********************************************/ - sumSquared = 0; - input_tilt = 0; - for( b = 0; b < VAD_N_BANDS; b++ ) { - speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ]; - if( speech_nrg > 0 ) { - /* Divide, with sufficient resolution */ - if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) { - NrgToNoiseRatio_Q8[ b ] = silk_DIV32( silk_LSHIFT( Xnrg[ b ], 8 ), psSilk_VAD->NL[ b ] + 1 ); - } else { - NrgToNoiseRatio_Q8[ b ] = silk_DIV32( Xnrg[ b ], silk_RSHIFT( psSilk_VAD->NL[ b ], 8 ) + 1 ); - } - - /* Convert to log domain */ - SNR_Q7 = silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128; - - /* Sum-of-squares */ - sumSquared = silk_SMLABB( sumSquared, SNR_Q7, SNR_Q7 ); /* Q14 */ - - /* Tilt measure */ - if( speech_nrg < ( (opus_int32)1 << 20 ) ) { - /* Scale down SNR value for small subband speech 
energies */ - SNR_Q7 = silk_SMULWB( silk_LSHIFT( silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 ); - } - input_tilt = silk_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 ); - } else { - NrgToNoiseRatio_Q8[ b ] = 256; - } - } - - /* Mean-of-squares */ - sumSquared = silk_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */ - - /* Root-mean-square approximation, scale to dBs, and write to output pointer */ - pSNR_dB_Q7 = (opus_int16)( 3 * silk_SQRT_APPROX( sumSquared ) ); /* Q7 */ - - /*********************************/ - /* Speech Probability Estimation */ - /*********************************/ - SA_Q15 = silk_sigm_Q15( silk_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 ); - - /**************************/ - /* Frequency Tilt Measure */ - /**************************/ - psEncC->input_tilt_Q15 = silk_LSHIFT( silk_sigm_Q15( input_tilt ) - 16384, 1 ); - - /**************************************************/ - /* Scale the sigmoid output based on power levels */ - /**************************************************/ - speech_nrg = 0; - for( b = 0; b < VAD_N_BANDS; b++ ) { - /* Accumulate signal-without-noise energies, higher frequency bands have more weight */ - speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 ); - } - - /* Power scaling */ - if( speech_nrg <= 0 ) { - SA_Q15 = silk_RSHIFT( SA_Q15, 1 ); - } else if( speech_nrg < 32768 ) { - if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { - speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 ); - } else { - speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 ); - } - - /* square-root */ - speech_nrg = silk_SQRT_APPROX( speech_nrg ); - SA_Q15 = silk_SMULWB( 32768 + speech_nrg, SA_Q15 ); - } - - /* Copy the resulting speech activity in Q8 */ - psEncC->speech_activity_Q8 = silk_min_int( silk_RSHIFT( SA_Q15, 7 ), silk_uint8_MAX ); - - /***********************************/ - /* Energy Level and SNR estimation */ - /***********************************/ - /* Smoothing coefficient */ - smooth_coef_Q16 = 
silk_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, silk_SMULWB( (opus_int32)SA_Q15, SA_Q15 ) ); - - if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { - smooth_coef_Q16 >>= 1; - } - - for( b = 0; b < VAD_N_BANDS; b++ ) { - /* compute smoothed energy-to-noise ratio per band */ - psSilk_VAD->NrgRatioSmth_Q8[ b ] = silk_SMLAWB( psSilk_VAD->NrgRatioSmth_Q8[ b ], - NrgToNoiseRatio_Q8[ b ] - psSilk_VAD->NrgRatioSmth_Q8[ b ], smooth_coef_Q16 ); - - /* signal to noise ratio in dB per band */ - SNR_Q7 = 3 * ( silk_lin2log( psSilk_VAD->NrgRatioSmth_Q8[b] ) - 8 * 128 ); - /* quality = sigmoid( 0.25 * ( SNR_dB - 16 ) ); */ - psEncC->input_quality_bands_Q15[ b ] = silk_sigm_Q15( silk_RSHIFT( SNR_Q7 - 16 * 128, 4 ) ); - } - - RESTORE_STACK; - return( ret ); -} - -/**************************/ -/* Noise level estimation */ -/**************************/ -# if !defined(OPUS_X86_MAY_HAVE_SSE4_1) -static OPUS_INLINE -#endif -void silk_VAD_GetNoiseLevels( - const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */ - silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ -) -{ - opus_int k; - opus_int32 nl, nrg, inv_nrg; - opus_int coef, min_coef; - - /* Initially faster smoothing */ - if( psSilk_VAD->counter < 1000 ) { /* 1000 = 20 sec */ - min_coef = silk_DIV32_16( silk_int16_MAX, silk_RSHIFT( psSilk_VAD->counter, 4 ) + 1 ); - } else { - min_coef = 0; - } - - for( k = 0; k < VAD_N_BANDS; k++ ) { - /* Get old noise level estimate for current band */ - nl = psSilk_VAD->NL[ k ]; - silk_assert( nl >= 0 ); - - /* Add bias */ - nrg = silk_ADD_POS_SAT32( pX[ k ], psSilk_VAD->NoiseLevelBias[ k ] ); - silk_assert( nrg > 0 ); - - /* Invert energies */ - inv_nrg = silk_DIV32( silk_int32_MAX, nrg ); - silk_assert( inv_nrg >= 0 ); - - /* Less update when subband energy is high */ - if( nrg > silk_LSHIFT( nl, 3 ) ) { - coef = VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 >> 3; - } else if( nrg < nl ) { - coef = VAD_NOISE_LEVEL_SMOOTH_COEF_Q16; - } else { - coef = silk_SMULWB( silk_SMULWW( inv_nrg, nl ), 
VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 << 1 ); - } - - /* Initially faster smoothing */ - coef = silk_max_int( coef, min_coef ); - - /* Smooth inverse energies */ - psSilk_VAD->inv_NL[ k ] = silk_SMLAWB( psSilk_VAD->inv_NL[ k ], inv_nrg - psSilk_VAD->inv_NL[ k ], coef ); - silk_assert( psSilk_VAD->inv_NL[ k ] >= 0 ); - - /* Compute noise level by inverting again */ - nl = silk_DIV32( silk_int32_MAX, psSilk_VAD->inv_NL[ k ] ); - silk_assert( nl >= 0 ); - - /* Limit noise levels (guarantee 7 bits of head room) */ - nl = silk_min( nl, 0x00FFFFFF ); - - /* Store as part of state */ - psSilk_VAD->NL[ k ] = nl; - } - - /* Increment frame counter */ - psSilk_VAD->counter++; -} diff --git a/thirdparty/opus/silk/VQ_WMat_EC.c b/thirdparty/opus/silk/VQ_WMat_EC.c deleted file mode 100644 index 7983f1db80..0000000000 --- a/thirdparty/opus/silk/VQ_WMat_EC.c +++ /dev/null @@ -1,120 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */ -void silk_VQ_WMat_EC_c( - opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ - opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ - const opus_int8 *cb_Q7, /* I codebook */ - const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ - const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. 
weighted error and rate */ - const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ -) -{ - opus_int k, gain_tmp_Q7; - const opus_int8 *cb_row_Q7; - opus_int16 diff_Q14[ 5 ]; - opus_int32 sum1_Q14, sum2_Q16; - - /* Loop over codebook */ - *rate_dist_Q14 = silk_int32_MAX; - cb_row_Q7 = cb_Q7; - for( k = 0; k < L; k++ ) { - gain_tmp_Q7 = cb_gain_Q7[k]; - - diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 ); - diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 ); - diff_Q14[ 2 ] = in_Q14[ 2 ] - silk_LSHIFT( cb_row_Q7[ 2 ], 7 ); - diff_Q14[ 3 ] = in_Q14[ 3 ] - silk_LSHIFT( cb_row_Q7[ 3 ], 7 ); - diff_Q14[ 4 ] = in_Q14[ 4 ] - silk_LSHIFT( cb_row_Q7[ 4 ], 7 ); - - /* Weighted rate */ - sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] ); - - /* Penalty for too large gain */ - sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 ); - - silk_assert( sum1_Q14 >= 0 ); - - /* first row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 1 ], diff_Q14[ 1 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 2 ], diff_Q14[ 2 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 3 ], diff_Q14[ 3 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 4 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 0 ], diff_Q14[ 0 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 0 ] ); - - /* second row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 7 ], diff_Q14[ 2 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 8 ], diff_Q14[ 3 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 9 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 6 ], diff_Q14[ 1 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 1 ] ); - - /* third row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 13 ], diff_Q14[ 3 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] ); - sum2_Q16 
= silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 2 ] ); - - /* fourth row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 19 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 3 ] ); - - /* last row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 24 ], diff_Q14[ 4 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] ); - - silk_assert( sum1_Q14 >= 0 ); - - /* find best */ - if( sum1_Q14 < *rate_dist_Q14 ) { - *rate_dist_Q14 = sum1_Q14; - *ind = (opus_int8)k; - *gain_Q7 = gain_tmp_Q7; - } - - /* Go to next cbk vector */ - cb_row_Q7 += LTP_ORDER; - } -} diff --git a/thirdparty/opus/silk/ana_filt_bank_1.c b/thirdparty/opus/silk/ana_filt_bank_1.c deleted file mode 100644 index 24cfb03fdb..0000000000 --- a/thirdparty/opus/silk/ana_filt_bank_1.c +++ /dev/null @@ -1,74 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Coefficients for 2-band filter bank based on first-order allpass filters */ -static opus_int16 A_fb1_20 = 5394 << 1; -static opus_int16 A_fb1_21 = -24290; /* (opus_int16)(20623 << 1) */ - -/* Split signal into two decimated bands using first-order allpass filters */ -void silk_ana_filt_bank_1( - const opus_int16 *in, /* I Input signal [N] */ - opus_int32 *S, /* I/O State vector [2] */ - opus_int16 *outL, /* O Low band [N/2] */ - opus_int16 *outH, /* O High band [N/2] */ - const opus_int32 N /* I Number of input samples */ -) -{ - opus_int k, N2 = silk_RSHIFT( N, 1 ); - opus_int32 in32, X, Y, out_1, out_2; - - /* Internal variables and state are in Q10 format */ - for( k = 0; k < N2; k++ ) { - /* Convert to Q10 */ - in32 = silk_LSHIFT( (opus_int32)in[ 2 * k ], 10 ); - - /* All-pass section for even input sample */ - Y = silk_SUB32( in32, S[ 0 ] ); - X = silk_SMLAWB( Y, Y, A_fb1_21 ); - out_1 = silk_ADD32( S[ 0 ], X ); - S[ 0 ] = silk_ADD32( in32, X ); - - /* Convert to Q10 */ - in32 = silk_LSHIFT( (opus_int32)in[ 2 * k + 1 ], 10 ); - - /* All-pass section 
for odd input sample, and add to output of previous section */ - Y = silk_SUB32( in32, S[ 1 ] ); - X = silk_SMULWB( Y, A_fb1_20 ); - out_2 = silk_ADD32( S[ 1 ], X ); - S[ 1 ] = silk_ADD32( in32, X ); - - /* Add/subtract, convert back to int16 and store to output */ - outL[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_ADD32( out_2, out_1 ), 11 ) ); - outH[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SUB32( out_2, out_1 ), 11 ) ); - } -} diff --git a/thirdparty/opus/silk/arm/NSQ_neon.c b/thirdparty/opus/silk/arm/NSQ_neon.c deleted file mode 100644 index 9642529973..0000000000 --- a/thirdparty/opus/silk/arm/NSQ_neon.c +++ /dev/null @@ -1,112 +0,0 @@ -/*********************************************************************** -Copyright (C) 2014 Vidyo -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <arm_neon.h> -#include "main.h" -#include "stack_alloc.h" -#include "NSQ.h" -#include "celt/cpu_support.h" -#include "celt/arm/armcpu.h" - -opus_int32 silk_noise_shape_quantizer_short_prediction_neon(const opus_int32 *buf32, const opus_int32 *coef32, opus_int order) -{ - int32x4_t coef0 = vld1q_s32(coef32); - int32x4_t coef1 = vld1q_s32(coef32 + 4); - int32x4_t coef2 = vld1q_s32(coef32 + 8); - int32x4_t coef3 = vld1q_s32(coef32 + 12); - - int32x4_t a0 = vld1q_s32(buf32 - 15); - int32x4_t a1 = vld1q_s32(buf32 - 11); - int32x4_t a2 = vld1q_s32(buf32 - 7); - int32x4_t a3 = vld1q_s32(buf32 - 3); - - int32x4_t b0 = vqdmulhq_s32(coef0, a0); - int32x4_t b1 = vqdmulhq_s32(coef1, a1); - int32x4_t b2 = vqdmulhq_s32(coef2, a2); - int32x4_t b3 = vqdmulhq_s32(coef3, a3); - - int32x4_t c0 = vaddq_s32(b0, b1); - int32x4_t c1 = vaddq_s32(b2, b3); - - int32x4_t d = vaddq_s32(c0, c1); - - int64x2_t e = vpaddlq_s32(d); - - int64x1_t f = vadd_s64(vget_low_s64(e), vget_high_s64(e)); - - opus_int32 out = vget_lane_s32(vreinterpret_s32_s64(f), 0); - - out += silk_RSHIFT( order, 1 ); - - return out; -} - - -opus_int32 silk_NSQ_noise_shape_feedback_loop_neon(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order) -{ - opus_int32 out; - if (order == 8) - { - int32x4_t a00 = vdupq_n_s32(data0[0]); - int32x4_t a01 = 
vld1q_s32(data1); /* data1[0] ... [3] */ - - int32x4_t a0 = vextq_s32 (a00, a01, 3); /* data0[0] data1[0] ...[2] */ - int32x4_t a1 = vld1q_s32(data1 + 3); /* data1[3] ... [6] */ - - /*TODO: Convert these once in advance instead of once per sample, like - silk_noise_shape_quantizer_short_prediction_neon() does.*/ - int16x8_t coef16 = vld1q_s16(coef); - int32x4_t coef0 = vmovl_s16(vget_low_s16(coef16)); - int32x4_t coef1 = vmovl_s16(vget_high_s16(coef16)); - - /*This is not bit-exact with the C version, since we do not drop the - lower 16 bits of each multiply, but wait until the end to truncate - precision. This is an encoder-specific calculation (and unlike - silk_noise_shape_quantizer_short_prediction_neon(), is not meant to - simulate what the decoder will do). We still could use vqdmulhq_s32() - like silk_noise_shape_quantizer_short_prediction_neon() and save - half the multiplies, but the speed difference is not large, since we - then need two extra adds.*/ - int64x2_t b0 = vmull_s32(vget_low_s32(a0), vget_low_s32(coef0)); - int64x2_t b1 = vmlal_s32(b0, vget_high_s32(a0), vget_high_s32(coef0)); - int64x2_t b2 = vmlal_s32(b1, vget_low_s32(a1), vget_low_s32(coef1)); - int64x2_t b3 = vmlal_s32(b2, vget_high_s32(a1), vget_high_s32(coef1)); - - int64x1_t c = vadd_s64(vget_low_s64(b3), vget_high_s64(b3)); - int64x1_t cS = vrshr_n_s64(c, 15); - int32x2_t d = vreinterpret_s32_s64(cS); - - out = vget_lane_s32(d, 0); - vst1q_s32(data1, a0); - vst1q_s32(data1 + 4, a1); - return out; - } - return silk_NSQ_noise_shape_feedback_loop_c(data0, data1, coef, order); -} diff --git a/thirdparty/opus/silk/arm/NSQ_neon.h b/thirdparty/opus/silk/arm/NSQ_neon.h deleted file mode 100644 index 77c946af85..0000000000 --- a/thirdparty/opus/silk/arm/NSQ_neon.h +++ /dev/null @@ -1,113 +0,0 @@ -/*********************************************************************** -Copyright (C) 2014 Vidyo -Redistribution and use in source and binary forms, with or without -modification, are permitted 
provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ -#ifndef SILK_NSQ_NEON_H -#define SILK_NSQ_NEON_H - -#include "cpu_support.h" - -#undef silk_short_prediction_create_arch_coef -/* For vectorized calc, reverse a_Q12 coefs, convert to 32-bit, and shift for vqdmulhq_s32. 
*/ -static OPUS_INLINE void silk_short_prediction_create_arch_coef_neon(opus_int32 *out, const opus_int16 *in, opus_int order) -{ - out[15] = in[0] << 15; - out[14] = in[1] << 15; - out[13] = in[2] << 15; - out[12] = in[3] << 15; - out[11] = in[4] << 15; - out[10] = in[5] << 15; - out[9] = in[6] << 15; - out[8] = in[7] << 15; - out[7] = in[8] << 15; - out[6] = in[9] << 15; - - if (order == 16) - { - out[5] = in[10] << 15; - out[4] = in[11] << 15; - out[3] = in[12] << 15; - out[2] = in[13] << 15; - out[1] = in[14] << 15; - out[0] = in[15] << 15; - } - else - { - out[5] = 0; - out[4] = 0; - out[3] = 0; - out[2] = 0; - out[1] = 0; - out[0] = 0; - } -} - -#if defined(OPUS_ARM_PRESUME_NEON_INTR) - -#define silk_short_prediction_create_arch_coef(out, in, order) \ - (silk_short_prediction_create_arch_coef_neon(out, in, order)) - -#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_MAY_HAVE_NEON_INTR) - -#define silk_short_prediction_create_arch_coef(out, in, order) \ - do { if (arch == OPUS_ARCH_ARM_NEON) { silk_short_prediction_create_arch_coef_neon(out, in, order); } } while (0) - -#endif - -opus_int32 silk_noise_shape_quantizer_short_prediction_neon(const opus_int32 *buf32, const opus_int32 *coef32, opus_int order); - -opus_int32 silk_NSQ_noise_shape_feedback_loop_neon(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order); - -#if defined(OPUS_ARM_PRESUME_NEON_INTR) -#undef silk_noise_shape_quantizer_short_prediction -#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch) \ - ((void)arch,silk_noise_shape_quantizer_short_prediction_neon(in, coefRev, order)) - -#undef silk_NSQ_noise_shape_feedback_loop -#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch) ((void)arch,silk_NSQ_noise_shape_feedback_loop_neon(data0, data1, coef, order)) - -#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_MAY_HAVE_NEON_INTR) - -/* silk_noise_shape_quantizer_short_prediction implementations take different 
parameters based on arch - (coef vs. coefRev) so can't use the usual IMPL table implementation */ -#undef silk_noise_shape_quantizer_short_prediction -#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch) \ - (arch == OPUS_ARCH_ARM_NEON ? \ - silk_noise_shape_quantizer_short_prediction_neon(in, coefRev, order) : \ - silk_noise_shape_quantizer_short_prediction_c(in, coef, order)) - -extern opus_int32 - (*const SILK_NSQ_NOISE_SHAPE_FEEDBACK_LOOP_IMPL[OPUS_ARCHMASK+1])( - const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, - opus_int order); - -#undef silk_NSQ_noise_shape_feedback_loop -#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch) \ - (SILK_NSQ_NOISE_SHAPE_FEEDBACK_LOOP_IMPL[(arch)&OPUS_ARCHMASK](data0, data1, \ - coef, order)) - -#endif - -#endif /* SILK_NSQ_NEON_H */ diff --git a/thirdparty/opus/silk/arm/SigProc_FIX_armv4.h b/thirdparty/opus/silk/arm/SigProc_FIX_armv4.h deleted file mode 100644 index ff62b1e5d6..0000000000 --- a/thirdparty/opus/silk/arm/SigProc_FIX_armv4.h +++ /dev/null @@ -1,47 +0,0 @@ -/*********************************************************************** -Copyright (C) 2013 Xiph.Org Foundation and contributors -Copyright (c) 2013 Parrot -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_SIGPROC_FIX_ARMv4_H -#define SILK_SIGPROC_FIX_ARMv4_H - -#undef silk_MLA -static OPUS_INLINE opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b, - opus_int32 c) -{ - opus_int32 res; - __asm__( - "#silk_MLA\n\t" - "mla %0, %1, %2, %3\n\t" - : "=&r"(res) - : "r"(b), "r"(c), "r"(a) - ); - return res; -} -#define silk_MLA(a, b, c) (silk_MLA_armv4(a, b, c)) - -#endif diff --git a/thirdparty/opus/silk/arm/SigProc_FIX_armv5e.h b/thirdparty/opus/silk/arm/SigProc_FIX_armv5e.h deleted file mode 100644 index 617a09cab1..0000000000 --- a/thirdparty/opus/silk/arm/SigProc_FIX_armv5e.h +++ /dev/null @@ -1,61 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Copyright (c) 2013 Parrot -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifndef SILK_SIGPROC_FIX_ARMv5E_H -#define SILK_SIGPROC_FIX_ARMv5E_H - -#undef silk_SMULTT -static OPUS_INLINE opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b) -{ - opus_int32 res; - __asm__( - "#silk_SMULTT\n\t" - "smultt %0, %1, %2\n\t" - : "=r"(res) - : "%r"(a), "r"(b) - ); - return res; -} -#define silk_SMULTT(a, b) (silk_SMULTT_armv5e(a, b)) - -#undef silk_SMLATT -static OPUS_INLINE opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b, - opus_int32 c) -{ - opus_int32 res; - __asm__( - "#silk_SMLATT\n\t" - "smlatt %0, %1, %2, %3\n\t" - : "=r"(res) - : "%r"(b), "r"(c), "r"(a) - ); - return res; -} -#define silk_SMLATT(a, b, c) (silk_SMLATT_armv5e(a, b, c)) - -#endif diff --git a/thirdparty/opus/silk/arm/arm_silk_map.c b/thirdparty/opus/silk/arm/arm_silk_map.c deleted file mode 100644 index 9bd86a7b21..0000000000 --- a/thirdparty/opus/silk/arm/arm_silk_map.c +++ /dev/null @@ -1,55 +0,0 @@ -/*********************************************************************** -Copyright (C) 2014 Vidyo -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#include "NSQ.h" - -#if defined(OPUS_HAVE_RTCD) - -# if (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \ - !defined(OPUS_ARM_PRESUME_NEON_INTR)) - -/*There is no table for silk_noise_shape_quantizer_short_prediction because the - NEON version takes different parameters than the C version. - Instead RTCD is done via if statements at the call sites. 
- See NSQ_neon.h for details.*/ - -opus_int32 - (*const SILK_NSQ_NOISE_SHAPE_FEEDBACK_LOOP_IMPL[OPUS_ARCHMASK+1])( - const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, - opus_int order) = { - silk_NSQ_noise_shape_feedback_loop_c, /* ARMv4 */ - silk_NSQ_noise_shape_feedback_loop_c, /* EDSP */ - silk_NSQ_noise_shape_feedback_loop_c, /* Media */ - silk_NSQ_noise_shape_feedback_loop_neon, /* NEON */ -}; - -# endif - -#endif /* OPUS_HAVE_RTCD */ diff --git a/thirdparty/opus/silk/arm/macros_arm64.h b/thirdparty/opus/silk/arm/macros_arm64.h deleted file mode 100644 index ed030413c5..0000000000 --- a/thirdparty/opus/silk/arm/macros_arm64.h +++ /dev/null @@ -1,39 +0,0 @@ -/*********************************************************************** -Copyright (C) 2015 Vidyo -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_MACROS_ARM64_H -#define SILK_MACROS_ARM64_H - -#include <arm_neon.h> - -#undef silk_ADD_SAT32 -#define silk_ADD_SAT32(a, b) (vqadds_s32((a), (b))) - -#undef silk_SUB_SAT32 -#define silk_SUB_SAT32(a, b) (vqsubs_s32((a), (b))) - -#endif /* SILK_MACROS_ARM64_H */ diff --git a/thirdparty/opus/silk/arm/macros_armv4.h b/thirdparty/opus/silk/arm/macros_armv4.h deleted file mode 100644 index 3f30e97288..0000000000 --- a/thirdparty/opus/silk/arm/macros_armv4.h +++ /dev/null @@ -1,103 +0,0 @@ -/*********************************************************************** -Copyright (C) 2013 Xiph.Org Foundation and contributors. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_MACROS_ARMv4_H -#define SILK_MACROS_ARMv4_H - -/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ -#undef silk_SMULWB -static OPUS_INLINE opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b) -{ - unsigned rd_lo; - int rd_hi; - __asm__( - "#silk_SMULWB\n\t" - "smull %0, %1, %2, %3\n\t" - : "=&r"(rd_lo), "=&r"(rd_hi) - : "%r"(a), "r"(b<<16) - ); - return rd_hi; -} -#define silk_SMULWB(a, b) (silk_SMULWB_armv4(a, b)) - -/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */ -#undef silk_SMLAWB -#define silk_SMLAWB(a, b, c) ((a) + silk_SMULWB(b, c)) - -/* (a32 * (b32 >> 16)) >> 16 */ -#undef silk_SMULWT -static OPUS_INLINE opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b) -{ - unsigned rd_lo; - int rd_hi; - __asm__( - "#silk_SMULWT\n\t" - "smull %0, %1, %2, %3\n\t" - : "=&r"(rd_lo), "=&r"(rd_hi) - : "%r"(a), "r"(b&~0xFFFF) - ); - return rd_hi; -} -#define silk_SMULWT(a, b) (silk_SMULWT_armv4(a, b)) - -/* a32 + (b32 * (c32 >> 16)) >> 16 */ -#undef silk_SMLAWT -#define silk_SMLAWT(a, b, c) ((a) + silk_SMULWT(b, c)) - -/* (a32 * b32) >> 16 */ -#undef silk_SMULWW 
-static OPUS_INLINE opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b) -{ - unsigned rd_lo; - int rd_hi; - __asm__( - "#silk_SMULWW\n\t" - "smull %0, %1, %2, %3\n\t" - : "=&r"(rd_lo), "=&r"(rd_hi) - : "%r"(a), "r"(b) - ); - return (rd_hi<<16)+(rd_lo>>16); -} -#define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b)) - -#undef silk_SMLAWW -static OPUS_INLINE opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b, - opus_int32 c) -{ - unsigned rd_lo; - int rd_hi; - __asm__( - "#silk_SMLAWW\n\t" - "smull %0, %1, %2, %3\n\t" - : "=&r"(rd_lo), "=&r"(rd_hi) - : "%r"(b), "r"(c) - ); - return a+(rd_hi<<16)+(rd_lo>>16); -} -#define silk_SMLAWW(a, b, c) (silk_SMLAWW_armv4(a, b, c)) - -#endif /* SILK_MACROS_ARMv4_H */ diff --git a/thirdparty/opus/silk/arm/macros_armv5e.h b/thirdparty/opus/silk/arm/macros_armv5e.h deleted file mode 100644 index aad4117e46..0000000000 --- a/thirdparty/opus/silk/arm/macros_armv5e.h +++ /dev/null @@ -1,213 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Copyright (c) 2013 Parrot -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_MACROS_ARMv5E_H -#define SILK_MACROS_ARMv5E_H - -/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ -#undef silk_SMULWB -static OPUS_INLINE opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b) -{ - int res; - __asm__( - "#silk_SMULWB\n\t" - "smulwb %0, %1, %2\n\t" - : "=r"(res) - : "r"(a), "r"(b) - ); - return res; -} -#define silk_SMULWB(a, b) (silk_SMULWB_armv5e(a, b)) - -/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */ -#undef silk_SMLAWB -static OPUS_INLINE opus_int32 silk_SMLAWB_armv5e(opus_int32 a, opus_int32 b, - opus_int16 c) -{ - int res; - __asm__( - "#silk_SMLAWB\n\t" - "smlawb %0, %1, %2, %3\n\t" - : "=r"(res) - : "r"(b), "r"(c), "r"(a) - ); - return res; -} -#define silk_SMLAWB(a, b, c) (silk_SMLAWB_armv5e(a, b, c)) - -/* (a32 * (b32 >> 16)) >> 16 */ -#undef silk_SMULWT -static OPUS_INLINE opus_int32 silk_SMULWT_armv5e(opus_int32 a, opus_int32 b) -{ - int res; - __asm__( - "#silk_SMULWT\n\t" - "smulwt %0, %1, %2\n\t" - : "=r"(res) - : "r"(a), "r"(b) - ); - return res; -} -#define silk_SMULWT(a, b) (silk_SMULWT_armv5e(a, b)) - -/* a32 + (b32 * 
(c32 >> 16)) >> 16 */ -#undef silk_SMLAWT -static OPUS_INLINE opus_int32 silk_SMLAWT_armv5e(opus_int32 a, opus_int32 b, - opus_int32 c) -{ - int res; - __asm__( - "#silk_SMLAWT\n\t" - "smlawt %0, %1, %2, %3\n\t" - : "=r"(res) - : "r"(b), "r"(c), "r"(a) - ); - return res; -} -#define silk_SMLAWT(a, b, c) (silk_SMLAWT_armv5e(a, b, c)) - -/* (opus_int32)((opus_int16)(a3))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */ -#undef silk_SMULBB -static OPUS_INLINE opus_int32 silk_SMULBB_armv5e(opus_int32 a, opus_int32 b) -{ - int res; - __asm__( - "#silk_SMULBB\n\t" - "smulbb %0, %1, %2\n\t" - : "=r"(res) - : "%r"(a), "r"(b) - ); - return res; -} -#define silk_SMULBB(a, b) (silk_SMULBB_armv5e(a, b)) - -/* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)) output have to be 32bit int */ -#undef silk_SMLABB -static OPUS_INLINE opus_int32 silk_SMLABB_armv5e(opus_int32 a, opus_int32 b, - opus_int32 c) -{ - int res; - __asm__( - "#silk_SMLABB\n\t" - "smlabb %0, %1, %2, %3\n\t" - : "=r"(res) - : "%r"(b), "r"(c), "r"(a) - ); - return res; -} -#define silk_SMLABB(a, b, c) (silk_SMLABB_armv5e(a, b, c)) - -/* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */ -#undef silk_SMULBT -static OPUS_INLINE opus_int32 silk_SMULBT_armv5e(opus_int32 a, opus_int32 b) -{ - int res; - __asm__( - "#silk_SMULBT\n\t" - "smulbt %0, %1, %2\n\t" - : "=r"(res) - : "r"(a), "r"(b) - ); - return res; -} -#define silk_SMULBT(a, b) (silk_SMULBT_armv5e(a, b)) - -/* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */ -#undef silk_SMLABT -static OPUS_INLINE opus_int32 silk_SMLABT_armv5e(opus_int32 a, opus_int32 b, - opus_int32 c) -{ - int res; - __asm__( - "#silk_SMLABT\n\t" - "smlabt %0, %1, %2, %3\n\t" - : "=r"(res) - : "r"(b), "r"(c), "r"(a) - ); - return res; -} -#define silk_SMLABT(a, b, c) (silk_SMLABT_armv5e(a, b, c)) - -/* add/subtract with output saturated */ -#undef silk_ADD_SAT32 -static OPUS_INLINE opus_int32 silk_ADD_SAT32_armv5e(opus_int32 a, opus_int32 b) -{ 
- int res; - __asm__( - "#silk_ADD_SAT32\n\t" - "qadd %0, %1, %2\n\t" - : "=r"(res) - : "%r"(a), "r"(b) - ); - return res; -} -#define silk_ADD_SAT32(a, b) (silk_ADD_SAT32_armv5e(a, b)) - -#undef silk_SUB_SAT32 -static OPUS_INLINE opus_int32 silk_SUB_SAT32_armv5e(opus_int32 a, opus_int32 b) -{ - int res; - __asm__( - "#silk_SUB_SAT32\n\t" - "qsub %0, %1, %2\n\t" - : "=r"(res) - : "r"(a), "r"(b) - ); - return res; -} -#define silk_SUB_SAT32(a, b) (silk_SUB_SAT32_armv5e(a, b)) - -#undef silk_CLZ16 -static OPUS_INLINE opus_int32 silk_CLZ16_armv5(opus_int16 in16) -{ - int res; - __asm__( - "#silk_CLZ16\n\t" - "clz %0, %1;\n" - : "=r"(res) - : "r"(in16<<16|0x8000) - ); - return res; -} -#define silk_CLZ16(in16) (silk_CLZ16_armv5(in16)) - -#undef silk_CLZ32 -static OPUS_INLINE opus_int32 silk_CLZ32_armv5(opus_int32 in32) -{ - int res; - __asm__( - "#silk_CLZ32\n\t" - "clz %0, %1\n\t" - : "=r"(res) - : "r"(in32) - ); - return res; -} -#define silk_CLZ32(in32) (silk_CLZ32_armv5(in32)) - -#endif /* SILK_MACROS_ARMv5E_H */ diff --git a/thirdparty/opus/silk/biquad_alt.c b/thirdparty/opus/silk/biquad_alt.c deleted file mode 100644 index d55f5ee92e..0000000000 --- a/thirdparty/opus/silk/biquad_alt.c +++ /dev/null @@ -1,78 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -/* * - * silk_biquad_alt.c * - * * - * Second order ARMA filter * - * Can handle slowly varying filter coefficients * - * */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Second order ARMA filter, alternative implementation */ -void silk_biquad_alt( - const opus_int16 *in, /* I input signal */ - const opus_int32 *B_Q28, /* I MA coefficients [3] */ - const opus_int32 *A_Q28, /* I AR coefficients [2] */ - opus_int32 *S, /* I/O State vector [2] */ - opus_int16 *out, /* O output signal */ - const opus_int32 len, /* I signal length (must be even) */ - opus_int stride /* I Operate on interleaved signal if > 1 */ -) -{ - /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */ - opus_int k; - opus_int32 inval, A0_U_Q28, A0_L_Q28, A1_U_Q28, A1_L_Q28, out32_Q14; - - /* Negate A_Q28 values and split in two parts */ - A0_L_Q28 = ( -A_Q28[ 0 ] ) & 0x00003FFF; /* lower part */ - A0_U_Q28 = 
silk_RSHIFT( -A_Q28[ 0 ], 14 ); /* upper part */ - A1_L_Q28 = ( -A_Q28[ 1 ] ) & 0x00003FFF; /* lower part */ - A1_U_Q28 = silk_RSHIFT( -A_Q28[ 1 ], 14 ); /* upper part */ - - for( k = 0; k < len; k++ ) { - /* S[ 0 ], S[ 1 ]: Q12 */ - inval = in[ k * stride ]; - out32_Q14 = silk_LSHIFT( silk_SMLAWB( S[ 0 ], B_Q28[ 0 ], inval ), 2 ); - - S[ 0 ] = S[1] + silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14, A0_L_Q28 ), 14 ); - S[ 0 ] = silk_SMLAWB( S[ 0 ], out32_Q14, A0_U_Q28 ); - S[ 0 ] = silk_SMLAWB( S[ 0 ], B_Q28[ 1 ], inval); - - S[ 1 ] = silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14, A1_L_Q28 ), 14 ); - S[ 1 ] = silk_SMLAWB( S[ 1 ], out32_Q14, A1_U_Q28 ); - S[ 1 ] = silk_SMLAWB( S[ 1 ], B_Q28[ 2 ], inval ); - - /* Scale back to Q0 and saturate */ - out[ k * stride ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14 + (1<<14) - 1, 14 ) ); - } -} diff --git a/thirdparty/opus/silk/bwexpander.c b/thirdparty/opus/silk/bwexpander.c deleted file mode 100644 index 2eb4456695..0000000000 --- a/thirdparty/opus/silk/bwexpander.c +++ /dev/null @@ -1,51 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Chirp (bandwidth expand) LP AR filter */ -void silk_bwexpander( - opus_int16 *ar, /* I/O AR filter to be expanded (without leading 1) */ - const opus_int d, /* I Length of ar */ - opus_int32 chirp_Q16 /* I Chirp factor (typically in the range 0 to 1) */ -) -{ - opus_int i; - opus_int32 chirp_minus_one_Q16 = chirp_Q16 - 65536; - - /* NB: Dont use silk_SMULWB, instead of silk_RSHIFT_ROUND( silk_MUL(), 16 ), below. 
*/ - /* Bias in silk_SMULWB can lead to unstable filters */ - for( i = 0; i < d - 1; i++ ) { - ar[ i ] = (opus_int16)silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, ar[ i ] ), 16 ); - chirp_Q16 += silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, chirp_minus_one_Q16 ), 16 ); - } - ar[ d - 1 ] = (opus_int16)silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, ar[ d - 1 ] ), 16 ); -} diff --git a/thirdparty/opus/silk/bwexpander_32.c b/thirdparty/opus/silk/bwexpander_32.c deleted file mode 100644 index d0010f73df..0000000000 --- a/thirdparty/opus/silk/bwexpander_32.c +++ /dev/null @@ -1,50 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Chirp (bandwidth expand) LP AR filter */ -void silk_bwexpander_32( - opus_int32 *ar, /* I/O AR filter to be expanded (without leading 1) */ - const opus_int d, /* I Length of ar */ - opus_int32 chirp_Q16 /* I Chirp factor in Q16 */ -) -{ - opus_int i; - opus_int32 chirp_minus_one_Q16 = chirp_Q16 - 65536; - - for( i = 0; i < d - 1; i++ ) { - ar[ i ] = silk_SMULWW( chirp_Q16, ar[ i ] ); - chirp_Q16 += silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, chirp_minus_one_Q16 ), 16 ); - } - ar[ d - 1 ] = silk_SMULWW( chirp_Q16, ar[ d - 1 ] ); -} - diff --git a/thirdparty/opus/silk/check_control_input.c b/thirdparty/opus/silk/check_control_input.c deleted file mode 100644 index b5de9ce48d..0000000000 --- a/thirdparty/opus/silk/check_control_input.c +++ /dev/null @@ -1,106 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "control.h" -#include "errors.h" - -/* Check encoder control struct */ -opus_int check_control_input( - silk_EncControlStruct *encControl /* I Control structure */ -) -{ - silk_assert( encControl != NULL ); - - if( ( ( encControl->API_sampleRate != 8000 ) && - ( encControl->API_sampleRate != 12000 ) && - ( encControl->API_sampleRate != 16000 ) && - ( encControl->API_sampleRate != 24000 ) && - ( encControl->API_sampleRate != 32000 ) && - ( encControl->API_sampleRate != 44100 ) && - ( encControl->API_sampleRate != 48000 ) ) || - ( ( encControl->desiredInternalSampleRate != 8000 ) && - ( encControl->desiredInternalSampleRate != 12000 ) && - ( encControl->desiredInternalSampleRate != 16000 ) ) || - ( ( encControl->maxInternalSampleRate != 8000 ) && - ( encControl->maxInternalSampleRate != 12000 ) && - ( encControl->maxInternalSampleRate != 16000 ) ) || - ( ( encControl->minInternalSampleRate != 8000 ) && - ( encControl->minInternalSampleRate != 12000 ) && - ( encControl->minInternalSampleRate != 16000 ) ) || - ( encControl->minInternalSampleRate > encControl->desiredInternalSampleRate ) || - ( encControl->maxInternalSampleRate < encControl->desiredInternalSampleRate ) || - ( encControl->minInternalSampleRate > encControl->maxInternalSampleRate ) ) { - silk_assert( 0 ); - return SILK_ENC_FS_NOT_SUPPORTED; - } - if( encControl->payloadSize_ms != 10 && - encControl->payloadSize_ms != 20 && - encControl->payloadSize_ms != 40 && - encControl->payloadSize_ms != 60 ) { - silk_assert( 0 ); - return SILK_ENC_PACKET_SIZE_NOT_SUPPORTED; - } - if( encControl->packetLossPercentage < 0 || encControl->packetLossPercentage > 100 ) { - silk_assert( 0 ); - return SILK_ENC_INVALID_LOSS_RATE; - } - if( encControl->useDTX < 0 || encControl->useDTX > 1 ) { - silk_assert( 0 ); - return SILK_ENC_INVALID_DTX_SETTING; - } - if( 
encControl->useCBR < 0 || encControl->useCBR > 1 ) { - silk_assert( 0 ); - return SILK_ENC_INVALID_CBR_SETTING; - } - if( encControl->useInBandFEC < 0 || encControl->useInBandFEC > 1 ) { - silk_assert( 0 ); - return SILK_ENC_INVALID_INBAND_FEC_SETTING; - } - if( encControl->nChannelsAPI < 1 || encControl->nChannelsAPI > ENCODER_NUM_CHANNELS ) { - silk_assert( 0 ); - return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR; - } - if( encControl->nChannelsInternal < 1 || encControl->nChannelsInternal > ENCODER_NUM_CHANNELS ) { - silk_assert( 0 ); - return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR; - } - if( encControl->nChannelsInternal > encControl->nChannelsAPI ) { - silk_assert( 0 ); - return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR; - } - if( encControl->complexity < 0 || encControl->complexity > 10 ) { - silk_assert( 0 ); - return SILK_ENC_INVALID_COMPLEXITY_SETTING; - } - - return SILK_NO_ERROR; -} diff --git a/thirdparty/opus/silk/code_signs.c b/thirdparty/opus/silk/code_signs.c deleted file mode 100644 index dfd1dca9a1..0000000000 --- a/thirdparty/opus/silk/code_signs.c +++ /dev/null @@ -1,115 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/*#define silk_enc_map(a) ((a) > 0 ? 1 : 0)*/ -/*#define silk_dec_map(a) ((a) > 0 ? 1 : -1)*/ -/* shifting avoids if-statement */ -#define silk_enc_map(a) ( silk_RSHIFT( (a), 15 ) + 1 ) -#define silk_dec_map(a) ( silk_LSHIFT( (a), 1 ) - 1 ) - -/* Encodes signs of excitation */ -void silk_encode_signs( - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - const opus_int8 pulses[], /* I pulse signal */ - opus_int length, /* I length of input */ - const opus_int signalType, /* I Signal type */ - const opus_int quantOffsetType, /* I Quantization offset type */ - const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */ -) -{ - opus_int i, j, p; - opus_uint8 icdf[ 2 ]; - const opus_int8 *q_ptr; - const opus_uint8 *icdf_ptr; - - icdf[ 1 ] = 0; - q_ptr = pulses; - i = silk_SMULBB( 7, silk_ADD_LSHIFT( quantOffsetType, signalType, 1 ) ); - icdf_ptr = &silk_sign_iCDF[ i ]; - length = silk_RSHIFT( length + SHELL_CODEC_FRAME_LENGTH/2, LOG2_SHELL_CODEC_FRAME_LENGTH ); - for( i = 0; i < length; i++ ) { - p = sum_pulses[ i ]; - if( p > 0 ) { - 
icdf[ 0 ] = icdf_ptr[ silk_min( p & 0x1F, 6 ) ]; - for( j = 0; j < SHELL_CODEC_FRAME_LENGTH; j++ ) { - if( q_ptr[ j ] != 0 ) { - ec_enc_icdf( psRangeEnc, silk_enc_map( q_ptr[ j ]), icdf, 8 ); - } - } - } - q_ptr += SHELL_CODEC_FRAME_LENGTH; - } -} - -/* Decodes signs of excitation */ -void silk_decode_signs( - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int16 pulses[], /* I/O pulse signal */ - opus_int length, /* I length of input */ - const opus_int signalType, /* I Signal type */ - const opus_int quantOffsetType, /* I Quantization offset type */ - const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */ -) -{ - opus_int i, j, p; - opus_uint8 icdf[ 2 ]; - opus_int16 *q_ptr; - const opus_uint8 *icdf_ptr; - - icdf[ 1 ] = 0; - q_ptr = pulses; - i = silk_SMULBB( 7, silk_ADD_LSHIFT( quantOffsetType, signalType, 1 ) ); - icdf_ptr = &silk_sign_iCDF[ i ]; - length = silk_RSHIFT( length + SHELL_CODEC_FRAME_LENGTH/2, LOG2_SHELL_CODEC_FRAME_LENGTH ); - for( i = 0; i < length; i++ ) { - p = sum_pulses[ i ]; - if( p > 0 ) { - icdf[ 0 ] = icdf_ptr[ silk_min( p & 0x1F, 6 ) ]; - for( j = 0; j < SHELL_CODEC_FRAME_LENGTH; j++ ) { - if( q_ptr[ j ] > 0 ) { - /* attach sign */ -#if 0 - /* conditional implementation */ - if( ec_dec_icdf( psRangeDec, icdf, 8 ) == 0 ) { - q_ptr[ j ] = -q_ptr[ j ]; - } -#else - /* implementation with shift, subtraction, multiplication */ - q_ptr[ j ] *= silk_dec_map( ec_dec_icdf( psRangeDec, icdf, 8 ) ); -#endif - } - } - } - q_ptr += SHELL_CODEC_FRAME_LENGTH; - } -} diff --git a/thirdparty/opus/silk/control.h b/thirdparty/opus/silk/control.h deleted file mode 100644 index 747e5426a0..0000000000 --- a/thirdparty/opus/silk/control.h +++ /dev/null @@ -1,142 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifndef SILK_CONTROL_H -#define SILK_CONTROL_H - -#include "typedef.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/* Decoder API flags */ -#define FLAG_DECODE_NORMAL 0 -#define FLAG_PACKET_LOST 1 -#define FLAG_DECODE_LBRR 2 - -/***********************************************/ -/* Structure for controlling encoder operation */ -/***********************************************/ -typedef struct { - /* I: Number of channels; 1/2 */ - opus_int32 nChannelsAPI; - - /* I: Number of channels; 1/2 */ - opus_int32 nChannelsInternal; - - /* I: Input signal sampling rate in Hertz; 8000/12000/16000/24000/32000/44100/48000 */ - opus_int32 API_sampleRate; - - /* I: Maximum internal sampling rate in Hertz; 8000/12000/16000 */ - opus_int32 maxInternalSampleRate; - - /* I: Minimum internal sampling rate in Hertz; 8000/12000/16000 */ - opus_int32 minInternalSampleRate; - - /* I: Soft request for internal sampling rate in Hertz; 8000/12000/16000 */ - opus_int32 desiredInternalSampleRate; - - /* I: Number of samples per packet in milliseconds; 10/20/40/60 */ - opus_int payloadSize_ms; - - /* I: Bitrate during active speech in bits/second; internally limited */ - opus_int32 bitRate; - - /* I: Uplink packet loss in percent (0-100) */ - opus_int packetLossPercentage; - - /* I: Complexity mode; 0 is lowest, 10 is highest complexity */ - opus_int complexity; - - /* I: Flag to enable in-band Forward Error Correction (FEC); 0/1 */ - opus_int useInBandFEC; - - /* I: Flag to enable discontinuous transmission (DTX); 0/1 */ - opus_int useDTX; - - /* I: Flag to use constant bitrate */ - opus_int useCBR; - - /* I: Maximum number of bits allowed for the frame */ - opus_int maxBits; - - /* I: Causes a smooth downmix to mono */ - opus_int toMono; - - /* I: Opus encoder is allowing us to switch bandwidth */ - opus_int opusCanSwitch; - - /* I: Make frames as independent as possible (but still use LPC) */ - opus_int 
reducedDependency; - - /* O: Internal sampling rate used, in Hertz; 8000/12000/16000 */ - opus_int32 internalSampleRate; - - /* O: Flag that bandwidth switching is allowed (because low voice activity) */ - opus_int allowBandwidthSwitch; - - /* O: Flag that SILK runs in WB mode without variable LP filter (use for switching between WB/SWB/FB) */ - opus_int inWBmodeWithoutVariableLP; - - /* O: Stereo width */ - opus_int stereoWidth_Q14; - - /* O: Tells the Opus encoder we're ready to switch */ - opus_int switchReady; - -} silk_EncControlStruct; - -/**************************************************************************/ -/* Structure for controlling decoder operation and reading decoder status */ -/**************************************************************************/ -typedef struct { - /* I: Number of channels; 1/2 */ - opus_int32 nChannelsAPI; - - /* I: Number of channels; 1/2 */ - opus_int32 nChannelsInternal; - - /* I: Output signal sampling rate in Hertz; 8000/12000/16000/24000/32000/44100/48000 */ - opus_int32 API_sampleRate; - - /* I: Internal sampling rate used, in Hertz; 8000/12000/16000 */ - opus_int32 internalSampleRate; - - /* I: Number of samples per packet in milliseconds; 10/20/40/60 */ - opus_int payloadSize_ms; - - /* O: Pitch lag of previous frame (0 if unvoiced), measured in samples at 48 kHz */ - opus_int prevPitchLag; -} silk_DecControlStruct; - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/silk/control_SNR.c b/thirdparty/opus/silk/control_SNR.c deleted file mode 100644 index cee87eb0d8..0000000000 --- a/thirdparty/opus/silk/control_SNR.c +++ /dev/null @@ -1,76 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "tuning_parameters.h" - -/* Control SNR of redidual quantizer */ -opus_int silk_control_SNR( - silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */ - opus_int32 TargetRate_bps /* I Target max bitrate (bps) */ -) -{ - opus_int k, ret = SILK_NO_ERROR; - opus_int32 frac_Q6; - const opus_int32 *rateTable; - - /* Set bitrate/coding quality */ - TargetRate_bps = silk_LIMIT( TargetRate_bps, MIN_TARGET_RATE_BPS, MAX_TARGET_RATE_BPS ); - if( TargetRate_bps != psEncC->TargetRate_bps ) { - psEncC->TargetRate_bps = TargetRate_bps; - - /* If new TargetRate_bps, translate to SNR_dB value */ - if( psEncC->fs_kHz == 8 ) { - rateTable = silk_TargetRate_table_NB; - } else if( psEncC->fs_kHz == 12 ) { - rateTable = silk_TargetRate_table_MB; - } else { - rateTable = silk_TargetRate_table_WB; - } - - /* Reduce bitrate for 10 ms modes in these calculations */ - if( psEncC->nb_subfr == 2 ) { - TargetRate_bps -= REDUCE_BITRATE_10_MS_BPS; - } - - /* Find bitrate interval in table and interpolate */ - for( k = 1; k < TARGET_RATE_TAB_SZ; k++ ) { - if( TargetRate_bps <= rateTable[ k ] ) { - frac_Q6 = silk_DIV32( silk_LSHIFT( TargetRate_bps - rateTable[ k - 1 ], 6 ), - rateTable[ k ] - rateTable[ k - 1 ] ); - psEncC->SNR_dB_Q7 = silk_LSHIFT( silk_SNR_table_Q1[ k - 1 ], 6 ) + silk_MUL( frac_Q6, silk_SNR_table_Q1[ k ] - silk_SNR_table_Q1[ k - 1 ] ); - break; - } - } - } - - return ret; -} diff --git a/thirdparty/opus/silk/control_audio_bandwidth.c b/thirdparty/opus/silk/control_audio_bandwidth.c deleted file mode 100644 index 4f9bc5cbda..0000000000 --- a/thirdparty/opus/silk/control_audio_bandwidth.c +++ /dev/null @@ -1,126 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "tuning_parameters.h" - -/* Control internal sampling rate */ -opus_int silk_control_audio_bandwidth( - silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */ - silk_EncControlStruct *encControl /* I Control structure */ -) -{ - opus_int fs_kHz; - opus_int32 fs_Hz; - - fs_kHz = psEncC->fs_kHz; - fs_Hz = silk_SMULBB( fs_kHz, 1000 ); - if( fs_Hz == 0 ) { - /* Encoder has just been initialized */ - fs_Hz = silk_min( psEncC->desiredInternal_fs_Hz, psEncC->API_fs_Hz ); - fs_kHz = silk_DIV32_16( fs_Hz, 1000 ); - } else if( fs_Hz > psEncC->API_fs_Hz || fs_Hz > psEncC->maxInternal_fs_Hz || fs_Hz < psEncC->minInternal_fs_Hz ) { - /* Make sure internal rate is not higher than external rate or maximum allowed, or lower than minimum allowed */ - fs_Hz = psEncC->API_fs_Hz; - fs_Hz = silk_min( fs_Hz, psEncC->maxInternal_fs_Hz ); - fs_Hz = silk_max( fs_Hz, psEncC->minInternal_fs_Hz ); - fs_kHz = silk_DIV32_16( fs_Hz, 1000 ); - } else { - /* State machine for the internal sampling rate switching */ - if( psEncC->sLP.transition_frame_no >= TRANSITION_FRAMES ) { - /* Stop transition phase */ - psEncC->sLP.mode = 0; - } - if( psEncC->allow_bandwidth_switch || encControl->opusCanSwitch ) { - /* Check if we should switch down */ - if( silk_SMULBB( psEncC->fs_kHz, 1000 ) > psEncC->desiredInternal_fs_Hz ) - { - /* Switch down */ - if( psEncC->sLP.mode == 0 ) { - /* New transition */ - psEncC->sLP.transition_frame_no = TRANSITION_FRAMES; - - /* Reset transition filter state */ - silk_memset( psEncC->sLP.In_LP_State, 0, sizeof( psEncC->sLP.In_LP_State ) ); - } - if( encControl->opusCanSwitch ) { - /* Stop transition phase */ - psEncC->sLP.mode = 0; - - /* Switch to a lower sample frequency */ - fs_kHz = psEncC->fs_kHz == 16 ? 
12 : 8; - } else { - if( psEncC->sLP.transition_frame_no <= 0 ) { - encControl->switchReady = 1; - /* Make room for redundancy */ - encControl->maxBits -= encControl->maxBits * 5 / ( encControl->payloadSize_ms + 5 ); - } else { - /* Direction: down (at double speed) */ - psEncC->sLP.mode = -2; - } - } - } - else - /* Check if we should switch up */ - if( silk_SMULBB( psEncC->fs_kHz, 1000 ) < psEncC->desiredInternal_fs_Hz ) - { - /* Switch up */ - if( encControl->opusCanSwitch ) { - /* Switch to a higher sample frequency */ - fs_kHz = psEncC->fs_kHz == 8 ? 12 : 16; - - /* New transition */ - psEncC->sLP.transition_frame_no = 0; - - /* Reset transition filter state */ - silk_memset( psEncC->sLP.In_LP_State, 0, sizeof( psEncC->sLP.In_LP_State ) ); - - /* Direction: up */ - psEncC->sLP.mode = 1; - } else { - if( psEncC->sLP.mode == 0 ) { - encControl->switchReady = 1; - /* Make room for redundancy */ - encControl->maxBits -= encControl->maxBits * 5 / ( encControl->payloadSize_ms + 5 ); - } else { - /* Direction: up */ - psEncC->sLP.mode = 1; - } - } - } else { - if (psEncC->sLP.mode<0) - psEncC->sLP.mode = 1; - } - } - } - - return fs_kHz; -} diff --git a/thirdparty/opus/silk/control_codec.c b/thirdparty/opus/silk/control_codec.c deleted file mode 100644 index 044eea3f2a..0000000000 --- a/thirdparty/opus/silk/control_codec.c +++ /dev/null @@ -1,428 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#ifdef FIXED_POINT -#include "main_FIX.h" -#define silk_encoder_state_Fxx silk_encoder_state_FIX -#else -#include "main_FLP.h" -#define silk_encoder_state_Fxx silk_encoder_state_FLP -#endif -#include "stack_alloc.h" -#include "tuning_parameters.h" -#include "pitch_est_defines.h" - -static opus_int silk_setup_resamplers( - silk_encoder_state_Fxx *psEnc, /* I/O */ - opus_int fs_kHz /* I */ -); - -static opus_int silk_setup_fs( - silk_encoder_state_Fxx *psEnc, /* I/O */ - opus_int fs_kHz, /* I */ - opus_int PacketSize_ms /* I */ -); - -static opus_int silk_setup_complexity( - silk_encoder_state *psEncC, /* I/O */ - opus_int Complexity /* I */ -); - -static OPUS_INLINE opus_int silk_setup_LBRR( - silk_encoder_state *psEncC, /* I/O */ - const opus_int32 TargetRate_bps /* I */ -); - - -/* Control encoder */ -opus_int silk_control_encoder( - silk_encoder_state_Fxx *psEnc, /* I/O 
Pointer to Silk encoder state */ - silk_EncControlStruct *encControl, /* I Control structure */ - const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */ - const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */ - const opus_int channelNb, /* I Channel number */ - const opus_int force_fs_kHz -) -{ - opus_int fs_kHz, ret = 0; - - psEnc->sCmn.useDTX = encControl->useDTX; - psEnc->sCmn.useCBR = encControl->useCBR; - psEnc->sCmn.API_fs_Hz = encControl->API_sampleRate; - psEnc->sCmn.maxInternal_fs_Hz = encControl->maxInternalSampleRate; - psEnc->sCmn.minInternal_fs_Hz = encControl->minInternalSampleRate; - psEnc->sCmn.desiredInternal_fs_Hz = encControl->desiredInternalSampleRate; - psEnc->sCmn.useInBandFEC = encControl->useInBandFEC; - psEnc->sCmn.nChannelsAPI = encControl->nChannelsAPI; - psEnc->sCmn.nChannelsInternal = encControl->nChannelsInternal; - psEnc->sCmn.allow_bandwidth_switch = allow_bw_switch; - psEnc->sCmn.channelNb = channelNb; - - if( psEnc->sCmn.controlled_since_last_payload != 0 && psEnc->sCmn.prefillFlag == 0 ) { - if( psEnc->sCmn.API_fs_Hz != psEnc->sCmn.prev_API_fs_Hz && psEnc->sCmn.fs_kHz > 0 ) { - /* Change in API sampling rate in the middle of encoding a packet */ - ret += silk_setup_resamplers( psEnc, psEnc->sCmn.fs_kHz ); - } - return ret; - } - - /* Beyond this point we know that there are no previously coded frames in the payload buffer */ - - /********************************************/ - /* Determine internal sampling rate */ - /********************************************/ - fs_kHz = silk_control_audio_bandwidth( &psEnc->sCmn, encControl ); - if( force_fs_kHz ) { - fs_kHz = force_fs_kHz; - } - /********************************************/ - /* Prepare resampler and buffered data */ - /********************************************/ - ret += silk_setup_resamplers( psEnc, fs_kHz ); - - /********************************************/ - /* Set internal sampling frequency */ - 
/********************************************/ - ret += silk_setup_fs( psEnc, fs_kHz, encControl->payloadSize_ms ); - - /********************************************/ - /* Set encoding complexity */ - /********************************************/ - ret += silk_setup_complexity( &psEnc->sCmn, encControl->complexity ); - - /********************************************/ - /* Set packet loss rate measured by farend */ - /********************************************/ - psEnc->sCmn.PacketLoss_perc = encControl->packetLossPercentage; - - /********************************************/ - /* Set LBRR usage */ - /********************************************/ - ret += silk_setup_LBRR( &psEnc->sCmn, TargetRate_bps ); - - psEnc->sCmn.controlled_since_last_payload = 1; - - return ret; -} - -static opus_int silk_setup_resamplers( - silk_encoder_state_Fxx *psEnc, /* I/O */ - opus_int fs_kHz /* I */ -) -{ - opus_int ret = SILK_NO_ERROR; - SAVE_STACK; - - if( psEnc->sCmn.fs_kHz != fs_kHz || psEnc->sCmn.prev_API_fs_Hz != psEnc->sCmn.API_fs_Hz ) - { - if( psEnc->sCmn.fs_kHz == 0 ) { - /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */ - ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, fs_kHz * 1000, 1 ); - } else { - VARDECL( opus_int16, x_buf_API_fs_Hz ); - VARDECL( silk_resampler_state_struct, temp_resampler_state ); -#ifdef FIXED_POINT - opus_int16 *x_bufFIX = psEnc->x_buf; -#else - VARDECL( opus_int16, x_bufFIX ); - opus_int32 new_buf_samples; -#endif - opus_int32 api_buf_samples; - opus_int32 old_buf_samples; - opus_int32 buf_length_ms; - - buf_length_ms = silk_LSHIFT( psEnc->sCmn.nb_subfr * 5, 1 ) + LA_SHAPE_MS; - old_buf_samples = buf_length_ms * psEnc->sCmn.fs_kHz; - -#ifndef FIXED_POINT - new_buf_samples = buf_length_ms * fs_kHz; - ALLOC( x_bufFIX, silk_max( old_buf_samples, new_buf_samples ), - opus_int16 ); - silk_float2short_array( x_bufFIX, psEnc->x_buf, old_buf_samples ); -#endif - - /* Initialize 
resampler for temporary resampling of x_buf data to API_fs_Hz */ - ALLOC( temp_resampler_state, 1, silk_resampler_state_struct ); - ret += silk_resampler_init( temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz, 0 ); - - /* Calculate number of samples to temporarily upsample */ - api_buf_samples = buf_length_ms * silk_DIV32_16( psEnc->sCmn.API_fs_Hz, 1000 ); - - /* Temporary resampling of x_buf data to API_fs_Hz */ - ALLOC( x_buf_API_fs_Hz, api_buf_samples, opus_int16 ); - ret += silk_resampler( temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, old_buf_samples ); - - /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */ - ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ), 1 ); - - /* Correct resampler state by resampling buffered data from API_fs_Hz to fs_kHz */ - ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, api_buf_samples ); - -#ifndef FIXED_POINT - silk_short2float_array( psEnc->x_buf, x_bufFIX, new_buf_samples); -#endif - } - } - - psEnc->sCmn.prev_API_fs_Hz = psEnc->sCmn.API_fs_Hz; - - RESTORE_STACK; - return ret; -} - -static opus_int silk_setup_fs( - silk_encoder_state_Fxx *psEnc, /* I/O */ - opus_int fs_kHz, /* I */ - opus_int PacketSize_ms /* I */ -) -{ - opus_int ret = SILK_NO_ERROR; - - /* Set packet size */ - if( PacketSize_ms != psEnc->sCmn.PacketSize_ms ) { - if( ( PacketSize_ms != 10 ) && - ( PacketSize_ms != 20 ) && - ( PacketSize_ms != 40 ) && - ( PacketSize_ms != 60 ) ) { - ret = SILK_ENC_PACKET_SIZE_NOT_SUPPORTED; - } - if( PacketSize_ms <= 10 ) { - psEnc->sCmn.nFramesPerPacket = 1; - psEnc->sCmn.nb_subfr = PacketSize_ms == 10 ? 
2 : 1; - psEnc->sCmn.frame_length = silk_SMULBB( PacketSize_ms, fs_kHz ); - psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz ); - if( psEnc->sCmn.fs_kHz == 8 ) { - psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF; - } else { - psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF; - } - } else { - psEnc->sCmn.nFramesPerPacket = silk_DIV32_16( PacketSize_ms, MAX_FRAME_LENGTH_MS ); - psEnc->sCmn.nb_subfr = MAX_NB_SUBFR; - psEnc->sCmn.frame_length = silk_SMULBB( 20, fs_kHz ); - psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz ); - if( psEnc->sCmn.fs_kHz == 8 ) { - psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF; - } else { - psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF; - } - } - psEnc->sCmn.PacketSize_ms = PacketSize_ms; - psEnc->sCmn.TargetRate_bps = 0; /* trigger new SNR computation */ - } - - /* Set internal sampling frequency */ - silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 ); - silk_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 ); - if( psEnc->sCmn.fs_kHz != fs_kHz ) { - /* reset part of the state */ - silk_memset( &psEnc->sShape, 0, sizeof( psEnc->sShape ) ); - silk_memset( &psEnc->sPrefilt, 0, sizeof( psEnc->sPrefilt ) ); - silk_memset( &psEnc->sCmn.sNSQ, 0, sizeof( psEnc->sCmn.sNSQ ) ); - silk_memset( psEnc->sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) ); - silk_memset( &psEnc->sCmn.sLP.In_LP_State, 0, sizeof( psEnc->sCmn.sLP.In_LP_State ) ); - psEnc->sCmn.inputBufIx = 0; - psEnc->sCmn.nFramesEncoded = 0; - psEnc->sCmn.TargetRate_bps = 0; /* trigger new SNR computation */ - - /* Initialize non-zero parameters */ - psEnc->sCmn.prevLag = 100; - psEnc->sCmn.first_frame_after_reset = 1; - psEnc->sPrefilt.lagPrev = 100; - psEnc->sShape.LastGainIndex = 10; - psEnc->sCmn.sNSQ.lagPrev = 100; - psEnc->sCmn.sNSQ.prev_gain_Q16 = 65536; - psEnc->sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY; - - psEnc->sCmn.fs_kHz = 
fs_kHz; - if( psEnc->sCmn.fs_kHz == 8 ) { - if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) { - psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF; - } else { - psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF; - } - } else { - if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) { - psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF; - } else { - psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF; - } - } - if( psEnc->sCmn.fs_kHz == 8 || psEnc->sCmn.fs_kHz == 12 ) { - psEnc->sCmn.predictLPCOrder = MIN_LPC_ORDER; - psEnc->sCmn.psNLSF_CB = &silk_NLSF_CB_NB_MB; - } else { - psEnc->sCmn.predictLPCOrder = MAX_LPC_ORDER; - psEnc->sCmn.psNLSF_CB = &silk_NLSF_CB_WB; - } - psEnc->sCmn.subfr_length = SUB_FRAME_LENGTH_MS * fs_kHz; - psEnc->sCmn.frame_length = silk_SMULBB( psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr ); - psEnc->sCmn.ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz ); - psEnc->sCmn.la_pitch = silk_SMULBB( LA_PITCH_MS, fs_kHz ); - psEnc->sCmn.max_pitch_lag = silk_SMULBB( 18, fs_kHz ); - if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) { - psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz ); - } else { - psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz ); - } - if( psEnc->sCmn.fs_kHz == 16 ) { - psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_WB, 9 ); - psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform8_iCDF; - } else if( psEnc->sCmn.fs_kHz == 12 ) { - psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_MB, 9 ); - psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform6_iCDF; - } else { - psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_NB, 9 ); - psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform4_iCDF; - } - } - - /* Check that settings are valid */ - silk_assert( ( psEnc->sCmn.subfr_length * psEnc->sCmn.nb_subfr ) == psEnc->sCmn.frame_length ); - - return ret; -} - -static opus_int silk_setup_complexity( - silk_encoder_state *psEncC, /* I/O */ - opus_int 
Complexity /* I */ -) -{ - opus_int ret = 0; - - /* Set encoding complexity */ - silk_assert( Complexity >= 0 && Complexity <= 10 ); - if( Complexity < 2 ) { - psEncC->pitchEstimationComplexity = SILK_PE_MIN_COMPLEX; - psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.8, 16 ); - psEncC->pitchEstimationLPCOrder = 6; - psEncC->shapingLPCOrder = 8; - psEncC->la_shape = 3 * psEncC->fs_kHz; - psEncC->nStatesDelayedDecision = 1; - psEncC->useInterpolatedNLSFs = 0; - psEncC->LTPQuantLowComplexity = 1; - psEncC->NLSF_MSVQ_Survivors = 2; - psEncC->warping_Q16 = 0; - } else if( Complexity < 4 ) { - psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX; - psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.76, 16 ); - psEncC->pitchEstimationLPCOrder = 8; - psEncC->shapingLPCOrder = 10; - psEncC->la_shape = 5 * psEncC->fs_kHz; - psEncC->nStatesDelayedDecision = 1; - psEncC->useInterpolatedNLSFs = 0; - psEncC->LTPQuantLowComplexity = 0; - psEncC->NLSF_MSVQ_Survivors = 4; - psEncC->warping_Q16 = 0; - } else if( Complexity < 6 ) { - psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX; - psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.74, 16 ); - psEncC->pitchEstimationLPCOrder = 10; - psEncC->shapingLPCOrder = 12; - psEncC->la_shape = 5 * psEncC->fs_kHz; - psEncC->nStatesDelayedDecision = 2; - psEncC->useInterpolatedNLSFs = 1; - psEncC->LTPQuantLowComplexity = 0; - psEncC->NLSF_MSVQ_Survivors = 8; - psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 ); - } else if( Complexity < 8 ) { - psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX; - psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.72, 16 ); - psEncC->pitchEstimationLPCOrder = 12; - psEncC->shapingLPCOrder = 14; - psEncC->la_shape = 5 * psEncC->fs_kHz; - psEncC->nStatesDelayedDecision = 3; - psEncC->useInterpolatedNLSFs = 1; - psEncC->LTPQuantLowComplexity = 0; - psEncC->NLSF_MSVQ_Survivors = 16; - psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( 
WARPING_MULTIPLIER, 16 ); - } else { - psEncC->pitchEstimationComplexity = SILK_PE_MAX_COMPLEX; - psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.7, 16 ); - psEncC->pitchEstimationLPCOrder = 16; - psEncC->shapingLPCOrder = 16; - psEncC->la_shape = 5 * psEncC->fs_kHz; - psEncC->nStatesDelayedDecision = MAX_DEL_DEC_STATES; - psEncC->useInterpolatedNLSFs = 1; - psEncC->LTPQuantLowComplexity = 0; - psEncC->NLSF_MSVQ_Survivors = 32; - psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 ); - } - - /* Do not allow higher pitch estimation LPC order than predict LPC order */ - psEncC->pitchEstimationLPCOrder = silk_min_int( psEncC->pitchEstimationLPCOrder, psEncC->predictLPCOrder ); - psEncC->shapeWinLength = SUB_FRAME_LENGTH_MS * psEncC->fs_kHz + 2 * psEncC->la_shape; - psEncC->Complexity = Complexity; - - silk_assert( psEncC->pitchEstimationLPCOrder <= MAX_FIND_PITCH_LPC_ORDER ); - silk_assert( psEncC->shapingLPCOrder <= MAX_SHAPE_LPC_ORDER ); - silk_assert( psEncC->nStatesDelayedDecision <= MAX_DEL_DEC_STATES ); - silk_assert( psEncC->warping_Q16 <= 32767 ); - silk_assert( psEncC->la_shape <= LA_SHAPE_MAX ); - silk_assert( psEncC->shapeWinLength <= SHAPE_LPC_WIN_MAX ); - silk_assert( psEncC->NLSF_MSVQ_Survivors <= NLSF_VQ_MAX_SURVIVORS ); - - return ret; -} - -static OPUS_INLINE opus_int silk_setup_LBRR( - silk_encoder_state *psEncC, /* I/O */ - const opus_int32 TargetRate_bps /* I */ -) -{ - opus_int LBRR_in_previous_packet, ret = SILK_NO_ERROR; - opus_int32 LBRR_rate_thres_bps; - - LBRR_in_previous_packet = psEncC->LBRR_enabled; - psEncC->LBRR_enabled = 0; - if( psEncC->useInBandFEC && psEncC->PacketLoss_perc > 0 ) { - if( psEncC->fs_kHz == 8 ) { - LBRR_rate_thres_bps = LBRR_NB_MIN_RATE_BPS; - } else if( psEncC->fs_kHz == 12 ) { - LBRR_rate_thres_bps = LBRR_MB_MIN_RATE_BPS; - } else { - LBRR_rate_thres_bps = LBRR_WB_MIN_RATE_BPS; - } - LBRR_rate_thres_bps = silk_SMULWB( silk_MUL( LBRR_rate_thres_bps, 125 - silk_min( 
psEncC->PacketLoss_perc, 25 ) ), SILK_FIX_CONST( 0.01, 16 ) ); - - if( TargetRate_bps > LBRR_rate_thres_bps ) { - /* Set gain increase for coding LBRR excitation */ - if( LBRR_in_previous_packet == 0 ) { - /* Previous packet did not have LBRR, and was therefore coded at a higher bitrate */ - psEncC->LBRR_GainIncreases = 7; - } else { - psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 ); - } - psEncC->LBRR_enabled = 1; - } - } - - return ret; -} diff --git a/thirdparty/opus/silk/debug.c b/thirdparty/opus/silk/debug.c deleted file mode 100644 index 9253faf71b..0000000000 --- a/thirdparty/opus/silk/debug.c +++ /dev/null @@ -1,170 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "debug.h" -#include "SigProc_FIX.h" - -#if SILK_TIC_TOC - -#ifdef _WIN32 - -#if (defined(_WIN32) || defined(_WINCE)) -#include <windows.h> /* timer */ -#else /* Linux or Mac*/ -#include <sys/time.h> -#endif - -unsigned long silk_GetHighResolutionTime(void) /* O time in usec*/ -{ - /* Returns a time counter in microsec */ - /* the resolution is platform dependent */ - /* but is typically 1.62 us resolution */ - LARGE_INTEGER lpPerformanceCount; - LARGE_INTEGER lpFrequency; - QueryPerformanceCounter(&lpPerformanceCount); - QueryPerformanceFrequency(&lpFrequency); - return (unsigned long)((1000000*(lpPerformanceCount.QuadPart)) / lpFrequency.QuadPart); -} -#else /* Linux or Mac*/ -unsigned long GetHighResolutionTime(void) /* O time in usec*/ -{ - struct timeval tv; - gettimeofday(&tv, 0); - return((tv.tv_sec*1000000)+(tv.tv_usec)); -} -#endif - -int silk_Timer_nTimers = 0; -int silk_Timer_depth_ctr = 0; -char silk_Timer_tags[silk_NUM_TIMERS_MAX][silk_NUM_TIMERS_MAX_TAG_LEN]; -#ifdef WIN32 -LARGE_INTEGER silk_Timer_start[silk_NUM_TIMERS_MAX]; -#else -unsigned long silk_Timer_start[silk_NUM_TIMERS_MAX]; -#endif -unsigned int silk_Timer_cnt[silk_NUM_TIMERS_MAX]; -opus_int64 silk_Timer_min[silk_NUM_TIMERS_MAX]; -opus_int64 silk_Timer_sum[silk_NUM_TIMERS_MAX]; -opus_int64 silk_Timer_max[silk_NUM_TIMERS_MAX]; -opus_int64 
silk_Timer_depth[silk_NUM_TIMERS_MAX]; - -#ifdef WIN32 -void silk_TimerSave(char *file_name) -{ - if( silk_Timer_nTimers > 0 ) - { - int k; - FILE *fp; - LARGE_INTEGER lpFrequency; - LARGE_INTEGER lpPerformanceCount1, lpPerformanceCount2; - int del = 0x7FFFFFFF; - double avg, sum_avg; - /* estimate overhead of calling performance counters */ - for( k = 0; k < 1000; k++ ) { - QueryPerformanceCounter(&lpPerformanceCount1); - QueryPerformanceCounter(&lpPerformanceCount2); - lpPerformanceCount2.QuadPart -= lpPerformanceCount1.QuadPart; - if( (int)lpPerformanceCount2.LowPart < del ) - del = lpPerformanceCount2.LowPart; - } - QueryPerformanceFrequency(&lpFrequency); - /* print results to file */ - sum_avg = 0.0f; - for( k = 0; k < silk_Timer_nTimers; k++ ) { - if (silk_Timer_depth[k] == 0) { - sum_avg += (1e6 * silk_Timer_sum[k] / silk_Timer_cnt[k] - del) / lpFrequency.QuadPart * silk_Timer_cnt[k]; - } - } - fp = fopen(file_name, "w"); - fprintf(fp, " min avg %% max count\n"); - for( k = 0; k < silk_Timer_nTimers; k++ ) { - if (silk_Timer_depth[k] == 0) { - fprintf(fp, "%-28s", silk_Timer_tags[k]); - } else if (silk_Timer_depth[k] == 1) { - fprintf(fp, " %-27s", silk_Timer_tags[k]); - } else if (silk_Timer_depth[k] == 2) { - fprintf(fp, " %-26s", silk_Timer_tags[k]); - } else if (silk_Timer_depth[k] == 3) { - fprintf(fp, " %-25s", silk_Timer_tags[k]); - } else { - fprintf(fp, " %-24s", silk_Timer_tags[k]); - } - avg = (1e6 * silk_Timer_sum[k] / silk_Timer_cnt[k] - del) / lpFrequency.QuadPart; - fprintf(fp, "%8.2f", (1e6 * (silk_max_64(silk_Timer_min[k] - del, 0))) / lpFrequency.QuadPart); - fprintf(fp, "%12.2f %6.2f", avg, 100.0 * avg / sum_avg * silk_Timer_cnt[k]); - fprintf(fp, "%12.2f", (1e6 * (silk_max_64(silk_Timer_max[k] - del, 0))) / lpFrequency.QuadPart); - fprintf(fp, "%10d\n", silk_Timer_cnt[k]); - } - fprintf(fp, " microseconds\n"); - fclose(fp); - } -} -#else -void silk_TimerSave(char *file_name) -{ - if( silk_Timer_nTimers > 0 ) - { - int k; - FILE *fp; - /* 
print results to file */ - fp = fopen(file_name, "w"); - fprintf(fp, " min avg max count\n"); - for( k = 0; k < silk_Timer_nTimers; k++ ) - { - if (silk_Timer_depth[k] == 0) { - fprintf(fp, "%-28s", silk_Timer_tags[k]); - } else if (silk_Timer_depth[k] == 1) { - fprintf(fp, " %-27s", silk_Timer_tags[k]); - } else if (silk_Timer_depth[k] == 2) { - fprintf(fp, " %-26s", silk_Timer_tags[k]); - } else if (silk_Timer_depth[k] == 3) { - fprintf(fp, " %-25s", silk_Timer_tags[k]); - } else { - fprintf(fp, " %-24s", silk_Timer_tags[k]); - } - fprintf(fp, "%d ", silk_Timer_min[k]); - fprintf(fp, "%f ", (double)silk_Timer_sum[k] / (double)silk_Timer_cnt[k]); - fprintf(fp, "%d ", silk_Timer_max[k]); - fprintf(fp, "%10d\n", silk_Timer_cnt[k]); - } - fprintf(fp, " microseconds\n"); - fclose(fp); - } -} -#endif - -#endif /* SILK_TIC_TOC */ - -#if SILK_DEBUG -FILE *silk_debug_store_fp[ silk_NUM_STORES_MAX ]; -int silk_debug_store_count = 0; -#endif /* SILK_DEBUG */ - diff --git a/thirdparty/opus/silk/debug.h b/thirdparty/opus/silk/debug.h deleted file mode 100644 index efb6d3e99e..0000000000 --- a/thirdparty/opus/silk/debug.h +++ /dev/null @@ -1,279 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_DEBUG_H -#define SILK_DEBUG_H - -#include "typedef.h" -#include <stdio.h> /* file writing */ -#include <string.h> /* strcpy, strcmp */ - -#ifdef __cplusplus -extern "C" -{ -#endif - -unsigned long GetHighResolutionTime(void); /* O time in usec*/ - -/* make SILK_DEBUG dependent on compiler's _DEBUG */ -#if defined _WIN32 - #ifdef _DEBUG - #define SILK_DEBUG 1 - #else - #define SILK_DEBUG 0 - #endif - - /* overrule the above */ - #if 0 - /* #define NO_ASSERTS*/ - #undef SILK_DEBUG - #define SILK_DEBUG 1 - #endif -#else - #define SILK_DEBUG 0 -#endif - -/* Flag for using timers */ -#define SILK_TIC_TOC 0 - - -#if SILK_TIC_TOC - -#if (defined(_WIN32) || defined(_WINCE)) -#include <windows.h> /* timer */ -#else /* Linux or Mac*/ -#include <sys/time.h> -#endif - -/*********************************/ -/* timer functions for profiling */ -/*********************************/ -/* example: */ -/* */ -/* TIC(LPC) */ -/* do_LPC(in_vec, order, acoef); // do LPC analysis */ -/* TOC(LPC) */ -/* */ -/* and call the following just before exiting (from main) */ -/* */ -/* silk_TimerSave("silk_TimingData.txt"); */ -/* */ -/* results are now in 
silk_TimingData.txt */ - -void silk_TimerSave(char *file_name); - -/* max number of timers (in different locations) */ -#define silk_NUM_TIMERS_MAX 50 -/* max length of name tags in TIC(..), TOC(..) */ -#define silk_NUM_TIMERS_MAX_TAG_LEN 30 - -extern int silk_Timer_nTimers; -extern int silk_Timer_depth_ctr; -extern char silk_Timer_tags[silk_NUM_TIMERS_MAX][silk_NUM_TIMERS_MAX_TAG_LEN]; -#ifdef _WIN32 -extern LARGE_INTEGER silk_Timer_start[silk_NUM_TIMERS_MAX]; -#else -extern unsigned long silk_Timer_start[silk_NUM_TIMERS_MAX]; -#endif -extern unsigned int silk_Timer_cnt[silk_NUM_TIMERS_MAX]; -extern opus_int64 silk_Timer_sum[silk_NUM_TIMERS_MAX]; -extern opus_int64 silk_Timer_max[silk_NUM_TIMERS_MAX]; -extern opus_int64 silk_Timer_min[silk_NUM_TIMERS_MAX]; -extern opus_int64 silk_Timer_depth[silk_NUM_TIMERS_MAX]; - -/* WARNING: TIC()/TOC can measure only up to 0.1 seconds at a time */ -#ifdef _WIN32 -#define TIC(TAG_NAME) { \ - static int init = 0; \ - static int ID = -1; \ - if( init == 0 ) \ - { \ - int k; \ - init = 1; \ - for( k = 0; k < silk_Timer_nTimers; k++ ) { \ - if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) { \ - ID = k; \ - break; \ - } \ - } \ - if (ID == -1) { \ - ID = silk_Timer_nTimers; \ - silk_Timer_nTimers++; \ - silk_Timer_depth[ID] = silk_Timer_depth_ctr; \ - strcpy(silk_Timer_tags[ID], #TAG_NAME); \ - silk_Timer_cnt[ID] = 0; \ - silk_Timer_sum[ID] = 0; \ - silk_Timer_min[ID] = 0xFFFFFFFF; \ - silk_Timer_max[ID] = 0; \ - } \ - } \ - silk_Timer_depth_ctr++; \ - QueryPerformanceCounter(&silk_Timer_start[ID]); \ -} -#else -#define TIC(TAG_NAME) { \ - static int init = 0; \ - static int ID = -1; \ - if( init == 0 ) \ - { \ - int k; \ - init = 1; \ - for( k = 0; k < silk_Timer_nTimers; k++ ) { \ - if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) { \ - ID = k; \ - break; \ - } \ - } \ - if (ID == -1) { \ - ID = silk_Timer_nTimers; \ - silk_Timer_nTimers++; \ - silk_Timer_depth[ID] = silk_Timer_depth_ctr; \ - strcpy(silk_Timer_tags[ID], 
#TAG_NAME); \ - silk_Timer_cnt[ID] = 0; \ - silk_Timer_sum[ID] = 0; \ - silk_Timer_min[ID] = 0xFFFFFFFF; \ - silk_Timer_max[ID] = 0; \ - } \ - } \ - silk_Timer_depth_ctr++; \ - silk_Timer_start[ID] = GetHighResolutionTime(); \ -} -#endif - -#ifdef _WIN32 -#define TOC(TAG_NAME) { \ - LARGE_INTEGER lpPerformanceCount; \ - static int init = 0; \ - static int ID = 0; \ - if( init == 0 ) \ - { \ - int k; \ - init = 1; \ - for( k = 0; k < silk_Timer_nTimers; k++ ) { \ - if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) { \ - ID = k; \ - break; \ - } \ - } \ - } \ - QueryPerformanceCounter(&lpPerformanceCount); \ - lpPerformanceCount.QuadPart -= silk_Timer_start[ID].QuadPart; \ - if((lpPerformanceCount.QuadPart < 100000000) && \ - (lpPerformanceCount.QuadPart >= 0)) { \ - silk_Timer_cnt[ID]++; \ - silk_Timer_sum[ID] += lpPerformanceCount.QuadPart; \ - if( lpPerformanceCount.QuadPart > silk_Timer_max[ID] ) \ - silk_Timer_max[ID] = lpPerformanceCount.QuadPart; \ - if( lpPerformanceCount.QuadPart < silk_Timer_min[ID] ) \ - silk_Timer_min[ID] = lpPerformanceCount.QuadPart; \ - } \ - silk_Timer_depth_ctr--; \ -} -#else -#define TOC(TAG_NAME) { \ - unsigned long endTime; \ - static int init = 0; \ - static int ID = 0; \ - if( init == 0 ) \ - { \ - int k; \ - init = 1; \ - for( k = 0; k < silk_Timer_nTimers; k++ ) { \ - if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) { \ - ID = k; \ - break; \ - } \ - } \ - } \ - endTime = GetHighResolutionTime(); \ - endTime -= silk_Timer_start[ID]; \ - if((endTime < 100000000) && \ - (endTime >= 0)) { \ - silk_Timer_cnt[ID]++; \ - silk_Timer_sum[ID] += endTime; \ - if( endTime > silk_Timer_max[ID] ) \ - silk_Timer_max[ID] = endTime; \ - if( endTime < silk_Timer_min[ID] ) \ - silk_Timer_min[ID] = endTime; \ - } \ - silk_Timer_depth_ctr--; \ -} -#endif - -#else /* SILK_TIC_TOC */ - -/* define macros as empty strings */ -#define TIC(TAG_NAME) -#define TOC(TAG_NAME) -#define silk_TimerSave(FILE_NAME) - -#endif /* SILK_TIC_TOC */ - - -#if 
SILK_DEBUG -/************************************/ -/* write data to file for debugging */ -/************************************/ -/* Example: DEBUG_STORE_DATA(testfile.pcm, &RIN[0], 160*sizeof(opus_int16)); */ - -#define silk_NUM_STORES_MAX 100 -extern FILE *silk_debug_store_fp[ silk_NUM_STORES_MAX ]; -extern int silk_debug_store_count; - -/* Faster way of storing the data */ -#define DEBUG_STORE_DATA( FILE_NAME, DATA_PTR, N_BYTES ) { \ - static opus_int init = 0, cnt = 0; \ - static FILE **fp; \ - if (init == 0) { \ - init = 1; \ - cnt = silk_debug_store_count++; \ - silk_debug_store_fp[ cnt ] = fopen(#FILE_NAME, "wb"); \ - } \ - fwrite((DATA_PTR), (N_BYTES), 1, silk_debug_store_fp[ cnt ]); \ -} - -/* Call this at the end of main() */ -#define SILK_DEBUG_STORE_CLOSE_FILES { \ - opus_int i; \ - for( i = 0; i < silk_debug_store_count; i++ ) { \ - fclose( silk_debug_store_fp[ i ] ); \ - } \ -} - -#else /* SILK_DEBUG */ - -/* define macros as empty strings */ -#define DEBUG_STORE_DATA(FILE_NAME, DATA_PTR, N_BYTES) -#define SILK_DEBUG_STORE_CLOSE_FILES - -#endif /* SILK_DEBUG */ - -#ifdef __cplusplus -} -#endif - -#endif /* SILK_DEBUG_H */ diff --git a/thirdparty/opus/silk/dec_API.c b/thirdparty/opus/silk/dec_API.c deleted file mode 100644 index b7d8ed48d8..0000000000 --- a/thirdparty/opus/silk/dec_API.c +++ /dev/null @@ -1,419 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#include "API.h" -#include "main.h" -#include "stack_alloc.h" -#include "os_support.h" - -/************************/ -/* Decoder Super Struct */ -/************************/ -typedef struct { - silk_decoder_state channel_state[ DECODER_NUM_CHANNELS ]; - stereo_dec_state sStereo; - opus_int nChannelsAPI; - opus_int nChannelsInternal; - opus_int prev_decode_only_middle; -} silk_decoder; - -/*********************/ -/* Decoder functions */ -/*********************/ - -opus_int silk_Get_Decoder_Size( /* O Returns error code */ - opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */ -) -{ - opus_int ret = SILK_NO_ERROR; - - *decSizeBytes = sizeof( silk_decoder ); - - return ret; -} - -/* Reset decoder state */ -opus_int silk_InitDecoder( /* O Returns error code */ - void *decState /* I/O State */ -) -{ - opus_int n, ret = SILK_NO_ERROR; - silk_decoder_state 
*channel_state = ((silk_decoder *)decState)->channel_state; - - for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) { - ret = silk_init_decoder( &channel_state[ n ] ); - } - silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo)); - /* Not strictly needed, but it's cleaner that way */ - ((silk_decoder *)decState)->prev_decode_only_middle = 0; - - return ret; -} - -/* Decode a frame */ -opus_int silk_Decode( /* O Returns error code */ - void* decState, /* I/O State */ - silk_DecControlStruct* decControl, /* I/O Control Structure */ - opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ - opus_int newPacketFlag, /* I Indicates first decoder call for this packet */ - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int16 *samplesOut, /* O Decoded output speech vector */ - opus_int32 *nSamplesOut, /* O Number of samples decoded */ - int arch /* I Run-time architecture */ -) -{ - opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; - opus_int32 nSamplesOutDec, LBRR_symbol; - opus_int16 *samplesOut1_tmp[ 2 ]; - VARDECL( opus_int16, samplesOut1_tmp_storage1 ); - VARDECL( opus_int16, samplesOut1_tmp_storage2 ); - VARDECL( opus_int16, samplesOut2_tmp ); - opus_int32 MS_pred_Q13[ 2 ] = { 0 }; - opus_int16 *resample_out_ptr; - silk_decoder *psDec = ( silk_decoder * )decState; - silk_decoder_state *channel_state = psDec->channel_state; - opus_int has_side; - opus_int stereo_to_mono; - int delay_stack_alloc; - SAVE_STACK; - - silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); - - /**********************************/ - /* Test if first frame in payload */ - /**********************************/ - if( newPacketFlag ) { - for( n = 0; n < decControl->nChannelsInternal; n++ ) { - channel_state[ n ].nFramesDecoded = 0; /* Used to count frames in packet */ - } - } - - /* If Mono -> Stereo transition in bitstream: init state of second channel */ - if( decControl->nChannelsInternal > 
psDec->nChannelsInternal ) { - ret += silk_init_decoder( &channel_state[ 1 ] ); - } - - stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 && - ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz ); - - if( channel_state[ 0 ].nFramesDecoded == 0 ) { - for( n = 0; n < decControl->nChannelsInternal; n++ ) { - opus_int fs_kHz_dec; - if( decControl->payloadSize_ms == 0 ) { - /* Assuming packet loss, use 10 ms */ - channel_state[ n ].nFramesPerPacket = 1; - channel_state[ n ].nb_subfr = 2; - } else if( decControl->payloadSize_ms == 10 ) { - channel_state[ n ].nFramesPerPacket = 1; - channel_state[ n ].nb_subfr = 2; - } else if( decControl->payloadSize_ms == 20 ) { - channel_state[ n ].nFramesPerPacket = 1; - channel_state[ n ].nb_subfr = 4; - } else if( decControl->payloadSize_ms == 40 ) { - channel_state[ n ].nFramesPerPacket = 2; - channel_state[ n ].nb_subfr = 4; - } else if( decControl->payloadSize_ms == 60 ) { - channel_state[ n ].nFramesPerPacket = 3; - channel_state[ n ].nb_subfr = 4; - } else { - silk_assert( 0 ); - RESTORE_STACK; - return SILK_DEC_INVALID_FRAME_SIZE; - } - fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1; - if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) { - silk_assert( 0 ); - RESTORE_STACK; - return SILK_DEC_INVALID_SAMPLING_FREQUENCY; - } - ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate ); - } - } - - if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) { - silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) ); - silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) ); - silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) ); - } - psDec->nChannelsAPI = decControl->nChannelsAPI; - psDec->nChannelsInternal = 
decControl->nChannelsInternal; - - if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) { - ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY; - RESTORE_STACK; - return( ret ); - } - - if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) { - /* First decoder call for this payload */ - /* Decode VAD flags and LBRR flag */ - for( n = 0; n < decControl->nChannelsInternal; n++ ) { - for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { - channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1); - } - channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1); - } - /* Decode LBRR flags */ - for( n = 0; n < decControl->nChannelsInternal; n++ ) { - silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) ); - if( channel_state[ n ].LBRR_flag ) { - if( channel_state[ n ].nFramesPerPacket == 1 ) { - channel_state[ n ].LBRR_flags[ 0 ] = 1; - } else { - LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1; - for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { - channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1; - } - } - } - } - - if( lostFlag == FLAG_DECODE_NORMAL ) { - /* Regular decoding: skip all LBRR data */ - for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) { - for( n = 0; n < decControl->nChannelsInternal; n++ ) { - if( channel_state[ n ].LBRR_flags[ i ] ) { - opus_int16 pulses[ MAX_FRAME_LENGTH ]; - opus_int condCoding; - - if( decControl->nChannelsInternal == 2 && n == 0 ) { - silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); - if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) { - silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); - } - } - /* Use conditional coding if previous frame available */ - if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) { - condCoding = CODE_CONDITIONALLY; - } else { - condCoding = CODE_INDEPENDENTLY; - } 
- silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding ); - silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType, - channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length ); - } - } - } - } - } - - /* Get MS predictor index */ - if( decControl->nChannelsInternal == 2 ) { - if( lostFlag == FLAG_DECODE_NORMAL || - ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) ) - { - silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); - /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */ - if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) || - ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ) - { - silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); - } else { - decode_only_middle = 0; - } - } else { - for( n = 0; n < 2; n++ ) { - MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ]; - } - } - } - - /* Reset side channel decoder prediction memory for first frame with side coding */ - if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) { - silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) ); - silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) ); - psDec->channel_state[ 1 ].lagPrev = 100; - psDec->channel_state[ 1 ].LastGainIndex = 10; - psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY; - psDec->channel_state[ 1 ].first_frame_after_reset = 1; - } - - /* Check if the temp buffer fits into the output PCM buffer. If it fits, - we can delay allocating the temp buffer until after the SILK peak stack - usage. We need to use a < and not a <= because of the two extra samples. 
*/ - delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal - < decControl->API_sampleRate*decControl->nChannelsAPI; - ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE - : decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ), - opus_int16 ); - if ( delay_stack_alloc ) - { - samplesOut1_tmp[ 0 ] = samplesOut; - samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2; - } else { - samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1; - samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2; - } - - if( lostFlag == FLAG_DECODE_NORMAL ) { - has_side = !decode_only_middle; - } else { - has_side = !psDec->prev_decode_only_middle - || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 ); - } - /* Call decoder for one frame */ - for( n = 0; n < decControl->nChannelsInternal; n++ ) { - if( n == 0 || has_side ) { - opus_int FrameIndex; - opus_int condCoding; - - FrameIndex = channel_state[ 0 ].nFramesDecoded - n; - /* Use independent coding if no previous frame available */ - if( FrameIndex <= 0 ) { - condCoding = CODE_INDEPENDENTLY; - } else if( lostFlag == FLAG_DECODE_LBRR ) { - condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY; - } else if( n > 0 && psDec->prev_decode_only_middle ) { - /* If we skipped a side frame in this packet, we don't - need LTP scaling; the LTP state is well-defined. 
*/ - condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; - } else { - condCoding = CODE_CONDITIONALLY; - } - ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding, arch); - } else { - silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) ); - } - channel_state[ n ].nFramesDecoded++; - } - - if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) { - /* Convert Mid/Side to Left/Right */ - silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec ); - } else { - /* Buffering */ - silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) ); - silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) ); - } - - /* Number of output samples */ - *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) ); - - /* Set up pointers to temp buffers */ - ALLOC( samplesOut2_tmp, - decControl->nChannelsAPI == 2 ? *nSamplesOut : ALLOC_NONE, opus_int16 ); - if( decControl->nChannelsAPI == 2 ) { - resample_out_ptr = samplesOut2_tmp; - } else { - resample_out_ptr = samplesOut; - } - - ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc - ? 
decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ) - : ALLOC_NONE, - opus_int16 ); - if ( delay_stack_alloc ) { - OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2)); - samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2; - samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2; - } - for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { - - /* Resample decoded signal to API_sampleRate */ - ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec ); - - /* Interleave if stereo output and stereo stream */ - if( decControl->nChannelsAPI == 2 ) { - for( i = 0; i < *nSamplesOut; i++ ) { - samplesOut[ n + 2 * i ] = resample_out_ptr[ i ]; - } - } - } - - /* Create two channel output from mono stream */ - if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) { - if ( stereo_to_mono ){ - /* Resample right channel for newly collapsed stereo just in case - we weren't doing collapsing when switching to mono */ - ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec ); - - for( i = 0; i < *nSamplesOut; i++ ) { - samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ]; - } - } else { - for( i = 0; i < *nSamplesOut; i++ ) { - samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ]; - } - } - } - - /* Export pitch lag, measured at 48 kHz sampling rate */ - if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) { - int mult_tab[ 3 ] = { 6, 4, 3 }; - decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ]; - } else { - decControl->prevPitchLag = 0; - } - - if( lostFlag == FLAG_PACKET_LOST ) { - /* On packet loss, remove the gain clamping to prevent having the energy "bounce back" - if we lose packets when the energy is going down */ - for ( i = 0; i < 
psDec->nChannelsInternal; i++ ) - psDec->channel_state[ i ].LastGainIndex = 10; - } else { - psDec->prev_decode_only_middle = decode_only_middle; - } - RESTORE_STACK; - return ret; -} - -#if 0 -/* Getting table of contents for a packet */ -opus_int silk_get_TOC( - const opus_uint8 *payload, /* I Payload data */ - const opus_int nBytesIn, /* I Number of input bytes */ - const opus_int nFramesPerPayload, /* I Number of SILK frames per payload */ - silk_TOC_struct *Silk_TOC /* O Type of content */ -) -{ - opus_int i, flags, ret = SILK_NO_ERROR; - - if( nBytesIn < 1 ) { - return -1; - } - if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) { - return -1; - } - - silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) ); - - /* For stereo, extract the flags for the mid channel */ - flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 ); - - Silk_TOC->inbandFECFlag = flags & 1; - for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) { - flags = silk_RSHIFT( flags, 1 ); - Silk_TOC->VADFlags[ i ] = flags & 1; - Silk_TOC->VADFlag |= flags & 1; - } - - return ret; -} -#endif diff --git a/thirdparty/opus/silk/decode_core.c b/thirdparty/opus/silk/decode_core.c deleted file mode 100644 index e569c0e72b..0000000000 --- a/thirdparty/opus/silk/decode_core.c +++ /dev/null @@ -1,239 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" - -/**********************************************************/ -/* Core decoder. 
Performs inverse NSQ operation LTP + LPC */ -/**********************************************************/ -void silk_decode_core( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl, /* I Decoder control */ - opus_int16 xq[], /* O Decoded speech */ - const opus_int16 pulses[ MAX_FRAME_LENGTH ], /* I Pulse signal */ - int arch /* I Run-time architecture */ -) -{ - opus_int i, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType; - opus_int16 *A_Q12, *B_Q14, *pxq, A_Q12_tmp[ MAX_LPC_ORDER ]; - VARDECL( opus_int16, sLTP ); - VARDECL( opus_int32, sLTP_Q15 ); - opus_int32 LTP_pred_Q13, LPC_pred_Q10, Gain_Q10, inv_gain_Q31, gain_adj_Q16, rand_seed, offset_Q10; - opus_int32 *pred_lag_ptr, *pexc_Q14, *pres_Q14; - VARDECL( opus_int32, res_Q14 ); - VARDECL( opus_int32, sLPC_Q14 ); - SAVE_STACK; - - silk_assert( psDec->prev_gain_Q16 != 0 ); - - ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 ); - ALLOC( sLTP_Q15, psDec->ltp_mem_length + psDec->frame_length, opus_int32 ); - ALLOC( res_Q14, psDec->subfr_length, opus_int32 ); - ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 ); - - offset_Q10 = silk_Quantization_Offsets_Q10[ psDec->indices.signalType >> 1 ][ psDec->indices.quantOffsetType ]; - - if( psDec->indices.NLSFInterpCoef_Q2 < 1 << 2 ) { - NLSF_interpolation_flag = 1; - } else { - NLSF_interpolation_flag = 0; - } - - /* Decode excitation */ - rand_seed = psDec->indices.Seed; - for( i = 0; i < psDec->frame_length; i++ ) { - rand_seed = silk_RAND( rand_seed ); - psDec->exc_Q14[ i ] = silk_LSHIFT( (opus_int32)pulses[ i ], 14 ); - if( psDec->exc_Q14[ i ] > 0 ) { - psDec->exc_Q14[ i ] -= QUANT_LEVEL_ADJUST_Q10 << 4; - } else - if( psDec->exc_Q14[ i ] < 0 ) { - psDec->exc_Q14[ i ] += QUANT_LEVEL_ADJUST_Q10 << 4; - } - psDec->exc_Q14[ i ] += offset_Q10 << 4; - if( rand_seed < 0 ) { - psDec->exc_Q14[ i ] = -psDec->exc_Q14[ i ]; - } - - rand_seed = silk_ADD32_ovflw( rand_seed, pulses[ i ] ); - } - - /* Copy LPC 
state */ - silk_memcpy( sLPC_Q14, psDec->sLPC_Q14_buf, MAX_LPC_ORDER * sizeof( opus_int32 ) ); - - pexc_Q14 = psDec->exc_Q14; - pxq = xq; - sLTP_buf_idx = psDec->ltp_mem_length; - /* Loop over subframes */ - for( k = 0; k < psDec->nb_subfr; k++ ) { - pres_Q14 = res_Q14; - A_Q12 = psDecCtrl->PredCoef_Q12[ k >> 1 ]; - - /* Preload LPC coeficients to array on stack. Gives small performance gain */ - silk_memcpy( A_Q12_tmp, A_Q12, psDec->LPC_order * sizeof( opus_int16 ) ); - B_Q14 = &psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER ]; - signalType = psDec->indices.signalType; - - Gain_Q10 = silk_RSHIFT( psDecCtrl->Gains_Q16[ k ], 6 ); - inv_gain_Q31 = silk_INVERSE32_varQ( psDecCtrl->Gains_Q16[ k ], 47 ); - - /* Calculate gain adjustment factor */ - if( psDecCtrl->Gains_Q16[ k ] != psDec->prev_gain_Q16 ) { - gain_adj_Q16 = silk_DIV32_varQ( psDec->prev_gain_Q16, psDecCtrl->Gains_Q16[ k ], 16 ); - - /* Scale short term state */ - for( i = 0; i < MAX_LPC_ORDER; i++ ) { - sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, sLPC_Q14[ i ] ); - } - } else { - gain_adj_Q16 = (opus_int32)1 << 16; - } - - /* Save inv_gain */ - silk_assert( inv_gain_Q31 != 0 ); - psDec->prev_gain_Q16 = psDecCtrl->Gains_Q16[ k ]; - - /* Avoid abrupt transition from voiced PLC to unvoiced normal decoding */ - if( psDec->lossCnt && psDec->prevSignalType == TYPE_VOICED && - psDec->indices.signalType != TYPE_VOICED && k < MAX_NB_SUBFR/2 ) { - - silk_memset( B_Q14, 0, LTP_ORDER * sizeof( opus_int16 ) ); - B_Q14[ LTP_ORDER/2 ] = SILK_FIX_CONST( 0.25, 14 ); - - signalType = TYPE_VOICED; - psDecCtrl->pitchL[ k ] = psDec->lagPrev; - } - - if( signalType == TYPE_VOICED ) { - /* Voiced */ - lag = psDecCtrl->pitchL[ k ]; - - /* Re-whitening */ - if( k == 0 || ( k == 2 && NLSF_interpolation_flag ) ) { - /* Rewhiten with new A coefs */ - start_idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2; - silk_assert( start_idx > 0 ); - - if( k == 2 ) { - silk_memcpy( &psDec->outBuf[ psDec->ltp_mem_length ], xq, 2 * 
psDec->subfr_length * sizeof( opus_int16 ) ); - } - - silk_LPC_analysis_filter( &sLTP[ start_idx ], &psDec->outBuf[ start_idx + k * psDec->subfr_length ], - A_Q12, psDec->ltp_mem_length - start_idx, psDec->LPC_order, arch ); - - /* After rewhitening the LTP state is unscaled */ - if( k == 0 ) { - /* Do LTP downscaling to reduce inter-packet dependency */ - inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, psDecCtrl->LTP_scale_Q14 ), 2 ); - } - for( i = 0; i < lag + LTP_ORDER/2; i++ ) { - sLTP_Q15[ sLTP_buf_idx - i - 1 ] = silk_SMULWB( inv_gain_Q31, sLTP[ psDec->ltp_mem_length - i - 1 ] ); - } - } else { - /* Update LTP state when Gain changes */ - if( gain_adj_Q16 != (opus_int32)1 << 16 ) { - for( i = 0; i < lag + LTP_ORDER/2; i++ ) { - sLTP_Q15[ sLTP_buf_idx - i - 1 ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ sLTP_buf_idx - i - 1 ] ); - } - } - } - } - - /* Long-term prediction */ - if( signalType == TYPE_VOICED ) { - /* Set up pointer */ - pred_lag_ptr = &sLTP_Q15[ sLTP_buf_idx - lag + LTP_ORDER / 2 ]; - for( i = 0; i < psDec->subfr_length; i++ ) { - /* Unrolled loop */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LTP_pred_Q13 = 2; - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ 0 ], B_Q14[ 0 ] ); - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -1 ], B_Q14[ 1 ] ); - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -2 ], B_Q14[ 2 ] ); - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -3 ], B_Q14[ 3 ] ); - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], B_Q14[ 4 ] ); - pred_lag_ptr++; - - /* Generate LPC excitation */ - pres_Q14[ i ] = silk_ADD_LSHIFT32( pexc_Q14[ i ], LTP_pred_Q13, 1 ); - - /* Update states */ - sLTP_Q15[ sLTP_buf_idx ] = silk_LSHIFT( pres_Q14[ i ], 1 ); - sLTP_buf_idx++; - } - } else { - pres_Q14 = pexc_Q14; - } - - for( i = 0; i < psDec->subfr_length; i++ ) { - /* Short-term prediction */ - silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 ); - /* 
Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 1 ], A_Q12_tmp[ 0 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 2 ], A_Q12_tmp[ 1 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 3 ], A_Q12_tmp[ 2 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 4 ], A_Q12_tmp[ 3 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 5 ], A_Q12_tmp[ 4 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 6 ], A_Q12_tmp[ 5 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 7 ], A_Q12_tmp[ 6 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 8 ], A_Q12_tmp[ 7 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 9 ], A_Q12_tmp[ 8 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 10 ], A_Q12_tmp[ 9 ] ); - if( psDec->LPC_order == 16 ) { - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 11 ], A_Q12_tmp[ 10 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 12 ], A_Q12_tmp[ 11 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 13 ], A_Q12_tmp[ 12 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 14 ], A_Q12_tmp[ 13 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 15 ], A_Q12_tmp[ 14 ] ); - LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 16 ], A_Q12_tmp[ 15 ] ); - } - - /* Add prediction to LPC excitation */ - sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( pres_Q14[ i ], silk_LSHIFT_SAT32( LPC_pred_Q10, 4 ) ); - - /* Scale with gain */ - pxq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14[ MAX_LPC_ORDER + i 
], Gain_Q10 ), 8 ) ); - } - - /* DEBUG_STORE_DATA( dec.pcm, pxq, psDec->subfr_length * sizeof( opus_int16 ) ) */ - - /* Update LPC filter state */ - silk_memcpy( sLPC_Q14, &sLPC_Q14[ psDec->subfr_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); - pexc_Q14 += psDec->subfr_length; - pxq += psDec->subfr_length; - } - - /* Save LPC state */ - silk_memcpy( psDec->sLPC_Q14_buf, sLPC_Q14, MAX_LPC_ORDER * sizeof( opus_int32 ) ); - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/decode_frame.c b/thirdparty/opus/silk/decode_frame.c deleted file mode 100644 index a605d95ac6..0000000000 --- a/thirdparty/opus/silk/decode_frame.c +++ /dev/null @@ -1,129 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" -#include "PLC.h" - -/****************/ -/* Decode frame */ -/****************/ -opus_int silk_decode_frame( - silk_decoder_state *psDec, /* I/O Pointer to Silk decoder state */ - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int16 pOut[], /* O Pointer to output speech frame */ - opus_int32 *pN, /* O Pointer to size of output frame */ - opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ - opus_int condCoding, /* I The type of conditional coding to use */ - int arch /* I Run-time architecture */ -) -{ - VARDECL( silk_decoder_control, psDecCtrl ); - opus_int L, mv_len, ret = 0; - SAVE_STACK; - - L = psDec->frame_length; - ALLOC( psDecCtrl, 1, silk_decoder_control ); - psDecCtrl->LTP_scale_Q14 = 0; - - /* Safety checks */ - silk_assert( L > 0 && L <= MAX_FRAME_LENGTH ); - - if( lostFlag == FLAG_DECODE_NORMAL || - ( lostFlag == FLAG_DECODE_LBRR && psDec->LBRR_flags[ psDec->nFramesDecoded ] == 1 ) ) - { - VARDECL( opus_int16, pulses ); - ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) & - ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int16 ); - /*********************************************/ - /* Decode quantization indices of side info */ - /*********************************************/ - silk_decode_indices( psDec, psRangeDec, psDec->nFramesDecoded, lostFlag, 
condCoding ); - - /*********************************************/ - /* Decode quantization indices of excitation */ - /*********************************************/ - silk_decode_pulses( psRangeDec, pulses, psDec->indices.signalType, - psDec->indices.quantOffsetType, psDec->frame_length ); - - /********************************************/ - /* Decode parameters and pulse signal */ - /********************************************/ - silk_decode_parameters( psDec, psDecCtrl, condCoding ); - - /********************************************************/ - /* Run inverse NSQ */ - /********************************************************/ - silk_decode_core( psDec, psDecCtrl, pOut, pulses, arch ); - - /********************************************************/ - /* Update PLC state */ - /********************************************************/ - silk_PLC( psDec, psDecCtrl, pOut, 0, arch ); - - psDec->lossCnt = 0; - psDec->prevSignalType = psDec->indices.signalType; - silk_assert( psDec->prevSignalType >= 0 && psDec->prevSignalType <= 2 ); - - /* A frame has been decoded without errors */ - psDec->first_frame_after_reset = 0; - } else { - /* Handle packet loss by extrapolation */ - silk_PLC( psDec, psDecCtrl, pOut, 1, arch ); - } - - /*************************/ - /* Update output buffer. 
*/ - /*************************/ - silk_assert( psDec->ltp_mem_length >= psDec->frame_length ); - mv_len = psDec->ltp_mem_length - psDec->frame_length; - silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) ); - silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) ); - - /************************************************/ - /* Comfort noise generation / estimation */ - /************************************************/ - silk_CNG( psDec, psDecCtrl, pOut, L ); - - /****************************************************************/ - /* Ensure smooth connection of extrapolated and good frames */ - /****************************************************************/ - silk_PLC_glue_frames( psDec, pOut, L ); - - /* Update some decoder state variables */ - psDec->lagPrev = psDecCtrl->pitchL[ psDec->nb_subfr - 1 ]; - - /* Set output frame length */ - *pN = L; - - RESTORE_STACK; - return ret; -} diff --git a/thirdparty/opus/silk/decode_indices.c b/thirdparty/opus/silk/decode_indices.c deleted file mode 100644 index 7afe5c26c1..0000000000 --- a/thirdparty/opus/silk/decode_indices.c +++ /dev/null @@ -1,151 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Decode side-information parameters from payload */ -void silk_decode_indices( - silk_decoder_state *psDec, /* I/O State */ - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int FrameIndex, /* I Frame number */ - opus_int decode_LBRR, /* I Flag indicating LBRR data is being decoded */ - opus_int condCoding /* I The type of conditional coding to use */ -) -{ - opus_int i, k, Ix; - opus_int decode_absolute_lagIndex, delta_lagIndex; - opus_int16 ec_ix[ MAX_LPC_ORDER ]; - opus_uint8 pred_Q8[ MAX_LPC_ORDER ]; - - /*******************************************/ - /* Decode signal type and quantizer offset */ - /*******************************************/ - if( decode_LBRR || psDec->VAD_flags[ FrameIndex ] ) { - Ix = ec_dec_icdf( psRangeDec, silk_type_offset_VAD_iCDF, 8 ) + 2; - } else { - Ix = ec_dec_icdf( psRangeDec, 
silk_type_offset_no_VAD_iCDF, 8 ); - } - psDec->indices.signalType = (opus_int8)silk_RSHIFT( Ix, 1 ); - psDec->indices.quantOffsetType = (opus_int8)( Ix & 1 ); - - /****************/ - /* Decode gains */ - /****************/ - /* First subframe */ - if( condCoding == CODE_CONDITIONALLY ) { - /* Conditional coding */ - psDec->indices.GainsIndices[ 0 ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_delta_gain_iCDF, 8 ); - } else { - /* Independent coding, in two stages: MSB bits followed by 3 LSBs */ - psDec->indices.GainsIndices[ 0 ] = (opus_int8)silk_LSHIFT( ec_dec_icdf( psRangeDec, silk_gain_iCDF[ psDec->indices.signalType ], 8 ), 3 ); - psDec->indices.GainsIndices[ 0 ] += (opus_int8)ec_dec_icdf( psRangeDec, silk_uniform8_iCDF, 8 ); - } - - /* Remaining subframes */ - for( i = 1; i < psDec->nb_subfr; i++ ) { - psDec->indices.GainsIndices[ i ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_delta_gain_iCDF, 8 ); - } - - /**********************/ - /* Decode LSF Indices */ - /**********************/ - psDec->indices.NLSFIndices[ 0 ] = (opus_int8)ec_dec_icdf( psRangeDec, &psDec->psNLSF_CB->CB1_iCDF[ ( psDec->indices.signalType >> 1 ) * psDec->psNLSF_CB->nVectors ], 8 ); - silk_NLSF_unpack( ec_ix, pred_Q8, psDec->psNLSF_CB, psDec->indices.NLSFIndices[ 0 ] ); - silk_assert( psDec->psNLSF_CB->order == psDec->LPC_order ); - for( i = 0; i < psDec->psNLSF_CB->order; i++ ) { - Ix = ec_dec_icdf( psRangeDec, &psDec->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 ); - if( Ix == 0 ) { - Ix -= ec_dec_icdf( psRangeDec, silk_NLSF_EXT_iCDF, 8 ); - } else if( Ix == 2 * NLSF_QUANT_MAX_AMPLITUDE ) { - Ix += ec_dec_icdf( psRangeDec, silk_NLSF_EXT_iCDF, 8 ); - } - psDec->indices.NLSFIndices[ i+1 ] = (opus_int8)( Ix - NLSF_QUANT_MAX_AMPLITUDE ); - } - - /* Decode LSF interpolation factor */ - if( psDec->nb_subfr == MAX_NB_SUBFR ) { - psDec->indices.NLSFInterpCoef_Q2 = (opus_int8)ec_dec_icdf( psRangeDec, silk_NLSF_interpolation_factor_iCDF, 8 ); - } else { - psDec->indices.NLSFInterpCoef_Q2 = 4; - } - - if( 
psDec->indices.signalType == TYPE_VOICED ) - { - /*********************/ - /* Decode pitch lags */ - /*********************/ - /* Get lag index */ - decode_absolute_lagIndex = 1; - if( condCoding == CODE_CONDITIONALLY && psDec->ec_prevSignalType == TYPE_VOICED ) { - /* Decode Delta index */ - delta_lagIndex = (opus_int16)ec_dec_icdf( psRangeDec, silk_pitch_delta_iCDF, 8 ); - if( delta_lagIndex > 0 ) { - delta_lagIndex = delta_lagIndex - 9; - psDec->indices.lagIndex = (opus_int16)( psDec->ec_prevLagIndex + delta_lagIndex ); - decode_absolute_lagIndex = 0; - } - } - if( decode_absolute_lagIndex ) { - /* Absolute decoding */ - psDec->indices.lagIndex = (opus_int16)ec_dec_icdf( psRangeDec, silk_pitch_lag_iCDF, 8 ) * silk_RSHIFT( psDec->fs_kHz, 1 ); - psDec->indices.lagIndex += (opus_int16)ec_dec_icdf( psRangeDec, psDec->pitch_lag_low_bits_iCDF, 8 ); - } - psDec->ec_prevLagIndex = psDec->indices.lagIndex; - - /* Get countour index */ - psDec->indices.contourIndex = (opus_int8)ec_dec_icdf( psRangeDec, psDec->pitch_contour_iCDF, 8 ); - - /********************/ - /* Decode LTP gains */ - /********************/ - /* Decode PERIndex value */ - psDec->indices.PERIndex = (opus_int8)ec_dec_icdf( psRangeDec, silk_LTP_per_index_iCDF, 8 ); - - for( k = 0; k < psDec->nb_subfr; k++ ) { - psDec->indices.LTPIndex[ k ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_LTP_gain_iCDF_ptrs[ psDec->indices.PERIndex ], 8 ); - } - - /**********************/ - /* Decode LTP scaling */ - /**********************/ - if( condCoding == CODE_INDEPENDENTLY ) { - psDec->indices.LTP_scaleIndex = (opus_int8)ec_dec_icdf( psRangeDec, silk_LTPscale_iCDF, 8 ); - } else { - psDec->indices.LTP_scaleIndex = 0; - } - } - psDec->ec_prevSignalType = psDec->indices.signalType; - - /***************/ - /* Decode seed */ - /***************/ - psDec->indices.Seed = (opus_int8)ec_dec_icdf( psRangeDec, silk_uniform4_iCDF, 8 ); -} diff --git a/thirdparty/opus/silk/decode_parameters.c b/thirdparty/opus/silk/decode_parameters.c 
deleted file mode 100644 index e345b1dcef..0000000000 --- a/thirdparty/opus/silk/decode_parameters.c +++ /dev/null @@ -1,115 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Decode parameters from payload */ -void silk_decode_parameters( - silk_decoder_state *psDec, /* I/O State */ - silk_decoder_control *psDecCtrl, /* I/O Decoder control */ - opus_int condCoding /* I The type of conditional coding to use */ -) -{ - opus_int i, k, Ix; - opus_int16 pNLSF_Q15[ MAX_LPC_ORDER ], pNLSF0_Q15[ MAX_LPC_ORDER ]; - const opus_int8 *cbk_ptr_Q7; - - /* Dequant Gains */ - silk_gains_dequant( psDecCtrl->Gains_Q16, psDec->indices.GainsIndices, - &psDec->LastGainIndex, condCoding == CODE_CONDITIONALLY, psDec->nb_subfr ); - - /****************/ - /* Decode NLSFs */ - /****************/ - silk_NLSF_decode( pNLSF_Q15, psDec->indices.NLSFIndices, psDec->psNLSF_CB ); - - /* Convert NLSF parameters to AR prediction filter coefficients */ - silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 1 ], pNLSF_Q15, psDec->LPC_order ); - - /* If just reset, e.g., because internal Fs changed, do not allow interpolation */ - /* improves the case of packet loss in the first frame after a switch */ - if( psDec->first_frame_after_reset == 1 ) { - psDec->indices.NLSFInterpCoef_Q2 = 4; - } - - if( psDec->indices.NLSFInterpCoef_Q2 < 4 ) { - /* Calculation of the interpolated NLSF0 vector from the interpolation factor, */ - /* the previous NLSF1, and the current NLSF1 */ - for( i = 0; i < psDec->LPC_order; i++ ) { - pNLSF0_Q15[ i ] = psDec->prevNLSF_Q15[ i ] + silk_RSHIFT( silk_MUL( psDec->indices.NLSFInterpCoef_Q2, - pNLSF_Q15[ i ] - psDec->prevNLSF_Q15[ i ] ), 2 ); - } - - /* Convert NLSF parameters to AR prediction filter coefficients */ - silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 0 ], pNLSF0_Q15, psDec->LPC_order ); - } else { - /* Copy LPC coefficients for first half from second half */ - silk_memcpy( psDecCtrl->PredCoef_Q12[ 0 ], psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order * sizeof( opus_int16 ) ); - } - - silk_memcpy( 
psDec->prevNLSF_Q15, pNLSF_Q15, psDec->LPC_order * sizeof( opus_int16 ) ); - - /* After a packet loss do BWE of LPC coefs */ - if( psDec->lossCnt ) { - silk_bwexpander( psDecCtrl->PredCoef_Q12[ 0 ], psDec->LPC_order, BWE_AFTER_LOSS_Q16 ); - silk_bwexpander( psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order, BWE_AFTER_LOSS_Q16 ); - } - - if( psDec->indices.signalType == TYPE_VOICED ) { - /*********************/ - /* Decode pitch lags */ - /*********************/ - - /* Decode pitch values */ - silk_decode_pitch( psDec->indices.lagIndex, psDec->indices.contourIndex, psDecCtrl->pitchL, psDec->fs_kHz, psDec->nb_subfr ); - - /* Decode Codebook Index */ - cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ psDec->indices.PERIndex ]; /* set pointer to start of codebook */ - - for( k = 0; k < psDec->nb_subfr; k++ ) { - Ix = psDec->indices.LTPIndex[ k ]; - for( i = 0; i < LTP_ORDER; i++ ) { - psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER + i ] = silk_LSHIFT( cbk_ptr_Q7[ Ix * LTP_ORDER + i ], 7 ); - } - } - - /**********************/ - /* Decode LTP scaling */ - /**********************/ - Ix = psDec->indices.LTP_scaleIndex; - psDecCtrl->LTP_scale_Q14 = silk_LTPScales_table_Q14[ Ix ]; - } else { - silk_memset( psDecCtrl->pitchL, 0, psDec->nb_subfr * sizeof( opus_int ) ); - silk_memset( psDecCtrl->LTPCoef_Q14, 0, LTP_ORDER * psDec->nb_subfr * sizeof( opus_int16 ) ); - psDec->indices.PERIndex = 0; - psDecCtrl->LTP_scale_Q14 = 0; - } -} diff --git a/thirdparty/opus/silk/decode_pitch.c b/thirdparty/opus/silk/decode_pitch.c deleted file mode 100644 index fedbc6a525..0000000000 --- a/thirdparty/opus/silk/decode_pitch.c +++ /dev/null @@ -1,77 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/*********************************************************** -* Pitch analyser function -********************************************************** */ -#include "SigProc_FIX.h" -#include "pitch_est_defines.h" - -void silk_decode_pitch( - opus_int16 lagIndex, /* I */ - opus_int8 contourIndex, /* O */ - opus_int pitch_lags[], /* O 4 pitch values */ - const opus_int Fs_kHz, /* I sampling frequency (kHz) */ - const opus_int nb_subfr /* I number of sub frames */ -) -{ - opus_int lag, k, min_lag, max_lag, cbk_size; - const opus_int8 *Lag_CB_ptr; - - if( Fs_kHz == 8 ) { - if( nb_subfr == PE_MAX_NB_SUBFR ) { - Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ]; - cbk_size = PE_NB_CBKS_STAGE2_EXT; - } else { - silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 ); - Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ]; - cbk_size = PE_NB_CBKS_STAGE2_10MS; - } - } else { - if( nb_subfr == PE_MAX_NB_SUBFR ) { - Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; - cbk_size = PE_NB_CBKS_STAGE3_MAX; - } else { - silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 ); - Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; - cbk_size = PE_NB_CBKS_STAGE3_10MS; - } - } - - min_lag = silk_SMULBB( PE_MIN_LAG_MS, Fs_kHz ); - max_lag = silk_SMULBB( PE_MAX_LAG_MS, Fs_kHz ); - lag = min_lag + lagIndex; - - for( k = 0; k < nb_subfr; k++ ) { - pitch_lags[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, contourIndex, cbk_size ); - pitch_lags[ k ] = silk_LIMIT( pitch_lags[ k ], min_lag, max_lag ); - } -} diff --git a/thirdparty/opus/silk/decode_pulses.c b/thirdparty/opus/silk/decode_pulses.c deleted file mode 100644 index d6bbec9225..0000000000 --- a/thirdparty/opus/silk/decode_pulses.c +++ /dev/null @@ -1,115 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/*********************************************/ -/* Decode quantization indices of excitation */ -/*********************************************/ -void silk_decode_pulses( - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int16 pulses[], /* O Excitation signal */ - const opus_int signalType, /* I Sigtype */ - const opus_int quantOffsetType, /* I quantOffsetType */ - const opus_int frame_length /* I Frame length */ -) -{ - opus_int i, j, k, iter, abs_q, nLS, RateLevelIndex; - opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ], nLshifts[ MAX_NB_SHELL_BLOCKS ]; - opus_int16 *pulses_ptr; - const opus_uint8 *cdf_ptr; - - /*********************/ - /* Decode rate level */ - /*********************/ - RateLevelIndex = ec_dec_icdf( psRangeDec, silk_rate_levels_iCDF[ signalType >> 1 ], 8 ); - - /* Calculate number of shell blocks */ - silk_assert( 1 << LOG2_SHELL_CODEC_FRAME_LENGTH == SHELL_CODEC_FRAME_LENGTH ); - iter = silk_RSHIFT( frame_length, LOG2_SHELL_CODEC_FRAME_LENGTH ); - if( iter * SHELL_CODEC_FRAME_LENGTH < frame_length ) { - silk_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */ - iter++; - } - - /***************************************************/ - /* Sum-Weighted-Pulses Decoding */ - /***************************************************/ - cdf_ptr = silk_pulses_per_block_iCDF[ RateLevelIndex ]; - for( i = 0; i < iter; i++ ) { - nLshifts[ i ] = 0; - sum_pulses[ i ] = ec_dec_icdf( psRangeDec, cdf_ptr, 8 ); - - /* LSB indication */ - while( sum_pulses[ i ] == SILK_MAX_PULSES + 1 ) { - nLshifts[ i ]++; - /* When we've already got 10 LSBs, we shift the table to not allow (SILK_MAX_PULSES + 1) */ - sum_pulses[ i ] = ec_dec_icdf( psRangeDec, - silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1] + ( nLshifts[ i ] == 10 ), 8 ); - } - } - - 
/***************************************************/ - /* Shell decoding */ - /***************************************************/ - for( i = 0; i < iter; i++ ) { - if( sum_pulses[ i ] > 0 ) { - silk_shell_decoder( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], psRangeDec, sum_pulses[ i ] ); - } else { - silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( pulses[0] ) ); - } - } - - /***************************************************/ - /* LSB Decoding */ - /***************************************************/ - for( i = 0; i < iter; i++ ) { - if( nLshifts[ i ] > 0 ) { - nLS = nLshifts[ i ]; - pulses_ptr = &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ]; - for( k = 0; k < SHELL_CODEC_FRAME_LENGTH; k++ ) { - abs_q = pulses_ptr[ k ]; - for( j = 0; j < nLS; j++ ) { - abs_q = silk_LSHIFT( abs_q, 1 ); - abs_q += ec_dec_icdf( psRangeDec, silk_lsb_iCDF, 8 ); - } - pulses_ptr[ k ] = abs_q; - } - /* Mark the number of pulses non-zero for sign decoding. */ - sum_pulses[ i ] |= nLS << 5; - } - } - - /****************************************/ - /* Decode and add signs to pulse signal */ - /****************************************/ - silk_decode_signs( psRangeDec, pulses, frame_length, signalType, quantOffsetType, sum_pulses ); -} diff --git a/thirdparty/opus/silk/decoder_set_fs.c b/thirdparty/opus/silk/decoder_set_fs.c deleted file mode 100644 index eef0fd25e1..0000000000 --- a/thirdparty/opus/silk/decoder_set_fs.c +++ /dev/null @@ -1,108 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Set decoder sampling rate */ -opus_int silk_decoder_set_fs( - silk_decoder_state *psDec, /* I/O Decoder state pointer */ - opus_int fs_kHz, /* I Sampling frequency (kHz) */ - opus_int32 fs_API_Hz /* I API Sampling frequency (Hz) */ -) -{ - opus_int frame_length, ret = 0; - - silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 ); - silk_assert( psDec->nb_subfr == MAX_NB_SUBFR || psDec->nb_subfr == MAX_NB_SUBFR/2 ); - - /* New (sub)frame length */ - psDec->subfr_length = silk_SMULBB( SUB_FRAME_LENGTH_MS, fs_kHz ); - frame_length = silk_SMULBB( psDec->nb_subfr, psDec->subfr_length ); - - /* Initialize resampler when switching internal or external sampling frequency */ - if( psDec->fs_kHz != fs_kHz || psDec->fs_API_hz != fs_API_Hz ) { - /* Initialize the resampler for dec_API.c preparing resampling from fs_kHz to API_fs_Hz */ - ret += silk_resampler_init( &psDec->resampler_state, silk_SMULBB( fs_kHz, 1000 ), fs_API_Hz, 0 ); - - psDec->fs_API_hz = fs_API_Hz; - } - - if( psDec->fs_kHz != fs_kHz || frame_length != psDec->frame_length ) { - if( fs_kHz == 8 ) { - if( psDec->nb_subfr == MAX_NB_SUBFR ) { - psDec->pitch_contour_iCDF = silk_pitch_contour_NB_iCDF; - } else { - psDec->pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF; - } - } else { - if( psDec->nb_subfr == MAX_NB_SUBFR ) { - psDec->pitch_contour_iCDF = silk_pitch_contour_iCDF; - } else { - psDec->pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF; - } - } - if( psDec->fs_kHz != fs_kHz ) { - psDec->ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz ); - if( fs_kHz == 8 || fs_kHz == 12 ) { - psDec->LPC_order = MIN_LPC_ORDER; - psDec->psNLSF_CB = &silk_NLSF_CB_NB_MB; - } else { - psDec->LPC_order = MAX_LPC_ORDER; - psDec->psNLSF_CB = &silk_NLSF_CB_WB; - } - if( fs_kHz == 16 ) { - psDec->pitch_lag_low_bits_iCDF = silk_uniform8_iCDF; - } else if( 
fs_kHz == 12 ) { - psDec->pitch_lag_low_bits_iCDF = silk_uniform6_iCDF; - } else if( fs_kHz == 8 ) { - psDec->pitch_lag_low_bits_iCDF = silk_uniform4_iCDF; - } else { - /* unsupported sampling rate */ - silk_assert( 0 ); - } - psDec->first_frame_after_reset = 1; - psDec->lagPrev = 100; - psDec->LastGainIndex = 10; - psDec->prevSignalType = TYPE_NO_VOICE_ACTIVITY; - silk_memset( psDec->outBuf, 0, sizeof(psDec->outBuf)); - silk_memset( psDec->sLPC_Q14_buf, 0, sizeof(psDec->sLPC_Q14_buf) ); - } - - psDec->fs_kHz = fs_kHz; - psDec->frame_length = frame_length; - } - - /* Check that settings are valid */ - silk_assert( psDec->frame_length > 0 && psDec->frame_length <= MAX_FRAME_LENGTH ); - - return ret; -} - diff --git a/thirdparty/opus/silk/define.h b/thirdparty/opus/silk/define.h deleted file mode 100644 index 19c9b00e25..0000000000 --- a/thirdparty/opus/silk/define.h +++ /dev/null @@ -1,235 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_DEFINE_H -#define SILK_DEFINE_H - -#include "errors.h" -#include "typedef.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/* Max number of encoder channels (1/2) */ -#define ENCODER_NUM_CHANNELS 2 -/* Number of decoder channels (1/2) */ -#define DECODER_NUM_CHANNELS 2 - -#define MAX_FRAMES_PER_PACKET 3 - -/* Limits on bitrate */ -#define MIN_TARGET_RATE_BPS 5000 -#define MAX_TARGET_RATE_BPS 80000 -#define TARGET_RATE_TAB_SZ 8 - -/* LBRR thresholds */ -#define LBRR_NB_MIN_RATE_BPS 12000 -#define LBRR_MB_MIN_RATE_BPS 14000 -#define LBRR_WB_MIN_RATE_BPS 16000 - -/* DTX settings */ -#define NB_SPEECH_FRAMES_BEFORE_DTX 10 /* eq 200 ms */ -#define MAX_CONSECUTIVE_DTX 20 /* eq 400 ms */ - -/* Maximum sampling frequency */ -#define MAX_FS_KHZ 16 -#define MAX_API_FS_KHZ 48 - -/* Signal types */ -#define TYPE_NO_VOICE_ACTIVITY 0 -#define TYPE_UNVOICED 1 -#define TYPE_VOICED 2 - -/* Conditional coding types */ -#define CODE_INDEPENDENTLY 0 -#define CODE_INDEPENDENTLY_NO_LTP_SCALING 1 -#define CODE_CONDITIONALLY 2 - -/* Settings for stereo processing */ -#define STEREO_QUANT_TAB_SIZE 16 -#define STEREO_QUANT_SUB_STEPS 5 -#define STEREO_INTERP_LEN_MS 8 /* must be even */ -#define STEREO_RATIO_SMOOTH_COEF 0.01 /* smoothing coef for signal norms and stereo width */ - -/* Range of pitch lag estimates */ -#define PITCH_EST_MIN_LAG_MS 2 /* 2 ms -> 500 Hz */ -#define 
PITCH_EST_MAX_LAG_MS 18 /* 18 ms -> 56 Hz */ - -/* Maximum number of subframes */ -#define MAX_NB_SUBFR 4 - -/* Number of samples per frame */ -#define LTP_MEM_LENGTH_MS 20 -#define SUB_FRAME_LENGTH_MS 5 -#define MAX_SUB_FRAME_LENGTH ( SUB_FRAME_LENGTH_MS * MAX_FS_KHZ ) -#define MAX_FRAME_LENGTH_MS ( SUB_FRAME_LENGTH_MS * MAX_NB_SUBFR ) -#define MAX_FRAME_LENGTH ( MAX_FRAME_LENGTH_MS * MAX_FS_KHZ ) - -/* Milliseconds of lookahead for pitch analysis */ -#define LA_PITCH_MS 2 -#define LA_PITCH_MAX ( LA_PITCH_MS * MAX_FS_KHZ ) - -/* Order of LPC used in find pitch */ -#define MAX_FIND_PITCH_LPC_ORDER 16 - -/* Length of LPC window used in find pitch */ -#define FIND_PITCH_LPC_WIN_MS ( 20 + (LA_PITCH_MS << 1) ) -#define FIND_PITCH_LPC_WIN_MS_2_SF ( 10 + (LA_PITCH_MS << 1) ) -#define FIND_PITCH_LPC_WIN_MAX ( FIND_PITCH_LPC_WIN_MS * MAX_FS_KHZ ) - -/* Milliseconds of lookahead for noise shape analysis */ -#define LA_SHAPE_MS 5 -#define LA_SHAPE_MAX ( LA_SHAPE_MS * MAX_FS_KHZ ) - -/* Maximum length of LPC window used in noise shape analysis */ -#define SHAPE_LPC_WIN_MAX ( 15 * MAX_FS_KHZ ) - -/* dB level of lowest gain quantization level */ -#define MIN_QGAIN_DB 2 -/* dB level of highest gain quantization level */ -#define MAX_QGAIN_DB 88 -/* Number of gain quantization levels */ -#define N_LEVELS_QGAIN 64 -/* Max increase in gain quantization index */ -#define MAX_DELTA_GAIN_QUANT 36 -/* Max decrease in gain quantization index */ -#define MIN_DELTA_GAIN_QUANT -4 - -/* Quantization offsets (multiples of 4) */ -#define OFFSET_VL_Q10 32 -#define OFFSET_VH_Q10 100 -#define OFFSET_UVL_Q10 100 -#define OFFSET_UVH_Q10 240 - -#define QUANT_LEVEL_ADJUST_Q10 80 - -/* Maximum numbers of iterations used to stabilize an LPC vector */ -#define MAX_LPC_STABILIZE_ITERATIONS 16 -#define MAX_PREDICTION_POWER_GAIN 1e4f -#define MAX_PREDICTION_POWER_GAIN_AFTER_RESET 1e2f - -#define MAX_LPC_ORDER 16 -#define MIN_LPC_ORDER 10 - -/* Find Pred Coef defines */ -#define LTP_ORDER 5 - -/* LTP 
quantization settings */ -#define NB_LTP_CBKS 3 - -/* Flag to use harmonic noise shaping */ -#define USE_HARM_SHAPING 1 - -/* Max LPC order of noise shaping filters */ -#define MAX_SHAPE_LPC_ORDER 16 - -#define HARM_SHAPE_FIR_TAPS 3 - -/* Maximum number of delayed decision states */ -#define MAX_DEL_DEC_STATES 4 - -#define LTP_BUF_LENGTH 512 -#define LTP_MASK ( LTP_BUF_LENGTH - 1 ) - -#define DECISION_DELAY 32 -#define DECISION_DELAY_MASK ( DECISION_DELAY - 1 ) - -/* Number of subframes for excitation entropy coding */ -#define SHELL_CODEC_FRAME_LENGTH 16 -#define LOG2_SHELL_CODEC_FRAME_LENGTH 4 -#define MAX_NB_SHELL_BLOCKS ( MAX_FRAME_LENGTH / SHELL_CODEC_FRAME_LENGTH ) - -/* Number of rate levels, for entropy coding of excitation */ -#define N_RATE_LEVELS 10 - -/* Maximum sum of pulses per shell coding frame */ -#define SILK_MAX_PULSES 16 - -#define MAX_MATRIX_SIZE MAX_LPC_ORDER /* Max of LPC Order and LTP order */ - -#if( MAX_LPC_ORDER > DECISION_DELAY ) -# define NSQ_LPC_BUF_LENGTH MAX_LPC_ORDER -#else -# define NSQ_LPC_BUF_LENGTH DECISION_DELAY -#endif - -/***************************/ -/* Voice activity detector */ -/***************************/ -#define VAD_N_BANDS 4 - -#define VAD_INTERNAL_SUBFRAMES_LOG2 2 -#define VAD_INTERNAL_SUBFRAMES ( 1 << VAD_INTERNAL_SUBFRAMES_LOG2 ) - -#define VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 1024 /* Must be < 4096 */ -#define VAD_NOISE_LEVELS_BIAS 50 - -/* Sigmoid settings */ -#define VAD_NEGATIVE_OFFSET_Q5 128 /* sigmoid is 0 at -128 */ -#define VAD_SNR_FACTOR_Q16 45000 - -/* smoothing for SNR measurement */ -#define VAD_SNR_SMOOTH_COEF_Q18 4096 - -/* Size of the piecewise linear cosine approximation table for the LSFs */ -#define LSF_COS_TAB_SZ_FIX 128 - -/******************/ -/* NLSF quantizer */ -/******************/ -#define NLSF_W_Q 2 -#define NLSF_VQ_MAX_VECTORS 32 -#define NLSF_VQ_MAX_SURVIVORS 32 -#define NLSF_QUANT_MAX_AMPLITUDE 4 -#define NLSF_QUANT_MAX_AMPLITUDE_EXT 10 -#define NLSF_QUANT_LEVEL_ADJ 0.1 -#define 
NLSF_QUANT_DEL_DEC_STATES_LOG2 2 -#define NLSF_QUANT_DEL_DEC_STATES ( 1 << NLSF_QUANT_DEL_DEC_STATES_LOG2 ) - -/* Transition filtering for mode switching */ -#define TRANSITION_TIME_MS 5120 /* 5120 = 64 * FRAME_LENGTH_MS * ( TRANSITION_INT_NUM - 1 ) = 64*(20*4)*/ -#define TRANSITION_NB 3 /* Hardcoded in tables */ -#define TRANSITION_NA 2 /* Hardcoded in tables */ -#define TRANSITION_INT_NUM 5 /* Hardcoded in tables */ -#define TRANSITION_FRAMES ( TRANSITION_TIME_MS / MAX_FRAME_LENGTH_MS ) -#define TRANSITION_INT_STEPS ( TRANSITION_FRAMES / ( TRANSITION_INT_NUM - 1 ) ) - -/* BWE factors to apply after packet loss */ -#define BWE_AFTER_LOSS_Q16 63570 - -/* Defines for CN generation */ -#define CNG_BUF_MASK_MAX 255 /* 2^floor(log2(MAX_FRAME_LENGTH))-1 */ -#define CNG_GAIN_SMTH_Q16 4634 /* 0.25^(1/4) */ -#define CNG_NLSF_SMTH_Q16 16348 /* 0.25 */ - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/silk/enc_API.c b/thirdparty/opus/silk/enc_API.c deleted file mode 100644 index f8060286db..0000000000 --- a/thirdparty/opus/silk/enc_API.c +++ /dev/null @@ -1,563 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#include "define.h" -#include "API.h" -#include "control.h" -#include "typedef.h" -#include "stack_alloc.h" -#include "structs.h" -#include "tuning_parameters.h" -#ifdef FIXED_POINT -#include "main_FIX.h" -#else -#include "main_FLP.h" -#endif - -/***************************************/ -/* Read control structure from encoder */ -/***************************************/ -static opus_int silk_QueryEncoder( /* O Returns error code */ - const void *encState, /* I State */ - silk_EncControlStruct *encStatus /* O Encoder Status */ -); - -/****************************************/ -/* Encoder functions */ -/****************************************/ - -opus_int silk_Get_Encoder_Size( /* O Returns error code */ - opus_int *encSizeBytes /* O Number of bytes in SILK encoder state */ -) -{ - opus_int ret = SILK_NO_ERROR; - - *encSizeBytes = sizeof( silk_encoder ); - - return ret; -} - -/*************************/ -/* Init or Reset encoder */ -/*************************/ -opus_int silk_InitEncoder( /* O Returns error code */ - void *encState, /* I/O State */ - int arch, /* I Run-time architecture */ 
- silk_EncControlStruct *encStatus /* O Encoder Status */ -) -{ - silk_encoder *psEnc; - opus_int n, ret = SILK_NO_ERROR; - - psEnc = (silk_encoder *)encState; - - /* Reset encoder */ - silk_memset( psEnc, 0, sizeof( silk_encoder ) ); - for( n = 0; n < ENCODER_NUM_CHANNELS; n++ ) { - if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) { - silk_assert( 0 ); - } - } - - psEnc->nChannelsAPI = 1; - psEnc->nChannelsInternal = 1; - - /* Read control structure */ - if( ret += silk_QueryEncoder( encState, encStatus ) ) { - silk_assert( 0 ); - } - - return ret; -} - -/***************************************/ -/* Read control structure from encoder */ -/***************************************/ -static opus_int silk_QueryEncoder( /* O Returns error code */ - const void *encState, /* I State */ - silk_EncControlStruct *encStatus /* O Encoder Status */ -) -{ - opus_int ret = SILK_NO_ERROR; - silk_encoder_state_Fxx *state_Fxx; - silk_encoder *psEnc = (silk_encoder *)encState; - - state_Fxx = psEnc->state_Fxx; - - encStatus->nChannelsAPI = psEnc->nChannelsAPI; - encStatus->nChannelsInternal = psEnc->nChannelsInternal; - encStatus->API_sampleRate = state_Fxx[ 0 ].sCmn.API_fs_Hz; - encStatus->maxInternalSampleRate = state_Fxx[ 0 ].sCmn.maxInternal_fs_Hz; - encStatus->minInternalSampleRate = state_Fxx[ 0 ].sCmn.minInternal_fs_Hz; - encStatus->desiredInternalSampleRate = state_Fxx[ 0 ].sCmn.desiredInternal_fs_Hz; - encStatus->payloadSize_ms = state_Fxx[ 0 ].sCmn.PacketSize_ms; - encStatus->bitRate = state_Fxx[ 0 ].sCmn.TargetRate_bps; - encStatus->packetLossPercentage = state_Fxx[ 0 ].sCmn.PacketLoss_perc; - encStatus->complexity = state_Fxx[ 0 ].sCmn.Complexity; - encStatus->useInBandFEC = state_Fxx[ 0 ].sCmn.useInBandFEC; - encStatus->useDTX = state_Fxx[ 0 ].sCmn.useDTX; - encStatus->useCBR = state_Fxx[ 0 ].sCmn.useCBR; - encStatus->internalSampleRate = silk_SMULBB( state_Fxx[ 0 ].sCmn.fs_kHz, 1000 ); - encStatus->allowBandwidthSwitch = state_Fxx[ 0 
].sCmn.allow_bandwidth_switch; - encStatus->inWBmodeWithoutVariableLP = state_Fxx[ 0 ].sCmn.fs_kHz == 16 && state_Fxx[ 0 ].sCmn.sLP.mode == 0; - - return ret; -} - - -/**************************/ -/* Encode frame with Silk */ -/**************************/ -/* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what */ -/* encControl->payloadSize_ms is set to */ -opus_int silk_Encode( /* O Returns error code */ - void *encState, /* I/O State */ - silk_EncControlStruct *encControl, /* I Control status */ - const opus_int16 *samplesIn, /* I Speech sample input vector */ - opus_int nSamplesIn, /* I Number of samples in input vector */ - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */ - const opus_int prefillFlag /* I Flag to indicate prefilling buffers no coding */ -) -{ - opus_int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0; - opus_int nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms; - opus_int nSamplesFromInput = 0, nSamplesFromInputMax; - opus_int speech_act_thr_for_switch_Q8; - opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum; - silk_encoder *psEnc = ( silk_encoder * )encState; - VARDECL( opus_int16, buf ); - opus_int transition, curr_block, tot_blocks; - SAVE_STACK; - - if (encControl->reducedDependency) - { - psEnc->state_Fxx[0].sCmn.first_frame_after_reset = 1; - psEnc->state_Fxx[1].sCmn.first_frame_after_reset = 1; - } - psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0; - - /* Check values in encoder control structure */ - if( ( ret = check_control_input( encControl ) ) != 0 ) { - silk_assert( 0 ); - RESTORE_STACK; - return ret; - } - - encControl->switchReady = 0; - - if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) { - /* Mono -> Stereo transition: init state of second channel and stereo state */ - ret += 
silk_init_encoder( &psEnc->state_Fxx[ 1 ], psEnc->state_Fxx[ 0 ].sCmn.arch ); - silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 ) ); - silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) ); - psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0; - psEnc->sStereo.mid_side_amp_Q0[ 1 ] = 1; - psEnc->sStereo.mid_side_amp_Q0[ 2 ] = 0; - psEnc->sStereo.mid_side_amp_Q0[ 3 ] = 1; - psEnc->sStereo.width_prev_Q14 = 0; - psEnc->sStereo.smth_width_Q14 = SILK_FIX_CONST( 1, 14 ); - if( psEnc->nChannelsAPI == 2 ) { - silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof( silk_resampler_state_struct ) ); - silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.In_HP_State, &psEnc->state_Fxx[ 0 ].sCmn.In_HP_State, sizeof( psEnc->state_Fxx[ 1 ].sCmn.In_HP_State ) ); - } - } - - transition = (encControl->payloadSize_ms != psEnc->state_Fxx[ 0 ].sCmn.PacketSize_ms) || (psEnc->nChannelsInternal != encControl->nChannelsInternal); - - psEnc->nChannelsAPI = encControl->nChannelsAPI; - psEnc->nChannelsInternal = encControl->nChannelsInternal; - - nBlocksOf10ms = silk_DIV32( 100 * nSamplesIn, encControl->API_sampleRate ); - tot_blocks = ( nBlocksOf10ms > 1 ) ? 
nBlocksOf10ms >> 1 : 1; - curr_block = 0; - if( prefillFlag ) { - /* Only accept input length of 10 ms */ - if( nBlocksOf10ms != 1 ) { - silk_assert( 0 ); - RESTORE_STACK; - return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; - } - /* Reset Encoder */ - for( n = 0; n < encControl->nChannelsInternal; n++ ) { - ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch ); - silk_assert( !ret ); - } - tmp_payloadSize_ms = encControl->payloadSize_ms; - encControl->payloadSize_ms = 10; - tmp_complexity = encControl->complexity; - encControl->complexity = 0; - for( n = 0; n < encControl->nChannelsInternal; n++ ) { - psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; - psEnc->state_Fxx[ n ].sCmn.prefillFlag = 1; - } - } else { - /* Only accept input lengths that are a multiple of 10 ms */ - if( nBlocksOf10ms * encControl->API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0 ) { - silk_assert( 0 ); - RESTORE_STACK; - return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; - } - /* Make sure no more than one packet can be produced */ - if( 1000 * (opus_int32)nSamplesIn > encControl->payloadSize_ms * encControl->API_sampleRate ) { - silk_assert( 0 ); - RESTORE_STACK; - return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; - } - } - - TargetRate_bps = silk_RSHIFT32( encControl->bitRate, encControl->nChannelsInternal - 1 ); - for( n = 0; n < encControl->nChannelsInternal; n++ ) { - /* Force the side channel to the same rate as the mid */ - opus_int force_fs_kHz = (n==1) ? 
psEnc->state_Fxx[0].sCmn.fs_kHz : 0; - if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, TargetRate_bps, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) { - silk_assert( 0 ); - RESTORE_STACK; - return ret; - } - if( psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition ) { - for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) { - psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] = 0; - } - } - psEnc->state_Fxx[ n ].sCmn.inDTX = psEnc->state_Fxx[ n ].sCmn.useDTX; - } - silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz ); - - /* Input buffering/resampling and encoding */ - nSamplesToBufferMax = - 10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz; - nSamplesFromInputMax = - silk_DIV32_16( nSamplesToBufferMax * - psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, - psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 ); - ALLOC( buf, nSamplesFromInputMax, opus_int16 ); - while( 1 ) { - nSamplesToBuffer = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx; - nSamplesToBuffer = silk_min( nSamplesToBuffer, nSamplesToBufferMax ); - nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 ); - /* Resample and write to buffer */ - if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) { - opus_int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded; - for( n = 0; n < nSamplesFromInput; n++ ) { - buf[ n ] = samplesIn[ 2 * n ]; - } - /* Making sure to start both resamplers from the same state when switching from mono to stereo */ - if( psEnc->nPrevChannelsInternal == 1 && id==0 ) { - silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state)); - } - - ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, - &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 
psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; - - nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx; - nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz ); - for( n = 0; n < nSamplesFromInput; n++ ) { - buf[ n ] = samplesIn[ 2 * n + 1 ]; - } - ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, - &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); - - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer; - } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) { - /* Combine left and right channels before resampling */ - for( n = 0; n < nSamplesFromInput; n++ ) { - sum = samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ]; - buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 ); - } - ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, - &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); - /* On the first mono frame, average the results for the two resampler states */ - if( psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 ) { - ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, - &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); - for( n = 0; n < psEnc->state_Fxx[ 0 ].sCmn.frame_length; n++ ) { - psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] = - silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] - + psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1); - } - } - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; - } else { - silk_assert( encControl->nChannelsAPI == 1 && 
encControl->nChannelsInternal == 1 ); - silk_memcpy(buf, samplesIn, nSamplesFromInput*sizeof(opus_int16)); - ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, - &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; - } - - samplesIn += nSamplesFromInput * encControl->nChannelsAPI; - nSamplesIn -= nSamplesFromInput; - - /* Default */ - psEnc->allowBandwidthSwitch = 0; - - /* Silk encoder */ - if( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx >= psEnc->state_Fxx[ 0 ].sCmn.frame_length ) { - /* Enough data in input buffer, so encode */ - silk_assert( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx == psEnc->state_Fxx[ 0 ].sCmn.frame_length ); - silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inputBufIx == psEnc->state_Fxx[ 1 ].sCmn.frame_length ); - - /* Deal with LBRR data */ - if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 && !prefillFlag ) { - /* Create space at start of payload for VAD and FEC flags */ - opus_uint8 iCDF[ 2 ] = { 0, 0 }; - iCDF[ 0 ] = 256 - silk_RSHIFT( 256, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal ); - ec_enc_icdf( psRangeEnc, 0, iCDF, 8 ); - - /* Encode any LBRR data from previous packet */ - /* Encode LBRR flags */ - for( n = 0; n < encControl->nChannelsInternal; n++ ) { - LBRR_symbol = 0; - for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) { - LBRR_symbol |= silk_LSHIFT( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ], i ); - } - psEnc->state_Fxx[ n ].sCmn.LBRR_flag = LBRR_symbol > 0 ? 
1 : 0; - if( LBRR_symbol && psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket > 1 ) { - ec_enc_icdf( psRangeEnc, LBRR_symbol - 1, silk_LBRR_flags_iCDF_ptr[ psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket - 2 ], 8 ); - } - } - - /* Code LBRR indices and excitation signals */ - for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) { - for( n = 0; n < encControl->nChannelsInternal; n++ ) { - if( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] ) { - opus_int condCoding; - - if( encControl->nChannelsInternal == 2 && n == 0 ) { - silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ i ] ); - /* For LBRR data there's no need to code the mid-only flag if the side-channel LBRR flag is set */ - if( psEnc->state_Fxx[ 1 ].sCmn.LBRR_flags[ i ] == 0 ) { - silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ i ] ); - } - } - /* Use conditional coding if previous frame available */ - if( i > 0 && psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i - 1 ] ) { - condCoding = CODE_CONDITIONALLY; - } else { - condCoding = CODE_INDEPENDENTLY; - } - silk_encode_indices( &psEnc->state_Fxx[ n ].sCmn, psRangeEnc, i, 1, condCoding ); - silk_encode_pulses( psRangeEnc, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].signalType, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].quantOffsetType, - psEnc->state_Fxx[ n ].sCmn.pulses_LBRR[ i ], psEnc->state_Fxx[ n ].sCmn.frame_length ); - } - } - } - - /* Reset LBRR flags */ - for( n = 0; n < encControl->nChannelsInternal; n++ ) { - silk_memset( psEnc->state_Fxx[ n ].sCmn.LBRR_flags, 0, sizeof( psEnc->state_Fxx[ n ].sCmn.LBRR_flags ) ); - } - - psEnc->nBitsUsedLBRR = ec_tell( psRangeEnc ); - } - - silk_HP_variable_cutoff( psEnc->state_Fxx ); - - /* Total target bits for packet */ - nBits = silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 ); - /* Subtract bits used for LBRR */ - if( !prefillFlag ) { - nBits -= psEnc->nBitsUsedLBRR; - } - /* Divide by number of uncoded frames left in packet */ - nBits = silk_DIV32_16( 
nBits, psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket ); - /* Convert to bits/second */ - if( encControl->payloadSize_ms == 10 ) { - TargetRate_bps = silk_SMULBB( nBits, 100 ); - } else { - TargetRate_bps = silk_SMULBB( nBits, 50 ); - } - /* Subtract fraction of bits in excess of target in previous frames and packets */ - TargetRate_bps -= silk_DIV32_16( silk_MUL( psEnc->nBitsExceeded, 1000 ), BITRESERVOIR_DECAY_TIME_MS ); - if( !prefillFlag && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded > 0 ) { - /* Compare actual vs target bits so far in this packet */ - opus_int32 bitsBalance = ec_tell( psRangeEnc ) - psEnc->nBitsUsedLBRR - nBits * psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded; - TargetRate_bps -= silk_DIV32_16( silk_MUL( bitsBalance, 1000 ), BITRESERVOIR_DECAY_TIME_MS ); - } - /* Never exceed input bitrate */ - TargetRate_bps = silk_LIMIT( TargetRate_bps, encControl->bitRate, 5000 ); - - /* Convert Left/Right to Mid/Side */ - if( encControl->nChannelsInternal == 2 ) { - silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ], - psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], - MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono, - psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length ); - if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) { - /* Reset side channel encoder memory for first frame with side coding */ - if( psEnc->prev_decode_only_middle == 1 ) { - silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) ); - silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt, 0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) ); - silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) ); - silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 0, sizeof( 
psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) ); - silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) ); - psEnc->state_Fxx[ 1 ].sCmn.prevLag = 100; - psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev = 100; - psEnc->state_Fxx[ 1 ].sShape.LastGainIndex = 10; - psEnc->state_Fxx[ 1 ].sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY; - psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_gain_Q16 = 65536; - psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1; - } - silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ] ); - } else { - psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0; - } - if( !prefillFlag ) { - silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] ); - if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) { - silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] ); - } - } - } else { - /* Buffering */ - silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) ); - silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) ); - } - silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ] ); - - /* Encode */ - for( n = 0; n < encControl->nChannelsInternal; n++ ) { - opus_int maxBits, useCBR; - - /* Handling rate constraints */ - maxBits = encControl->maxBits; - if( tot_blocks == 2 && curr_block == 0 ) { - maxBits = maxBits * 3 / 5; - } else if( tot_blocks == 3 ) { - if( curr_block == 0 ) { - maxBits = maxBits * 2 / 5; - } else if( curr_block == 1 ) { - maxBits = maxBits * 3 / 4; - } - } - useCBR = encControl->useCBR && curr_block == tot_blocks - 1; - - if( encControl->nChannelsInternal == 1 ) { - channelRate_bps = TargetRate_bps; - } else { - channelRate_bps = MStargetRates_bps[ n ]; - if( n == 0 && MStargetRates_bps[ 1 ] > 0 ) { - 
useCBR = 0; - /* Give mid up to 1/2 of the max bits for that frame */ - maxBits -= encControl->maxBits / ( tot_blocks * 2 ); - } - } - - if( channelRate_bps > 0 ) { - opus_int condCoding; - - silk_control_SNR( &psEnc->state_Fxx[ n ].sCmn, channelRate_bps ); - - /* Use independent coding if no previous frame available */ - if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - n <= 0 ) { - condCoding = CODE_INDEPENDENTLY; - } else if( n > 0 && psEnc->prev_decode_only_middle ) { - /* If we skipped a side frame in this packet, we don't - need LTP scaling; the LTP state is well-defined. */ - condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; - } else { - condCoding = CODE_CONDITIONALLY; - } - if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding, maxBits, useCBR ) ) != 0 ) { - silk_assert( 0 ); - } - } - psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; - psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0; - psEnc->state_Fxx[ n ].sCmn.nFramesEncoded++; - } - psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - 1 ]; - - /* Insert VAD and FEC flags at beginning of bitstream */ - if( *nBytesOut > 0 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket) { - flags = 0; - for( n = 0; n < encControl->nChannelsInternal; n++ ) { - for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) { - flags = silk_LSHIFT( flags, 1 ); - flags |= psEnc->state_Fxx[ n ].sCmn.VAD_flags[ i ]; - } - flags = silk_LSHIFT( flags, 1 ); - flags |= psEnc->state_Fxx[ n ].sCmn.LBRR_flag; - } - if( !prefillFlag ) { - ec_enc_patch_initial_bits( psRangeEnc, flags, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal ); - } - - /* Return zero bytes if all channels DTXed */ - if( psEnc->state_Fxx[ 0 ].sCmn.inDTX && ( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inDTX ) ) { - *nBytesOut = 0; - } - - psEnc->nBitsExceeded += 
*nBytesOut * 8; - psEnc->nBitsExceeded -= silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 ); - psEnc->nBitsExceeded = silk_LIMIT( psEnc->nBitsExceeded, 0, 10000 ); - - /* Update flag indicating if bandwidth switching is allowed */ - speech_act_thr_for_switch_Q8 = silk_SMLAWB( SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ), - SILK_FIX_CONST( ( 1 - SPEECH_ACTIVITY_DTX_THRES ) / MAX_BANDWIDTH_SWITCH_DELAY_MS, 16 + 8 ), psEnc->timeSinceSwitchAllowed_ms ); - if( psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8 < speech_act_thr_for_switch_Q8 ) { - psEnc->allowBandwidthSwitch = 1; - psEnc->timeSinceSwitchAllowed_ms = 0; - } else { - psEnc->allowBandwidthSwitch = 0; - psEnc->timeSinceSwitchAllowed_ms += encControl->payloadSize_ms; - } - } - - if( nSamplesIn == 0 ) { - break; - } - } else { - break; - } - curr_block++; - } - - psEnc->nPrevChannelsInternal = encControl->nChannelsInternal; - - encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch; - encControl->inWBmodeWithoutVariableLP = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == 16 && psEnc->state_Fxx[ 0 ].sCmn.sLP.mode == 0; - encControl->internalSampleRate = silk_SMULBB( psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, 1000 ); - encControl->stereoWidth_Q14 = encControl->toMono ? 0 : psEnc->sStereo.smth_width_Q14; - if( prefillFlag ) { - encControl->payloadSize_ms = tmp_payloadSize_ms; - encControl->complexity = tmp_complexity; - for( n = 0; n < encControl->nChannelsInternal; n++ ) { - psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; - psEnc->state_Fxx[ n ].sCmn.prefillFlag = 0; - } - } - - RESTORE_STACK; - return ret; -} - diff --git a/thirdparty/opus/silk/encode_indices.c b/thirdparty/opus/silk/encode_indices.c deleted file mode 100644 index 666c8c0b13..0000000000 --- a/thirdparty/opus/silk/encode_indices.c +++ /dev/null @@ -1,181 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Encode side-information parameters to payload */ -void silk_encode_indices( - silk_encoder_state *psEncC, /* I/O Encoder state */ - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - opus_int FrameIndex, /* I Frame number */ - opus_int encode_LBRR, /* I Flag indicating LBRR data is being encoded */ - opus_int condCoding /* I The type of conditional coding to use */ -) -{ - opus_int i, k, typeOffset; - opus_int encode_absolute_lagIndex, delta_lagIndex; - opus_int16 ec_ix[ MAX_LPC_ORDER ]; - opus_uint8 pred_Q8[ MAX_LPC_ORDER ]; - const SideInfoIndices *psIndices; - - if( encode_LBRR ) { - psIndices = &psEncC->indices_LBRR[ FrameIndex ]; - } else { - psIndices = &psEncC->indices; - } - - /*******************************************/ - /* Encode signal type and quantizer offset */ - /*******************************************/ - typeOffset = 2 * psIndices->signalType + psIndices->quantOffsetType; - silk_assert( typeOffset >= 0 && typeOffset < 6 ); - silk_assert( encode_LBRR == 0 || typeOffset >= 2 ); - if( encode_LBRR || typeOffset >= 2 ) { - ec_enc_icdf( psRangeEnc, typeOffset - 2, silk_type_offset_VAD_iCDF, 8 ); - } else { - ec_enc_icdf( psRangeEnc, typeOffset, silk_type_offset_no_VAD_iCDF, 8 ); - } - - /****************/ - /* Encode gains */ - /****************/ - /* first subframe */ - if( condCoding == CODE_CONDITIONALLY ) { - /* conditional coding */ - silk_assert( psIndices->GainsIndices[ 0 ] >= 0 && psIndices->GainsIndices[ 0 ] < MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 ); - ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ 0 ], silk_delta_gain_iCDF, 8 ); - } else { - /* independent coding, in two stages: MSB bits followed by 3 LSBs */ - silk_assert( psIndices->GainsIndices[ 0 ] >= 0 && psIndices->GainsIndices[ 0 ] < N_LEVELS_QGAIN ); - ec_enc_icdf( psRangeEnc, silk_RSHIFT( 
psIndices->GainsIndices[ 0 ], 3 ), silk_gain_iCDF[ psIndices->signalType ], 8 ); - ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ 0 ] & 7, silk_uniform8_iCDF, 8 ); - } - - /* remaining subframes */ - for( i = 1; i < psEncC->nb_subfr; i++ ) { - silk_assert( psIndices->GainsIndices[ i ] >= 0 && psIndices->GainsIndices[ i ] < MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 ); - ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ i ], silk_delta_gain_iCDF, 8 ); - } - - /****************/ - /* Encode NLSFs */ - /****************/ - ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ 0 ], &psEncC->psNLSF_CB->CB1_iCDF[ ( psIndices->signalType >> 1 ) * psEncC->psNLSF_CB->nVectors ], 8 ); - silk_NLSF_unpack( ec_ix, pred_Q8, psEncC->psNLSF_CB, psIndices->NLSFIndices[ 0 ] ); - silk_assert( psEncC->psNLSF_CB->order == psEncC->predictLPCOrder ); - for( i = 0; i < psEncC->psNLSF_CB->order; i++ ) { - if( psIndices->NLSFIndices[ i+1 ] >= NLSF_QUANT_MAX_AMPLITUDE ) { - ec_enc_icdf( psRangeEnc, 2 * NLSF_QUANT_MAX_AMPLITUDE, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 ); - ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ i+1 ] - NLSF_QUANT_MAX_AMPLITUDE, silk_NLSF_EXT_iCDF, 8 ); - } else if( psIndices->NLSFIndices[ i+1 ] <= -NLSF_QUANT_MAX_AMPLITUDE ) { - ec_enc_icdf( psRangeEnc, 0, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 ); - ec_enc_icdf( psRangeEnc, -psIndices->NLSFIndices[ i+1 ] - NLSF_QUANT_MAX_AMPLITUDE, silk_NLSF_EXT_iCDF, 8 ); - } else { - ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ i+1 ] + NLSF_QUANT_MAX_AMPLITUDE, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 ); - } - } - - /* Encode NLSF interpolation factor */ - if( psEncC->nb_subfr == MAX_NB_SUBFR ) { - silk_assert( psIndices->NLSFInterpCoef_Q2 >= 0 && psIndices->NLSFInterpCoef_Q2 < 5 ); - ec_enc_icdf( psRangeEnc, psIndices->NLSFInterpCoef_Q2, silk_NLSF_interpolation_factor_iCDF, 8 ); - } - - if( psIndices->signalType == TYPE_VOICED ) - { - /*********************/ - /* Encode pitch lags */ - 
/*********************/ - /* lag index */ - encode_absolute_lagIndex = 1; - if( condCoding == CODE_CONDITIONALLY && psEncC->ec_prevSignalType == TYPE_VOICED ) { - /* Delta Encoding */ - delta_lagIndex = psIndices->lagIndex - psEncC->ec_prevLagIndex; - if( delta_lagIndex < -8 || delta_lagIndex > 11 ) { - delta_lagIndex = 0; - } else { - delta_lagIndex = delta_lagIndex + 9; - encode_absolute_lagIndex = 0; /* Only use delta */ - } - silk_assert( delta_lagIndex >= 0 && delta_lagIndex < 21 ); - ec_enc_icdf( psRangeEnc, delta_lagIndex, silk_pitch_delta_iCDF, 8 ); - } - if( encode_absolute_lagIndex ) { - /* Absolute encoding */ - opus_int32 pitch_high_bits, pitch_low_bits; - pitch_high_bits = silk_DIV32_16( psIndices->lagIndex, silk_RSHIFT( psEncC->fs_kHz, 1 ) ); - pitch_low_bits = psIndices->lagIndex - silk_SMULBB( pitch_high_bits, silk_RSHIFT( psEncC->fs_kHz, 1 ) ); - silk_assert( pitch_low_bits < psEncC->fs_kHz / 2 ); - silk_assert( pitch_high_bits < 32 ); - ec_enc_icdf( psRangeEnc, pitch_high_bits, silk_pitch_lag_iCDF, 8 ); - ec_enc_icdf( psRangeEnc, pitch_low_bits, psEncC->pitch_lag_low_bits_iCDF, 8 ); - } - psEncC->ec_prevLagIndex = psIndices->lagIndex; - - /* Countour index */ - silk_assert( psIndices->contourIndex >= 0 ); - silk_assert( ( psIndices->contourIndex < 34 && psEncC->fs_kHz > 8 && psEncC->nb_subfr == 4 ) || - ( psIndices->contourIndex < 11 && psEncC->fs_kHz == 8 && psEncC->nb_subfr == 4 ) || - ( psIndices->contourIndex < 12 && psEncC->fs_kHz > 8 && psEncC->nb_subfr == 2 ) || - ( psIndices->contourIndex < 3 && psEncC->fs_kHz == 8 && psEncC->nb_subfr == 2 ) ); - ec_enc_icdf( psRangeEnc, psIndices->contourIndex, psEncC->pitch_contour_iCDF, 8 ); - - /********************/ - /* Encode LTP gains */ - /********************/ - /* PERIndex value */ - silk_assert( psIndices->PERIndex >= 0 && psIndices->PERIndex < 3 ); - ec_enc_icdf( psRangeEnc, psIndices->PERIndex, silk_LTP_per_index_iCDF, 8 ); - - /* Codebook Indices */ - for( k = 0; k < psEncC->nb_subfr; k++ ) 
{ - silk_assert( psIndices->LTPIndex[ k ] >= 0 && psIndices->LTPIndex[ k ] < ( 8 << psIndices->PERIndex ) ); - ec_enc_icdf( psRangeEnc, psIndices->LTPIndex[ k ], silk_LTP_gain_iCDF_ptrs[ psIndices->PERIndex ], 8 ); - } - - /**********************/ - /* Encode LTP scaling */ - /**********************/ - if( condCoding == CODE_INDEPENDENTLY ) { - silk_assert( psIndices->LTP_scaleIndex >= 0 && psIndices->LTP_scaleIndex < 3 ); - ec_enc_icdf( psRangeEnc, psIndices->LTP_scaleIndex, silk_LTPscale_iCDF, 8 ); - } - silk_assert( !condCoding || psIndices->LTP_scaleIndex == 0 ); - } - - psEncC->ec_prevSignalType = psIndices->signalType; - - /***************/ - /* Encode seed */ - /***************/ - silk_assert( psIndices->Seed >= 0 && psIndices->Seed < 4 ); - ec_enc_icdf( psRangeEnc, psIndices->Seed, silk_uniform4_iCDF, 8 ); -} diff --git a/thirdparty/opus/silk/encode_pulses.c b/thirdparty/opus/silk/encode_pulses.c deleted file mode 100644 index ab00264f99..0000000000 --- a/thirdparty/opus/silk/encode_pulses.c +++ /dev/null @@ -1,206 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" - -/*********************************************/ -/* Encode quantization indices of excitation */ -/*********************************************/ - -static OPUS_INLINE opus_int combine_and_check( /* return ok */ - opus_int *pulses_comb, /* O */ - const opus_int *pulses_in, /* I */ - opus_int max_pulses, /* I max value for sum of pulses */ - opus_int len /* I number of output values */ -) -{ - opus_int k, sum; - - for( k = 0; k < len; k++ ) { - sum = pulses_in[ 2 * k ] + pulses_in[ 2 * k + 1 ]; - if( sum > max_pulses ) { - return 1; - } - pulses_comb[ k ] = sum; - } - - return 0; -} - -/* Encode quantization indices of excitation */ -void silk_encode_pulses( - ec_enc *psRangeEnc, /* I/O compressor data structure */ - const opus_int signalType, /* I Signal type */ - const opus_int quantOffsetType, /* I quantOffsetType */ - opus_int8 pulses[], /* I quantization indices */ - const opus_int frame_length /* I Frame length */ -) -{ - opus_int i, k, j, iter, bit, nLS, scale_down, RateLevelIndex = 0; - opus_int32 abs_q, minSumBits_Q5, sumBits_Q5; - 
VARDECL( opus_int, abs_pulses ); - VARDECL( opus_int, sum_pulses ); - VARDECL( opus_int, nRshifts ); - opus_int pulses_comb[ 8 ]; - opus_int *abs_pulses_ptr; - const opus_int8 *pulses_ptr; - const opus_uint8 *cdf_ptr; - const opus_uint8 *nBits_ptr; - SAVE_STACK; - - silk_memset( pulses_comb, 0, 8 * sizeof( opus_int ) ); /* Fixing Valgrind reported problem*/ - - /****************************/ - /* Prepare for shell coding */ - /****************************/ - /* Calculate number of shell blocks */ - silk_assert( 1 << LOG2_SHELL_CODEC_FRAME_LENGTH == SHELL_CODEC_FRAME_LENGTH ); - iter = silk_RSHIFT( frame_length, LOG2_SHELL_CODEC_FRAME_LENGTH ); - if( iter * SHELL_CODEC_FRAME_LENGTH < frame_length ) { - silk_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */ - iter++; - silk_memset( &pulses[ frame_length ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof(opus_int8)); - } - - /* Take the absolute value of the pulses */ - ALLOC( abs_pulses, iter * SHELL_CODEC_FRAME_LENGTH, opus_int ); - silk_assert( !( SHELL_CODEC_FRAME_LENGTH & 3 ) ); - for( i = 0; i < iter * SHELL_CODEC_FRAME_LENGTH; i+=4 ) { - abs_pulses[i+0] = ( opus_int )silk_abs( pulses[ i + 0 ] ); - abs_pulses[i+1] = ( opus_int )silk_abs( pulses[ i + 1 ] ); - abs_pulses[i+2] = ( opus_int )silk_abs( pulses[ i + 2 ] ); - abs_pulses[i+3] = ( opus_int )silk_abs( pulses[ i + 3 ] ); - } - - /* Calc sum pulses per shell code frame */ - ALLOC( sum_pulses, iter, opus_int ); - ALLOC( nRshifts, iter, opus_int ); - abs_pulses_ptr = abs_pulses; - for( i = 0; i < iter; i++ ) { - nRshifts[ i ] = 0; - - while( 1 ) { - /* 1+1 -> 2 */ - scale_down = combine_and_check( pulses_comb, abs_pulses_ptr, silk_max_pulses_table[ 0 ], 8 ); - /* 2+2 -> 4 */ - scale_down += combine_and_check( pulses_comb, pulses_comb, silk_max_pulses_table[ 1 ], 4 ); - /* 4+4 -> 8 */ - scale_down += combine_and_check( pulses_comb, pulses_comb, silk_max_pulses_table[ 2 ], 2 ); - /* 8+8 -> 16 */ - scale_down += combine_and_check( 
&sum_pulses[ i ], pulses_comb, silk_max_pulses_table[ 3 ], 1 ); - - if( scale_down ) { - /* We need to downscale the quantization signal */ - nRshifts[ i ]++; - for( k = 0; k < SHELL_CODEC_FRAME_LENGTH; k++ ) { - abs_pulses_ptr[ k ] = silk_RSHIFT( abs_pulses_ptr[ k ], 1 ); - } - } else { - /* Jump out of while(1) loop and go to next shell coding frame */ - break; - } - } - abs_pulses_ptr += SHELL_CODEC_FRAME_LENGTH; - } - - /**************/ - /* Rate level */ - /**************/ - /* find rate level that leads to fewest bits for coding of pulses per block info */ - minSumBits_Q5 = silk_int32_MAX; - for( k = 0; k < N_RATE_LEVELS - 1; k++ ) { - nBits_ptr = silk_pulses_per_block_BITS_Q5[ k ]; - sumBits_Q5 = silk_rate_levels_BITS_Q5[ signalType >> 1 ][ k ]; - for( i = 0; i < iter; i++ ) { - if( nRshifts[ i ] > 0 ) { - sumBits_Q5 += nBits_ptr[ SILK_MAX_PULSES + 1 ]; - } else { - sumBits_Q5 += nBits_ptr[ sum_pulses[ i ] ]; - } - } - if( sumBits_Q5 < minSumBits_Q5 ) { - minSumBits_Q5 = sumBits_Q5; - RateLevelIndex = k; - } - } - ec_enc_icdf( psRangeEnc, RateLevelIndex, silk_rate_levels_iCDF[ signalType >> 1 ], 8 ); - - /***************************************************/ - /* Sum-Weighted-Pulses Encoding */ - /***************************************************/ - cdf_ptr = silk_pulses_per_block_iCDF[ RateLevelIndex ]; - for( i = 0; i < iter; i++ ) { - if( nRshifts[ i ] == 0 ) { - ec_enc_icdf( psRangeEnc, sum_pulses[ i ], cdf_ptr, 8 ); - } else { - ec_enc_icdf( psRangeEnc, SILK_MAX_PULSES + 1, cdf_ptr, 8 ); - for( k = 0; k < nRshifts[ i ] - 1; k++ ) { - ec_enc_icdf( psRangeEnc, SILK_MAX_PULSES + 1, silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 ); - } - ec_enc_icdf( psRangeEnc, sum_pulses[ i ], silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 ); - } - } - - /******************/ - /* Shell Encoding */ - /******************/ - for( i = 0; i < iter; i++ ) { - if( sum_pulses[ i ] > 0 ) { - silk_shell_encoder( psRangeEnc, &abs_pulses[ i * SHELL_CODEC_FRAME_LENGTH ] ); - 
} - } - - /****************/ - /* LSB Encoding */ - /****************/ - for( i = 0; i < iter; i++ ) { - if( nRshifts[ i ] > 0 ) { - pulses_ptr = &pulses[ i * SHELL_CODEC_FRAME_LENGTH ]; - nLS = nRshifts[ i ] - 1; - for( k = 0; k < SHELL_CODEC_FRAME_LENGTH; k++ ) { - abs_q = (opus_int8)silk_abs( pulses_ptr[ k ] ); - for( j = nLS; j > 0; j-- ) { - bit = silk_RSHIFT( abs_q, j ) & 1; - ec_enc_icdf( psRangeEnc, bit, silk_lsb_iCDF, 8 ); - } - bit = abs_q & 1; - ec_enc_icdf( psRangeEnc, bit, silk_lsb_iCDF, 8 ); - } - } - } - - /****************/ - /* Encode signs */ - /****************/ - silk_encode_signs( psRangeEnc, pulses, frame_length, signalType, quantOffsetType, sum_pulses ); - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/errors.h b/thirdparty/opus/silk/errors.h deleted file mode 100644 index 45070800f2..0000000000 --- a/thirdparty/opus/silk/errors.h +++ /dev/null @@ -1,98 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_ERRORS_H -#define SILK_ERRORS_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -/******************/ -/* Error messages */ -/******************/ -#define SILK_NO_ERROR 0 - -/**************************/ -/* Encoder error messages */ -/**************************/ - -/* Input length is not a multiple of 10 ms, or length is longer than the packet length */ -#define SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES -101 - -/* Sampling frequency not 8000, 12000 or 16000 Hertz */ -#define SILK_ENC_FS_NOT_SUPPORTED -102 - -/* Packet size not 10, 20, 40, or 60 ms */ -#define SILK_ENC_PACKET_SIZE_NOT_SUPPORTED -103 - -/* Allocated payload buffer too short */ -#define SILK_ENC_PAYLOAD_BUF_TOO_SHORT -104 - -/* Loss rate not between 0 and 100 percent */ -#define SILK_ENC_INVALID_LOSS_RATE -105 - -/* Complexity setting not valid, use 0...10 */ -#define SILK_ENC_INVALID_COMPLEXITY_SETTING -106 - -/* Inband FEC setting not valid, use 0 or 1 */ -#define SILK_ENC_INVALID_INBAND_FEC_SETTING -107 - -/* DTX setting not valid, use 0 or 1 */ -#define SILK_ENC_INVALID_DTX_SETTING -108 - -/* CBR setting not valid, use 0 or 1 */ -#define SILK_ENC_INVALID_CBR_SETTING -109 - -/* Internal encoder error */ -#define SILK_ENC_INTERNAL_ERROR -110 - -/* Internal encoder error */ -#define SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR -111 - -/**************************/ -/* Decoder error messages */ 
-/**************************/ - -/* Output sampling frequency lower than internal decoded sampling frequency */ -#define SILK_DEC_INVALID_SAMPLING_FREQUENCY -200 - -/* Payload size exceeded the maximum allowed 1024 bytes */ -#define SILK_DEC_PAYLOAD_TOO_LARGE -201 - -/* Payload has bit errors */ -#define SILK_DEC_PAYLOAD_ERROR -202 - -/* Payload has bit errors */ -#define SILK_DEC_INVALID_FRAME_SIZE -203 - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/silk/fixed/LTP_analysis_filter_FIX.c b/thirdparty/opus/silk/fixed/LTP_analysis_filter_FIX.c deleted file mode 100644 index 5574e7069f..0000000000 --- a/thirdparty/opus/silk/fixed/LTP_analysis_filter_FIX.c +++ /dev/null @@ -1,90 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" - -void silk_LTP_analysis_filter_FIX( - opus_int16 *LTP_res, /* O LTP residual signal of length MAX_NB_SUBFR * ( pre_length + subfr_length ) */ - const opus_int16 *x, /* I Pointer to input signal with at least max( pitchL ) preceding samples */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],/* I LTP_ORDER LTP coefficients for each MAX_NB_SUBFR subframe */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag, one for each subframe */ - const opus_int32 invGains_Q16[ MAX_NB_SUBFR ], /* I Inverse quantization gains, one for each subframe */ - const opus_int subfr_length, /* I Length of each subframe */ - const opus_int nb_subfr, /* I Number of subframes */ - const opus_int pre_length /* I Length of the preceding samples starting at &x[0] for each subframe */ -) -{ - const opus_int16 *x_ptr, *x_lag_ptr; - opus_int16 Btmp_Q14[ LTP_ORDER ]; - opus_int16 *LTP_res_ptr; - opus_int k, i; - opus_int32 LTP_est; - - x_ptr = x; - LTP_res_ptr = LTP_res; - for( k = 0; k < nb_subfr; k++ ) { - - x_lag_ptr = x_ptr - pitchL[ k ]; - - Btmp_Q14[ 0 ] = LTPCoef_Q14[ k * LTP_ORDER ]; - Btmp_Q14[ 1 ] = LTPCoef_Q14[ k * LTP_ORDER + 1 ]; - Btmp_Q14[ 2 ] = LTPCoef_Q14[ k * LTP_ORDER + 2 ]; - Btmp_Q14[ 3 ] = LTPCoef_Q14[ k * LTP_ORDER + 3 ]; - Btmp_Q14[ 4 ] = LTPCoef_Q14[ k * LTP_ORDER + 4 ]; - - /* LTP analysis FIR 
filter */ - for( i = 0; i < subfr_length + pre_length; i++ ) { - LTP_res_ptr[ i ] = x_ptr[ i ]; - - /* Long-term prediction */ - LTP_est = silk_SMULBB( x_lag_ptr[ LTP_ORDER / 2 ], Btmp_Q14[ 0 ] ); - LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ 1 ], Btmp_Q14[ 1 ] ); - LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ 0 ], Btmp_Q14[ 2 ] ); - LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ -1 ], Btmp_Q14[ 3 ] ); - LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ -2 ], Btmp_Q14[ 4 ] ); - - LTP_est = silk_RSHIFT_ROUND( LTP_est, 14 ); /* round and -> Q0*/ - - /* Subtract long-term prediction */ - LTP_res_ptr[ i ] = (opus_int16)silk_SAT16( (opus_int32)x_ptr[ i ] - LTP_est ); - - /* Scale residual */ - LTP_res_ptr[ i ] = silk_SMULWB( invGains_Q16[ k ], LTP_res_ptr[ i ] ); - - x_lag_ptr++; - } - - /* Update pointers */ - LTP_res_ptr += subfr_length + pre_length; - x_ptr += subfr_length; - } -} - diff --git a/thirdparty/opus/silk/fixed/LTP_scale_ctrl_FIX.c b/thirdparty/opus/silk/fixed/LTP_scale_ctrl_FIX.c deleted file mode 100644 index 3dcedef891..0000000000 --- a/thirdparty/opus/silk/fixed/LTP_scale_ctrl_FIX.c +++ /dev/null @@ -1,53 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" - -/* Calculation of LTP state scaling */ -void silk_LTP_scale_ctrl_FIX( - silk_encoder_state_FIX *psEnc, /* I/O encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ - opus_int condCoding /* I The type of conditional coding to use */ -) -{ - opus_int round_loss; - - if( condCoding == CODE_INDEPENDENTLY ) { - /* Only scale if first frame in packet */ - round_loss = psEnc->sCmn.PacketLoss_perc + psEnc->sCmn.nFramesPerPacket; - psEnc->sCmn.indices.LTP_scaleIndex = (opus_int8)silk_LIMIT( - silk_SMULWB( silk_SMULBB( round_loss, psEncCtrl->LTPredCodGain_Q7 ), SILK_FIX_CONST( 0.1, 9 ) ), 0, 2 ); - } else { - /* Default is minimum scaling */ - psEnc->sCmn.indices.LTP_scaleIndex = 0; - } - psEncCtrl->LTP_scale_Q14 = silk_LTPScales_table_Q14[ psEnc->sCmn.indices.LTP_scaleIndex ]; -} diff --git a/thirdparty/opus/silk/fixed/apply_sine_window_FIX.c b/thirdparty/opus/silk/fixed/apply_sine_window_FIX.c deleted file mode 100644 index 4502b7130e..0000000000 --- a/thirdparty/opus/silk/fixed/apply_sine_window_FIX.c +++ /dev/null @@ -1,101 +0,0 @@ 
-/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Apply sine window to signal vector. */ -/* Window types: */ -/* 1 -> sine window from 0 to pi/2 */ -/* 2 -> sine window from pi/2 to pi */ -/* Every other sample is linearly interpolated, for speed. 
*/ -/* Window length must be between 16 and 120 (incl) and a multiple of 4. */ - -/* Matlab code for table: - for k=16:9*4:16+2*9*4, fprintf(' %7.d,', -round(65536*pi ./ (k:4:k+8*4))); fprintf('\n'); end -*/ -static const opus_int16 freq_table_Q16[ 27 ] = { - 12111, 9804, 8235, 7100, 6239, 5565, 5022, 4575, 4202, - 3885, 3612, 3375, 3167, 2984, 2820, 2674, 2542, 2422, - 2313, 2214, 2123, 2038, 1961, 1889, 1822, 1760, 1702, -}; - -void silk_apply_sine_window( - opus_int16 px_win[], /* O Pointer to windowed signal */ - const opus_int16 px[], /* I Pointer to input signal */ - const opus_int win_type, /* I Selects a window type */ - const opus_int length /* I Window length, multiple of 4 */ -) -{ - opus_int k, f_Q16, c_Q16; - opus_int32 S0_Q16, S1_Q16; - - silk_assert( win_type == 1 || win_type == 2 ); - - /* Length must be in a range from 16 to 120 and a multiple of 4 */ - silk_assert( length >= 16 && length <= 120 ); - silk_assert( ( length & 3 ) == 0 ); - - /* Frequency */ - k = ( length >> 2 ) - 4; - silk_assert( k >= 0 && k <= 26 ); - f_Q16 = (opus_int)freq_table_Q16[ k ]; - - /* Factor used for cosine approximation */ - c_Q16 = silk_SMULWB( (opus_int32)f_Q16, -f_Q16 ); - silk_assert( c_Q16 >= -32768 ); - - /* initialize state */ - if( win_type == 1 ) { - /* start from 0 */ - S0_Q16 = 0; - /* approximation of sin(f) */ - S1_Q16 = f_Q16 + silk_RSHIFT( length, 3 ); - } else { - /* start from 1 */ - S0_Q16 = ( (opus_int32)1 << 16 ); - /* approximation of cos(f) */ - S1_Q16 = ( (opus_int32)1 << 16 ) + silk_RSHIFT( c_Q16, 1 ) + silk_RSHIFT( length, 4 ); - } - - /* Uses the recursive equation: sin(n*f) = 2 * cos(f) * sin((n-1)*f) - sin((n-2)*f) */ - /* 4 samples at a time */ - for( k = 0; k < length; k += 4 ) { - px_win[ k ] = (opus_int16)silk_SMULWB( silk_RSHIFT( S0_Q16 + S1_Q16, 1 ), px[ k ] ); - px_win[ k + 1 ] = (opus_int16)silk_SMULWB( S1_Q16, px[ k + 1] ); - S0_Q16 = silk_SMULWB( S1_Q16, c_Q16 ) + silk_LSHIFT( S1_Q16, 1 ) - S0_Q16 + 1; - S0_Q16 = silk_min( S0_Q16, 
( (opus_int32)1 << 16 ) ); - - px_win[ k + 2 ] = (opus_int16)silk_SMULWB( silk_RSHIFT( S0_Q16 + S1_Q16, 1 ), px[ k + 2] ); - px_win[ k + 3 ] = (opus_int16)silk_SMULWB( S0_Q16, px[ k + 3 ] ); - S1_Q16 = silk_SMULWB( S0_Q16, c_Q16 ) + silk_LSHIFT( S0_Q16, 1 ) - S1_Q16; - S1_Q16 = silk_min( S1_Q16, ( (opus_int32)1 << 16 ) ); - } -} diff --git a/thirdparty/opus/silk/fixed/autocorr_FIX.c b/thirdparty/opus/silk/fixed/autocorr_FIX.c deleted file mode 100644 index de95c98693..0000000000 --- a/thirdparty/opus/silk/fixed/autocorr_FIX.c +++ /dev/null @@ -1,48 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "celt_lpc.h" - -/* Compute autocorrelation */ -void silk_autocorr( - opus_int32 *results, /* O Result (length correlationCount) */ - opus_int *scale, /* O Scaling of the correlation vector */ - const opus_int16 *inputData, /* I Input data to correlate */ - const opus_int inputDataSize, /* I Length of input */ - const opus_int correlationCount, /* I Number of correlation taps to compute */ - int arch /* I Run-time architecture */ -) -{ - opus_int corrCount; - corrCount = silk_min_int( inputDataSize, correlationCount ); - *scale = _celt_autocorr(inputData, results, NULL, 0, corrCount-1, inputDataSize, arch); -} diff --git a/thirdparty/opus/silk/fixed/burg_modified_FIX.c b/thirdparty/opus/silk/fixed/burg_modified_FIX.c deleted file mode 100644 index 17d0e0993c..0000000000 --- a/thirdparty/opus/silk/fixed/burg_modified_FIX.c +++ /dev/null @@ -1,280 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "define.h" -#include "tuning_parameters.h" -#include "pitch.h" - -#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */ - -#define QA 25 -#define N_BITS_HEAD_ROOM 2 -#define MIN_RSHIFTS -16 -#define MAX_RSHIFTS (32 - QA) - -/* Compute reflection coefficients from input signal */ -void silk_burg_modified_c( - opus_int32 *res_nrg, /* O Residual energy */ - opus_int *res_nrg_Q, /* O Residual energy Q value */ - opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ - const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ - const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ - const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ - const opus_int nb_subfr, /* I Number of subframes stacked in x */ - const opus_int D, /* I Order */ - int arch /* I Run-time architecture */ -) -{ - opus_int k, n, s, lz, rshifts, reached_max_gain; - opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2; - const opus_int16 *x_ptr; - opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ]; - opus_int32 C_last_row[ SILK_MAX_ORDER_LPC ]; - opus_int32 Af_QA[ SILK_MAX_ORDER_LPC ]; - opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ]; - opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ]; - opus_int32 xcorr[ SILK_MAX_ORDER_LPC ]; - opus_int64 C0_64; - - silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); - - /* Compute autocorrelations, added over subframes */ - C0_64 = silk_inner_prod16_aligned_64( x, x, subfr_length*nb_subfr, arch ); - lz = silk_CLZ64(C0_64); - rshifts = 32 + 1 + N_BITS_HEAD_ROOM - lz; - if (rshifts > MAX_RSHIFTS) rshifts = MAX_RSHIFTS; - if (rshifts < MIN_RSHIFTS) rshifts = MIN_RSHIFTS; - - if (rshifts > 0) { - C0 = (opus_int32)silk_RSHIFT64(C0_64, rshifts ); - } else { - C0 = 
silk_LSHIFT32((opus_int32)C0_64, -rshifts ); - } - - CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ - silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); - if( rshifts > 0 ) { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - for( n = 1; n < D + 1; n++ ) { - C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( - silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts ); - } - } - } else { - for( s = 0; s < nb_subfr; s++ ) { - int i; - opus_int32 d; - x_ptr = x + s * subfr_length; - celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch ); - for( n = 1; n < D + 1; n++ ) { - for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ ) - d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] ); - xcorr[ n - 1 ] += d; - } - for( n = 1; n < D + 1; n++ ) { - C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts ); - } - } - } - silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); - - /* Initialize */ - CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ - - invGain_Q30 = (opus_int32)1 << 30; - reached_max_gain = 0; - for( n = 0; n < D; n++ ) { - /* Update first row of correlation matrix (without first element) */ - /* Update last row of correlation matrix (without last element, stored in reversed order) */ - /* Update C * Af */ - /* Update C * flipud(Af) (stored in reversed order) */ - if( rshifts > -2 ) { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], 16 - rshifts ); /* Q(16-rshifts) */ - x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts ); /* Q(16-rshifts) */ - tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], QA - 16 ); /* Q(QA-16) */ - tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 ); /* Q(QA-16) */ - for( k = 0; k < n; k++ ) { - 
C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ - C_last_row[ k ] = silk_SMLAWB( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ - Atmp_QA = Af_QA[ k ]; - tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ] ); /* Q(QA-16) */ - tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] ); /* Q(QA-16) */ - } - tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts ); /* Q(16-rshifts) */ - tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts ); /* Q(16-rshifts) */ - for( k = 0; k <= n; k++ ) { - CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ] ); /* Q( -rshift ) */ - CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] ); /* Q( -rshift ) */ - } - } - } else { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], -rshifts ); /* Q( -rshifts ) */ - x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts ); /* Q( -rshifts ) */ - tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], 17 ); /* Q17 */ - tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 ); /* Q17 */ - for( k = 0; k < n; k++ ) { - C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ - C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ - Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */ - /* We sometimes have get overflows in the multiplications (even beyond +/- 2^32), - but they cancel each other and the real result seems to always fit in a 32-bit - signed integer. This was determined experimentally, not theoretically (unfortunately). 
*/ - tmp1 = silk_MLA_ovflw( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */ - tmp2 = silk_MLA_ovflw( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */ - } - tmp1 = -tmp1; /* Q17 */ - tmp2 = -tmp2; /* Q17 */ - for( k = 0; k <= n; k++ ) { - CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1, - silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) ); /* Q( -rshift ) */ - CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2, - silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */ - } - } - } - - /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */ - tmp1 = C_first_row[ n ]; /* Q( -rshifts ) */ - tmp2 = C_last_row[ n ]; /* Q( -rshifts ) */ - num = 0; /* Q( -rshifts ) */ - nrg = silk_ADD32( CAb[ 0 ], CAf[ 0 ] ); /* Q( 1-rshifts ) */ - for( k = 0; k < n; k++ ) { - Atmp_QA = Af_QA[ k ]; - lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1; - lz = silk_min( 32 - QA, lz ); - Atmp1 = silk_LSHIFT32( Atmp_QA, lz ); /* Q( QA + lz ) */ - - tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ - tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ - num = silk_ADD_LSHIFT32( num, silk_SMMUL( CAb[ n - k ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ - nrg = silk_ADD_LSHIFT32( nrg, silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ), - Atmp1 ), 32 - QA - lz ); /* Q( 1-rshifts ) */ - } - CAf[ n + 1 ] = tmp1; /* Q( -rshifts ) */ - CAb[ n + 1 ] = tmp2; /* Q( -rshifts ) */ - num = silk_ADD32( num, tmp2 ); /* Q( -rshifts ) */ - num = silk_LSHIFT32( -num, 1 ); /* Q( 1-rshifts ) */ - - /* Calculate the next order reflection (parcor) coefficient */ - if( silk_abs( num ) < nrg ) { - rc_Q31 = silk_DIV32_varQ( num, nrg, 31 ); - } else { - rc_Q31 = ( num > 0 ) ? 
silk_int32_MAX : silk_int32_MIN; - } - - /* Update inverse prediction gain */ - tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 ); - tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 ); - if( tmp1 <= minInvGain_Q30 ) { - /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */ - tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */ - rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */ - if( rc_Q31 > 0 ) { - /* Newton-Raphson iteration */ - rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */ - rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */ - if( num < 0 ) { - /* Ensure adjusted reflection coefficients has the original sign */ - rc_Q31 = -rc_Q31; - } - } - invGain_Q30 = minInvGain_Q30; - reached_max_gain = 1; - } else { - invGain_Q30 = tmp1; - } - - /* Update the AR coefficients */ - for( k = 0; k < (n + 1) >> 1; k++ ) { - tmp1 = Af_QA[ k ]; /* QA */ - tmp2 = Af_QA[ n - k - 1 ]; /* QA */ - Af_QA[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* QA */ - Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* QA */ - } - Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA ); /* QA */ - - if( reached_max_gain ) { - /* Reached max prediction gain; set remaining coefficients to zero and exit loop */ - for( k = n + 1; k < D; k++ ) { - Af_QA[ k ] = 0; - } - break; - } - - /* Update C * Af and C * Ab */ - for( k = 0; k <= n + 1; k++ ) { - tmp1 = CAf[ k ]; /* Q( -rshifts ) */ - tmp2 = CAb[ n - k + 1 ]; /* Q( -rshifts ) */ - CAf[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* Q( -rshifts ) */ - CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* Q( -rshifts ) */ - } - } - - if( reached_max_gain ) { - for( k = 0; k < D; k++ ) { - /* Scale coefficients */ - A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); - } - /* Subtract energy of preceding samples from C0 */ - 
if( rshifts > 0 ) { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts ); - } - } else { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch), -rshifts); - } - } - /* Approximate residual energy */ - *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 ); - *res_nrg_Q = -rshifts; - } else { - /* Return residual energy */ - nrg = CAf[ 0 ]; /* Q( -rshifts ) */ - tmp1 = (opus_int32)1 << 16; /* Q16 */ - for( k = 0; k < D; k++ ) { - Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); /* Q16 */ - nrg = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 ); /* Q( -rshifts ) */ - tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 ); /* Q16 */ - A_Q16[ k ] = -Atmp1; - } - *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */ - *res_nrg_Q = -rshifts; - } -} diff --git a/thirdparty/opus/silk/fixed/corrMatrix_FIX.c b/thirdparty/opus/silk/fixed/corrMatrix_FIX.c deleted file mode 100644 index c1d437c785..0000000000 --- a/thirdparty/opus/silk/fixed/corrMatrix_FIX.c +++ /dev/null @@ -1,158 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/********************************************************************** - * Correlation Matrix Computations for LS estimate. 
- **********************************************************************/ - -#include "main_FIX.h" - -/* Calculates correlation vector X'*t */ -void silk_corrVector_FIX( - const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */ - const opus_int16 *t, /* I Target vector [L] */ - const opus_int L, /* I Length of vectors */ - const opus_int order, /* I Max lag for correlation */ - opus_int32 *Xt, /* O Pointer to X'*t correlation vector [order] */ - const opus_int rshifts, /* I Right shifts of correlations */ - int arch /* I Run-time architecture */ -) -{ - opus_int lag, i; - const opus_int16 *ptr1, *ptr2; - opus_int32 inner_prod; - - ptr1 = &x[ order - 1 ]; /* Points to first sample of column 0 of X: X[:,0] */ - ptr2 = t; - /* Calculate X'*t */ - if( rshifts > 0 ) { - /* Right shifting used */ - for( lag = 0; lag < order; lag++ ) { - inner_prod = 0; - for( i = 0; i < L; i++ ) { - inner_prod += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts ); - } - Xt[ lag ] = inner_prod; /* X[:,lag]'*t */ - ptr1--; /* Go to next column of X */ - } - } else { - silk_assert( rshifts == 0 ); - for( lag = 0; lag < order; lag++ ) { - Xt[ lag ] = silk_inner_prod_aligned( ptr1, ptr2, L, arch ); /* X[:,lag]'*t */ - ptr1--; /* Go to next column of X */ - } - } -} - -/* Calculates correlation matrix X'*X */ -void silk_corrMatrix_FIX( - const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */ - const opus_int L, /* I Length of vectors */ - const opus_int order, /* I Max lag for correlation */ - const opus_int head_room, /* I Desired headroom */ - opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */ - opus_int *rshifts, /* I/O Right shifts of correlations */ - int arch /* I Run-time architecture */ -) -{ - opus_int i, j, lag, rshifts_local, head_room_rshifts; - opus_int32 energy; - const opus_int16 *ptr1, *ptr2; - - /* Calculate energy to find shift used to fit in 32 bits */ - silk_sum_sqr_shift( &energy, 
&rshifts_local, x, L + order - 1 ); - /* Add shifts to get the desired head room */ - head_room_rshifts = silk_max( head_room - silk_CLZ32( energy ), 0 ); - - energy = silk_RSHIFT32( energy, head_room_rshifts ); - rshifts_local += head_room_rshifts; - - /* Calculate energy of first column (0) of X: X[:,0]'*X[:,0] */ - /* Remove contribution of first order - 1 samples */ - for( i = 0; i < order - 1; i++ ) { - energy -= silk_RSHIFT32( silk_SMULBB( x[ i ], x[ i ] ), rshifts_local ); - } - if( rshifts_local < *rshifts ) { - /* Adjust energy */ - energy = silk_RSHIFT32( energy, *rshifts - rshifts_local ); - rshifts_local = *rshifts; - } - - /* Calculate energy of remaining columns of X: X[:,j]'*X[:,j] */ - /* Fill out the diagonal of the correlation matrix */ - matrix_ptr( XX, 0, 0, order ) = energy; - ptr1 = &x[ order - 1 ]; /* First sample of column 0 of X */ - for( j = 1; j < order; j++ ) { - energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr1[ L - j ] ), rshifts_local ) ); - energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr1[ -j ] ), rshifts_local ) ); - matrix_ptr( XX, j, j, order ) = energy; - } - - ptr2 = &x[ order - 2 ]; /* First sample of column 1 of X */ - /* Calculate the remaining elements of the correlation matrix */ - if( rshifts_local > 0 ) { - /* Right shifting used */ - for( lag = 1; lag < order; lag++ ) { - /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */ - energy = 0; - for( i = 0; i < L; i++ ) { - energy += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts_local ); - } - /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */ - matrix_ptr( XX, lag, 0, order ) = energy; - matrix_ptr( XX, 0, lag, order ) = energy; - for( j = 1; j < ( order - lag ); j++ ) { - energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ), rshifts_local ) ); - energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr2[ -j ] ), rshifts_local ) ); - 
matrix_ptr( XX, lag + j, j, order ) = energy; - matrix_ptr( XX, j, lag + j, order ) = energy; - } - ptr2--; /* Update pointer to first sample of next column (lag) in X */ - } - } else { - for( lag = 1; lag < order; lag++ ) { - /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */ - energy = silk_inner_prod_aligned( ptr1, ptr2, L, arch ); - matrix_ptr( XX, lag, 0, order ) = energy; - matrix_ptr( XX, 0, lag, order ) = energy; - /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */ - for( j = 1; j < ( order - lag ); j++ ) { - energy = silk_SUB32( energy, silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ) ); - energy = silk_SMLABB( energy, ptr1[ -j ], ptr2[ -j ] ); - matrix_ptr( XX, lag + j, j, order ) = energy; - matrix_ptr( XX, j, lag + j, order ) = energy; - } - ptr2--;/* Update pointer to first sample of next column (lag) in X */ - } - } - *rshifts = rshifts_local; -} - diff --git a/thirdparty/opus/silk/fixed/encode_frame_FIX.c b/thirdparty/opus/silk/fixed/encode_frame_FIX.c deleted file mode 100644 index 5ef44b03fc..0000000000 --- a/thirdparty/opus/silk/fixed/encode_frame_FIX.c +++ /dev/null @@ -1,387 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" -#include "tuning_parameters.h" - -/* Low Bitrate Redundancy (LBRR) encoding. 
Reuse all parameters but encode with lower bitrate */ -static OPUS_INLINE void silk_LBRR_encode_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct */ - const opus_int32 xfw_Q3[], /* I Input signal */ - opus_int condCoding /* I The type of conditional coding used so far for this frame */ -); - -void silk_encode_do_VAD_FIX( - silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */ -) -{ - /****************************/ - /* Voice Activity Detection */ - /****************************/ - silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch ); - - /**************************************************/ - /* Convert speech activity into VAD and DTX flags */ - /**************************************************/ - if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) { - psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY; - psEnc->sCmn.noSpeechCounter++; - if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) { - psEnc->sCmn.inDTX = 0; - } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) { - psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX; - psEnc->sCmn.inDTX = 0; - } - psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0; - } else { - psEnc->sCmn.noSpeechCounter = 0; - psEnc->sCmn.inDTX = 0; - psEnc->sCmn.indices.signalType = TYPE_UNVOICED; - psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1; - } -} - -/****************/ -/* Encode frame */ -/****************/ -opus_int silk_encode_frame_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ - opus_int32 *pnBytesOut, /* O Pointer to number of payload bytes; */ - ec_enc *psRangeEnc, /* I/O compressor data structure */ - opus_int condCoding, /* I The type of conditional coding to use */ - opus_int maxBits, /* I If > 0: maximum number 
of output bits */ - opus_int useCBR /* I Flag to force constant-bitrate operation */ -) -{ - silk_encoder_control_FIX sEncCtrl; - opus_int i, iter, maxIter, found_upper, found_lower, ret = 0; - opus_int16 *x_frame; - ec_enc sRangeEnc_copy, sRangeEnc_copy2; - silk_nsq_state sNSQ_copy, sNSQ_copy2; - opus_int32 seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper; - opus_int32 gainsID, gainsID_lower, gainsID_upper; - opus_int16 gainMult_Q8; - opus_int16 ec_prevLagIndex_copy; - opus_int ec_prevSignalType_copy; - opus_int8 LastGainIndex_copy2; - SAVE_STACK; - - /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */ - LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0; - - psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3; - - /**************************************************************/ - /* Set up Input Pointers, and insert frame in input buffer */ - /*************************************************************/ - /* start of frame to encode */ - x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length; - - /***************************************/ - /* Ensure smooth bandwidth transitions */ - /***************************************/ - silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length ); - - /*******************************************/ - /* Copy new frame to front of input buffer */ - /*******************************************/ - silk_memcpy( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length * sizeof( opus_int16 ) ); - - if( !psEnc->sCmn.prefillFlag ) { - VARDECL( opus_int32, xfw_Q3 ); - VARDECL( opus_int16, res_pitch ); - VARDECL( opus_uint8, ec_buf_copy ); - opus_int16 *res_pitch_frame; - - ALLOC( res_pitch, - psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length - + psEnc->sCmn.ltp_mem_length, opus_int16 ); - /* start of pitch LPC residual frame */ - res_pitch_frame = res_pitch 
+ psEnc->sCmn.ltp_mem_length; - - /*****************************************/ - /* Find pitch lags, initial LPC analysis */ - /*****************************************/ - silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch ); - - /************************/ - /* Noise shape analysis */ - /************************/ - silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame, psEnc->sCmn.arch ); - - /***************************************************/ - /* Find linear prediction coefficients (LPC + LTP) */ - /***************************************************/ - silk_find_pred_coefs_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding ); - - /****************************************/ - /* Process gains */ - /****************************************/ - silk_process_gains_FIX( psEnc, &sEncCtrl, condCoding ); - - /*****************************************/ - /* Prefiltering for noise shaper */ - /*****************************************/ - ALLOC( xfw_Q3, psEnc->sCmn.frame_length, opus_int32 ); - silk_prefilter_FIX( psEnc, &sEncCtrl, xfw_Q3, x_frame ); - - /****************************************/ - /* Low Bitrate Redundant Encoding */ - /****************************************/ - silk_LBRR_encode_FIX( psEnc, &sEncCtrl, xfw_Q3, condCoding ); - - /* Loop over quantizer and entropy coding to control bitrate */ - maxIter = 6; - gainMult_Q8 = SILK_FIX_CONST( 1, 8 ); - found_lower = 0; - found_upper = 0; - gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr ); - gainsID_lower = -1; - gainsID_upper = -1; - /* Copy part of the input state */ - silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) ); - silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); - seed_copy = psEnc->sCmn.indices.Seed; - ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex; - ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType; - ALLOC( ec_buf_copy, 1275, opus_uint8 ); - for( iter = 0; ; iter++ ) { - 
if( gainsID == gainsID_lower ) { - nBits = nBits_lower; - } else if( gainsID == gainsID_upper ) { - nBits = nBits_upper; - } else { - /* Restore part of the input state */ - if( iter > 0 ) { - silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) ); - silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) ); - psEnc->sCmn.indices.Seed = seed_copy; - psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy; - psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy; - } - - /*****************************************/ - /* Noise shaping quantization */ - /*****************************************/ - if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { - silk_NSQ_del_dec( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses, - sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14, - sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14, - psEnc->sCmn.arch ); - } else { - silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses, - sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14, - sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14, - psEnc->sCmn.arch); - } - - /****************************************/ - /* Encode Parameters */ - /****************************************/ - silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding ); - - /****************************************/ - /* Encode Excitation Signal */ - /****************************************/ - silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType, - psEnc->sCmn.pulses, psEnc->sCmn.frame_length ); - - nBits = ec_tell( psRangeEnc ); - - if( useCBR == 0 && iter == 0 && nBits <= maxBits ) { - break; - } - } - - 
if( iter == maxIter ) { - if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) { - /* Restore output state from earlier iteration that did meet the bitrate budget */ - silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) ); - silk_assert( sRangeEnc_copy2.offs <= 1275 ); - silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs ); - silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) ); - psEnc->sShape.LastGainIndex = LastGainIndex_copy2; - } - break; - } - - if( nBits > maxBits ) { - if( found_lower == 0 && iter >= 2 ) { - /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */ - sEncCtrl.Lambda_Q10 = silk_ADD_RSHIFT32( sEncCtrl.Lambda_Q10, sEncCtrl.Lambda_Q10, 1 ); - found_upper = 0; - gainsID_upper = -1; - } else { - found_upper = 1; - nBits_upper = nBits; - gainMult_upper = gainMult_Q8; - gainsID_upper = gainsID; - } - } else if( nBits < maxBits - 5 ) { - found_lower = 1; - nBits_lower = nBits; - gainMult_lower = gainMult_Q8; - if( gainsID != gainsID_lower ) { - gainsID_lower = gainsID; - /* Copy part of the output state */ - silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) ); - silk_assert( psRangeEnc->offs <= 1275 ); - silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs ); - silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); - LastGainIndex_copy2 = psEnc->sShape.LastGainIndex; - } - } else { - /* Within 5 bits of budget: close enough */ - break; - } - - if( ( found_lower & found_upper ) == 0 ) { - /* Adjust gain according to high-rate rate/distortion curve */ - opus_int32 gain_factor_Q16; - gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) ); - gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) ); - if( nBits > maxBits ) { - gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) ); - } - gainMult_Q8 = silk_SMULWB( gain_factor_Q16, 
gainMult_Q8 ); - } else { - /* Adjust gain by interpolating */ - gainMult_Q8 = gainMult_lower + silk_DIV32_16( silk_MUL( gainMult_upper - gainMult_lower, maxBits - nBits_lower ), nBits_upper - nBits_lower ); - /* New gain multplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */ - if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) { - gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ); - } else - if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) { - gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ); - } - } - - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - sEncCtrl.Gains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 ); - } - - /* Quantize gains */ - psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev; - silk_gains_quant( psEnc->sCmn.indices.GainsIndices, sEncCtrl.Gains_Q16, - &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); - - /* Unique identifier of gains vector */ - gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr ); - } - } - - /* Update input buffer */ - silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ], - ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( opus_int16 ) ); - - /* Exit without entropy coding */ - if( psEnc->sCmn.prefillFlag ) { - /* No payload */ - *pnBytesOut = 0; - RESTORE_STACK; - return ret; - } - - /* Parameters needed for next frame */ - psEnc->sCmn.prevLag = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ]; - psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType; - - /****************************************/ - /* Finalize payload */ - /****************************************/ - psEnc->sCmn.first_frame_after_reset = 0; - /* Payload size */ - *pnBytesOut = silk_RSHIFT( ec_tell( 
psRangeEnc ) + 7, 3 ); - - RESTORE_STACK; - return ret; -} - -/* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate */ -static OPUS_INLINE void silk_LBRR_encode_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct */ - const opus_int32 xfw_Q3[], /* I Input signal */ - opus_int condCoding /* I The type of conditional coding used so far for this frame */ -) -{ - opus_int32 TempGains_Q16[ MAX_NB_SUBFR ]; - SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ]; - silk_nsq_state sNSQ_LBRR; - - /*******************************************/ - /* Control use of inband LBRR */ - /*******************************************/ - if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) { - psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1; - - /* Copy noise shaping quantizer state and quantization indices from regular encoding */ - silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); - silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) ); - - /* Save original gains */ - silk_memcpy( TempGains_Q16, psEncCtrl->Gains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) ); - - if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) { - /* First frame in packet or previous frame not LBRR coded */ - psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex; - - /* Increase Gains to get target LBRR rate */ - psIndices_LBRR->GainsIndices[ 0 ] = psIndices_LBRR->GainsIndices[ 0 ] + psEnc->sCmn.LBRR_GainIncreases; - psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 ); - } - - /* Decode to get gains in sync with decoder */ - /* Overwrite unquantized gains with quantized gains */ - 
silk_gains_dequant( psEncCtrl->Gains_Q16, psIndices_LBRR->GainsIndices, - &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); - - /*****************************************/ - /* Noise shaping quantization */ - /*****************************************/ - if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { - silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3, - psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14, - psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, - psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch ); - } else { - silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3, - psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14, - psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, - psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch ); - } - - /* Restore original gains */ - silk_memcpy( psEncCtrl->Gains_Q16, TempGains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) ); - } -} diff --git a/thirdparty/opus/silk/fixed/find_LPC_FIX.c b/thirdparty/opus/silk/fixed/find_LPC_FIX.c deleted file mode 100644 index e11cdc86e6..0000000000 --- a/thirdparty/opus/silk/fixed/find_LPC_FIX.c +++ /dev/null @@ -1,151 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" -#include "tuning_parameters.h" - -/* Finds LPC vector from correlations, and converts to NLSF */ -void silk_find_LPC_FIX( - silk_encoder_state *psEncC, /* I/O Encoder state */ - opus_int16 NLSF_Q15[], /* O NLSFs */ - const opus_int16 x[], /* I Input signal */ - const opus_int32 minInvGain_Q30 /* I Inverse of max prediction gain */ -) -{ - opus_int k, subfr_length; - opus_int32 a_Q16[ MAX_LPC_ORDER ]; - opus_int isInterpLower, shift; - opus_int32 res_nrg0, res_nrg1; - opus_int rshift0, rshift1; - - /* Used only for LSF interpolation */ - opus_int32 a_tmp_Q16[ MAX_LPC_ORDER ], res_nrg_interp, res_nrg, res_tmp_nrg; - opus_int res_nrg_interp_Q, res_nrg_Q, res_tmp_nrg_Q; - opus_int16 a_tmp_Q12[ MAX_LPC_ORDER ]; - opus_int16 NLSF0_Q15[ MAX_LPC_ORDER ]; - SAVE_STACK; - - subfr_length = psEncC->subfr_length + psEncC->predictLPCOrder; - - /* Default: no interpolation */ - psEncC->indices.NLSFInterpCoef_Q2 = 4; - - /* Burg AR analysis for the full frame */ - silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder, psEncC->arch ); - - if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) { - VARDECL( opus_int16, LPC_res ); - - /* Optimal solution for last 10 ms */ - silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder, psEncC->arch ); - - /* subtract residual energy here, as that's easier than adding it to the */ - /* residual energy of the first 10 ms in each iteration of the search below */ - shift = res_tmp_nrg_Q - res_nrg_Q; - if( shift >= 0 ) { - if( shift < 32 ) { - res_nrg = res_nrg - silk_RSHIFT( res_tmp_nrg, shift ); - } - } else { - silk_assert( shift > -32 ); - res_nrg = silk_RSHIFT( 
res_nrg, -shift ) - res_tmp_nrg; - res_nrg_Q = res_tmp_nrg_Q; - } - - /* Convert to NLSFs */ - silk_A2NLSF( NLSF_Q15, a_tmp_Q16, psEncC->predictLPCOrder ); - - ALLOC( LPC_res, 2 * subfr_length, opus_int16 ); - - /* Search over interpolation indices to find the one with lowest residual energy */ - for( k = 3; k >= 0; k-- ) { - /* Interpolate NLSFs for first half */ - silk_interpolate( NLSF0_Q15, psEncC->prev_NLSFq_Q15, NLSF_Q15, k, psEncC->predictLPCOrder ); - - /* Convert to LPC for residual energy evaluation */ - silk_NLSF2A( a_tmp_Q12, NLSF0_Q15, psEncC->predictLPCOrder ); - - /* Calculate residual energy with NLSF interpolation */ - silk_LPC_analysis_filter( LPC_res, x, a_tmp_Q12, 2 * subfr_length, psEncC->predictLPCOrder, psEncC->arch ); - - silk_sum_sqr_shift( &res_nrg0, &rshift0, LPC_res + psEncC->predictLPCOrder, subfr_length - psEncC->predictLPCOrder ); - silk_sum_sqr_shift( &res_nrg1, &rshift1, LPC_res + psEncC->predictLPCOrder + subfr_length, subfr_length - psEncC->predictLPCOrder ); - - /* Add subframe energies from first half frame */ - shift = rshift0 - rshift1; - if( shift >= 0 ) { - res_nrg1 = silk_RSHIFT( res_nrg1, shift ); - res_nrg_interp_Q = -rshift0; - } else { - res_nrg0 = silk_RSHIFT( res_nrg0, -shift ); - res_nrg_interp_Q = -rshift1; - } - res_nrg_interp = silk_ADD32( res_nrg0, res_nrg1 ); - - /* Compare with first half energy without NLSF interpolation, or best interpolated value so far */ - shift = res_nrg_interp_Q - res_nrg_Q; - if( shift >= 0 ) { - if( silk_RSHIFT( res_nrg_interp, shift ) < res_nrg ) { - isInterpLower = silk_TRUE; - } else { - isInterpLower = silk_FALSE; - } - } else { - if( -shift < 32 ) { - if( res_nrg_interp < silk_RSHIFT( res_nrg, -shift ) ) { - isInterpLower = silk_TRUE; - } else { - isInterpLower = silk_FALSE; - } - } else { - isInterpLower = silk_FALSE; - } - } - - /* Determine whether current interpolated NLSFs are best so far */ - if( isInterpLower == silk_TRUE ) { - /* Interpolation has lower residual energy */ 
- res_nrg = res_nrg_interp; - res_nrg_Q = res_nrg_interp_Q; - psEncC->indices.NLSFInterpCoef_Q2 = (opus_int8)k; - } - } - } - - if( psEncC->indices.NLSFInterpCoef_Q2 == 4 ) { - /* NLSF interpolation is currently inactive, calculate NLSFs from full frame AR coefficients */ - silk_A2NLSF( NLSF_Q15, a_Q16, psEncC->predictLPCOrder ); - } - - silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) ); - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/fixed/find_LTP_FIX.c b/thirdparty/opus/silk/fixed/find_LTP_FIX.c deleted file mode 100644 index 1314a28137..0000000000 --- a/thirdparty/opus/silk/fixed/find_LTP_FIX.c +++ /dev/null @@ -1,245 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "tuning_parameters.h" - -/* Head room for correlations */ -#define LTP_CORRS_HEAD_ROOM 2 - -void silk_fit_LTP( - opus_int32 LTP_coefs_Q16[ LTP_ORDER ], - opus_int16 LTP_coefs_Q14[ LTP_ORDER ] -); - -void silk_find_LTP_FIX( - opus_int16 b_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */ - opus_int32 WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ - opus_int *LTPredCodGain_Q7, /* O LTP coding gain */ - const opus_int16 r_lpc[], /* I residual signal after LPC signal + state for first 10 ms */ - const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ - const opus_int32 Wght_Q15[ MAX_NB_SUBFR ], /* I weights */ - const opus_int subfr_length, /* I subframe length */ - const opus_int nb_subfr, /* I number of subframes */ - const opus_int mem_offset, /* I number of samples in LTP memory */ - opus_int corr_rshifts[ MAX_NB_SUBFR ], /* O right shifts applied to correlations */ - int arch /* I Run-time architecture */ -) -{ - opus_int i, k, lshift; - const opus_int16 *r_ptr, *lag_ptr; - opus_int16 *b_Q14_ptr; - - opus_int32 regu; - opus_int32 *WLTP_ptr; - opus_int32 b_Q16[ LTP_ORDER ], delta_b_Q14[ LTP_ORDER ], d_Q14[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], g_Q26; - opus_int32 w[ MAX_NB_SUBFR ], WLTP_max, max_abs_d_Q14, max_w_bits; - - opus_int32 temp32, denom32; - opus_int 
extra_shifts; - opus_int rr_shifts, maxRshifts, maxRshifts_wxtra, LZs; - opus_int32 LPC_res_nrg, LPC_LTP_res_nrg, div_Q16; - opus_int32 Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ]; - opus_int32 wd, m_Q12; - - b_Q14_ptr = b_Q14; - WLTP_ptr = WLTP; - r_ptr = &r_lpc[ mem_offset ]; - for( k = 0; k < nb_subfr; k++ ) { - lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 ); - - silk_sum_sqr_shift( &rr[ k ], &rr_shifts, r_ptr, subfr_length ); /* rr[ k ] in Q( -rr_shifts ) */ - - /* Assure headroom */ - LZs = silk_CLZ32( rr[k] ); - if( LZs < LTP_CORRS_HEAD_ROOM ) { - rr[ k ] = silk_RSHIFT_ROUND( rr[ k ], LTP_CORRS_HEAD_ROOM - LZs ); - rr_shifts += ( LTP_CORRS_HEAD_ROOM - LZs ); - } - corr_rshifts[ k ] = rr_shifts; - silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP_ptr, &corr_rshifts[ k ], arch ); /* WLTP_fix_ptr in Q( -corr_rshifts[ k ] ) */ - - /* The correlation vector always has lower max abs value than rr and/or RR so head room is assured */ - silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr, corr_rshifts[ k ], arch ); /* Rr_fix_ptr in Q( -corr_rshifts[ k ] ) */ - if( corr_rshifts[ k ] > rr_shifts ) { - rr[ k ] = silk_RSHIFT( rr[ k ], corr_rshifts[ k ] - rr_shifts ); /* rr[ k ] in Q( -corr_rshifts[ k ] ) */ - } - silk_assert( rr[ k ] >= 0 ); - - regu = 1; - regu = silk_SMLAWB( regu, rr[ k ], SILK_FIX_CONST( LTP_DAMPING/3, 16 ) ); - regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, 0, 0, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) ); - regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) ); - silk_regularize_correlations_FIX( WLTP_ptr, &rr[k], regu, LTP_ORDER ); - - silk_solve_LDL_FIX( WLTP_ptr, LTP_ORDER, Rr, b_Q16 ); /* WLTP_fix_ptr and Rr_fix_ptr both in Q(-corr_rshifts[k]) */ - - /* Limit and store in Q14 */ - silk_fit_LTP( b_Q16, b_Q14_ptr ); - - /* Calculate residual energy */ - nrg[ k ] = silk_residual_energy16_covar_FIX( b_Q14_ptr, WLTP_ptr, Rr, rr[ k ], 
LTP_ORDER, 14 ); /* nrg_fix in Q( -corr_rshifts[ k ] ) */ - - /* temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length ); */ - extra_shifts = silk_min_int( corr_rshifts[ k ], LTP_CORRS_HEAD_ROOM ); - denom32 = silk_LSHIFT_SAT32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 + extra_shifts ) + /* Q( -corr_rshifts[ k ] + extra_shifts ) */ - silk_RSHIFT( silk_SMULWB( (opus_int32)subfr_length, 655 ), corr_rshifts[ k ] - extra_shifts ); /* Q( -corr_rshifts[ k ] + extra_shifts ) */ - denom32 = silk_max( denom32, 1 ); - silk_assert( ((opus_int64)Wght_Q15[ k ] << 16 ) < silk_int32_MAX ); /* Wght always < 0.5 in Q0 */ - temp32 = silk_DIV32( silk_LSHIFT( (opus_int32)Wght_Q15[ k ], 16 ), denom32 ); /* Q( 15 + 16 + corr_rshifts[k] - extra_shifts ) */ - temp32 = silk_RSHIFT( temp32, 31 + corr_rshifts[ k ] - extra_shifts - 26 ); /* Q26 */ - - /* Limit temp such that the below scaling never wraps around */ - WLTP_max = 0; - for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) { - WLTP_max = silk_max( WLTP_ptr[ i ], WLTP_max ); - } - lshift = silk_CLZ32( WLTP_max ) - 1 - 3; /* keep 3 bits free for vq_nearest_neighbor_fix */ - silk_assert( 26 - 18 + lshift >= 0 ); - if( 26 - 18 + lshift < 31 ) { - temp32 = silk_min_32( temp32, silk_LSHIFT( (opus_int32)1, 26 - 18 + lshift ) ); - } - - silk_scale_vector32_Q26_lshift_18( WLTP_ptr, temp32, LTP_ORDER * LTP_ORDER ); /* WLTP_ptr in Q( 18 - corr_rshifts[ k ] ) */ - - w[ k ] = matrix_ptr( WLTP_ptr, LTP_ORDER/2, LTP_ORDER/2, LTP_ORDER ); /* w in Q( 18 - corr_rshifts[ k ] ) */ - silk_assert( w[k] >= 0 ); - - r_ptr += subfr_length; - b_Q14_ptr += LTP_ORDER; - WLTP_ptr += LTP_ORDER * LTP_ORDER; - } - - maxRshifts = 0; - for( k = 0; k < nb_subfr; k++ ) { - maxRshifts = silk_max_int( corr_rshifts[ k ], maxRshifts ); - } - - /* Compute LTP coding gain */ - if( LTPredCodGain_Q7 != NULL ) { - LPC_LTP_res_nrg = 0; - LPC_res_nrg = 0; - silk_assert( LTP_CORRS_HEAD_ROOM >= 2 ); /* Check that no overflow will happen when adding */ - for( k = 0; k < 
nb_subfr; k++ ) { - LPC_res_nrg = silk_ADD32( LPC_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( rr[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */ - LPC_LTP_res_nrg = silk_ADD32( LPC_LTP_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */ - } - LPC_LTP_res_nrg = silk_max( LPC_LTP_res_nrg, 1 ); /* avoid division by zero */ - - div_Q16 = silk_DIV32_varQ( LPC_res_nrg, LPC_LTP_res_nrg, 16 ); - *LTPredCodGain_Q7 = ( opus_int )silk_SMULBB( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) ); - - silk_assert( *LTPredCodGain_Q7 == ( opus_int )silk_SAT16( silk_MUL( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) ) ) ); - } - - /* smoothing */ - /* d = sum( B, 1 ); */ - b_Q14_ptr = b_Q14; - for( k = 0; k < nb_subfr; k++ ) { - d_Q14[ k ] = 0; - for( i = 0; i < LTP_ORDER; i++ ) { - d_Q14[ k ] += b_Q14_ptr[ i ]; - } - b_Q14_ptr += LTP_ORDER; - } - - /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */ - - /* Find maximum absolute value of d_Q14 and the bits used by w in Q0 */ - max_abs_d_Q14 = 0; - max_w_bits = 0; - for( k = 0; k < nb_subfr; k++ ) { - max_abs_d_Q14 = silk_max_32( max_abs_d_Q14, silk_abs( d_Q14[ k ] ) ); - /* w[ k ] is in Q( 18 - corr_rshifts[ k ] ) */ - /* Find bits needed in Q( 18 - maxRshifts ) */ - max_w_bits = silk_max_32( max_w_bits, 32 - silk_CLZ32( w[ k ] ) + corr_rshifts[ k ] - maxRshifts ); - } - - /* max_abs_d_Q14 = (5 << 15); worst case, i.e. 
LTP_ORDER * -silk_int16_MIN */ - silk_assert( max_abs_d_Q14 <= ( 5 << 15 ) ); - - /* How many bits is needed for w*d' in Q( 18 - maxRshifts ) in the worst case, of all d_Q14's being equal to max_abs_d_Q14 */ - extra_shifts = max_w_bits + 32 - silk_CLZ32( max_abs_d_Q14 ) - 14; - - /* Subtract what we got available; bits in output var plus maxRshifts */ - extra_shifts -= ( 32 - 1 - 2 + maxRshifts ); /* Keep sign bit free as well as 2 bits for accumulation */ - extra_shifts = silk_max_int( extra_shifts, 0 ); - - maxRshifts_wxtra = maxRshifts + extra_shifts; - - temp32 = silk_RSHIFT( 262, maxRshifts + extra_shifts ) + 1; /* 1e-3f in Q( 18 - (maxRshifts + extra_shifts) ) */ - wd = 0; - for( k = 0; k < nb_subfr; k++ ) { - /* w has at least 2 bits of headroom so no overflow should happen */ - temp32 = silk_ADD32( temp32, silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ) ); /* Q( 18 - maxRshifts_wxtra ) */ - wd = silk_ADD32( wd, silk_LSHIFT( silk_SMULWW( silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ), d_Q14[ k ] ), 2 ) ); /* Q( 18 - maxRshifts_wxtra ) */ - } - m_Q12 = silk_DIV32_varQ( wd, temp32, 12 ); - - b_Q14_ptr = b_Q14; - for( k = 0; k < nb_subfr; k++ ) { - /* w_fix[ k ] from Q( 18 - corr_rshifts[ k ] ) to Q( 16 ) */ - if( 2 - corr_rshifts[k] > 0 ) { - temp32 = silk_RSHIFT( w[ k ], 2 - corr_rshifts[ k ] ); - } else { - temp32 = silk_LSHIFT_SAT32( w[ k ], corr_rshifts[ k ] - 2 ); - } - - g_Q26 = silk_MUL( - silk_DIV32( - SILK_FIX_CONST( LTP_SMOOTHING, 26 ), - silk_RSHIFT( SILK_FIX_CONST( LTP_SMOOTHING, 26 ), 10 ) + temp32 ), /* Q10 */ - silk_LSHIFT_SAT32( silk_SUB_SAT32( (opus_int32)m_Q12, silk_RSHIFT( d_Q14[ k ], 2 ) ), 4 ) ); /* Q16 */ - - temp32 = 0; - for( i = 0; i < LTP_ORDER; i++ ) { - delta_b_Q14[ i ] = silk_max_16( b_Q14_ptr[ i ], 1638 ); /* 1638_Q14 = 0.1_Q0 */ - temp32 += delta_b_Q14[ i ]; /* Q14 */ - } - temp32 = silk_DIV32( g_Q26, temp32 ); /* Q14 -> Q12 */ - for( i = 0; i < LTP_ORDER; i++ ) { - b_Q14_ptr[ i ] = silk_LIMIT_32( 
(opus_int32)b_Q14_ptr[ i ] + silk_SMULWB( silk_LSHIFT_SAT32( temp32, 4 ), delta_b_Q14[ i ] ), -16000, 28000 ); - } - b_Q14_ptr += LTP_ORDER; - } -} - -void silk_fit_LTP( - opus_int32 LTP_coefs_Q16[ LTP_ORDER ], - opus_int16 LTP_coefs_Q14[ LTP_ORDER ] -) -{ - opus_int i; - - for( i = 0; i < LTP_ORDER; i++ ) { - LTP_coefs_Q14[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( LTP_coefs_Q16[ i ], 2 ) ); - } -} diff --git a/thirdparty/opus/silk/fixed/find_pitch_lags_FIX.c b/thirdparty/opus/silk/fixed/find_pitch_lags_FIX.c deleted file mode 100644 index b8440a8247..0000000000 --- a/thirdparty/opus/silk/fixed/find_pitch_lags_FIX.c +++ /dev/null @@ -1,145 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" -#include "tuning_parameters.h" - -/* Find pitch lags */ -void silk_find_pitch_lags_FIX( - silk_encoder_state_FIX *psEnc, /* I/O encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ - opus_int16 res[], /* O residual */ - const opus_int16 x[], /* I Speech signal */ - int arch /* I Run-time architecture */ -) -{ - opus_int buf_len, i, scale; - opus_int32 thrhld_Q13, res_nrg; - const opus_int16 *x_buf, *x_buf_ptr; - VARDECL( opus_int16, Wsig ); - opus_int16 *Wsig_ptr; - opus_int32 auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ]; - opus_int16 rc_Q15[ MAX_FIND_PITCH_LPC_ORDER ]; - opus_int32 A_Q24[ MAX_FIND_PITCH_LPC_ORDER ]; - opus_int16 A_Q12[ MAX_FIND_PITCH_LPC_ORDER ]; - SAVE_STACK; - - /******************************************/ - /* Set up buffer lengths etc based on Fs */ - /******************************************/ - buf_len = psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length; - - /* Safety check */ - silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length ); - - x_buf = x - psEnc->sCmn.ltp_mem_length; - - /*************************************/ - /* Estimate LPC AR coefficients */ - /*************************************/ - - /* Calculate windowed signal */ - - ALLOC( Wsig, psEnc->sCmn.pitch_LPC_win_length, opus_int16 ); - 
- /* First LA_LTP samples */ - x_buf_ptr = x_buf + buf_len - psEnc->sCmn.pitch_LPC_win_length; - Wsig_ptr = Wsig; - silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 1, psEnc->sCmn.la_pitch ); - - /* Middle un - windowed samples */ - Wsig_ptr += psEnc->sCmn.la_pitch; - x_buf_ptr += psEnc->sCmn.la_pitch; - silk_memcpy( Wsig_ptr, x_buf_ptr, ( psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ) ) * sizeof( opus_int16 ) ); - - /* Last LA_LTP samples */ - Wsig_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ); - x_buf_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ); - silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch ); - - /* Calculate autocorrelation sequence */ - silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1, arch ); - - /* Add white noise, as fraction of energy */ - auto_corr[ 0 ] = silk_SMLAWB( auto_corr[ 0 ], auto_corr[ 0 ], SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ) + 1; - - /* Calculate the reflection coefficients using schur */ - res_nrg = silk_schur( rc_Q15, auto_corr, psEnc->sCmn.pitchEstimationLPCOrder ); - - /* Prediction gain */ - psEncCtrl->predGain_Q16 = silk_DIV32_varQ( auto_corr[ 0 ], silk_max_int( res_nrg, 1 ), 16 ); - - /* Convert reflection coefficients to prediction coefficients */ - silk_k2a( A_Q24, rc_Q15, psEnc->sCmn.pitchEstimationLPCOrder ); - - /* Convert From 32 bit Q24 to 16 bit Q12 coefs */ - for( i = 0; i < psEnc->sCmn.pitchEstimationLPCOrder; i++ ) { - A_Q12[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT( A_Q24[ i ], 12 ) ); - } - - /* Do BWE */ - silk_bwexpander( A_Q12, psEnc->sCmn.pitchEstimationLPCOrder, SILK_FIX_CONST( FIND_PITCH_BANDWIDTH_EXPANSION, 16 ) ); - - /*****************************************/ - /* LPC analysis filtering */ - /*****************************************/ - silk_LPC_analysis_filter( res, x_buf, A_Q12, buf_len, 
psEnc->sCmn.pitchEstimationLPCOrder, psEnc->sCmn.arch ); - - if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) { - /* Threshold for pitch estimator */ - thrhld_Q13 = SILK_FIX_CONST( 0.6, 13 ); - thrhld_Q13 = silk_SMLABB( thrhld_Q13, SILK_FIX_CONST( -0.004, 13 ), psEnc->sCmn.pitchEstimationLPCOrder ); - thrhld_Q13 = silk_SMLAWB( thrhld_Q13, SILK_FIX_CONST( -0.1, 21 ), psEnc->sCmn.speech_activity_Q8 ); - thrhld_Q13 = silk_SMLABB( thrhld_Q13, SILK_FIX_CONST( -0.15, 13 ), silk_RSHIFT( psEnc->sCmn.prevSignalType, 1 ) ); - thrhld_Q13 = silk_SMLAWB( thrhld_Q13, SILK_FIX_CONST( -0.1, 14 ), psEnc->sCmn.input_tilt_Q15 ); - thrhld_Q13 = silk_SAT16( thrhld_Q13 ); - - /*****************************************/ - /* Call pitch estimator */ - /*****************************************/ - if( silk_pitch_analysis_core( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, &psEnc->sCmn.indices.contourIndex, - &psEnc->LTPCorr_Q15, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16, - (opus_int)thrhld_Q13, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr, - psEnc->sCmn.arch) == 0 ) - { - psEnc->sCmn.indices.signalType = TYPE_VOICED; - } else { - psEnc->sCmn.indices.signalType = TYPE_UNVOICED; - } - } else { - silk_memset( psEncCtrl->pitchL, 0, sizeof( psEncCtrl->pitchL ) ); - psEnc->sCmn.indices.lagIndex = 0; - psEnc->sCmn.indices.contourIndex = 0; - psEnc->LTPCorr_Q15 = 0; - } - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/fixed/find_pred_coefs_FIX.c b/thirdparty/opus/silk/fixed/find_pred_coefs_FIX.c deleted file mode 100644 index d308e9cf5f..0000000000 --- a/thirdparty/opus/silk/fixed/find_pred_coefs_FIX.c +++ /dev/null @@ -1,148 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" - -void silk_find_pred_coefs_FIX( - silk_encoder_state_FIX *psEnc, /* I/O encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ - const opus_int16 res_pitch[], /* I Residual from pitch analysis */ - const opus_int16 x[], /* I Speech signal */ - opus_int condCoding /* I The type of conditional coding to use */ -) -{ - opus_int i; - opus_int32 invGains_Q16[ MAX_NB_SUBFR ], local_gains[ MAX_NB_SUBFR ], Wght_Q15[ MAX_NB_SUBFR ]; - opus_int16 NLSF_Q15[ MAX_LPC_ORDER ]; - const opus_int16 *x_ptr; - opus_int16 *x_pre_ptr; - VARDECL( opus_int16, LPC_in_pre ); - opus_int32 tmp, min_gain_Q16, minInvGain_Q30; - opus_int LTP_corrs_rshift[ MAX_NB_SUBFR ]; - SAVE_STACK; - - /* weighting for weighted least squares */ - min_gain_Q16 = silk_int32_MAX >> 6; - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - min_gain_Q16 = silk_min( min_gain_Q16, psEncCtrl->Gains_Q16[ i ] ); - } - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - /* Divide to Q16 */ - silk_assert( psEncCtrl->Gains_Q16[ i ] > 0 ); - /* Invert and normalize gains, and ensure that maximum invGains_Q16 is within range of a 16 bit int */ - invGains_Q16[ i ] = silk_DIV32_varQ( min_gain_Q16, psEncCtrl->Gains_Q16[ i ], 16 - 2 ); - - /* Ensure Wght_Q15 a minimum value 1 */ - invGains_Q16[ i ] = silk_max( invGains_Q16[ i ], 363 ); - - /* Square the inverted gains */ - silk_assert( invGains_Q16[ i ] == silk_SAT16( invGains_Q16[ i ] ) ); - tmp = silk_SMULWB( invGains_Q16[ i ], invGains_Q16[ i ] ); - Wght_Q15[ i ] = silk_RSHIFT( tmp, 1 ); - - /* Invert the inverted and normalized gains */ - local_gains[ i ] = silk_DIV32( ( (opus_int32)1 << 16 ), invGains_Q16[ i ] ); - } - - ALLOC( LPC_in_pre, - psEnc->sCmn.nb_subfr * psEnc->sCmn.predictLPCOrder - + psEnc->sCmn.frame_length, opus_int16 ); - if( psEnc->sCmn.indices.signalType == 
TYPE_VOICED ) { - VARDECL( opus_int32, WLTP ); - - /**********/ - /* VOICED */ - /**********/ - silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 ); - - ALLOC( WLTP, psEnc->sCmn.nb_subfr * LTP_ORDER * LTP_ORDER, opus_int32 ); - - /* LTP analysis */ - silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7, - res_pitch, psEncCtrl->pitchL, Wght_Q15, psEnc->sCmn.subfr_length, - psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift, psEnc->sCmn.arch ); - - /* Quantize LTP gain parameters */ - silk_quant_LTP_gains( psEncCtrl->LTPCoef_Q14, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex, - &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr, - psEnc->sCmn.arch); - - /* Control LTP scaling */ - silk_LTP_scale_ctrl_FIX( psEnc, psEncCtrl, condCoding ); - - /* Create LTP residual */ - silk_LTP_analysis_filter_FIX( LPC_in_pre, x - psEnc->sCmn.predictLPCOrder, psEncCtrl->LTPCoef_Q14, - psEncCtrl->pitchL, invGains_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder ); - - } else { - /************/ - /* UNVOICED */ - /************/ - /* Create signal with prepended subframes, scaled by inverse gains */ - x_ptr = x - psEnc->sCmn.predictLPCOrder; - x_pre_ptr = LPC_in_pre; - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - silk_scale_copy_vector16( x_pre_ptr, x_ptr, invGains_Q16[ i ], - psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder ); - x_pre_ptr += psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder; - x_ptr += psEnc->sCmn.subfr_length; - } - - silk_memset( psEncCtrl->LTPCoef_Q14, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( opus_int16 ) ); - psEncCtrl->LTPredCodGain_Q7 = 0; - psEnc->sCmn.sum_log_gain_Q7 = 0; - } - - /* Limit on total predictive coding gain */ - if( psEnc->sCmn.first_frame_after_reset ) { - minInvGain_Q30 = SILK_FIX_CONST( 1.0f / 
MAX_PREDICTION_POWER_GAIN_AFTER_RESET, 30 ); - } else { - minInvGain_Q30 = silk_log2lin( silk_SMLAWB( 16 << 7, (opus_int32)psEncCtrl->LTPredCodGain_Q7, SILK_FIX_CONST( 1.0 / 3, 16 ) ) ); /* Q16 */ - minInvGain_Q30 = silk_DIV32_varQ( minInvGain_Q30, - silk_SMULWW( SILK_FIX_CONST( MAX_PREDICTION_POWER_GAIN, 0 ), - silk_SMLAWB( SILK_FIX_CONST( 0.25, 18 ), SILK_FIX_CONST( 0.75, 18 ), psEncCtrl->coding_quality_Q14 ) ), 14 ); - } - - /* LPC_in_pre contains the LTP-filtered input for voiced, and the unfiltered input for unvoiced */ - silk_find_LPC_FIX( &psEnc->sCmn, NLSF_Q15, LPC_in_pre, minInvGain_Q30 ); - - /* Quantize LSFs */ - silk_process_NLSFs( &psEnc->sCmn, psEncCtrl->PredCoef_Q12, NLSF_Q15, psEnc->sCmn.prev_NLSFq_Q15 ); - - /* Calculate residual energy using quantized LPC coefficients */ - silk_residual_energy_FIX( psEncCtrl->ResNrg, psEncCtrl->ResNrgQ, LPC_in_pre, psEncCtrl->PredCoef_Q12, local_gains, - psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder, psEnc->sCmn.arch ); - - /* Copy to prediction struct for use in next frame for interpolation */ - silk_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) ); - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/fixed/k2a_FIX.c b/thirdparty/opus/silk/fixed/k2a_FIX.c deleted file mode 100644 index 5fee599bcb..0000000000 --- a/thirdparty/opus/silk/fixed/k2a_FIX.c +++ /dev/null @@ -1,53 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Step up function, converts reflection coefficients to prediction coefficients */ -void silk_k2a( - opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */ - const opus_int16 *rc_Q15, /* I Reflection coefficients [order] Q15 */ - const opus_int32 order /* I Prediction order */ -) -{ - opus_int k, n; - opus_int32 Atmp[ SILK_MAX_ORDER_LPC ]; - - for( k = 0; k < order; k++ ) { - for( n = 0; n < k; n++ ) { - Atmp[ n ] = A_Q24[ n ]; - } - for( n = 0; n < k; n++ ) { - A_Q24[ n ] = silk_SMLAWB( A_Q24[ n ], silk_LSHIFT( Atmp[ k - n - 1 ], 1 ), rc_Q15[ k ] ); - } - A_Q24[ k ] = -silk_LSHIFT( (opus_int32)rc_Q15[ k ], 9 ); - } -} diff --git a/thirdparty/opus/silk/fixed/k2a_Q16_FIX.c b/thirdparty/opus/silk/fixed/k2a_Q16_FIX.c deleted file mode 100644 index 3b03987544..0000000000 --- a/thirdparty/opus/silk/fixed/k2a_Q16_FIX.c +++ /dev/null @@ -1,53 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Step up function, converts reflection coefficients to prediction coefficients */ -void silk_k2a_Q16( - opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */ - const opus_int32 *rc_Q16, /* I Reflection coefficients [order] Q16 */ - const opus_int32 order /* I Prediction order */ -) -{ - opus_int k, n; - opus_int32 Atmp[ SILK_MAX_ORDER_LPC ]; - - for( k = 0; k < order; k++ ) { - for( n = 0; n < k; n++ ) { - Atmp[ n ] = A_Q24[ n ]; - } - for( n = 0; n < k; n++ ) { - A_Q24[ n ] = silk_SMLAWW( A_Q24[ n ], Atmp[ k - n - 1 ], rc_Q16[ k ] ); - } - A_Q24[ k ] = -silk_LSHIFT( rc_Q16[ k ], 8 ); - } -} diff --git a/thirdparty/opus/silk/fixed/main_FIX.h b/thirdparty/opus/silk/fixed/main_FIX.h deleted file mode 100644 index 375b5eb32e..0000000000 --- a/thirdparty/opus/silk/fixed/main_FIX.h +++ /dev/null @@ -1,272 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifndef SILK_MAIN_FIX_H -#define SILK_MAIN_FIX_H - -#include "SigProc_FIX.h" -#include "structs_FIX.h" -#include "control.h" -#include "main.h" -#include "PLC.h" -#include "debug.h" -#include "entenc.h" - -#ifndef FORCE_CPP_BUILD -#ifdef __cplusplus -extern "C" -{ -#endif -#endif - -#define silk_encoder_state_Fxx silk_encoder_state_FIX -#define silk_encode_do_VAD_Fxx silk_encode_do_VAD_FIX -#define silk_encode_frame_Fxx silk_encode_frame_FIX - -/*********************/ -/* Encoder Functions */ -/*********************/ - -/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */ -void silk_HP_variable_cutoff( - silk_encoder_state_Fxx state_Fxx[] /* I/O Encoder states */ -); - -/* Encoder main function */ -void silk_encode_do_VAD_FIX( - silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */ -); - -/* Encoder main function */ -opus_int silk_encode_frame_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ - opus_int32 *pnBytesOut, /* O Pointer to number of payload bytes; */ - ec_enc *psRangeEnc, /* I/O compressor data structure */ - opus_int condCoding, /* I The type of conditional coding to use */ - opus_int maxBits, /* I If > 0: maximum number of output bits */ - opus_int useCBR /* I Flag to force constant-bitrate operation */ -); - -/* Initializes the Silk encoder state */ -opus_int silk_init_encoder( - silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk FIX encoder state */ - int arch /* I Run-time architecture */ -); - -/* Control the Silk encoder */ -opus_int silk_control_encoder( - silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk encoder state */ - silk_EncControlStruct *encControl, /* I Control structure */ - const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */ - const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */ - const opus_int channelNb, /* I Channel number */ 
- const opus_int force_fs_kHz -); - -/****************/ -/* Prefiltering */ -/****************/ -void silk_prefilter_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Encoder state */ - const silk_encoder_control_FIX *psEncCtrl, /* I Encoder control */ - opus_int32 xw_Q10[], /* O Weighted signal */ - const opus_int16 x[] /* I Speech signal */ -); - -void silk_warped_LPC_analysis_filter_FIX_c( - opus_int32 state[], /* I/O State [order + 1] */ - opus_int32 res_Q2[], /* O Residual signal [length] */ - const opus_int16 coef_Q13[], /* I Coefficients [order] */ - const opus_int16 input[], /* I Input signal [length] */ - const opus_int16 lambda_Q16, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order /* I Filter order (even) */ -); - - -/**************************/ -/* Noise shaping analysis */ -/**************************/ -/* Compute noise shaping coefficients and initial gain values */ -void silk_noise_shape_analysis_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ - silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ - const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */ - const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */ - int arch /* I Run-time architecture */ -); - -/* Autocorrelations for a warped frequency axis */ -void silk_warped_autocorrelation_FIX( - opus_int32 *corr, /* O Result [order + 1] */ - opus_int *scale, /* O Scaling of the correlation vector */ - const opus_int16 *input, /* I Input data to correlate */ - const opus_int warping_Q16, /* I Warping coefficient */ - const opus_int length, /* I Length of input */ - const opus_int order /* I Correlation order (even) */ -); - -/* Calculation of LTP state scaling */ -void silk_LTP_scale_ctrl_FIX( - silk_encoder_state_FIX *psEnc, /* I/O encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ - opus_int condCoding /* I The type of conditional coding to use */ -); - 
-/**********************************************/ -/* Prediction Analysis */ -/**********************************************/ -/* Find pitch lags */ -void silk_find_pitch_lags_FIX( - silk_encoder_state_FIX *psEnc, /* I/O encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ - opus_int16 res[], /* O residual */ - const opus_int16 x[], /* I Speech signal */ - int arch /* I Run-time architecture */ -); - -/* Find LPC and LTP coefficients */ -void silk_find_pred_coefs_FIX( - silk_encoder_state_FIX *psEnc, /* I/O encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ - const opus_int16 res_pitch[], /* I Residual from pitch analysis */ - const opus_int16 x[], /* I Speech signal */ - opus_int condCoding /* I The type of conditional coding to use */ -); - -/* LPC analysis */ -void silk_find_LPC_FIX( - silk_encoder_state *psEncC, /* I/O Encoder state */ - opus_int16 NLSF_Q15[], /* O NLSFs */ - const opus_int16 x[], /* I Input signal */ - const opus_int32 minInvGain_Q30 /* I Inverse of max prediction gain */ -); - -/* LTP analysis */ -void silk_find_LTP_FIX( - opus_int16 b_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */ - opus_int32 WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ - opus_int *LTPredCodGain_Q7, /* O LTP coding gain */ - const opus_int16 r_lpc[], /* I residual signal after LPC signal + state for first 10 ms */ - const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ - const opus_int32 Wght_Q15[ MAX_NB_SUBFR ], /* I weights */ - const opus_int subfr_length, /* I subframe length */ - const opus_int nb_subfr, /* I number of subframes */ - const opus_int mem_offset, /* I number of samples in LTP memory */ - opus_int corr_rshifts[ MAX_NB_SUBFR ], /* O right shifts applied to correlations */ - int arch /* I Run-time architecture */ -); - -void silk_LTP_analysis_filter_FIX( - opus_int16 *LTP_res, /* O LTP residual signal of length MAX_NB_SUBFR * ( pre_length + subfr_length ) */ 
- const opus_int16 *x, /* I Pointer to input signal with at least max( pitchL ) preceding samples */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],/* I LTP_ORDER LTP coefficients for each MAX_NB_SUBFR subframe */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag, one for each subframe */ - const opus_int32 invGains_Q16[ MAX_NB_SUBFR ], /* I Inverse quantization gains, one for each subframe */ - const opus_int subfr_length, /* I Length of each subframe */ - const opus_int nb_subfr, /* I Number of subframes */ - const opus_int pre_length /* I Length of the preceding samples starting at &x[0] for each subframe */ -); - -/* Calculates residual energies of input subframes where all subframes have LPC_order */ -/* of preceding samples */ -void silk_residual_energy_FIX( - opus_int32 nrgs[ MAX_NB_SUBFR ], /* O Residual energy per subframe */ - opus_int nrgsQ[ MAX_NB_SUBFR ], /* O Q value per subframe */ - const opus_int16 x[], /* I Input signal */ - opus_int16 a_Q12[ 2 ][ MAX_LPC_ORDER ], /* I AR coefs for each frame half */ - const opus_int32 gains[ MAX_NB_SUBFR ], /* I Quantization gains */ - const opus_int subfr_length, /* I Subframe length */ - const opus_int nb_subfr, /* I Number of subframes */ - const opus_int LPC_order, /* I LPC order */ - int arch /* I Run-time architecture */ -); - -/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */ -opus_int32 silk_residual_energy16_covar_FIX( - const opus_int16 *c, /* I Prediction vector */ - const opus_int32 *wXX, /* I Correlation matrix */ - const opus_int32 *wXx, /* I Correlation vector */ - opus_int32 wxx, /* I Signal energy */ - opus_int D, /* I Dimension */ - opus_int cQ /* I Q value for c vector 0 - 15 */ -); - -/* Processing of gains */ -void silk_process_gains_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control */ - opus_int condCoding /* I The type of conditional coding to use */ -); - -/******************/ -/* Linear 
Algebra */ -/******************/ -/* Calculates correlation matrix X'*X */ -void silk_corrMatrix_FIX( - const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */ - const opus_int L, /* I Length of vectors */ - const opus_int order, /* I Max lag for correlation */ - const opus_int head_room, /* I Desired headroom */ - opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */ - opus_int *rshifts, /* I/O Right shifts of correlations */ - int arch /* I Run-time architecture */ -); - -/* Calculates correlation vector X'*t */ -void silk_corrVector_FIX( - const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */ - const opus_int16 *t, /* I Target vector [L] */ - const opus_int L, /* I Length of vectors */ - const opus_int order, /* I Max lag for correlation */ - opus_int32 *Xt, /* O Pointer to X'*t correlation vector [order] */ - const opus_int rshifts, /* I Right shifts of correlations */ - int arch /* I Run-time architecture */ -); - -/* Add noise to matrix diagonal */ -void silk_regularize_correlations_FIX( - opus_int32 *XX, /* I/O Correlation matrices */ - opus_int32 *xx, /* I/O Correlation values */ - opus_int32 noise, /* I Noise to add */ - opus_int D /* I Dimension of XX */ -); - -/* Solves Ax = b, assuming A is symmetric */ -void silk_solve_LDL_FIX( - opus_int32 *A, /* I Pointer to symetric square matrix A */ - opus_int M, /* I Size of matrix */ - const opus_int32 *b, /* I Pointer to b vector */ - opus_int32 *x_Q16 /* O Pointer to x solution vector */ -); - -#ifndef FORCE_CPP_BUILD -#ifdef __cplusplus -} -#endif /* __cplusplus */ -#endif /* FORCE_CPP_BUILD */ -#endif /* SILK_MAIN_FIX_H */ diff --git a/thirdparty/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h b/thirdparty/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h deleted file mode 100644 index c30481e437..0000000000 --- a/thirdparty/opus/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h +++ /dev/null @@ -1,336 +0,0 @@ 
-/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - - -/**************************************************************/ -/* Compute noise shaping coefficients and initial gain values */ -/**************************************************************/ -#define OVERRIDE_silk_noise_shape_analysis_FIX - -void silk_noise_shape_analysis_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ - silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ - const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */ - const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */ - int arch /* I Run-time architecture */ -) -{ - silk_shape_state_FIX *psShapeSt = &psEnc->sShape; - opus_int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0; - opus_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32; - opus_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7; - opus_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8; - opus_int32 auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ]; - opus_int32 refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 AR1_Q24[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 AR2_Q24[ MAX_SHAPE_LPC_ORDER ]; - VARDECL( opus_int16, x_windowed ); - const opus_int16 *x_ptr, *pitch_res_ptr; - SAVE_STACK; - - /* Point to start of first LPC analysis block */ - x_ptr = x - psEnc->sCmn.la_shape; - - /****************/ - /* GAIN CONTROL */ - /****************/ - SNR_adj_dB_Q7 = psEnc->sCmn.SNR_dB_Q7; - - /* Input quality is the average of the quality in the lowest two VAD bands */ - psEncCtrl->input_quality_Q14 = ( opus_int )silk_RSHIFT( (opus_int32)psEnc->sCmn.input_quality_bands_Q15[ 0 ] - + psEnc->sCmn.input_quality_bands_Q15[ 1 ], 2 ); - - /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */ - psEncCtrl->coding_quality_Q14 = silk_RSHIFT( silk_sigm_Q15( silk_RSHIFT_ROUND( SNR_adj_dB_Q7 - - SILK_FIX_CONST( 20.0, 7 ), 4 ) ), 1 ); - - 
/* Reduce coding SNR during low speech activity */ - if( psEnc->sCmn.useCBR == 0 ) { - b_Q8 = SILK_FIX_CONST( 1.0, 8 ) - psEnc->sCmn.speech_activity_Q8; - b_Q8 = silk_SMULWB( silk_LSHIFT( b_Q8, 8 ), b_Q8 ); - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, - silk_SMULBB( SILK_FIX_CONST( -BG_SNR_DECR_dB, 7 ) >> ( 4 + 1 ), b_Q8 ), /* Q11*/ - silk_SMULWB( SILK_FIX_CONST( 1.0, 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) ); /* Q12*/ - } - - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Reduce gains for periodic signals */ - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( HARM_SNR_INCR_dB, 8 ), psEnc->LTPCorr_Q15 ); - } else { - /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */ - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, - silk_SMLAWB( SILK_FIX_CONST( 6.0, 9 ), -SILK_FIX_CONST( 0.4, 18 ), psEnc->sCmn.SNR_dB_Q7 ), - SILK_FIX_CONST( 1.0, 14 ) - psEncCtrl->input_quality_Q14 ); - } - - /*************************/ - /* SPARSENESS PROCESSING */ - /*************************/ - /* Set quantizer offset */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Initially set to 0; may be overruled in process_gains(..) 
*/ - psEnc->sCmn.indices.quantOffsetType = 0; - psEncCtrl->sparseness_Q8 = 0; - } else { - /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ - nSamples = silk_LSHIFT( psEnc->sCmn.fs_kHz, 1 ); - energy_variation_Q7 = 0; - log_energy_prev_Q7 = 0; - pitch_res_ptr = pitch_res; - for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) { - silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples ); - nrg += silk_RSHIFT( nSamples, scale ); /* Q(-scale)*/ - - log_energy_Q7 = silk_lin2log( nrg ); - if( k > 0 ) { - energy_variation_Q7 += silk_abs( log_energy_Q7 - log_energy_prev_Q7 ); - } - log_energy_prev_Q7 = log_energy_Q7; - pitch_res_ptr += nSamples; - } - - psEncCtrl->sparseness_Q8 = silk_RSHIFT( silk_sigm_Q15( silk_SMULWB( energy_variation_Q7 - - SILK_FIX_CONST( 5.0, 7 ), SILK_FIX_CONST( 0.1, 16 ) ) ), 7 ); - - /* Set quantization offset depending on sparseness measure */ - if( psEncCtrl->sparseness_Q8 > SILK_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) { - psEnc->sCmn.indices.quantOffsetType = 0; - } else { - psEnc->sCmn.indices.quantOffsetType = 1; - } - - /* Increase coding SNR for sparse signals */ - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SILK_FIX_CONST( 0.5, 8 ) ); - } - - /*******************************/ - /* Control bandwidth expansion */ - /*******************************/ - /* More BWE for signals with high prediction gain */ - strength_Q16 = silk_SMULWB( psEncCtrl->predGain_Q16, SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ); - BWExp1_Q16 = BWExp2_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ), - silk_SMLAWW( SILK_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 ); - delta_Q16 = silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - silk_SMULBB( 3, psEncCtrl->coding_quality_Q14 ), - SILK_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) ); - BWExp1_Q16 = silk_SUB32( BWExp1_Q16, delta_Q16 ); - 
BWExp2_Q16 = silk_ADD32( BWExp2_Q16, delta_Q16 ); - /* BWExp1 will be applied after BWExp2, so make it relative */ - BWExp1_Q16 = silk_DIV32_16( silk_LSHIFT( BWExp1_Q16, 14 ), silk_RSHIFT( BWExp2_Q16, 2 ) ); - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */ - warping_Q16 = silk_SMLAWB( psEnc->sCmn.warping_Q16, (opus_int32)psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( 0.01, 18 ) ); - } else { - warping_Q16 = 0; - } - - /********************************************/ - /* Compute noise shaping AR coefs and gains */ - /********************************************/ - ALLOC( x_windowed, psEnc->sCmn.shapeWinLength, opus_int16 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - /* Apply window: sine slope followed by flat part followed by cosine slope */ - opus_int shift, slope_part, flat_part; - flat_part = psEnc->sCmn.fs_kHz * 3; - slope_part = silk_RSHIFT( psEnc->sCmn.shapeWinLength - flat_part, 1 ); - - silk_apply_sine_window( x_windowed, x_ptr, 1, slope_part ); - shift = slope_part; - silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(opus_int16) ); - shift += flat_part; - silk_apply_sine_window( x_windowed + shift, x_ptr + shift, 2, slope_part ); - - /* Update pointer: next LPC analysis block */ - x_ptr += psEnc->sCmn.subfr_length; - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Calculate warped auto correlation */ - silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder ); - } else { - /* Calculate regular auto correlation */ - silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch ); - } - - /* Add white noise, as a fraction of energy */ - auto_corr[0] = silk_ADD32( auto_corr[0], silk_max_32( silk_SMULWB( silk_RSHIFT( auto_corr[ 0 ], 4 ), - SILK_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) ); - - /* Calculate 
the reflection coefficients using schur */ - nrg = silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder ); - silk_assert( nrg >= 0 ); - - /* Convert reflection coefficients to prediction coefficients */ - silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder ); - - Qnrg = -scale; /* range: -12...30*/ - silk_assert( Qnrg >= -12 ); - silk_assert( Qnrg <= 30 ); - - /* Make sure that Qnrg is an even number */ - if( Qnrg & 1 ) { - Qnrg -= 1; - nrg >>= 1; - } - - tmp32 = silk_SQRT_APPROX( nrg ); - Qnrg >>= 1; /* range: -6...15*/ - - psEncCtrl->Gains_Q16[ k ] = (silk_LSHIFT32( silk_LIMIT( (tmp32), silk_RSHIFT32( silk_int32_MIN, (16 - Qnrg) ), \ - silk_RSHIFT32( silk_int32_MAX, (16 - Qnrg) ) ), (16 - Qnrg) )); - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Adjust gain for warping */ - gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder ); - silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); - if ( silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ) >= ( silk_int32_MAX >> 1 ) ) { - psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX; - } else { - psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); - } - } - - /* Bandwidth expansion for synthesis filter shaping */ - silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 ); - - /* Compute noise shaping filter coefficients */ - silk_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( opus_int32 ) ); - - /* Bandwidth expansion for analysis filter shaping */ - silk_assert( BWExp1_Q16 <= SILK_FIX_CONST( 1.0, 16 ) ); - silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 ); - - /* Ratio of prediction gains, in energy domain */ - pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder ); - nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder ); - - /*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / 
nrg;*/ - pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 ); - psEncCtrl->GainsPre_Q14[ k ] = ( opus_int ) SILK_FIX_CONST( 0.3, 14 ) + silk_DIV32_varQ( pre_nrg_Q30, nrg, 14 ); - - /* Convert to monic warped prediction coefficients and limit absolute values */ - limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder ); - - /* Convert from Q24 to Q13 and store in int16 */ - for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) { - psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) ); - psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) ); - } - } - - /*****************/ - /* Gain tweaking */ - /*****************/ - /* Increase gains during low speech activity and put lower limit on gains */ - gain_mult_Q16 = silk_log2lin( -silk_SMLAWB( -SILK_FIX_CONST( 16.0, 7 ), SNR_adj_dB_Q7, SILK_FIX_CONST( 0.16, 16 ) ) ); - gain_add_Q16 = silk_log2lin( silk_SMLAWB( SILK_FIX_CONST( 16.0, 7 ), SILK_FIX_CONST( MIN_QGAIN_DB, 7 ), SILK_FIX_CONST( 0.16, 16 ) ) ); - silk_assert( gain_mult_Q16 > 0 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); - silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); - psEncCtrl->Gains_Q16[ k ] = silk_ADD_POS_SAT32( psEncCtrl->Gains_Q16[ k ], gain_add_Q16 ); - } - - gain_mult_Q16 = SILK_FIX_CONST( 1.0, 16 ) + silk_RSHIFT_ROUND( silk_MLA( SILK_FIX_CONST( INPUT_TILT, 26 ), - psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ), 10 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->GainsPre_Q14[ k ] = silk_SMULWB( gain_mult_Q16, psEncCtrl->GainsPre_Q14[ k ] ); - } - - /************************************************/ - /* Control low-frequency shaping and noise tilt */ - /************************************************/ - /* Less 
low frequency shaping for noisy inputs */ - strength_Q16 = silk_MUL( SILK_FIX_CONST( LOW_FREQ_SHAPING, 4 ), silk_SMLAWB( SILK_FIX_CONST( 1.0, 12 ), - SILK_FIX_CONST( LOW_QUALITY_LOW_FREQ_SHAPING_DECR, 13 ), psEnc->sCmn.input_quality_bands_Q15[ 0 ] - SILK_FIX_CONST( 1.0, 15 ) ) ); - strength_Q16 = silk_RSHIFT( silk_MUL( strength_Q16, psEnc->sCmn.speech_activity_Q8 ), 8 ); - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */ - /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/ - opus_int fs_kHz_inv = silk_DIV32_16( SILK_FIX_CONST( 0.2, 14 ), psEnc->sCmn.fs_kHz ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - b_Q14 = fs_kHz_inv + silk_DIV32_16( SILK_FIX_CONST( 3.0, 14 ), psEncCtrl->pitchL[ k ] ); - /* Pack two coefficients in one int32 */ - psEncCtrl->LF_shp_Q14[ k ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - silk_SMULWB( strength_Q16, b_Q14 ), 16 ); - psEncCtrl->LF_shp_Q14[ k ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) ); - } - silk_assert( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ) < SILK_FIX_CONST( 0.5, 24 ) ); /* Guarantees that second argument to SMULWB() is within range of an opus_int16*/ - Tilt_Q16 = - SILK_FIX_CONST( HP_NOISE_COEF, 16 ) - - silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - SILK_FIX_CONST( HP_NOISE_COEF, 16 ), - silk_SMULWB( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ), psEnc->sCmn.speech_activity_Q8 ) ); - } else { - b_Q14 = silk_DIV32_16( 21299, psEnc->sCmn.fs_kHz ); /* 1.3_Q0 = 21299_Q14*/ - /* Pack two coefficients in one int32 */ - psEncCtrl->LF_shp_Q14[ 0 ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - - silk_SMULWB( strength_Q16, silk_SMULWB( SILK_FIX_CONST( 0.6, 16 ), b_Q14 ) ), 16 ); - psEncCtrl->LF_shp_Q14[ 0 ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) ); - for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->LF_shp_Q14[ k ] = psEncCtrl->LF_shp_Q14[ 0 ]; - } - 
Tilt_Q16 = -SILK_FIX_CONST( HP_NOISE_COEF, 16 ); - } - - /****************************/ - /* HARMONIC SHAPING CONTROL */ - /****************************/ - /* Control boosting of harmonic frequencies */ - HarmBoost_Q16 = silk_SMULWB( silk_SMULWB( SILK_FIX_CONST( 1.0, 17 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 3 ), - psEnc->LTPCorr_Q15 ), SILK_FIX_CONST( LOW_RATE_HARMONIC_BOOST, 16 ) ); - - /* More harmonic boost for noisy input signals */ - HarmBoost_Q16 = silk_SMLAWB( HarmBoost_Q16, - SILK_FIX_CONST( 1.0, 16 ) - silk_LSHIFT( psEncCtrl->input_quality_Q14, 2 ), SILK_FIX_CONST( LOW_INPUT_QUALITY_HARMONIC_BOOST, 16 ) ); - - if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* More harmonic noise shaping for high bitrates or noisy input */ - HarmShapeGain_Q16 = silk_SMLAWB( SILK_FIX_CONST( HARMONIC_SHAPING, 16 ), - SILK_FIX_CONST( 1.0, 16 ) - silk_SMULWB( SILK_FIX_CONST( 1.0, 18 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 4 ), - psEncCtrl->input_quality_Q14 ), SILK_FIX_CONST( HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING, 16 ) ); - - /* Less harmonic noise shaping for less periodic signals */ - HarmShapeGain_Q16 = silk_SMULWB( silk_LSHIFT( HarmShapeGain_Q16, 1 ), - silk_SQRT_APPROX( silk_LSHIFT( psEnc->LTPCorr_Q15, 15 ) ) ); - } else { - HarmShapeGain_Q16 = 0; - } - - /*************************/ - /* Smooth over subframes */ - /*************************/ - for( k = 0; k < MAX_NB_SUBFR; k++ ) { - psShapeSt->HarmBoost_smth_Q16 = - silk_SMLAWB( psShapeSt->HarmBoost_smth_Q16, HarmBoost_Q16 - psShapeSt->HarmBoost_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); - psShapeSt->HarmShapeGain_smth_Q16 = - silk_SMLAWB( psShapeSt->HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt->HarmShapeGain_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); - psShapeSt->Tilt_smth_Q16 = - silk_SMLAWB( psShapeSt->Tilt_smth_Q16, Tilt_Q16 - psShapeSt->Tilt_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); - - psEncCtrl->HarmBoost_Q14[ k ] = ( opus_int 
)silk_RSHIFT_ROUND( psShapeSt->HarmBoost_smth_Q16, 2 ); - psEncCtrl->HarmShapeGain_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 ); - psEncCtrl->Tilt_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16, 2 ); - } - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/fixed/mips/prefilter_FIX_mipsr1.h b/thirdparty/opus/silk/fixed/mips/prefilter_FIX_mipsr1.h deleted file mode 100644 index 21b256885f..0000000000 --- a/thirdparty/opus/silk/fixed/mips/prefilter_FIX_mipsr1.h +++ /dev/null @@ -1,184 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ -#ifndef __PREFILTER_FIX_MIPSR1_H__ -#define __PREFILTER_FIX_MIPSR1_H__ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" -#include "tuning_parameters.h" - -#define OVERRIDE_silk_warped_LPC_analysis_filter_FIX -void silk_warped_LPC_analysis_filter_FIX( - opus_int32 state[], /* I/O State [order + 1] */ - opus_int32 res_Q2[], /* O Residual signal [length] */ - const opus_int16 coef_Q13[], /* I Coefficients [order] */ - const opus_int16 input[], /* I Input signal [length] */ - const opus_int16 lambda_Q16, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order, /* I Filter order (even) */ - int arch -) -{ - opus_int n, i; - opus_int32 acc_Q11, acc_Q22, tmp1, tmp2, tmp3, tmp4; - opus_int32 state_cur, state_next; - - (void)arch; - - /* Order must be even */ - /* Length must be even */ - - silk_assert( ( order & 1 ) == 0 ); - silk_assert( ( length & 1 ) == 0 ); - - for( n = 0; n < length; n+=2 ) { - /* Output of lowpass section */ - tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 ); - state_cur = silk_LSHIFT( input[ n ], 14 ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 ); - state_next = tmp2; - acc_Q11 = silk_RSHIFT( order, 1 ); - acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] ); - - - /* Output of lowpass section */ - tmp4 
= silk_SMLAWB( state_cur, state_next, lambda_Q16 ); - state[ 0 ] = silk_LSHIFT( input[ n+1 ], 14 ); - /* Output of allpass section */ - tmp3 = silk_SMLAWB( state_next, tmp1 - tmp4, lambda_Q16 ); - state[ 1 ] = tmp4; - acc_Q22 = silk_RSHIFT( order, 1 ); - acc_Q22 = silk_SMLAWB( acc_Q22, tmp4, coef_Q13[ 0 ] ); - - /* Loop over allpass sections */ - for( i = 2; i < order; i += 2 ) { - /* Output of allpass section */ - tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 ); - state_cur = tmp1; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 ); - state_next = tmp2; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] ); - - - /* Output of allpass section */ - tmp4 = silk_SMLAWB( state_cur, state_next - tmp3, lambda_Q16 ); - state[ i ] = tmp3; - acc_Q22 = silk_SMLAWB( acc_Q22, tmp3, coef_Q13[ i - 1 ] ); - /* Output of allpass section */ - tmp3 = silk_SMLAWB( state_next, tmp1 - tmp4, lambda_Q16 ); - state[ i + 1 ] = tmp4; - acc_Q22 = silk_SMLAWB( acc_Q22, tmp4, coef_Q13[ i ] ); - } - acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] ); - res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 ); - - state[ order ] = tmp3; - acc_Q22 = silk_SMLAWB( acc_Q22, tmp3, coef_Q13[ order - 1 ] ); - res_Q2[ n+1 ] = silk_LSHIFT( (opus_int32)input[ n+1 ], 2 ) - silk_RSHIFT_ROUND( acc_Q22, 9 ); - } -} - - - -/* Prefilter for finding Quantizer input signal */ -#define OVERRIDE_silk_prefilt_FIX -static inline void silk_prefilt_FIX( - silk_prefilter_state_FIX *P, /* I/O state */ - opus_int32 st_res_Q12[], /* I short term residual signal */ - opus_int32 xw_Q3[], /* O prefiltered signal */ - opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coeficients */ - opus_int Tilt_Q14, /* I Tilt shaping coeficient */ - opus_int32 LF_shp_Q14, /* I Low-frequancy shaping coeficients */ - opus_int lag, /* I Lag for harmonic 
shaping */ - opus_int length /* I Length of signals */ -) -{ - opus_int i, idx, LTP_shp_buf_idx; - opus_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10; - opus_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12; - opus_int16 *LTP_shp_buf; - - /* To speed up use temp variables instead of using the struct */ - LTP_shp_buf = P->sLTP_shp; - LTP_shp_buf_idx = P->sLTP_shp_buf_idx; - sLF_AR_shp_Q12 = P->sLF_AR_shp_Q12; - sLF_MA_shp_Q12 = P->sLF_MA_shp_Q12; - - if( lag > 0 ) { - for( i = 0; i < length; i++ ) { - /* unrolled loop */ - silk_assert( HARM_SHAPE_FIR_TAPS == 3 ); - idx = lag + LTP_shp_buf_idx; - n_LTP_Q12 = silk_SMULBB( LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); - n_LTP_Q12 = silk_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); - n_LTP_Q12 = silk_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); - - n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 ); - n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 ); - - sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) ); - sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) ); - - LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; - LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) ); - - xw_Q3[i] = silk_RSHIFT_ROUND( silk_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 9 ); - } - } - else - { - for( i = 0; i < length; i++ ) { - - n_LTP_Q12 = 0; - - n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 ); - n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 ); - - sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) ); - sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) ); - - LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; - LTP_shp_buf[ LTP_shp_buf_idx ] = 
(opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) ); - - xw_Q3[i] = silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 9 ); - } - } - - /* Copy temp variable back to state */ - P->sLF_AR_shp_Q12 = sLF_AR_shp_Q12; - P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12; - P->sLTP_shp_buf_idx = LTP_shp_buf_idx; -} - -#endif /* __PREFILTER_FIX_MIPSR1_H__ */ diff --git a/thirdparty/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h b/thirdparty/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h deleted file mode 100644 index e803ef0fce..0000000000 --- a/thirdparty/opus/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h +++ /dev/null @@ -1,165 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__ -#define __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" - -#undef QC -#define QC 10 - -#undef QS -#define QS 14 - -/* Autocorrelations for a warped frequency axis */ -#define OVERRIDE_silk_warped_autocorrelation_FIX -void silk_warped_autocorrelation_FIX( - opus_int32 *corr, /* O Result [order + 1] */ - opus_int *scale, /* O Scaling of the correlation vector */ - const opus_int16 *input, /* I Input data to correlate */ - const opus_int warping_Q16, /* I Warping coefficient */ - const opus_int length, /* I Length of input */ - const opus_int order /* I Correlation order (even) */ -) -{ - opus_int n, i, lsh; - opus_int32 tmp1_QS=0, tmp2_QS=0, tmp3_QS=0, tmp4_QS=0, tmp5_QS=0, tmp6_QS=0, tmp7_QS=0, tmp8_QS=0, start_1=0, start_2=0, start_3=0; - opus_int32 state_QS[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; - opus_int64 corr_QC[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; - opus_int64 temp64; - - opus_int32 val; - val = 2 * QS - QC; - - /* Order must be even */ - silk_assert( ( order & 1 ) == 0 ); - silk_assert( 2 * QS - QC >= 0 ); - - /* Loop over samples */ - for( n = 0; n < length; n=n+4 ) { - - tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS ); - start_1 = tmp1_QS; - tmp3_QS = silk_LSHIFT32( (opus_int32)input[ n+1], QS ); - start_2 = tmp3_QS; - tmp5_QS = silk_LSHIFT32( 
(opus_int32)input[ n+2], QS ); - start_3 = tmp5_QS; - tmp7_QS = silk_LSHIFT32( (opus_int32)input[ n+3], QS ); - - /* Loop over allpass sections */ - for( i = 0; i < order; i += 2 ) { - /* Output of allpass section */ - tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 ); - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp1_QS, start_1); - - tmp4_QS = silk_SMLAWB( tmp1_QS, tmp2_QS - tmp3_QS, warping_Q16 ); - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp3_QS, start_2); - - tmp6_QS = silk_SMLAWB( tmp3_QS, tmp4_QS - tmp5_QS, warping_Q16 ); - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp5_QS, start_3); - - tmp8_QS = silk_SMLAWB( tmp5_QS, tmp6_QS - tmp7_QS, warping_Q16 ); - state_QS[ i ] = tmp7_QS; - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp7_QS, state_QS[0]); - - /* Output of allpass section */ - tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 ); - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp2_QS, start_1); - - tmp3_QS = silk_SMLAWB( tmp2_QS, tmp1_QS - tmp4_QS, warping_Q16 ); - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp4_QS, start_2); - - tmp5_QS = silk_SMLAWB( tmp4_QS, tmp3_QS - tmp6_QS, warping_Q16 ); - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp6_QS, start_3); - - tmp7_QS = silk_SMLAWB( tmp6_QS, tmp5_QS - tmp8_QS, warping_Q16 ); - state_QS[ i + 1 ] = tmp8_QS; - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp8_QS, state_QS[ 0 ]); - - } - state_QS[ order ] = tmp7_QS; - - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp1_QS, start_1); - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp3_QS, start_2); - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp5_QS, start_3); - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp7_QS, state_QS[ 0 ]); - } - - for(;n< length; n++ ) { - - tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS ); - - /* Loop over allpass sections */ - for( i = 0; 
i < order; i += 2 ) { - - /* Output of allpass section */ - tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 ); - state_QS[ i ] = tmp1_QS; - corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp1_QS, state_QS[ 0 ]); - - /* Output of allpass section */ - tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 ); - state_QS[ i + 1 ] = tmp2_QS; - corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp2_QS, state_QS[ 0 ]); - } - state_QS[ order ] = tmp1_QS; - corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp1_QS, state_QS[ 0 ]); - } - - temp64 = corr_QC[ 0 ]; - temp64 = __builtin_mips_shilo(temp64, val); - - lsh = silk_CLZ64( temp64 ) - 35; - lsh = silk_LIMIT( lsh, -12 - QC, 30 - QC ); - *scale = -( QC + lsh ); - silk_assert( *scale >= -30 && *scale <= 12 ); - if( lsh >= 0 ) { - for( i = 0; i < order + 1; i++ ) { - temp64 = corr_QC[ i ]; - //temp64 = __builtin_mips_shilo(temp64, val); - temp64 = (val >= 0) ? (temp64 >> val) : (temp64 << -val); - corr[ i ] = (opus_int32)silk_CHECK_FIT32( __builtin_mips_shilo( temp64, -lsh ) ); - } - } else { - for( i = 0; i < order + 1; i++ ) { - temp64 = corr_QC[ i ]; - //temp64 = __builtin_mips_shilo(temp64, val); - temp64 = (val >= 0) ? (temp64 >> val) : (temp64 << -val); - corr[ i ] = (opus_int32)silk_CHECK_FIT32( __builtin_mips_shilo( temp64, -lsh ) ); - } - } - - corr_QC[ 0 ] = __builtin_mips_shilo(corr_QC[ 0 ], val); - - silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/ -} -#endif /* __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__ */ diff --git a/thirdparty/opus/silk/fixed/noise_shape_analysis_FIX.c b/thirdparty/opus/silk/fixed/noise_shape_analysis_FIX.c deleted file mode 100644 index 22a89f75ae..0000000000 --- a/thirdparty/opus/silk/fixed/noise_shape_analysis_FIX.c +++ /dev/null @@ -1,451 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" -#include "tuning_parameters.h" - -/* Compute gain to make warped filter coefficients have a zero mean log frequency response on a */ -/* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */ -/* Note: A monic filter is one with the first coefficient equal to 1.0. 
In Silk we omit the first */ -/* coefficient in an array of coefficients, for monic filters. */ -static OPUS_INLINE opus_int32 warped_gain( /* gain in Q16*/ - const opus_int32 *coefs_Q24, - opus_int lambda_Q16, - opus_int order -) { - opus_int i; - opus_int32 gain_Q24; - - lambda_Q16 = -lambda_Q16; - gain_Q24 = coefs_Q24[ order - 1 ]; - for( i = order - 2; i >= 0; i-- ) { - gain_Q24 = silk_SMLAWB( coefs_Q24[ i ], gain_Q24, lambda_Q16 ); - } - gain_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), gain_Q24, -lambda_Q16 ); - return silk_INVERSE32_varQ( gain_Q24, 40 ); -} - -/* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum */ -/* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */ -static OPUS_INLINE void limit_warped_coefs( - opus_int32 *coefs_syn_Q24, - opus_int32 *coefs_ana_Q24, - opus_int lambda_Q16, - opus_int32 limit_Q24, - opus_int order -) { - opus_int i, iter, ind = 0; - opus_int32 tmp, maxabs_Q24, chirp_Q16, gain_syn_Q16, gain_ana_Q16; - opus_int32 nom_Q16, den_Q24; - - /* Convert to monic coefficients */ - lambda_Q16 = -lambda_Q16; - for( i = order - 1; i > 0; i-- ) { - coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 ); - coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 ); - } - lambda_Q16 = -lambda_Q16; - nom_Q16 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16, lambda_Q16 ); - den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_syn_Q24[ 0 ], lambda_Q16 ); - gain_syn_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); - den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_ana_Q24[ 0 ], lambda_Q16 ); - gain_ana_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); - for( i = 0; i < order; i++ ) { - coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] ); - coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] ); - } - - for( iter = 0; iter < 10; 
iter++ ) { - /* Find maximum absolute value */ - maxabs_Q24 = -1; - for( i = 0; i < order; i++ ) { - tmp = silk_max( silk_abs_int32( coefs_syn_Q24[ i ] ), silk_abs_int32( coefs_ana_Q24[ i ] ) ); - if( tmp > maxabs_Q24 ) { - maxabs_Q24 = tmp; - ind = i; - } - } - if( maxabs_Q24 <= limit_Q24 ) { - /* Coefficients are within range - done */ - return; - } - - /* Convert back to true warped coefficients */ - for( i = 1; i < order; i++ ) { - coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 ); - coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 ); - } - gain_syn_Q16 = silk_INVERSE32_varQ( gain_syn_Q16, 32 ); - gain_ana_Q16 = silk_INVERSE32_varQ( gain_ana_Q16, 32 ); - for( i = 0; i < order; i++ ) { - coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] ); - coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] ); - } - - /* Apply bandwidth expansion */ - chirp_Q16 = SILK_FIX_CONST( 0.99, 16 ) - silk_DIV32_varQ( - silk_SMULWB( maxabs_Q24 - limit_Q24, silk_SMLABB( SILK_FIX_CONST( 0.8, 10 ), SILK_FIX_CONST( 0.1, 10 ), iter ) ), - silk_MUL( maxabs_Q24, ind + 1 ), 22 ); - silk_bwexpander_32( coefs_syn_Q24, order, chirp_Q16 ); - silk_bwexpander_32( coefs_ana_Q24, order, chirp_Q16 ); - - /* Convert to monic warped coefficients */ - lambda_Q16 = -lambda_Q16; - for( i = order - 1; i > 0; i-- ) { - coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 ); - coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 ); - } - lambda_Q16 = -lambda_Q16; - nom_Q16 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16, lambda_Q16 ); - den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_syn_Q24[ 0 ], lambda_Q16 ); - gain_syn_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); - den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_ana_Q24[ 0 ], lambda_Q16 ); - gain_ana_Q16 = silk_DIV32_varQ( 
nom_Q16, den_Q24, 24 ); - for( i = 0; i < order; i++ ) { - coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] ); - coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] ); - } - } - silk_assert( 0 ); -} - -#if defined(MIPSr1_ASM) -#include "mips/noise_shape_analysis_FIX_mipsr1.h" -#endif - -/**************************************************************/ -/* Compute noise shaping coefficients and initial gain values */ -/**************************************************************/ -#ifndef OVERRIDE_silk_noise_shape_analysis_FIX -void silk_noise_shape_analysis_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ - silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ - const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */ - const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */ - int arch /* I Run-time architecture */ -) -{ - silk_shape_state_FIX *psShapeSt = &psEnc->sShape; - opus_int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0; - opus_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32; - opus_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7; - opus_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8; - opus_int32 auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ]; - opus_int32 refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 AR1_Q24[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 AR2_Q24[ MAX_SHAPE_LPC_ORDER ]; - VARDECL( opus_int16, x_windowed ); - const opus_int16 *x_ptr, *pitch_res_ptr; - SAVE_STACK; - - /* Point to start of first LPC analysis block */ - x_ptr = x - psEnc->sCmn.la_shape; - - /****************/ - /* GAIN CONTROL */ - /****************/ - SNR_adj_dB_Q7 = psEnc->sCmn.SNR_dB_Q7; - - /* Input quality is the average of the quality in the lowest two VAD bands */ - psEncCtrl->input_quality_Q14 = ( opus_int )silk_RSHIFT( (opus_int32)psEnc->sCmn.input_quality_bands_Q15[ 0 ] - + 
psEnc->sCmn.input_quality_bands_Q15[ 1 ], 2 ); - - /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */ - psEncCtrl->coding_quality_Q14 = silk_RSHIFT( silk_sigm_Q15( silk_RSHIFT_ROUND( SNR_adj_dB_Q7 - - SILK_FIX_CONST( 20.0, 7 ), 4 ) ), 1 ); - - /* Reduce coding SNR during low speech activity */ - if( psEnc->sCmn.useCBR == 0 ) { - b_Q8 = SILK_FIX_CONST( 1.0, 8 ) - psEnc->sCmn.speech_activity_Q8; - b_Q8 = silk_SMULWB( silk_LSHIFT( b_Q8, 8 ), b_Q8 ); - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, - silk_SMULBB( SILK_FIX_CONST( -BG_SNR_DECR_dB, 7 ) >> ( 4 + 1 ), b_Q8 ), /* Q11*/ - silk_SMULWB( SILK_FIX_CONST( 1.0, 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) ); /* Q12*/ - } - - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Reduce gains for periodic signals */ - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( HARM_SNR_INCR_dB, 8 ), psEnc->LTPCorr_Q15 ); - } else { - /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */ - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, - silk_SMLAWB( SILK_FIX_CONST( 6.0, 9 ), -SILK_FIX_CONST( 0.4, 18 ), psEnc->sCmn.SNR_dB_Q7 ), - SILK_FIX_CONST( 1.0, 14 ) - psEncCtrl->input_quality_Q14 ); - } - - /*************************/ - /* SPARSENESS PROCESSING */ - /*************************/ - /* Set quantizer offset */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Initially set to 0; may be overruled in process_gains(..) 
*/ - psEnc->sCmn.indices.quantOffsetType = 0; - psEncCtrl->sparseness_Q8 = 0; - } else { - /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ - nSamples = silk_LSHIFT( psEnc->sCmn.fs_kHz, 1 ); - energy_variation_Q7 = 0; - log_energy_prev_Q7 = 0; - pitch_res_ptr = pitch_res; - for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) { - silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples ); - nrg += silk_RSHIFT( nSamples, scale ); /* Q(-scale)*/ - - log_energy_Q7 = silk_lin2log( nrg ); - if( k > 0 ) { - energy_variation_Q7 += silk_abs( log_energy_Q7 - log_energy_prev_Q7 ); - } - log_energy_prev_Q7 = log_energy_Q7; - pitch_res_ptr += nSamples; - } - - psEncCtrl->sparseness_Q8 = silk_RSHIFT( silk_sigm_Q15( silk_SMULWB( energy_variation_Q7 - - SILK_FIX_CONST( 5.0, 7 ), SILK_FIX_CONST( 0.1, 16 ) ) ), 7 ); - - /* Set quantization offset depending on sparseness measure */ - if( psEncCtrl->sparseness_Q8 > SILK_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) { - psEnc->sCmn.indices.quantOffsetType = 0; - } else { - psEnc->sCmn.indices.quantOffsetType = 1; - } - - /* Increase coding SNR for sparse signals */ - SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SILK_FIX_CONST( 0.5, 8 ) ); - } - - /*******************************/ - /* Control bandwidth expansion */ - /*******************************/ - /* More BWE for signals with high prediction gain */ - strength_Q16 = silk_SMULWB( psEncCtrl->predGain_Q16, SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ); - BWExp1_Q16 = BWExp2_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ), - silk_SMLAWW( SILK_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 ); - delta_Q16 = silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - silk_SMULBB( 3, psEncCtrl->coding_quality_Q14 ), - SILK_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) ); - BWExp1_Q16 = silk_SUB32( BWExp1_Q16, delta_Q16 ); - 
BWExp2_Q16 = silk_ADD32( BWExp2_Q16, delta_Q16 ); - /* BWExp1 will be applied after BWExp2, so make it relative */ - BWExp1_Q16 = silk_DIV32_16( silk_LSHIFT( BWExp1_Q16, 14 ), silk_RSHIFT( BWExp2_Q16, 2 ) ); - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */ - warping_Q16 = silk_SMLAWB( psEnc->sCmn.warping_Q16, (opus_int32)psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( 0.01, 18 ) ); - } else { - warping_Q16 = 0; - } - - /********************************************/ - /* Compute noise shaping AR coefs and gains */ - /********************************************/ - ALLOC( x_windowed, psEnc->sCmn.shapeWinLength, opus_int16 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - /* Apply window: sine slope followed by flat part followed by cosine slope */ - opus_int shift, slope_part, flat_part; - flat_part = psEnc->sCmn.fs_kHz * 3; - slope_part = silk_RSHIFT( psEnc->sCmn.shapeWinLength - flat_part, 1 ); - - silk_apply_sine_window( x_windowed, x_ptr, 1, slope_part ); - shift = slope_part; - silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(opus_int16) ); - shift += flat_part; - silk_apply_sine_window( x_windowed + shift, x_ptr + shift, 2, slope_part ); - - /* Update pointer: next LPC analysis block */ - x_ptr += psEnc->sCmn.subfr_length; - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Calculate warped auto correlation */ - silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder ); - } else { - /* Calculate regular auto correlation */ - silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch ); - } - - /* Add white noise, as a fraction of energy */ - auto_corr[0] = silk_ADD32( auto_corr[0], silk_max_32( silk_SMULWB( silk_RSHIFT( auto_corr[ 0 ], 4 ), - SILK_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) ); - - /* Calculate 
the reflection coefficients using schur */ - nrg = silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder ); - silk_assert( nrg >= 0 ); - - /* Convert reflection coefficients to prediction coefficients */ - silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder ); - - Qnrg = -scale; /* range: -12...30*/ - silk_assert( Qnrg >= -12 ); - silk_assert( Qnrg <= 30 ); - - /* Make sure that Qnrg is an even number */ - if( Qnrg & 1 ) { - Qnrg -= 1; - nrg >>= 1; - } - - tmp32 = silk_SQRT_APPROX( nrg ); - Qnrg >>= 1; /* range: -6...15*/ - - psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( tmp32, 16 - Qnrg ); - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Adjust gain for warping */ - gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder ); - silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); - if ( silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ) >= ( silk_int32_MAX >> 1 ) ) { - psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX; - } else { - psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); - } - } - - /* Bandwidth expansion for synthesis filter shaping */ - silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 ); - - /* Compute noise shaping filter coefficients */ - silk_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( opus_int32 ) ); - - /* Bandwidth expansion for analysis filter shaping */ - silk_assert( BWExp1_Q16 <= SILK_FIX_CONST( 1.0, 16 ) ); - silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 ); - - /* Ratio of prediction gains, in energy domain */ - pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder ); - nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder ); - - /*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/ - pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 ); - 
psEncCtrl->GainsPre_Q14[ k ] = ( opus_int ) SILK_FIX_CONST( 0.3, 14 ) + silk_DIV32_varQ( pre_nrg_Q30, nrg, 14 ); - - /* Convert to monic warped prediction coefficients and limit absolute values */ - limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder ); - - /* Convert from Q24 to Q13 and store in int16 */ - for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) { - psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) ); - psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) ); - } - } - - /*****************/ - /* Gain tweaking */ - /*****************/ - /* Increase gains during low speech activity and put lower limit on gains */ - gain_mult_Q16 = silk_log2lin( -silk_SMLAWB( -SILK_FIX_CONST( 16.0, 7 ), SNR_adj_dB_Q7, SILK_FIX_CONST( 0.16, 16 ) ) ); - gain_add_Q16 = silk_log2lin( silk_SMLAWB( SILK_FIX_CONST( 16.0, 7 ), SILK_FIX_CONST( MIN_QGAIN_DB, 7 ), SILK_FIX_CONST( 0.16, 16 ) ) ); - silk_assert( gain_mult_Q16 > 0 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); - silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); - psEncCtrl->Gains_Q16[ k ] = silk_ADD_POS_SAT32( psEncCtrl->Gains_Q16[ k ], gain_add_Q16 ); - } - - gain_mult_Q16 = SILK_FIX_CONST( 1.0, 16 ) + silk_RSHIFT_ROUND( silk_MLA( SILK_FIX_CONST( INPUT_TILT, 26 ), - psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ), 10 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->GainsPre_Q14[ k ] = silk_SMULWB( gain_mult_Q16, psEncCtrl->GainsPre_Q14[ k ] ); - } - - /************************************************/ - /* Control low-frequency shaping and noise tilt */ - /************************************************/ - /* Less low frequency shaping for noisy inputs */ - strength_Q16 = silk_MUL( SILK_FIX_CONST( 
LOW_FREQ_SHAPING, 4 ), silk_SMLAWB( SILK_FIX_CONST( 1.0, 12 ), - SILK_FIX_CONST( LOW_QUALITY_LOW_FREQ_SHAPING_DECR, 13 ), psEnc->sCmn.input_quality_bands_Q15[ 0 ] - SILK_FIX_CONST( 1.0, 15 ) ) ); - strength_Q16 = silk_RSHIFT( silk_MUL( strength_Q16, psEnc->sCmn.speech_activity_Q8 ), 8 ); - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */ - /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/ - opus_int fs_kHz_inv = silk_DIV32_16( SILK_FIX_CONST( 0.2, 14 ), psEnc->sCmn.fs_kHz ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - b_Q14 = fs_kHz_inv + silk_DIV32_16( SILK_FIX_CONST( 3.0, 14 ), psEncCtrl->pitchL[ k ] ); - /* Pack two coefficients in one int32 */ - psEncCtrl->LF_shp_Q14[ k ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - silk_SMULWB( strength_Q16, b_Q14 ), 16 ); - psEncCtrl->LF_shp_Q14[ k ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) ); - } - silk_assert( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ) < SILK_FIX_CONST( 0.5, 24 ) ); /* Guarantees that second argument to SMULWB() is within range of an opus_int16*/ - Tilt_Q16 = - SILK_FIX_CONST( HP_NOISE_COEF, 16 ) - - silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - SILK_FIX_CONST( HP_NOISE_COEF, 16 ), - silk_SMULWB( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ), psEnc->sCmn.speech_activity_Q8 ) ); - } else { - b_Q14 = silk_DIV32_16( 21299, psEnc->sCmn.fs_kHz ); /* 1.3_Q0 = 21299_Q14*/ - /* Pack two coefficients in one int32 */ - psEncCtrl->LF_shp_Q14[ 0 ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - - silk_SMULWB( strength_Q16, silk_SMULWB( SILK_FIX_CONST( 0.6, 16 ), b_Q14 ) ), 16 ); - psEncCtrl->LF_shp_Q14[ 0 ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) ); - for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->LF_shp_Q14[ k ] = psEncCtrl->LF_shp_Q14[ 0 ]; - } - Tilt_Q16 = -SILK_FIX_CONST( HP_NOISE_COEF, 16 ); - } - - /****************************/ 
- /* HARMONIC SHAPING CONTROL */ - /****************************/ - /* Control boosting of harmonic frequencies */ - HarmBoost_Q16 = silk_SMULWB( silk_SMULWB( SILK_FIX_CONST( 1.0, 17 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 3 ), - psEnc->LTPCorr_Q15 ), SILK_FIX_CONST( LOW_RATE_HARMONIC_BOOST, 16 ) ); - - /* More harmonic boost for noisy input signals */ - HarmBoost_Q16 = silk_SMLAWB( HarmBoost_Q16, - SILK_FIX_CONST( 1.0, 16 ) - silk_LSHIFT( psEncCtrl->input_quality_Q14, 2 ), SILK_FIX_CONST( LOW_INPUT_QUALITY_HARMONIC_BOOST, 16 ) ); - - if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* More harmonic noise shaping for high bitrates or noisy input */ - HarmShapeGain_Q16 = silk_SMLAWB( SILK_FIX_CONST( HARMONIC_SHAPING, 16 ), - SILK_FIX_CONST( 1.0, 16 ) - silk_SMULWB( SILK_FIX_CONST( 1.0, 18 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 4 ), - psEncCtrl->input_quality_Q14 ), SILK_FIX_CONST( HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING, 16 ) ); - - /* Less harmonic noise shaping for less periodic signals */ - HarmShapeGain_Q16 = silk_SMULWB( silk_LSHIFT( HarmShapeGain_Q16, 1 ), - silk_SQRT_APPROX( silk_LSHIFT( psEnc->LTPCorr_Q15, 15 ) ) ); - } else { - HarmShapeGain_Q16 = 0; - } - - /*************************/ - /* Smooth over subframes */ - /*************************/ - for( k = 0; k < MAX_NB_SUBFR; k++ ) { - psShapeSt->HarmBoost_smth_Q16 = - silk_SMLAWB( psShapeSt->HarmBoost_smth_Q16, HarmBoost_Q16 - psShapeSt->HarmBoost_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); - psShapeSt->HarmShapeGain_smth_Q16 = - silk_SMLAWB( psShapeSt->HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt->HarmShapeGain_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); - psShapeSt->Tilt_smth_Q16 = - silk_SMLAWB( psShapeSt->Tilt_smth_Q16, Tilt_Q16 - psShapeSt->Tilt_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); - - psEncCtrl->HarmBoost_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmBoost_smth_Q16, 2 ); - psEncCtrl->HarmShapeGain_Q14[ k ] = 
( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 ); - psEncCtrl->Tilt_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16, 2 ); - } - RESTORE_STACK; -} -#endif /* OVERRIDE_silk_noise_shape_analysis_FIX */ diff --git a/thirdparty/opus/silk/fixed/pitch_analysis_core_FIX.c b/thirdparty/opus/silk/fixed/pitch_analysis_core_FIX.c deleted file mode 100644 index 01bb9fc0a8..0000000000 --- a/thirdparty/opus/silk/fixed/pitch_analysis_core_FIX.c +++ /dev/null @@ -1,746 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/*********************************************************** -* Pitch analyser function -********************************************************** */ -#include "SigProc_FIX.h" -#include "pitch_est_defines.h" -#include "stack_alloc.h" -#include "debug.h" -#include "pitch.h" - -#define SCRATCH_SIZE 22 -#define SF_LENGTH_4KHZ ( PE_SUBFR_LENGTH_MS * 4 ) -#define SF_LENGTH_8KHZ ( PE_SUBFR_LENGTH_MS * 8 ) -#define MIN_LAG_4KHZ ( PE_MIN_LAG_MS * 4 ) -#define MIN_LAG_8KHZ ( PE_MIN_LAG_MS * 8 ) -#define MAX_LAG_4KHZ ( PE_MAX_LAG_MS * 4 ) -#define MAX_LAG_8KHZ ( PE_MAX_LAG_MS * 8 - 1 ) -#define CSTRIDE_4KHZ ( MAX_LAG_4KHZ + 1 - MIN_LAG_4KHZ ) -#define CSTRIDE_8KHZ ( MAX_LAG_8KHZ + 3 - ( MIN_LAG_8KHZ - 2 ) ) -#define D_COMP_MIN ( MIN_LAG_8KHZ - 3 ) -#define D_COMP_MAX ( MAX_LAG_8KHZ + 4 ) -#define D_COMP_STRIDE ( D_COMP_MAX - D_COMP_MIN ) - -typedef opus_int32 silk_pe_stage3_vals[ PE_NB_STAGE3_LAGS ]; - -/************************************************************/ -/* Internally used functions */ -/************************************************************/ -static void silk_P_Ana_calc_corr_st3( - silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */ - const opus_int16 frame[], /* I vector to correlate */ - opus_int start_lag, /* I lag offset to search around */ - opus_int sf_length, /* I length of a 5 ms subframe */ 
- opus_int nb_subfr, /* I number of subframes */ - opus_int complexity, /* I Complexity setting */ - int arch /* I Run-time architecture */ -); - -static void silk_P_Ana_calc_energy_st3( - silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array */ - const opus_int16 frame[], /* I vector to calc energy in */ - opus_int start_lag, /* I lag offset to search around */ - opus_int sf_length, /* I length of one 5 ms subframe */ - opus_int nb_subfr, /* I number of subframes */ - opus_int complexity, /* I Complexity setting */ - int arch /* I Run-time architecture */ -); - -/*************************************************************/ -/* FIXED POINT CORE PITCH ANALYSIS FUNCTION */ -/*************************************************************/ -opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */ - const opus_int16 *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */ - opus_int *pitch_out, /* O 4 pitch lag values */ - opus_int16 *lagIndex, /* O Lag Index */ - opus_int8 *contourIndex, /* O Pitch contour Index */ - opus_int *LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */ - opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */ - const opus_int32 search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */ - const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */ - const opus_int Fs_kHz, /* I Sample frequency (kHz) */ - const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ - const opus_int nb_subfr, /* I number of 5 ms subframes */ - int arch /* I Run-time architecture */ -) -{ - VARDECL( opus_int16, frame_8kHz ); - VARDECL( opus_int16, frame_4kHz ); - opus_int32 filt_state[ 6 ]; - const opus_int16 *input_frame_ptr; - opus_int i, k, d, j; - VARDECL( opus_int16, C ); - VARDECL( opus_int32, xcorr32 ); - const opus_int16 *target_ptr, *basis_ptr; - opus_int32 cross_corr, normalizer, energy, shift, energy_basis, 
energy_target; - opus_int d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp; - VARDECL( opus_int16, d_comp ); - opus_int32 sum, threshold, lag_counter; - opus_int CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new; - opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new; - VARDECL( silk_pe_stage3_vals, energies_st3 ); - VARDECL( silk_pe_stage3_vals, cross_corr_st3 ); - opus_int frame_length, frame_length_8kHz, frame_length_4kHz; - opus_int sf_length; - opus_int min_lag; - opus_int max_lag; - opus_int32 contour_bias_Q15, diff; - opus_int nb_cbk_search, cbk_size; - opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13; - const opus_int8 *Lag_CB_ptr; - SAVE_STACK; - /* Check for valid sampling frequency */ - silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 ); - - /* Check for valid complexity setting */ - silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); - silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); - - silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) ); - silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) ); - - /* Set up frame lengths max / min lag for the sampling frequency */ - frame_length = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz; - frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4; - frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8; - sf_length = PE_SUBFR_LENGTH_MS * Fs_kHz; - min_lag = PE_MIN_LAG_MS * Fs_kHz; - max_lag = PE_MAX_LAG_MS * Fs_kHz - 1; - - /* Resample from input sampled at Fs_kHz to 8 kHz */ - ALLOC( frame_8kHz, frame_length_8kHz, opus_int16 ); - if( Fs_kHz == 16 ) { - silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) ); - silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length ); - } else if( Fs_kHz == 12 ) { - silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) ); - silk_resampler_down2_3( filt_state, frame_8kHz, 
frame, frame_length ); - } else { - silk_assert( Fs_kHz == 8 ); - silk_memcpy( frame_8kHz, frame, frame_length_8kHz * sizeof(opus_int16) ); - } - - /* Decimate again to 4 kHz */ - silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */ - ALLOC( frame_4kHz, frame_length_4kHz, opus_int16 ); - silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz ); - - /* Low-pass filter */ - for( i = frame_length_4kHz - 1; i > 0; i-- ) { - frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] ); - } - - /******************************************************************************* - ** Scale 4 kHz signal down to prevent correlations measures from overflowing - ** find scaling as max scaling for each 8kHz(?) subframe - *******************************************************************************/ - - /* Inner product is calculated with different lengths, so scale for the worst case */ - silk_sum_sqr_shift( &energy, &shift, frame_4kHz, frame_length_4kHz ); - if( shift > 0 ) { - shift = silk_RSHIFT( shift, 1 ); - for( i = 0; i < frame_length_4kHz; i++ ) { - frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift ); - } - } - - /****************************************************************************** - * FIRST STAGE, operating in 4 khz - ******************************************************************************/ - ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 ); - ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 ); - silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) ); - target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ]; - for( k = 0; k < nb_subfr >> 1; k++ ) { - /* Check that we are within range of the array */ - silk_assert( target_ptr >= frame_4kHz ); - silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); - - basis_ptr = target_ptr - MIN_LAG_4KHZ; - - /* Check that we are within range of the array */ - silk_assert( basis_ptr >= frame_4kHz ); - 
silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); - - celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch ); - - /* Calculate first vector products before loop */ - cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ]; - normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ); - normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch ) ); - normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) ); - - matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) = - (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */ - - /* From now on normalizer is computed recursively */ - for( d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++ ) { - basis_ptr--; - - /* Check that we are within range of the array */ - silk_assert( basis_ptr >= frame_4kHz ); - silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); - - cross_corr = xcorr32[ MAX_LAG_4KHZ - d ]; - - /* Add contribution of new sample and remove contribution from oldest sample */ - normalizer = silk_ADD32( normalizer, - silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) - - silk_SMULBB( basis_ptr[ SF_LENGTH_8KHZ ], basis_ptr[ SF_LENGTH_8KHZ ] ) ); - - matrix_ptr( C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ) = - (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */ - } - /* Update target pointer */ - target_ptr += SF_LENGTH_8KHZ; - } - - /* Combine two subframes into single correlation measure and apply short-lag bias */ - if( nb_subfr == PE_MAX_NB_SUBFR ) { - for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) { - sum = (opus_int32)matrix_ptr( C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ ) - + (opus_int32)matrix_ptr( C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ ); /* Q14 */ - sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */ - C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum; /* Q14 */ - } - } else { - /* Only short-lag bias */ - 
for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) { - sum = silk_LSHIFT( (opus_int32)C[ i - MIN_LAG_4KHZ ], 1 ); /* Q14 */ - sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */ - C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum; /* Q14 */ - } - } - - /* Sort */ - length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 ); - silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH ); - silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ, - length_d_srch ); - - /* Escape if correlation is very low already here */ - Cmax = (opus_int)C[ 0 ]; /* Q14 */ - if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) { - silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) ); - *LTPCorr_Q15 = 0; - *lagIndex = 0; - *contourIndex = 0; - RESTORE_STACK; - return 1; - } - - threshold = silk_SMULWB( search_thres1_Q16, Cmax ); - for( i = 0; i < length_d_srch; i++ ) { - /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */ - if( C[ i ] > threshold ) { - d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + MIN_LAG_4KHZ, 1 ); - } else { - length_d_srch = i; - break; - } - } - silk_assert( length_d_srch > 0 ); - - ALLOC( d_comp, D_COMP_STRIDE, opus_int16 ); - for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) { - d_comp[ i - D_COMP_MIN ] = 0; - } - for( i = 0; i < length_d_srch; i++ ) { - d_comp[ d_srch[ i ] - D_COMP_MIN ] = 1; - } - - /* Convolution */ - for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) { - d_comp[ i - D_COMP_MIN ] += - d_comp[ i - 1 - D_COMP_MIN ] + d_comp[ i - 2 - D_COMP_MIN ]; - } - - length_d_srch = 0; - for( i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++ ) { - if( d_comp[ i + 1 - D_COMP_MIN ] > 0 ) { - d_srch[ length_d_srch ] = i; - length_d_srch++; - } - } - - /* Convolution */ - for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) { - d_comp[ i - D_COMP_MIN ] += d_comp[ i - 1 - D_COMP_MIN ] - + d_comp[ i - 2 - D_COMP_MIN ] + d_comp[ i - 3 - D_COMP_MIN ]; - } - - length_d_comp = 0; - for( i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++ ) { - if( d_comp[ i - D_COMP_MIN ] > 0 ) { - 
d_comp[ length_d_comp ] = i - 2; - length_d_comp++; - } - } - - /********************************************************************************** - ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation - *************************************************************************************/ - - /****************************************************************************** - ** Scale signal down to avoid correlations measures from overflowing - *******************************************************************************/ - /* find scaling as max scaling for each subframe */ - silk_sum_sqr_shift( &energy, &shift, frame_8kHz, frame_length_8kHz ); - if( shift > 0 ) { - shift = silk_RSHIFT( shift, 1 ); - for( i = 0; i < frame_length_8kHz; i++ ) { - frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift ); - } - } - - /********************************************************************************* - * Find energy of each subframe projected onto its history, for a range of delays - *********************************************************************************/ - silk_memset( C, 0, nb_subfr * CSTRIDE_8KHZ * sizeof( opus_int16 ) ); - - target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ]; - for( k = 0; k < nb_subfr; k++ ) { - - /* Check that we are within range of the array */ - silk_assert( target_ptr >= frame_8kHz ); - silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz ); - - energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ), 1 ); - for( j = 0; j < length_d_comp; j++ ) { - d = d_comp[ j ]; - basis_ptr = target_ptr - d; - - /* Check that we are within range of the array */ - silk_assert( basis_ptr >= frame_8kHz ); - silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz ); - - cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ, arch ); - if( cross_corr > 0 ) { - energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, 
SF_LENGTH_8KHZ, arch ); - matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = - (opus_int16)silk_DIV32_varQ( cross_corr, - silk_ADD32( energy_target, - energy_basis ), - 13 + 1 ); /* Q13 */ - } else { - matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = 0; - } - } - target_ptr += SF_LENGTH_8KHZ; - } - - /* search over lag range and lags codebook */ - /* scale factor for lag codebook, as a function of center lag */ - - CCmax = silk_int32_MIN; - CCmax_b = silk_int32_MIN; - - CBimax = 0; /* To avoid returning undefined lag values */ - lag = -1; /* To check if lag with strong enough correlation has been found */ - - if( prevLag > 0 ) { - if( Fs_kHz == 12 ) { - prevLag = silk_DIV32_16( silk_LSHIFT( prevLag, 1 ), 3 ); - } else if( Fs_kHz == 16 ) { - prevLag = silk_RSHIFT( prevLag, 1 ); - } - prevLag_log2_Q7 = silk_lin2log( (opus_int32)prevLag ); - } else { - prevLag_log2_Q7 = 0; - } - silk_assert( search_thres2_Q13 == silk_SAT16( search_thres2_Q13 ) ); - /* Set up stage 2 codebook based on number of subframes */ - if( nb_subfr == PE_MAX_NB_SUBFR ) { - cbk_size = PE_NB_CBKS_STAGE2_EXT; - Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ]; - if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) { - /* If input is 8 khz use a larger codebook here because it is last stage */ - nb_cbk_search = PE_NB_CBKS_STAGE2_EXT; - } else { - nb_cbk_search = PE_NB_CBKS_STAGE2; - } - } else { - cbk_size = PE_NB_CBKS_STAGE2_10MS; - Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ]; - nb_cbk_search = PE_NB_CBKS_STAGE2_10MS; - } - - for( k = 0; k < length_d_srch; k++ ) { - d = d_srch[ k ]; - for( j = 0; j < nb_cbk_search; j++ ) { - CC[ j ] = 0; - for( i = 0; i < nb_subfr; i++ ) { - opus_int d_subfr; - /* Try all codebooks */ - d_subfr = d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size ); - CC[ j ] = CC[ j ] - + (opus_int32)matrix_ptr( C, i, - d_subfr - ( MIN_LAG_8KHZ - 2 ), - CSTRIDE_8KHZ ); - } - } - /* Find best codebook */ - CCmax_new = silk_int32_MIN; - CBimax_new = 0; - for( i = 0; 
i < nb_cbk_search; i++ ) { - if( CC[ i ] > CCmax_new ) { - CCmax_new = CC[ i ]; - CBimax_new = i; - } - } - - /* Bias towards shorter lags */ - lag_log2_Q7 = silk_lin2log( d ); /* Q7 */ - silk_assert( lag_log2_Q7 == silk_SAT16( lag_log2_Q7 ) ); - silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) ) ); - CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ), lag_log2_Q7 ), 7 ); /* Q13 */ - - /* Bias towards previous lag */ - silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) ) ); - if( prevLag > 0 ) { - delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7; - silk_assert( delta_lag_log2_sqr_Q7 == silk_SAT16( delta_lag_log2_sqr_Q7 ) ); - delta_lag_log2_sqr_Q7 = silk_RSHIFT( silk_SMULBB( delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7 ), 7 ); - prev_lag_bias_Q13 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ), *LTPCorr_Q15 ), 15 ); /* Q13 */ - prev_lag_bias_Q13 = silk_DIV32( silk_MUL( prev_lag_bias_Q13, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + SILK_FIX_CONST( 0.5, 7 ) ); - CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */ - } - - if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */ - CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 ) && /* Correlation needs to be high enough to be voiced */ - silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= MIN_LAG_8KHZ /* Lag must be in range */ - ) { - CCmax_b = CCmax_new_b; - CCmax = CCmax_new; - lag = d; - CBimax = CBimax_new; - } - } - - if( lag == -1 ) { - /* No suitable candidate found */ - silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) ); - *LTPCorr_Q15 = 0; - *lagIndex = 0; - *contourIndex = 0; - RESTORE_STACK; - return 1; - } - - /* Output normalized correlation */ - *LTPCorr_Q15 = (opus_int)silk_LSHIFT( silk_DIV32_16( CCmax, nb_subfr ), 2 ); - silk_assert( *LTPCorr_Q15 >= 0 ); - 
- if( Fs_kHz > 8 ) { - VARDECL( opus_int16, scratch_mem ); - /***************************************************************************/ - /* Scale input signal down to avoid correlations measures from overflowing */ - /***************************************************************************/ - /* find scaling as max scaling for each subframe */ - silk_sum_sqr_shift( &energy, &shift, frame, frame_length ); - ALLOC( scratch_mem, shift > 0 ? frame_length : ALLOC_NONE, opus_int16 ); - if( shift > 0 ) { - /* Move signal to scratch mem because the input signal should be unchanged */ - shift = silk_RSHIFT( shift, 1 ); - for( i = 0; i < frame_length; i++ ) { - scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift ); - } - input_frame_ptr = scratch_mem; - } else { - input_frame_ptr = frame; - } - - /* Search in original signal */ - - CBimax_old = CBimax; - /* Compensate for decimation */ - silk_assert( lag == silk_SAT16( lag ) ); - if( Fs_kHz == 12 ) { - lag = silk_RSHIFT( silk_SMULBB( lag, 3 ), 1 ); - } else if( Fs_kHz == 16 ) { - lag = silk_LSHIFT( lag, 1 ); - } else { - lag = silk_SMULBB( lag, 3 ); - } - - lag = silk_LIMIT_int( lag, min_lag, max_lag ); - start_lag = silk_max_int( lag - 2, min_lag ); - end_lag = silk_min_int( lag + 2, max_lag ); - lag_new = lag; /* to avoid undefined lag */ - CBimax = 0; /* to avoid undefined lag */ - - CCmax = silk_int32_MIN; - /* pitch lags according to second stage */ - for( k = 0; k < nb_subfr; k++ ) { - pitch_out[ k ] = lag + 2 * silk_CB_lags_stage2[ k ][ CBimax_old ]; - } - - /* Set up codebook parameters according to complexity setting and frame length */ - if( nb_subfr == PE_MAX_NB_SUBFR ) { - nb_cbk_search = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ]; - cbk_size = PE_NB_CBKS_STAGE3_MAX; - Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; - } else { - nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; - cbk_size = PE_NB_CBKS_STAGE3_10MS; - Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; - } - - /* Calculate the correlations and 
energies needed in stage 3 */ - ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals ); - ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals ); - silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch ); - silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch ); - - lag_counter = 0; - silk_assert( lag == silk_SAT16( lag ) ); - contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag ); - - target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ]; - energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length, arch ), 1 ); - for( d = start_lag; d <= end_lag; d++ ) { - for( j = 0; j < nb_cbk_search; j++ ) { - cross_corr = 0; - energy = energy_target; - for( k = 0; k < nb_subfr; k++ ) { - cross_corr = silk_ADD32( cross_corr, - matrix_ptr( cross_corr_st3, k, j, - nb_cbk_search )[ lag_counter ] ); - energy = silk_ADD32( energy, - matrix_ptr( energies_st3, k, j, - nb_cbk_search )[ lag_counter ] ); - silk_assert( energy >= 0 ); - } - if( cross_corr > 0 ) { - CCmax_new = silk_DIV32_varQ( cross_corr, energy, 13 + 1 ); /* Q13 */ - /* Reduce depending on flatness of contour */ - diff = silk_int16_MAX - silk_MUL( contour_bias_Q15, j ); /* Q15 */ - silk_assert( diff == silk_SAT16( diff ) ); - CCmax_new = silk_SMULWB( CCmax_new, diff ); /* Q14 */ - } else { - CCmax_new = 0; - } - - if( CCmax_new > CCmax && ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) { - CCmax = CCmax_new; - lag_new = d; - CBimax = j; - } - } - lag_counter++; - } - - for( k = 0; k < nb_subfr; k++ ) { - pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); - pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz ); - } - *lagIndex = (opus_int16)( lag_new - min_lag); - *contourIndex = (opus_int8)CBimax; - } else { /* Fs_kHz == 8 */ - /* Save Lags */ - for( k = 0; k 
< nb_subfr; k++ ) { - pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); - pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], MIN_LAG_8KHZ, PE_MAX_LAG_MS * 8 ); - } - *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ ); - *contourIndex = (opus_int8)CBimax; - } - silk_assert( *lagIndex >= 0 ); - /* return as voiced */ - RESTORE_STACK; - return 0; -} - -/*********************************************************************** - * Calculates the correlations used in stage 3 search. In order to cover - * the whole lag codebook for all the searched offset lags (lag +- 2), - * the following correlations are needed in each sub frame: - * - * sf1: lag range [-8,...,7] total 16 correlations - * sf2: lag range [-4,...,4] total 9 correlations - * sf3: lag range [-3,....4] total 8 correltions - * sf4: lag range [-6,....8] total 15 correlations - * - * In total 48 correlations. The direct implementation computed in worst - * case 4*12*5 = 240 correlations, but more likely around 120. - ***********************************************************************/ -static void silk_P_Ana_calc_corr_st3( - silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */ - const opus_int16 frame[], /* I vector to correlate */ - opus_int start_lag, /* I lag offset to search around */ - opus_int sf_length, /* I length of a 5 ms subframe */ - opus_int nb_subfr, /* I number of subframes */ - opus_int complexity, /* I Complexity setting */ - int arch /* I Run-time architecture */ -) -{ - const opus_int16 *target_ptr; - opus_int i, j, k, lag_counter, lag_low, lag_high; - opus_int nb_cbk_search, delta, idx, cbk_size; - VARDECL( opus_int32, scratch_mem ); - VARDECL( opus_int32, xcorr32 ); - const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; - SAVE_STACK; - - silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); - silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); - - if( nb_subfr == PE_MAX_NB_SUBFR ) { - Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; - Lag_CB_ptr = 
&silk_CB_lags_stage3[ 0 ][ 0 ]; - nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; - cbk_size = PE_NB_CBKS_STAGE3_MAX; - } else { - silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); - Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; - Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; - nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; - cbk_size = PE_NB_CBKS_STAGE3_10MS; - } - ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 ); - ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 ); - - target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */ - for( k = 0; k < nb_subfr; k++ ) { - lag_counter = 0; - - /* Calculate the correlations for each subframe */ - lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 ); - lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 ); - silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE); - celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch ); - for( j = lag_low; j <= lag_high; j++ ) { - silk_assert( lag_counter < SCRATCH_SIZE ); - scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ]; - lag_counter++; - } - - delta = matrix_ptr( Lag_range_ptr, k, 0, 2 ); - for( i = 0; i < nb_cbk_search; i++ ) { - /* Fill out the 3 dim array that stores the correlations for */ - /* each code_book vector for each start lag */ - idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta; - for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) { - silk_assert( idx + j < SCRATCH_SIZE ); - silk_assert( idx + j < lag_counter ); - matrix_ptr( cross_corr_st3, k, i, nb_cbk_search )[ j ] = - scratch_mem[ idx + j ]; - } - } - target_ptr += sf_length; - } - RESTORE_STACK; -} - -/********************************************************************/ -/* Calculate the energies for first two subframes. The energies are */ -/* calculated recursively. 
*/ -/********************************************************************/ -static void silk_P_Ana_calc_energy_st3( - silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array */ - const opus_int16 frame[], /* I vector to calc energy in */ - opus_int start_lag, /* I lag offset to search around */ - opus_int sf_length, /* I length of one 5 ms subframe */ - opus_int nb_subfr, /* I number of subframes */ - opus_int complexity, /* I Complexity setting */ - int arch /* I Run-time architecture */ -) -{ - const opus_int16 *target_ptr, *basis_ptr; - opus_int32 energy; - opus_int k, i, j, lag_counter; - opus_int nb_cbk_search, delta, idx, cbk_size, lag_diff; - VARDECL( opus_int32, scratch_mem ); - const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; - SAVE_STACK; - - silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); - silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); - - if( nb_subfr == PE_MAX_NB_SUBFR ) { - Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; - Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; - nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; - cbk_size = PE_NB_CBKS_STAGE3_MAX; - } else { - silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); - Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; - Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; - nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; - cbk_size = PE_NB_CBKS_STAGE3_10MS; - } - ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 ); - - target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; - for( k = 0; k < nb_subfr; k++ ) { - lag_counter = 0; - - /* Calculate the energy for first lag */ - basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) ); - energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length, arch ); - silk_assert( energy >= 0 ); - scratch_mem[ lag_counter ] = energy; - lag_counter++; - - lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) - matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 ); - for( i = 1; i < lag_diff; i++ ) { - /* remove part outside new window */ - 
energy -= silk_SMULBB( basis_ptr[ sf_length - i ], basis_ptr[ sf_length - i ] ); - silk_assert( energy >= 0 ); - - /* add part that comes into window */ - energy = silk_ADD_SAT32( energy, silk_SMULBB( basis_ptr[ -i ], basis_ptr[ -i ] ) ); - silk_assert( energy >= 0 ); - silk_assert( lag_counter < SCRATCH_SIZE ); - scratch_mem[ lag_counter ] = energy; - lag_counter++; - } - - delta = matrix_ptr( Lag_range_ptr, k, 0, 2 ); - for( i = 0; i < nb_cbk_search; i++ ) { - /* Fill out the 3 dim array that stores the correlations for */ - /* each code_book vector for each start lag */ - idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta; - for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) { - silk_assert( idx + j < SCRATCH_SIZE ); - silk_assert( idx + j < lag_counter ); - matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] = - scratch_mem[ idx + j ]; - silk_assert( - matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] >= 0 ); - } - } - target_ptr += sf_length; - } - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/fixed/prefilter_FIX.c b/thirdparty/opus/silk/fixed/prefilter_FIX.c deleted file mode 100644 index 6a8e35152e..0000000000 --- a/thirdparty/opus/silk/fixed/prefilter_FIX.c +++ /dev/null @@ -1,221 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" -#include "tuning_parameters.h" - -#if defined(MIPSr1_ASM) -#include "mips/prefilter_FIX_mipsr1.h" -#endif - - -#if !defined(OVERRIDE_silk_warped_LPC_analysis_filter_FIX) -#define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \ - ((void)(arch),silk_warped_LPC_analysis_filter_FIX_c(state, res_Q2, coef_Q13, input, lambda_Q16, length, order)) -#endif - -/* Prefilter for finding Quantizer input signal */ -static OPUS_INLINE void silk_prefilt_FIX( - silk_prefilter_state_FIX *P, /* I/O state */ - opus_int32 st_res_Q12[], /* I short term residual signal */ - opus_int32 xw_Q3[], /* O prefiltered signal */ - opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coeficients */ - opus_int Tilt_Q14, /* I Tilt shaping coeficient */ - opus_int32 LF_shp_Q14, /* I Low-frequancy shaping 
coeficients */ - opus_int lag, /* I Lag for harmonic shaping */ - opus_int length /* I Length of signals */ -); - -void silk_warped_LPC_analysis_filter_FIX_c( - opus_int32 state[], /* I/O State [order + 1] */ - opus_int32 res_Q2[], /* O Residual signal [length] */ - const opus_int16 coef_Q13[], /* I Coefficients [order] */ - const opus_int16 input[], /* I Input signal [length] */ - const opus_int16 lambda_Q16, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order /* I Filter order (even) */ -) -{ - opus_int n, i; - opus_int32 acc_Q11, tmp1, tmp2; - - /* Order must be even */ - silk_assert( ( order & 1 ) == 0 ); - - for( n = 0; n < length; n++ ) { - /* Output of lowpass section */ - tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 ); - state[ 0 ] = silk_LSHIFT( input[ n ], 14 ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 ); - state[ 1 ] = tmp2; - acc_Q11 = silk_RSHIFT( order, 1 ); - acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] ); - /* Loop over allpass sections */ - for( i = 2; i < order; i += 2 ) { - /* Output of allpass section */ - tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 ); - state[ i ] = tmp1; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 ); - state[ i + 1 ] = tmp2; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] ); - } - state[ order ] = tmp1; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] ); - res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 ); - } -} - -void silk_prefilter_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Encoder state */ - const silk_encoder_control_FIX *psEncCtrl, /* I Encoder control */ - opus_int32 xw_Q3[], /* O Weighted signal */ - const opus_int16 x[] /* I Speech signal */ -) -{ - silk_prefilter_state_FIX *P = 
&psEnc->sPrefilt; - opus_int j, k, lag; - opus_int32 tmp_32; - const opus_int16 *AR1_shp_Q13; - const opus_int16 *px; - opus_int32 *pxw_Q3; - opus_int HarmShapeGain_Q12, Tilt_Q14; - opus_int32 HarmShapeFIRPacked_Q12, LF_shp_Q14; - VARDECL( opus_int32, x_filt_Q12 ); - VARDECL( opus_int32, st_res_Q2 ); - opus_int16 B_Q10[ 2 ]; - SAVE_STACK; - - /* Set up pointers */ - px = x; - pxw_Q3 = xw_Q3; - lag = P->lagPrev; - ALLOC( x_filt_Q12, psEnc->sCmn.subfr_length, opus_int32 ); - ALLOC( st_res_Q2, psEnc->sCmn.subfr_length, opus_int32 ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - /* Update Variables that change per sub frame */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - lag = psEncCtrl->pitchL[ k ]; - } - - /* Noise shape parameters */ - HarmShapeGain_Q12 = silk_SMULWB( (opus_int32)psEncCtrl->HarmShapeGain_Q14[ k ], 16384 - psEncCtrl->HarmBoost_Q14[ k ] ); - silk_assert( HarmShapeGain_Q12 >= 0 ); - HarmShapeFIRPacked_Q12 = silk_RSHIFT( HarmShapeGain_Q12, 2 ); - HarmShapeFIRPacked_Q12 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q12, 1 ), 16 ); - Tilt_Q14 = psEncCtrl->Tilt_Q14[ k ]; - LF_shp_Q14 = psEncCtrl->LF_shp_Q14[ k ]; - AR1_shp_Q13 = &psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER ]; - - /* Short term FIR filtering*/ - silk_warped_LPC_analysis_filter_FIX( P->sAR_shp, st_res_Q2, AR1_shp_Q13, px, - psEnc->sCmn.warping_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder, psEnc->sCmn.arch ); - - /* Reduce (mainly) low frequencies during harmonic emphasis */ - B_Q10[ 0 ] = silk_RSHIFT_ROUND( psEncCtrl->GainsPre_Q14[ k ], 4 ); - tmp_32 = silk_SMLABB( SILK_FIX_CONST( INPUT_TILT, 26 ), psEncCtrl->HarmBoost_Q14[ k ], HarmShapeGain_Q12 ); /* Q26 */ - tmp_32 = silk_SMLABB( tmp_32, psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ); /* Q26 */ - tmp_32 = silk_SMULWB( tmp_32, -psEncCtrl->GainsPre_Q14[ k ] ); /* Q24 */ - tmp_32 = silk_RSHIFT_ROUND( tmp_32, 14 ); /* Q10 */ - B_Q10[ 1 ]= silk_SAT16( tmp_32 ); - x_filt_Q12[ 
0 ] = silk_MLA( silk_MUL( st_res_Q2[ 0 ], B_Q10[ 0 ] ), P->sHarmHP_Q2, B_Q10[ 1 ] ); - for( j = 1; j < psEnc->sCmn.subfr_length; j++ ) { - x_filt_Q12[ j ] = silk_MLA( silk_MUL( st_res_Q2[ j ], B_Q10[ 0 ] ), st_res_Q2[ j - 1 ], B_Q10[ 1 ] ); - } - P->sHarmHP_Q2 = st_res_Q2[ psEnc->sCmn.subfr_length - 1 ]; - - silk_prefilt_FIX( P, x_filt_Q12, pxw_Q3, HarmShapeFIRPacked_Q12, Tilt_Q14, LF_shp_Q14, lag, psEnc->sCmn.subfr_length ); - - px += psEnc->sCmn.subfr_length; - pxw_Q3 += psEnc->sCmn.subfr_length; - } - - P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ]; - RESTORE_STACK; -} - -#ifndef OVERRIDE_silk_prefilt_FIX -/* Prefilter for finding Quantizer input signal */ -static OPUS_INLINE void silk_prefilt_FIX( - silk_prefilter_state_FIX *P, /* I/O state */ - opus_int32 st_res_Q12[], /* I short term residual signal */ - opus_int32 xw_Q3[], /* O prefiltered signal */ - opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coeficients */ - opus_int Tilt_Q14, /* I Tilt shaping coeficient */ - opus_int32 LF_shp_Q14, /* I Low-frequancy shaping coeficients */ - opus_int lag, /* I Lag for harmonic shaping */ - opus_int length /* I Length of signals */ -) -{ - opus_int i, idx, LTP_shp_buf_idx; - opus_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10; - opus_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12; - opus_int16 *LTP_shp_buf; - - /* To speed up use temp variables instead of using the struct */ - LTP_shp_buf = P->sLTP_shp; - LTP_shp_buf_idx = P->sLTP_shp_buf_idx; - sLF_AR_shp_Q12 = P->sLF_AR_shp_Q12; - sLF_MA_shp_Q12 = P->sLF_MA_shp_Q12; - - for( i = 0; i < length; i++ ) { - if( lag > 0 ) { - /* unrolled loop */ - silk_assert( HARM_SHAPE_FIR_TAPS == 3 ); - idx = lag + LTP_shp_buf_idx; - n_LTP_Q12 = silk_SMULBB( LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); - n_LTP_Q12 = silk_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); - n_LTP_Q12 = silk_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - 
HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 ); - } else { - n_LTP_Q12 = 0; - } - - n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 ); - n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 ); - - sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) ); - sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) ); - - LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; - LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) ); - - xw_Q3[i] = silk_RSHIFT_ROUND( silk_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 9 ); - } - - /* Copy temp variable back to state */ - P->sLF_AR_shp_Q12 = sLF_AR_shp_Q12; - P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12; - P->sLTP_shp_buf_idx = LTP_shp_buf_idx; -} -#endif /* OVERRIDE_silk_prefilt_FIX */ diff --git a/thirdparty/opus/silk/fixed/process_gains_FIX.c b/thirdparty/opus/silk/fixed/process_gains_FIX.c deleted file mode 100644 index 05aba31788..0000000000 --- a/thirdparty/opus/silk/fixed/process_gains_FIX.c +++ /dev/null @@ -1,117 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "tuning_parameters.h" - -/* Processing of gains */ -void silk_process_gains_FIX( - silk_encoder_state_FIX *psEnc, /* I/O Encoder state */ - silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control */ - opus_int condCoding /* I The type of conditional coding to use */ -) -{ - silk_shape_state_FIX *psShapeSt = &psEnc->sShape; - opus_int k; - opus_int32 s_Q16, InvMaxSqrVal_Q16, gain, gain_squared, ResNrg, ResNrgPart, quant_offset_Q10; - - /* Gain reduction when LTP coding gain is high */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /*s = -0.5f * silk_sigmoid( 0.25f * ( psEncCtrl->LTPredCodGain - 12.0f ) ); */ - s_Q16 = -silk_sigm_Q15( silk_RSHIFT_ROUND( psEncCtrl->LTPredCodGain_Q7 - SILK_FIX_CONST( 12.0, 7 ), 4 ) ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->Gains_Q16[ k ] = silk_SMLAWB( psEncCtrl->Gains_Q16[ k ], psEncCtrl->Gains_Q16[ k ], s_Q16 ); - } - } - - /* Limit the quantized signal */ - /* InvMaxSqrVal = pow( 2.0f, 0.33f * ( 21.0f - SNR_dB ) ) / subfr_length; */ - InvMaxSqrVal_Q16 = silk_DIV32_16( silk_log2lin( - 
silk_SMULWB( SILK_FIX_CONST( 21 + 16 / 0.33, 7 ) - psEnc->sCmn.SNR_dB_Q7, SILK_FIX_CONST( 0.33, 16 ) ) ), psEnc->sCmn.subfr_length ); - - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - /* Soft limit on ratio residual energy and squared gains */ - ResNrg = psEncCtrl->ResNrg[ k ]; - ResNrgPart = silk_SMULWW( ResNrg, InvMaxSqrVal_Q16 ); - if( psEncCtrl->ResNrgQ[ k ] > 0 ) { - ResNrgPart = silk_RSHIFT_ROUND( ResNrgPart, psEncCtrl->ResNrgQ[ k ] ); - } else { - if( ResNrgPart >= silk_RSHIFT( silk_int32_MAX, -psEncCtrl->ResNrgQ[ k ] ) ) { - ResNrgPart = silk_int32_MAX; - } else { - ResNrgPart = silk_LSHIFT( ResNrgPart, -psEncCtrl->ResNrgQ[ k ] ); - } - } - gain = psEncCtrl->Gains_Q16[ k ]; - gain_squared = silk_ADD_SAT32( ResNrgPart, silk_SMMUL( gain, gain ) ); - if( gain_squared < silk_int16_MAX ) { - /* recalculate with higher precision */ - gain_squared = silk_SMLAWW( silk_LSHIFT( ResNrgPart, 16 ), gain, gain ); - silk_assert( gain_squared > 0 ); - gain = silk_SQRT_APPROX( gain_squared ); /* Q8 */ - gain = silk_min( gain, silk_int32_MAX >> 8 ); - psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( gain, 8 ); /* Q16 */ - } else { - gain = silk_SQRT_APPROX( gain_squared ); /* Q0 */ - gain = silk_min( gain, silk_int32_MAX >> 16 ); - psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( gain, 16 ); /* Q16 */ - } - } - - /* Save unquantized gains and gain Index */ - silk_memcpy( psEncCtrl->GainsUnq_Q16, psEncCtrl->Gains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) ); - psEncCtrl->lastGainIndexPrev = psShapeSt->LastGainIndex; - - /* Quantize gains */ - silk_gains_quant( psEnc->sCmn.indices.GainsIndices, psEncCtrl->Gains_Q16, - &psShapeSt->LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); - - /* Set quantizer offset for voiced signals. 
Larger offset when LTP coding gain is low or tilt is high (ie low-pass) */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - if( psEncCtrl->LTPredCodGain_Q7 + silk_RSHIFT( psEnc->sCmn.input_tilt_Q15, 8 ) > SILK_FIX_CONST( 1.0, 7 ) ) { - psEnc->sCmn.indices.quantOffsetType = 0; - } else { - psEnc->sCmn.indices.quantOffsetType = 1; - } - } - - /* Quantizer boundary adjustment */ - quant_offset_Q10 = silk_Quantization_Offsets_Q10[ psEnc->sCmn.indices.signalType >> 1 ][ psEnc->sCmn.indices.quantOffsetType ]; - psEncCtrl->Lambda_Q10 = SILK_FIX_CONST( LAMBDA_OFFSET, 10 ) - + silk_SMULBB( SILK_FIX_CONST( LAMBDA_DELAYED_DECISIONS, 10 ), psEnc->sCmn.nStatesDelayedDecision ) - + silk_SMULWB( SILK_FIX_CONST( LAMBDA_SPEECH_ACT, 18 ), psEnc->sCmn.speech_activity_Q8 ) - + silk_SMULWB( SILK_FIX_CONST( LAMBDA_INPUT_QUALITY, 12 ), psEncCtrl->input_quality_Q14 ) - + silk_SMULWB( SILK_FIX_CONST( LAMBDA_CODING_QUALITY, 12 ), psEncCtrl->coding_quality_Q14 ) - + silk_SMULWB( SILK_FIX_CONST( LAMBDA_QUANT_OFFSET, 16 ), quant_offset_Q10 ); - - silk_assert( psEncCtrl->Lambda_Q10 > 0 ); - silk_assert( psEncCtrl->Lambda_Q10 < SILK_FIX_CONST( 2, 10 ) ); -} diff --git a/thirdparty/opus/silk/fixed/regularize_correlations_FIX.c b/thirdparty/opus/silk/fixed/regularize_correlations_FIX.c deleted file mode 100644 index a2836b05f4..0000000000 --- a/thirdparty/opus/silk/fixed/regularize_correlations_FIX.c +++ /dev/null @@ -1,47 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" - -/* Add noise to matrix diagonal */ -void silk_regularize_correlations_FIX( - opus_int32 *XX, /* I/O Correlation matrices */ - opus_int32 *xx, /* I/O Correlation values */ - opus_int32 noise, /* I Noise to add */ - opus_int D /* I Dimension of XX */ -) -{ - opus_int i; - for( i = 0; i < D; i++ ) { - matrix_ptr( &XX[ 0 ], i, i, D ) = silk_ADD32( matrix_ptr( &XX[ 0 ], i, i, D ), noise ); - } - xx[ 0 ] += noise; -} diff --git a/thirdparty/opus/silk/fixed/residual_energy16_FIX.c b/thirdparty/opus/silk/fixed/residual_energy16_FIX.c deleted file mode 100644 index ebffb2a66f..0000000000 --- a/thirdparty/opus/silk/fixed/residual_energy16_FIX.c +++ /dev/null @@ -1,103 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" - -/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */ -opus_int32 silk_residual_energy16_covar_FIX( - const opus_int16 *c, /* I Prediction vector */ - const opus_int32 *wXX, /* I Correlation matrix */ - const opus_int32 *wXx, /* I Correlation vector */ - opus_int32 wxx, /* I Signal energy */ - opus_int D, /* I Dimension */ - opus_int cQ /* I Q value for c vector 0 - 15 */ -) -{ - opus_int i, j, lshifts, Qxtra; - opus_int32 c_max, w_max, tmp, tmp2, nrg; - opus_int cn[ MAX_MATRIX_SIZE ]; - const opus_int32 *pRow; - - /* Safety checks */ - silk_assert( D >= 0 ); - silk_assert( D <= 16 ); - silk_assert( cQ > 0 ); - silk_assert( cQ < 16 ); - - lshifts = 16 - cQ; - Qxtra = lshifts; - - c_max = 0; - for( i = 0; i < D; i++ ) { - c_max = silk_max_32( c_max, silk_abs( (opus_int32)c[ i ] ) ); - } - Qxtra = silk_min_int( Qxtra, silk_CLZ32( c_max ) - 17 ); - - w_max = silk_max_32( wXX[ 0 ], wXX[ D * D - 1 ] ); - Qxtra = silk_min_int( Qxtra, silk_CLZ32( silk_MUL( D, silk_RSHIFT( silk_SMULWB( w_max, c_max ), 4 ) ) ) - 5 ); - Qxtra = silk_max_int( Qxtra, 0 ); - for( i = 0; i < D; i++ ) { - cn[ i ] = silk_LSHIFT( ( opus_int )c[ i ], Qxtra ); - silk_assert( silk_abs(cn[i]) <= ( silk_int16_MAX + 1 ) ); /* Check that silk_SMLAWB can be used */ - } - lshifts -= Qxtra; - - /* Compute wxx - 2 * wXx * c */ - tmp = 0; 
- for( i = 0; i < D; i++ ) { - tmp = silk_SMLAWB( tmp, wXx[ i ], cn[ i ] ); - } - nrg = silk_RSHIFT( wxx, 1 + lshifts ) - tmp; /* Q: -lshifts - 1 */ - - /* Add c' * wXX * c, assuming wXX is symmetric */ - tmp2 = 0; - for( i = 0; i < D; i++ ) { - tmp = 0; - pRow = &wXX[ i * D ]; - for( j = i + 1; j < D; j++ ) { - tmp = silk_SMLAWB( tmp, pRow[ j ], cn[ j ] ); - } - tmp = silk_SMLAWB( tmp, silk_RSHIFT( pRow[ i ], 1 ), cn[ i ] ); - tmp2 = silk_SMLAWB( tmp2, tmp, cn[ i ] ); - } - nrg = silk_ADD_LSHIFT32( nrg, tmp2, lshifts ); /* Q: -lshifts - 1 */ - - /* Keep one bit free always, because we add them for LSF interpolation */ - if( nrg < 1 ) { - nrg = 1; - } else if( nrg > silk_RSHIFT( silk_int32_MAX, lshifts + 2 ) ) { - nrg = silk_int32_MAX >> 1; - } else { - nrg = silk_LSHIFT( nrg, lshifts + 1 ); /* Q0 */ - } - return nrg; - -} diff --git a/thirdparty/opus/silk/fixed/residual_energy_FIX.c b/thirdparty/opus/silk/fixed/residual_energy_FIX.c deleted file mode 100644 index 41f74778e8..0000000000 --- a/thirdparty/opus/silk/fixed/residual_energy_FIX.c +++ /dev/null @@ -1,98 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" - -/* Calculates residual energies of input subframes where all subframes have LPC_order */ -/* of preceding samples */ -void silk_residual_energy_FIX( - opus_int32 nrgs[ MAX_NB_SUBFR ], /* O Residual energy per subframe */ - opus_int nrgsQ[ MAX_NB_SUBFR ], /* O Q value per subframe */ - const opus_int16 x[], /* I Input signal */ - opus_int16 a_Q12[ 2 ][ MAX_LPC_ORDER ], /* I AR coefs for each frame half */ - const opus_int32 gains[ MAX_NB_SUBFR ], /* I Quantization gains */ - const opus_int subfr_length, /* I Subframe length */ - const opus_int nb_subfr, /* I Number of subframes */ - const opus_int LPC_order, /* I LPC order */ - int arch /* I Run-time architecture */ -) -{ - opus_int offset, i, j, rshift, lz1, lz2; - opus_int16 *LPC_res_ptr; - VARDECL( opus_int16, LPC_res ); - const opus_int16 *x_ptr; - opus_int32 tmp32; - SAVE_STACK; - - x_ptr = x; - offset = LPC_order + subfr_length; - - /* Filter input to create the LPC residual for each frame half, and measure subframe energies */ - ALLOC( LPC_res, ( MAX_NB_SUBFR >> 1 ) * offset, 
opus_int16 ); - silk_assert( ( nb_subfr >> 1 ) * ( MAX_NB_SUBFR >> 1 ) == nb_subfr ); - for( i = 0; i < nb_subfr >> 1; i++ ) { - /* Calculate half frame LPC residual signal including preceding samples */ - silk_LPC_analysis_filter( LPC_res, x_ptr, a_Q12[ i ], ( MAX_NB_SUBFR >> 1 ) * offset, LPC_order, arch ); - - /* Point to first subframe of the just calculated LPC residual signal */ - LPC_res_ptr = LPC_res + LPC_order; - for( j = 0; j < ( MAX_NB_SUBFR >> 1 ); j++ ) { - /* Measure subframe energy */ - silk_sum_sqr_shift( &nrgs[ i * ( MAX_NB_SUBFR >> 1 ) + j ], &rshift, LPC_res_ptr, subfr_length ); - - /* Set Q values for the measured energy */ - nrgsQ[ i * ( MAX_NB_SUBFR >> 1 ) + j ] = -rshift; - - /* Move to next subframe */ - LPC_res_ptr += offset; - } - /* Move to next frame half */ - x_ptr += ( MAX_NB_SUBFR >> 1 ) * offset; - } - - /* Apply the squared subframe gains */ - for( i = 0; i < nb_subfr; i++ ) { - /* Fully upscale gains and energies */ - lz1 = silk_CLZ32( nrgs[ i ] ) - 1; - lz2 = silk_CLZ32( gains[ i ] ) - 1; - - tmp32 = silk_LSHIFT32( gains[ i ], lz2 ); - - /* Find squared gains */ - tmp32 = silk_SMMUL( tmp32, tmp32 ); /* Q( 2 * lz2 - 32 )*/ - - /* Scale energies */ - nrgs[ i ] = silk_SMMUL( tmp32, silk_LSHIFT32( nrgs[ i ], lz1 ) ); /* Q( nrgsQ[ i ] + lz1 + 2 * lz2 - 32 - 32 )*/ - nrgsQ[ i ] += lz1 + 2 * lz2 - 32 - 32; - } - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/fixed/schur64_FIX.c b/thirdparty/opus/silk/fixed/schur64_FIX.c deleted file mode 100644 index 764a10ef3e..0000000000 --- a/thirdparty/opus/silk/fixed/schur64_FIX.c +++ /dev/null @@ -1,92 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Slower than schur(), but more accurate. 
*/ -/* Uses SMULL(), available on armv4 */ -opus_int32 silk_schur64( /* O returns residual energy */ - opus_int32 rc_Q16[], /* O Reflection coefficients [order] Q16 */ - const opus_int32 c[], /* I Correlations [order+1] */ - opus_int32 order /* I Prediction order */ -) -{ - opus_int k, n; - opus_int32 C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ]; - opus_int32 Ctmp1_Q30, Ctmp2_Q30, rc_tmp_Q31; - - silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 ); - - /* Check for invalid input */ - if( c[ 0 ] <= 0 ) { - silk_memset( rc_Q16, 0, order * sizeof( opus_int32 ) ); - return 0; - } - - for( k = 0; k < order + 1; k++ ) { - C[ k ][ 0 ] = C[ k ][ 1 ] = c[ k ]; - } - - for( k = 0; k < order; k++ ) { - /* Check that we won't be getting an unstable rc, otherwise stop here. */ - if (silk_abs_int32(C[ k + 1 ][ 0 ]) >= C[ 0 ][ 1 ]) { - if ( C[ k + 1 ][ 0 ] > 0 ) { - rc_Q16[ k ] = -SILK_FIX_CONST( .99f, 16 ); - } else { - rc_Q16[ k ] = SILK_FIX_CONST( .99f, 16 ); - } - k++; - break; - } - - /* Get reflection coefficient: divide two Q30 values and get result in Q31 */ - rc_tmp_Q31 = silk_DIV32_varQ( -C[ k + 1 ][ 0 ], C[ 0 ][ 1 ], 31 ); - - /* Save the output */ - rc_Q16[ k ] = silk_RSHIFT_ROUND( rc_tmp_Q31, 15 ); - - /* Update correlations */ - for( n = 0; n < order - k; n++ ) { - Ctmp1_Q30 = C[ n + k + 1 ][ 0 ]; - Ctmp2_Q30 = C[ n ][ 1 ]; - - /* Multiply and add the highest int32 */ - C[ n + k + 1 ][ 0 ] = Ctmp1_Q30 + silk_SMMUL( silk_LSHIFT( Ctmp2_Q30, 1 ), rc_tmp_Q31 ); - C[ n ][ 1 ] = Ctmp2_Q30 + silk_SMMUL( silk_LSHIFT( Ctmp1_Q30, 1 ), rc_tmp_Q31 ); - } - } - - for(; k < order; k++ ) { - rc_Q16[ k ] = 0; - } - - return silk_max_32( 1, C[ 0 ][ 1 ] ); -} diff --git a/thirdparty/opus/silk/fixed/schur_FIX.c b/thirdparty/opus/silk/fixed/schur_FIX.c deleted file mode 100644 index c4c0ef23b4..0000000000 --- a/thirdparty/opus/silk/fixed/schur_FIX.c +++ /dev/null @@ -1,106 +0,0 @@ -/*********************************************************************** -Copyright (c) 
2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Faster than schur64(), but much less accurate. */ -/* uses SMLAWB(), requiring armv5E and higher. 
*/ -opus_int32 silk_schur( /* O Returns residual energy */ - opus_int16 *rc_Q15, /* O reflection coefficients [order] Q15 */ - const opus_int32 *c, /* I correlations [order+1] */ - const opus_int32 order /* I prediction order */ -) -{ - opus_int k, n, lz; - opus_int32 C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ]; - opus_int32 Ctmp1, Ctmp2, rc_tmp_Q15; - - silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 ); - - /* Get number of leading zeros */ - lz = silk_CLZ32( c[ 0 ] ); - - /* Copy correlations and adjust level to Q30 */ - if( lz < 2 ) { - /* lz must be 1, so shift one to the right */ - for( k = 0; k < order + 1; k++ ) { - C[ k ][ 0 ] = C[ k ][ 1 ] = silk_RSHIFT( c[ k ], 1 ); - } - } else if( lz > 2 ) { - /* Shift to the left */ - lz -= 2; - for( k = 0; k < order + 1; k++ ) { - C[ k ][ 0 ] = C[ k ][ 1 ] = silk_LSHIFT( c[ k ], lz ); - } - } else { - /* No need to shift */ - for( k = 0; k < order + 1; k++ ) { - C[ k ][ 0 ] = C[ k ][ 1 ] = c[ k ]; - } - } - - for( k = 0; k < order; k++ ) { - /* Check that we won't be getting an unstable rc, otherwise stop here. 
*/ - if (silk_abs_int32(C[ k + 1 ][ 0 ]) >= C[ 0 ][ 1 ]) { - if ( C[ k + 1 ][ 0 ] > 0 ) { - rc_Q15[ k ] = -SILK_FIX_CONST( .99f, 15 ); - } else { - rc_Q15[ k ] = SILK_FIX_CONST( .99f, 15 ); - } - k++; - break; - } - - /* Get reflection coefficient */ - rc_tmp_Q15 = -silk_DIV32_16( C[ k + 1 ][ 0 ], silk_max_32( silk_RSHIFT( C[ 0 ][ 1 ], 15 ), 1 ) ); - - /* Clip (shouldn't happen for properly conditioned inputs) */ - rc_tmp_Q15 = silk_SAT16( rc_tmp_Q15 ); - - /* Store */ - rc_Q15[ k ] = (opus_int16)rc_tmp_Q15; - - /* Update correlations */ - for( n = 0; n < order - k; n++ ) { - Ctmp1 = C[ n + k + 1 ][ 0 ]; - Ctmp2 = C[ n ][ 1 ]; - C[ n + k + 1 ][ 0 ] = silk_SMLAWB( Ctmp1, silk_LSHIFT( Ctmp2, 1 ), rc_tmp_Q15 ); - C[ n ][ 1 ] = silk_SMLAWB( Ctmp2, silk_LSHIFT( Ctmp1, 1 ), rc_tmp_Q15 ); - } - } - - for(; k < order; k++ ) { - rc_Q15[ k ] = 0; - } - - /* return residual energy */ - return silk_max_32( 1, C[ 0 ][ 1 ] ); -} diff --git a/thirdparty/opus/silk/fixed/solve_LS_FIX.c b/thirdparty/opus/silk/fixed/solve_LS_FIX.c deleted file mode 100644 index 51d7d49d02..0000000000 --- a/thirdparty/opus/silk/fixed/solve_LS_FIX.c +++ /dev/null @@ -1,249 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" -#include "stack_alloc.h" -#include "tuning_parameters.h" - -/*****************************/ -/* Internal function headers */ -/*****************************/ - -typedef struct { - opus_int32 Q36_part; - opus_int32 Q48_part; -} inv_D_t; - -/* Factorize square matrix A into LDL form */ -static OPUS_INLINE void silk_LDL_factorize_FIX( - opus_int32 *A, /* I/O Pointer to Symetric Square Matrix */ - opus_int M, /* I Size of Matrix */ - opus_int32 *L_Q16, /* I/O Pointer to Square Upper triangular Matrix */ - inv_D_t *inv_D /* I/O Pointer to vector holding inverted diagonal elements of D */ -); - -/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */ -static OPUS_INLINE void silk_LS_SolveFirst_FIX( - const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ - opus_int M, /* I Dim of Matrix equation */ - const opus_int32 *b, /* I b Vector */ - opus_int32 *x_Q16 /* O x Vector */ -); - -/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */ -static OPUS_INLINE void silk_LS_SolveLast_FIX( - const opus_int32 *L_Q16, /* I 
Pointer to Lower Triangular Matrix */ - const opus_int M, /* I Dim of Matrix equation */ - const opus_int32 *b, /* I b Vector */ - opus_int32 *x_Q16 /* O x Vector */ -); - -static OPUS_INLINE void silk_LS_divide_Q16_FIX( - opus_int32 T[], /* I/O Numenator vector */ - inv_D_t *inv_D, /* I 1 / D vector */ - opus_int M /* I dimension */ -); - -/* Solves Ax = b, assuming A is symmetric */ -void silk_solve_LDL_FIX( - opus_int32 *A, /* I Pointer to symetric square matrix A */ - opus_int M, /* I Size of matrix */ - const opus_int32 *b, /* I Pointer to b vector */ - opus_int32 *x_Q16 /* O Pointer to x solution vector */ -) -{ - VARDECL( opus_int32, L_Q16 ); - opus_int32 Y[ MAX_MATRIX_SIZE ]; - inv_D_t inv_D[ MAX_MATRIX_SIZE ]; - SAVE_STACK; - - silk_assert( M <= MAX_MATRIX_SIZE ); - ALLOC( L_Q16, M * M, opus_int32 ); - - /*************************************************** - Factorize A by LDL such that A = L*D*L', - where L is lower triangular with ones on diagonal - ****************************************************/ - silk_LDL_factorize_FIX( A, M, L_Q16, inv_D ); - - /**************************************************** - * substitute D*L'*x = Y. 
ie: - L*D*L'*x = b => L*Y = b <=> Y = inv(L)*b - ******************************************************/ - silk_LS_SolveFirst_FIX( L_Q16, M, b, Y ); - - /**************************************************** - D*L'*x = Y <=> L'*x = inv(D)*Y, because D is - diagonal just multiply with 1/d_i - ****************************************************/ - silk_LS_divide_Q16_FIX( Y, inv_D, M ); - - /**************************************************** - x = inv(L') * inv(D) * Y - *****************************************************/ - silk_LS_SolveLast_FIX( L_Q16, M, Y, x_Q16 ); - RESTORE_STACK; -} - -static OPUS_INLINE void silk_LDL_factorize_FIX( - opus_int32 *A, /* I/O Pointer to Symetric Square Matrix */ - opus_int M, /* I Size of Matrix */ - opus_int32 *L_Q16, /* I/O Pointer to Square Upper triangular Matrix */ - inv_D_t *inv_D /* I/O Pointer to vector holding inverted diagonal elements of D */ -) -{ - opus_int i, j, k, status, loop_count; - const opus_int32 *ptr1, *ptr2; - opus_int32 diag_min_value, tmp_32, err; - opus_int32 v_Q0[ MAX_MATRIX_SIZE ], D_Q0[ MAX_MATRIX_SIZE ]; - opus_int32 one_div_diag_Q36, one_div_diag_Q40, one_div_diag_Q48; - - silk_assert( M <= MAX_MATRIX_SIZE ); - - status = 1; - diag_min_value = silk_max_32( silk_SMMUL( silk_ADD_SAT32( A[ 0 ], A[ silk_SMULBB( M, M ) - 1 ] ), SILK_FIX_CONST( FIND_LTP_COND_FAC, 31 ) ), 1 << 9 ); - for( loop_count = 0; loop_count < M && status == 1; loop_count++ ) { - status = 0; - for( j = 0; j < M; j++ ) { - ptr1 = matrix_adr( L_Q16, j, 0, M ); - tmp_32 = 0; - for( i = 0; i < j; i++ ) { - v_Q0[ i ] = silk_SMULWW( D_Q0[ i ], ptr1[ i ] ); /* Q0 */ - tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ i ], ptr1[ i ] ); /* Q0 */ - } - tmp_32 = silk_SUB32( matrix_ptr( A, j, j, M ), tmp_32 ); - - if( tmp_32 < diag_min_value ) { - tmp_32 = silk_SUB32( silk_SMULBB( loop_count + 1, diag_min_value ), tmp_32 ); - /* Matrix not positive semi-definite, or ill conditioned */ - for( i = 0; i < M; i++ ) { - matrix_ptr( A, i, i, M ) = silk_ADD32( 
matrix_ptr( A, i, i, M ), tmp_32 ); - } - status = 1; - break; - } - D_Q0[ j ] = tmp_32; /* always < max(Correlation) */ - - /* two-step division */ - one_div_diag_Q36 = silk_INVERSE32_varQ( tmp_32, 36 ); /* Q36 */ - one_div_diag_Q40 = silk_LSHIFT( one_div_diag_Q36, 4 ); /* Q40 */ - err = silk_SUB32( (opus_int32)1 << 24, silk_SMULWW( tmp_32, one_div_diag_Q40 ) ); /* Q24 */ - one_div_diag_Q48 = silk_SMULWW( err, one_div_diag_Q40 ); /* Q48 */ - - /* Save 1/Ds */ - inv_D[ j ].Q36_part = one_div_diag_Q36; - inv_D[ j ].Q48_part = one_div_diag_Q48; - - matrix_ptr( L_Q16, j, j, M ) = 65536; /* 1.0 in Q16 */ - ptr1 = matrix_adr( A, j, 0, M ); - ptr2 = matrix_adr( L_Q16, j + 1, 0, M ); - for( i = j + 1; i < M; i++ ) { - tmp_32 = 0; - for( k = 0; k < j; k++ ) { - tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ k ], ptr2[ k ] ); /* Q0 */ - } - tmp_32 = silk_SUB32( ptr1[ i ], tmp_32 ); /* always < max(Correlation) */ - - /* tmp_32 / D_Q0[j] : Divide to Q16 */ - matrix_ptr( L_Q16, i, j, M ) = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), - silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) ); - - /* go to next column */ - ptr2 += M; - } - } - } - - silk_assert( status == 0 ); -} - -static OPUS_INLINE void silk_LS_divide_Q16_FIX( - opus_int32 T[], /* I/O Numenator vector */ - inv_D_t *inv_D, /* I 1 / D vector */ - opus_int M /* I dimension */ -) -{ - opus_int i; - opus_int32 tmp_32; - opus_int32 one_div_diag_Q36, one_div_diag_Q48; - - for( i = 0; i < M; i++ ) { - one_div_diag_Q36 = inv_D[ i ].Q36_part; - one_div_diag_Q48 = inv_D[ i ].Q48_part; - - tmp_32 = T[ i ]; - T[ i ] = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) ); - } -} - -/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */ -static OPUS_INLINE void silk_LS_SolveFirst_FIX( - const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ - opus_int M, /* I Dim of Matrix equation */ - const opus_int32 *b, /* I b Vector */ - 
opus_int32 *x_Q16 /* O x Vector */ -) -{ - opus_int i, j; - const opus_int32 *ptr32; - opus_int32 tmp_32; - - for( i = 0; i < M; i++ ) { - ptr32 = matrix_adr( L_Q16, i, 0, M ); - tmp_32 = 0; - for( j = 0; j < i; j++ ) { - tmp_32 = silk_SMLAWW( tmp_32, ptr32[ j ], x_Q16[ j ] ); - } - x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 ); - } -} - -/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */ -static OPUS_INLINE void silk_LS_SolveLast_FIX( - const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ - const opus_int M, /* I Dim of Matrix equation */ - const opus_int32 *b, /* I b Vector */ - opus_int32 *x_Q16 /* O x Vector */ -) -{ - opus_int i, j; - const opus_int32 *ptr32; - opus_int32 tmp_32; - - for( i = M - 1; i >= 0; i-- ) { - ptr32 = matrix_adr( L_Q16, 0, i, M ); - tmp_32 = 0; - for( j = M - 1; j > i; j-- ) { - tmp_32 = silk_SMLAWW( tmp_32, ptr32[ silk_SMULBB( j, M ) ], x_Q16[ j ] ); - } - x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 ); - } -} diff --git a/thirdparty/opus/silk/fixed/structs_FIX.h b/thirdparty/opus/silk/fixed/structs_FIX.h deleted file mode 100644 index 3294b25128..0000000000 --- a/thirdparty/opus/silk/fixed/structs_FIX.h +++ /dev/null @@ -1,134 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_STRUCTS_FIX_H -#define SILK_STRUCTS_FIX_H - -#include "typedef.h" -#include "main.h" -#include "structs.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/********************************/ -/* Noise shaping analysis state */ -/********************************/ -typedef struct { - opus_int8 LastGainIndex; - opus_int32 HarmBoost_smth_Q16; - opus_int32 HarmShapeGain_smth_Q16; - opus_int32 Tilt_smth_Q16; -} silk_shape_state_FIX; - -/********************************/ -/* Prefilter state */ -/********************************/ -typedef struct { - opus_int16 sLTP_shp[ LTP_BUF_LENGTH ]; - opus_int32 sAR_shp[ MAX_SHAPE_LPC_ORDER + 1 ]; - opus_int sLTP_shp_buf_idx; - opus_int32 sLF_AR_shp_Q12; - opus_int32 sLF_MA_shp_Q12; - opus_int32 sHarmHP_Q2; - opus_int32 rand_seed; - opus_int lagPrev; -} silk_prefilter_state_FIX; - -/********************************/ -/* Encoder state FIX */ -/********************************/ 
-typedef struct { - silk_encoder_state sCmn; /* Common struct, shared with floating-point code */ - silk_shape_state_FIX sShape; /* Shape state */ - silk_prefilter_state_FIX sPrefilt; /* Prefilter State */ - - /* Buffer for find pitch and noise shape analysis */ - silk_DWORD_ALIGN opus_int16 x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];/* Buffer for find pitch and noise shape analysis */ - opus_int LTPCorr_Q15; /* Normalized correlation from pitch lag estimator */ -} silk_encoder_state_FIX; - -/************************/ -/* Encoder control FIX */ -/************************/ -typedef struct { - /* Prediction and coding parameters */ - opus_int32 Gains_Q16[ MAX_NB_SUBFR ]; - silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ]; - opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ]; - opus_int LTP_scale_Q14; - opus_int pitchL[ MAX_NB_SUBFR ]; - - /* Noise shaping parameters */ - /* Testing */ - silk_DWORD_ALIGN opus_int16 AR1_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; - silk_DWORD_ALIGN opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; - opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ]; /* Packs two int16 coefficients per int32 value */ - opus_int GainsPre_Q14[ MAX_NB_SUBFR ]; - opus_int HarmBoost_Q14[ MAX_NB_SUBFR ]; - opus_int Tilt_Q14[ MAX_NB_SUBFR ]; - opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ]; - opus_int Lambda_Q10; - opus_int input_quality_Q14; - opus_int coding_quality_Q14; - - /* measures */ - opus_int sparseness_Q8; - opus_int32 predGain_Q16; - opus_int LTPredCodGain_Q7; - opus_int32 ResNrg[ MAX_NB_SUBFR ]; /* Residual energy per subframe */ - opus_int ResNrgQ[ MAX_NB_SUBFR ]; /* Q domain for the residual energy > 0 */ - - /* Parameters for CBR mode */ - opus_int32 GainsUnq_Q16[ MAX_NB_SUBFR ]; - opus_int8 lastGainIndexPrev; -} silk_encoder_control_FIX; - -/************************/ -/* Encoder Super Struct */ -/************************/ -typedef struct { - silk_encoder_state_FIX state_Fxx[ ENCODER_NUM_CHANNELS ]; - stereo_enc_state sStereo; - 
opus_int32 nBitsUsedLBRR; - opus_int32 nBitsExceeded; - opus_int nChannelsAPI; - opus_int nChannelsInternal; - opus_int nPrevChannelsInternal; - opus_int timeSinceSwitchAllowed_ms; - opus_int allowBandwidthSwitch; - opus_int prev_decode_only_middle; -} silk_encoder; - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/silk/fixed/vector_ops_FIX.c b/thirdparty/opus/silk/fixed/vector_ops_FIX.c deleted file mode 100644 index d94980014f..0000000000 --- a/thirdparty/opus/silk/fixed/vector_ops_FIX.c +++ /dev/null @@ -1,102 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "pitch.h" - -/* Copy and multiply a vector by a constant */ -void silk_scale_copy_vector16( - opus_int16 *data_out, - const opus_int16 *data_in, - opus_int32 gain_Q16, /* I Gain in Q16 */ - const opus_int dataSize /* I Length */ -) -{ - opus_int i; - opus_int32 tmp32; - - for( i = 0; i < dataSize; i++ ) { - tmp32 = silk_SMULWB( gain_Q16, data_in[ i ] ); - data_out[ i ] = (opus_int16)silk_CHECK_FIT16( tmp32 ); - } -} - -/* Multiply a vector by a constant */ -void silk_scale_vector32_Q26_lshift_18( - opus_int32 *data1, /* I/O Q0/Q18 */ - opus_int32 gain_Q26, /* I Q26 */ - opus_int dataSize /* I length */ -) -{ - opus_int i; - - for( i = 0; i < dataSize; i++ ) { - data1[ i ] = (opus_int32)silk_CHECK_FIT32( silk_RSHIFT64( silk_SMULL( data1[ i ], gain_Q26 ), 8 ) ); /* OUTPUT: Q18 */ - } -} - -/* sum = for(i=0;i<len;i++)inVec1[i]*inVec2[i]; --- inner product */ -/* Note for ARM asm: */ -/* * inVec1 and inVec2 should be at least 2 byte aligned. */ -/* * len should be positive 16bit integer. */ -/* * only when len>6, memory access can be reduced by half. 
*/ -opus_int32 silk_inner_prod_aligned( - const opus_int16 *const inVec1, /* I input vector 1 */ - const opus_int16 *const inVec2, /* I input vector 2 */ - const opus_int len, /* I vector lengths */ - int arch /* I Run-time architecture */ -) -{ -#ifdef FIXED_POINT - return celt_inner_prod(inVec1, inVec2, len, arch); -#else - opus_int i; - opus_int32 sum = 0; - for( i = 0; i < len; i++ ) { - sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] ); - } - return sum; -#endif -} - -opus_int64 silk_inner_prod16_aligned_64_c( - const opus_int16 *inVec1, /* I input vector 1 */ - const opus_int16 *inVec2, /* I input vector 2 */ - const opus_int len /* I vector lengths */ -) -{ - opus_int i; - opus_int64 sum = 0; - for( i = 0; i < len; i++ ) { - sum = silk_SMLALBB( sum, inVec1[ i ], inVec2[ i ] ); - } - return sum; -} diff --git a/thirdparty/opus/silk/fixed/warped_autocorrelation_FIX.c b/thirdparty/opus/silk/fixed/warped_autocorrelation_FIX.c deleted file mode 100644 index 6ca6c1184d..0000000000 --- a/thirdparty/opus/silk/fixed/warped_autocorrelation_FIX.c +++ /dev/null @@ -1,95 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FIX.h" - -#define QC 10 -#define QS 14 - -#if defined(MIPSr1_ASM) -#include "mips/warped_autocorrelation_FIX_mipsr1.h" -#endif - - -#ifndef OVERRIDE_silk_warped_autocorrelation_FIX -/* Autocorrelations for a warped frequency axis */ -void silk_warped_autocorrelation_FIX( - opus_int32 *corr, /* O Result [order + 1] */ - opus_int *scale, /* O Scaling of the correlation vector */ - const opus_int16 *input, /* I Input data to correlate */ - const opus_int warping_Q16, /* I Warping coefficient */ - const opus_int length, /* I Length of input */ - const opus_int order /* I Correlation order (even) */ -) -{ - opus_int n, i, lsh; - opus_int32 tmp1_QS, tmp2_QS; - opus_int32 state_QS[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; - opus_int64 corr_QC[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; - - /* Order must be even */ - silk_assert( ( order & 1 ) == 0 ); - silk_assert( 2 * QS - QC >= 0 ); - - /* Loop over samples */ - for( n = 0; n < length; n++ ) { - tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS ); - /* Loop over allpass sections */ - for( i = 0; i < order; i += 2 ) { - /* Output of 
allpass section */ - tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 ); - state_QS[ i ] = tmp1_QS; - corr_QC[ i ] += silk_RSHIFT64( silk_SMULL( tmp1_QS, state_QS[ 0 ] ), 2 * QS - QC ); - /* Output of allpass section */ - tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 ); - state_QS[ i + 1 ] = tmp2_QS; - corr_QC[ i + 1 ] += silk_RSHIFT64( silk_SMULL( tmp2_QS, state_QS[ 0 ] ), 2 * QS - QC ); - } - state_QS[ order ] = tmp1_QS; - corr_QC[ order ] += silk_RSHIFT64( silk_SMULL( tmp1_QS, state_QS[ 0 ] ), 2 * QS - QC ); - } - - lsh = silk_CLZ64( corr_QC[ 0 ] ) - 35; - lsh = silk_LIMIT( lsh, -12 - QC, 30 - QC ); - *scale = -( QC + lsh ); - silk_assert( *scale >= -30 && *scale <= 12 ); - if( lsh >= 0 ) { - for( i = 0; i < order + 1; i++ ) { - corr[ i ] = (opus_int32)silk_CHECK_FIT32( silk_LSHIFT64( corr_QC[ i ], lsh ) ); - } - } else { - for( i = 0; i < order + 1; i++ ) { - corr[ i ] = (opus_int32)silk_CHECK_FIT32( silk_RSHIFT64( corr_QC[ i ], -lsh ) ); - } - } - silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/ -} -#endif /* OVERRIDE_silk_warped_autocorrelation_FIX */ diff --git a/thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse.c b/thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse.c deleted file mode 100644 index 3c3583c5fc..0000000000 --- a/thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse.c +++ /dev/null @@ -1,377 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <xmmintrin.h> -#include <emmintrin.h> -#include <smmintrin.h> - -#include "SigProc_FIX.h" -#include "define.h" -#include "tuning_parameters.h" -#include "pitch.h" -#include "celt/x86/x86cpu.h" - -#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */ - -#define QA 25 -#define N_BITS_HEAD_ROOM 2 -#define MIN_RSHIFTS -16 -#define MAX_RSHIFTS (32 - QA) - -/* Compute reflection coefficients from input signal */ -void silk_burg_modified_sse4_1( - opus_int32 *res_nrg, /* O Residual energy */ - opus_int *res_nrg_Q, /* O Residual energy Q value */ - opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ - const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ - const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ - const opus_int subfr_length, /* I Input signal subframe length (incl. 
D preceding samples) */ - const opus_int nb_subfr, /* I Number of subframes stacked in x */ - const opus_int D, /* I Order */ - int arch /* I Run-time architecture */ -) -{ - opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain; - opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2; - const opus_int16 *x_ptr; - opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ]; - opus_int32 C_last_row[ SILK_MAX_ORDER_LPC ]; - opus_int32 Af_QA[ SILK_MAX_ORDER_LPC ]; - opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ]; - opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ]; - opus_int32 xcorr[ SILK_MAX_ORDER_LPC ]; - - __m128i FIRST_3210, LAST_3210, ATMP_3210, TMP1_3210, TMP2_3210, T1_3210, T2_3210, PTR_3210, SUBFR_3210, X1_3210, X2_3210; - __m128i CONST1 = _mm_set1_epi32(1); - - silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); - - /* Compute autocorrelations, added over subframes */ - silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length ); - if( rshifts > MAX_RSHIFTS ) { - C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS ); - silk_assert( C0 > 0 ); - rshifts = MAX_RSHIFTS; - } else { - lz = silk_CLZ32( C0 ) - 1; - rshifts_extra = N_BITS_HEAD_ROOM - lz; - if( rshifts_extra > 0 ) { - rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts ); - C0 = silk_RSHIFT32( C0, rshifts_extra ); - } else { - rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts ); - C0 = silk_LSHIFT32( C0, -rshifts_extra ); - } - rshifts += rshifts_extra; - } - CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ - silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); - if( rshifts > 0 ) { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - for( n = 1; n < D + 1; n++ ) { - C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( - silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts ); - } - } - } else { - for( s = 0; s < nb_subfr; s++ ) { - int i; - 
opus_int32 d; - x_ptr = x + s * subfr_length; - celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch ); - for( n = 1; n < D + 1; n++ ) { - for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ ) - d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] ); - xcorr[ n - 1 ] += d; - } - for( n = 1; n < D + 1; n++ ) { - C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts ); - } - } - } - silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); - - /* Initialize */ - CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ - - invGain_Q30 = (opus_int32)1 << 30; - reached_max_gain = 0; - for( n = 0; n < D; n++ ) { - /* Update first row of correlation matrix (without first element) */ - /* Update last row of correlation matrix (without last element, stored in reversed order) */ - /* Update C * Af */ - /* Update C * flipud(Af) (stored in reversed order) */ - if( rshifts > -2 ) { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], 16 - rshifts ); /* Q(16-rshifts) */ - x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts ); /* Q(16-rshifts) */ - tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], QA - 16 ); /* Q(QA-16) */ - tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 ); /* Q(QA-16) */ - for( k = 0; k < n; k++ ) { - C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ - C_last_row[ k ] = silk_SMLAWB( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ - Atmp_QA = Af_QA[ k ]; - tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ] ); /* Q(QA-16) */ - tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] ); /* Q(QA-16) */ - } - tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts ); /* Q(16-rshifts) */ - tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts ); /* Q(16-rshifts) */ - for( k = 0; k <= n; k++ ) { - 
CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ] ); /* Q( -rshift ) */ - CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] ); /* Q( -rshift ) */ - } - } - } else { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], -rshifts ); /* Q( -rshifts ) */ - x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts ); /* Q( -rshifts ) */ - tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], 17 ); /* Q17 */ - tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 ); /* Q17 */ - - X1_3210 = _mm_set1_epi32( x1 ); - X2_3210 = _mm_set1_epi32( x2 ); - TMP1_3210 = _mm_setzero_si128(); - TMP2_3210 = _mm_setzero_si128(); - for( k = 0; k < n - 3; k += 4 ) { - PTR_3210 = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 1 - 3 ] ); - SUBFR_3210 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n + k ] ); - FIRST_3210 = _mm_loadu_si128( (__m128i *)&C_first_row[ k ] ); - PTR_3210 = _mm_shuffle_epi32( PTR_3210, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - LAST_3210 = _mm_loadu_si128( (__m128i *)&C_last_row[ k ] ); - ATMP_3210 = _mm_loadu_si128( (__m128i *)&Af_QA[ k ] ); - - T1_3210 = _mm_mullo_epi32( PTR_3210, X1_3210 ); - T2_3210 = _mm_mullo_epi32( SUBFR_3210, X2_3210 ); - - ATMP_3210 = _mm_srai_epi32( ATMP_3210, 7 ); - ATMP_3210 = _mm_add_epi32( ATMP_3210, CONST1 ); - ATMP_3210 = _mm_srai_epi32( ATMP_3210, 1 ); - - FIRST_3210 = _mm_add_epi32( FIRST_3210, T1_3210 ); - LAST_3210 = _mm_add_epi32( LAST_3210, T2_3210 ); - - PTR_3210 = _mm_mullo_epi32( ATMP_3210, PTR_3210 ); - SUBFR_3210 = _mm_mullo_epi32( ATMP_3210, SUBFR_3210 ); - - _mm_storeu_si128( (__m128i *)&C_first_row[ k ], FIRST_3210 ); - _mm_storeu_si128( (__m128i *)&C_last_row[ k ], LAST_3210 ); - - TMP1_3210 = _mm_add_epi32( TMP1_3210, PTR_3210 ); - TMP2_3210 = _mm_add_epi32( TMP2_3210, SUBFR_3210 ); - } - - TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_unpackhi_epi64(TMP1_3210, TMP1_3210 ) ); - TMP2_3210 = _mm_add_epi32( TMP2_3210, 
_mm_unpackhi_epi64(TMP2_3210, TMP2_3210 ) ); - TMP1_3210 = _mm_add_epi32( TMP1_3210, _mm_shufflelo_epi16(TMP1_3210, 0x0E ) ); - TMP2_3210 = _mm_add_epi32( TMP2_3210, _mm_shufflelo_epi16(TMP2_3210, 0x0E ) ); - - tmp1 += _mm_cvtsi128_si32( TMP1_3210 ); - tmp2 += _mm_cvtsi128_si32( TMP2_3210 ); - - for( ; k < n; k++ ) { - C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ - C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ - Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */ - tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */ - tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */ - } - - tmp1 = -tmp1; /* Q17 */ - tmp2 = -tmp2; /* Q17 */ - - { - __m128i xmm_tmp1, xmm_tmp2; - __m128i xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1; - __m128i xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1; - - xmm_tmp1 = _mm_set1_epi32( tmp1 ); - xmm_tmp2 = _mm_set1_epi32( tmp2 ); - - for( k = 0; k <= n - 3; k += 4 ) { - xmm_x_ptr_n_k_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ n - k - 3 ] ); - xmm_x_ptr_sub_x2x0 = OP_CVTEPI16_EPI32_M64( &x_ptr[ subfr_length - n + k - 1 ] ); - - xmm_x_ptr_n_k_x2x0 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - - xmm_x_ptr_n_k_x2x0 = _mm_slli_epi32( xmm_x_ptr_n_k_x2x0, -rshifts - 1 ); - xmm_x_ptr_sub_x2x0 = _mm_slli_epi32( xmm_x_ptr_sub_x2x0, -rshifts - 1 ); - - /* equal shift right 4 bytes, xmm_x_ptr_n_k_x3x1 = _mm_srli_si128(xmm_x_ptr_n_k_x2x0, 4)*/ - xmm_x_ptr_n_k_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - xmm_x_ptr_sub_x3x1 = _mm_shuffle_epi32( xmm_x_ptr_sub_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - - xmm_x_ptr_n_k_x2x0 = _mm_mul_epi32( xmm_x_ptr_n_k_x2x0, xmm_tmp1 ); - xmm_x_ptr_n_k_x3x1 = _mm_mul_epi32( xmm_x_ptr_n_k_x3x1, xmm_tmp1 ); - xmm_x_ptr_sub_x2x0 = _mm_mul_epi32( xmm_x_ptr_sub_x2x0, xmm_tmp2 ); - xmm_x_ptr_sub_x3x1 = _mm_mul_epi32( xmm_x_ptr_sub_x3x1, xmm_tmp2 ); - - 
xmm_x_ptr_n_k_x2x0 = _mm_srli_epi64( xmm_x_ptr_n_k_x2x0, 16 ); - xmm_x_ptr_n_k_x3x1 = _mm_slli_epi64( xmm_x_ptr_n_k_x3x1, 16 ); - xmm_x_ptr_sub_x2x0 = _mm_srli_epi64( xmm_x_ptr_sub_x2x0, 16 ); - xmm_x_ptr_sub_x3x1 = _mm_slli_epi64( xmm_x_ptr_sub_x3x1, 16 ); - - xmm_x_ptr_n_k_x2x0 = _mm_blend_epi16( xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1, 0xCC ); - xmm_x_ptr_sub_x2x0 = _mm_blend_epi16( xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1, 0xCC ); - - X1_3210 = _mm_loadu_si128( (__m128i *)&CAf[ k ] ); - PTR_3210 = _mm_loadu_si128( (__m128i *)&CAb[ k ] ); - - X1_3210 = _mm_add_epi32( X1_3210, xmm_x_ptr_n_k_x2x0 ); - PTR_3210 = _mm_add_epi32( PTR_3210, xmm_x_ptr_sub_x2x0 ); - - _mm_storeu_si128( (__m128i *)&CAf[ k ], X1_3210 ); - _mm_storeu_si128( (__m128i *)&CAb[ k ], PTR_3210 ); - } - - for( ; k <= n; k++ ) { - CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1, - silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) ); /* Q( -rshift ) */ - CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2, - silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */ - } - } - } - } - - /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */ - tmp1 = C_first_row[ n ]; /* Q( -rshifts ) */ - tmp2 = C_last_row[ n ]; /* Q( -rshifts ) */ - num = 0; /* Q( -rshifts ) */ - nrg = silk_ADD32( CAb[ 0 ], CAf[ 0 ] ); /* Q( 1-rshifts ) */ - for( k = 0; k < n; k++ ) { - Atmp_QA = Af_QA[ k ]; - lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1; - lz = silk_min( 32 - QA, lz ); - Atmp1 = silk_LSHIFT32( Atmp_QA, lz ); /* Q( QA + lz ) */ - - tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ - tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ - num = silk_ADD_LSHIFT32( num, silk_SMMUL( CAb[ n - k ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ - nrg = silk_ADD_LSHIFT32( nrg, silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ), - Atmp1 ), 32 
- QA - lz ); /* Q( 1-rshifts ) */ - } - CAf[ n + 1 ] = tmp1; /* Q( -rshifts ) */ - CAb[ n + 1 ] = tmp2; /* Q( -rshifts ) */ - num = silk_ADD32( num, tmp2 ); /* Q( -rshifts ) */ - num = silk_LSHIFT32( -num, 1 ); /* Q( 1-rshifts ) */ - - /* Calculate the next order reflection (parcor) coefficient */ - if( silk_abs( num ) < nrg ) { - rc_Q31 = silk_DIV32_varQ( num, nrg, 31 ); - } else { - rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN; - } - - /* Update inverse prediction gain */ - tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 ); - tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 ); - if( tmp1 <= minInvGain_Q30 ) { - /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */ - tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */ - rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */ - if( rc_Q31 > 0 ) { - /* Newton-Raphson iteration */ - rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */ - rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */ - if( num < 0 ) { - /* Ensure adjusted reflection coefficients has the original sign */ - rc_Q31 = -rc_Q31; - } - } - invGain_Q30 = minInvGain_Q30; - reached_max_gain = 1; - } else { - invGain_Q30 = tmp1; - } - - /* Update the AR coefficients */ - for( k = 0; k < (n + 1) >> 1; k++ ) { - tmp1 = Af_QA[ k ]; /* QA */ - tmp2 = Af_QA[ n - k - 1 ]; /* QA */ - Af_QA[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* QA */ - Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* QA */ - } - Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA ); /* QA */ - - if( reached_max_gain ) { - /* Reached max prediction gain; set remaining coefficients to zero and exit loop */ - for( k = n + 1; k < D; k++ ) { - Af_QA[ k ] = 0; - } - break; - } - - /* Update C * Af and C * Ab */ - for( k = 0; k <= n + 1; k++ ) { - tmp1 = CAf[ k ]; /* Q( -rshifts ) */ - tmp2 = CAb[ n - k + 1 ]; /* Q( 
-rshifts ) */ - CAf[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* Q( -rshifts ) */ - CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* Q( -rshifts ) */ - } - } - - if( reached_max_gain ) { - for( k = 0; k < D; k++ ) { - /* Scale coefficients */ - A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); - } - /* Subtract energy of preceding samples from C0 */ - if( rshifts > 0 ) { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts ); - } - } else { - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch ), -rshifts ); - } - } - /* Approximate residual energy */ - *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 ); - *res_nrg_Q = -rshifts; - } else { - /* Return residual energy */ - nrg = CAf[ 0 ]; /* Q( -rshifts ) */ - tmp1 = (opus_int32)1 << 16; /* Q16 */ - for( k = 0; k < D; k++ ) { - Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); /* Q16 */ - nrg = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 ); /* Q( -rshifts ) */ - tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 ); /* Q16 */ - A_Q16[ k ] = -Atmp1; - } - *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */ - *res_nrg_Q = -rshifts; - } -} diff --git a/thirdparty/opus/silk/fixed/x86/prefilter_FIX_sse.c b/thirdparty/opus/silk/fixed/x86/prefilter_FIX_sse.c deleted file mode 100644 index 488a603f5d..0000000000 --- a/thirdparty/opus/silk/fixed/x86/prefilter_FIX_sse.c +++ /dev/null @@ -1,160 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, 
this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <xmmintrin.h> -#include <emmintrin.h> -#include <smmintrin.h> -#include "main.h" -#include "celt/x86/x86cpu.h" - -void silk_warped_LPC_analysis_filter_FIX_sse4_1( - opus_int32 state[], /* I/O State [order + 1] */ - opus_int32 res_Q2[], /* O Residual signal [length] */ - const opus_int16 coef_Q13[], /* I Coefficients [order] */ - const opus_int16 input[], /* I Input signal [length] */ - const opus_int16 lambda_Q16, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order /* I Filter order (even) */ -) -{ - opus_int n, i; - opus_int32 acc_Q11, tmp1, tmp2; - - /* Order must be even */ - silk_assert( ( order & 1 ) == 0 ); - - if (order == 10) - { - if (0 == lambda_Q16) - { - __m128i coef_Q13_3210, coef_Q13_7654; - __m128i coef_Q13_0123, coef_Q13_4567; - __m128i state_0123, state_4567; - __m128i xmm_product1, xmm_product2; - __m128i xmm_tempa, xmm_tempb; - - register opus_int32 sum; - register opus_int32 state_8, state_9, state_a; - register opus_int64 coef_Q13_8, coef_Q13_9; - - silk_assert( length > 0 ); - - coef_Q13_3210 = OP_CVTEPI16_EPI32_M64( &coef_Q13[ 0 ] ); - coef_Q13_7654 = OP_CVTEPI16_EPI32_M64( &coef_Q13[ 4 ] ); - - coef_Q13_0123 = _mm_shuffle_epi32( coef_Q13_3210, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - coef_Q13_4567 = _mm_shuffle_epi32( coef_Q13_7654, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - - coef_Q13_8 = (opus_int64) coef_Q13[ 8 ]; - coef_Q13_9 = (opus_int64) coef_Q13[ 9 ]; - - state_0123 = _mm_loadu_si128( (__m128i *)(&state[ 0 ] ) ); - state_4567 = _mm_loadu_si128( (__m128i *)(&state[ 4 ] ) ); - - state_0123 = _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - state_4567 = _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - - state_8 = state[ 8 ]; - state_9 = state[ 9 ]; - state_a = 0; - - for( n = 0; n < length; n++ ) - { - xmm_product1 = _mm_mul_epi32( coef_Q13_0123, state_0123 ); /* 64-bit multiply, only 2 pairs */ - xmm_product2 = 
_mm_mul_epi32( coef_Q13_4567, state_4567 ); - - xmm_tempa = _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - xmm_tempb = _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) ); - - xmm_product1 = _mm_srli_epi64( xmm_product1, 16 ); /* >> 16, zero extending works */ - xmm_product2 = _mm_srli_epi64( xmm_product2, 16 ); - - xmm_tempa = _mm_mul_epi32( coef_Q13_3210, xmm_tempa ); - xmm_tempb = _mm_mul_epi32( coef_Q13_7654, xmm_tempb ); - - xmm_tempa = _mm_srli_epi64( xmm_tempa, 16 ); - xmm_tempb = _mm_srli_epi64( xmm_tempb, 16 ); - - xmm_tempa = _mm_add_epi32( xmm_tempa, xmm_product1 ); - xmm_tempb = _mm_add_epi32( xmm_tempb, xmm_product2 ); - xmm_tempa = _mm_add_epi32( xmm_tempa, xmm_tempb ); - - sum = (coef_Q13_8 * state_8) >> 16; - sum += (coef_Q13_9 * state_9) >> 16; - - xmm_tempa = _mm_add_epi32( xmm_tempa, _mm_shuffle_epi32( xmm_tempa, _MM_SHUFFLE( 0, 0, 0, 2 ) ) ); - sum += _mm_cvtsi128_si32( xmm_tempa); - res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( ( 5 + sum ), 9); - - /* move right */ - state_a = state_9; - state_9 = state_8; - state_8 = _mm_cvtsi128_si32( state_4567 ); - state_4567 = _mm_alignr_epi8( state_0123, state_4567, 4 ); - - state_0123 = _mm_alignr_epi8( _mm_cvtsi32_si128( silk_LSHIFT( input[ n ], 14 ) ), state_0123, 4 ); - } - - _mm_storeu_si128( (__m128i *)( &state[ 0 ] ), _mm_shuffle_epi32( state_0123, _MM_SHUFFLE( 0, 1, 2, 3 ) ) ); - _mm_storeu_si128( (__m128i *)( &state[ 4 ] ), _mm_shuffle_epi32( state_4567, _MM_SHUFFLE( 0, 1, 2, 3 ) ) ); - state[ 8 ] = state_8; - state[ 9 ] = state_9; - state[ 10 ] = state_a; - - return; - } - } - - for( n = 0; n < length; n++ ) { - /* Output of lowpass section */ - tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 ); - state[ 0 ] = silk_LSHIFT( input[ n ], 14 ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 ); - state[ 1 ] = tmp2; - acc_Q11 = silk_RSHIFT( order, 1 ); - acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, 
coef_Q13[ 0 ] ); - /* Loop over allpass sections */ - for( i = 2; i < order; i += 2 ) { - /* Output of allpass section */ - tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 ); - state[ i ] = tmp1; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 ); - state[ i + 1 ] = tmp2; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] ); - } - state[ order ] = tmp1; - acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] ); - res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 ); - } -} diff --git a/thirdparty/opus/silk/fixed/x86/vector_ops_FIX_sse.c b/thirdparty/opus/silk/fixed/x86/vector_ops_FIX_sse.c deleted file mode 100644 index c1e90564d0..0000000000 --- a/thirdparty/opus/silk/fixed/x86/vector_ops_FIX_sse.c +++ /dev/null @@ -1,88 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <xmmintrin.h> -#include <emmintrin.h> -#include <smmintrin.h> -#include "main.h" - -#include "SigProc_FIX.h" -#include "pitch.h" - -opus_int64 silk_inner_prod16_aligned_64_sse4_1( - const opus_int16 *inVec1, /* I input vector 1 */ - const opus_int16 *inVec2, /* I input vector 2 */ - const opus_int len /* I vector lengths */ -) -{ - opus_int i, dataSize8; - opus_int64 sum; - - __m128i xmm_tempa; - __m128i inVec1_76543210, acc1; - __m128i inVec2_76543210, acc2; - - sum = 0; - dataSize8 = len & ~7; - - acc1 = _mm_setzero_si128(); - acc2 = _mm_setzero_si128(); - - for( i = 0; i < dataSize8; i += 8 ) { - inVec1_76543210 = _mm_loadu_si128( (__m128i *)(&inVec1[i + 0] ) ); - inVec2_76543210 = _mm_loadu_si128( (__m128i *)(&inVec2[i + 0] ) ); - - /* only when all 4 operands are -32768 (0x8000), this results in wrap around */ - inVec1_76543210 = _mm_madd_epi16( inVec1_76543210, inVec2_76543210 ); - - xmm_tempa = _mm_cvtepi32_epi64( inVec1_76543210 ); - /* equal shift right 8 bytes */ - inVec1_76543210 = _mm_shuffle_epi32( inVec1_76543210, _MM_SHUFFLE( 0, 0, 3, 2 ) ); - inVec1_76543210 = _mm_cvtepi32_epi64( inVec1_76543210 ); - - acc1 = _mm_add_epi64( acc1, xmm_tempa ); - acc2 = _mm_add_epi64( acc2, inVec1_76543210 ); - } - - acc1 = _mm_add_epi64( acc1, acc2 ); - - /* equal shift right 8 bytes */ - acc2 = _mm_shuffle_epi32( acc1, _MM_SHUFFLE( 0, 0, 3, 2 ) ); - acc1 = _mm_add_epi64( acc1, 
acc2 ); - - _mm_storel_epi64( (__m128i *)&sum, acc1 ); - - for( ; i < len; i++ ) { - sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] ); - } - - return sum; -} diff --git a/thirdparty/opus/silk/float/LPC_analysis_filter_FLP.c b/thirdparty/opus/silk/float/LPC_analysis_filter_FLP.c deleted file mode 100644 index cae89a0a18..0000000000 --- a/thirdparty/opus/silk/float/LPC_analysis_filter_FLP.c +++ /dev/null @@ -1,249 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <stdlib.h> -#include "main_FLP.h" - -/************************************************/ -/* LPC analysis filter */ -/* NB! State is kept internally and the */ -/* filter always starts with zero state */ -/* first Order output samples are set to zero */ -/************************************************/ - -/* 16th order LPC analysis filter, does not write first 16 samples */ -static OPUS_INLINE void silk_LPC_analysis_filter16_FLP( - silk_float r_LPC[], /* O LPC residual signal */ - const silk_float PredCoef[], /* I LPC coefficients */ - const silk_float s[], /* I Input signal */ - const opus_int length /* I Length of input signal */ -) -{ - opus_int ix; - silk_float LPC_pred; - const silk_float *s_ptr; - - for( ix = 16; ix < length; ix++ ) { - s_ptr = &s[ix - 1]; - - /* short-term prediction */ - LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] + - s_ptr[ -1 ] * PredCoef[ 1 ] + - s_ptr[ -2 ] * PredCoef[ 2 ] + - s_ptr[ -3 ] * PredCoef[ 3 ] + - s_ptr[ -4 ] * PredCoef[ 4 ] + - s_ptr[ -5 ] * PredCoef[ 5 ] + - s_ptr[ -6 ] * PredCoef[ 6 ] + - s_ptr[ -7 ] * PredCoef[ 7 ] + - s_ptr[ -8 ] * PredCoef[ 8 ] + - s_ptr[ -9 ] * PredCoef[ 9 ] + - s_ptr[ -10 ] * PredCoef[ 10 ] + - s_ptr[ -11 ] * PredCoef[ 11 ] + - s_ptr[ -12 ] * PredCoef[ 12 ] + - s_ptr[ -13 ] * PredCoef[ 13 ] + - s_ptr[ -14 ] * PredCoef[ 14 ] + - s_ptr[ -15 ] * PredCoef[ 15 ]; 
- - /* prediction error */ - r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; - } -} - -/* 12th order LPC analysis filter, does not write first 12 samples */ -static OPUS_INLINE void silk_LPC_analysis_filter12_FLP( - silk_float r_LPC[], /* O LPC residual signal */ - const silk_float PredCoef[], /* I LPC coefficients */ - const silk_float s[], /* I Input signal */ - const opus_int length /* I Length of input signal */ -) -{ - opus_int ix; - silk_float LPC_pred; - const silk_float *s_ptr; - - for( ix = 12; ix < length; ix++ ) { - s_ptr = &s[ix - 1]; - - /* short-term prediction */ - LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] + - s_ptr[ -1 ] * PredCoef[ 1 ] + - s_ptr[ -2 ] * PredCoef[ 2 ] + - s_ptr[ -3 ] * PredCoef[ 3 ] + - s_ptr[ -4 ] * PredCoef[ 4 ] + - s_ptr[ -5 ] * PredCoef[ 5 ] + - s_ptr[ -6 ] * PredCoef[ 6 ] + - s_ptr[ -7 ] * PredCoef[ 7 ] + - s_ptr[ -8 ] * PredCoef[ 8 ] + - s_ptr[ -9 ] * PredCoef[ 9 ] + - s_ptr[ -10 ] * PredCoef[ 10 ] + - s_ptr[ -11 ] * PredCoef[ 11 ]; - - /* prediction error */ - r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; - } -} - -/* 10th order LPC analysis filter, does not write first 10 samples */ -static OPUS_INLINE void silk_LPC_analysis_filter10_FLP( - silk_float r_LPC[], /* O LPC residual signal */ - const silk_float PredCoef[], /* I LPC coefficients */ - const silk_float s[], /* I Input signal */ - const opus_int length /* I Length of input signal */ -) -{ - opus_int ix; - silk_float LPC_pred; - const silk_float *s_ptr; - - for( ix = 10; ix < length; ix++ ) { - s_ptr = &s[ix - 1]; - - /* short-term prediction */ - LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] + - s_ptr[ -1 ] * PredCoef[ 1 ] + - s_ptr[ -2 ] * PredCoef[ 2 ] + - s_ptr[ -3 ] * PredCoef[ 3 ] + - s_ptr[ -4 ] * PredCoef[ 4 ] + - s_ptr[ -5 ] * PredCoef[ 5 ] + - s_ptr[ -6 ] * PredCoef[ 6 ] + - s_ptr[ -7 ] * PredCoef[ 7 ] + - s_ptr[ -8 ] * PredCoef[ 8 ] + - s_ptr[ -9 ] * PredCoef[ 9 ]; - - /* prediction error */ - r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; - } -} - -/* 8th order LPC analysis filter, does not write first 
8 samples */ -static OPUS_INLINE void silk_LPC_analysis_filter8_FLP( - silk_float r_LPC[], /* O LPC residual signal */ - const silk_float PredCoef[], /* I LPC coefficients */ - const silk_float s[], /* I Input signal */ - const opus_int length /* I Length of input signal */ -) -{ - opus_int ix; - silk_float LPC_pred; - const silk_float *s_ptr; - - for( ix = 8; ix < length; ix++ ) { - s_ptr = &s[ix - 1]; - - /* short-term prediction */ - LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] + - s_ptr[ -1 ] * PredCoef[ 1 ] + - s_ptr[ -2 ] * PredCoef[ 2 ] + - s_ptr[ -3 ] * PredCoef[ 3 ] + - s_ptr[ -4 ] * PredCoef[ 4 ] + - s_ptr[ -5 ] * PredCoef[ 5 ] + - s_ptr[ -6 ] * PredCoef[ 6 ] + - s_ptr[ -7 ] * PredCoef[ 7 ]; - - /* prediction error */ - r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; - } -} - -/* 6th order LPC analysis filter, does not write first 6 samples */ -static OPUS_INLINE void silk_LPC_analysis_filter6_FLP( - silk_float r_LPC[], /* O LPC residual signal */ - const silk_float PredCoef[], /* I LPC coefficients */ - const silk_float s[], /* I Input signal */ - const opus_int length /* I Length of input signal */ -) -{ - opus_int ix; - silk_float LPC_pred; - const silk_float *s_ptr; - - for( ix = 6; ix < length; ix++ ) { - s_ptr = &s[ix - 1]; - - /* short-term prediction */ - LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] + - s_ptr[ -1 ] * PredCoef[ 1 ] + - s_ptr[ -2 ] * PredCoef[ 2 ] + - s_ptr[ -3 ] * PredCoef[ 3 ] + - s_ptr[ -4 ] * PredCoef[ 4 ] + - s_ptr[ -5 ] * PredCoef[ 5 ]; - - /* prediction error */ - r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; - } -} - -/************************************************/ -/* LPC analysis filter */ -/* NB! 
State is kept internally and the */ -/* filter always starts with zero state */ -/* first Order output samples are set to zero */ -/************************************************/ -void silk_LPC_analysis_filter_FLP( - silk_float r_LPC[], /* O LPC residual signal */ - const silk_float PredCoef[], /* I LPC coefficients */ - const silk_float s[], /* I Input signal */ - const opus_int length, /* I Length of input signal */ - const opus_int Order /* I LPC order */ -) -{ - silk_assert( Order <= length ); - - switch( Order ) { - case 6: - silk_LPC_analysis_filter6_FLP( r_LPC, PredCoef, s, length ); - break; - - case 8: - silk_LPC_analysis_filter8_FLP( r_LPC, PredCoef, s, length ); - break; - - case 10: - silk_LPC_analysis_filter10_FLP( r_LPC, PredCoef, s, length ); - break; - - case 12: - silk_LPC_analysis_filter12_FLP( r_LPC, PredCoef, s, length ); - break; - - case 16: - silk_LPC_analysis_filter16_FLP( r_LPC, PredCoef, s, length ); - break; - - default: - silk_assert( 0 ); - break; - } - - /* Set first Order output samples to zero */ - silk_memset( r_LPC, 0, Order * sizeof( silk_float ) ); -} - diff --git a/thirdparty/opus/silk/float/LPC_inv_pred_gain_FLP.c b/thirdparty/opus/silk/float/LPC_inv_pred_gain_FLP.c deleted file mode 100644 index 25178bacdd..0000000000 --- a/thirdparty/opus/silk/float/LPC_inv_pred_gain_FLP.c +++ /dev/null @@ -1,76 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "SigProc_FLP.h" - -#define RC_THRESHOLD 0.9999f - -/* compute inverse of LPC prediction gain, and */ -/* test if LPC coefficients are stable (all poles within unit circle) */ -/* this code is based on silk_a2k_FLP() */ -silk_float silk_LPC_inverse_pred_gain_FLP( /* O return inverse prediction gain, energy domain */ - const silk_float *A, /* I prediction coefficients [order] */ - opus_int32 order /* I prediction order */ -) -{ - opus_int k, n; - double invGain, rc, rc_mult1, rc_mult2; - silk_float Atmp[ 2 ][ SILK_MAX_ORDER_LPC ]; - silk_float *Aold, *Anew; - - Anew = Atmp[ order & 1 ]; - silk_memcpy( Anew, A, order * sizeof(silk_float) ); - - invGain = 1.0; - for( k = order - 1; k > 0; k-- ) { - rc = -Anew[ k ]; - if( rc > RC_THRESHOLD || rc < -RC_THRESHOLD ) { - return 0.0f; - } - rc_mult1 = 1.0f - rc * rc; - rc_mult2 = 1.0f / rc_mult1; - 
invGain *= rc_mult1; - /* swap pointers */ - Aold = Anew; - Anew = Atmp[ k & 1 ]; - for( n = 0; n < k; n++ ) { - Anew[ n ] = (silk_float)( ( Aold[ n ] - Aold[ k - n - 1 ] * rc ) * rc_mult2 ); - } - } - rc = -Anew[ 0 ]; - if( rc > RC_THRESHOLD || rc < -RC_THRESHOLD ) { - return 0.0f; - } - rc_mult1 = 1.0f - rc * rc; - invGain *= rc_mult1; - return (silk_float)invGain; -} diff --git a/thirdparty/opus/silk/float/LTP_analysis_filter_FLP.c b/thirdparty/opus/silk/float/LTP_analysis_filter_FLP.c deleted file mode 100644 index 849b7c1c52..0000000000 --- a/thirdparty/opus/silk/float/LTP_analysis_filter_FLP.c +++ /dev/null @@ -1,75 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" - -void silk_LTP_analysis_filter_FLP( - silk_float *LTP_res, /* O LTP res MAX_NB_SUBFR*(pre_lgth+subfr_lngth) */ - const silk_float *x, /* I Input signal, with preceding samples */ - const silk_float B[ LTP_ORDER * MAX_NB_SUBFR ], /* I LTP coefficients for each subframe */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const silk_float invGains[ MAX_NB_SUBFR ], /* I Inverse quantization gains */ - const opus_int subfr_length, /* I Length of each subframe */ - const opus_int nb_subfr, /* I number of subframes */ - const opus_int pre_length /* I Preceding samples for each subframe */ -) -{ - const silk_float *x_ptr, *x_lag_ptr; - silk_float Btmp[ LTP_ORDER ]; - silk_float *LTP_res_ptr; - silk_float inv_gain; - opus_int k, i, j; - - x_ptr = x; - LTP_res_ptr = LTP_res; - for( k = 0; k < nb_subfr; k++ ) { - x_lag_ptr = x_ptr - pitchL[ k ]; - inv_gain = invGains[ k ]; - for( i = 0; i < LTP_ORDER; i++ ) { - Btmp[ i ] = B[ k * LTP_ORDER + i ]; - } - - /* LTP analysis FIR filter */ - for( i = 0; i < subfr_length + pre_length; i++ ) { - LTP_res_ptr[ i ] = x_ptr[ i ]; - /* Subtract long-term prediction */ - for( j = 0; j < LTP_ORDER; j++ ) { - LTP_res_ptr[ i ] -= Btmp[ j ] * x_lag_ptr[ LTP_ORDER / 2 - j ]; - } - LTP_res_ptr[ i ] *= inv_gain; - x_lag_ptr++; - } - - /* Update pointers */ - LTP_res_ptr 
+= subfr_length + pre_length; - x_ptr += subfr_length; - } -} diff --git a/thirdparty/opus/silk/float/LTP_scale_ctrl_FLP.c b/thirdparty/opus/silk/float/LTP_scale_ctrl_FLP.c deleted file mode 100644 index 8dbe29d0fa..0000000000 --- a/thirdparty/opus/silk/float/LTP_scale_ctrl_FLP.c +++ /dev/null @@ -1,52 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" - -void silk_LTP_scale_ctrl_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - opus_int condCoding /* I The type of conditional coding to use */ -) -{ - opus_int round_loss; - - if( condCoding == CODE_INDEPENDENTLY ) { - /* Only scale if first frame in packet */ - round_loss = psEnc->sCmn.PacketLoss_perc + psEnc->sCmn.nFramesPerPacket; - psEnc->sCmn.indices.LTP_scaleIndex = (opus_int8)silk_LIMIT( round_loss * psEncCtrl->LTPredCodGain * 0.1f, 0.0f, 2.0f ); - } else { - /* Default is minimum scaling */ - psEnc->sCmn.indices.LTP_scaleIndex = 0; - } - - psEncCtrl->LTP_scale = (silk_float)silk_LTPScales_table_Q14[ psEnc->sCmn.indices.LTP_scaleIndex ] / 16384.0f; -} diff --git a/thirdparty/opus/silk/float/SigProc_FLP.h b/thirdparty/opus/silk/float/SigProc_FLP.h deleted file mode 100644 index f0cb3733be..0000000000 --- a/thirdparty/opus/silk/float/SigProc_FLP.h +++ /dev/null @@ -1,204 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_SIGPROC_FLP_H -#define SILK_SIGPROC_FLP_H - -#include "SigProc_FIX.h" -#include "float_cast.h" -#include <math.h> - -#ifdef __cplusplus -extern "C" -{ -#endif - -/********************************************************************/ -/* SIGNAL PROCESSING FUNCTIONS */ -/********************************************************************/ - -/* Chirp (bw expand) LP AR filter */ -void silk_bwexpander_FLP( - silk_float *ar, /* I/O AR filter to be expanded (without leading 1) */ - const opus_int d, /* I length of ar */ - const silk_float chirp /* I chirp factor (typically in range (0..1) ) */ -); - -/* compute inverse of LPC prediction gain, and */ -/* test if LPC coefficients are stable (all poles within unit circle) */ -/* this code is based on silk_FLP_a2k() */ -silk_float silk_LPC_inverse_pred_gain_FLP( /* O return inverse prediction gain, energy domain */ - const silk_float *A, /* I prediction coefficients [order] */ - opus_int32 order /* I prediction order */ -); - -silk_float silk_schur_FLP( /* O returns residual energy */ - silk_float refl_coef[], /* O reflection coefficients (length order) */ - const silk_float 
auto_corr[], /* I autocorrelation sequence (length order+1) */ - opus_int order /* I order */ -); - -void silk_k2a_FLP( - silk_float *A, /* O prediction coefficients [order] */ - const silk_float *rc, /* I reflection coefficients [order] */ - opus_int32 order /* I prediction order */ -); - -/* Solve the normal equations using the Levinson-Durbin recursion */ -silk_float silk_levinsondurbin_FLP( /* O prediction error energy */ - silk_float A[], /* O prediction coefficients [order] */ - const silk_float corr[], /* I input auto-correlations [order + 1] */ - const opus_int order /* I prediction order */ -); - -/* compute autocorrelation */ -void silk_autocorrelation_FLP( - silk_float *results, /* O result (length correlationCount) */ - const silk_float *inputData, /* I input data to correlate */ - opus_int inputDataSize, /* I length of input */ - opus_int correlationCount /* I number of correlation taps to compute */ -); - -opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, 1 unvoiced */ - const silk_float *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */ - opus_int *pitch_out, /* O Pitch lag values [nb_subfr] */ - opus_int16 *lagIndex, /* O Lag Index */ - opus_int8 *contourIndex, /* O Pitch contour Index */ - silk_float *LTPCorr, /* I/O Normalized correlation; input: value from previous frame */ - opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */ - const silk_float search_thres1, /* I First stage threshold for lag candidates 0 - 1 */ - const silk_float search_thres2, /* I Final threshold for lag candidates 0 - 1 */ - const opus_int Fs_kHz, /* I sample frequency (kHz) */ - const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ - const opus_int nb_subfr, /* I Number of 5 ms subframes */ - int arch /* I Run-time architecture */ -); - -void silk_insertion_sort_decreasing_FLP( - silk_float *a, /* I/O Unsorted / Sorted vector */ - opus_int *idx, /* O Index vector for the sorted elements */ - 
const opus_int L, /* I Vector length */ - const opus_int K /* I Number of correctly sorted positions */ -); - -/* Compute reflection coefficients from input signal */ -silk_float silk_burg_modified_FLP( /* O returns residual energy */ - silk_float A[], /* O prediction coefficients (length order) */ - const silk_float x[], /* I input signal, length: nb_subfr*(D+L_sub) */ - const silk_float minInvGain, /* I minimum inverse prediction gain */ - const opus_int subfr_length, /* I input signal subframe length (incl. D preceding samples) */ - const opus_int nb_subfr, /* I number of subframes stacked in x */ - const opus_int D /* I order */ -); - -/* multiply a vector by a constant */ -void silk_scale_vector_FLP( - silk_float *data1, - silk_float gain, - opus_int dataSize -); - -/* copy and multiply a vector by a constant */ -void silk_scale_copy_vector_FLP( - silk_float *data_out, - const silk_float *data_in, - silk_float gain, - opus_int dataSize -); - -/* inner product of two silk_float arrays, with result as double */ -double silk_inner_product_FLP( - const silk_float *data1, - const silk_float *data2, - opus_int dataSize -); - -/* sum of squares of a silk_float array, with result as double */ -double silk_energy_FLP( - const silk_float *data, - opus_int dataSize -); - -/********************************************************************/ -/* MACROS */ -/********************************************************************/ - -#define PI (3.1415926536f) - -#define silk_min_float( a, b ) (((a) < (b)) ? (a) : (b)) -#define silk_max_float( a, b ) (((a) > (b)) ? 
(a) : (b)) -#define silk_abs_float( a ) ((silk_float)fabs(a)) - -/* sigmoid function */ -static OPUS_INLINE silk_float silk_sigmoid( silk_float x ) -{ - return (silk_float)(1.0 / (1.0 + exp(-x))); -} - -/* floating-point to integer conversion (rounding) */ -static OPUS_INLINE opus_int32 silk_float2int( silk_float x ) -{ - return (opus_int32)float2int( x ); -} - -/* floating-point to integer conversion (rounding) */ -static OPUS_INLINE void silk_float2short_array( - opus_int16 *out, - const silk_float *in, - opus_int32 length -) -{ - opus_int32 k; - for( k = length - 1; k >= 0; k-- ) { - out[k] = silk_SAT16( (opus_int32)float2int( in[k] ) ); - } -} - -/* integer to floating-point conversion */ -static OPUS_INLINE void silk_short2float_array( - silk_float *out, - const opus_int16 *in, - opus_int32 length -) -{ - opus_int32 k; - for( k = length - 1; k >= 0; k-- ) { - out[k] = (silk_float)in[k]; - } -} - -/* using log2() helps the fixed-point conversion */ -static OPUS_INLINE silk_float silk_log2( double x ) -{ - return ( silk_float )( 3.32192809488736 * log10( x ) ); -} - -#ifdef __cplusplus -} -#endif - -#endif /* SILK_SIGPROC_FLP_H */ diff --git a/thirdparty/opus/silk/float/apply_sine_window_FLP.c b/thirdparty/opus/silk/float/apply_sine_window_FLP.c deleted file mode 100644 index 6aae57c0ab..0000000000 --- a/thirdparty/opus/silk/float/apply_sine_window_FLP.c +++ /dev/null @@ -1,81 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" - -/* Apply sine window to signal vector */ -/* Window types: */ -/* 1 -> sine window from 0 to pi/2 */ -/* 2 -> sine window from pi/2 to pi */ -void silk_apply_sine_window_FLP( - silk_float px_win[], /* O Pointer to windowed signal */ - const silk_float px[], /* I Pointer to input signal */ - const opus_int win_type, /* I Selects a window type */ - const opus_int length /* I Window length, multiple of 4 */ -) -{ - opus_int k; - silk_float freq, c, S0, S1; - - silk_assert( win_type == 1 || win_type == 2 ); - - /* Length must be multiple of 4 */ - silk_assert( ( length & 3 ) == 0 ); - - freq = PI / ( length + 1 ); - - /* Approximation of 2 * cos(f) */ - c = 2.0f - freq * freq; - - /* Initialize state */ - if( win_type < 2 ) { - /* Start from 0 */ - S0 = 0.0f; - /* Approximation of sin(f) */ - S1 = freq; - } else { - /* Start from 1 */ - S0 = 1.0f; - /* Approximation of cos(f) */ - S1 = 0.5f * c; - } - - /* Uses the recursive equation: sin(n*f) = 2 * cos(f) * sin((n-1)*f) - sin((n-2)*f) */ - /* 4 samples at a time */ - for( k = 0; k < length; k += 4 ) { - px_win[ k + 0 ] = px[ k + 0 ] * 0.5f * ( S0 + S1 ); - px_win[ k + 1 ] = px[ k + 1 ] * S1; - S0 = c * S1 - S0; - px_win[ k + 2 ] = px[ k + 2 ] * 0.5f * ( S1 + S0 ); - px_win[ k + 3 ] = px[ k + 3 ] * S0; - S1 = c * S0 - S1; - } -} diff --git a/thirdparty/opus/silk/float/autocorrelation_FLP.c b/thirdparty/opus/silk/float/autocorrelation_FLP.c deleted file mode 100644 index 8b8a9e659a..0000000000 --- a/thirdparty/opus/silk/float/autocorrelation_FLP.c +++ /dev/null @@ -1,52 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "typedef.h" -#include "SigProc_FLP.h" - -/* compute autocorrelation */ -void silk_autocorrelation_FLP( - silk_float *results, /* O result (length correlationCount) */ - const silk_float *inputData, /* I input data to correlate */ - opus_int inputDataSize, /* I length of input */ - opus_int correlationCount /* I number of correlation taps to compute */ -) -{ - opus_int i; - - if( correlationCount > inputDataSize ) { - correlationCount = inputDataSize; - } - - for( i = 0; i < correlationCount; i++ ) { - results[ i ] = (silk_float)silk_inner_product_FLP( inputData, inputData + i, inputDataSize - i ); - } -} diff --git a/thirdparty/opus/silk/float/burg_modified_FLP.c b/thirdparty/opus/silk/float/burg_modified_FLP.c deleted file mode 100644 index ea5dc25a93..0000000000 --- a/thirdparty/opus/silk/float/burg_modified_FLP.c +++ /dev/null @@ -1,186 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FLP.h" -#include "tuning_parameters.h" -#include "define.h" - -#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384*/ - -/* Compute reflection coefficients from input signal */ -silk_float silk_burg_modified_FLP( /* O returns residual energy */ - silk_float A[], /* O prediction coefficients (length order) */ - const silk_float x[], /* I input signal, length: nb_subfr*(D+L_sub) */ - const silk_float minInvGain, /* I minimum inverse prediction gain */ - const opus_int subfr_length, /* I input signal subframe length (incl. 
D preceding samples) */ - const opus_int nb_subfr, /* I number of subframes stacked in x */ - const opus_int D /* I order */ -) -{ - opus_int k, n, s, reached_max_gain; - double C0, invGain, num, nrg_f, nrg_b, rc, Atmp, tmp1, tmp2; - const silk_float *x_ptr; - double C_first_row[ SILK_MAX_ORDER_LPC ], C_last_row[ SILK_MAX_ORDER_LPC ]; - double CAf[ SILK_MAX_ORDER_LPC + 1 ], CAb[ SILK_MAX_ORDER_LPC + 1 ]; - double Af[ SILK_MAX_ORDER_LPC ]; - - silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); - - /* Compute autocorrelations, added over subframes */ - C0 = silk_energy_FLP( x, nb_subfr * subfr_length ); - silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( double ) ); - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - for( n = 1; n < D + 1; n++ ) { - C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n ); - } - } - silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( double ) ); - - /* Initialize */ - CAb[ 0 ] = CAf[ 0 ] = C0 + FIND_LPC_COND_FAC * C0 + 1e-9f; - invGain = 1.0f; - reached_max_gain = 0; - for( n = 0; n < D; n++ ) { - /* Update first row of correlation matrix (without first element) */ - /* Update last row of correlation matrix (without last element, stored in reversed order) */ - /* Update C * Af */ - /* Update C * flipud(Af) (stored in reversed order) */ - for( s = 0; s < nb_subfr; s++ ) { - x_ptr = x + s * subfr_length; - tmp1 = x_ptr[ n ]; - tmp2 = x_ptr[ subfr_length - n - 1 ]; - for( k = 0; k < n; k++ ) { - C_first_row[ k ] -= x_ptr[ n ] * x_ptr[ n - k - 1 ]; - C_last_row[ k ] -= x_ptr[ subfr_length - n - 1 ] * x_ptr[ subfr_length - n + k ]; - Atmp = Af[ k ]; - tmp1 += x_ptr[ n - k - 1 ] * Atmp; - tmp2 += x_ptr[ subfr_length - n + k ] * Atmp; - } - for( k = 0; k <= n; k++ ) { - CAf[ k ] -= tmp1 * x_ptr[ n - k ]; - CAb[ k ] -= tmp2 * x_ptr[ subfr_length - n + k - 1 ]; - } - } - tmp1 = C_first_row[ n ]; - tmp2 = C_last_row[ n ]; - for( k = 0; k < n; k++ ) { - Atmp = Af[ k ]; 
- tmp1 += C_last_row[ n - k - 1 ] * Atmp; - tmp2 += C_first_row[ n - k - 1 ] * Atmp; - } - CAf[ n + 1 ] = tmp1; - CAb[ n + 1 ] = tmp2; - - /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */ - num = CAb[ n + 1 ]; - nrg_b = CAb[ 0 ]; - nrg_f = CAf[ 0 ]; - for( k = 0; k < n; k++ ) { - Atmp = Af[ k ]; - num += CAb[ n - k ] * Atmp; - nrg_b += CAb[ k + 1 ] * Atmp; - nrg_f += CAf[ k + 1 ] * Atmp; - } - silk_assert( nrg_f > 0.0 ); - silk_assert( nrg_b > 0.0 ); - - /* Calculate the next order reflection (parcor) coefficient */ - rc = -2.0 * num / ( nrg_f + nrg_b ); - silk_assert( rc > -1.0 && rc < 1.0 ); - - /* Update inverse prediction gain */ - tmp1 = invGain * ( 1.0 - rc * rc ); - if( tmp1 <= minInvGain ) { - /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */ - rc = sqrt( 1.0 - minInvGain / invGain ); - if( num > 0 ) { - /* Ensure adjusted reflection coefficients has the original sign */ - rc = -rc; - } - invGain = minInvGain; - reached_max_gain = 1; - } else { - invGain = tmp1; - } - - /* Update the AR coefficients */ - for( k = 0; k < (n + 1) >> 1; k++ ) { - tmp1 = Af[ k ]; - tmp2 = Af[ n - k - 1 ]; - Af[ k ] = tmp1 + rc * tmp2; - Af[ n - k - 1 ] = tmp2 + rc * tmp1; - } - Af[ n ] = rc; - - if( reached_max_gain ) { - /* Reached max prediction gain; set remaining coefficients to zero and exit loop */ - for( k = n + 1; k < D; k++ ) { - Af[ k ] = 0.0; - } - break; - } - - /* Update C * Af and C * Ab */ - for( k = 0; k <= n + 1; k++ ) { - tmp1 = CAf[ k ]; - CAf[ k ] += rc * CAb[ n - k + 1 ]; - CAb[ n - k + 1 ] += rc * tmp1; - } - } - - if( reached_max_gain ) { - /* Convert to silk_float */ - for( k = 0; k < D; k++ ) { - A[ k ] = (silk_float)( -Af[ k ] ); - } - /* Subtract energy of preceding samples from C0 */ - for( s = 0; s < nb_subfr; s++ ) { - C0 -= silk_energy_FLP( x + s * subfr_length, D ); - } - /* Approximate residual energy */ - nrg_f = C0 * invGain; - } else { - /* 
Compute residual energy and store coefficients as silk_float */ - nrg_f = CAf[ 0 ]; - tmp1 = 1.0; - for( k = 0; k < D; k++ ) { - Atmp = Af[ k ]; - nrg_f += CAf[ k + 1 ] * Atmp; - tmp1 += Atmp * Atmp; - A[ k ] = (silk_float)(-Atmp); - } - nrg_f -= FIND_LPC_COND_FAC * C0 * tmp1; - } - - /* Return residual energy */ - return (silk_float)nrg_f; -} diff --git a/thirdparty/opus/silk/float/bwexpander_FLP.c b/thirdparty/opus/silk/float/bwexpander_FLP.c deleted file mode 100644 index d55a4d79ab..0000000000 --- a/thirdparty/opus/silk/float/bwexpander_FLP.c +++ /dev/null @@ -1,49 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FLP.h" - -/* Chirp (bw expand) LP AR filter */ -void silk_bwexpander_FLP( - silk_float *ar, /* I/O AR filter to be expanded (without leading 1) */ - const opus_int d, /* I length of ar */ - const silk_float chirp /* I chirp factor (typically in range (0..1) ) */ -) -{ - opus_int i; - silk_float cfac = chirp; - - for( i = 0; i < d - 1; i++ ) { - ar[ i ] *= cfac; - cfac *= chirp; - } - ar[ d - 1 ] *= cfac; -} diff --git a/thirdparty/opus/silk/float/corrMatrix_FLP.c b/thirdparty/opus/silk/float/corrMatrix_FLP.c deleted file mode 100644 index eae6a1cfca..0000000000 --- a/thirdparty/opus/silk/float/corrMatrix_FLP.c +++ /dev/null @@ -1,93 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/********************************************************************** - * Correlation matrix computations for LS estimate. 
- **********************************************************************/ - -#include "main_FLP.h" - -/* Calculates correlation vector X'*t */ -void silk_corrVector_FLP( - const silk_float *x, /* I x vector [L+order-1] used to create X */ - const silk_float *t, /* I Target vector [L] */ - const opus_int L, /* I Length of vecors */ - const opus_int Order, /* I Max lag for correlation */ - silk_float *Xt /* O X'*t correlation vector [order] */ -) -{ - opus_int lag; - const silk_float *ptr1; - - ptr1 = &x[ Order - 1 ]; /* Points to first sample of column 0 of X: X[:,0] */ - for( lag = 0; lag < Order; lag++ ) { - /* Calculate X[:,lag]'*t */ - Xt[ lag ] = (silk_float)silk_inner_product_FLP( ptr1, t, L ); - ptr1--; /* Next column of X */ - } -} - -/* Calculates correlation matrix X'*X */ -void silk_corrMatrix_FLP( - const silk_float *x, /* I x vector [ L+order-1 ] used to create X */ - const opus_int L, /* I Length of vectors */ - const opus_int Order, /* I Max lag for correlation */ - silk_float *XX /* O X'*X correlation matrix [order x order] */ -) -{ - opus_int j, lag; - double energy; - const silk_float *ptr1, *ptr2; - - ptr1 = &x[ Order - 1 ]; /* First sample of column 0 of X */ - energy = silk_energy_FLP( ptr1, L ); /* X[:,0]'*X[:,0] */ - matrix_ptr( XX, 0, 0, Order ) = ( silk_float )energy; - for( j = 1; j < Order; j++ ) { - /* Calculate X[:,j]'*X[:,j] */ - energy += ptr1[ -j ] * ptr1[ -j ] - ptr1[ L - j ] * ptr1[ L - j ]; - matrix_ptr( XX, j, j, Order ) = ( silk_float )energy; - } - - ptr2 = &x[ Order - 2 ]; /* First sample of column 1 of X */ - for( lag = 1; lag < Order; lag++ ) { - /* Calculate X[:,0]'*X[:,lag] */ - energy = silk_inner_product_FLP( ptr1, ptr2, L ); - matrix_ptr( XX, lag, 0, Order ) = ( silk_float )energy; - matrix_ptr( XX, 0, lag, Order ) = ( silk_float )energy; - /* Calculate X[:,j]'*X[:,j + lag] */ - for( j = 1; j < ( Order - lag ); j++ ) { - energy += ptr1[ -j ] * ptr2[ -j ] - ptr1[ L - j ] * ptr2[ L - j ]; - matrix_ptr( XX, lag + j, j, 
Order ) = ( silk_float )energy; - matrix_ptr( XX, j, lag + j, Order ) = ( silk_float )energy; - } - ptr2--; /* Next column of X */ - } -} diff --git a/thirdparty/opus/silk/float/encode_frame_FLP.c b/thirdparty/opus/silk/float/encode_frame_FLP.c deleted file mode 100644 index 2092a4d9e2..0000000000 --- a/thirdparty/opus/silk/float/encode_frame_FLP.c +++ /dev/null @@ -1,372 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" -#include "tuning_parameters.h" - -/* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */ -static OPUS_INLINE void silk_LBRR_encode_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - const silk_float xfw[], /* I Input signal */ - opus_int condCoding /* I The type of conditional coding used so far for this frame */ -); - -void silk_encode_do_VAD_FLP( - silk_encoder_state_FLP *psEnc /* I/O Encoder state FLP */ -) -{ - /****************************/ - /* Voice Activity Detection */ - /****************************/ - silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch ); - - /**************************************************/ - /* Convert speech activity into VAD and DTX flags */ - /**************************************************/ - if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) { - psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY; - psEnc->sCmn.noSpeechCounter++; - if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) { - psEnc->sCmn.inDTX = 0; - } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) { - psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX; - psEnc->sCmn.inDTX = 0; - } - psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0; - } else { - psEnc->sCmn.noSpeechCounter = 0; - psEnc->sCmn.inDTX = 0; - psEnc->sCmn.indices.signalType = TYPE_UNVOICED; - psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1; - } -} - -/****************/ -/* Encode frame */ -/****************/ -opus_int silk_encode_frame_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - opus_int32 *pnBytesOut, /* O Number of payload bytes; */ - ec_enc *psRangeEnc, /* I/O 
compressor data structure */ - opus_int condCoding, /* I The type of conditional coding to use */ - opus_int maxBits, /* I If > 0: maximum number of output bits */ - opus_int useCBR /* I Flag to force constant-bitrate operation */ -) -{ - silk_encoder_control_FLP sEncCtrl; - opus_int i, iter, maxIter, found_upper, found_lower, ret = 0; - silk_float *x_frame, *res_pitch_frame; - silk_float xfw[ MAX_FRAME_LENGTH ]; - silk_float res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ]; - ec_enc sRangeEnc_copy, sRangeEnc_copy2; - silk_nsq_state sNSQ_copy, sNSQ_copy2; - opus_int32 seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper; - opus_int32 gainsID, gainsID_lower, gainsID_upper; - opus_int16 gainMult_Q8; - opus_int16 ec_prevLagIndex_copy; - opus_int ec_prevSignalType_copy; - opus_int8 LastGainIndex_copy2; - opus_int32 pGains_Q16[ MAX_NB_SUBFR ]; - opus_uint8 ec_buf_copy[ 1275 ]; - - /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */ - LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0; - - psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3; - - /**************************************************************/ - /* Set up Input Pointers, and insert frame in input buffer */ - /**************************************************************/ - /* pointers aligned with start of frame to encode */ - x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length; /* start of frame to encode */ - res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length; /* start of pitch LPC residual frame */ - - /***************************************/ - /* Ensure smooth bandwidth transitions */ - /***************************************/ - silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length ); - - /*******************************************/ - /* Copy new frame to front of input buffer */ - /*******************************************/ - 
silk_short2float_array( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length ); - - /* Add tiny signal to avoid high CPU load from denormalized floating point numbers */ - for( i = 0; i < 8; i++ ) { - x_frame[ LA_SHAPE_MS * psEnc->sCmn.fs_kHz + i * ( psEnc->sCmn.frame_length >> 3 ) ] += ( 1 - ( i & 2 ) ) * 1e-6f; - } - - if( !psEnc->sCmn.prefillFlag ) { - /*****************************************/ - /* Find pitch lags, initial LPC analysis */ - /*****************************************/ - silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch ); - - /************************/ - /* Noise shape analysis */ - /************************/ - silk_noise_shape_analysis_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame ); - - /***************************************************/ - /* Find linear prediction coefficients (LPC + LTP) */ - /***************************************************/ - silk_find_pred_coefs_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding ); - - /****************************************/ - /* Process gains */ - /****************************************/ - silk_process_gains_FLP( psEnc, &sEncCtrl, condCoding ); - - /*****************************************/ - /* Prefiltering for noise shaper */ - /*****************************************/ - silk_prefilter_FLP( psEnc, &sEncCtrl, xfw, x_frame ); - - /****************************************/ - /* Low Bitrate Redundant Encoding */ - /****************************************/ - silk_LBRR_encode_FLP( psEnc, &sEncCtrl, xfw, condCoding ); - - /* Loop over quantizer and entroy coding to control bitrate */ - maxIter = 6; - gainMult_Q8 = SILK_FIX_CONST( 1, 8 ); - found_lower = 0; - found_upper = 0; - gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr ); - gainsID_lower = -1; - gainsID_upper = -1; - /* Copy part of the input state */ - silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) ); - silk_memcpy( 
&sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); - seed_copy = psEnc->sCmn.indices.Seed; - ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex; - ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType; - for( iter = 0; ; iter++ ) { - if( gainsID == gainsID_lower ) { - nBits = nBits_lower; - } else if( gainsID == gainsID_upper ) { - nBits = nBits_upper; - } else { - /* Restore part of the input state */ - if( iter > 0 ) { - silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) ); - silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) ); - psEnc->sCmn.indices.Seed = seed_copy; - psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy; - psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy; - } - - /*****************************************/ - /* Noise shaping quantization */ - /*****************************************/ - silk_NSQ_wrapper_FLP( psEnc, &sEncCtrl, &psEnc->sCmn.indices, &psEnc->sCmn.sNSQ, psEnc->sCmn.pulses, xfw ); - - /****************************************/ - /* Encode Parameters */ - /****************************************/ - silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding ); - - /****************************************/ - /* Encode Excitation Signal */ - /****************************************/ - silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType, - psEnc->sCmn.pulses, psEnc->sCmn.frame_length ); - - nBits = ec_tell( psRangeEnc ); - - if( useCBR == 0 && iter == 0 && nBits <= maxBits ) { - break; - } - } - - if( iter == maxIter ) { - if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) { - /* Restore output state from earlier iteration that did meet the bitrate budget */ - silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) ); - silk_assert( sRangeEnc_copy2.offs <= 1275 ); - silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs ); - silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( 
silk_nsq_state ) ); - psEnc->sShape.LastGainIndex = LastGainIndex_copy2; - } - break; - } - - if( nBits > maxBits ) { - if( found_lower == 0 && iter >= 2 ) { - /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */ - sEncCtrl.Lambda *= 1.5f; - found_upper = 0; - gainsID_upper = -1; - } else { - found_upper = 1; - nBits_upper = nBits; - gainMult_upper = gainMult_Q8; - gainsID_upper = gainsID; - } - } else if( nBits < maxBits - 5 ) { - found_lower = 1; - nBits_lower = nBits; - gainMult_lower = gainMult_Q8; - if( gainsID != gainsID_lower ) { - gainsID_lower = gainsID; - /* Copy part of the output state */ - silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) ); - silk_assert( psRangeEnc->offs <= 1275 ); - silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs ); - silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); - LastGainIndex_copy2 = psEnc->sShape.LastGainIndex; - } - } else { - /* Within 5 bits of budget: close enough */ - break; - } - - if( ( found_lower & found_upper ) == 0 ) { - /* Adjust gain according to high-rate rate/distortion curve */ - opus_int32 gain_factor_Q16; - gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) ); - gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) ); - if( nBits > maxBits ) { - gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) ); - } - gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 ); - } else { - /* Adjust gain by interpolating */ - gainMult_Q8 = gainMult_lower + ( ( gainMult_upper - gainMult_lower ) * ( maxBits - nBits_lower ) ) / ( nBits_upper - nBits_lower ); - /* New gain multplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */ - if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) { - gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - 
gainMult_lower, 2 ); - } else - if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) { - gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ); - } - } - - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - pGains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 ); - } - - /* Quantize gains */ - psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev; - silk_gains_quant( psEnc->sCmn.indices.GainsIndices, pGains_Q16, - &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); - - /* Unique identifier of gains vector */ - gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr ); - - /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */ - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - sEncCtrl.Gains[ i ] = pGains_Q16[ i ] / 65536.0f; - } - } - } - - /* Update input buffer */ - silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ], - ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( silk_float ) ); - - /* Exit without entropy coding */ - if( psEnc->sCmn.prefillFlag ) { - /* No payload */ - *pnBytesOut = 0; - return ret; - } - - /* Parameters needed for next frame */ - psEnc->sCmn.prevLag = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ]; - psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType; - - /****************************************/ - /* Finalize payload */ - /****************************************/ - psEnc->sCmn.first_frame_after_reset = 0; - /* Payload size */ - *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 ); - - return ret; -} - -/* Low-Bitrate Redundancy (LBRR) encoding. 
Reuse all parameters but encode excitation at lower bitrate */ -static OPUS_INLINE void silk_LBRR_encode_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - const silk_float xfw[], /* I Input signal */ - opus_int condCoding /* I The type of conditional coding used so far for this frame */ -) -{ - opus_int k; - opus_int32 Gains_Q16[ MAX_NB_SUBFR ]; - silk_float TempGains[ MAX_NB_SUBFR ]; - SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ]; - silk_nsq_state sNSQ_LBRR; - - /*******************************************/ - /* Control use of inband LBRR */ - /*******************************************/ - if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) { - psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1; - - /* Copy noise shaping quantizer state and quantization indices from regular encoding */ - silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); - silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) ); - - /* Save original gains */ - silk_memcpy( TempGains, psEncCtrl->Gains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) ); - - if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) { - /* First frame in packet or previous frame not LBRR coded */ - psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex; - - /* Increase Gains to get target LBRR rate */ - psIndices_LBRR->GainsIndices[ 0 ] += psEnc->sCmn.LBRR_GainIncreases; - psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 ); - } - - /* Decode to get gains in sync with decoder */ - silk_gains_dequant( Gains_Q16, psIndices_LBRR->GainsIndices, - &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); - - /* Overwrite unquantized gains with 
quantized gains and convert back to Q0 from Q16 */ - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->Gains[ k ] = Gains_Q16[ k ] * ( 1.0f / 65536.0f ); - } - - /*****************************************/ - /* Noise shaping quantization */ - /*****************************************/ - silk_NSQ_wrapper_FLP( psEnc, psEncCtrl, psIndices_LBRR, &sNSQ_LBRR, - psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], xfw ); - - /* Restore original gains */ - silk_memcpy( psEncCtrl->Gains, TempGains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) ); - } -} diff --git a/thirdparty/opus/silk/float/energy_FLP.c b/thirdparty/opus/silk/float/energy_FLP.c deleted file mode 100644 index 24b8179f9e..0000000000 --- a/thirdparty/opus/silk/float/energy_FLP.c +++ /dev/null @@ -1,60 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FLP.h" - -/* sum of squares of a silk_float array, with result as double */ -double silk_energy_FLP( - const silk_float *data, - opus_int dataSize -) -{ - opus_int i, dataSize4; - double result; - - /* 4x unrolled loop */ - result = 0.0; - dataSize4 = dataSize & 0xFFFC; - for( i = 0; i < dataSize4; i += 4 ) { - result += data[ i + 0 ] * (double)data[ i + 0 ] + - data[ i + 1 ] * (double)data[ i + 1 ] + - data[ i + 2 ] * (double)data[ i + 2 ] + - data[ i + 3 ] * (double)data[ i + 3 ]; - } - - /* add any remaining products */ - for( ; i < dataSize; i++ ) { - result += data[ i ] * (double)data[ i ]; - } - - silk_assert( result >= 0.0 ); - return result; -} diff --git a/thirdparty/opus/silk/float/find_LPC_FLP.c b/thirdparty/opus/silk/float/find_LPC_FLP.c deleted file mode 100644 index fcfe1c3681..0000000000 --- a/thirdparty/opus/silk/float/find_LPC_FLP.c +++ /dev/null @@ -1,104 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "define.h" -#include "main_FLP.h" -#include "tuning_parameters.h" - -/* LPC analysis */ -void silk_find_LPC_FLP( - silk_encoder_state *psEncC, /* I/O Encoder state */ - opus_int16 NLSF_Q15[], /* O NLSFs */ - const silk_float x[], /* I Input signal */ - const silk_float minInvGain /* I Inverse of max prediction gain */ -) -{ - opus_int k, subfr_length; - silk_float a[ MAX_LPC_ORDER ]; - - /* Used only for NLSF interpolation */ - silk_float res_nrg, res_nrg_2nd, res_nrg_interp; - opus_int16 NLSF0_Q15[ MAX_LPC_ORDER ]; - silk_float a_tmp[ MAX_LPC_ORDER ]; - silk_float LPC_res[ MAX_FRAME_LENGTH + MAX_NB_SUBFR * MAX_LPC_ORDER ]; - - subfr_length = psEncC->subfr_length + psEncC->predictLPCOrder; - - /* Default: No interpolation */ - psEncC->indices.NLSFInterpCoef_Q2 = 4; - - /* Burg AR analysis for the full frame */ - res_nrg = silk_burg_modified_FLP( a, x, minInvGain, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder ); - - if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) { - /* Optimal solution for last 10 ms; subtract residual energy here, as that's easier than */ - /* adding it to the residual energy of the first 10 ms in each iteration of the search below */ - res_nrg -= silk_burg_modified_FLP( a_tmp, x + ( MAX_NB_SUBFR / 2 ) * subfr_length, minInvGain, subfr_length, MAX_NB_SUBFR / 2, psEncC->predictLPCOrder ); - - /* Convert to NLSFs */ - silk_A2NLSF_FLP( NLSF_Q15, a_tmp, psEncC->predictLPCOrder ); - - /* Search over interpolation indices to find the one with lowest residual energy */ - res_nrg_2nd = silk_float_MAX; - for( k = 3; k >= 0; k-- ) { - /* Interpolate NLSFs for first half */ - silk_interpolate( NLSF0_Q15, psEncC->prev_NLSFq_Q15, NLSF_Q15, k, psEncC->predictLPCOrder ); - - /* Convert to LPC for residual energy evaluation */ - silk_NLSF2A_FLP( a_tmp, 
NLSF0_Q15, psEncC->predictLPCOrder ); - - /* Calculate residual energy with LSF interpolation */ - silk_LPC_analysis_filter_FLP( LPC_res, a_tmp, x, 2 * subfr_length, psEncC->predictLPCOrder ); - res_nrg_interp = (silk_float)( - silk_energy_FLP( LPC_res + psEncC->predictLPCOrder, subfr_length - psEncC->predictLPCOrder ) + - silk_energy_FLP( LPC_res + psEncC->predictLPCOrder + subfr_length, subfr_length - psEncC->predictLPCOrder ) ); - - /* Determine whether current interpolated NLSFs are best so far */ - if( res_nrg_interp < res_nrg ) { - /* Interpolation has lower residual energy */ - res_nrg = res_nrg_interp; - psEncC->indices.NLSFInterpCoef_Q2 = (opus_int8)k; - } else if( res_nrg_interp > res_nrg_2nd ) { - /* No reason to continue iterating - residual energies will continue to climb */ - break; - } - res_nrg_2nd = res_nrg_interp; - } - } - - if( psEncC->indices.NLSFInterpCoef_Q2 == 4 ) { - /* NLSF interpolation is currently inactive, calculate NLSFs from full frame AR coefficients */ - silk_A2NLSF_FLP( NLSF_Q15, a, psEncC->predictLPCOrder ); - } - - silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || - ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) ); -} diff --git a/thirdparty/opus/silk/float/find_LTP_FLP.c b/thirdparty/opus/silk/float/find_LTP_FLP.c deleted file mode 100644 index 7229996014..0000000000 --- a/thirdparty/opus/silk/float/find_LTP_FLP.c +++ /dev/null @@ -1,132 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" -#include "tuning_parameters.h" - -void silk_find_LTP_FLP( - silk_float b[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */ - silk_float WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ - silk_float *LTPredCodGain, /* O LTP coding gain */ - const silk_float r_lpc[], /* I LPC residual */ - const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ - const silk_float Wght[ MAX_NB_SUBFR ], /* I Weights */ - const opus_int subfr_length, /* I Subframe length */ - const opus_int nb_subfr, /* I number of subframes */ - const opus_int mem_offset /* I Number of samples in LTP memory */ -) -{ - opus_int i, k; - silk_float *b_ptr, temp, *WLTP_ptr; - silk_float LPC_res_nrg, LPC_LTP_res_nrg; - silk_float d[ MAX_NB_SUBFR ], m, g, delta_b[ LTP_ORDER ]; - silk_float w[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], regu; - silk_float Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ]; - const silk_float *r_ptr, *lag_ptr; - - b_ptr = b; - WLTP_ptr = WLTP; - r_ptr = &r_lpc[ mem_offset ]; - for( k = 0; k < nb_subfr; k++ ) { - lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 ); - - silk_corrMatrix_FLP( lag_ptr, subfr_length, LTP_ORDER, WLTP_ptr ); - silk_corrVector_FLP( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr ); - - rr[ k ] = ( silk_float )silk_energy_FLP( r_ptr, subfr_length ); - regu = 1.0f + rr[ k ] + - matrix_ptr( WLTP_ptr, 0, 0, LTP_ORDER ) + - matrix_ptr( WLTP_ptr, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER ); - regu *= LTP_DAMPING / 3; - silk_regularize_correlations_FLP( WLTP_ptr, &rr[ k ], regu, LTP_ORDER ); - silk_solve_LDL_FLP( WLTP_ptr, LTP_ORDER, Rr, b_ptr ); - - /* Calculate residual energy */ - nrg[ k ] = silk_residual_energy_covar_FLP( b_ptr, WLTP_ptr, Rr, rr[ k ], LTP_ORDER ); - - temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length ); - silk_scale_vector_FLP( WLTP_ptr, temp, LTP_ORDER * LTP_ORDER ); - w[ k ] = 
matrix_ptr( WLTP_ptr, LTP_ORDER / 2, LTP_ORDER / 2, LTP_ORDER ); - - r_ptr += subfr_length; - b_ptr += LTP_ORDER; - WLTP_ptr += LTP_ORDER * LTP_ORDER; - } - - /* Compute LTP coding gain */ - if( LTPredCodGain != NULL ) { - LPC_LTP_res_nrg = 1e-6f; - LPC_res_nrg = 0.0f; - for( k = 0; k < nb_subfr; k++ ) { - LPC_res_nrg += rr[ k ] * Wght[ k ]; - LPC_LTP_res_nrg += nrg[ k ] * Wght[ k ]; - } - - silk_assert( LPC_LTP_res_nrg > 0 ); - *LTPredCodGain = 3.0f * silk_log2( LPC_res_nrg / LPC_LTP_res_nrg ); - } - - /* Smoothing */ - /* d = sum( B, 1 ); */ - b_ptr = b; - for( k = 0; k < nb_subfr; k++ ) { - d[ k ] = 0; - for( i = 0; i < LTP_ORDER; i++ ) { - d[ k ] += b_ptr[ i ]; - } - b_ptr += LTP_ORDER; - } - /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */ - temp = 1e-3f; - for( k = 0; k < nb_subfr; k++ ) { - temp += w[ k ]; - } - m = 0; - for( k = 0; k < nb_subfr; k++ ) { - m += d[ k ] * w[ k ]; - } - m = m / temp; - - b_ptr = b; - for( k = 0; k < nb_subfr; k++ ) { - g = LTP_SMOOTHING / ( LTP_SMOOTHING + w[ k ] ) * ( m - d[ k ] ); - temp = 0; - for( i = 0; i < LTP_ORDER; i++ ) { - delta_b[ i ] = silk_max_float( b_ptr[ i ], 0.1f ); - temp += delta_b[ i ]; - } - temp = g / temp; - for( i = 0; i < LTP_ORDER; i++ ) { - b_ptr[ i ] = b_ptr[ i ] + delta_b[ i ] * temp; - } - b_ptr += LTP_ORDER; - } -} diff --git a/thirdparty/opus/silk/float/find_pitch_lags_FLP.c b/thirdparty/opus/silk/float/find_pitch_lags_FLP.c deleted file mode 100644 index f3b22d25ce..0000000000 --- a/thirdparty/opus/silk/float/find_pitch_lags_FLP.c +++ /dev/null @@ -1,132 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <stdlib.h> -#include "main_FLP.h" -#include "tuning_parameters.h" - -void silk_find_pitch_lags_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - silk_float res[], /* O Residual */ - const silk_float x[], /* I Speech signal */ - int arch /* I Run-time architecture */ -) -{ - opus_int buf_len; - silk_float thrhld, res_nrg; - const silk_float *x_buf_ptr, *x_buf; - silk_float auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ]; - silk_float A[ MAX_FIND_PITCH_LPC_ORDER ]; - silk_float refl_coef[ MAX_FIND_PITCH_LPC_ORDER ]; - silk_float Wsig[ FIND_PITCH_LPC_WIN_MAX ]; - silk_float *Wsig_ptr; - - /******************************************/ - /* Set up buffer lengths etc based on Fs */ - /******************************************/ - buf_len = psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length; - - /* Safety check */ - silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length ); - - x_buf = x - psEnc->sCmn.ltp_mem_length; - - /******************************************/ - /* Estimate LPC AR coeficients */ - /******************************************/ - - /* Calculate windowed signal */ - - /* First LA_LTP samples */ - x_buf_ptr = x_buf + buf_len - psEnc->sCmn.pitch_LPC_win_length; - Wsig_ptr = Wsig; - silk_apply_sine_window_FLP( Wsig_ptr, x_buf_ptr, 1, psEnc->sCmn.la_pitch ); - - /* Middle non-windowed samples */ - Wsig_ptr += psEnc->sCmn.la_pitch; - x_buf_ptr += psEnc->sCmn.la_pitch; - silk_memcpy( Wsig_ptr, x_buf_ptr, ( psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 ) ) * sizeof( silk_float ) ); - - /* Last LA_LTP samples */ - Wsig_ptr += psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 ); - x_buf_ptr += psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 ); - silk_apply_sine_window_FLP( 
Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch ); - - /* Calculate autocorrelation sequence */ - silk_autocorrelation_FLP( auto_corr, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1 ); - - /* Add white noise, as a fraction of the energy */ - auto_corr[ 0 ] += auto_corr[ 0 ] * FIND_PITCH_WHITE_NOISE_FRACTION + 1; - - /* Calculate the reflection coefficients using Schur */ - res_nrg = silk_schur_FLP( refl_coef, auto_corr, psEnc->sCmn.pitchEstimationLPCOrder ); - - /* Prediction gain */ - psEncCtrl->predGain = auto_corr[ 0 ] / silk_max_float( res_nrg, 1.0f ); - - /* Convert reflection coefficients to prediction coefficients */ - silk_k2a_FLP( A, refl_coef, psEnc->sCmn.pitchEstimationLPCOrder ); - - /* Bandwidth expansion */ - silk_bwexpander_FLP( A, psEnc->sCmn.pitchEstimationLPCOrder, FIND_PITCH_BANDWIDTH_EXPANSION ); - - /*****************************************/ - /* LPC analysis filtering */ - /*****************************************/ - silk_LPC_analysis_filter_FLP( res, A, x_buf, buf_len, psEnc->sCmn.pitchEstimationLPCOrder ); - - if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) { - /* Threshold for pitch estimator */ - thrhld = 0.6f; - thrhld -= 0.004f * psEnc->sCmn.pitchEstimationLPCOrder; - thrhld -= 0.1f * psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ); - thrhld -= 0.15f * (psEnc->sCmn.prevSignalType >> 1); - thrhld -= 0.1f * psEnc->sCmn.input_tilt_Q15 * ( 1.0f / 32768.0f ); - - /*****************************************/ - /* Call Pitch estimator */ - /*****************************************/ - if( silk_pitch_analysis_core_FLP( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, - &psEnc->sCmn.indices.contourIndex, &psEnc->LTPCorr, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16 / 65536.0f, - thrhld, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr, arch ) == 0 ) - { - psEnc->sCmn.indices.signalType = TYPE_VOICED; - 
} else { - psEnc->sCmn.indices.signalType = TYPE_UNVOICED; - } - } else { - silk_memset( psEncCtrl->pitchL, 0, sizeof( psEncCtrl->pitchL ) ); - psEnc->sCmn.indices.lagIndex = 0; - psEnc->sCmn.indices.contourIndex = 0; - psEnc->LTPCorr = 0; - } -} diff --git a/thirdparty/opus/silk/float/find_pred_coefs_FLP.c b/thirdparty/opus/silk/float/find_pred_coefs_FLP.c deleted file mode 100644 index 1af4fe5f1b..0000000000 --- a/thirdparty/opus/silk/float/find_pred_coefs_FLP.c +++ /dev/null @@ -1,118 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" - -/* Find LPC and LTP coefficients */ -void silk_find_pred_coefs_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - const silk_float res_pitch[], /* I Residual from pitch analysis */ - const silk_float x[], /* I Speech signal */ - opus_int condCoding /* I The type of conditional coding to use */ -) -{ - opus_int i; - silk_float WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ]; - silk_float invGains[ MAX_NB_SUBFR ], Wght[ MAX_NB_SUBFR ]; - opus_int16 NLSF_Q15[ MAX_LPC_ORDER ]; - const silk_float *x_ptr; - silk_float *x_pre_ptr, LPC_in_pre[ MAX_NB_SUBFR * MAX_LPC_ORDER + MAX_FRAME_LENGTH ]; - silk_float minInvGain; - - /* Weighting for weighted least squares */ - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - silk_assert( psEncCtrl->Gains[ i ] > 0.0f ); - invGains[ i ] = 1.0f / psEncCtrl->Gains[ i ]; - Wght[ i ] = invGains[ i ] * invGains[ i ]; - } - - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /**********/ - /* VOICED */ - /**********/ - silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 ); - - /* LTP analysis */ - silk_find_LTP_FLP( psEncCtrl->LTPCoef, WLTP, &psEncCtrl->LTPredCodGain, res_pitch, - psEncCtrl->pitchL, Wght, psEnc->sCmn.subfr_length, 
psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length ); - - /* Quantize LTP gain parameters */ - silk_quant_LTP_gains_FLP( psEncCtrl->LTPCoef, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex, - &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr, - psEnc->sCmn.arch ); - - /* Control LTP scaling */ - silk_LTP_scale_ctrl_FLP( psEnc, psEncCtrl, condCoding ); - - /* Create LTP residual */ - silk_LTP_analysis_filter_FLP( LPC_in_pre, x - psEnc->sCmn.predictLPCOrder, psEncCtrl->LTPCoef, - psEncCtrl->pitchL, invGains, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder ); - } else { - /************/ - /* UNVOICED */ - /************/ - /* Create signal with prepended subframes, scaled by inverse gains */ - x_ptr = x - psEnc->sCmn.predictLPCOrder; - x_pre_ptr = LPC_in_pre; - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - silk_scale_copy_vector_FLP( x_pre_ptr, x_ptr, invGains[ i ], - psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder ); - x_pre_ptr += psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder; - x_ptr += psEnc->sCmn.subfr_length; - } - silk_memset( psEncCtrl->LTPCoef, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( silk_float ) ); - psEncCtrl->LTPredCodGain = 0.0f; - psEnc->sCmn.sum_log_gain_Q7 = 0; - } - - /* Limit on total predictive coding gain */ - if( psEnc->sCmn.first_frame_after_reset ) { - minInvGain = 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET; - } else { - minInvGain = (silk_float)pow( 2, psEncCtrl->LTPredCodGain / 3 ) / MAX_PREDICTION_POWER_GAIN; - minInvGain /= 0.25f + 0.75f * psEncCtrl->coding_quality; - } - - /* LPC_in_pre contains the LTP-filtered input for voiced, and the unfiltered input for unvoiced */ - silk_find_LPC_FLP( &psEnc->sCmn, NLSF_Q15, LPC_in_pre, minInvGain ); - - /* Quantize LSFs */ - silk_process_NLSFs_FLP( &psEnc->sCmn, psEncCtrl->PredCoef, NLSF_Q15, psEnc->sCmn.prev_NLSFq_Q15 ); - - /* Calculate residual energy using quantized LPC 
coefficients */ - silk_residual_energy_FLP( psEncCtrl->ResNrg, LPC_in_pre, psEncCtrl->PredCoef, psEncCtrl->Gains, - psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder ); - - /* Copy to prediction struct for use in next frame for interpolation */ - silk_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) ); -} - diff --git a/thirdparty/opus/silk/float/inner_product_FLP.c b/thirdparty/opus/silk/float/inner_product_FLP.c deleted file mode 100644 index 029c012911..0000000000 --- a/thirdparty/opus/silk/float/inner_product_FLP.c +++ /dev/null @@ -1,60 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FLP.h" - -/* inner product of two silk_float arrays, with result as double */ -double silk_inner_product_FLP( - const silk_float *data1, - const silk_float *data2, - opus_int dataSize -) -{ - opus_int i, dataSize4; - double result; - - /* 4x unrolled loop */ - result = 0.0; - dataSize4 = dataSize & 0xFFFC; - for( i = 0; i < dataSize4; i += 4 ) { - result += data1[ i + 0 ] * (double)data2[ i + 0 ] + - data1[ i + 1 ] * (double)data2[ i + 1 ] + - data1[ i + 2 ] * (double)data2[ i + 2 ] + - data1[ i + 3 ] * (double)data2[ i + 3 ]; - } - - /* add any remaining products */ - for( ; i < dataSize; i++ ) { - result += data1[ i ] * (double)data2[ i ]; - } - - return result; -} diff --git a/thirdparty/opus/silk/float/k2a_FLP.c b/thirdparty/opus/silk/float/k2a_FLP.c deleted file mode 100644 index 12af4e7669..0000000000 --- a/thirdparty/opus/silk/float/k2a_FLP.c +++ /dev/null @@ -1,53 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FLP.h" - -/* step up function, converts reflection coefficients to prediction coefficients */ -void silk_k2a_FLP( - silk_float *A, /* O prediction coefficients [order] */ - const silk_float *rc, /* I reflection coefficients [order] */ - opus_int32 order /* I prediction order */ -) -{ - opus_int k, n; - silk_float Atmp[ SILK_MAX_ORDER_LPC ]; - - for( k = 0; k < order; k++ ) { - for( n = 0; n < k; n++ ) { - Atmp[ n ] = A[ n ]; - } - for( n = 0; n < k; n++ ) { - A[ n ] += Atmp[ k - n - 1 ] * rc[ k ]; - } - A[ k ] = -rc[ k ]; - } -} diff --git a/thirdparty/opus/silk/float/levinsondurbin_FLP.c b/thirdparty/opus/silk/float/levinsondurbin_FLP.c deleted file mode 100644 index f0ba606981..0000000000 --- a/thirdparty/opus/silk/float/levinsondurbin_FLP.c +++ /dev/null @@ -1,81 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FLP.h" - -/* Solve the normal equations using the Levinson-Durbin recursion */ -silk_float silk_levinsondurbin_FLP( /* O prediction error energy */ - silk_float A[], /* O prediction coefficients [order] */ - const silk_float corr[], /* I input auto-correlations [order + 1] */ - const opus_int order /* I prediction order */ -) -{ - opus_int i, mHalf, m; - silk_float min_nrg, nrg, t, km, Atmp1, Atmp2; - - min_nrg = 1e-12f * corr[ 0 ] + 1e-9f; - nrg = corr[ 0 ]; - nrg = silk_max_float(min_nrg, nrg); - A[ 0 ] = corr[ 1 ] / nrg; - nrg -= A[ 0 ] * corr[ 1 ]; - nrg = silk_max_float(min_nrg, nrg); - - for( m = 1; m < order; m++ ) - { - t = corr[ m + 1 ]; - for( i = 0; i < m; i++ ) { - t -= A[ i ] * corr[ m - i ]; - } - - /* reflection coefficient */ - km = t / nrg; - - /* residual energy */ - nrg -= km * t; - nrg = silk_max_float(min_nrg, nrg); - - mHalf = m >> 1; - for( i = 0; i < mHalf; i++ ) { - Atmp1 = A[ i ]; - Atmp2 = A[ m - i - 1 ]; - A[ m - i - 1 ] -= km * Atmp1; - A[ i ] -= km * Atmp2; - } - if( m & 1 ) { - A[ mHalf ] -= km * A[ mHalf ]; - } - A[ m ] = km; - } - - /* 
return the residual energy */ - return nrg; -} - diff --git a/thirdparty/opus/silk/float/main_FLP.h b/thirdparty/opus/silk/float/main_FLP.h deleted file mode 100644 index e5a75972e5..0000000000 --- a/thirdparty/opus/silk/float/main_FLP.h +++ /dev/null @@ -1,313 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifndef SILK_MAIN_FLP_H -#define SILK_MAIN_FLP_H - -#include "SigProc_FLP.h" -#include "SigProc_FIX.h" -#include "structs_FLP.h" -#include "main.h" -#include "define.h" -#include "debug.h" -#include "entenc.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -#define silk_encoder_state_Fxx silk_encoder_state_FLP -#define silk_encode_do_VAD_Fxx silk_encode_do_VAD_FLP -#define silk_encode_frame_Fxx silk_encode_frame_FLP - -/*********************/ -/* Encoder Functions */ -/*********************/ - -/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */ -void silk_HP_variable_cutoff( - silk_encoder_state_Fxx state_Fxx[] /* I/O Encoder states */ -); - -/* Encoder main function */ -void silk_encode_do_VAD_FLP( - silk_encoder_state_FLP *psEnc /* I/O Encoder state FLP */ -); - -/* Encoder main function */ -opus_int silk_encode_frame_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - opus_int32 *pnBytesOut, /* O Number of payload bytes; */ - ec_enc *psRangeEnc, /* I/O compressor data structure */ - opus_int condCoding, /* I The type of conditional coding to use */ - opus_int maxBits, /* I If > 0: maximum number of output bits */ - opus_int useCBR /* I Flag to force constant-bitrate operation */ -); - -/* Initializes the Silk encoder state */ -opus_int silk_init_encoder( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - int arch /* I Run-tim architecture */ -); - -/* Control the Silk encoder */ -opus_int silk_control_encoder( - silk_encoder_state_FLP *psEnc, /* I/O Pointer to Silk encoder state FLP */ - silk_EncControlStruct *encControl, /* I Control structure */ - const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */ - const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */ - const opus_int channelNb, /* I Channel number */ - const opus_int force_fs_kHz -); - -/****************/ -/* Prefiltering */ 
-/****************/ -void silk_prefilter_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - const silk_encoder_control_FLP *psEncCtrl, /* I Encoder control FLP */ - silk_float xw[], /* O Weighted signal */ - const silk_float x[] /* I Speech signal */ -); - -/**************************/ -/* Noise shaping analysis */ -/**************************/ -/* Compute noise shaping coefficients and initial gain values */ -void silk_noise_shape_analysis_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - const silk_float *pitch_res, /* I LPC residual from pitch analysis */ - const silk_float *x /* I Input signal [frame_length + la_shape] */ -); - -/* Autocorrelations for a warped frequency axis */ -void silk_warped_autocorrelation_FLP( - silk_float *corr, /* O Result [order + 1] */ - const silk_float *input, /* I Input data to correlate */ - const silk_float warping, /* I Warping coefficient */ - const opus_int length, /* I Length of input */ - const opus_int order /* I Correlation order (even) */ -); - -/* Calculation of LTP state scaling */ -void silk_LTP_scale_ctrl_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - opus_int condCoding /* I The type of conditional coding to use */ -); - -/**********************************************/ -/* Prediction Analysis */ -/**********************************************/ -/* Find pitch lags */ -void silk_find_pitch_lags_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - silk_float res[], /* O Residual */ - const silk_float x[], /* I Speech signal */ - int arch /* I Run-time architecture */ -); - -/* Find LPC and LTP coefficients */ -void silk_find_pred_coefs_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* 
I/O Encoder control FLP */ - const silk_float res_pitch[], /* I Residual from pitch analysis */ - const silk_float x[], /* I Speech signal */ - opus_int condCoding /* I The type of conditional coding to use */ -); - -/* LPC analysis */ -void silk_find_LPC_FLP( - silk_encoder_state *psEncC, /* I/O Encoder state */ - opus_int16 NLSF_Q15[], /* O NLSFs */ - const silk_float x[], /* I Input signal */ - const silk_float minInvGain /* I Prediction gain from LTP (dB) */ -); - -/* LTP analysis */ -void silk_find_LTP_FLP( - silk_float b[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */ - silk_float WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ - silk_float *LTPredCodGain, /* O LTP coding gain */ - const silk_float r_lpc[], /* I LPC residual */ - const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ - const silk_float Wght[ MAX_NB_SUBFR ], /* I Weights */ - const opus_int subfr_length, /* I Subframe length */ - const opus_int nb_subfr, /* I number of subframes */ - const opus_int mem_offset /* I Number of samples in LTP memory */ -); - -void silk_LTP_analysis_filter_FLP( - silk_float *LTP_res, /* O LTP res MAX_NB_SUBFR*(pre_lgth+subfr_lngth) */ - const silk_float *x, /* I Input signal, with preceding samples */ - const silk_float B[ LTP_ORDER * MAX_NB_SUBFR ], /* I LTP coefficients for each subframe */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const silk_float invGains[ MAX_NB_SUBFR ], /* I Inverse quantization gains */ - const opus_int subfr_length, /* I Length of each subframe */ - const opus_int nb_subfr, /* I number of subframes */ - const opus_int pre_length /* I Preceding samples for each subframe */ -); - -/* Calculates residual energies of input subframes where all subframes have LPC_order */ -/* of preceding samples */ -void silk_residual_energy_FLP( - silk_float nrgs[ MAX_NB_SUBFR ], /* O Residual energy per subframe */ - const silk_float x[], /* I Input signal */ - silk_float a[ 2 ][ MAX_LPC_ORDER ], /* I AR coefs 
for each frame half */ - const silk_float gains[], /* I Quantization gains */ - const opus_int subfr_length, /* I Subframe length */ - const opus_int nb_subfr, /* I number of subframes */ - const opus_int LPC_order /* I LPC order */ -); - -/* 16th order LPC analysis filter */ -void silk_LPC_analysis_filter_FLP( - silk_float r_LPC[], /* O LPC residual signal */ - const silk_float PredCoef[], /* I LPC coefficients */ - const silk_float s[], /* I Input signal */ - const opus_int length, /* I Length of input signal */ - const opus_int Order /* I LPC order */ -); - -/* LTP tap quantizer */ -void silk_quant_LTP_gains_FLP( - silk_float B[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (Un-)quantized LTP gains */ - opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook index */ - opus_int8 *periodicity_index, /* O Periodicity index */ - opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */ - const silk_float W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I Error weights */ - const opus_int mu_Q10, /* I Mu value (R/D tradeoff) */ - const opus_int lowComplexity, /* I Flag for low complexity */ - const opus_int nb_subfr, /* I number of subframes */ - int arch /* I Run-time architecture */ -); - -/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */ -silk_float silk_residual_energy_covar_FLP( /* O Weighted residual energy */ - const silk_float *c, /* I Filter coefficients */ - silk_float *wXX, /* I/O Weighted correlation matrix, reg. 
out */ - const silk_float *wXx, /* I Weighted correlation vector */ - const silk_float wxx, /* I Weighted correlation value */ - const opus_int D /* I Dimension */ -); - -/* Processing of gains */ -void silk_process_gains_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - opus_int condCoding /* I The type of conditional coding to use */ -); - -/******************/ -/* Linear Algebra */ -/******************/ -/* Calculates correlation matrix X'*X */ -void silk_corrMatrix_FLP( - const silk_float *x, /* I x vector [ L+order-1 ] used to create X */ - const opus_int L, /* I Length of vectors */ - const opus_int Order, /* I Max lag for correlation */ - silk_float *XX /* O X'*X correlation matrix [order x order] */ -); - -/* Calculates correlation vector X'*t */ -void silk_corrVector_FLP( - const silk_float *x, /* I x vector [L+order-1] used to create X */ - const silk_float *t, /* I Target vector [L] */ - const opus_int L, /* I Length of vecors */ - const opus_int Order, /* I Max lag for correlation */ - silk_float *Xt /* O X'*t correlation vector [order] */ -); - -/* Add noise to matrix diagonal */ -void silk_regularize_correlations_FLP( - silk_float *XX, /* I/O Correlation matrices */ - silk_float *xx, /* I/O Correlation values */ - const silk_float noise, /* I Noise energy to add */ - const opus_int D /* I Dimension of XX */ -); - -/* Function to solve linear equation Ax = b, where A is an MxM symmetric matrix */ -void silk_solve_LDL_FLP( - silk_float *A, /* I/O Symmetric square matrix, out: reg. */ - const opus_int M, /* I Size of matrix */ - const silk_float *b, /* I Pointer to b vector */ - silk_float *x /* O Pointer to x solution vector */ -); - -/* Apply sine window to signal vector. 
*/ -/* Window types: */ -/* 1 -> sine window from 0 to pi/2 */ -/* 2 -> sine window from pi/2 to pi */ -void silk_apply_sine_window_FLP( - silk_float px_win[], /* O Pointer to windowed signal */ - const silk_float px[], /* I Pointer to input signal */ - const opus_int win_type, /* I Selects a window type */ - const opus_int length /* I Window length, multiple of 4 */ -); - -/* Wrapper functions. Call flp / fix code */ - -/* Convert AR filter coefficients to NLSF parameters */ -void silk_A2NLSF_FLP( - opus_int16 *NLSF_Q15, /* O NLSF vector [ LPC_order ] */ - const silk_float *pAR, /* I LPC coefficients [ LPC_order ] */ - const opus_int LPC_order /* I LPC order */ -); - -/* Convert NLSF parameters to AR prediction filter coefficients */ -void silk_NLSF2A_FLP( - silk_float *pAR, /* O LPC coefficients [ LPC_order ] */ - const opus_int16 *NLSF_Q15, /* I NLSF vector [ LPC_order ] */ - const opus_int LPC_order /* I LPC order */ -); - -/* Limit, stabilize, and quantize NLSFs */ -void silk_process_NLSFs_FLP( - silk_encoder_state *psEncC, /* I/O Encoder state */ - silk_float PredCoef[ 2 ][ MAX_LPC_ORDER ], /* O Prediction coefficients */ - opus_int16 NLSF_Q15[ MAX_LPC_ORDER ], /* I/O Normalized LSFs (quant out) (0 - (2^15-1)) */ - const opus_int16 prev_NLSF_Q15[ MAX_LPC_ORDER ] /* I Previous Normalized LSFs (0 - (2^15-1)) */ -); - -/* Floating-point Silk NSQ wrapper */ -void silk_NSQ_wrapper_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - SideInfoIndices *psIndices, /* I/O Quantization indices */ - silk_nsq_state *psNSQ, /* I/O Noise Shaping Quantzation state */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const silk_float x[] /* I Prefiltered input signal */ -); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/silk/float/noise_shape_analysis_FLP.c b/thirdparty/opus/silk/float/noise_shape_analysis_FLP.c deleted file mode 100644 index 
65f6ea5870..0000000000 --- a/thirdparty/opus/silk/float/noise_shape_analysis_FLP.c +++ /dev/null @@ -1,365 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" -#include "tuning_parameters.h" - -/* Compute gain to make warped filter coefficients have a zero mean log frequency response on a */ -/* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */ -/* Note: A monic filter is one with the first coefficient equal to 1.0. In Silk we omit the first */ -/* coefficient in an array of coefficients, for monic filters. */ -static OPUS_INLINE silk_float warped_gain( - const silk_float *coefs, - silk_float lambda, - opus_int order -) { - opus_int i; - silk_float gain; - - lambda = -lambda; - gain = coefs[ order - 1 ]; - for( i = order - 2; i >= 0; i-- ) { - gain = lambda * gain + coefs[ i ]; - } - return (silk_float)( 1.0f / ( 1.0f - lambda * gain ) ); -} - -/* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum */ -/* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */ -static OPUS_INLINE void warped_true2monic_coefs( - silk_float *coefs_syn, - silk_float *coefs_ana, - silk_float lambda, - silk_float limit, - opus_int order -) { - opus_int i, iter, ind = 0; - silk_float tmp, maxabs, chirp, gain_syn, gain_ana; - - /* Convert to monic coefficients */ - for( i = order - 1; i > 0; i-- ) { - coefs_syn[ i - 1 ] -= lambda * coefs_syn[ i ]; - coefs_ana[ i - 1 ] -= lambda * coefs_ana[ i ]; - } - gain_syn = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_syn[ 0 ] ); - gain_ana = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_ana[ 0 ] ); - for( i = 0; i < order; i++ ) { - coefs_syn[ i ] *= gain_syn; - coefs_ana[ i ] *= gain_ana; - } - - /* Limit */ - for( iter = 0; iter < 10; iter++ ) { - /* Find maximum absolute value */ - maxabs = -1.0f; - for( i = 0; i < order; i++ ) { - tmp = silk_max( silk_abs_float( coefs_syn[ i ] ), silk_abs_float( coefs_ana[ i ] ) ); 
- if( tmp > maxabs ) { - maxabs = tmp; - ind = i; - } - } - if( maxabs <= limit ) { - /* Coefficients are within range - done */ - return; - } - - /* Convert back to true warped coefficients */ - for( i = 1; i < order; i++ ) { - coefs_syn[ i - 1 ] += lambda * coefs_syn[ i ]; - coefs_ana[ i - 1 ] += lambda * coefs_ana[ i ]; - } - gain_syn = 1.0f / gain_syn; - gain_ana = 1.0f / gain_ana; - for( i = 0; i < order; i++ ) { - coefs_syn[ i ] *= gain_syn; - coefs_ana[ i ] *= gain_ana; - } - - /* Apply bandwidth expansion */ - chirp = 0.99f - ( 0.8f + 0.1f * iter ) * ( maxabs - limit ) / ( maxabs * ( ind + 1 ) ); - silk_bwexpander_FLP( coefs_syn, order, chirp ); - silk_bwexpander_FLP( coefs_ana, order, chirp ); - - /* Convert to monic warped coefficients */ - for( i = order - 1; i > 0; i-- ) { - coefs_syn[ i - 1 ] -= lambda * coefs_syn[ i ]; - coefs_ana[ i - 1 ] -= lambda * coefs_ana[ i ]; - } - gain_syn = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_syn[ 0 ] ); - gain_ana = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_ana[ 0 ] ); - for( i = 0; i < order; i++ ) { - coefs_syn[ i ] *= gain_syn; - coefs_ana[ i ] *= gain_ana; - } - } - silk_assert( 0 ); -} - -/* Compute noise shaping coefficients and initial gain values */ -void silk_noise_shape_analysis_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - const silk_float *pitch_res, /* I LPC residual from pitch analysis */ - const silk_float *x /* I Input signal [frame_length + la_shape] */ -) -{ - silk_shape_state_FLP *psShapeSt = &psEnc->sShape; - opus_int k, nSamples; - silk_float SNR_adj_dB, HarmBoost, HarmShapeGain, Tilt; - silk_float nrg, pre_nrg, log_energy, log_energy_prev, energy_variation; - silk_float delta, BWExp1, BWExp2, gain_mult, gain_add, strength, b, warping; - silk_float x_windowed[ SHAPE_LPC_WIN_MAX ]; - silk_float auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ]; - const silk_float *x_ptr, *pitch_res_ptr; - - /* Point 
to start of first LPC analysis block */ - x_ptr = x - psEnc->sCmn.la_shape; - - /****************/ - /* GAIN CONTROL */ - /****************/ - SNR_adj_dB = psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ); - - /* Input quality is the average of the quality in the lowest two VAD bands */ - psEncCtrl->input_quality = 0.5f * ( psEnc->sCmn.input_quality_bands_Q15[ 0 ] + psEnc->sCmn.input_quality_bands_Q15[ 1 ] ) * ( 1.0f / 32768.0f ); - - /* Coding quality level, between 0.0 and 1.0 */ - psEncCtrl->coding_quality = silk_sigmoid( 0.25f * ( SNR_adj_dB - 20.0f ) ); - - if( psEnc->sCmn.useCBR == 0 ) { - /* Reduce coding SNR during low speech activity */ - b = 1.0f - psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ); - SNR_adj_dB -= BG_SNR_DECR_dB * psEncCtrl->coding_quality * ( 0.5f + 0.5f * psEncCtrl->input_quality ) * b * b; - } - - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Reduce gains for periodic signals */ - SNR_adj_dB += HARM_SNR_INCR_dB * psEnc->LTPCorr; - } else { - /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */ - SNR_adj_dB += ( -0.4f * psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ) + 6.0f ) * ( 1.0f - psEncCtrl->input_quality ); - } - - /*************************/ - /* SPARSENESS PROCESSING */ - /*************************/ - /* Set quantizer offset */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Initially set to 0; may be overruled in process_gains(..) 
*/ - psEnc->sCmn.indices.quantOffsetType = 0; - psEncCtrl->sparseness = 0.0f; - } else { - /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ - nSamples = 2 * psEnc->sCmn.fs_kHz; - energy_variation = 0.0f; - log_energy_prev = 0.0f; - pitch_res_ptr = pitch_res; - for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) { - nrg = ( silk_float )nSamples + ( silk_float )silk_energy_FLP( pitch_res_ptr, nSamples ); - log_energy = silk_log2( nrg ); - if( k > 0 ) { - energy_variation += silk_abs_float( log_energy - log_energy_prev ); - } - log_energy_prev = log_energy; - pitch_res_ptr += nSamples; - } - psEncCtrl->sparseness = silk_sigmoid( 0.4f * ( energy_variation - 5.0f ) ); - - /* Set quantization offset depending on sparseness measure */ - if( psEncCtrl->sparseness > SPARSENESS_THRESHOLD_QNT_OFFSET ) { - psEnc->sCmn.indices.quantOffsetType = 0; - } else { - psEnc->sCmn.indices.quantOffsetType = 1; - } - - /* Increase coding SNR for sparse signals */ - SNR_adj_dB += SPARSE_SNR_INCR_dB * ( psEncCtrl->sparseness - 0.5f ); - } - - /*******************************/ - /* Control bandwidth expansion */ - /*******************************/ - /* More BWE for signals with high prediction gain */ - strength = FIND_PITCH_WHITE_NOISE_FRACTION * psEncCtrl->predGain; /* between 0.0 and 1.0 */ - BWExp1 = BWExp2 = BANDWIDTH_EXPANSION / ( 1.0f + strength * strength ); - delta = LOW_RATE_BANDWIDTH_EXPANSION_DELTA * ( 1.0f - 0.75f * psEncCtrl->coding_quality ); - BWExp1 -= delta; - BWExp2 += delta; - /* BWExp1 will be applied after BWExp2, so make it relative */ - BWExp1 /= BWExp2; - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */ - warping = (silk_float)psEnc->sCmn.warping_Q16 / 65536.0f + 0.01f * psEncCtrl->coding_quality; - } else { - warping = 0.0f; - } - - /********************************************/ - /* Compute noise 
shaping AR coefs and gains */ - /********************************************/ - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - /* Apply window: sine slope followed by flat part followed by cosine slope */ - opus_int shift, slope_part, flat_part; - flat_part = psEnc->sCmn.fs_kHz * 3; - slope_part = ( psEnc->sCmn.shapeWinLength - flat_part ) / 2; - - silk_apply_sine_window_FLP( x_windowed, x_ptr, 1, slope_part ); - shift = slope_part; - silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(silk_float) ); - shift += flat_part; - silk_apply_sine_window_FLP( x_windowed + shift, x_ptr + shift, 2, slope_part ); - - /* Update pointer: next LPC analysis block */ - x_ptr += psEnc->sCmn.subfr_length; - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Calculate warped auto correlation */ - silk_warped_autocorrelation_FLP( auto_corr, x_windowed, warping, - psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder ); - } else { - /* Calculate regular auto correlation */ - silk_autocorrelation_FLP( auto_corr, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1 ); - } - - /* Add white noise, as a fraction of energy */ - auto_corr[ 0 ] += auto_corr[ 0 ] * SHAPE_WHITE_NOISE_FRACTION; - - /* Convert correlations to prediction coefficients, and compute residual energy */ - nrg = silk_levinsondurbin_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], auto_corr, psEnc->sCmn.shapingLPCOrder ); - psEncCtrl->Gains[ k ] = ( silk_float )sqrt( nrg ); - - if( psEnc->sCmn.warping_Q16 > 0 ) { - /* Adjust gain for warping */ - psEncCtrl->Gains[ k ] *= warped_gain( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], warping, psEnc->sCmn.shapingLPCOrder ); - } - - /* Bandwidth expansion for synthesis filter shaping */ - silk_bwexpander_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp2 ); - - /* Compute noise shaping filter coefficients */ - silk_memcpy( - &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], - &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], - 
psEnc->sCmn.shapingLPCOrder * sizeof( silk_float ) ); - - /* Bandwidth expansion for analysis filter shaping */ - silk_bwexpander_FLP( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp1 ); - - /* Ratio of prediction gains, in energy domain */ - pre_nrg = silk_LPC_inverse_pred_gain_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder ); - nrg = silk_LPC_inverse_pred_gain_FLP( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder ); - psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ); - - /* Convert to monic warped prediction coefficients and limit absolute values */ - warped_true2monic_coefs( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], - warping, 3.999f, psEnc->sCmn.shapingLPCOrder ); - } - - /*****************/ - /* Gain tweaking */ - /*****************/ - /* Increase gains during low speech activity */ - gain_mult = (silk_float)pow( 2.0f, -0.16f * SNR_adj_dB ); - gain_add = (silk_float)pow( 2.0f, 0.16f * MIN_QGAIN_DB ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->Gains[ k ] *= gain_mult; - psEncCtrl->Gains[ k ] += gain_add; - } - - gain_mult = 1.0f + INPUT_TILT + psEncCtrl->coding_quality * HIGH_RATE_INPUT_TILT; - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->GainsPre[ k ] *= gain_mult; - } - - /************************************************/ - /* Control low-frequency shaping and noise tilt */ - /************************************************/ - /* Less low frequency shaping for noisy inputs */ - strength = LOW_FREQ_SHAPING * ( 1.0f + LOW_QUALITY_LOW_FREQ_SHAPING_DECR * ( psEnc->sCmn.input_quality_bands_Q15[ 0 ] * ( 1.0f / 32768.0f ) - 1.0f ) ); - strength *= psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ); - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */ - /*f = 400; freqz([1, -0.98 + 2e-4 * f], 
[1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/ - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - b = 0.2f / psEnc->sCmn.fs_kHz + 3.0f / psEncCtrl->pitchL[ k ]; - psEncCtrl->LF_MA_shp[ k ] = -1.0f + b; - psEncCtrl->LF_AR_shp[ k ] = 1.0f - b - b * strength; - } - Tilt = - HP_NOISE_COEF - - (1 - HP_NOISE_COEF) * HARM_HP_NOISE_COEF * psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ); - } else { - b = 1.3f / psEnc->sCmn.fs_kHz; - psEncCtrl->LF_MA_shp[ 0 ] = -1.0f + b; - psEncCtrl->LF_AR_shp[ 0 ] = 1.0f - b - b * strength * 0.6f; - for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->LF_MA_shp[ k ] = psEncCtrl->LF_MA_shp[ 0 ]; - psEncCtrl->LF_AR_shp[ k ] = psEncCtrl->LF_AR_shp[ 0 ]; - } - Tilt = -HP_NOISE_COEF; - } - - /****************************/ - /* HARMONIC SHAPING CONTROL */ - /****************************/ - /* Control boosting of harmonic frequencies */ - HarmBoost = LOW_RATE_HARMONIC_BOOST * ( 1.0f - psEncCtrl->coding_quality ) * psEnc->LTPCorr; - - /* More harmonic boost for noisy input signals */ - HarmBoost += LOW_INPUT_QUALITY_HARMONIC_BOOST * ( 1.0f - psEncCtrl->input_quality ); - - if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - /* Harmonic noise shaping */ - HarmShapeGain = HARMONIC_SHAPING; - - /* More harmonic noise shaping for high bitrates or noisy input */ - HarmShapeGain += HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING * - ( 1.0f - ( 1.0f - psEncCtrl->coding_quality ) * psEncCtrl->input_quality ); - - /* Less harmonic noise shaping for less periodic signals */ - HarmShapeGain *= ( silk_float )sqrt( psEnc->LTPCorr ); - } else { - HarmShapeGain = 0.0f; - } - - /*************************/ - /* Smooth over subframes */ - /*************************/ - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psShapeSt->HarmBoost_smth += SUBFR_SMTH_COEF * ( HarmBoost - psShapeSt->HarmBoost_smth ); - psEncCtrl->HarmBoost[ k ] = psShapeSt->HarmBoost_smth; - psShapeSt->HarmShapeGain_smth += SUBFR_SMTH_COEF * ( HarmShapeGain 
- psShapeSt->HarmShapeGain_smth ); - psEncCtrl->HarmShapeGain[ k ] = psShapeSt->HarmShapeGain_smth; - psShapeSt->Tilt_smth += SUBFR_SMTH_COEF * ( Tilt - psShapeSt->Tilt_smth ); - psEncCtrl->Tilt[ k ] = psShapeSt->Tilt_smth; - } -} diff --git a/thirdparty/opus/silk/float/pitch_analysis_core_FLP.c b/thirdparty/opus/silk/float/pitch_analysis_core_FLP.c deleted file mode 100644 index d0e637a29d..0000000000 --- a/thirdparty/opus/silk/float/pitch_analysis_core_FLP.c +++ /dev/null @@ -1,630 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/***************************************************************************** -* Pitch analyser function -******************************************************************************/ -#include "SigProc_FLP.h" -#include "SigProc_FIX.h" -#include "pitch_est_defines.h" -#include "pitch.h" - -#define SCRATCH_SIZE 22 - -/************************************************************/ -/* Internally used functions */ -/************************************************************/ -static void silk_P_Ana_calc_corr_st3( - silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ - const silk_float frame[], /* I vector to correlate */ - opus_int start_lag, /* I start lag */ - opus_int sf_length, /* I sub frame length */ - opus_int nb_subfr, /* I number of subframes */ - opus_int complexity, /* I Complexity setting */ - int arch /* I Run-time architecture */ -); - -static void silk_P_Ana_calc_energy_st3( - silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ - const silk_float frame[], /* I vector to correlate */ - opus_int start_lag, /* I start lag */ - opus_int sf_length, /* I sub frame length */ - opus_int nb_subfr, /* I number of subframes */ - opus_int complexity /* I Complexity setting */ -); - 
-/************************************************************/ -/* CORE PITCH ANALYSIS FUNCTION */ -/************************************************************/ -opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, 1 unvoiced */ - const silk_float *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */ - opus_int *pitch_out, /* O Pitch lag values [nb_subfr] */ - opus_int16 *lagIndex, /* O Lag Index */ - opus_int8 *contourIndex, /* O Pitch contour Index */ - silk_float *LTPCorr, /* I/O Normalized correlation; input: value from previous frame */ - opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */ - const silk_float search_thres1, /* I First stage threshold for lag candidates 0 - 1 */ - const silk_float search_thres2, /* I Final threshold for lag candidates 0 - 1 */ - const opus_int Fs_kHz, /* I sample frequency (kHz) */ - const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ - const opus_int nb_subfr, /* I Number of 5 ms subframes */ - int arch /* I Run-time architecture */ -) -{ - opus_int i, k, d, j; - silk_float frame_8kHz[ PE_MAX_FRAME_LENGTH_MS * 8 ]; - silk_float frame_4kHz[ PE_MAX_FRAME_LENGTH_MS * 4 ]; - opus_int16 frame_8_FIX[ PE_MAX_FRAME_LENGTH_MS * 8 ]; - opus_int16 frame_4_FIX[ PE_MAX_FRAME_LENGTH_MS * 4 ]; - opus_int32 filt_state[ 6 ]; - silk_float threshold, contour_bias; - silk_float C[ PE_MAX_NB_SUBFR][ (PE_MAX_LAG >> 1) + 5 ]; - opus_val32 xcorr[ PE_MAX_LAG_MS * 4 - PE_MIN_LAG_MS * 4 + 1 ]; - silk_float CC[ PE_NB_CBKS_STAGE2_EXT ]; - const silk_float *target_ptr, *basis_ptr; - double cross_corr, normalizer, energy, energy_tmp; - opus_int d_srch[ PE_D_SRCH_LENGTH ]; - opus_int16 d_comp[ (PE_MAX_LAG >> 1) + 5 ]; - opus_int length_d_srch, length_d_comp; - silk_float Cmax, CCmax, CCmax_b, CCmax_new_b, CCmax_new; - opus_int CBimax, CBimax_new, lag, start_lag, end_lag, lag_new; - opus_int cbk_size; - silk_float lag_log2, prevLag_log2, delta_lag_log2_sqr; - silk_float 
energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ]; - silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ]; - opus_int lag_counter; - opus_int frame_length, frame_length_8kHz, frame_length_4kHz; - opus_int sf_length, sf_length_8kHz, sf_length_4kHz; - opus_int min_lag, min_lag_8kHz, min_lag_4kHz; - opus_int max_lag, max_lag_8kHz, max_lag_4kHz; - opus_int nb_cbk_search; - const opus_int8 *Lag_CB_ptr; - - /* Check for valid sampling frequency */ - silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 ); - - /* Check for valid complexity setting */ - silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); - silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); - - silk_assert( search_thres1 >= 0.0f && search_thres1 <= 1.0f ); - silk_assert( search_thres2 >= 0.0f && search_thres2 <= 1.0f ); - - /* Set up frame lengths max / min lag for the sampling frequency */ - frame_length = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz; - frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4; - frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8; - sf_length = PE_SUBFR_LENGTH_MS * Fs_kHz; - sf_length_4kHz = PE_SUBFR_LENGTH_MS * 4; - sf_length_8kHz = PE_SUBFR_LENGTH_MS * 8; - min_lag = PE_MIN_LAG_MS * Fs_kHz; - min_lag_4kHz = PE_MIN_LAG_MS * 4; - min_lag_8kHz = PE_MIN_LAG_MS * 8; - max_lag = PE_MAX_LAG_MS * Fs_kHz - 1; - max_lag_4kHz = PE_MAX_LAG_MS * 4; - max_lag_8kHz = PE_MAX_LAG_MS * 8 - 1; - - /* Resample from input sampled at Fs_kHz to 8 kHz */ - if( Fs_kHz == 16 ) { - /* Resample to 16 -> 8 khz */ - opus_int16 frame_16_FIX[ 16 * PE_MAX_FRAME_LENGTH_MS ]; - silk_float2short_array( frame_16_FIX, frame, frame_length ); - silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) ); - silk_resampler_down2( filt_state, frame_8_FIX, frame_16_FIX, frame_length ); - silk_short2float_array( frame_8kHz, frame_8_FIX, frame_length_8kHz ); - } else if( Fs_kHz == 12 
) { - /* Resample to 12 -> 8 khz */ - opus_int16 frame_12_FIX[ 12 * PE_MAX_FRAME_LENGTH_MS ]; - silk_float2short_array( frame_12_FIX, frame, frame_length ); - silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) ); - silk_resampler_down2_3( filt_state, frame_8_FIX, frame_12_FIX, frame_length ); - silk_short2float_array( frame_8kHz, frame_8_FIX, frame_length_8kHz ); - } else { - silk_assert( Fs_kHz == 8 ); - silk_float2short_array( frame_8_FIX, frame, frame_length_8kHz ); - } - - /* Decimate again to 4 kHz */ - silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) ); - silk_resampler_down2( filt_state, frame_4_FIX, frame_8_FIX, frame_length_8kHz ); - silk_short2float_array( frame_4kHz, frame_4_FIX, frame_length_4kHz ); - - /* Low-pass filter */ - for( i = frame_length_4kHz - 1; i > 0; i-- ) { - frame_4kHz[ i ] += frame_4kHz[ i - 1 ]; - } - - /****************************************************************************** - * FIRST STAGE, operating in 4 khz - ******************************************************************************/ - silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5)); - target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ]; - for( k = 0; k < nb_subfr >> 1; k++ ) { - /* Check that we are within range of the array */ - silk_assert( target_ptr >= frame_4kHz ); - silk_assert( target_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); - - basis_ptr = target_ptr - min_lag_4kHz; - - /* Check that we are within range of the array */ - silk_assert( basis_ptr >= frame_4kHz ); - silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); - - celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1, arch ); - - /* Calculate first vector products before loop */ - cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ]; - normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) + - silk_energy_FLP( basis_ptr, sf_length_8kHz ) + - sf_length_8kHz * 4000.0f; - - 
C[ 0 ][ min_lag_4kHz ] += (silk_float)( 2 * cross_corr / normalizer ); - - /* From now on normalizer is computed recursively */ - for( d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++ ) { - basis_ptr--; - - /* Check that we are within range of the array */ - silk_assert( basis_ptr >= frame_4kHz ); - silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); - - cross_corr = xcorr[ max_lag_4kHz - d ]; - - /* Add contribution of new sample and remove contribution from oldest sample */ - normalizer += - basis_ptr[ 0 ] * (double)basis_ptr[ 0 ] - - basis_ptr[ sf_length_8kHz ] * (double)basis_ptr[ sf_length_8kHz ]; - C[ 0 ][ d ] += (silk_float)( 2 * cross_corr / normalizer ); - } - /* Update target pointer */ - target_ptr += sf_length_8kHz; - } - - /* Apply short-lag bias */ - for( i = max_lag_4kHz; i >= min_lag_4kHz; i-- ) { - C[ 0 ][ i ] -= C[ 0 ][ i ] * i / 4096.0f; - } - - /* Sort */ - length_d_srch = 4 + 2 * complexity; - silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH ); - silk_insertion_sort_decreasing_FLP( &C[ 0 ][ min_lag_4kHz ], d_srch, max_lag_4kHz - min_lag_4kHz + 1, length_d_srch ); - - /* Escape if correlation is very low already here */ - Cmax = C[ 0 ][ min_lag_4kHz ]; - if( Cmax < 0.2f ) { - silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) ); - *LTPCorr = 0.0f; - *lagIndex = 0; - *contourIndex = 0; - return 1; - } - - threshold = search_thres1 * Cmax; - for( i = 0; i < length_d_srch; i++ ) { - /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */ - if( C[ 0 ][ min_lag_4kHz + i ] > threshold ) { - d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + min_lag_4kHz, 1 ); - } else { - length_d_srch = i; - break; - } - } - silk_assert( length_d_srch > 0 ); - - for( i = min_lag_8kHz - 5; i < max_lag_8kHz + 5; i++ ) { - d_comp[ i ] = 0; - } - for( i = 0; i < length_d_srch; i++ ) { - d_comp[ d_srch[ i ] ] = 1; - } - - /* Convolution */ - for( i = max_lag_8kHz + 3; i >= min_lag_8kHz; i-- ) { - d_comp[ i ] += d_comp[ i - 1 
] + d_comp[ i - 2 ]; - } - - length_d_srch = 0; - for( i = min_lag_8kHz; i < max_lag_8kHz + 1; i++ ) { - if( d_comp[ i + 1 ] > 0 ) { - d_srch[ length_d_srch ] = i; - length_d_srch++; - } - } - - /* Convolution */ - for( i = max_lag_8kHz + 3; i >= min_lag_8kHz; i-- ) { - d_comp[ i ] += d_comp[ i - 1 ] + d_comp[ i - 2 ] + d_comp[ i - 3 ]; - } - - length_d_comp = 0; - for( i = min_lag_8kHz; i < max_lag_8kHz + 4; i++ ) { - if( d_comp[ i ] > 0 ) { - d_comp[ length_d_comp ] = (opus_int16)( i - 2 ); - length_d_comp++; - } - } - - /********************************************************************************** - ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation - *************************************************************************************/ - /********************************************************************************* - * Find energy of each subframe projected onto its history, for a range of delays - *********************************************************************************/ - silk_memset( C, 0, PE_MAX_NB_SUBFR*((PE_MAX_LAG >> 1) + 5) * sizeof(silk_float)); - - if( Fs_kHz == 8 ) { - target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * 8 ]; - } else { - target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ]; - } - for( k = 0; k < nb_subfr; k++ ) { - energy_tmp = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 1.0; - for( j = 0; j < length_d_comp; j++ ) { - d = d_comp[ j ]; - basis_ptr = target_ptr - d; - cross_corr = silk_inner_product_FLP( basis_ptr, target_ptr, sf_length_8kHz ); - if( cross_corr > 0.0f ) { - energy = silk_energy_FLP( basis_ptr, sf_length_8kHz ); - C[ k ][ d ] = (silk_float)( 2 * cross_corr / ( energy + energy_tmp ) ); - } else { - C[ k ][ d ] = 0.0f; - } - } - target_ptr += sf_length_8kHz; - } - - /* search over lag range and lags codebook */ - /* scale factor for lag codebook, as a function of center lag */ - - CCmax = 0.0f; /* This value doesn't matter */ - CCmax_b = -1000.0f; - - CBimax = 0; /* To avoid returning 
undefined lag values */ - lag = -1; /* To check if lag with strong enough correlation has been found */ - - if( prevLag > 0 ) { - if( Fs_kHz == 12 ) { - prevLag = silk_LSHIFT( prevLag, 1 ) / 3; - } else if( Fs_kHz == 16 ) { - prevLag = silk_RSHIFT( prevLag, 1 ); - } - prevLag_log2 = silk_log2( (silk_float)prevLag ); - } else { - prevLag_log2 = 0; - } - - /* Set up stage 2 codebook based on number of subframes */ - if( nb_subfr == PE_MAX_NB_SUBFR ) { - cbk_size = PE_NB_CBKS_STAGE2_EXT; - Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ]; - if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) { - /* If input is 8 khz use a larger codebook here because it is last stage */ - nb_cbk_search = PE_NB_CBKS_STAGE2_EXT; - } else { - nb_cbk_search = PE_NB_CBKS_STAGE2; - } - } else { - cbk_size = PE_NB_CBKS_STAGE2_10MS; - Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ]; - nb_cbk_search = PE_NB_CBKS_STAGE2_10MS; - } - - for( k = 0; k < length_d_srch; k++ ) { - d = d_srch[ k ]; - for( j = 0; j < nb_cbk_search; j++ ) { - CC[j] = 0.0f; - for( i = 0; i < nb_subfr; i++ ) { - /* Try all codebooks */ - CC[ j ] += C[ i ][ d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size )]; - } - } - /* Find best codebook */ - CCmax_new = -1000.0f; - CBimax_new = 0; - for( i = 0; i < nb_cbk_search; i++ ) { - if( CC[ i ] > CCmax_new ) { - CCmax_new = CC[ i ]; - CBimax_new = i; - } - } - - /* Bias towards shorter lags */ - lag_log2 = silk_log2( (silk_float)d ); - CCmax_new_b = CCmax_new - PE_SHORTLAG_BIAS * nb_subfr * lag_log2; - - /* Bias towards previous lag */ - if( prevLag > 0 ) { - delta_lag_log2_sqr = lag_log2 - prevLag_log2; - delta_lag_log2_sqr *= delta_lag_log2_sqr; - CCmax_new_b -= PE_PREVLAG_BIAS * nb_subfr * (*LTPCorr) * delta_lag_log2_sqr / ( delta_lag_log2_sqr + 0.5f ); - } - - if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */ - CCmax_new > nb_subfr * search_thres2 /* Correlation needs to be high enough to be voiced */ - ) { - CCmax_b = CCmax_new_b; - CCmax = CCmax_new; - lag = d; - 
CBimax = CBimax_new; - } - } - - if( lag == -1 ) { - /* No suitable candidate found */ - silk_memset( pitch_out, 0, PE_MAX_NB_SUBFR * sizeof(opus_int) ); - *LTPCorr = 0.0f; - *lagIndex = 0; - *contourIndex = 0; - return 1; - } - - /* Output normalized correlation */ - *LTPCorr = (silk_float)( CCmax / nb_subfr ); - silk_assert( *LTPCorr >= 0.0f ); - - if( Fs_kHz > 8 ) { - /* Search in original signal */ - - /* Compensate for decimation */ - silk_assert( lag == silk_SAT16( lag ) ); - if( Fs_kHz == 12 ) { - lag = silk_RSHIFT_ROUND( silk_SMULBB( lag, 3 ), 1 ); - } else { /* Fs_kHz == 16 */ - lag = silk_LSHIFT( lag, 1 ); - } - - lag = silk_LIMIT_int( lag, min_lag, max_lag ); - start_lag = silk_max_int( lag - 2, min_lag ); - end_lag = silk_min_int( lag + 2, max_lag ); - lag_new = lag; /* to avoid undefined lag */ - CBimax = 0; /* to avoid undefined lag */ - - CCmax = -1000.0f; - - /* Calculate the correlations and energies needed in stage 3 */ - silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch ); - silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity ); - - lag_counter = 0; - silk_assert( lag == silk_SAT16( lag ) ); - contour_bias = PE_FLATCONTOUR_BIAS / lag; - - /* Set up cbk parameters according to complexity setting and frame length */ - if( nb_subfr == PE_MAX_NB_SUBFR ) { - nb_cbk_search = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ]; - cbk_size = PE_NB_CBKS_STAGE3_MAX; - Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; - } else { - nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; - cbk_size = PE_NB_CBKS_STAGE3_10MS; - Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; - } - - target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ]; - energy_tmp = silk_energy_FLP( target_ptr, nb_subfr * sf_length ) + 1.0; - for( d = start_lag; d <= end_lag; d++ ) { - for( j = 0; j < nb_cbk_search; j++ ) { - cross_corr = 0.0; - energy = energy_tmp; - for( k = 0; k < nb_subfr; k++ ) { - cross_corr += 
cross_corr_st3[ k ][ j ][ lag_counter ]; - energy += energies_st3[ k ][ j ][ lag_counter ]; - } - if( cross_corr > 0.0 ) { - CCmax_new = (silk_float)( 2 * cross_corr / energy ); - /* Reduce depending on flatness of contour */ - CCmax_new *= 1.0f - contour_bias * j; - } else { - CCmax_new = 0.0f; - } - - if( CCmax_new > CCmax && ( d + (opus_int)silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) { - CCmax = CCmax_new; - lag_new = d; - CBimax = j; - } - } - lag_counter++; - } - - for( k = 0; k < nb_subfr; k++ ) { - pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); - pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz ); - } - *lagIndex = (opus_int16)( lag_new - min_lag ); - *contourIndex = (opus_int8)CBimax; - } else { /* Fs_kHz == 8 */ - /* Save Lags */ - for( k = 0; k < nb_subfr; k++ ) { - pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); - pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * 8 ); - } - *lagIndex = (opus_int16)( lag - min_lag_8kHz ); - *contourIndex = (opus_int8)CBimax; - } - silk_assert( *lagIndex >= 0 ); - /* return as voiced */ - return 0; -} - -/*********************************************************************** - * Calculates the correlations used in stage 3 search. In order to cover - * the whole lag codebook for all the searched offset lags (lag +- 2), - * the following correlations are needed in each sub frame: - * - * sf1: lag range [-8,...,7] total 16 correlations - * sf2: lag range [-4,...,4] total 9 correlations - * sf3: lag range [-3,....4] total 8 correltions - * sf4: lag range [-6,....8] total 15 correlations - * - * In total 48 correlations. The direct implementation computed in worst - * case 4*12*5 = 240 correlations, but more likely around 120. 
- ***********************************************************************/ -static void silk_P_Ana_calc_corr_st3( - silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ - const silk_float frame[], /* I vector to correlate */ - opus_int start_lag, /* I start lag */ - opus_int sf_length, /* I sub frame length */ - opus_int nb_subfr, /* I number of subframes */ - opus_int complexity, /* I Complexity setting */ - int arch /* I Run-time architecture */ -) -{ - const silk_float *target_ptr; - opus_int i, j, k, lag_counter, lag_low, lag_high; - opus_int nb_cbk_search, delta, idx, cbk_size; - silk_float scratch_mem[ SCRATCH_SIZE ]; - opus_val32 xcorr[ SCRATCH_SIZE ]; - const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; - - silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); - silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); - - if( nb_subfr == PE_MAX_NB_SUBFR ) { - Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; - Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; - nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; - cbk_size = PE_NB_CBKS_STAGE3_MAX; - } else { - silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); - Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; - Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; - nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; - cbk_size = PE_NB_CBKS_STAGE3_10MS; - } - - target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */ - for( k = 0; k < nb_subfr; k++ ) { - lag_counter = 0; - - /* Calculate the correlations for each subframe */ - lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 ); - lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 ); - silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE); - celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1, arch ); - for( j = lag_low; j <= lag_high; j++ ) { - silk_assert( lag_counter < SCRATCH_SIZE ); - scratch_mem[ lag_counter ] = xcorr[ lag_high 
- j ]; - lag_counter++; - } - - delta = matrix_ptr( Lag_range_ptr, k, 0, 2 ); - for( i = 0; i < nb_cbk_search; i++ ) { - /* Fill out the 3 dim array that stores the correlations for */ - /* each code_book vector for each start lag */ - idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta; - for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) { - silk_assert( idx + j < SCRATCH_SIZE ); - silk_assert( idx + j < lag_counter ); - cross_corr_st3[ k ][ i ][ j ] = scratch_mem[ idx + j ]; - } - } - target_ptr += sf_length; - } -} - -/********************************************************************/ -/* Calculate the energies for first two subframes. The energies are */ -/* calculated recursively. */ -/********************************************************************/ -static void silk_P_Ana_calc_energy_st3( - silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ - const silk_float frame[], /* I vector to correlate */ - opus_int start_lag, /* I start lag */ - opus_int sf_length, /* I sub frame length */ - opus_int nb_subfr, /* I number of subframes */ - opus_int complexity /* I Complexity setting */ -) -{ - const silk_float *target_ptr, *basis_ptr; - double energy; - opus_int k, i, j, lag_counter; - opus_int nb_cbk_search, delta, idx, cbk_size, lag_diff; - silk_float scratch_mem[ SCRATCH_SIZE ]; - const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; - - silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); - silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); - - if( nb_subfr == PE_MAX_NB_SUBFR ) { - Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; - Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; - nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; - cbk_size = PE_NB_CBKS_STAGE3_MAX; - } else { - silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); - Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; - Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; - nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; - cbk_size 
= PE_NB_CBKS_STAGE3_10MS; - } - - target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; - for( k = 0; k < nb_subfr; k++ ) { - lag_counter = 0; - - /* Calculate the energy for first lag */ - basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) ); - energy = silk_energy_FLP( basis_ptr, sf_length ) + 1e-3; - silk_assert( energy >= 0.0 ); - scratch_mem[lag_counter] = (silk_float)energy; - lag_counter++; - - lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) - matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 ); - for( i = 1; i < lag_diff; i++ ) { - /* remove part outside new window */ - energy -= basis_ptr[sf_length - i] * (double)basis_ptr[sf_length - i]; - silk_assert( energy >= 0.0 ); - - /* add part that comes into window */ - energy += basis_ptr[ -i ] * (double)basis_ptr[ -i ]; - silk_assert( energy >= 0.0 ); - silk_assert( lag_counter < SCRATCH_SIZE ); - scratch_mem[lag_counter] = (silk_float)energy; - lag_counter++; - } - - delta = matrix_ptr( Lag_range_ptr, k, 0, 2 ); - for( i = 0; i < nb_cbk_search; i++ ) { - /* Fill out the 3 dim array that stores the correlations for */ - /* each code_book vector for each start lag */ - idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta; - for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) { - silk_assert( idx + j < SCRATCH_SIZE ); - silk_assert( idx + j < lag_counter ); - energies_st3[ k ][ i ][ j ] = scratch_mem[ idx + j ]; - silk_assert( energies_st3[ k ][ i ][ j ] >= 0.0f ); - } - } - target_ptr += sf_length; - } -} diff --git a/thirdparty/opus/silk/float/prefilter_FLP.c b/thirdparty/opus/silk/float/prefilter_FLP.c deleted file mode 100644 index 8bc32fb410..0000000000 --- a/thirdparty/opus/silk/float/prefilter_FLP.c +++ /dev/null @@ -1,206 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" -#include "tuning_parameters.h" - -/* -* Prefilter for finding Quantizer input signal -*/ -static OPUS_INLINE void silk_prefilt_FLP( - silk_prefilter_state_FLP *P, /* I/O state */ - silk_float st_res[], /* I */ - silk_float xw[], /* O */ - silk_float *HarmShapeFIR, /* I */ - silk_float Tilt, /* I */ - silk_float LF_MA_shp, /* I */ - silk_float LF_AR_shp, /* I */ - opus_int lag, /* I */ - opus_int length /* I */ -); - -static void silk_warped_LPC_analysis_filter_FLP( - silk_float state[], /* I/O State [order + 1] */ - silk_float res[], /* O Residual signal [length] */ - const silk_float coef[], /* I Coefficients [order] */ - const silk_float input[], /* I Input signal [length] */ - const silk_float lambda, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order /* I Filter order (even) */ -) -{ - opus_int n, i; - silk_float acc, tmp1, tmp2; - - /* Order must be even */ - silk_assert( ( order & 1 ) == 0 ); - - for( n = 0; n < length; n++ ) { - /* Output of lowpass section */ - tmp2 = state[ 0 ] + lambda * state[ 1 ]; - state[ 0 ] = input[ n ]; - /* Output of allpass section */ - tmp1 = state[ 1 ] + lambda * ( state[ 2 ] - tmp2 ); - state[ 1 ] = tmp2; - acc = coef[ 0 ] * tmp2; - /* Loop over allpass sections */ - for( i = 2; i < order; i += 2 ) { - /* Output of allpass section */ - tmp2 = state[ i ] + lambda * ( state[ i + 1 ] - tmp1 ); - state[ i ] = tmp1; - acc += coef[ i - 1 ] * tmp1; - /* Output of allpass section */ - tmp1 = state[ i + 1 ] + lambda * ( state[ i + 2 ] - tmp2 ); - state[ i + 1 ] = tmp2; - acc += coef[ i ] * tmp2; - } - state[ order ] = tmp1; - acc += coef[ order - 1 ] * tmp1; - res[ n ] = input[ n ] - acc; - } -} - -/* -* silk_prefilter. 
Main prefilter function -*/ -void silk_prefilter_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - const silk_encoder_control_FLP *psEncCtrl, /* I Encoder control FLP */ - silk_float xw[], /* O Weighted signal */ - const silk_float x[] /* I Speech signal */ -) -{ - silk_prefilter_state_FLP *P = &psEnc->sPrefilt; - opus_int j, k, lag; - silk_float HarmShapeGain, Tilt, LF_MA_shp, LF_AR_shp; - silk_float B[ 2 ]; - const silk_float *AR1_shp; - const silk_float *px; - silk_float *pxw; - silk_float HarmShapeFIR[ 3 ]; - silk_float st_res[ MAX_SUB_FRAME_LENGTH + MAX_LPC_ORDER ]; - - /* Set up pointers */ - px = x; - pxw = xw; - lag = P->lagPrev; - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - /* Update Variables that change per sub frame */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - lag = psEncCtrl->pitchL[ k ]; - } - - /* Noise shape parameters */ - HarmShapeGain = psEncCtrl->HarmShapeGain[ k ] * ( 1.0f - psEncCtrl->HarmBoost[ k ] ); - HarmShapeFIR[ 0 ] = 0.25f * HarmShapeGain; - HarmShapeFIR[ 1 ] = 32767.0f / 65536.0f * HarmShapeGain; - HarmShapeFIR[ 2 ] = 0.25f * HarmShapeGain; - Tilt = psEncCtrl->Tilt[ k ]; - LF_MA_shp = psEncCtrl->LF_MA_shp[ k ]; - LF_AR_shp = psEncCtrl->LF_AR_shp[ k ]; - AR1_shp = &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ]; - - /* Short term FIR filtering */ - silk_warped_LPC_analysis_filter_FLP( P->sAR_shp, st_res, AR1_shp, px, - (silk_float)psEnc->sCmn.warping_Q16 / 65536.0f, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder ); - - /* Reduce (mainly) low frequencies during harmonic emphasis */ - B[ 0 ] = psEncCtrl->GainsPre[ k ]; - B[ 1 ] = -psEncCtrl->GainsPre[ k ] * - ( psEncCtrl->HarmBoost[ k ] * HarmShapeGain + INPUT_TILT + psEncCtrl->coding_quality * HIGH_RATE_INPUT_TILT ); - pxw[ 0 ] = B[ 0 ] * st_res[ 0 ] + B[ 1 ] * P->sHarmHP; - for( j = 1; j < psEnc->sCmn.subfr_length; j++ ) { - pxw[ j ] = B[ 0 ] * st_res[ j ] + B[ 1 ] * st_res[ j - 1 ]; - } - P->sHarmHP = st_res[ psEnc->sCmn.subfr_length - 1 ]; - - 
silk_prefilt_FLP( P, pxw, pxw, HarmShapeFIR, Tilt, LF_MA_shp, LF_AR_shp, lag, psEnc->sCmn.subfr_length ); - - px += psEnc->sCmn.subfr_length; - pxw += psEnc->sCmn.subfr_length; - } - P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ]; -} - -/* -* Prefilter for finding Quantizer input signal -*/ -static OPUS_INLINE void silk_prefilt_FLP( - silk_prefilter_state_FLP *P, /* I/O state */ - silk_float st_res[], /* I */ - silk_float xw[], /* O */ - silk_float *HarmShapeFIR, /* I */ - silk_float Tilt, /* I */ - silk_float LF_MA_shp, /* I */ - silk_float LF_AR_shp, /* I */ - opus_int lag, /* I */ - opus_int length /* I */ -) -{ - opus_int i; - opus_int idx, LTP_shp_buf_idx; - silk_float n_Tilt, n_LF, n_LTP; - silk_float sLF_AR_shp, sLF_MA_shp; - silk_float *LTP_shp_buf; - - /* To speed up use temp variables instead of using the struct */ - LTP_shp_buf = P->sLTP_shp; - LTP_shp_buf_idx = P->sLTP_shp_buf_idx; - sLF_AR_shp = P->sLF_AR_shp; - sLF_MA_shp = P->sLF_MA_shp; - - for( i = 0; i < length; i++ ) { - if( lag > 0 ) { - silk_assert( HARM_SHAPE_FIR_TAPS == 3 ); - idx = lag + LTP_shp_buf_idx; - n_LTP = LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ] * HarmShapeFIR[ 0 ]; - n_LTP += LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ] * HarmShapeFIR[ 1 ]; - n_LTP += LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ] * HarmShapeFIR[ 2 ]; - } else { - n_LTP = 0; - } - - n_Tilt = sLF_AR_shp * Tilt; - n_LF = sLF_AR_shp * LF_AR_shp + sLF_MA_shp * LF_MA_shp; - - sLF_AR_shp = st_res[ i ] - n_Tilt; - sLF_MA_shp = sLF_AR_shp - n_LF; - - LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; - LTP_shp_buf[ LTP_shp_buf_idx ] = sLF_MA_shp; - - xw[ i ] = sLF_MA_shp - n_LTP; - } - /* Copy temp variable back to state */ - P->sLF_AR_shp = sLF_AR_shp; - P->sLF_MA_shp = sLF_MA_shp; - P->sLTP_shp_buf_idx = LTP_shp_buf_idx; -} diff --git a/thirdparty/opus/silk/float/process_gains_FLP.c b/thirdparty/opus/silk/float/process_gains_FLP.c deleted file mode 
100644 index c0da0dae44..0000000000 --- a/thirdparty/opus/silk/float/process_gains_FLP.c +++ /dev/null @@ -1,103 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" -#include "tuning_parameters.h" - -/* Processing of gains */ -void silk_process_gains_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ - silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - opus_int condCoding /* I The type of conditional coding to use */ -) -{ - silk_shape_state_FLP *psShapeSt = &psEnc->sShape; - opus_int k; - opus_int32 pGains_Q16[ MAX_NB_SUBFR ]; - silk_float s, InvMaxSqrVal, gain, quant_offset; - - /* Gain reduction when LTP coding gain is high */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - s = 1.0f - 0.5f * silk_sigmoid( 0.25f * ( psEncCtrl->LTPredCodGain - 12.0f ) ); - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->Gains[ k ] *= s; - } - } - - /* Limit the quantized signal */ - InvMaxSqrVal = ( silk_float )( pow( 2.0f, 0.33f * ( 21.0f - psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ) ) ) / psEnc->sCmn.subfr_length ); - - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - /* Soft limit on ratio residual energy and squared gains */ - gain = psEncCtrl->Gains[ k ]; - gain = ( silk_float )sqrt( gain * gain + psEncCtrl->ResNrg[ k ] * InvMaxSqrVal ); - psEncCtrl->Gains[ k ] = silk_min_float( gain, 32767.0f ); - } - - /* Prepare gains for noise shaping quantization */ - for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - pGains_Q16[ k ] = (opus_int32)( psEncCtrl->Gains[ k ] * 65536.0f ); - } - - /* Save unquantized gains and gain Index */ - silk_memcpy( psEncCtrl->GainsUnq_Q16, pGains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) ); - psEncCtrl->lastGainIndexPrev = psShapeSt->LastGainIndex; - - /* Quantize gains */ - silk_gains_quant( psEnc->sCmn.indices.GainsIndices, pGains_Q16, - &psShapeSt->LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); - - /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */ - 
for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { - psEncCtrl->Gains[ k ] = pGains_Q16[ k ] / 65536.0f; - } - - /* Set quantizer offset for voiced signals. Larger offset when LTP coding gain is low or tilt is high (ie low-pass) */ - if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { - if( psEncCtrl->LTPredCodGain + psEnc->sCmn.input_tilt_Q15 * ( 1.0f / 32768.0f ) > 1.0f ) { - psEnc->sCmn.indices.quantOffsetType = 0; - } else { - psEnc->sCmn.indices.quantOffsetType = 1; - } - } - - /* Quantizer boundary adjustment */ - quant_offset = silk_Quantization_Offsets_Q10[ psEnc->sCmn.indices.signalType >> 1 ][ psEnc->sCmn.indices.quantOffsetType ] / 1024.0f; - psEncCtrl->Lambda = LAMBDA_OFFSET - + LAMBDA_DELAYED_DECISIONS * psEnc->sCmn.nStatesDelayedDecision - + LAMBDA_SPEECH_ACT * psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ) - + LAMBDA_INPUT_QUALITY * psEncCtrl->input_quality - + LAMBDA_CODING_QUALITY * psEncCtrl->coding_quality - + LAMBDA_QUANT_OFFSET * quant_offset; - - silk_assert( psEncCtrl->Lambda > 0.0f ); - silk_assert( psEncCtrl->Lambda < 2.0f ); -} diff --git a/thirdparty/opus/silk/float/regularize_correlations_FLP.c b/thirdparty/opus/silk/float/regularize_correlations_FLP.c deleted file mode 100644 index df4612604c..0000000000 --- a/thirdparty/opus/silk/float/regularize_correlations_FLP.c +++ /dev/null @@ -1,48 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" - -/* Add noise to matrix diagonal */ -void silk_regularize_correlations_FLP( - silk_float *XX, /* I/O Correlation matrices */ - silk_float *xx, /* I/O Correlation values */ - const silk_float noise, /* I Noise energy to add */ - const opus_int D /* I Dimension of XX */ -) -{ - opus_int i; - - for( i = 0; i < D; i++ ) { - matrix_ptr( &XX[ 0 ], i, i, D ) += noise; - } - xx[ 0 ] += noise; -} diff --git a/thirdparty/opus/silk/float/residual_energy_FLP.c b/thirdparty/opus/silk/float/residual_energy_FLP.c deleted file mode 100644 index b2e03a86a4..0000000000 --- a/thirdparty/opus/silk/float/residual_energy_FLP.c +++ /dev/null @@ -1,117 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" - -#define MAX_ITERATIONS_RESIDUAL_NRG 10 -#define REGULARIZATION_FACTOR 1e-8f - -/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */ -silk_float silk_residual_energy_covar_FLP( /* O Weighted residual energy */ - const silk_float *c, /* I Filter coefficients */ - silk_float *wXX, /* I/O Weighted correlation matrix, reg. 
out */ - const silk_float *wXx, /* I Weighted correlation vector */ - const silk_float wxx, /* I Weighted correlation value */ - const opus_int D /* I Dimension */ -) -{ - opus_int i, j, k; - silk_float tmp, nrg = 0.0f, regularization; - - /* Safety checks */ - silk_assert( D >= 0 ); - - regularization = REGULARIZATION_FACTOR * ( wXX[ 0 ] + wXX[ D * D - 1 ] ); - for( k = 0; k < MAX_ITERATIONS_RESIDUAL_NRG; k++ ) { - nrg = wxx; - - tmp = 0.0f; - for( i = 0; i < D; i++ ) { - tmp += wXx[ i ] * c[ i ]; - } - nrg -= 2.0f * tmp; - - /* compute c' * wXX * c, assuming wXX is symmetric */ - for( i = 0; i < D; i++ ) { - tmp = 0.0f; - for( j = i + 1; j < D; j++ ) { - tmp += matrix_c_ptr( wXX, i, j, D ) * c[ j ]; - } - nrg += c[ i ] * ( 2.0f * tmp + matrix_c_ptr( wXX, i, i, D ) * c[ i ] ); - } - if( nrg > 0 ) { - break; - } else { - /* Add white noise */ - for( i = 0; i < D; i++ ) { - matrix_c_ptr( wXX, i, i, D ) += regularization; - } - /* Increase noise for next run */ - regularization *= 2.0f; - } - } - if( k == MAX_ITERATIONS_RESIDUAL_NRG ) { - silk_assert( nrg == 0 ); - nrg = 1.0f; - } - - return nrg; -} - -/* Calculates residual energies of input subframes where all subframes have LPC_order */ -/* of preceding samples */ -void silk_residual_energy_FLP( - silk_float nrgs[ MAX_NB_SUBFR ], /* O Residual energy per subframe */ - const silk_float x[], /* I Input signal */ - silk_float a[ 2 ][ MAX_LPC_ORDER ], /* I AR coefs for each frame half */ - const silk_float gains[], /* I Quantization gains */ - const opus_int subfr_length, /* I Subframe length */ - const opus_int nb_subfr, /* I number of subframes */ - const opus_int LPC_order /* I LPC order */ -) -{ - opus_int shift; - silk_float *LPC_res_ptr, LPC_res[ ( MAX_FRAME_LENGTH + MAX_NB_SUBFR * MAX_LPC_ORDER ) / 2 ]; - - LPC_res_ptr = LPC_res + LPC_order; - shift = LPC_order + subfr_length; - - /* Filter input to create the LPC residual for each frame half, and measure subframe energies */ - silk_LPC_analysis_filter_FLP( 
LPC_res, a[ 0 ], x + 0 * shift, 2 * shift, LPC_order ); - nrgs[ 0 ] = ( silk_float )( gains[ 0 ] * gains[ 0 ] * silk_energy_FLP( LPC_res_ptr + 0 * shift, subfr_length ) ); - nrgs[ 1 ] = ( silk_float )( gains[ 1 ] * gains[ 1 ] * silk_energy_FLP( LPC_res_ptr + 1 * shift, subfr_length ) ); - - if( nb_subfr == MAX_NB_SUBFR ) { - silk_LPC_analysis_filter_FLP( LPC_res, a[ 1 ], x + 2 * shift, 2 * shift, LPC_order ); - nrgs[ 2 ] = ( silk_float )( gains[ 2 ] * gains[ 2 ] * silk_energy_FLP( LPC_res_ptr + 0 * shift, subfr_length ) ); - nrgs[ 3 ] = ( silk_float )( gains[ 3 ] * gains[ 3 ] * silk_energy_FLP( LPC_res_ptr + 1 * shift, subfr_length ) ); - } -} diff --git a/thirdparty/opus/silk/float/scale_copy_vector_FLP.c b/thirdparty/opus/silk/float/scale_copy_vector_FLP.c deleted file mode 100644 index 20db32b3b1..0000000000 --- a/thirdparty/opus/silk/float/scale_copy_vector_FLP.c +++ /dev/null @@ -1,57 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FLP.h" - -/* copy and multiply a vector by a constant */ -void silk_scale_copy_vector_FLP( - silk_float *data_out, - const silk_float *data_in, - silk_float gain, - opus_int dataSize -) -{ - opus_int i, dataSize4; - - /* 4x unrolled loop */ - dataSize4 = dataSize & 0xFFFC; - for( i = 0; i < dataSize4; i += 4 ) { - data_out[ i + 0 ] = gain * data_in[ i + 0 ]; - data_out[ i + 1 ] = gain * data_in[ i + 1 ]; - data_out[ i + 2 ] = gain * data_in[ i + 2 ]; - data_out[ i + 3 ] = gain * data_in[ i + 3 ]; - } - - /* any remaining elements */ - for( ; i < dataSize; i++ ) { - data_out[ i ] = gain * data_in[ i ]; - } -} diff --git a/thirdparty/opus/silk/float/scale_vector_FLP.c b/thirdparty/opus/silk/float/scale_vector_FLP.c deleted file mode 100644 index 108fdcbed5..0000000000 --- a/thirdparty/opus/silk/float/scale_vector_FLP.c +++ /dev/null @@ -1,56 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FLP.h" - -/* multiply a vector by a constant */ -void silk_scale_vector_FLP( - silk_float *data1, - silk_float gain, - opus_int dataSize -) -{ - opus_int i, dataSize4; - - /* 4x unrolled loop */ - dataSize4 = dataSize & 0xFFFC; - for( i = 0; i < dataSize4; i += 4 ) { - data1[ i + 0 ] *= gain; - data1[ i + 1 ] *= gain; - data1[ i + 2 ] *= gain; - data1[ i + 3 ] *= gain; - } - - /* any remaining elements */ - for( ; i < dataSize; i++ ) { - data1[ i ] *= gain; - } -} diff --git a/thirdparty/opus/silk/float/schur_FLP.c b/thirdparty/opus/silk/float/schur_FLP.c deleted file mode 100644 index ee436f8351..0000000000 --- a/thirdparty/opus/silk/float/schur_FLP.c +++ /dev/null @@ -1,70 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FLP.h" - -silk_float silk_schur_FLP( /* O returns residual energy */ - silk_float refl_coef[], /* O reflection coefficients (length order) */ - const silk_float auto_corr[], /* I autocorrelation sequence (length order+1) */ - opus_int order /* I order */ -) -{ - opus_int k, n; - silk_float C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ]; - silk_float Ctmp1, Ctmp2, rc_tmp; - - silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 ); - - /* Copy correlations */ - for( k = 0; k < order+1; k++ ) { - C[ k ][ 0 ] = C[ k ][ 1 ] = auto_corr[ k ]; - } - - for( k = 0; k < order; k++ ) { - /* Get reflection coefficient */ - rc_tmp = -C[ k + 1 ][ 0 ] / silk_max_float( C[ 0 ][ 1 ], 1e-9f ); - - /* Save the output */ - refl_coef[ k ] = rc_tmp; - - /* Update correlations */ - for( n = 0; n < order - k; n++ ) { - Ctmp1 = C[ n + k + 1 ][ 0 ]; - Ctmp2 = C[ n ][ 1 ]; - C[ n + k + 1 ][ 0 ] = Ctmp1 + Ctmp2 * rc_tmp; - C[ n ][ 1 ] = Ctmp2 + Ctmp1 * rc_tmp; - } - } - - /* Return residual energy */ - return C[ 0 ][ 1 ]; -} - diff --git a/thirdparty/opus/silk/float/solve_LS_FLP.c b/thirdparty/opus/silk/float/solve_LS_FLP.c deleted file mode 100644 index 7c90d665a0..0000000000 --- a/thirdparty/opus/silk/float/solve_LS_FLP.c +++ /dev/null @@ -1,207 +0,0 @@ -/*********************************************************************** 
-Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" -#include "tuning_parameters.h" - -/********************************************************************** - * LDL Factorisation. Finds the upper triangular matrix L and the diagonal - * Matrix D (only the diagonal elements returned in a vector)such that - * the symmetric matric A is given by A = L*D*L'. 
- **********************************************************************/ -static OPUS_INLINE void silk_LDL_FLP( - silk_float *A, /* I/O Pointer to Symetric Square Matrix */ - opus_int M, /* I Size of Matrix */ - silk_float *L, /* I/O Pointer to Square Upper triangular Matrix */ - silk_float *Dinv /* I/O Pointer to vector holding the inverse diagonal elements of D */ -); - -/********************************************************************** - * Function to solve linear equation Ax = b, when A is a MxM lower - * triangular matrix, with ones on the diagonal. - **********************************************************************/ -static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP( - const silk_float *L, /* I Pointer to Lower Triangular Matrix */ - opus_int M, /* I Dim of Matrix equation */ - const silk_float *b, /* I b Vector */ - silk_float *x /* O x Vector */ -); - -/********************************************************************** - * Function to solve linear equation (A^T)x = b, when A is a MxM lower - * triangular, with ones on the diagonal. (ie then A^T is upper triangular) - **********************************************************************/ -static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( - const silk_float *L, /* I Pointer to Lower Triangular Matrix */ - opus_int M, /* I Dim of Matrix equation */ - const silk_float *b, /* I b Vector */ - silk_float *x /* O x Vector */ -); - -/********************************************************************** - * Function to solve linear equation Ax = b, when A is a MxM - * symmetric square matrix - using LDL factorisation - **********************************************************************/ -void silk_solve_LDL_FLP( - silk_float *A, /* I/O Symmetric square matrix, out: reg. 
*/ - const opus_int M, /* I Size of matrix */ - const silk_float *b, /* I Pointer to b vector */ - silk_float *x /* O Pointer to x solution vector */ -) -{ - opus_int i; - silk_float L[ MAX_MATRIX_SIZE ][ MAX_MATRIX_SIZE ]; - silk_float T[ MAX_MATRIX_SIZE ]; - silk_float Dinv[ MAX_MATRIX_SIZE ]; /* inverse diagonal elements of D*/ - - silk_assert( M <= MAX_MATRIX_SIZE ); - - /*************************************************** - Factorize A by LDL such that A = L*D*(L^T), - where L is lower triangular with ones on diagonal - ****************************************************/ - silk_LDL_FLP( A, M, &L[ 0 ][ 0 ], Dinv ); - - /**************************************************** - * substitute D*(L^T) = T. ie: - L*D*(L^T)*x = b => L*T = b <=> T = inv(L)*b - ******************************************************/ - silk_SolveWithLowerTriangularWdiagOnes_FLP( &L[ 0 ][ 0 ], M, b, T ); - - /**************************************************** - D*(L^T)*x = T <=> (L^T)*x = inv(D)*T, because D is - diagonal just multiply with 1/d_i - ****************************************************/ - for( i = 0; i < M; i++ ) { - T[ i ] = T[ i ] * Dinv[ i ]; - } - /**************************************************** - x = inv(L') * inv(D) * T - *****************************************************/ - silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( &L[ 0 ][ 0 ], M, T, x ); -} - -static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( - const silk_float *L, /* I Pointer to Lower Triangular Matrix */ - opus_int M, /* I Dim of Matrix equation */ - const silk_float *b, /* I b Vector */ - silk_float *x /* O x Vector */ -) -{ - opus_int i, j; - silk_float temp; - const silk_float *ptr1; - - for( i = M - 1; i >= 0; i-- ) { - ptr1 = matrix_adr( L, 0, i, M ); - temp = 0; - for( j = M - 1; j > i ; j-- ) { - temp += ptr1[ j * M ] * x[ j ]; - } - temp = b[ i ] - temp; - x[ i ] = temp; - } -} - -static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP( - const 
silk_float *L, /* I Pointer to Lower Triangular Matrix */ - opus_int M, /* I Dim of Matrix equation */ - const silk_float *b, /* I b Vector */ - silk_float *x /* O x Vector */ -) -{ - opus_int i, j; - silk_float temp; - const silk_float *ptr1; - - for( i = 0; i < M; i++ ) { - ptr1 = matrix_adr( L, i, 0, M ); - temp = 0; - for( j = 0; j < i; j++ ) { - temp += ptr1[ j ] * x[ j ]; - } - temp = b[ i ] - temp; - x[ i ] = temp; - } -} - -static OPUS_INLINE void silk_LDL_FLP( - silk_float *A, /* I/O Pointer to Symetric Square Matrix */ - opus_int M, /* I Size of Matrix */ - silk_float *L, /* I/O Pointer to Square Upper triangular Matrix */ - silk_float *Dinv /* I/O Pointer to vector holding the inverse diagonal elements of D */ -) -{ - opus_int i, j, k, loop_count, err = 1; - silk_float *ptr1, *ptr2; - double temp, diag_min_value; - silk_float v[ MAX_MATRIX_SIZE ], D[ MAX_MATRIX_SIZE ]; /* temp arrays*/ - - silk_assert( M <= MAX_MATRIX_SIZE ); - - diag_min_value = FIND_LTP_COND_FAC * 0.5f * ( A[ 0 ] + A[ M * M - 1 ] ); - for( loop_count = 0; loop_count < M && err == 1; loop_count++ ) { - err = 0; - for( j = 0; j < M; j++ ) { - ptr1 = matrix_adr( L, j, 0, M ); - temp = matrix_ptr( A, j, j, M ); /* element in row j column j*/ - for( i = 0; i < j; i++ ) { - v[ i ] = ptr1[ i ] * D[ i ]; - temp -= ptr1[ i ] * v[ i ]; - } - if( temp < diag_min_value ) { - /* Badly conditioned matrix: add white noise and run again */ - temp = ( loop_count + 1 ) * diag_min_value - temp; - for( i = 0; i < M; i++ ) { - matrix_ptr( A, i, i, M ) += ( silk_float )temp; - } - err = 1; - break; - } - D[ j ] = ( silk_float )temp; - Dinv[ j ] = ( silk_float )( 1.0f / temp ); - matrix_ptr( L, j, j, M ) = 1.0f; - - ptr1 = matrix_adr( A, j, 0, M ); - ptr2 = matrix_adr( L, j + 1, 0, M); - for( i = j + 1; i < M; i++ ) { - temp = 0.0; - for( k = 0; k < j; k++ ) { - temp += ptr2[ k ] * v[ k ]; - } - matrix_ptr( L, i, j, M ) = ( silk_float )( ( ptr1[ i ] - temp ) * Dinv[ j ] ); - ptr2 += M; /* go to next column*/ 
- } - } - } - silk_assert( err == 0 ); -} - diff --git a/thirdparty/opus/silk/float/sort_FLP.c b/thirdparty/opus/silk/float/sort_FLP.c deleted file mode 100644 index f08d7592c5..0000000000 --- a/thirdparty/opus/silk/float/sort_FLP.c +++ /dev/null @@ -1,83 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* Insertion sort (fast for already almost sorted arrays): */ -/* Best case: O(n) for an already sorted array */ -/* Worst case: O(n^2) for an inversely sorted array */ - -#include "typedef.h" -#include "SigProc_FLP.h" - -void silk_insertion_sort_decreasing_FLP( - silk_float *a, /* I/O Unsorted / Sorted vector */ - opus_int *idx, /* O Index vector for the sorted elements */ - const opus_int L, /* I Vector length */ - const opus_int K /* I Number of correctly sorted positions */ -) -{ - silk_float value; - opus_int i, j; - - /* Safety checks */ - silk_assert( K > 0 ); - silk_assert( L > 0 ); - silk_assert( L >= K ); - - /* Write start indices in index vector */ - for( i = 0; i < K; i++ ) { - idx[ i ] = i; - } - - /* Sort vector elements by value, decreasing order */ - for( i = 1; i < K; i++ ) { - value = a[ i ]; - for( j = i - 1; ( j >= 0 ) && ( value > a[ j ] ); j-- ) { - a[ j + 1 ] = a[ j ]; /* Shift value */ - idx[ j + 1 ] = idx[ j ]; /* Shift index */ - } - a[ j + 1 ] = value; /* Write value */ - idx[ j + 1 ] = i; /* Write index */ - } - - /* If less than L values are asked check the remaining values, */ - /* but only spend CPU to ensure that the K first values are correct */ - for( i = K; i < L; i++ ) { - value = a[ i ]; - if( value > a[ K - 1 ] ) { - for( j = K - 2; ( j >= 0 ) && ( value > a[ j ] ); j-- ) { - a[ j + 1 ] = a[ j ]; /* Shift value */ - idx[ j + 1 ] = idx[ j ]; /* Shift index */ - } - a[ j + 1 ] = value; /* Write value */ - idx[ j + 1 ] = i; /* Write index */ - } - } -} diff --git a/thirdparty/opus/silk/float/structs_FLP.h b/thirdparty/opus/silk/float/structs_FLP.h deleted file mode 100644 index 14d647ced2..0000000000 --- a/thirdparty/opus/silk/float/structs_FLP.h +++ /dev/null @@ -1,132 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. 
All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifndef SILK_STRUCTS_FLP_H -#define SILK_STRUCTS_FLP_H - -#include "typedef.h" -#include "main.h" -#include "structs.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/********************************/ -/* Noise shaping analysis state */ -/********************************/ -typedef struct { - opus_int8 LastGainIndex; - silk_float HarmBoost_smth; - silk_float HarmShapeGain_smth; - silk_float Tilt_smth; -} silk_shape_state_FLP; - -/********************************/ -/* Prefilter state */ -/********************************/ -typedef struct { - silk_float sLTP_shp[ LTP_BUF_LENGTH ]; - silk_float sAR_shp[ MAX_SHAPE_LPC_ORDER + 1 ]; - opus_int sLTP_shp_buf_idx; - silk_float sLF_AR_shp; - silk_float sLF_MA_shp; - silk_float sHarmHP; - opus_int32 rand_seed; - opus_int lagPrev; -} silk_prefilter_state_FLP; - -/********************************/ -/* Encoder state FLP */ -/********************************/ -typedef struct { - silk_encoder_state sCmn; /* Common struct, shared with fixed-point code */ - silk_shape_state_FLP sShape; /* Noise shaping state */ - silk_prefilter_state_FLP sPrefilt; /* Prefilter State */ - - /* Buffer for find pitch and noise shape analysis */ - silk_float x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];/* Buffer for find pitch and noise shape analysis */ - silk_float LTPCorr; /* Normalized correlation from pitch lag estimator */ -} silk_encoder_state_FLP; - -/************************/ -/* Encoder control FLP */ -/************************/ -typedef struct { - /* Prediction and coding parameters */ - silk_float Gains[ MAX_NB_SUBFR ]; - silk_float PredCoef[ 2 ][ MAX_LPC_ORDER ]; /* holds interpolated and final coefficients */ - silk_float LTPCoef[LTP_ORDER * MAX_NB_SUBFR]; - silk_float LTP_scale; - opus_int pitchL[ MAX_NB_SUBFR ]; - - /* Noise shaping parameters */ - silk_float AR1[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; - silk_float AR2[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; - 
silk_float LF_MA_shp[ MAX_NB_SUBFR ]; - silk_float LF_AR_shp[ MAX_NB_SUBFR ]; - silk_float GainsPre[ MAX_NB_SUBFR ]; - silk_float HarmBoost[ MAX_NB_SUBFR ]; - silk_float Tilt[ MAX_NB_SUBFR ]; - silk_float HarmShapeGain[ MAX_NB_SUBFR ]; - silk_float Lambda; - silk_float input_quality; - silk_float coding_quality; - - /* Measures */ - silk_float sparseness; - silk_float predGain; - silk_float LTPredCodGain; - silk_float ResNrg[ MAX_NB_SUBFR ]; /* Residual energy per subframe */ - - /* Parameters for CBR mode */ - opus_int32 GainsUnq_Q16[ MAX_NB_SUBFR ]; - opus_int8 lastGainIndexPrev; -} silk_encoder_control_FLP; - -/************************/ -/* Encoder Super Struct */ -/************************/ -typedef struct { - silk_encoder_state_FLP state_Fxx[ ENCODER_NUM_CHANNELS ]; - stereo_enc_state sStereo; - opus_int32 nBitsUsedLBRR; - opus_int32 nBitsExceeded; - opus_int nChannelsAPI; - opus_int nChannelsInternal; - opus_int nPrevChannelsInternal; - opus_int timeSinceSwitchAllowed_ms; - opus_int allowBandwidthSwitch; - opus_int prev_decode_only_middle; -} silk_encoder; - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/silk/float/warped_autocorrelation_FLP.c b/thirdparty/opus/silk/float/warped_autocorrelation_FLP.c deleted file mode 100644 index 542414f48e..0000000000 --- a/thirdparty/opus/silk/float/warped_autocorrelation_FLP.c +++ /dev/null @@ -1,73 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" - -/* Autocorrelations for a warped frequency axis */ -void silk_warped_autocorrelation_FLP( - silk_float *corr, /* O Result [order + 1] */ - const silk_float *input, /* I Input data to correlate */ - const silk_float warping, /* I Warping coefficient */ - const opus_int length, /* I Length of input */ - const opus_int order /* I Correlation order (even) */ -) -{ - opus_int n, i; - double tmp1, tmp2; - double state[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; - double C[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; - - /* Order must be even */ - silk_assert( ( order & 1 ) == 0 ); - - /* Loop over samples */ - for( n = 0; n < length; n++ ) { - tmp1 = input[ n ]; - /* Loop over allpass sections */ - for( i = 0; i < order; i += 2 ) { - /* Output of allpass section */ - tmp2 = state[ i ] + warping * ( state[ i + 1 ] - tmp1 ); - state[ i ] = tmp1; - C[ i ] += state[ 0 ] * tmp1; - /* Output of allpass section */ - tmp1 = state[ i + 1 ] + warping * ( state[ i + 2 ] - tmp2 ); - state[ i + 1 ] = tmp2; - C[ i + 1 ] += state[ 0 ] * tmp2; - } - state[ order ] = tmp1; - C[ order ] += state[ 0 ] * tmp1; - } - - /* Copy correlations in silk_float output format */ - for( i = 0; i < order + 1; i++ ) { - corr[ i ] = ( silk_float )C[ i ]; - } -} diff --git a/thirdparty/opus/silk/float/wrappers_FLP.c b/thirdparty/opus/silk/float/wrappers_FLP.c deleted file mode 100644 index 6666b8efaa..0000000000 --- a/thirdparty/opus/silk/float/wrappers_FLP.c +++ /dev/null @@ -1,202 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main_FLP.h" - -/* Wrappers. 
Calls flp / fix code */ - -/* Convert AR filter coefficients to NLSF parameters */ -void silk_A2NLSF_FLP( - opus_int16 *NLSF_Q15, /* O NLSF vector [ LPC_order ] */ - const silk_float *pAR, /* I LPC coefficients [ LPC_order ] */ - const opus_int LPC_order /* I LPC order */ -) -{ - opus_int i; - opus_int32 a_fix_Q16[ MAX_LPC_ORDER ]; - - for( i = 0; i < LPC_order; i++ ) { - a_fix_Q16[ i ] = silk_float2int( pAR[ i ] * 65536.0f ); - } - - silk_A2NLSF( NLSF_Q15, a_fix_Q16, LPC_order ); -} - -/* Convert LSF parameters to AR prediction filter coefficients */ -void silk_NLSF2A_FLP( - silk_float *pAR, /* O LPC coefficients [ LPC_order ] */ - const opus_int16 *NLSF_Q15, /* I NLSF vector [ LPC_order ] */ - const opus_int LPC_order /* I LPC order */ -) -{ - opus_int i; - opus_int16 a_fix_Q12[ MAX_LPC_ORDER ]; - - silk_NLSF2A( a_fix_Q12, NLSF_Q15, LPC_order ); - - for( i = 0; i < LPC_order; i++ ) { - pAR[ i ] = ( silk_float )a_fix_Q12[ i ] * ( 1.0f / 4096.0f ); - } -} - -/******************************************/ -/* Floating-point NLSF processing wrapper */ -/******************************************/ -void silk_process_NLSFs_FLP( - silk_encoder_state *psEncC, /* I/O Encoder state */ - silk_float PredCoef[ 2 ][ MAX_LPC_ORDER ], /* O Prediction coefficients */ - opus_int16 NLSF_Q15[ MAX_LPC_ORDER ], /* I/O Normalized LSFs (quant out) (0 - (2^15-1)) */ - const opus_int16 prev_NLSF_Q15[ MAX_LPC_ORDER ] /* I Previous Normalized LSFs (0 - (2^15-1)) */ -) -{ - opus_int i, j; - opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ]; - - silk_process_NLSFs( psEncC, PredCoef_Q12, NLSF_Q15, prev_NLSF_Q15); - - for( j = 0; j < 2; j++ ) { - for( i = 0; i < psEncC->predictLPCOrder; i++ ) { - PredCoef[ j ][ i ] = ( silk_float )PredCoef_Q12[ j ][ i ] * ( 1.0f / 4096.0f ); - } - } -} - -/****************************************/ -/* Floating-point Silk NSQ wrapper */ -/****************************************/ -void silk_NSQ_wrapper_FLP( - silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ 
- silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ - SideInfoIndices *psIndices, /* I/O Quantization indices */ - silk_nsq_state *psNSQ, /* I/O Noise Shaping Quantzation state */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const silk_float x[] /* I Prefiltered input signal */ -) -{ - opus_int i, j; - opus_int32 x_Q3[ MAX_FRAME_LENGTH ]; - opus_int32 Gains_Q16[ MAX_NB_SUBFR ]; - silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ]; - opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ]; - opus_int LTP_scale_Q14; - - /* Noise shaping parameters */ - opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; - opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ]; /* Packs two int16 coefficients per int32 value */ - opus_int Lambda_Q10; - opus_int Tilt_Q14[ MAX_NB_SUBFR ]; - opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ]; - - /* Convert control struct to fix control struct */ - /* Noise shape parameters */ - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - for( j = 0; j < psEnc->sCmn.shapingLPCOrder; j++ ) { - AR2_Q13[ i * MAX_SHAPE_LPC_ORDER + j ] = silk_float2int( psEncCtrl->AR2[ i * MAX_SHAPE_LPC_ORDER + j ] * 8192.0f ); - } - } - - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - LF_shp_Q14[ i ] = silk_LSHIFT32( silk_float2int( psEncCtrl->LF_AR_shp[ i ] * 16384.0f ), 16 ) | - (opus_uint16)silk_float2int( psEncCtrl->LF_MA_shp[ i ] * 16384.0f ); - Tilt_Q14[ i ] = (opus_int)silk_float2int( psEncCtrl->Tilt[ i ] * 16384.0f ); - HarmShapeGain_Q14[ i ] = (opus_int)silk_float2int( psEncCtrl->HarmShapeGain[ i ] * 16384.0f ); - } - Lambda_Q10 = ( opus_int )silk_float2int( psEncCtrl->Lambda * 1024.0f ); - - /* prediction and coding parameters */ - for( i = 0; i < psEnc->sCmn.nb_subfr * LTP_ORDER; i++ ) { - LTPCoef_Q14[ i ] = (opus_int16)silk_float2int( psEncCtrl->LTPCoef[ i ] * 16384.0f ); - } - - for( j = 0; j < 2; j++ ) { - for( i = 0; i < psEnc->sCmn.predictLPCOrder; i++ ) { - PredCoef_Q12[ j ][ i ] = (opus_int16)silk_float2int( psEncCtrl->PredCoef[ j ][ i ] 
* 4096.0f ); - } - } - - for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { - Gains_Q16[ i ] = silk_float2int( psEncCtrl->Gains[ i ] * 65536.0f ); - silk_assert( Gains_Q16[ i ] > 0 ); - } - - if( psIndices->signalType == TYPE_VOICED ) { - LTP_scale_Q14 = silk_LTPScales_table_Q14[ psIndices->LTP_scaleIndex ]; - } else { - LTP_scale_Q14 = 0; - } - - /* Convert input to fix */ - for( i = 0; i < psEnc->sCmn.frame_length; i++ ) { - x_Q3[ i ] = silk_float2int( 8.0f * x[ i ] ); - } - - /* Call NSQ */ - if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { - silk_NSQ_del_dec( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14, - AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14, psEnc->sCmn.arch ); - } else { - silk_NSQ( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14, - AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14, psEnc->sCmn.arch ); - } -} - -/***********************************************/ -/* Floating-point Silk LTP quantiation wrapper */ -/***********************************************/ -void silk_quant_LTP_gains_FLP( - silk_float B[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (Un-)quantized LTP gains */ - opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook index */ - opus_int8 *periodicity_index, /* O Periodicity index */ - opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */ - const silk_float W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I Error weights */ - const opus_int mu_Q10, /* I Mu value (R/D tradeoff) */ - const opus_int lowComplexity, /* I Flag for low complexity */ - const opus_int nb_subfr, /* I number of subframes */ - int arch /* I Run-time architecture */ -) -{ - opus_int i; - opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ]; - opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ]; - - for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) { - B_Q14[ i ] = 
(opus_int16)silk_float2int( B[ i ] * 16384.0f ); - } - for( i = 0; i < nb_subfr * LTP_ORDER * LTP_ORDER; i++ ) { - W_Q18[ i ] = (opus_int32)silk_float2int( W[ i ] * 262144.0f ); - } - - silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, W_Q18, mu_Q10, lowComplexity, nb_subfr, arch ); - - for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) { - B[ i ] = (silk_float)B_Q14[ i ] * ( 1.0f / 16384.0f ); - } -} diff --git a/thirdparty/opus/silk/gain_quant.c b/thirdparty/opus/silk/gain_quant.c deleted file mode 100644 index 64ccd0611b..0000000000 --- a/thirdparty/opus/silk/gain_quant.c +++ /dev/null @@ -1,141 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -#define OFFSET ( ( MIN_QGAIN_DB * 128 ) / 6 + 16 * 128 ) -#define SCALE_Q16 ( ( 65536 * ( N_LEVELS_QGAIN - 1 ) ) / ( ( ( MAX_QGAIN_DB - MIN_QGAIN_DB ) * 128 ) / 6 ) ) -#define INV_SCALE_Q16 ( ( 65536 * ( ( ( MAX_QGAIN_DB - MIN_QGAIN_DB ) * 128 ) / 6 ) ) / ( N_LEVELS_QGAIN - 1 ) ) - -/* Gain scalar quantization with hysteresis, uniform on log scale */ -void silk_gains_quant( - opus_int8 ind[ MAX_NB_SUBFR ], /* O gain indices */ - opus_int32 gain_Q16[ MAX_NB_SUBFR ], /* I/O gains (quantized out) */ - opus_int8 *prev_ind, /* I/O last index in previous frame */ - const opus_int conditional, /* I first gain is delta coded if 1 */ - const opus_int nb_subfr /* I number of subframes */ -) -{ - opus_int k, double_step_size_threshold; - - for( k = 0; k < nb_subfr; k++ ) { - /* Convert to log scale, scale, floor() */ - ind[ k ] = silk_SMULWB( SCALE_Q16, silk_lin2log( gain_Q16[ k ] ) - OFFSET ); - - /* Round towards previous quantized gain (hysteresis) */ - if( ind[ k ] < *prev_ind ) { - ind[ k ]++; - } - ind[ k ] = silk_LIMIT_int( ind[ k ], 0, N_LEVELS_QGAIN - 1 ); - - /* Compute delta indices and limit */ - if( k == 0 && conditional == 0 ) { - /* Full index */ - ind[ k ] = silk_LIMIT_int( ind[ k ], *prev_ind + MIN_DELTA_GAIN_QUANT, N_LEVELS_QGAIN - 1 ); - *prev_ind = ind[ k ]; - } else { - /* Delta index */ - ind[ k 
] = ind[ k ] - *prev_ind; - - /* Double the quantization step size for large gain increases, so that the max gain level can be reached */ - double_step_size_threshold = 2 * MAX_DELTA_GAIN_QUANT - N_LEVELS_QGAIN + *prev_ind; - if( ind[ k ] > double_step_size_threshold ) { - ind[ k ] = double_step_size_threshold + silk_RSHIFT( ind[ k ] - double_step_size_threshold + 1, 1 ); - } - - ind[ k ] = silk_LIMIT_int( ind[ k ], MIN_DELTA_GAIN_QUANT, MAX_DELTA_GAIN_QUANT ); - - /* Accumulate deltas */ - if( ind[ k ] > double_step_size_threshold ) { - *prev_ind += silk_LSHIFT( ind[ k ], 1 ) - double_step_size_threshold; - } else { - *prev_ind += ind[ k ]; - } - - /* Shift to make non-negative */ - ind[ k ] -= MIN_DELTA_GAIN_QUANT; - } - - /* Scale and convert to linear scale */ - gain_Q16[ k ] = silk_log2lin( silk_min_32( silk_SMULWB( INV_SCALE_Q16, *prev_ind ) + OFFSET, 3967 ) ); /* 3967 = 31 in Q7 */ - } -} - -/* Gains scalar dequantization, uniform on log scale */ -void silk_gains_dequant( - opus_int32 gain_Q16[ MAX_NB_SUBFR ], /* O quantized gains */ - const opus_int8 ind[ MAX_NB_SUBFR ], /* I gain indices */ - opus_int8 *prev_ind, /* I/O last index in previous frame */ - const opus_int conditional, /* I first gain is delta coded if 1 */ - const opus_int nb_subfr /* I number of subframes */ -) -{ - opus_int k, ind_tmp, double_step_size_threshold; - - for( k = 0; k < nb_subfr; k++ ) { - if( k == 0 && conditional == 0 ) { - /* Gain index is not allowed to go down more than 16 steps (~21.8 dB) */ - *prev_ind = silk_max_int( ind[ k ], *prev_ind - 16 ); - } else { - /* Delta index */ - ind_tmp = ind[ k ] + MIN_DELTA_GAIN_QUANT; - - /* Accumulate deltas */ - double_step_size_threshold = 2 * MAX_DELTA_GAIN_QUANT - N_LEVELS_QGAIN + *prev_ind; - if( ind_tmp > double_step_size_threshold ) { - *prev_ind += silk_LSHIFT( ind_tmp, 1 ) - double_step_size_threshold; - } else { - *prev_ind += ind_tmp; - } - } - *prev_ind = silk_LIMIT_int( *prev_ind, 0, N_LEVELS_QGAIN - 1 ); - - /* Scale and 
convert to linear scale */ - gain_Q16[ k ] = silk_log2lin( silk_min_32( silk_SMULWB( INV_SCALE_Q16, *prev_ind ) + OFFSET, 3967 ) ); /* 3967 = 31 in Q7 */ - } -} - -/* Compute unique identifier of gain indices vector */ -opus_int32 silk_gains_ID( /* O returns unique identifier of gains */ - const opus_int8 ind[ MAX_NB_SUBFR ], /* I gain indices */ - const opus_int nb_subfr /* I number of subframes */ -) -{ - opus_int k; - opus_int32 gainsID; - - gainsID = 0; - for( k = 0; k < nb_subfr; k++ ) { - gainsID = silk_ADD_LSHIFT32( ind[ k ], gainsID, 8 ); - } - - return gainsID; -} diff --git a/thirdparty/opus/silk/init_decoder.c b/thirdparty/opus/silk/init_decoder.c deleted file mode 100644 index f887c67886..0000000000 --- a/thirdparty/opus/silk/init_decoder.c +++ /dev/null @@ -1,56 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/************************/ -/* Init Decoder State */ -/************************/ -opus_int silk_init_decoder( - silk_decoder_state *psDec /* I/O Decoder state pointer */ -) -{ - /* Clear the entire encoder state, except anything copied */ - silk_memset( psDec, 0, sizeof( silk_decoder_state ) ); - - /* Used to deactivate LSF interpolation */ - psDec->first_frame_after_reset = 1; - psDec->prev_gain_Q16 = 65536; - - /* Reset CNG state */ - silk_CNG_Reset( psDec ); - - /* Reset PLC state */ - silk_PLC_Reset( psDec ); - - return(0); -} - diff --git a/thirdparty/opus/silk/init_encoder.c b/thirdparty/opus/silk/init_encoder.c deleted file mode 100644 index 65995c33fa..0000000000 --- a/thirdparty/opus/silk/init_encoder.c +++ /dev/null @@ -1,64 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif -#ifdef FIXED_POINT -#include "main_FIX.h" -#else -#include "main_FLP.h" -#endif -#include "tuning_parameters.h" -#include "cpu_support.h" - -/*********************************/ -/* Initialize Silk Encoder state */ -/*********************************/ -opus_int silk_init_encoder( - silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk FIX encoder state */ - int arch /* I Run-time architecture */ -) -{ - opus_int ret = 0; - - /* Clear the entire encoder state */ - silk_memset( psEnc, 0, sizeof( silk_encoder_state_Fxx ) ); - - psEnc->sCmn.arch = arch; - - psEnc->sCmn.variable_HP_smth1_Q15 = silk_LSHIFT( silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) ) - ( 16 << 7 ), 8 ); - psEnc->sCmn.variable_HP_smth2_Q15 = psEnc->sCmn.variable_HP_smth1_Q15; - - /* Used to deactivate LSF interpolation, pitch prediction */ - psEnc->sCmn.first_frame_after_reset = 1; - - /* Initialize Silk VAD */ - ret += silk_VAD_Init( &psEnc->sCmn.sVAD ); - - return ret; -} diff --git a/thirdparty/opus/silk/inner_prod_aligned.c b/thirdparty/opus/silk/inner_prod_aligned.c deleted file mode 100644 index 257ae9e04e..0000000000 --- a/thirdparty/opus/silk/inner_prod_aligned.c +++ /dev/null @@ -1,47 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -opus_int32 silk_inner_prod_aligned_scale( - const opus_int16 *const inVec1, /* I input vector 1 */ - const opus_int16 *const inVec2, /* I input vector 2 */ - const opus_int scale, /* I number of bits to shift */ - const opus_int len /* I vector lengths */ -) -{ - opus_int i; - opus_int32 sum = 0; - for( i = 0; i < len; i++ ) { - sum = silk_ADD_RSHIFT32( sum, silk_SMULBB( inVec1[ i ], inVec2[ i ] ), scale ); - } - return sum; -} diff --git a/thirdparty/opus/silk/interpolate.c b/thirdparty/opus/silk/interpolate.c deleted file mode 100644 index 1bd8ca4d53..0000000000 --- a/thirdparty/opus/silk/interpolate.c +++ /dev/null @@ -1,51 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Interpolate two vectors */ -void silk_interpolate( - opus_int16 xi[ MAX_LPC_ORDER ], /* O interpolated vector */ - const opus_int16 x0[ MAX_LPC_ORDER ], /* I first vector */ - const opus_int16 x1[ MAX_LPC_ORDER ], /* I second vector */ - const opus_int ifact_Q2, /* I interp. 
factor, weight on 2nd vector */ - const opus_int d /* I number of parameters */ -) -{ - opus_int i; - - silk_assert( ifact_Q2 >= 0 ); - silk_assert( ifact_Q2 <= 4 ); - - for( i = 0; i < d; i++ ) { - xi[ i ] = (opus_int16)silk_ADD_RSHIFT( x0[ i ], silk_SMULBB( x1[ i ] - x0[ i ], ifact_Q2 ), 2 ); - } -} diff --git a/thirdparty/opus/silk/lin2log.c b/thirdparty/opus/silk/lin2log.c deleted file mode 100644 index d4fe515321..0000000000 --- a/thirdparty/opus/silk/lin2log.c +++ /dev/null @@ -1,46 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -/* Approximation of 128 * log2() (very close inverse of silk_log2lin()) */ -/* Convert input to a log scale */ -opus_int32 silk_lin2log( - const opus_int32 inLin /* I input in linear scale */ -) -{ - opus_int32 lz, frac_Q7; - - silk_CLZ_FRAC( inLin, &lz, &frac_Q7 ); - - /* Piece-wise parabolic approximation */ - return silk_LSHIFT( 31 - lz, 7 ) + silk_SMLAWB( frac_Q7, silk_MUL( frac_Q7, 128 - frac_Q7 ), 179 ); -} - diff --git a/thirdparty/opus/silk/log2lin.c b/thirdparty/opus/silk/log2lin.c deleted file mode 100644 index b7c48e4740..0000000000 --- a/thirdparty/opus/silk/log2lin.c +++ /dev/null @@ -1,58 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Approximation of 2^() (very close inverse of silk_lin2log()) */ -/* Convert input to a linear scale */ -opus_int32 silk_log2lin( - const opus_int32 inLog_Q7 /* I input on log scale */ -) -{ - opus_int32 out, frac_Q7; - - if( inLog_Q7 < 0 ) { - return 0; - } else if ( inLog_Q7 >= 3967 ) { - return silk_int32_MAX; - } - - out = silk_LSHIFT( 1, silk_RSHIFT( inLog_Q7, 7 ) ); - frac_Q7 = inLog_Q7 & 0x7F; - if( inLog_Q7 < 2048 ) { - /* Piece-wise parabolic approximation */ - out = silk_ADD_RSHIFT32( out, silk_MUL( out, silk_SMLAWB( frac_Q7, silk_SMULBB( frac_Q7, 128 - frac_Q7 ), -174 ) ), 7 ); - } else { - /* Piece-wise parabolic approximation */ - out = silk_MLA( out, silk_RSHIFT( out, 7 ), silk_SMLAWB( frac_Q7, silk_SMULBB( frac_Q7, 128 - frac_Q7 ), -174 ) ); - } - return out; -} diff --git a/thirdparty/opus/silk/macros.h 
b/thirdparty/opus/silk/macros.h deleted file mode 100644 index d3ca347520..0000000000 --- a/thirdparty/opus/silk/macros.h +++ /dev/null @@ -1,159 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifndef SILK_MACROS_H -#define SILK_MACROS_H - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "opus_types.h" -#include "opus_defines.h" -#include "arch.h" - -#if OPUS_GNUC_PREREQ(3, 0) -#define opus_likely(x) (__builtin_expect(!!(x), 1)) -#define opus_unlikely(x) (__builtin_expect(!!(x), 0)) -#else -#define opus_likely(x) (!!(x)) -#define opus_unlikely(x) (!!(x)) -#endif - -/* This is an OPUS_INLINE header file for general platform. */ - -/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ -#if OPUS_FAST_INT64 -#define silk_SMULWB(a32, b32) ((opus_int32)(((a32) * (opus_int64)((opus_int16)(b32))) >> 16)) -#else -#define silk_SMULWB(a32, b32) ((((a32) >> 16) * (opus_int32)((opus_int16)(b32))) + ((((a32) & 0x0000FFFF) * (opus_int32)((opus_int16)(b32))) >> 16)) -#endif - -/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */ -#if OPUS_FAST_INT64 -#define silk_SMLAWB(a32, b32, c32) ((opus_int32)((a32) + (((b32) * (opus_int64)((opus_int16)(c32))) >> 16))) -#else -#define silk_SMLAWB(a32, b32, c32) ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16))) -#endif - -/* (a32 * (b32 >> 16)) >> 16 */ -#if OPUS_FAST_INT64 -#define silk_SMULWT(a32, b32) ((opus_int32)(((a32) * (opus_int64)((b32) >> 16)) >> 16)) -#else -#define silk_SMULWT(a32, b32) (((a32) >> 16) * ((b32) >> 16) + ((((a32) & 0x0000FFFF) * ((b32) >> 16)) >> 16)) -#endif - -/* a32 + (b32 * (c32 >> 16)) >> 16 */ -#if OPUS_FAST_INT64 -#define silk_SMLAWT(a32, b32, c32) ((opus_int32)((a32) + (((b32) * ((opus_int64)(c32) >> 16)) >> 16))) -#else -#define silk_SMLAWT(a32, b32, c32) ((a32) + (((b32) >> 16) * ((c32) >> 16)) + ((((b32) & 0x0000FFFF) * ((c32) >> 16)) >> 16)) -#endif - -/* (opus_int32)((opus_int16)(a3))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */ -#define 
silk_SMULBB(a32, b32) ((opus_int32)((opus_int16)(a32)) * (opus_int32)((opus_int16)(b32))) - -/* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)) output have to be 32bit int */ -#define silk_SMLABB(a32, b32, c32) ((a32) + ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32))) - -/* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */ -#define silk_SMULBT(a32, b32) ((opus_int32)((opus_int16)(a32)) * ((b32) >> 16)) - -/* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */ -#define silk_SMLABT(a32, b32, c32) ((a32) + ((opus_int32)((opus_int16)(b32))) * ((c32) >> 16)) - -/* a64 + (b32 * c32) */ -#define silk_SMLAL(a64, b32, c32) (silk_ADD64((a64), ((opus_int64)(b32) * (opus_int64)(c32)))) - -/* (a32 * b32) >> 16 */ -#if OPUS_FAST_INT64 -#define silk_SMULWW(a32, b32) ((opus_int32)(((opus_int64)(a32) * (b32)) >> 16)) -#else -#define silk_SMULWW(a32, b32) silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)) -#endif - -/* a32 + ((b32 * c32) >> 16) */ -#if OPUS_FAST_INT64 -#define silk_SMLAWW(a32, b32, c32) ((opus_int32)((a32) + (((opus_int64)(b32) * (c32)) >> 16))) -#else -#define silk_SMLAWW(a32, b32, c32) silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16)) -#endif - -/* add/subtract with output saturated */ -#define silk_ADD_SAT32(a, b) ((((opus_uint32)(a) + (opus_uint32)(b)) & 0x80000000) == 0 ? \ - ((((a) & (b)) & 0x80000000) != 0 ? silk_int32_MIN : (a)+(b)) : \ - ((((a) | (b)) & 0x80000000) == 0 ? silk_int32_MAX : (a)+(b)) ) - -#define silk_SUB_SAT32(a, b) ((((opus_uint32)(a)-(opus_uint32)(b)) & 0x80000000) == 0 ? \ - (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) : \ - ((((a)^0x80000000) & (b) & 0x80000000) ? 
silk_int32_MAX : (a)-(b)) ) - -#if defined(MIPSr1_ASM) -#include "mips/macros_mipsr1.h" -#endif - -#include "ecintrin.h" -#ifndef OVERRIDE_silk_CLZ16 -static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16) -{ - return 32 - EC_ILOG(in16<<16|0x8000); -} -#endif - -#ifndef OVERRIDE_silk_CLZ32 -static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32) -{ - return in32 ? 32 - EC_ILOG(in32) : 32; -} -#endif - -/* Row based */ -#define matrix_ptr(Matrix_base_adr, row, column, N) \ - (*((Matrix_base_adr) + ((row)*(N)+(column)))) -#define matrix_adr(Matrix_base_adr, row, column, N) \ - ((Matrix_base_adr) + ((row)*(N)+(column))) - -/* Column based */ -#ifndef matrix_c_ptr -# define matrix_c_ptr(Matrix_base_adr, row, column, M) \ - (*((Matrix_base_adr) + ((row)+(M)*(column)))) -#endif - -#ifdef OPUS_ARM_INLINE_ASM -#include "arm/macros_armv4.h" -#endif - -#ifdef OPUS_ARM_INLINE_EDSP -#include "arm/macros_armv5e.h" -#endif - -#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR -#include "arm/macros_arm64.h" -#endif - -#endif /* SILK_MACROS_H */ - diff --git a/thirdparty/opus/silk/main.h b/thirdparty/opus/silk/main.h deleted file mode 100644 index 2f90d68f7d..0000000000 --- a/thirdparty/opus/silk/main.h +++ /dev/null @@ -1,471 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_MAIN_H -#define SILK_MAIN_H - -#include "SigProc_FIX.h" -#include "define.h" -#include "structs.h" -#include "tables.h" -#include "PLC.h" -#include "control.h" -#include "debug.h" -#include "entenc.h" -#include "entdec.h" - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) -#include "x86/main_sse.h" -#endif - -/* Convert Left/Right stereo signal to adaptive Mid/Side representation */ -void silk_stereo_LR_to_MS( - stereo_enc_state *state, /* I/O State */ - opus_int16 x1[], /* I/O Left input signal, becomes mid signal */ - opus_int16 x2[], /* I/O Right input signal, becomes side signal */ - opus_int8 ix[ 2 ][ 3 ], /* O Quantization indices */ - opus_int8 *mid_only_flag, /* O Flag: only mid signal coded */ - opus_int32 mid_side_rates_bps[], /* O Bitrates for mid and side signals */ - opus_int32 total_rate_bps, /* I Total bitrate */ - opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */ - opus_int 
toMono, /* I Last frame before a stereo->mono transition */ - opus_int fs_kHz, /* I Sample rate (kHz) */ - opus_int frame_length /* I Number of samples */ -); - -/* Convert adaptive Mid/Side representation to Left/Right stereo signal */ -void silk_stereo_MS_to_LR( - stereo_dec_state *state, /* I/O State */ - opus_int16 x1[], /* I/O Left input signal, becomes mid signal */ - opus_int16 x2[], /* I/O Right input signal, becomes side signal */ - const opus_int32 pred_Q13[], /* I Predictors */ - opus_int fs_kHz, /* I Samples rate (kHz) */ - opus_int frame_length /* I Number of samples */ -); - -/* Find least-squares prediction gain for one signal based on another and quantize it */ -opus_int32 silk_stereo_find_predictor( /* O Returns predictor in Q13 */ - opus_int32 *ratio_Q14, /* O Ratio of residual and mid energies */ - const opus_int16 x[], /* I Basis signal */ - const opus_int16 y[], /* I Target signal */ - opus_int32 mid_res_amp_Q0[], /* I/O Smoothed mid, residual norms */ - opus_int length, /* I Number of samples */ - opus_int smooth_coef_Q16 /* I Smoothing coefficient */ -); - -/* Quantize mid/side predictors */ -void silk_stereo_quant_pred( - opus_int32 pred_Q13[], /* I/O Predictors (out: quantized) */ - opus_int8 ix[ 2 ][ 3 ] /* O Quantization indices */ -); - -/* Entropy code the mid/side quantization indices */ -void silk_stereo_encode_pred( - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - opus_int8 ix[ 2 ][ 3 ] /* I Quantization indices */ -); - -/* Entropy code the mid-only flag */ -void silk_stereo_encode_mid_only( - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - opus_int8 mid_only_flag -); - -/* Decode mid/side predictors */ -void silk_stereo_decode_pred( - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int32 pred_Q13[] /* O Predictors */ -); - -/* Decode mid-only flag */ -void silk_stereo_decode_mid_only( - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int *decode_only_mid /* O Flag that only mid 
channel has been coded */ -); - -/* Encodes signs of excitation */ -void silk_encode_signs( - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - const opus_int8 pulses[], /* I pulse signal */ - opus_int length, /* I length of input */ - const opus_int signalType, /* I Signal type */ - const opus_int quantOffsetType, /* I Quantization offset type */ - const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */ -); - -/* Decodes signs of excitation */ -void silk_decode_signs( - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int16 pulses[], /* I/O pulse signal */ - opus_int length, /* I length of input */ - const opus_int signalType, /* I Signal type */ - const opus_int quantOffsetType, /* I Quantization offset type */ - const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */ -); - -/* Check encoder control struct */ -opus_int check_control_input( - silk_EncControlStruct *encControl /* I Control structure */ -); - -/* Control internal sampling rate */ -opus_int silk_control_audio_bandwidth( - silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */ - silk_EncControlStruct *encControl /* I Control structure */ -); - -/* Control SNR of redidual quantizer */ -opus_int silk_control_SNR( - silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */ - opus_int32 TargetRate_bps /* I Target max bitrate (bps) */ -); - -/***************/ -/* Shell coder */ -/***************/ - -/* Encode quantization indices of excitation */ -void silk_encode_pulses( - ec_enc *psRangeEnc, /* I/O compressor data structure */ - const opus_int signalType, /* I Signal type */ - const opus_int quantOffsetType, /* I quantOffsetType */ - opus_int8 pulses[], /* I quantization indices */ - const opus_int frame_length /* I Frame length */ -); - -/* Shell encoder, operates on one shell code frame of 16 pulses */ -void silk_shell_encoder( - ec_enc *psRangeEnc, /* I/O compressor data structure */ - 
const opus_int *pulses0 /* I data: nonnegative pulse amplitudes */ -); - -/* Shell decoder, operates on one shell code frame of 16 pulses */ -void silk_shell_decoder( - opus_int16 *pulses0, /* O data: nonnegative pulse amplitudes */ - ec_dec *psRangeDec, /* I/O Compressor data structure */ - const opus_int pulses4 /* I number of pulses per pulse-subframe */ -); - -/* Gain scalar quantization with hysteresis, uniform on log scale */ -void silk_gains_quant( - opus_int8 ind[ MAX_NB_SUBFR ], /* O gain indices */ - opus_int32 gain_Q16[ MAX_NB_SUBFR ], /* I/O gains (quantized out) */ - opus_int8 *prev_ind, /* I/O last index in previous frame */ - const opus_int conditional, /* I first gain is delta coded if 1 */ - const opus_int nb_subfr /* I number of subframes */ -); - -/* Gains scalar dequantization, uniform on log scale */ -void silk_gains_dequant( - opus_int32 gain_Q16[ MAX_NB_SUBFR ], /* O quantized gains */ - const opus_int8 ind[ MAX_NB_SUBFR ], /* I gain indices */ - opus_int8 *prev_ind, /* I/O last index in previous frame */ - const opus_int conditional, /* I first gain is delta coded if 1 */ - const opus_int nb_subfr /* I number of subframes */ -); - -/* Compute unique identifier of gain indices vector */ -opus_int32 silk_gains_ID( /* O returns unique identifier of gains */ - const opus_int8 ind[ MAX_NB_SUBFR ], /* I gain indices */ - const opus_int nb_subfr /* I number of subframes */ -); - -/* Interpolate two vectors */ -void silk_interpolate( - opus_int16 xi[ MAX_LPC_ORDER ], /* O interpolated vector */ - const opus_int16 x0[ MAX_LPC_ORDER ], /* I first vector */ - const opus_int16 x1[ MAX_LPC_ORDER ], /* I second vector */ - const opus_int ifact_Q2, /* I interp. 
factor, weight on 2nd vector */ - const opus_int d /* I number of parameters */ -); - -/* LTP tap quantizer */ -void silk_quant_LTP_gains( - opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (un)quantized LTP gains */ - opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook Index */ - opus_int8 *periodicity_index, /* O Periodicity Index */ - opus_int32 *sum_gain_dB_Q7, /* I/O Cumulative max prediction gain */ - const opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I Error Weights in Q18 */ - opus_int mu_Q9, /* I Mu value (R/D tradeoff) */ - opus_int lowComplexity, /* I Flag for low complexity */ - const opus_int nb_subfr, /* I number of subframes */ - int arch /* I Run-time architecture */ -); - -/* Entropy constrained matrix-weighted VQ, for a single input data vector */ -void silk_VQ_WMat_EC_c( - opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ - opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ - const opus_int8 *cb_Q7, /* I codebook */ - const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ - const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. 
weighted error and rate */ - const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ -); - -#if !defined(OVERRIDE_silk_VQ_WMat_EC) -#define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L, arch) \ - ((void)(arch),silk_VQ_WMat_EC_c(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L)) -#endif - -/************************************/ -/* Noise shaping quantization (NSQ) */ -/************************************/ - -void silk_NSQ_c( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -); - -#if !defined(OVERRIDE_silk_NSQ) -#define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((void)(arch),silk_NSQ_c(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - 
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) -#endif - -/* Noise shaping using delayed decision */ -void silk_NSQ_del_dec_c( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -); - -#if !defined(OVERRIDE_silk_NSQ_del_dec) -#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((void)(arch),silk_NSQ_del_dec_c(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) -#endif - -/************/ -/* Silk VAD */ -/************/ -/* Initialize the Silk VAD */ -opus_int silk_VAD_Init( /* O Return value, 0 if success */ - silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ -); - -/* Get speech activity level in Q8 */ -opus_int silk_VAD_GetSA_Q8_c( /* O Return value, 0 if success */ - 
silk_encoder_state *psEncC, /* I/O Encoder state */ - const opus_int16 pIn[] /* I PCM input */ -); - -#if !defined(OVERRIDE_silk_VAD_GetSA_Q8) -#define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_c(psEnC, pIn)) -#endif - -/* Low-pass filter with variable cutoff frequency based on */ -/* piece-wise linear interpolation between elliptic filters */ -/* Start by setting transition_frame_no = 1; */ -void silk_LP_variable_cutoff( - silk_LP_state *psLP, /* I/O LP filter state */ - opus_int16 *frame, /* I/O Low-pass filtered output signal */ - const opus_int frame_length /* I Frame length */ -); - -/******************/ -/* NLSF Quantizer */ -/******************/ -/* Limit, stabilize, convert and quantize NLSFs */ -void silk_process_NLSFs( - silk_encoder_state *psEncC, /* I/O Encoder state */ - opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ], /* O Prediction coefficients */ - opus_int16 pNLSF_Q15[ MAX_LPC_ORDER ], /* I/O Normalized LSFs (quant out) (0 - (2^15-1)) */ - const opus_int16 prev_NLSFq_Q15[ MAX_LPC_ORDER ] /* I Previous Normalized LSFs (0 - (2^15-1)) */ -); - -opus_int32 silk_NLSF_encode( /* O Returns RD value in Q25 */ - opus_int8 *NLSFIndices, /* I Codebook path vector [ LPC_ORDER + 1 ] */ - opus_int16 *pNLSF_Q15, /* I/O Quantized NLSF vector [ LPC_ORDER ] */ - const silk_NLSF_CB_struct *psNLSF_CB, /* I Codebook object */ - const opus_int16 *pW_QW, /* I NLSF weight vector [ LPC_ORDER ] */ - const opus_int NLSF_mu_Q20, /* I Rate weight for the RD optimization */ - const opus_int nSurvivors, /* I Max survivors after first stage */ - const opus_int signalType /* I Signal type: 0/1/2 */ -); - -/* Compute quantization errors for an LPC_order element input vector for a VQ codebook */ -void silk_NLSF_VQ( - opus_int32 err_Q26[], /* O Quantization errors [K] */ - const opus_int16 in_Q15[], /* I Input vectors to be quantized [LPC_order] */ - const opus_uint8 pCB_Q8[], /* I Codebook vectors [K*LPC_order] */ - const opus_int K, /* I Number of codebook 
vectors */ - const opus_int LPC_order /* I Number of LPCs */ -); - -/* Delayed-decision quantizer for NLSF residuals */ -opus_int32 silk_NLSF_del_dec_quant( /* O Returns RD value in Q25 */ - opus_int8 indices[], /* O Quantization indices [ order ] */ - const opus_int16 x_Q10[], /* I Input [ order ] */ - const opus_int16 w_Q5[], /* I Weights [ order ] */ - const opus_uint8 pred_coef_Q8[], /* I Backward predictor coefs [ order ] */ - const opus_int16 ec_ix[], /* I Indices to entropy coding tables [ order ] */ - const opus_uint8 ec_rates_Q5[], /* I Rates [] */ - const opus_int quant_step_size_Q16, /* I Quantization step size */ - const opus_int16 inv_quant_step_size_Q6, /* I Inverse quantization step size */ - const opus_int32 mu_Q20, /* I R/D tradeoff */ - const opus_int16 order /* I Number of input values */ -); - -/* Unpack predictor values and indices for entropy coding tables */ -void silk_NLSF_unpack( - opus_int16 ec_ix[], /* O Indices to entropy tables [ LPC_ORDER ] */ - opus_uint8 pred_Q8[], /* O LSF predictor [ LPC_ORDER ] */ - const silk_NLSF_CB_struct *psNLSF_CB, /* I Codebook object */ - const opus_int CB1_index /* I Index of vector in first LSF codebook */ -); - -/***********************/ -/* NLSF vector decoder */ -/***********************/ -void silk_NLSF_decode( - opus_int16 *pNLSF_Q15, /* O Quantized NLSF vector [ LPC_ORDER ] */ - opus_int8 *NLSFIndices, /* I Codebook path vector [ LPC_ORDER + 1 ] */ - const silk_NLSF_CB_struct *psNLSF_CB /* I Codebook object */ -); - -/****************************************************/ -/* Decoder Functions */ -/****************************************************/ -opus_int silk_init_decoder( - silk_decoder_state *psDec /* I/O Decoder state pointer */ -); - -/* Set decoder sampling rate */ -opus_int silk_decoder_set_fs( - silk_decoder_state *psDec, /* I/O Decoder state pointer */ - opus_int fs_kHz, /* I Sampling frequency (kHz) */ - opus_int32 fs_API_Hz /* I API Sampling frequency (Hz) */ -); - 
-/****************/ -/* Decode frame */ -/****************/ -opus_int silk_decode_frame( - silk_decoder_state *psDec, /* I/O Pointer to Silk decoder state */ - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int16 pOut[], /* O Pointer to output speech frame */ - opus_int32 *pN, /* O Pointer to size of output frame */ - opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ - opus_int condCoding, /* I The type of conditional coding to use */ - int arch /* I Run-time architecture */ -); - -/* Decode indices from bitstream */ -void silk_decode_indices( - silk_decoder_state *psDec, /* I/O State */ - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int FrameIndex, /* I Frame number */ - opus_int decode_LBRR, /* I Flag indicating LBRR data is being decoded */ - opus_int condCoding /* I The type of conditional coding to use */ -); - -/* Decode parameters from payload */ -void silk_decode_parameters( - silk_decoder_state *psDec, /* I/O State */ - silk_decoder_control *psDecCtrl, /* I/O Decoder control */ - opus_int condCoding /* I The type of conditional coding to use */ -); - -/* Core decoder. 
Performs inverse NSQ operation LTP + LPC */ -void silk_decode_core( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl, /* I Decoder control */ - opus_int16 xq[], /* O Decoded speech */ - const opus_int16 pulses[ MAX_FRAME_LENGTH ], /* I Pulse signal */ - int arch /* I Run-time architecture */ -); - -/* Decode quantization indices of excitation (Shell coding) */ -void silk_decode_pulses( - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int16 pulses[], /* O Excitation signal */ - const opus_int signalType, /* I Sigtype */ - const opus_int quantOffsetType, /* I quantOffsetType */ - const opus_int frame_length /* I Frame length */ -); - -/******************/ -/* CNG */ -/******************/ - -/* Reset CNG */ -void silk_CNG_Reset( - silk_decoder_state *psDec /* I/O Decoder state */ -); - -/* Updates CNG estimate, and applies the CNG when packet was lost */ -void silk_CNG( - silk_decoder_state *psDec, /* I/O Decoder state */ - silk_decoder_control *psDecCtrl, /* I/O Decoder control */ - opus_int16 frame[], /* I/O Signal */ - opus_int length /* I Length of residual */ -); - -/* Encoding of various parameters */ -void silk_encode_indices( - silk_encoder_state *psEncC, /* I/O Encoder state */ - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - opus_int FrameIndex, /* I Frame number */ - opus_int encode_LBRR, /* I Flag indicating LBRR data is being encoded */ - opus_int condCoding /* I The type of conditional coding to use */ -); - -#endif diff --git a/thirdparty/opus/silk/mips/NSQ_del_dec_mipsr1.h b/thirdparty/opus/silk/mips/NSQ_del_dec_mipsr1.h deleted file mode 100644 index ad1cfe2a9b..0000000000 --- a/thirdparty/opus/silk/mips/NSQ_del_dec_mipsr1.h +++ /dev/null @@ -1,409 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifndef __NSQ_DEL_DEC_MIPSR1_H__ -#define __NSQ_DEL_DEC_MIPSR1_H__ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" - -#define OVERRIDE_silk_noise_shape_quantizer_del_dec -static inline void silk_noise_shape_quantizer_del_dec( - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP filter state */ - opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int subfr, /* I Subframe number */ - opus_int shapingLPCOrder, /* I Shaping LPC filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - opus_int warping_Q16, /* I */ - opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ - opus_int decisionDelay, /* I */ - int arch /* I */ -) -{ - opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; - opus_int32 Winner_rand_state; - opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14; - opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10; - opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; - opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; - opus_int32 *pred_lag_ptr, *shp_lag_ptr, 
*psLPC_Q14; - NSQ_sample_struct psSampleState[ MAX_DEL_DEC_STATES ][ 2 ]; - NSQ_del_dec_struct *psDD; - NSQ_sample_struct *psSS; - opus_int16 b_Q14_0, b_Q14_1, b_Q14_2, b_Q14_3, b_Q14_4; - opus_int16 a_Q12_0, a_Q12_1, a_Q12_2, a_Q12_3, a_Q12_4, a_Q12_5, a_Q12_6; - opus_int16 a_Q12_7, a_Q12_8, a_Q12_9, a_Q12_10, a_Q12_11, a_Q12_12, a_Q12_13; - opus_int16 a_Q12_14, a_Q12_15; - - opus_int32 cur, prev, next; - - /*Unused.*/ - (void)arch; - - //Intialize b_Q14 variables - b_Q14_0 = b_Q14[ 0 ]; - b_Q14_1 = b_Q14[ 1 ]; - b_Q14_2 = b_Q14[ 2 ]; - b_Q14_3 = b_Q14[ 3 ]; - b_Q14_4 = b_Q14[ 4 ]; - - //Intialize a_Q12 variables - a_Q12_0 = a_Q12[0]; - a_Q12_1 = a_Q12[1]; - a_Q12_2 = a_Q12[2]; - a_Q12_3 = a_Q12[3]; - a_Q12_4 = a_Q12[4]; - a_Q12_5 = a_Q12[5]; - a_Q12_6 = a_Q12[6]; - a_Q12_7 = a_Q12[7]; - a_Q12_8 = a_Q12[8]; - a_Q12_9 = a_Q12[9]; - a_Q12_10 = a_Q12[10]; - a_Q12_11 = a_Q12[11]; - a_Q12_12 = a_Q12[12]; - a_Q12_13 = a_Q12[13]; - a_Q12_14 = a_Q12[14]; - a_Q12_15 = a_Q12[15]; - - long long temp64; - - silk_assert( nStatesDelayedDecision > 0 ); - - shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; - pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; - Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); - - for( i = 0; i < length; i++ ) { - /* Perform common calculations used in all states */ - - /* Long-term prediction */ - if( signalType == TYPE_VOICED ) { - /* Unrolled loop */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - temp64 = __builtin_mips_mult(pred_lag_ptr[ 0 ], b_Q14_0 ); - temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -1 ], b_Q14_1 ); - temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -2 ], b_Q14_2 ); - temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -3 ], b_Q14_3 ); - temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -4 ], b_Q14_4 ); - temp64 += 32768; - LTP_pred_Q14 = __builtin_mips_extr_w(temp64, 16); - LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 ); /* Q13 -> Q14 
*/ - pred_lag_ptr++; - } else { - LTP_pred_Q14 = 0; - } - - /* Long-term shaping */ - if( lag > 0 ) { - /* Symmetric, packed FIR coefficients */ - n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */ - shp_lag_ptr++; - } else { - n_LTP_Q14 = 0; - } - - for( k = 0; k < nStatesDelayedDecision; k++ ) { - /* Delayed decision state */ - psDD = &psDelDec[ k ]; - - /* Sample state */ - psSS = psSampleState[ k ]; - - /* Generate dither */ - psDD->Seed = silk_RAND( psDD->Seed ); - - /* Pointer used in short term prediction and shaping */ - psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; - /* Short-term prediction */ - silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); - temp64 = __builtin_mips_mult(psLPC_Q14[ 0 ], a_Q12_0 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -1 ], a_Q12_1 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -2 ], a_Q12_2 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -3 ], a_Q12_3 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -4 ], a_Q12_4 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -5 ], a_Q12_5 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -6 ], a_Q12_6 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -7 ], a_Q12_7 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -8 ], a_Q12_8 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -9 ], a_Q12_9 ); - if( predictLPCOrder == 16 ) { - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -10 ], a_Q12_10 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -11 ], a_Q12_11 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -12 ], a_Q12_12 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -13 ], a_Q12_13 ); - temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -14 ], a_Q12_14 ); - temp64 = __builtin_mips_madd( temp64, 
psLPC_Q14[ -15 ], a_Q12_15 ); - } - temp64 += 32768; - LPC_pred_Q14 = __builtin_mips_extr_w(temp64, 16); - - LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */ - - /* Noise shape feedback */ - silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ - /* Output of lowpass section */ - tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 ); - psDD->sAR2_Q14[ 0 ] = tmp2; - - temp64 = __builtin_mips_mult(tmp2, AR_shp_Q13[ 0 ] ); - - prev = psDD->sAR2_Q14[ 1 ]; - - /* Loop over allpass sections */ - for( j = 2; j < shapingLPCOrder; j += 2 ) { - cur = psDD->sAR2_Q14[ j ]; - next = psDD->sAR2_Q14[ j+1 ]; - /* Output of allpass section */ - tmp2 = silk_SMLAWB( prev, cur - tmp1, warping_Q16 ); - psDD->sAR2_Q14[ j - 1 ] = tmp1; - temp64 = __builtin_mips_madd( temp64, tmp1, AR_shp_Q13[ j - 1 ] ); - temp64 = __builtin_mips_madd( temp64, tmp2, AR_shp_Q13[ j ] ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( cur, next - tmp2, warping_Q16 ); - psDD->sAR2_Q14[ j + 0 ] = tmp2; - prev = next; - } - psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1; - temp64 = __builtin_mips_madd( temp64, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] ); - temp64 += 32768; - n_AR_Q14 = __builtin_mips_extr_w(temp64, 16); - n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 ); /* Q11 -> Q12 */ - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 ); /* Q12 */ - n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 ); /* Q12 -> Q14 */ - - n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 ); /* Q12 */ - n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 ); /* Q12 */ - n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 ); /* Q12 -> Q14 */ - - /* Input minus prediction plus noise feedback */ - /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */ - tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */ - tmp2 = silk_ADD32( n_LTP_Q14, 
LPC_pred_Q14 ); /* Q13 */ - tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */ - tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */ - - r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */ - - /* Flip sign depending on dither */ - if ( psDD->Seed < 0 ) { - r_Q10 = -r_Q10; - } - r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); - - /* Find two quantization level candidates and measure their rate-distortion */ - q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); - q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); - if( q1_Q0 > 0 ) { - q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == 0 ) { - q1_Q10 = offset_Q10; - q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == -1 ) { - q2_Q10 = offset_Q10; - q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else { /* q1_Q0 < -1 */ - q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); - } - rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); - rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 ); - rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); - rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 ); - - if( rd1_Q10 < rd2_Q10 ) { - psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); - psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); - psSS[ 0 ].Q_Q10 = q1_Q10; - psSS[ 1 ].Q_Q10 = q2_Q10; - } else { - psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); - psSS[ 1 ].RD_Q10 = 
silk_ADD32( psDD->RD_Q10, rd1_Q10 ); - psSS[ 0 ].Q_Q10 = q2_Q10; - psSS[ 1 ].Q_Q10 = q1_Q10; - } - - /* Update states for best quantization */ - - /* Quantized excitation */ - exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 ); - if ( psDD->Seed < 0 ) { - exc_Q14 = -exc_Q14; - } - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); - xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); - - /* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); - psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; - psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; - psSS[ 0 ].xq_Q14 = xq_Q14; - - /* Update states for second best quantization */ - - /* Quantized excitation */ - exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 ); - if ( psDD->Seed < 0 ) { - exc_Q14 = -exc_Q14; - } - - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); - xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); - - /* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); - psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; - psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; - psSS[ 1 ].xq_Q14 = xq_Q14; - } - - *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */ - last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */ - - /* Find winner */ - RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; - Winner_ind = 0; - for( k = 1; k < nStatesDelayedDecision; k++ ) { - if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10; - Winner_ind = k; - } - } - - /* Increase RD values of expired states */ - Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ]; - for( k = 0; k < nStatesDelayedDecision; k++ ) { - if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) { - psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( 
psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 ); - psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 ); - silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 ); - } - } - - /* Find worst in first set and best in second set */ - RDmax_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; - RDmin_Q10 = psSampleState[ 0 ][ 1 ].RD_Q10; - RDmax_ind = 0; - RDmin_ind = 0; - for( k = 1; k < nStatesDelayedDecision; k++ ) { - /* find worst in first set */ - if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) { - RDmax_Q10 = psSampleState[ k ][ 0 ].RD_Q10; - RDmax_ind = k; - } - /* find best in second set */ - if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psSampleState[ k ][ 1 ].RD_Q10; - RDmin_ind = k; - } - } - - /* Replace a state if best from second set outperforms worst in first set */ - if( RDmin_Q10 < RDmax_Q10 ) { - silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i, - ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) ); - silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) ); - } - - /* Write samples from winner to output and long-term filter states */ - psDD = &psDelDec[ Winner_ind ]; - if( subfr > 0 || i >= decisionDelay ) { - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); - xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( - silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ]; - sLTP_Q15[ NSQ->sLTP_buf_idx - decisionDelay ] = psDD->Pred_Q15[ last_smple_idx ]; - } - NSQ->sLTP_shp_buf_idx++; - NSQ->sLTP_buf_idx++; - - /* Update states */ - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - psSS = &psSampleState[ k ][ 0 ]; - psDD->LF_AR_Q14 = psSS->LF_AR_Q14; - psDD->sLPC_Q14[ 
NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14; - psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14; - psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10; - psDD->Pred_Q15[ *smpl_buf_idx ] = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 ); - psDD->Shape_Q14[ *smpl_buf_idx ] = psSS->sLTP_shp_Q14; - psDD->Seed = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) ); - psDD->RandState[ *smpl_buf_idx ] = psDD->Seed; - psDD->RD_Q10 = psSS->RD_Q10; - } - delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10; - } - /* Update LPC states */ - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); - } -} - -#endif /* __NSQ_DEL_DEC_MIPSR1_H__ */ diff --git a/thirdparty/opus/silk/mips/macros_mipsr1.h b/thirdparty/opus/silk/mips/macros_mipsr1.h deleted file mode 100644 index 12ed981a6e..0000000000 --- a/thirdparty/opus/silk/mips/macros_mipsr1.h +++ /dev/null @@ -1,92 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - - -#ifndef __SILK_MACROS_MIPSR1_H__ -#define __SILK_MACROS_MIPSR1_H__ - -#define mips_clz(x) __builtin_clz(x) - -#undef silk_SMULWB -static inline int silk_SMULWB(int a, int b) -{ - long long ac; - int c; - - ac = __builtin_mips_mult(a, (opus_int32)(opus_int16)b); - c = __builtin_mips_extr_w(ac, 16); - - return c; -} - -#undef silk_SMLAWB -#define silk_SMLAWB(a32, b32, c32) ((a32) + silk_SMULWB(b32, c32)) - -#undef silk_SMULWW -static inline int silk_SMULWW(int a, int b) -{ - long long ac; - int c; - - ac = __builtin_mips_mult(a, b); - c = __builtin_mips_extr_w(ac, 16); - - return c; -} - -#undef silk_SMLAWW -static inline int silk_SMLAWW(int a, int b, int c) -{ - long long ac; - int res; - - ac = __builtin_mips_mult(b, c); - res = __builtin_mips_extr_w(ac, 16); - res += a; - - return res; -} - -#define OVERRIDE_silk_CLZ16 -static inline opus_int32 silk_CLZ16(opus_int16 in16) -{ - int re32; - opus_int32 in32 = (opus_int32 )in16; - re32 = mips_clz(in32); - re32-=16; - return re32; -} - -#define OVERRIDE_silk_CLZ32 -static inline opus_int32 silk_CLZ32(opus_int32 in32) -{ - int re32; - re32 = mips_clz(in32); - return re32; -} - -#endif /* 
__SILK_MACROS_MIPSR1_H__ */ diff --git a/thirdparty/opus/silk/mips/sigproc_fix_mipsr1.h b/thirdparty/opus/silk/mips/sigproc_fix_mipsr1.h deleted file mode 100644 index 3b0a695365..0000000000 --- a/thirdparty/opus/silk/mips/sigproc_fix_mipsr1.h +++ /dev/null @@ -1,65 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifndef SILK_SIGPROC_FIX_MIPSR1_H -#define SILK_SIGPROC_FIX_MIPSR1_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -#undef silk_SAT16 -static inline short int silk_SAT16(int a) -{ - int c; - c = __builtin_mips_shll_s_w(a, 16); - c = c>>16; - - return c; -} - -#undef silk_LSHIFT_SAT32 -static inline int silk_LSHIFT_SAT32(int a, int shift) -{ - int r; - - r = __builtin_mips_shll_s_w(a, shift); - - return r; -} - -#undef silk_RSHIFT_ROUND -static inline int silk_RSHIFT_ROUND(int a, int shift) -{ - int r; - - r = __builtin_mips_shra_r_w(a, shift); - return r; -} - -#endif /* SILK_SIGPROC_FIX_MIPSR1_H */ diff --git a/thirdparty/opus/silk/pitch_est_defines.h b/thirdparty/opus/silk/pitch_est_defines.h deleted file mode 100644 index e1e4b5d768..0000000000 --- a/thirdparty/opus/silk/pitch_est_defines.h +++ /dev/null @@ -1,88 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_PE_DEFINES_H -#define SILK_PE_DEFINES_H - -#include "SigProc_FIX.h" - -/********************************************************/ -/* Definitions for pitch estimator */ -/********************************************************/ - -#define PE_MAX_FS_KHZ 16 /* Maximum sampling frequency used */ - -#define PE_MAX_NB_SUBFR 4 -#define PE_SUBFR_LENGTH_MS 5 /* 5 ms */ - -#define PE_LTP_MEM_LENGTH_MS ( 4 * PE_SUBFR_LENGTH_MS ) - -#define PE_MAX_FRAME_LENGTH_MS ( PE_LTP_MEM_LENGTH_MS + PE_MAX_NB_SUBFR * PE_SUBFR_LENGTH_MS ) -#define PE_MAX_FRAME_LENGTH ( PE_MAX_FRAME_LENGTH_MS * PE_MAX_FS_KHZ ) -#define PE_MAX_FRAME_LENGTH_ST_1 ( PE_MAX_FRAME_LENGTH >> 2 ) -#define PE_MAX_FRAME_LENGTH_ST_2 ( PE_MAX_FRAME_LENGTH >> 1 ) - -#define PE_MAX_LAG_MS 18 /* 18 ms -> 56 Hz */ -#define PE_MIN_LAG_MS 2 /* 2 ms -> 500 Hz */ -#define PE_MAX_LAG ( PE_MAX_LAG_MS * PE_MAX_FS_KHZ ) -#define PE_MIN_LAG ( PE_MIN_LAG_MS * PE_MAX_FS_KHZ ) - -#define PE_D_SRCH_LENGTH 24 - -#define PE_NB_STAGE3_LAGS 5 - -#define PE_NB_CBKS_STAGE2 3 -#define PE_NB_CBKS_STAGE2_EXT 11 - -#define PE_NB_CBKS_STAGE3_MAX 34 -#define PE_NB_CBKS_STAGE3_MID 24 -#define PE_NB_CBKS_STAGE3_MIN 16 - -#define PE_NB_CBKS_STAGE3_10MS 12 -#define PE_NB_CBKS_STAGE2_10MS 3 - -#define PE_SHORTLAG_BIAS 0.2f /* for logarithmic weighting */ -#define PE_PREVLAG_BIAS 0.2f /* for logarithmic weighting */ -#define PE_FLATCONTOUR_BIAS 0.05f - 
-#define SILK_PE_MIN_COMPLEX 0 -#define SILK_PE_MID_COMPLEX 1 -#define SILK_PE_MAX_COMPLEX 2 - -/* Tables for 20 ms frames */ -extern const opus_int8 silk_CB_lags_stage2[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE2_EXT ]; -extern const opus_int8 silk_CB_lags_stage3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ]; -extern const opus_int8 silk_Lag_range_stage3[ SILK_PE_MAX_COMPLEX + 1 ] [ PE_MAX_NB_SUBFR ][ 2 ]; -extern const opus_int8 silk_nb_cbk_searchs_stage3[ SILK_PE_MAX_COMPLEX + 1 ]; - -/* Tables for 10 ms frames */ -extern const opus_int8 silk_CB_lags_stage2_10_ms[ PE_MAX_NB_SUBFR >> 1][ 3 ]; -extern const opus_int8 silk_CB_lags_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ 12 ]; -extern const opus_int8 silk_Lag_range_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ 2 ]; - -#endif - diff --git a/thirdparty/opus/silk/pitch_est_tables.c b/thirdparty/opus/silk/pitch_est_tables.c deleted file mode 100644 index 81a8bacaca..0000000000 --- a/thirdparty/opus/silk/pitch_est_tables.c +++ /dev/null @@ -1,99 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "typedef.h" -#include "pitch_est_defines.h" - -const opus_int8 silk_CB_lags_stage2_10_ms[ PE_MAX_NB_SUBFR >> 1][ PE_NB_CBKS_STAGE2_10MS ] = -{ - {0, 1, 0}, - {0, 0, 1} -}; - -const opus_int8 silk_CB_lags_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ PE_NB_CBKS_STAGE3_10MS ] = -{ - { 0, 0, 1,-1, 1,-1, 2,-2, 2,-2, 3,-3}, - { 0, 1, 0, 1,-1, 2,-1, 2,-2, 3,-2, 3} -}; - -const opus_int8 silk_Lag_range_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ 2 ] = -{ - {-3, 7}, - {-2, 7} -}; - -const opus_int8 silk_CB_lags_stage2[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE2_EXT ] = -{ - {0, 2,-1,-1,-1, 0, 0, 1, 1, 0, 1}, - {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, - {0,-1, 2, 1, 0, 1, 1, 0, 0,-1,-1} -}; - -const opus_int8 silk_CB_lags_stage3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ] = -{ - {0, 0, 1,-1, 0, 1,-1, 0,-1, 1,-2, 2,-2,-2, 2,-3, 2, 3,-3,-4, 3,-4, 4, 4,-5, 5,-6,-5, 6,-7, 6, 5, 8,-9}, - {0, 0, 1, 0, 0, 0, 0, 0, 0, 0,-1, 1, 0, 0, 1,-1, 0, 1,-1,-1, 1,-1, 2, 1,-1, 2,-2,-2, 2,-2, 2, 2, 3,-3}, - {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,-1, 1, 0, 0, 2, 1,-1, 2,-1,-1, 2,-1, 2, 2,-1, 
3,-2,-2,-2, 3}, - {0, 1, 0, 0, 1, 0, 1,-1, 2,-1, 2,-1, 2, 3,-2, 3,-2,-2, 4, 4,-3, 5,-3,-4, 6,-4, 6, 5,-5, 8,-6,-5,-7, 9} -}; - -const opus_int8 silk_Lag_range_stage3[ SILK_PE_MAX_COMPLEX + 1 ] [ PE_MAX_NB_SUBFR ][ 2 ] = -{ - /* Lags to search for low number of stage3 cbks */ - { - {-5,8}, - {-1,6}, - {-1,6}, - {-4,10} - }, - /* Lags to search for middle number of stage3 cbks */ - { - {-6,10}, - {-2,6}, - {-1,6}, - {-5,10} - }, - /* Lags to search for max number of stage3 cbks */ - { - {-9,12}, - {-3,7}, - {-2,7}, - {-7,13} - } -}; - -const opus_int8 silk_nb_cbk_searchs_stage3[ SILK_PE_MAX_COMPLEX + 1 ] = -{ - PE_NB_CBKS_STAGE3_MIN, - PE_NB_CBKS_STAGE3_MID, - PE_NB_CBKS_STAGE3_MAX -}; diff --git a/thirdparty/opus/silk/process_NLSFs.c b/thirdparty/opus/silk/process_NLSFs.c deleted file mode 100644 index 0ab71f0163..0000000000 --- a/thirdparty/opus/silk/process_NLSFs.c +++ /dev/null @@ -1,107 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Limit, stabilize, convert and quantize NLSFs */ -void silk_process_NLSFs( - silk_encoder_state *psEncC, /* I/O Encoder state */ - opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ], /* O Prediction coefficients */ - opus_int16 pNLSF_Q15[ MAX_LPC_ORDER ], /* I/O Normalized LSFs (quant out) (0 - (2^15-1)) */ - const opus_int16 prev_NLSFq_Q15[ MAX_LPC_ORDER ] /* I Previous Normalized LSFs (0 - (2^15-1)) */ -) -{ - opus_int i, doInterpolate; - opus_int NLSF_mu_Q20; - opus_int16 i_sqr_Q15; - opus_int16 pNLSF0_temp_Q15[ MAX_LPC_ORDER ]; - opus_int16 pNLSFW_QW[ MAX_LPC_ORDER ]; - opus_int16 pNLSFW0_temp_QW[ MAX_LPC_ORDER ]; - - silk_assert( psEncC->speech_activity_Q8 >= 0 ); - silk_assert( psEncC->speech_activity_Q8 <= SILK_FIX_CONST( 1.0, 8 ) ); - silk_assert( psEncC->useInterpolatedNLSFs == 1 || psEncC->indices.NLSFInterpCoef_Q2 == ( 1 << 2 ) ); - - /***********************/ - /* Calculate mu values */ - /***********************/ - /* NLSF_mu = 0.003 - 0.0015 * psEnc->speech_activity; */ - NLSF_mu_Q20 = silk_SMLAWB( SILK_FIX_CONST( 0.003, 20 ), SILK_FIX_CONST( -0.001, 28 ), psEncC->speech_activity_Q8 ); - if( psEncC->nb_subfr == 2 ) { - /* Multiply by 1.5 for 10 ms packets */ - NLSF_mu_Q20 = silk_ADD_RSHIFT( NLSF_mu_Q20, NLSF_mu_Q20, 1 ); - } - - silk_assert( NLSF_mu_Q20 > 0 ); - silk_assert( NLSF_mu_Q20 <= 
SILK_FIX_CONST( 0.005, 20 ) ); - - /* Calculate NLSF weights */ - silk_NLSF_VQ_weights_laroia( pNLSFW_QW, pNLSF_Q15, psEncC->predictLPCOrder ); - - /* Update NLSF weights for interpolated NLSFs */ - doInterpolate = ( psEncC->useInterpolatedNLSFs == 1 ) && ( psEncC->indices.NLSFInterpCoef_Q2 < 4 ); - if( doInterpolate ) { - /* Calculate the interpolated NLSF vector for the first half */ - silk_interpolate( pNLSF0_temp_Q15, prev_NLSFq_Q15, pNLSF_Q15, - psEncC->indices.NLSFInterpCoef_Q2, psEncC->predictLPCOrder ); - - /* Calculate first half NLSF weights for the interpolated NLSFs */ - silk_NLSF_VQ_weights_laroia( pNLSFW0_temp_QW, pNLSF0_temp_Q15, psEncC->predictLPCOrder ); - - /* Update NLSF weights with contribution from first half */ - i_sqr_Q15 = silk_LSHIFT( silk_SMULBB( psEncC->indices.NLSFInterpCoef_Q2, psEncC->indices.NLSFInterpCoef_Q2 ), 11 ); - for( i = 0; i < psEncC->predictLPCOrder; i++ ) { - pNLSFW_QW[ i ] = silk_ADD16( silk_RSHIFT( pNLSFW_QW[ i ], 1 ), silk_RSHIFT( - silk_SMULBB( pNLSFW0_temp_QW[ i ], i_sqr_Q15 ), 16) ); - silk_assert( pNLSFW_QW[ i ] >= 1 ); - } - } - - silk_NLSF_encode( psEncC->indices.NLSFIndices, pNLSF_Q15, psEncC->psNLSF_CB, pNLSFW_QW, - NLSF_mu_Q20, psEncC->NLSF_MSVQ_Survivors, psEncC->indices.signalType ); - - /* Convert quantized NLSFs back to LPC coefficients */ - silk_NLSF2A( PredCoef_Q12[ 1 ], pNLSF_Q15, psEncC->predictLPCOrder ); - - if( doInterpolate ) { - /* Calculate the interpolated, quantized LSF vector for the first half */ - silk_interpolate( pNLSF0_temp_Q15, prev_NLSFq_Q15, pNLSF_Q15, - psEncC->indices.NLSFInterpCoef_Q2, psEncC->predictLPCOrder ); - - /* Convert back to LPC coefficients */ - silk_NLSF2A( PredCoef_Q12[ 0 ], pNLSF0_temp_Q15, psEncC->predictLPCOrder ); - - } else { - /* Copy LPC coefficients for first half from second half */ - silk_assert( psEncC->predictLPCOrder <= MAX_LPC_ORDER ); - silk_memcpy( PredCoef_Q12[ 0 ], PredCoef_Q12[ 1 ], psEncC->predictLPCOrder * sizeof( opus_int16 ) ); - } -} diff --git 
a/thirdparty/opus/silk/quant_LTP_gains.c b/thirdparty/opus/silk/quant_LTP_gains.c deleted file mode 100644 index 513a8c4468..0000000000 --- a/thirdparty/opus/silk/quant_LTP_gains.c +++ /dev/null @@ -1,129 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "tuning_parameters.h" - -void silk_quant_LTP_gains( - opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (un)quantized LTP gains */ - opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook Index */ - opus_int8 *periodicity_index, /* O Periodicity Index */ - opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */ - const opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I Error Weights in Q18 */ - opus_int mu_Q9, /* I Mu value (R/D tradeoff) */ - opus_int lowComplexity, /* I Flag for low complexity */ - const opus_int nb_subfr, /* I number of subframes */ - int arch /* I Run-time architecture */ -) -{ - opus_int j, k, cbk_size; - opus_int8 temp_idx[ MAX_NB_SUBFR ]; - const opus_uint8 *cl_ptr_Q5; - const opus_int8 *cbk_ptr_Q7; - const opus_uint8 *cbk_gain_ptr_Q7; - const opus_int16 *b_Q14_ptr; - const opus_int32 *W_Q18_ptr; - opus_int32 rate_dist_Q14_subfr, rate_dist_Q14, min_rate_dist_Q14; - opus_int32 sum_log_gain_tmp_Q7, best_sum_log_gain_Q7, max_gain_Q7, gain_Q7; - - /***************************************************/ - /* iterate over different codebooks with different */ - /* rates/distortions, and choose best */ - /***************************************************/ - min_rate_dist_Q14 = silk_int32_MAX; - best_sum_log_gain_Q7 = 0; - for( k = 0; k < 3; k++ ) { - /* Safety margin for pitch gain control, to take into account factors - such as state rescaling/rewhitening. 
*/ - opus_int32 gain_safety = SILK_FIX_CONST( 0.4, 7 ); - - cl_ptr_Q5 = silk_LTP_gain_BITS_Q5_ptrs[ k ]; - cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ k ]; - cbk_gain_ptr_Q7 = silk_LTP_vq_gain_ptrs_Q7[ k ]; - cbk_size = silk_LTP_vq_sizes[ k ]; - - /* Set up pointer to first subframe */ - W_Q18_ptr = W_Q18; - b_Q14_ptr = B_Q14; - - rate_dist_Q14 = 0; - sum_log_gain_tmp_Q7 = *sum_log_gain_Q7; - for( j = 0; j < nb_subfr; j++ ) { - max_gain_Q7 = silk_log2lin( ( SILK_FIX_CONST( MAX_SUM_LOG_GAIN_DB / 6.0, 7 ) - sum_log_gain_tmp_Q7 ) - + SILK_FIX_CONST( 7, 7 ) ) - gain_safety; - - silk_VQ_WMat_EC( - &temp_idx[ j ], /* O index of best codebook vector */ - &rate_dist_Q14_subfr, /* O best weighted quantization error + mu * rate */ - &gain_Q7, /* O sum of absolute LTP coefficients */ - b_Q14_ptr, /* I input vector to be quantized */ - W_Q18_ptr, /* I weighting matrix */ - cbk_ptr_Q7, /* I codebook */ - cbk_gain_ptr_Q7, /* I codebook effective gains */ - cl_ptr_Q5, /* I code length for each codebook vector */ - mu_Q9, /* I tradeoff between weighted error and rate */ - max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - cbk_size, /* I number of vectors in codebook */ - arch /* I Run-time architecture */ - ); - - rate_dist_Q14 = silk_ADD_POS_SAT32( rate_dist_Q14, rate_dist_Q14_subfr ); - sum_log_gain_tmp_Q7 = silk_max(0, sum_log_gain_tmp_Q7 - + silk_lin2log( gain_safety + gain_Q7 ) - SILK_FIX_CONST( 7, 7 )); - - b_Q14_ptr += LTP_ORDER; - W_Q18_ptr += LTP_ORDER * LTP_ORDER; - } - - /* Avoid never finding a codebook */ - rate_dist_Q14 = silk_min( silk_int32_MAX - 1, rate_dist_Q14 ); - - if( rate_dist_Q14 < min_rate_dist_Q14 ) { - min_rate_dist_Q14 = rate_dist_Q14; - *periodicity_index = (opus_int8)k; - silk_memcpy( cbk_index, temp_idx, nb_subfr * sizeof( opus_int8 ) ); - best_sum_log_gain_Q7 = sum_log_gain_tmp_Q7; - } - - /* Break early in low-complexity mode if rate distortion is below threshold */ - if( lowComplexity && ( rate_dist_Q14 < silk_LTP_gain_middle_avg_RD_Q14 ) ) { - 
break; - } - } - - cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ *periodicity_index ]; - for( j = 0; j < nb_subfr; j++ ) { - for( k = 0; k < LTP_ORDER; k++ ) { - B_Q14[ j * LTP_ORDER + k ] = silk_LSHIFT( cbk_ptr_Q7[ cbk_index[ j ] * LTP_ORDER + k ], 7 ); - } - } - *sum_log_gain_Q7 = best_sum_log_gain_Q7; -} diff --git a/thirdparty/opus/silk/resampler.c b/thirdparty/opus/silk/resampler.c deleted file mode 100644 index 374fbb3722..0000000000 --- a/thirdparty/opus/silk/resampler.c +++ /dev/null @@ -1,215 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* - * Matrix of resampling methods used: - * Fs_out (kHz) - * 8 12 16 24 48 - * - * 8 C UF U UF UF - * 12 AF C UF U UF - * Fs_in (kHz) 16 D AF C UF UF - * 24 AF D AF C U - * 48 AF AF AF D C - * - * C -> Copy (no resampling) - * D -> Allpass-based 2x downsampling - * U -> Allpass-based 2x upsampling - * UF -> Allpass-based 2x upsampling followed by FIR interpolation - * AF -> AR2 filter followed by FIR interpolation - */ - -#include "resampler_private.h" - -/* Tables with delay compensation values to equalize total delay for different modes */ -static const opus_int8 delay_matrix_enc[ 5 ][ 3 ] = { -/* in \ out 8 12 16 */ -/* 8 */ { 6, 0, 3 }, -/* 12 */ { 0, 7, 3 }, -/* 16 */ { 0, 1, 10 }, -/* 24 */ { 0, 2, 6 }, -/* 48 */ { 18, 10, 12 } -}; - -static const opus_int8 delay_matrix_dec[ 3 ][ 5 ] = { -/* in \ out 8 12 16 24 48 */ -/* 8 */ { 4, 0, 2, 0, 0 }, -/* 12 */ { 0, 9, 4, 7, 4 }, -/* 16 */ { 0, 3, 12, 7, 7 } -}; - -/* Simple way to make [8000, 12000, 16000, 24000, 48000] to [0, 1, 2, 3, 4] */ -#define rateID(R) ( ( ( ((R)>>12) - ((R)>16000) ) >> ((R)>24000) ) - 1 ) - -#define USE_silk_resampler_copy (0) -#define USE_silk_resampler_private_up2_HQ_wrapper (1) -#define USE_silk_resampler_private_IIR_FIR (2) -#define USE_silk_resampler_private_down_FIR (3) - -/* Initialize/reset the resampler state for a given pair of 
input/output sampling rates */ -opus_int silk_resampler_init( - silk_resampler_state_struct *S, /* I/O Resampler state */ - opus_int32 Fs_Hz_in, /* I Input sampling rate (Hz) */ - opus_int32 Fs_Hz_out, /* I Output sampling rate (Hz) */ - opus_int forEnc /* I If 1: encoder; if 0: decoder */ -) -{ - opus_int up2x; - - /* Clear state */ - silk_memset( S, 0, sizeof( silk_resampler_state_struct ) ); - - /* Input checking */ - if( forEnc ) { - if( ( Fs_Hz_in != 8000 && Fs_Hz_in != 12000 && Fs_Hz_in != 16000 && Fs_Hz_in != 24000 && Fs_Hz_in != 48000 ) || - ( Fs_Hz_out != 8000 && Fs_Hz_out != 12000 && Fs_Hz_out != 16000 ) ) { - silk_assert( 0 ); - return -1; - } - S->inputDelay = delay_matrix_enc[ rateID( Fs_Hz_in ) ][ rateID( Fs_Hz_out ) ]; - } else { - if( ( Fs_Hz_in != 8000 && Fs_Hz_in != 12000 && Fs_Hz_in != 16000 ) || - ( Fs_Hz_out != 8000 && Fs_Hz_out != 12000 && Fs_Hz_out != 16000 && Fs_Hz_out != 24000 && Fs_Hz_out != 48000 ) ) { - silk_assert( 0 ); - return -1; - } - S->inputDelay = delay_matrix_dec[ rateID( Fs_Hz_in ) ][ rateID( Fs_Hz_out ) ]; - } - - S->Fs_in_kHz = silk_DIV32_16( Fs_Hz_in, 1000 ); - S->Fs_out_kHz = silk_DIV32_16( Fs_Hz_out, 1000 ); - - /* Number of samples processed per batch */ - S->batchSize = S->Fs_in_kHz * RESAMPLER_MAX_BATCH_SIZE_MS; - - /* Find resampler with the right sampling ratio */ - up2x = 0; - if( Fs_Hz_out > Fs_Hz_in ) { - /* Upsample */ - if( Fs_Hz_out == silk_MUL( Fs_Hz_in, 2 ) ) { /* Fs_out : Fs_in = 2 : 1 */ - /* Special case: directly use 2x upsampler */ - S->resampler_function = USE_silk_resampler_private_up2_HQ_wrapper; - } else { - /* Default resampler */ - S->resampler_function = USE_silk_resampler_private_IIR_FIR; - up2x = 1; - } - } else if ( Fs_Hz_out < Fs_Hz_in ) { - /* Downsample */ - S->resampler_function = USE_silk_resampler_private_down_FIR; - if( silk_MUL( Fs_Hz_out, 4 ) == silk_MUL( Fs_Hz_in, 3 ) ) { /* Fs_out : Fs_in = 3 : 4 */ - S->FIR_Fracs = 3; - S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR0; - S->Coefs = 
silk_Resampler_3_4_COEFS; - } else if( silk_MUL( Fs_Hz_out, 3 ) == silk_MUL( Fs_Hz_in, 2 ) ) { /* Fs_out : Fs_in = 2 : 3 */ - S->FIR_Fracs = 2; - S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR0; - S->Coefs = silk_Resampler_2_3_COEFS; - } else if( silk_MUL( Fs_Hz_out, 2 ) == Fs_Hz_in ) { /* Fs_out : Fs_in = 1 : 2 */ - S->FIR_Fracs = 1; - S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR1; - S->Coefs = silk_Resampler_1_2_COEFS; - } else if( silk_MUL( Fs_Hz_out, 3 ) == Fs_Hz_in ) { /* Fs_out : Fs_in = 1 : 3 */ - S->FIR_Fracs = 1; - S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2; - S->Coefs = silk_Resampler_1_3_COEFS; - } else if( silk_MUL( Fs_Hz_out, 4 ) == Fs_Hz_in ) { /* Fs_out : Fs_in = 1 : 4 */ - S->FIR_Fracs = 1; - S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2; - S->Coefs = silk_Resampler_1_4_COEFS; - } else if( silk_MUL( Fs_Hz_out, 6 ) == Fs_Hz_in ) { /* Fs_out : Fs_in = 1 : 6 */ - S->FIR_Fracs = 1; - S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2; - S->Coefs = silk_Resampler_1_6_COEFS; - } else { - /* None available */ - silk_assert( 0 ); - return -1; - } - } else { - /* Input and output sampling rates are equal: copy */ - S->resampler_function = USE_silk_resampler_copy; - } - - /* Ratio of input/output samples */ - S->invRatio_Q16 = silk_LSHIFT32( silk_DIV32( silk_LSHIFT32( Fs_Hz_in, 14 + up2x ), Fs_Hz_out ), 2 ); - /* Make sure the ratio is rounded up */ - while( silk_SMULWW( S->invRatio_Q16, Fs_Hz_out ) < silk_LSHIFT32( Fs_Hz_in, up2x ) ) { - S->invRatio_Q16++; - } - - return 0; -} - -/* Resampler: convert from one sampling rate to another */ -/* Input and output sampling rate are at most 48000 Hz */ -opus_int silk_resampler( - silk_resampler_state_struct *S, /* I/O Resampler state */ - opus_int16 out[], /* O Output signal */ - const opus_int16 in[], /* I Input signal */ - opus_int32 inLen /* I Number of input samples */ -) -{ - opus_int nSamples; - - /* Need at least 1 ms of input data */ - silk_assert( inLen >= S->Fs_in_kHz ); - /* Delay can't exceed the 1 ms of buffering */ - silk_assert( 
S->inputDelay <= S->Fs_in_kHz ); - - nSamples = S->Fs_in_kHz - S->inputDelay; - - /* Copy to delay buffer */ - silk_memcpy( &S->delayBuf[ S->inputDelay ], in, nSamples * sizeof( opus_int16 ) ); - - switch( S->resampler_function ) { - case USE_silk_resampler_private_up2_HQ_wrapper: - silk_resampler_private_up2_HQ_wrapper( S, out, S->delayBuf, S->Fs_in_kHz ); - silk_resampler_private_up2_HQ_wrapper( S, &out[ S->Fs_out_kHz ], &in[ nSamples ], inLen - S->Fs_in_kHz ); - break; - case USE_silk_resampler_private_IIR_FIR: - silk_resampler_private_IIR_FIR( S, out, S->delayBuf, S->Fs_in_kHz ); - silk_resampler_private_IIR_FIR( S, &out[ S->Fs_out_kHz ], &in[ nSamples ], inLen - S->Fs_in_kHz ); - break; - case USE_silk_resampler_private_down_FIR: - silk_resampler_private_down_FIR( S, out, S->delayBuf, S->Fs_in_kHz ); - silk_resampler_private_down_FIR( S, &out[ S->Fs_out_kHz ], &in[ nSamples ], inLen - S->Fs_in_kHz ); - break; - default: - silk_memcpy( out, S->delayBuf, S->Fs_in_kHz * sizeof( opus_int16 ) ); - silk_memcpy( &out[ S->Fs_out_kHz ], &in[ nSamples ], ( inLen - S->Fs_in_kHz ) * sizeof( opus_int16 ) ); - } - - /* Copy to delay buffer */ - silk_memcpy( S->delayBuf, &in[ inLen - S->inputDelay ], S->inputDelay * sizeof( opus_int16 ) ); - - return 0; -} diff --git a/thirdparty/opus/silk/resampler_down2.c b/thirdparty/opus/silk/resampler_down2.c deleted file mode 100644 index cec3634640..0000000000 --- a/thirdparty/opus/silk/resampler_down2.c +++ /dev/null @@ -1,74 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "resampler_rom.h" - -/* Downsample by a factor 2 */ -void silk_resampler_down2( - opus_int32 *S, /* I/O State vector [ 2 ] */ - opus_int16 *out, /* O Output signal [ floor(len/2) ] */ - const opus_int16 *in, /* I Input signal [ len ] */ - opus_int32 inLen /* I Number of input samples */ -) -{ - opus_int32 k, len2 = silk_RSHIFT32( inLen, 1 ); - opus_int32 in32, out32, Y, X; - - silk_assert( silk_resampler_down2_0 > 0 ); - silk_assert( silk_resampler_down2_1 < 0 ); - - /* Internal variables and state are in Q10 format */ - for( k = 0; k < len2; k++ ) { - /* Convert to Q10 */ - in32 = silk_LSHIFT( (opus_int32)in[ 2 * k ], 10 ); - - /* All-pass section for even input sample */ - Y = silk_SUB32( in32, S[ 0 ] ); - X = silk_SMLAWB( Y, Y, silk_resampler_down2_1 ); - out32 = silk_ADD32( S[ 0 ], X ); - S[ 0 ] = silk_ADD32( in32, X ); - - /* Convert to Q10 */ - in32 = silk_LSHIFT( (opus_int32)in[ 2 * k + 1 ], 10 ); - - /* All-pass section for odd input sample, and add to output of previous section */ - Y = silk_SUB32( in32, S[ 1 ] ); - X = silk_SMULWB( Y, silk_resampler_down2_0 ); - out32 = silk_ADD32( out32, S[ 1 ] ); - out32 = silk_ADD32( out32, X ); - S[ 1 ] = silk_ADD32( in32, X ); - - /* Add, convert back to int16 and store to output */ - out[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32, 11 ) ); - } -} - diff --git a/thirdparty/opus/silk/resampler_down2_3.c b/thirdparty/opus/silk/resampler_down2_3.c deleted file mode 100644 index 4342614dcc..0000000000 --- a/thirdparty/opus/silk/resampler_down2_3.c +++ /dev/null @@ -1,103 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "resampler_private.h" -#include "stack_alloc.h" - -#define ORDER_FIR 4 - -/* Downsample by a factor 2/3, low quality */ -void silk_resampler_down2_3( - opus_int32 *S, /* I/O State vector [ 6 ] */ - opus_int16 *out, /* O Output signal [ floor(2*inLen/3) ] */ - const opus_int16 *in, /* I Input signal [ inLen ] */ - opus_int32 inLen /* I Number of input samples */ -) -{ - opus_int32 nSamplesIn, counter, res_Q6; - VARDECL( opus_int32, buf ); - opus_int32 *buf_ptr; - SAVE_STACK; - - ALLOC( buf, RESAMPLER_MAX_BATCH_SIZE_IN + ORDER_FIR, opus_int32 ); - - /* Copy buffered samples to start of buffer */ - silk_memcpy( buf, S, ORDER_FIR * sizeof( opus_int32 ) ); - - /* Iterate over blocks of frameSizeIn input samples */ - while( 1 ) { - nSamplesIn = silk_min( inLen, RESAMPLER_MAX_BATCH_SIZE_IN ); - - /* Second-order AR filter (output in Q8) */ - silk_resampler_private_AR2( &S[ ORDER_FIR ], &buf[ ORDER_FIR ], in, - silk_Resampler_2_3_COEFS_LQ, nSamplesIn ); - - /* Interpolate filtered signal */ - buf_ptr = buf; - counter = nSamplesIn; - while( counter > 2 ) { - /* Inner product */ - res_Q6 = silk_SMULWB( buf_ptr[ 0 ], silk_Resampler_2_3_COEFS_LQ[ 2 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 1 ], silk_Resampler_2_3_COEFS_LQ[ 3 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], silk_Resampler_2_3_COEFS_LQ[ 5 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], silk_Resampler_2_3_COEFS_LQ[ 4 ] ); - - /* Scale down, saturate and store in output array */ - *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) ); - - res_Q6 = silk_SMULWB( buf_ptr[ 1 ], silk_Resampler_2_3_COEFS_LQ[ 4 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], silk_Resampler_2_3_COEFS_LQ[ 5 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], silk_Resampler_2_3_COEFS_LQ[ 3 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 4 ], 
silk_Resampler_2_3_COEFS_LQ[ 2 ] ); - - /* Scale down, saturate and store in output array */ - *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) ); - - buf_ptr += 3; - counter -= 3; - } - - in += nSamplesIn; - inLen -= nSamplesIn; - - if( inLen > 0 ) { - /* More iterations to do; copy last part of filtered signal to beginning of buffer */ - silk_memcpy( buf, &buf[ nSamplesIn ], ORDER_FIR * sizeof( opus_int32 ) ); - } else { - break; - } - } - - /* Copy last part of filtered signal to the state for the next call */ - silk_memcpy( S, &buf[ nSamplesIn ], ORDER_FIR * sizeof( opus_int32 ) ); - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/resampler_private.h b/thirdparty/opus/silk/resampler_private.h deleted file mode 100644 index 422a7d9d95..0000000000 --- a/thirdparty/opus/silk/resampler_private.h +++ /dev/null @@ -1,88 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_RESAMPLER_PRIVATE_H -#define SILK_RESAMPLER_PRIVATE_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include "SigProc_FIX.h" -#include "resampler_structs.h" -#include "resampler_rom.h" - -/* Number of input samples to process in the inner loop */ -#define RESAMPLER_MAX_BATCH_SIZE_MS 10 -#define RESAMPLER_MAX_FS_KHZ 48 -#define RESAMPLER_MAX_BATCH_SIZE_IN ( RESAMPLER_MAX_BATCH_SIZE_MS * RESAMPLER_MAX_FS_KHZ ) - -/* Description: Hybrid IIR/FIR polyphase implementation of resampling */ -void silk_resampler_private_IIR_FIR( - void *SS, /* I/O Resampler state */ - opus_int16 out[], /* O Output signal */ - const opus_int16 in[], /* I Input signal */ - opus_int32 inLen /* I Number of input samples */ -); - -/* Description: Hybrid IIR/FIR polyphase implementation of resampling */ -void silk_resampler_private_down_FIR( - void *SS, /* I/O Resampler state */ - opus_int16 out[], /* O Output signal */ - const opus_int16 in[], /* I Input signal */ - opus_int32 inLen /* I Number of input samples */ -); - -/* Upsample by a factor 2, high quality */ -void silk_resampler_private_up2_HQ_wrapper( - void *SS, /* I/O Resampler state (unused) */ - opus_int16 *out, /* O Output signal [ 2 * len ] */ - const opus_int16 *in, /* I Input signal [ len ] */ - opus_int32 len /* I Number of input samples */ -); - -/* Upsample by a factor 2, high quality */ -void silk_resampler_private_up2_HQ( - 
opus_int32 *S, /* I/O Resampler state [ 6 ] */ - opus_int16 *out, /* O Output signal [ 2 * len ] */ - const opus_int16 *in, /* I Input signal [ len ] */ - opus_int32 len /* I Number of input samples */ -); - -/* Second order AR filter */ -void silk_resampler_private_AR2( - opus_int32 S[], /* I/O State vector [ 2 ] */ - opus_int32 out_Q8[], /* O Output signal */ - const opus_int16 in[], /* I Input signal */ - const opus_int16 A_Q14[], /* I AR coefficients, Q14 */ - opus_int32 len /* I Signal length */ -); - -#ifdef __cplusplus -} -#endif -#endif /* SILK_RESAMPLER_PRIVATE_H */ diff --git a/thirdparty/opus/silk/resampler_private_AR2.c b/thirdparty/opus/silk/resampler_private_AR2.c deleted file mode 100644 index 5fff23714f..0000000000 --- a/thirdparty/opus/silk/resampler_private_AR2.c +++ /dev/null @@ -1,55 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "resampler_private.h" - -/* Second order AR filter with single delay elements */ -void silk_resampler_private_AR2( - opus_int32 S[], /* I/O State vector [ 2 ] */ - opus_int32 out_Q8[], /* O Output signal */ - const opus_int16 in[], /* I Input signal */ - const opus_int16 A_Q14[], /* I AR coefficients, Q14 */ - opus_int32 len /* I Signal length */ -) -{ - opus_int32 k; - opus_int32 out32; - - for( k = 0; k < len; k++ ) { - out32 = silk_ADD_LSHIFT32( S[ 0 ], (opus_int32)in[ k ], 8 ); - out_Q8[ k ] = out32; - out32 = silk_LSHIFT( out32, 2 ); - S[ 0 ] = silk_SMLAWB( S[ 1 ], out32, A_Q14[ 0 ] ); - S[ 1 ] = silk_SMULWB( out32, A_Q14[ 1 ] ); - } -} - diff --git a/thirdparty/opus/silk/resampler_private_IIR_FIR.c b/thirdparty/opus/silk/resampler_private_IIR_FIR.c deleted file mode 100644 index 6b2b3a2e18..0000000000 --- a/thirdparty/opus/silk/resampler_private_IIR_FIR.c +++ /dev/null @@ -1,107 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "resampler_private.h" -#include "stack_alloc.h" - -static OPUS_INLINE opus_int16 *silk_resampler_private_IIR_FIR_INTERPOL( - opus_int16 *out, - opus_int16 *buf, - opus_int32 max_index_Q16, - opus_int32 index_increment_Q16 -) -{ - opus_int32 index_Q16, res_Q15; - opus_int16 *buf_ptr; - opus_int32 table_index; - - /* Interpolate upsampled signal and store in output array */ - for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) { - table_index = silk_SMULWB( index_Q16 & 0xFFFF, 12 ); - buf_ptr = &buf[ index_Q16 >> 16 ]; - - res_Q15 = silk_SMULBB( buf_ptr[ 0 ], silk_resampler_frac_FIR_12[ table_index ][ 0 ] ); - res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 1 ], silk_resampler_frac_FIR_12[ table_index ][ 1 ] ); - res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 2 ], silk_resampler_frac_FIR_12[ table_index ][ 2 ] ); - res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 3 ], silk_resampler_frac_FIR_12[ table_index ][ 3 ] ); - res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 4 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 3 ] ); - res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 5 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 2 ] ); - res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 6 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 1 ] ); - res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 7 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 0 ] ); - *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q15, 15 ) ); - } - return out; -} -/* Upsample using a combination of allpass-based 2x upsampling and FIR interpolation */ -void silk_resampler_private_IIR_FIR( - void *SS, /* I/O Resampler state */ - opus_int16 out[], /* O Output signal */ - const opus_int16 in[], /* I Input signal */ - opus_int32 inLen /* I Number of input samples */ -) -{ - silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS; - opus_int32 
nSamplesIn; - opus_int32 max_index_Q16, index_increment_Q16; - VARDECL( opus_int16, buf ); - SAVE_STACK; - - ALLOC( buf, 2 * S->batchSize + RESAMPLER_ORDER_FIR_12, opus_int16 ); - - /* Copy buffered samples to start of buffer */ - silk_memcpy( buf, S->sFIR.i16, RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) ); - - /* Iterate over blocks of frameSizeIn input samples */ - index_increment_Q16 = S->invRatio_Q16; - while( 1 ) { - nSamplesIn = silk_min( inLen, S->batchSize ); - - /* Upsample 2x */ - silk_resampler_private_up2_HQ( S->sIIR, &buf[ RESAMPLER_ORDER_FIR_12 ], in, nSamplesIn ); - - max_index_Q16 = silk_LSHIFT32( nSamplesIn, 16 + 1 ); /* + 1 because 2x upsampling */ - out = silk_resampler_private_IIR_FIR_INTERPOL( out, buf, max_index_Q16, index_increment_Q16 ); - in += nSamplesIn; - inLen -= nSamplesIn; - - if( inLen > 0 ) { - /* More iterations to do; copy last part of filtered signal to beginning of buffer */ - silk_memcpy( buf, &buf[ nSamplesIn << 1 ], RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) ); - } else { - break; - } - } - - /* Copy last part of filtered signal to the state for the next call */ - silk_memcpy( S->sFIR.i16, &buf[ nSamplesIn << 1 ], RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) ); - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/resampler_private_down_FIR.c b/thirdparty/opus/silk/resampler_private_down_FIR.c deleted file mode 100644 index 783e42b356..0000000000 --- a/thirdparty/opus/silk/resampler_private_down_FIR.c +++ /dev/null @@ -1,194 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "resampler_private.h" -#include "stack_alloc.h" - -static OPUS_INLINE opus_int16 *silk_resampler_private_down_FIR_INTERPOL( - opus_int16 *out, - opus_int32 *buf, - const opus_int16 *FIR_Coefs, - opus_int FIR_Order, - opus_int FIR_Fracs, - opus_int32 max_index_Q16, - opus_int32 index_increment_Q16 -) -{ - opus_int32 index_Q16, res_Q6; - opus_int32 *buf_ptr; - opus_int32 interpol_ind; - const opus_int16 *interpol_ptr; - - switch( FIR_Order ) { - case RESAMPLER_DOWN_ORDER_FIR0: - for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) { - /* Integer part gives pointer to buffered input */ - buf_ptr = buf + silk_RSHIFT( index_Q16, 16 ); - - /* Fractional part gives interpolation coefficients */ - interpol_ind = silk_SMULWB( index_Q16 & 0xFFFF, FIR_Fracs ); - - /* Inner product */ - interpol_ptr = &FIR_Coefs[ RESAMPLER_DOWN_ORDER_FIR0 / 2 * interpol_ind ]; - res_Q6 = silk_SMULWB( buf_ptr[ 0 ], interpol_ptr[ 0 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 1 ], interpol_ptr[ 1 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], interpol_ptr[ 2 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], interpol_ptr[ 3 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 4 ], interpol_ptr[ 4 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 5 ], interpol_ptr[ 5 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 6 ], interpol_ptr[ 6 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 7 ], interpol_ptr[ 7 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 8 ], interpol_ptr[ 8 ] ); - interpol_ptr = &FIR_Coefs[ RESAMPLER_DOWN_ORDER_FIR0 / 2 * ( FIR_Fracs - 1 - interpol_ind ) ]; - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 17 ], interpol_ptr[ 0 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 16 ], interpol_ptr[ 1 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 15 ], interpol_ptr[ 2 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 14 ], 
interpol_ptr[ 3 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 13 ], interpol_ptr[ 4 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 12 ], interpol_ptr[ 5 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 11 ], interpol_ptr[ 6 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 10 ], interpol_ptr[ 7 ] ); - res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 9 ], interpol_ptr[ 8 ] ); - - /* Scale down, saturate and store in output array */ - *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) ); - } - break; - case RESAMPLER_DOWN_ORDER_FIR1: - for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) { - /* Integer part gives pointer to buffered input */ - buf_ptr = buf + silk_RSHIFT( index_Q16, 16 ); - - /* Inner product */ - res_Q6 = silk_SMULWB( silk_ADD32( buf_ptr[ 0 ], buf_ptr[ 23 ] ), FIR_Coefs[ 0 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 1 ], buf_ptr[ 22 ] ), FIR_Coefs[ 1 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 2 ], buf_ptr[ 21 ] ), FIR_Coefs[ 2 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 3 ], buf_ptr[ 20 ] ), FIR_Coefs[ 3 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 4 ], buf_ptr[ 19 ] ), FIR_Coefs[ 4 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 5 ], buf_ptr[ 18 ] ), FIR_Coefs[ 5 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 6 ], buf_ptr[ 17 ] ), FIR_Coefs[ 6 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 7 ], buf_ptr[ 16 ] ), FIR_Coefs[ 7 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 8 ], buf_ptr[ 15 ] ), FIR_Coefs[ 8 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 9 ], buf_ptr[ 14 ] ), FIR_Coefs[ 9 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 10 ], buf_ptr[ 13 ] ), FIR_Coefs[ 10 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 11 ], buf_ptr[ 12 ] ), FIR_Coefs[ 11 ] ); - - /* Scale down, saturate and store in output array */ - *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) ); - } - break; - 
case RESAMPLER_DOWN_ORDER_FIR2: - for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) { - /* Integer part gives pointer to buffered input */ - buf_ptr = buf + silk_RSHIFT( index_Q16, 16 ); - - /* Inner product */ - res_Q6 = silk_SMULWB( silk_ADD32( buf_ptr[ 0 ], buf_ptr[ 35 ] ), FIR_Coefs[ 0 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 1 ], buf_ptr[ 34 ] ), FIR_Coefs[ 1 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 2 ], buf_ptr[ 33 ] ), FIR_Coefs[ 2 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 3 ], buf_ptr[ 32 ] ), FIR_Coefs[ 3 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 4 ], buf_ptr[ 31 ] ), FIR_Coefs[ 4 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 5 ], buf_ptr[ 30 ] ), FIR_Coefs[ 5 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 6 ], buf_ptr[ 29 ] ), FIR_Coefs[ 6 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 7 ], buf_ptr[ 28 ] ), FIR_Coefs[ 7 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 8 ], buf_ptr[ 27 ] ), FIR_Coefs[ 8 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 9 ], buf_ptr[ 26 ] ), FIR_Coefs[ 9 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 10 ], buf_ptr[ 25 ] ), FIR_Coefs[ 10 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 11 ], buf_ptr[ 24 ] ), FIR_Coefs[ 11 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 12 ], buf_ptr[ 23 ] ), FIR_Coefs[ 12 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 13 ], buf_ptr[ 22 ] ), FIR_Coefs[ 13 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 14 ], buf_ptr[ 21 ] ), FIR_Coefs[ 14 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 15 ], buf_ptr[ 20 ] ), FIR_Coefs[ 15 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 16 ], buf_ptr[ 19 ] ), FIR_Coefs[ 16 ] ); - res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 17 ], buf_ptr[ 18 ] ), FIR_Coefs[ 17 ] ); - - /* Scale down, saturate and store in output array */ - *out++ = 
(opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) ); - } - break; - default: - silk_assert( 0 ); - } - return out; -} - -/* Resample with a 2nd order AR filter followed by FIR interpolation */ -void silk_resampler_private_down_FIR( - void *SS, /* I/O Resampler state */ - opus_int16 out[], /* O Output signal */ - const opus_int16 in[], /* I Input signal */ - opus_int32 inLen /* I Number of input samples */ -) -{ - silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS; - opus_int32 nSamplesIn; - opus_int32 max_index_Q16, index_increment_Q16; - VARDECL( opus_int32, buf ); - const opus_int16 *FIR_Coefs; - SAVE_STACK; - - ALLOC( buf, S->batchSize + S->FIR_Order, opus_int32 ); - - /* Copy buffered samples to start of buffer */ - silk_memcpy( buf, S->sFIR.i32, S->FIR_Order * sizeof( opus_int32 ) ); - - FIR_Coefs = &S->Coefs[ 2 ]; - - /* Iterate over blocks of frameSizeIn input samples */ - index_increment_Q16 = S->invRatio_Q16; - while( 1 ) { - nSamplesIn = silk_min( inLen, S->batchSize ); - - /* Second-order AR filter (output in Q8) */ - silk_resampler_private_AR2( S->sIIR, &buf[ S->FIR_Order ], in, S->Coefs, nSamplesIn ); - - max_index_Q16 = silk_LSHIFT32( nSamplesIn, 16 ); - - /* Interpolate filtered signal */ - out = silk_resampler_private_down_FIR_INTERPOL( out, buf, FIR_Coefs, S->FIR_Order, - S->FIR_Fracs, max_index_Q16, index_increment_Q16 ); - - in += nSamplesIn; - inLen -= nSamplesIn; - - if( inLen > 1 ) { - /* More iterations to do; copy last part of filtered signal to beginning of buffer */ - silk_memcpy( buf, &buf[ nSamplesIn ], S->FIR_Order * sizeof( opus_int32 ) ); - } else { - break; - } - } - - /* Copy last part of filtered signal to the state for the next call */ - silk_memcpy( S->sFIR.i32, &buf[ nSamplesIn ], S->FIR_Order * sizeof( opus_int32 ) ); - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/resampler_private_up2_HQ.c b/thirdparty/opus/silk/resampler_private_up2_HQ.c deleted file mode 100644 index c7ec8de365..0000000000 --- 
a/thirdparty/opus/silk/resampler_private_up2_HQ.c +++ /dev/null @@ -1,113 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" -#include "resampler_private.h" - -/* Upsample by a factor 2, high quality */ -/* Uses 2nd order allpass filters for the 2x upsampling, followed by a */ -/* notch filter just above Nyquist. */ -void silk_resampler_private_up2_HQ( - opus_int32 *S, /* I/O Resampler state [ 6 ] */ - opus_int16 *out, /* O Output signal [ 2 * len ] */ - const opus_int16 *in, /* I Input signal [ len ] */ - opus_int32 len /* I Number of input samples */ -) -{ - opus_int32 k; - opus_int32 in32, out32_1, out32_2, Y, X; - - silk_assert( silk_resampler_up2_hq_0[ 0 ] > 0 ); - silk_assert( silk_resampler_up2_hq_0[ 1 ] > 0 ); - silk_assert( silk_resampler_up2_hq_0[ 2 ] < 0 ); - silk_assert( silk_resampler_up2_hq_1[ 0 ] > 0 ); - silk_assert( silk_resampler_up2_hq_1[ 1 ] > 0 ); - silk_assert( silk_resampler_up2_hq_1[ 2 ] < 0 ); - - /* Internal variables and state are in Q10 format */ - for( k = 0; k < len; k++ ) { - /* Convert to Q10 */ - in32 = silk_LSHIFT( (opus_int32)in[ k ], 10 ); - - /* First all-pass section for even output sample */ - Y = silk_SUB32( in32, S[ 0 ] ); - X = silk_SMULWB( Y, silk_resampler_up2_hq_0[ 0 ] ); - out32_1 = silk_ADD32( S[ 0 ], X ); - S[ 0 ] = silk_ADD32( in32, X ); - - /* Second all-pass section for even output sample */ - Y = silk_SUB32( out32_1, S[ 1 ] ); - X = silk_SMULWB( Y, silk_resampler_up2_hq_0[ 1 ] ); - out32_2 = silk_ADD32( S[ 1 ], X ); - S[ 1 ] = silk_ADD32( out32_1, X ); - - /* Third all-pass section for even output sample */ - Y = silk_SUB32( out32_2, S[ 2 ] ); - X = silk_SMLAWB( Y, Y, silk_resampler_up2_hq_0[ 2 ] ); - out32_1 = silk_ADD32( S[ 2 ], X ); - S[ 2 ] = silk_ADD32( out32_2, X ); - - /* Apply gain in Q15, convert back to int16 and store to output */ - out[ 2 * k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32_1, 10 ) ); - - /* First all-pass section for odd output sample */ - Y = 
silk_SUB32( in32, S[ 3 ] ); - X = silk_SMULWB( Y, silk_resampler_up2_hq_1[ 0 ] ); - out32_1 = silk_ADD32( S[ 3 ], X ); - S[ 3 ] = silk_ADD32( in32, X ); - - /* Second all-pass section for odd output sample */ - Y = silk_SUB32( out32_1, S[ 4 ] ); - X = silk_SMULWB( Y, silk_resampler_up2_hq_1[ 1 ] ); - out32_2 = silk_ADD32( S[ 4 ], X ); - S[ 4 ] = silk_ADD32( out32_1, X ); - - /* Third all-pass section for odd output sample */ - Y = silk_SUB32( out32_2, S[ 5 ] ); - X = silk_SMLAWB( Y, Y, silk_resampler_up2_hq_1[ 2 ] ); - out32_1 = silk_ADD32( S[ 5 ], X ); - S[ 5 ] = silk_ADD32( out32_2, X ); - - /* Apply gain in Q15, convert back to int16 and store to output */ - out[ 2 * k + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32_1, 10 ) ); - } -} - -void silk_resampler_private_up2_HQ_wrapper( - void *SS, /* I/O Resampler state (unused) */ - opus_int16 *out, /* O Output signal [ 2 * len ] */ - const opus_int16 *in, /* I Input signal [ len ] */ - opus_int32 len /* I Number of input samples */ -) -{ - silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS; - silk_resampler_private_up2_HQ( S->sIIR, out, in, len ); -} diff --git a/thirdparty/opus/silk/resampler_rom.c b/thirdparty/opus/silk/resampler_rom.c deleted file mode 100644 index 5e6b04476a..0000000000 --- a/thirdparty/opus/silk/resampler_rom.c +++ /dev/null @@ -1,96 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* Filter coefficients for IIR/FIR polyphase resampling * - * Total size: 179 Words (358 Bytes) */ - -#include "resampler_private.h" - -/* Matlab code for the notch filter coefficients: */ -/* B = [1, 0.147, 1]; A = [1, 0.107, 0.89]; G = 0.93; freqz(G * B, A, 2^14, 16e3); axis([0, 8000, -10, 1]) */ -/* fprintf('\t%6d, %6d, %6d, %6d\n', round(B(2)*2^16), round(-A(2)*2^16), round((1-A(3))*2^16), round(G*2^15)) */ -/* const opus_int16 silk_resampler_up2_hq_notch[ 4 ] = { 9634, -7012, 7209, 30474 }; */ - -/* Tables with IIR and FIR coefficients for fractional downsamplers (123 Words) */ -silk_DWORD_ALIGN const opus_int16 silk_Resampler_3_4_COEFS[ 2 + 3 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ] = { - -20694, -13867, - -49, 64, 17, -157, 353, -496, 163, 11047, 22205, - -39, 6, 91, -170, 186, 23, -896, 6336, 19928, - -19, -36, 102, -89, -24, 328, -951, 2568, 15909, -}; - -silk_DWORD_ALIGN 
const opus_int16 silk_Resampler_2_3_COEFS[ 2 + 2 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ] = { - -14457, -14019, - 64, 128, -122, 36, 310, -768, 584, 9267, 17733, - 12, 128, 18, -142, 288, -117, -865, 4123, 14459, -}; - -silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_2_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR1 / 2 ] = { - 616, -14323, - -10, 39, 58, -46, -84, 120, 184, -315, -541, 1284, 5380, 9024, -}; - -silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_3_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = { - 16102, -15162, - -13, 0, 20, 26, 5, -31, -43, -4, 65, 90, 7, -157, -248, -44, 593, 1583, 2612, 3271, -}; - -silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_4_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = { - 22500, -15099, - 3, -14, -20, -15, 2, 25, 37, 25, -16, -71, -107, -79, 50, 292, 623, 982, 1288, 1464, -}; - -silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_6_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = { - 27540, -15257, - 17, 12, 8, 1, -10, -22, -30, -32, -22, 3, 44, 100, 168, 243, 317, 381, 429, 455, -}; - -silk_DWORD_ALIGN const opus_int16 silk_Resampler_2_3_COEFS_LQ[ 2 + 2 * 2 ] = { - -2797, -6507, - 4697, 10739, - 1567, 8276, -}; - -/* Table with interplation fractions of 1/24, 3/24, 5/24, ... 
, 23/24 : 23/24 (46 Words) */ -silk_DWORD_ALIGN const opus_int16 silk_resampler_frac_FIR_12[ 12 ][ RESAMPLER_ORDER_FIR_12 / 2 ] = { - { 189, -600, 617, 30567 }, - { 117, -159, -1070, 29704 }, - { 52, 221, -2392, 28276 }, - { -4, 529, -3350, 26341 }, - { -48, 758, -3956, 23973 }, - { -80, 905, -4235, 21254 }, - { -99, 972, -4222, 18278 }, - { -107, 967, -3957, 15143 }, - { -103, 896, -3487, 11950 }, - { -91, 773, -2865, 8798 }, - { -71, 611, -2143, 5784 }, - { -46, 425, -1375, 2996 }, -}; diff --git a/thirdparty/opus/silk/resampler_rom.h b/thirdparty/opus/silk/resampler_rom.h deleted file mode 100644 index 490b3388dc..0000000000 --- a/thirdparty/opus/silk/resampler_rom.h +++ /dev/null @@ -1,68 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_FIX_RESAMPLER_ROM_H -#define SILK_FIX_RESAMPLER_ROM_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -#include "typedef.h" -#include "resampler_structs.h" - -#define RESAMPLER_DOWN_ORDER_FIR0 18 -#define RESAMPLER_DOWN_ORDER_FIR1 24 -#define RESAMPLER_DOWN_ORDER_FIR2 36 -#define RESAMPLER_ORDER_FIR_12 8 - -/* Tables for 2x downsampler */ -static const opus_int16 silk_resampler_down2_0 = 9872; -static const opus_int16 silk_resampler_down2_1 = 39809 - 65536; - -/* Tables for 2x upsampler, high quality */ -static const opus_int16 silk_resampler_up2_hq_0[ 3 ] = { 1746, 14986, 39083 - 65536 }; -static const opus_int16 silk_resampler_up2_hq_1[ 3 ] = { 6854, 25769, 55542 - 65536 }; - -/* Tables with IIR and FIR coefficients for fractional downsamplers */ -extern const opus_int16 silk_Resampler_3_4_COEFS[ 2 + 3 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ]; -extern const opus_int16 silk_Resampler_2_3_COEFS[ 2 + 2 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ]; -extern const opus_int16 silk_Resampler_1_2_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR1 / 2 ]; -extern const opus_int16 silk_Resampler_1_3_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ]; -extern const opus_int16 silk_Resampler_1_4_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ]; -extern const opus_int16 silk_Resampler_1_6_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ]; -extern const opus_int16 silk_Resampler_2_3_COEFS_LQ[ 2 + 2 * 2 ]; - -/* Table with 
interplation fractions of 1/24, 3/24, ..., 23/24 */ -extern const opus_int16 silk_resampler_frac_FIR_12[ 12 ][ RESAMPLER_ORDER_FIR_12 / 2 ]; - -#ifdef __cplusplus -} -#endif - -#endif /* SILK_FIX_RESAMPLER_ROM_H */ diff --git a/thirdparty/opus/silk/resampler_structs.h b/thirdparty/opus/silk/resampler_structs.h deleted file mode 100644 index 9e9457d11c..0000000000 --- a/thirdparty/opus/silk/resampler_structs.h +++ /dev/null @@ -1,60 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_RESAMPLER_STRUCTS_H -#define SILK_RESAMPLER_STRUCTS_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define SILK_RESAMPLER_MAX_FIR_ORDER 36 -#define SILK_RESAMPLER_MAX_IIR_ORDER 6 - -typedef struct _silk_resampler_state_struct{ - opus_int32 sIIR[ SILK_RESAMPLER_MAX_IIR_ORDER ]; /* this must be the first element of this struct */ - union{ - opus_int32 i32[ SILK_RESAMPLER_MAX_FIR_ORDER ]; - opus_int16 i16[ SILK_RESAMPLER_MAX_FIR_ORDER ]; - } sFIR; - opus_int16 delayBuf[ 48 ]; - opus_int resampler_function; - opus_int batchSize; - opus_int32 invRatio_Q16; - opus_int FIR_Order; - opus_int FIR_Fracs; - opus_int Fs_in_kHz; - opus_int Fs_out_kHz; - opus_int inputDelay; - const opus_int16 *Coefs; -} silk_resampler_state_struct; - -#ifdef __cplusplus -} -#endif -#endif /* SILK_RESAMPLER_STRUCTS_H */ - diff --git a/thirdparty/opus/silk/shell_coder.c b/thirdparty/opus/silk/shell_coder.c deleted file mode 100644 index 4af341474b..0000000000 --- a/thirdparty/opus/silk/shell_coder.c +++ /dev/null @@ -1,151 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* shell coder; pulse-subframe length is hardcoded */ - -static OPUS_INLINE void combine_pulses( - opus_int *out, /* O combined pulses vector [len] */ - const opus_int *in, /* I input vector [2 * len] */ - const opus_int len /* I number of OUTPUT samples */ -) -{ - opus_int k; - for( k = 0; k < len; k++ ) { - out[ k ] = in[ 2 * k ] + in[ 2 * k + 1 ]; - } -} - -static OPUS_INLINE void encode_split( - ec_enc *psRangeEnc, /* I/O compressor data structure */ - const opus_int p_child1, /* I pulse amplitude of first child subframe */ - const opus_int p, /* I pulse amplitude of current subframe */ - const opus_uint8 *shell_table /* I table of shell cdfs */ -) -{ - if( p > 0 ) { - ec_enc_icdf( psRangeEnc, p_child1, &shell_table[ silk_shell_code_table_offsets[ p ] ], 8 ); - } -} - -static OPUS_INLINE void decode_split( - opus_int16 *p_child1, /* O pulse amplitude of first child subframe */ - opus_int16 *p_child2, /* O pulse amplitude of second child subframe */ - ec_dec *psRangeDec, /* I/O Compressor data structure */ - const opus_int p, /* I pulse amplitude of current subframe */ - const opus_uint8 *shell_table /* I table of shell cdfs */ -) -{ - if( p > 0 ) { - p_child1[ 0 ] = ec_dec_icdf( psRangeDec, &shell_table[ silk_shell_code_table_offsets[ p ] ], 8 ); - p_child2[ 0 ] = p - p_child1[ 0 ]; - } else { - p_child1[ 0 ] = 0; - p_child2[ 0 ] = 0; - } -} - -/* Shell encoder, operates on one shell code frame of 16 pulses */ -void silk_shell_encoder( - ec_enc *psRangeEnc, /* I/O compressor data structure */ - const opus_int *pulses0 /* I data: nonnegative pulse amplitudes */ -) -{ - opus_int pulses1[ 8 ], pulses2[ 4 ], pulses3[ 2 ], pulses4[ 1 ]; - - /* this function operates on one shell code frame of 16 pulses */ - silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 ); - - /* tree representation per pulse-subframe */ - 
combine_pulses( pulses1, pulses0, 8 ); - combine_pulses( pulses2, pulses1, 4 ); - combine_pulses( pulses3, pulses2, 2 ); - combine_pulses( pulses4, pulses3, 1 ); - - encode_split( psRangeEnc, pulses3[ 0 ], pulses4[ 0 ], silk_shell_code_table3 ); - - encode_split( psRangeEnc, pulses2[ 0 ], pulses3[ 0 ], silk_shell_code_table2 ); - - encode_split( psRangeEnc, pulses1[ 0 ], pulses2[ 0 ], silk_shell_code_table1 ); - encode_split( psRangeEnc, pulses0[ 0 ], pulses1[ 0 ], silk_shell_code_table0 ); - encode_split( psRangeEnc, pulses0[ 2 ], pulses1[ 1 ], silk_shell_code_table0 ); - - encode_split( psRangeEnc, pulses1[ 2 ], pulses2[ 1 ], silk_shell_code_table1 ); - encode_split( psRangeEnc, pulses0[ 4 ], pulses1[ 2 ], silk_shell_code_table0 ); - encode_split( psRangeEnc, pulses0[ 6 ], pulses1[ 3 ], silk_shell_code_table0 ); - - encode_split( psRangeEnc, pulses2[ 2 ], pulses3[ 1 ], silk_shell_code_table2 ); - - encode_split( psRangeEnc, pulses1[ 4 ], pulses2[ 2 ], silk_shell_code_table1 ); - encode_split( psRangeEnc, pulses0[ 8 ], pulses1[ 4 ], silk_shell_code_table0 ); - encode_split( psRangeEnc, pulses0[ 10 ], pulses1[ 5 ], silk_shell_code_table0 ); - - encode_split( psRangeEnc, pulses1[ 6 ], pulses2[ 3 ], silk_shell_code_table1 ); - encode_split( psRangeEnc, pulses0[ 12 ], pulses1[ 6 ], silk_shell_code_table0 ); - encode_split( psRangeEnc, pulses0[ 14 ], pulses1[ 7 ], silk_shell_code_table0 ); -} - - -/* Shell decoder, operates on one shell code frame of 16 pulses */ -void silk_shell_decoder( - opus_int16 *pulses0, /* O data: nonnegative pulse amplitudes */ - ec_dec *psRangeDec, /* I/O Compressor data structure */ - const opus_int pulses4 /* I number of pulses per pulse-subframe */ -) -{ - opus_int16 pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ]; - - /* this function operates on one shell code frame of 16 pulses */ - silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 ); - - decode_split( &pulses3[ 0 ], &pulses3[ 1 ], psRangeDec, pulses4, silk_shell_code_table3 ); - - decode_split( 
&pulses2[ 0 ], &pulses2[ 1 ], psRangeDec, pulses3[ 0 ], silk_shell_code_table2 ); - - decode_split( &pulses1[ 0 ], &pulses1[ 1 ], psRangeDec, pulses2[ 0 ], silk_shell_code_table1 ); - decode_split( &pulses0[ 0 ], &pulses0[ 1 ], psRangeDec, pulses1[ 0 ], silk_shell_code_table0 ); - decode_split( &pulses0[ 2 ], &pulses0[ 3 ], psRangeDec, pulses1[ 1 ], silk_shell_code_table0 ); - - decode_split( &pulses1[ 2 ], &pulses1[ 3 ], psRangeDec, pulses2[ 1 ], silk_shell_code_table1 ); - decode_split( &pulses0[ 4 ], &pulses0[ 5 ], psRangeDec, pulses1[ 2 ], silk_shell_code_table0 ); - decode_split( &pulses0[ 6 ], &pulses0[ 7 ], psRangeDec, pulses1[ 3 ], silk_shell_code_table0 ); - - decode_split( &pulses2[ 2 ], &pulses2[ 3 ], psRangeDec, pulses3[ 1 ], silk_shell_code_table2 ); - - decode_split( &pulses1[ 4 ], &pulses1[ 5 ], psRangeDec, pulses2[ 2 ], silk_shell_code_table1 ); - decode_split( &pulses0[ 8 ], &pulses0[ 9 ], psRangeDec, pulses1[ 4 ], silk_shell_code_table0 ); - decode_split( &pulses0[ 10 ], &pulses0[ 11 ], psRangeDec, pulses1[ 5 ], silk_shell_code_table0 ); - - decode_split( &pulses1[ 6 ], &pulses1[ 7 ], psRangeDec, pulses2[ 3 ], silk_shell_code_table1 ); - decode_split( &pulses0[ 12 ], &pulses0[ 13 ], psRangeDec, pulses1[ 6 ], silk_shell_code_table0 ); - decode_split( &pulses0[ 14 ], &pulses0[ 15 ], psRangeDec, pulses1[ 7 ], silk_shell_code_table0 ); -} diff --git a/thirdparty/opus/silk/sigm_Q15.c b/thirdparty/opus/silk/sigm_Q15.c deleted file mode 100644 index 3c507d255b..0000000000 --- a/thirdparty/opus/silk/sigm_Q15.c +++ /dev/null @@ -1,76 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* Approximate sigmoid function */ - -#include "SigProc_FIX.h" - -/* fprintf(1, '%d, ', round(1024 * ([1 ./ (1 + exp(-(1:5))), 1] - 1 ./ (1 + exp(-(0:5)))))); */ -static const opus_int32 sigm_LUT_slope_Q10[ 6 ] = { - 237, 153, 73, 30, 12, 7 -}; -/* fprintf(1, '%d, ', round(32767 * 1 ./ (1 + exp(-(0:5))))); */ -static const opus_int32 sigm_LUT_pos_Q15[ 6 ] = { - 16384, 23955, 28861, 31213, 32178, 32548 -}; -/* fprintf(1, '%d, ', round(32767 * 1 ./ (1 + exp((0:5))))); */ -static const opus_int32 sigm_LUT_neg_Q15[ 6 ] = { - 16384, 8812, 3906, 1554, 589, 219 -}; - -opus_int silk_sigm_Q15( - opus_int in_Q5 /* I */ -) -{ - opus_int ind; - - if( in_Q5 < 0 ) { - /* Negative input */ - in_Q5 = -in_Q5; - if( in_Q5 >= 6 * 32 ) { - return 0; /* Clip */ - } else { - /* Linear interpolation of look up table */ - ind = silk_RSHIFT( in_Q5, 5 ); - return( sigm_LUT_neg_Q15[ ind ] - silk_SMULBB( sigm_LUT_slope_Q10[ ind ], in_Q5 & 0x1F ) ); - } - } else { - /* Positive input */ - if( in_Q5 >= 6 * 32 ) { - return 32767; /* clip */ - } else { - /* Linear interpolation of look up table */ - ind = silk_RSHIFT( in_Q5, 5 ); - return( sigm_LUT_pos_Q15[ ind ] + silk_SMULBB( sigm_LUT_slope_Q10[ ind ], in_Q5 & 0x1F ) ); - } - } -} - diff --git a/thirdparty/opus/silk/sort.c b/thirdparty/opus/silk/sort.c deleted file mode 100644 index 7187c9efb1..0000000000 --- a/thirdparty/opus/silk/sort.c +++ /dev/null @@ -1,154 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* Insertion sort (fast for already almost sorted arrays): */ -/* Best case: O(n) for an already sorted array */ -/* Worst case: O(n^2) for an inversely sorted array */ -/* */ -/* Shell short: https://en.wikipedia.org/wiki/Shell_sort */ - -#include "SigProc_FIX.h" - -void silk_insertion_sort_increasing( - opus_int32 *a, /* I/O Unsorted / Sorted vector */ - opus_int *idx, /* O Index vector for the sorted elements */ - const opus_int L, /* I Vector length */ - const opus_int K /* I Number of correctly sorted positions */ -) -{ - opus_int32 value; - opus_int i, j; - - /* Safety checks */ - silk_assert( K > 0 ); - silk_assert( L > 0 ); - silk_assert( L >= K ); - - /* Write start indices in index vector */ - for( i = 0; i < K; i++ ) { - idx[ i ] = i; - } - - /* Sort vector elements by value, increasing order */ - for( i = 1; i < K; i++ ) { - value = a[ i ]; - for( j = i - 1; ( j >= 0 ) && ( value < a[ j ] ); j-- ) { - a[ j + 1 ] = a[ j ]; /* Shift value */ - idx[ j + 1 ] = idx[ j ]; /* Shift index */ - } - a[ j + 1 ] = value; /* Write value */ - idx[ j + 1 ] = i; /* Write index */ - } - - /* If less than L values are asked for, check the remaining values, */ - /* but only spend CPU to ensure that the K first values are correct */ - for( i = K; i < L; i++ ) { - value = a[ i ]; - if( value < a[ K - 1 ] ) { - for( j = K - 2; ( j >= 0 ) && ( value < a[ j ] ); j-- ) { - a[ j + 1 ] = a[ j ]; /* Shift value */ - idx[ j + 1 ] = idx[ j ]; /* Shift index */ - } - a[ j + 1 ] = value; /* Write value */ - idx[ j + 1 ] = i; /* Write index */ - } - } -} - -#ifdef FIXED_POINT -/* This function is only used by the fixed-point build */ -void silk_insertion_sort_decreasing_int16( - opus_int16 *a, /* I/O Unsorted / Sorted vector */ - opus_int *idx, /* O Index vector for the sorted elements */ - const opus_int L, /* I Vector length */ - const opus_int K /* I 
Number of correctly sorted positions */ -) -{ - opus_int i, j; - opus_int value; - - /* Safety checks */ - silk_assert( K > 0 ); - silk_assert( L > 0 ); - silk_assert( L >= K ); - - /* Write start indices in index vector */ - for( i = 0; i < K; i++ ) { - idx[ i ] = i; - } - - /* Sort vector elements by value, decreasing order */ - for( i = 1; i < K; i++ ) { - value = a[ i ]; - for( j = i - 1; ( j >= 0 ) && ( value > a[ j ] ); j-- ) { - a[ j + 1 ] = a[ j ]; /* Shift value */ - idx[ j + 1 ] = idx[ j ]; /* Shift index */ - } - a[ j + 1 ] = value; /* Write value */ - idx[ j + 1 ] = i; /* Write index */ - } - - /* If less than L values are asked for, check the remaining values, */ - /* but only spend CPU to ensure that the K first values are correct */ - for( i = K; i < L; i++ ) { - value = a[ i ]; - if( value > a[ K - 1 ] ) { - for( j = K - 2; ( j >= 0 ) && ( value > a[ j ] ); j-- ) { - a[ j + 1 ] = a[ j ]; /* Shift value */ - idx[ j + 1 ] = idx[ j ]; /* Shift index */ - } - a[ j + 1 ] = value; /* Write value */ - idx[ j + 1 ] = i; /* Write index */ - } - } -} -#endif - -void silk_insertion_sort_increasing_all_values_int16( - opus_int16 *a, /* I/O Unsorted / Sorted vector */ - const opus_int L /* I Vector length */ -) -{ - opus_int value; - opus_int i, j; - - /* Safety checks */ - silk_assert( L > 0 ); - - /* Sort vector elements by value, increasing order */ - for( i = 1; i < L; i++ ) { - value = a[ i ]; - for( j = i - 1; ( j >= 0 ) && ( value < a[ j ] ); j-- ) { - a[ j + 1 ] = a[ j ]; /* Shift value */ - } - a[ j + 1 ] = value; /* Write value */ - } -} diff --git a/thirdparty/opus/silk/stereo_LR_to_MS.c b/thirdparty/opus/silk/stereo_LR_to_MS.c deleted file mode 100644 index dda0298de2..0000000000 --- a/thirdparty/opus/silk/stereo_LR_to_MS.c +++ /dev/null @@ -1,229 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" -#include "stack_alloc.h" - -/* Convert Left/Right stereo signal to adaptive Mid/Side representation */ -void silk_stereo_LR_to_MS( - stereo_enc_state *state, /* I/O State */ - opus_int16 x1[], /* I/O Left input signal, becomes mid signal */ - opus_int16 x2[], /* I/O Right input signal, becomes side signal */ - opus_int8 ix[ 2 ][ 3 ], /* O Quantization indices */ - opus_int8 *mid_only_flag, /* O Flag: only mid signal coded */ - opus_int32 mid_side_rates_bps[], /* O Bitrates for mid and side signals */ - opus_int32 total_rate_bps, /* I Total bitrate */ - opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */ - opus_int toMono, /* I Last frame before a stereo->mono transition */ - opus_int fs_kHz, /* I Sample rate (kHz) */ - opus_int frame_length /* I Number of samples */ -) -{ - opus_int n, is10msFrame, denom_Q16, delta0_Q13, delta1_Q13; - opus_int32 sum, diff, smooth_coef_Q16, pred_Q13[ 2 ], pred0_Q13, pred1_Q13; - opus_int32 LP_ratio_Q14, HP_ratio_Q14, frac_Q16, frac_3_Q16, min_mid_rate_bps, width_Q14, w_Q24, deltaw_Q24; - VARDECL( opus_int16, side ); - VARDECL( opus_int16, LP_mid ); - VARDECL( opus_int16, HP_mid ); - VARDECL( opus_int16, LP_side ); - VARDECL( opus_int16, HP_side ); - opus_int16 *mid = &x1[ -2 ]; - SAVE_STACK; - - ALLOC( side, frame_length + 2, opus_int16 ); - /* Convert to basic mid/side signals */ - for( n = 0; n < frame_length + 2; n++ ) { - sum = x1[ n - 2 ] + (opus_int32)x2[ n - 2 ]; - diff = x1[ n - 2 ] - (opus_int32)x2[ n - 2 ]; - mid[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 ); - side[ n ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( diff, 1 ) ); - } - - /* Buffering */ - silk_memcpy( mid, state->sMid, 2 * sizeof( opus_int16 ) ); - silk_memcpy( side, state->sSide, 2 * sizeof( opus_int16 ) ); - silk_memcpy( state->sMid, &mid[ frame_length ], 2 * sizeof( opus_int16 ) ); - 
silk_memcpy( state->sSide, &side[ frame_length ], 2 * sizeof( opus_int16 ) ); - - /* LP and HP filter mid signal */ - ALLOC( LP_mid, frame_length, opus_int16 ); - ALLOC( HP_mid, frame_length, opus_int16 ); - for( n = 0; n < frame_length; n++ ) { - sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 ); - LP_mid[ n ] = sum; - HP_mid[ n ] = mid[ n + 1 ] - sum; - } - - /* LP and HP filter side signal */ - ALLOC( LP_side, frame_length, opus_int16 ); - ALLOC( HP_side, frame_length, opus_int16 ); - for( n = 0; n < frame_length; n++ ) { - sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + (opus_int32)side[ n + 2 ], side[ n + 1 ], 1 ), 2 ); - LP_side[ n ] = sum; - HP_side[ n ] = side[ n + 1 ] - sum; - } - - /* Find energies and predictors */ - is10msFrame = frame_length == 10 * fs_kHz; - smooth_coef_Q16 = is10msFrame ? - SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF / 2, 16 ) : - SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF, 16 ); - smooth_coef_Q16 = silk_SMULWB( silk_SMULBB( prev_speech_act_Q8, prev_speech_act_Q8 ), smooth_coef_Q16 ); - - pred_Q13[ 0 ] = silk_stereo_find_predictor( &LP_ratio_Q14, LP_mid, LP_side, &state->mid_side_amp_Q0[ 0 ], frame_length, smooth_coef_Q16 ); - pred_Q13[ 1 ] = silk_stereo_find_predictor( &HP_ratio_Q14, HP_mid, HP_side, &state->mid_side_amp_Q0[ 2 ], frame_length, smooth_coef_Q16 ); - /* Ratio of the norms of residual and mid signals */ - frac_Q16 = silk_SMLABB( HP_ratio_Q14, LP_ratio_Q14, 3 ); - frac_Q16 = silk_min( frac_Q16, SILK_FIX_CONST( 1, 16 ) ); - - /* Determine bitrate distribution between mid and side, and possibly reduce stereo width */ - total_rate_bps -= is10msFrame ? 1200 : 600; /* Subtract approximate bitrate for coding stereo parameters */ - if( total_rate_bps < 1 ) { - total_rate_bps = 1; - } - min_mid_rate_bps = silk_SMLABB( 2000, fs_kHz, 900 ); - silk_assert( min_mid_rate_bps < 32767 ); - /* Default bitrate distribution: 8 parts for Mid and (5+3*frac) parts for Side. 
so: mid_rate = ( 8 / ( 13 + 3 * frac ) ) * total_ rate */ - frac_3_Q16 = silk_MUL( 3, frac_Q16 ); - mid_side_rates_bps[ 0 ] = silk_DIV32_varQ( total_rate_bps, SILK_FIX_CONST( 8 + 5, 16 ) + frac_3_Q16, 16+3 ); - /* If Mid bitrate below minimum, reduce stereo width */ - if( mid_side_rates_bps[ 0 ] < min_mid_rate_bps ) { - mid_side_rates_bps[ 0 ] = min_mid_rate_bps; - mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ]; - /* width = 4 * ( 2 * side_rate - min_rate ) / ( ( 1 + 3 * frac ) * min_rate ) */ - width_Q14 = silk_DIV32_varQ( silk_LSHIFT( mid_side_rates_bps[ 1 ], 1 ) - min_mid_rate_bps, - silk_SMULWB( SILK_FIX_CONST( 1, 16 ) + frac_3_Q16, min_mid_rate_bps ), 14+2 ); - width_Q14 = silk_LIMIT( width_Q14, 0, SILK_FIX_CONST( 1, 14 ) ); - } else { - mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ]; - width_Q14 = SILK_FIX_CONST( 1, 14 ); - } - - /* Smoother */ - state->smth_width_Q14 = (opus_int16)silk_SMLAWB( state->smth_width_Q14, width_Q14 - state->smth_width_Q14, smooth_coef_Q16 ); - - /* At very low bitrates or for inputs that are nearly amplitude panned, switch to panned-mono coding */ - *mid_only_flag = 0; - if( toMono ) { - /* Last frame before stereo->mono transition; collapse stereo width */ - width_Q14 = 0; - pred_Q13[ 0 ] = 0; - pred_Q13[ 1 ] = 0; - silk_stereo_quant_pred( pred_Q13, ix ); - } else if( state->width_prev_Q14 == 0 && - ( 8 * total_rate_bps < 13 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.05, 14 ) ) ) - { - /* Code as panned-mono; previous frame already had zero width */ - /* Scale down and quantize predictors */ - pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 ); - pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 ); - silk_stereo_quant_pred( pred_Q13, ix ); - /* Collapse stereo width */ - width_Q14 = 0; - pred_Q13[ 0 ] = 0; - pred_Q13[ 1 ] = 0; - mid_side_rates_bps[ 0 ] = total_rate_bps; - 
mid_side_rates_bps[ 1 ] = 0; - *mid_only_flag = 1; - } else if( state->width_prev_Q14 != 0 && - ( 8 * total_rate_bps < 11 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.02, 14 ) ) ) - { - /* Transition to zero-width stereo */ - /* Scale down and quantize predictors */ - pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 ); - pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 ); - silk_stereo_quant_pred( pred_Q13, ix ); - /* Collapse stereo width */ - width_Q14 = 0; - pred_Q13[ 0 ] = 0; - pred_Q13[ 1 ] = 0; - } else if( state->smth_width_Q14 > SILK_FIX_CONST( 0.95, 14 ) ) { - /* Full-width stereo coding */ - silk_stereo_quant_pred( pred_Q13, ix ); - width_Q14 = SILK_FIX_CONST( 1, 14 ); - } else { - /* Reduced-width stereo coding; scale down and quantize predictors */ - pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 ); - pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 ); - silk_stereo_quant_pred( pred_Q13, ix ); - width_Q14 = state->smth_width_Q14; - } - - /* Make sure to keep on encoding until the tapered output has been transmitted */ - if( *mid_only_flag == 1 ) { - state->silent_side_len += frame_length - STEREO_INTERP_LEN_MS * fs_kHz; - if( state->silent_side_len < LA_SHAPE_MS * fs_kHz ) { - *mid_only_flag = 0; - } else { - /* Limit to avoid wrapping around */ - state->silent_side_len = 10000; - } - } else { - state->silent_side_len = 0; - } - - if( *mid_only_flag == 0 && mid_side_rates_bps[ 1 ] < 1 ) { - mid_side_rates_bps[ 1 ] = 1; - mid_side_rates_bps[ 0 ] = silk_max_int( 1, total_rate_bps - mid_side_rates_bps[ 1 ]); - } - - /* Interpolate predictors and subtract prediction from side channel */ - pred0_Q13 = -state->pred_prev_Q13[ 0 ]; - pred1_Q13 = -state->pred_prev_Q13[ 1 ]; - w_Q24 = silk_LSHIFT( state->width_prev_Q14, 10 ); - denom_Q16 = silk_DIV32_16( (opus_int32)1 << 16, 
STEREO_INTERP_LEN_MS * fs_kHz ); - delta0_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 ); - delta1_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 ); - deltaw_Q24 = silk_LSHIFT( silk_SMULWB( width_Q14 - state->width_prev_Q14, denom_Q16 ), 10 ); - for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) { - pred0_Q13 += delta0_Q13; - pred1_Q13 += delta1_Q13; - w_Q24 += deltaw_Q24; - sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */ - sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */ - sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ - x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); - } - - pred0_Q13 = -pred_Q13[ 0 ]; - pred1_Q13 = -pred_Q13[ 1 ]; - w_Q24 = silk_LSHIFT( width_Q14, 10 ); - for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) { - sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */ - sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */ - sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ - x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); - } - state->pred_prev_Q13[ 0 ] = (opus_int16)pred_Q13[ 0 ]; - state->pred_prev_Q13[ 1 ] = (opus_int16)pred_Q13[ 1 ]; - state->width_prev_Q14 = (opus_int16)width_Q14; - RESTORE_STACK; -} diff --git a/thirdparty/opus/silk/stereo_MS_to_LR.c b/thirdparty/opus/silk/stereo_MS_to_LR.c deleted file mode 100644 index 62521a4f35..0000000000 --- a/thirdparty/opus/silk/stereo_MS_to_LR.c +++ /dev/null @@ -1,85 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Convert adaptive Mid/Side representation to Left/Right stereo signal */ -void silk_stereo_MS_to_LR( - stereo_dec_state *state, /* I/O State */ - opus_int16 x1[], /* I/O Left input signal, becomes mid signal */ - opus_int16 x2[], /* I/O Right input signal, becomes side signal */ - const opus_int32 pred_Q13[], /* I Predictors */ - opus_int fs_kHz, /* I Samples rate (kHz) */ - opus_int frame_length /* I Number of samples */ -) -{ - opus_int n, denom_Q16, delta0_Q13, delta1_Q13; - opus_int32 sum, diff, pred0_Q13, pred1_Q13; - - /* Buffering */ - silk_memcpy( x1, state->sMid, 2 * sizeof( opus_int16 ) ); - silk_memcpy( x2, state->sSide, 2 * sizeof( opus_int16 ) ); - silk_memcpy( state->sMid, &x1[ frame_length ], 2 * sizeof( opus_int16 ) ); - silk_memcpy( state->sSide, &x2[ frame_length ], 2 * sizeof( opus_int16 ) ); - - /* Interpolate predictors and add prediction to side channel */ - pred0_Q13 = state->pred_prev_Q13[ 0 ]; - pred1_Q13 = state->pred_prev_Q13[ 1 ]; - denom_Q16 = silk_DIV32_16( (opus_int32)1 << 16, STEREO_INTERP_LEN_MS * fs_kHz ); - delta0_Q13 = silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 ); - delta1_Q13 = silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 ); - for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) { - pred0_Q13 += delta0_Q13; - pred1_Q13 += delta1_Q13; - sum = silk_LSHIFT( silk_ADD_LSHIFT( x1[ n ] + x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 ); /* Q11 */ - sum = silk_SMLAWB( silk_LSHIFT( (opus_int32)x2[ n + 1 ], 8 ), sum, pred0_Q13 ); /* Q8 */ - sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)x1[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ - x2[ n + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); - } - pred0_Q13 = pred_Q13[ 0 ]; - pred1_Q13 = pred_Q13[ 1 ]; - for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < 
frame_length; n++ ) { - sum = silk_LSHIFT( silk_ADD_LSHIFT( x1[ n ] + x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 ); /* Q11 */ - sum = silk_SMLAWB( silk_LSHIFT( (opus_int32)x2[ n + 1 ], 8 ), sum, pred0_Q13 ); /* Q8 */ - sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)x1[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ - x2[ n + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); - } - state->pred_prev_Q13[ 0 ] = pred_Q13[ 0 ]; - state->pred_prev_Q13[ 1 ] = pred_Q13[ 1 ]; - - /* Convert to left/right signals */ - for( n = 0; n < frame_length; n++ ) { - sum = x1[ n + 1 ] + (opus_int32)x2[ n + 1 ]; - diff = x1[ n + 1 ] - (opus_int32)x2[ n + 1 ]; - x1[ n + 1 ] = (opus_int16)silk_SAT16( sum ); - x2[ n + 1 ] = (opus_int16)silk_SAT16( diff ); - } -} diff --git a/thirdparty/opus/silk/stereo_decode_pred.c b/thirdparty/opus/silk/stereo_decode_pred.c deleted file mode 100644 index 56ba3925e8..0000000000 --- a/thirdparty/opus/silk/stereo_decode_pred.c +++ /dev/null @@ -1,73 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Decode mid/side predictors */ -void silk_stereo_decode_pred( - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int32 pred_Q13[] /* O Predictors */ -) -{ - opus_int n, ix[ 2 ][ 3 ]; - opus_int32 low_Q13, step_Q13; - - /* Entropy decoding */ - n = ec_dec_icdf( psRangeDec, silk_stereo_pred_joint_iCDF, 8 ); - ix[ 0 ][ 2 ] = silk_DIV32_16( n, 5 ); - ix[ 1 ][ 2 ] = n - 5 * ix[ 0 ][ 2 ]; - for( n = 0; n < 2; n++ ) { - ix[ n ][ 0 ] = ec_dec_icdf( psRangeDec, silk_uniform3_iCDF, 8 ); - ix[ n ][ 1 ] = ec_dec_icdf( psRangeDec, silk_uniform5_iCDF, 8 ); - } - - /* Dequantize */ - for( n = 0; n < 2; n++ ) { - ix[ n ][ 0 ] += 3 * ix[ n ][ 2 ]; - low_Q13 = silk_stereo_pred_quant_Q13[ ix[ n ][ 0 ] ]; - step_Q13 = silk_SMULWB( silk_stereo_pred_quant_Q13[ ix[ n ][ 0 ] + 1 ] - low_Q13, - SILK_FIX_CONST( 0.5 / STEREO_QUANT_SUB_STEPS, 16 ) ); - pred_Q13[ n ] = silk_SMLABB( low_Q13, step_Q13, 2 * ix[ n ][ 1 ] + 1 ); - } - - /* Subtract second from first predictor (helps when actually applying these) */ - pred_Q13[ 0 ] -= pred_Q13[ 1 ]; -} - -/* Decode mid-only flag */ 
-void silk_stereo_decode_mid_only( - ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int *decode_only_mid /* O Flag that only mid channel has been coded */ -) -{ - /* Decode flag that only mid channel is coded */ - *decode_only_mid = ec_dec_icdf( psRangeDec, silk_stereo_only_code_mid_iCDF, 8 ); -} diff --git a/thirdparty/opus/silk/stereo_encode_pred.c b/thirdparty/opus/silk/stereo_encode_pred.c deleted file mode 100644 index e6dd195066..0000000000 --- a/thirdparty/opus/silk/stereo_encode_pred.c +++ /dev/null @@ -1,62 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Entropy code the mid/side quantization indices */ -void silk_stereo_encode_pred( - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - opus_int8 ix[ 2 ][ 3 ] /* I Quantization indices */ -) -{ - opus_int n; - - /* Entropy coding */ - n = 5 * ix[ 0 ][ 2 ] + ix[ 1 ][ 2 ]; - silk_assert( n < 25 ); - ec_enc_icdf( psRangeEnc, n, silk_stereo_pred_joint_iCDF, 8 ); - for( n = 0; n < 2; n++ ) { - silk_assert( ix[ n ][ 0 ] < 3 ); - silk_assert( ix[ n ][ 1 ] < STEREO_QUANT_SUB_STEPS ); - ec_enc_icdf( psRangeEnc, ix[ n ][ 0 ], silk_uniform3_iCDF, 8 ); - ec_enc_icdf( psRangeEnc, ix[ n ][ 1 ], silk_uniform5_iCDF, 8 ); - } -} - -/* Entropy code the mid-only flag */ -void silk_stereo_encode_mid_only( - ec_enc *psRangeEnc, /* I/O Compressor data structure */ - opus_int8 mid_only_flag -) -{ - /* Encode flag that only mid channel is coded */ - ec_enc_icdf( psRangeEnc, mid_only_flag, silk_stereo_only_code_mid_iCDF, 8 ); -} diff --git a/thirdparty/opus/silk/stereo_find_predictor.c b/thirdparty/opus/silk/stereo_find_predictor.c deleted file mode 100644 index e30e90bddc..0000000000 --- a/thirdparty/opus/silk/stereo_find_predictor.c +++ /dev/null @@ -1,79 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Find least-squares prediction gain for one signal based on another and quantize it */ -opus_int32 silk_stereo_find_predictor( /* O Returns predictor in Q13 */ - opus_int32 *ratio_Q14, /* O Ratio of residual and mid energies */ - const opus_int16 x[], /* I Basis signal */ - const opus_int16 y[], /* I Target signal */ - opus_int32 mid_res_amp_Q0[], /* I/O Smoothed mid, residual norms */ - opus_int length, /* I Number of samples */ - opus_int smooth_coef_Q16 /* I Smoothing coefficient */ -) -{ - opus_int scale, scale1, scale2; - opus_int32 nrgx, nrgy, corr, pred_Q13, pred2_Q10; - - /* Find predictor */ - silk_sum_sqr_shift( &nrgx, &scale1, x, length ); - silk_sum_sqr_shift( &nrgy, &scale2, y, length ); - scale = silk_max_int( scale1, scale2 ); - scale = scale + ( scale & 1 ); /* make even */ - nrgy = silk_RSHIFT32( nrgy, scale - scale2 ); - nrgx = silk_RSHIFT32( nrgx, scale - scale1 ); - nrgx = silk_max_int( nrgx, 1 ); - corr = silk_inner_prod_aligned_scale( x, y, scale, length ); - pred_Q13 = silk_DIV32_varQ( corr, nrgx, 13 ); - pred_Q13 = silk_LIMIT( pred_Q13, -(1 << 14), 1 << 14 ); - pred2_Q10 = silk_SMULWB( pred_Q13, pred_Q13 ); - - /* Faster update for signals with large prediction parameters */ - smooth_coef_Q16 = (opus_int)silk_max_int( smooth_coef_Q16, silk_abs( pred2_Q10 ) ); - - /* Smoothed mid and residual norms */ - silk_assert( smooth_coef_Q16 < 32768 ); - scale = silk_RSHIFT( scale, 1 ); - mid_res_amp_Q0[ 0 ] = silk_SMLAWB( mid_res_amp_Q0[ 0 ], silk_LSHIFT( silk_SQRT_APPROX( nrgx ), scale ) - mid_res_amp_Q0[ 0 ], - smooth_coef_Q16 ); - /* Residual energy = nrgy - 2 * pred * corr + pred^2 * nrgx */ - nrgy = silk_SUB_LSHIFT32( nrgy, silk_SMULWB( corr, pred_Q13 ), 3 + 1 ); - nrgy = silk_ADD_LSHIFT32( nrgy, silk_SMULWB( nrgx, pred2_Q10 ), 6 ); - mid_res_amp_Q0[ 1 ] = silk_SMLAWB( mid_res_amp_Q0[ 1 ], 
silk_LSHIFT( silk_SQRT_APPROX( nrgy ), scale ) - mid_res_amp_Q0[ 1 ], - smooth_coef_Q16 ); - - /* Ratio of smoothed residual and mid norms */ - *ratio_Q14 = silk_DIV32_varQ( mid_res_amp_Q0[ 1 ], silk_max( mid_res_amp_Q0[ 0 ], 1 ), 14 ); - *ratio_Q14 = silk_LIMIT( *ratio_Q14, 0, 32767 ); - - return pred_Q13; -} diff --git a/thirdparty/opus/silk/stereo_quant_pred.c b/thirdparty/opus/silk/stereo_quant_pred.c deleted file mode 100644 index d4ced6c3e8..0000000000 --- a/thirdparty/opus/silk/stereo_quant_pred.c +++ /dev/null @@ -1,73 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "main.h" - -/* Quantize mid/side predictors */ -void silk_stereo_quant_pred( - opus_int32 pred_Q13[], /* I/O Predictors (out: quantized) */ - opus_int8 ix[ 2 ][ 3 ] /* O Quantization indices */ -) -{ - opus_int i, j, n; - opus_int32 low_Q13, step_Q13, lvl_Q13, err_min_Q13, err_Q13, quant_pred_Q13 = 0; - - /* Quantize */ - for( n = 0; n < 2; n++ ) { - /* Brute-force search over quantization levels */ - err_min_Q13 = silk_int32_MAX; - for( i = 0; i < STEREO_QUANT_TAB_SIZE - 1; i++ ) { - low_Q13 = silk_stereo_pred_quant_Q13[ i ]; - step_Q13 = silk_SMULWB( silk_stereo_pred_quant_Q13[ i + 1 ] - low_Q13, - SILK_FIX_CONST( 0.5 / STEREO_QUANT_SUB_STEPS, 16 ) ); - for( j = 0; j < STEREO_QUANT_SUB_STEPS; j++ ) { - lvl_Q13 = silk_SMLABB( low_Q13, step_Q13, 2 * j + 1 ); - err_Q13 = silk_abs( pred_Q13[ n ] - lvl_Q13 ); - if( err_Q13 < err_min_Q13 ) { - err_min_Q13 = err_Q13; - quant_pred_Q13 = lvl_Q13; - ix[ n ][ 0 ] = i; - ix[ n ][ 1 ] = j; - } else { - /* Error increasing, so we're past the optimum */ - goto done; - } - } - } - done: - ix[ n ][ 2 ] = silk_DIV32_16( ix[ n ][ 0 ], 3 ); - ix[ n ][ 0 ] -= ix[ n ][ 2 ] * 3; - pred_Q13[ n ] = quant_pred_Q13; - } - - /* Subtract second from first predictor (helps when actually applying these) */ - pred_Q13[ 0 ] -= pred_Q13[ 1 ]; -} diff --git a/thirdparty/opus/silk/structs.h 
b/thirdparty/opus/silk/structs.h deleted file mode 100644 index 827829dc6f..0000000000 --- a/thirdparty/opus/silk/structs.h +++ /dev/null @@ -1,327 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifndef SILK_STRUCTS_H -#define SILK_STRUCTS_H - -#include "typedef.h" -#include "SigProc_FIX.h" -#include "define.h" -#include "entenc.h" -#include "entdec.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/************************************/ -/* Noise shaping quantization state */ -/************************************/ -typedef struct { - opus_int16 xq[ 2 * MAX_FRAME_LENGTH ]; /* Buffer for quantized output signal */ - opus_int32 sLTP_shp_Q14[ 2 * MAX_FRAME_LENGTH ]; - opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; - opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 sLF_AR_shp_Q14; - opus_int lagPrev; - opus_int sLTP_buf_idx; - opus_int sLTP_shp_buf_idx; - opus_int32 rand_seed; - opus_int32 prev_gain_Q16; - opus_int rewhite_flag; -} silk_nsq_state; - -/********************************/ -/* VAD state */ -/********************************/ -typedef struct { - opus_int32 AnaState[ 2 ]; /* Analysis filterbank state: 0-8 kHz */ - opus_int32 AnaState1[ 2 ]; /* Analysis filterbank state: 0-4 kHz */ - opus_int32 AnaState2[ 2 ]; /* Analysis filterbank state: 0-2 kHz */ - opus_int32 XnrgSubfr[ VAD_N_BANDS ]; /* Subframe energies */ - opus_int32 NrgRatioSmth_Q8[ VAD_N_BANDS ]; /* Smoothed energy level in each band */ - opus_int16 HPstate; /* State of differentiator in the lowest band */ - opus_int32 NL[ VAD_N_BANDS ]; /* Noise energy level in each band */ - opus_int32 inv_NL[ VAD_N_BANDS ]; /* Inverse noise energy level in each band */ - opus_int32 NoiseLevelBias[ VAD_N_BANDS ]; /* Noise level estimator bias/offset */ - opus_int32 counter; /* Frame counter used in the initial phase */ -} silk_VAD_state; - -/* Variable cut-off low-pass filter state */ -typedef struct { - opus_int32 In_LP_State[ 2 ]; /* Low pass filter state */ - opus_int32 transition_frame_no; /* Counter which is mapped to a cut-off frequency */ - opus_int mode; /* Operating mode, <0: switch down, >0: 
switch up; 0: do nothing */ -} silk_LP_state; - -/* Structure containing NLSF codebook */ -typedef struct { - const opus_int16 nVectors; - const opus_int16 order; - const opus_int16 quantStepSize_Q16; - const opus_int16 invQuantStepSize_Q6; - const opus_uint8 *CB1_NLSF_Q8; - const opus_uint8 *CB1_iCDF; - const opus_uint8 *pred_Q8; - const opus_uint8 *ec_sel; - const opus_uint8 *ec_iCDF; - const opus_uint8 *ec_Rates_Q5; - const opus_int16 *deltaMin_Q15; -} silk_NLSF_CB_struct; - -typedef struct { - opus_int16 pred_prev_Q13[ 2 ]; - opus_int16 sMid[ 2 ]; - opus_int16 sSide[ 2 ]; - opus_int32 mid_side_amp_Q0[ 4 ]; - opus_int16 smth_width_Q14; - opus_int16 width_prev_Q14; - opus_int16 silent_side_len; - opus_int8 predIx[ MAX_FRAMES_PER_PACKET ][ 2 ][ 3 ]; - opus_int8 mid_only_flags[ MAX_FRAMES_PER_PACKET ]; -} stereo_enc_state; - -typedef struct { - opus_int16 pred_prev_Q13[ 2 ]; - opus_int16 sMid[ 2 ]; - opus_int16 sSide[ 2 ]; -} stereo_dec_state; - -typedef struct { - opus_int8 GainsIndices[ MAX_NB_SUBFR ]; - opus_int8 LTPIndex[ MAX_NB_SUBFR ]; - opus_int8 NLSFIndices[ MAX_LPC_ORDER + 1 ]; - opus_int16 lagIndex; - opus_int8 contourIndex; - opus_int8 signalType; - opus_int8 quantOffsetType; - opus_int8 NLSFInterpCoef_Q2; - opus_int8 PERIndex; - opus_int8 LTP_scaleIndex; - opus_int8 Seed; -} SideInfoIndices; - -/********************************/ -/* Encoder state */ -/********************************/ -typedef struct { - opus_int32 In_HP_State[ 2 ]; /* High pass filter state */ - opus_int32 variable_HP_smth1_Q15; /* State of first smoother */ - opus_int32 variable_HP_smth2_Q15; /* State of second smoother */ - silk_LP_state sLP; /* Low pass filter state */ - silk_VAD_state sVAD; /* Voice activity detector state */ - silk_nsq_state sNSQ; /* Noise Shape Quantizer State */ - opus_int16 prev_NLSFq_Q15[ MAX_LPC_ORDER ]; /* Previously quantized NLSF vector */ - opus_int speech_activity_Q8; /* Speech activity */ - opus_int allow_bandwidth_switch; /* Flag indicating that 
switching of internal bandwidth is allowed */ - opus_int8 LBRRprevLastGainIndex; - opus_int8 prevSignalType; - opus_int prevLag; - opus_int pitch_LPC_win_length; - opus_int max_pitch_lag; /* Highest possible pitch lag (samples) */ - opus_int32 API_fs_Hz; /* API sampling frequency (Hz) */ - opus_int32 prev_API_fs_Hz; /* Previous API sampling frequency (Hz) */ - opus_int maxInternal_fs_Hz; /* Maximum internal sampling frequency (Hz) */ - opus_int minInternal_fs_Hz; /* Minimum internal sampling frequency (Hz) */ - opus_int desiredInternal_fs_Hz; /* Soft request for internal sampling frequency (Hz) */ - opus_int fs_kHz; /* Internal sampling frequency (kHz) */ - opus_int nb_subfr; /* Number of 5 ms subframes in a frame */ - opus_int frame_length; /* Frame length (samples) */ - opus_int subfr_length; /* Subframe length (samples) */ - opus_int ltp_mem_length; /* Length of LTP memory */ - opus_int la_pitch; /* Look-ahead for pitch analysis (samples) */ - opus_int la_shape; /* Look-ahead for noise shape analysis (samples) */ - opus_int shapeWinLength; /* Window length for noise shape analysis (samples) */ - opus_int32 TargetRate_bps; /* Target bitrate (bps) */ - opus_int PacketSize_ms; /* Number of milliseconds to put in each packet */ - opus_int PacketLoss_perc; /* Packet loss rate measured by farend */ - opus_int32 frameCounter; - opus_int Complexity; /* Complexity setting */ - opus_int nStatesDelayedDecision; /* Number of states in delayed decision quantization */ - opus_int useInterpolatedNLSFs; /* Flag for using NLSF interpolation */ - opus_int shapingLPCOrder; /* Filter order for noise shaping filters */ - opus_int predictLPCOrder; /* Filter order for prediction filters */ - opus_int pitchEstimationComplexity; /* Complexity level for pitch estimator */ - opus_int pitchEstimationLPCOrder; /* Whitening filter order for pitch estimator */ - opus_int32 pitchEstimationThreshold_Q16; /* Threshold for pitch estimator */ - opus_int LTPQuantLowComplexity; /* Flag for low 
complexity LTP quantization */ - opus_int mu_LTP_Q9; /* Rate-distortion tradeoff in LTP quantization */ - opus_int32 sum_log_gain_Q7; /* Cumulative max prediction gain */ - opus_int NLSF_MSVQ_Survivors; /* Number of survivors in NLSF MSVQ */ - opus_int first_frame_after_reset; /* Flag for deactivating NLSF interpolation, pitch prediction */ - opus_int controlled_since_last_payload; /* Flag for ensuring codec_control only runs once per packet */ - opus_int warping_Q16; /* Warping parameter for warped noise shaping */ - opus_int useCBR; /* Flag to enable constant bitrate */ - opus_int prefillFlag; /* Flag to indicate that only buffers are prefilled, no coding */ - const opus_uint8 *pitch_lag_low_bits_iCDF; /* Pointer to iCDF table for low bits of pitch lag index */ - const opus_uint8 *pitch_contour_iCDF; /* Pointer to iCDF table for pitch contour index */ - const silk_NLSF_CB_struct *psNLSF_CB; /* Pointer to NLSF codebook */ - opus_int input_quality_bands_Q15[ VAD_N_BANDS ]; - opus_int input_tilt_Q15; - opus_int SNR_dB_Q7; /* Quality setting */ - - opus_int8 VAD_flags[ MAX_FRAMES_PER_PACKET ]; - opus_int8 LBRR_flag; - opus_int LBRR_flags[ MAX_FRAMES_PER_PACKET ]; - - SideInfoIndices indices; - opus_int8 pulses[ MAX_FRAME_LENGTH ]; - - int arch; - - /* Input/output buffering */ - opus_int16 inputBuf[ MAX_FRAME_LENGTH + 2 ]; /* Buffer containing input signal */ - opus_int inputBufIx; - opus_int nFramesPerPacket; - opus_int nFramesEncoded; /* Number of frames analyzed in current packet */ - - opus_int nChannelsAPI; - opus_int nChannelsInternal; - opus_int channelNb; - - /* Parameters For LTP scaling Control */ - opus_int frames_since_onset; - - /* Specifically for entropy coding */ - opus_int ec_prevSignalType; - opus_int16 ec_prevLagIndex; - - silk_resampler_state_struct resampler_state; - - /* DTX */ - opus_int useDTX; /* Flag to enable DTX */ - opus_int inDTX; /* Flag to signal DTX period */ - opus_int noSpeechCounter; /* Counts concecutive nonactive frames, used by 
DTX */ - - /* Inband Low Bitrate Redundancy (LBRR) data */ - opus_int useInBandFEC; /* Saves the API setting for query */ - opus_int LBRR_enabled; /* Depends on useInBandFRC, bitrate and packet loss rate */ - opus_int LBRR_GainIncreases; /* Gains increment for coding LBRR frames */ - SideInfoIndices indices_LBRR[ MAX_FRAMES_PER_PACKET ]; - opus_int8 pulses_LBRR[ MAX_FRAMES_PER_PACKET ][ MAX_FRAME_LENGTH ]; -} silk_encoder_state; - - -/* Struct for Packet Loss Concealment */ -typedef struct { - opus_int32 pitchL_Q8; /* Pitch lag to use for voiced concealment */ - opus_int16 LTPCoef_Q14[ LTP_ORDER ]; /* LTP coeficients to use for voiced concealment */ - opus_int16 prevLPC_Q12[ MAX_LPC_ORDER ]; - opus_int last_frame_lost; /* Was previous frame lost */ - opus_int32 rand_seed; /* Seed for unvoiced signal generation */ - opus_int16 randScale_Q14; /* Scaling of unvoiced random signal */ - opus_int32 conc_energy; - opus_int conc_energy_shift; - opus_int16 prevLTP_scale_Q14; - opus_int32 prevGain_Q16[ 2 ]; - opus_int fs_kHz; - opus_int nb_subfr; - opus_int subfr_length; -} silk_PLC_struct; - -/* Struct for CNG */ -typedef struct { - opus_int32 CNG_exc_buf_Q14[ MAX_FRAME_LENGTH ]; - opus_int16 CNG_smth_NLSF_Q15[ MAX_LPC_ORDER ]; - opus_int32 CNG_synth_state[ MAX_LPC_ORDER ]; - opus_int32 CNG_smth_Gain_Q16; - opus_int32 rand_seed; - opus_int fs_kHz; -} silk_CNG_struct; - -/********************************/ -/* Decoder state */ -/********************************/ -typedef struct { - opus_int32 prev_gain_Q16; - opus_int32 exc_Q14[ MAX_FRAME_LENGTH ]; - opus_int32 sLPC_Q14_buf[ MAX_LPC_ORDER ]; - opus_int16 outBuf[ MAX_FRAME_LENGTH + 2 * MAX_SUB_FRAME_LENGTH ]; /* Buffer for output signal */ - opus_int lagPrev; /* Previous Lag */ - opus_int8 LastGainIndex; /* Previous gain index */ - opus_int fs_kHz; /* Sampling frequency in kHz */ - opus_int32 fs_API_hz; /* API sample frequency (Hz) */ - opus_int nb_subfr; /* Number of 5 ms subframes in a frame */ - opus_int frame_length; /* 
Frame length (samples) */ - opus_int subfr_length; /* Subframe length (samples) */ - opus_int ltp_mem_length; /* Length of LTP memory */ - opus_int LPC_order; /* LPC order */ - opus_int16 prevNLSF_Q15[ MAX_LPC_ORDER ]; /* Used to interpolate LSFs */ - opus_int first_frame_after_reset; /* Flag for deactivating NLSF interpolation */ - const opus_uint8 *pitch_lag_low_bits_iCDF; /* Pointer to iCDF table for low bits of pitch lag index */ - const opus_uint8 *pitch_contour_iCDF; /* Pointer to iCDF table for pitch contour index */ - - /* For buffering payload in case of more frames per packet */ - opus_int nFramesDecoded; - opus_int nFramesPerPacket; - - /* Specifically for entropy coding */ - opus_int ec_prevSignalType; - opus_int16 ec_prevLagIndex; - - opus_int VAD_flags[ MAX_FRAMES_PER_PACKET ]; - opus_int LBRR_flag; - opus_int LBRR_flags[ MAX_FRAMES_PER_PACKET ]; - - silk_resampler_state_struct resampler_state; - - const silk_NLSF_CB_struct *psNLSF_CB; /* Pointer to NLSF codebook */ - - /* Quantization indices */ - SideInfoIndices indices; - - /* CNG state */ - silk_CNG_struct sCNG; - - /* Stuff used for PLC */ - opus_int lossCnt; - opus_int prevSignalType; - - silk_PLC_struct sPLC; - -} silk_decoder_state; - -/************************/ -/* Decoder control */ -/************************/ -typedef struct { - /* Prediction and coding parameters */ - opus_int pitchL[ MAX_NB_SUBFR ]; - opus_int32 Gains_Q16[ MAX_NB_SUBFR ]; - /* Holds interpolated and final coefficients, 4-byte aligned */ - silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ]; - opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ]; - opus_int LTP_scale_Q14; -} silk_decoder_control; - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/silk/sum_sqr_shift.c b/thirdparty/opus/silk/sum_sqr_shift.c deleted file mode 100644 index 129df191d8..0000000000 --- a/thirdparty/opus/silk/sum_sqr_shift.c +++ /dev/null @@ -1,86 +0,0 @@ 
-/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "SigProc_FIX.h" - -/* Compute number of bits to right shift the sum of squares of a vector */ -/* of int16s to make it fit in an int32 */ -void silk_sum_sqr_shift( - opus_int32 *energy, /* O Energy of x, after shifting to the right */ - opus_int *shift, /* O Number of bits right shift applied to energy */ - const opus_int16 *x, /* I Input vector */ - opus_int len /* I Length of input vector */ -) -{ - opus_int i, shft; - opus_int32 nrg_tmp, nrg; - - nrg = 0; - shft = 0; - len--; - for( i = 0; i < len; i += 2 ) { - nrg = silk_SMLABB_ovflw( nrg, x[ i ], x[ i ] ); - nrg = silk_SMLABB_ovflw( nrg, x[ i + 1 ], x[ i + 1 ] ); - if( nrg < 0 ) { - /* Scale down */ - nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 ); - shft = 2; - i+=2; - break; - } - } - for( ; i < len; i += 2 ) { - nrg_tmp = silk_SMULBB( x[ i ], x[ i ] ); - nrg_tmp = silk_SMLABB_ovflw( nrg_tmp, x[ i + 1 ], x[ i + 1 ] ); - nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, (opus_uint32)nrg_tmp, shft ); - if( nrg < 0 ) { - /* Scale down */ - nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 ); - shft += 2; - } - } - if( i == len ) { - /* One sample left to process */ - nrg_tmp = silk_SMULBB( x[ i ], x[ i ] ); - nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, nrg_tmp, shft ); - } - - /* Make sure to have at least one extra leading zero (two leading zeros in total) */ - if( nrg & 0xC0000000 ) { - nrg = silk_RSHIFT_uint( (opus_uint32)nrg, 2 ); - shft += 2; - } - - /* Output arguments */ - *shift = shft; - *energy = nrg; -} - diff --git a/thirdparty/opus/silk/table_LSF_cos.c b/thirdparty/opus/silk/table_LSF_cos.c deleted file mode 100644 index ec9dc63927..0000000000 --- a/thirdparty/opus/silk/table_LSF_cos.c +++ /dev/null @@ -1,70 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "tables.h" - -/* Cosine approximation table for LSF conversion */ -/* Q12 values (even) */ -const opus_int16 silk_LSFCosTab_FIX_Q12[ LSF_COS_TAB_SZ_FIX + 1 ] = { - 8192, 8190, 8182, 8170, - 8152, 8130, 8104, 8072, - 8034, 7994, 7946, 7896, - 7840, 7778, 7714, 7644, - 7568, 7490, 7406, 7318, - 7226, 7128, 7026, 6922, - 6812, 6698, 6580, 6458, - 6332, 6204, 6070, 5934, - 5792, 5648, 5502, 5352, - 5198, 5040, 4880, 4718, - 4552, 4382, 4212, 4038, - 3862, 3684, 3502, 3320, - 3136, 2948, 2760, 2570, - 2378, 2186, 1990, 1794, - 1598, 1400, 1202, 1002, - 802, 602, 402, 202, - 0, -202, -402, -602, - -802, -1002, -1202, -1400, - -1598, -1794, -1990, -2186, - -2378, -2570, -2760, -2948, - -3136, -3320, -3502, -3684, - -3862, -4038, -4212, -4382, - -4552, -4718, -4880, -5040, - -5198, -5352, -5502, -5648, - -5792, -5934, -6070, -6204, - -6332, -6458, -6580, -6698, - -6812, -6922, -7026, -7128, - -7226, -7318, -7406, -7490, - -7568, -7644, -7714, -7778, - -7840, -7896, -7946, -7994, - -8034, -8072, -8104, -8130, - -8152, -8170, -8182, -8190, - -8192 -}; diff --git a/thirdparty/opus/silk/tables.h b/thirdparty/opus/silk/tables.h deleted file mode 100644 index 7fea6fda39..0000000000 --- a/thirdparty/opus/silk/tables.h +++ /dev/null @@ -1,122 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. 
-- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifndef SILK_TABLES_H -#define SILK_TABLES_H - -#include "define.h" -#include "structs.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/* Entropy coding tables (with size in bytes indicated) */ -extern const opus_uint8 silk_gain_iCDF[ 3 ][ N_LEVELS_QGAIN / 8 ]; /* 24 */ -extern const opus_uint8 silk_delta_gain_iCDF[ MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 ]; /* 41 */ - -extern const opus_uint8 silk_pitch_lag_iCDF[ 2 * ( PITCH_EST_MAX_LAG_MS - PITCH_EST_MIN_LAG_MS ) ];/* 32 */ -extern const opus_uint8 silk_pitch_delta_iCDF[ 21 ]; /* 21 */ -extern const opus_uint8 silk_pitch_contour_iCDF[ 34 ]; /* 34 */ -extern const opus_uint8 silk_pitch_contour_NB_iCDF[ 11 ]; /* 11 */ -extern const opus_uint8 silk_pitch_contour_10_ms_iCDF[ 12 ]; /* 12 */ -extern const opus_uint8 silk_pitch_contour_10_ms_NB_iCDF[ 3 ]; /* 3 */ - -extern const opus_uint8 silk_pulses_per_block_iCDF[ N_RATE_LEVELS ][ SILK_MAX_PULSES + 2 ]; /* 180 */ -extern const opus_uint8 silk_pulses_per_block_BITS_Q5[ N_RATE_LEVELS - 1 ][ SILK_MAX_PULSES + 2 ]; /* 162 */ - -extern const opus_uint8 silk_rate_levels_iCDF[ 2 ][ N_RATE_LEVELS - 1 ]; /* 18 */ -extern const opus_uint8 silk_rate_levels_BITS_Q5[ 2 ][ N_RATE_LEVELS - 1 ]; /* 18 */ - -extern const opus_uint8 silk_max_pulses_table[ 4 ]; /* 4 */ - -extern const opus_uint8 silk_shell_code_table0[ 152 ]; /* 152 */ -extern const opus_uint8 silk_shell_code_table1[ 152 ]; /* 152 */ -extern const opus_uint8 silk_shell_code_table2[ 152 ]; /* 152 */ -extern const opus_uint8 silk_shell_code_table3[ 152 ]; /* 152 */ -extern const opus_uint8 silk_shell_code_table_offsets[ SILK_MAX_PULSES + 1 ]; /* 17 */ - -extern const opus_uint8 silk_lsb_iCDF[ 2 ]; /* 2 */ - -extern const opus_uint8 silk_sign_iCDF[ 42 ]; /* 42 */ - -extern const opus_uint8 silk_uniform3_iCDF[ 3 ]; /* 3 */ -extern const opus_uint8 silk_uniform4_iCDF[ 4 ]; /* 4 */ -extern const opus_uint8 silk_uniform5_iCDF[ 5 ]; /* 5 */ 
-extern const opus_uint8 silk_uniform6_iCDF[ 6 ]; /* 6 */ -extern const opus_uint8 silk_uniform8_iCDF[ 8 ]; /* 8 */ - -extern const opus_uint8 silk_NLSF_EXT_iCDF[ 7 ]; /* 7 */ - -extern const opus_uint8 silk_LTP_per_index_iCDF[ 3 ]; /* 3 */ -extern const opus_uint8 * const silk_LTP_gain_iCDF_ptrs[ NB_LTP_CBKS ]; /* 3 */ -extern const opus_uint8 * const silk_LTP_gain_BITS_Q5_ptrs[ NB_LTP_CBKS ]; /* 3 */ -extern const opus_int16 silk_LTP_gain_middle_avg_RD_Q14; -extern const opus_int8 * const silk_LTP_vq_ptrs_Q7[ NB_LTP_CBKS ]; /* 168 */ -extern const opus_uint8 * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS]; - -extern const opus_int8 silk_LTP_vq_sizes[ NB_LTP_CBKS ]; /* 3 */ - -extern const opus_uint8 silk_LTPscale_iCDF[ 3 ]; /* 4 */ -extern const opus_int16 silk_LTPScales_table_Q14[ 3 ]; /* 6 */ - -extern const opus_uint8 silk_type_offset_VAD_iCDF[ 4 ]; /* 4 */ -extern const opus_uint8 silk_type_offset_no_VAD_iCDF[ 2 ]; /* 2 */ - -extern const opus_int16 silk_stereo_pred_quant_Q13[ STEREO_QUANT_TAB_SIZE ]; /* 32 */ -extern const opus_uint8 silk_stereo_pred_joint_iCDF[ 25 ]; /* 25 */ -extern const opus_uint8 silk_stereo_only_code_mid_iCDF[ 2 ]; /* 2 */ - -extern const opus_uint8 * const silk_LBRR_flags_iCDF_ptr[ 2 ]; /* 10 */ - -extern const opus_uint8 silk_NLSF_interpolation_factor_iCDF[ 5 ]; /* 5 */ - -extern const silk_NLSF_CB_struct silk_NLSF_CB_WB; /* 1040 */ -extern const silk_NLSF_CB_struct silk_NLSF_CB_NB_MB; /* 728 */ - -/* Piece-wise linear mapping from bitrate in kbps to coding quality in dB SNR */ -extern const opus_int32 silk_TargetRate_table_NB[ TARGET_RATE_TAB_SZ ]; /* 32 */ -extern const opus_int32 silk_TargetRate_table_MB[ TARGET_RATE_TAB_SZ ]; /* 32 */ -extern const opus_int32 silk_TargetRate_table_WB[ TARGET_RATE_TAB_SZ ]; /* 32 */ -extern const opus_int16 silk_SNR_table_Q1[ TARGET_RATE_TAB_SZ ]; /* 32 */ - -/* Quantization offsets */ -extern const opus_int16 silk_Quantization_Offsets_Q10[ 2 ][ 2 ]; /* 8 */ - -/* Interpolation points for filter 
coefficients used in the bandwidth transition smoother */ -extern const opus_int32 silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NB ]; /* 60 */ -extern const opus_int32 silk_Transition_LP_A_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NA ]; /* 60 */ - -/* Rom table with cosine values */ -extern const opus_int16 silk_LSFCosTab_FIX_Q12[ LSF_COS_TAB_SZ_FIX + 1 ]; /* 258 */ - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/opus/silk/tables_LTP.c b/thirdparty/opus/silk/tables_LTP.c deleted file mode 100644 index 0e6a0254d5..0000000000 --- a/thirdparty/opus/silk/tables_LTP.c +++ /dev/null @@ -1,296 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "tables.h" - -const opus_uint8 silk_LTP_per_index_iCDF[3] = { - 179, 99, 0 -}; - -static const opus_uint8 silk_LTP_gain_iCDF_0[8] = { - 71, 56, 43, 30, 21, 12, 6, 0 -}; - -static const opus_uint8 silk_LTP_gain_iCDF_1[16] = { - 199, 165, 144, 124, 109, 96, 84, 71, - 61, 51, 42, 32, 23, 15, 8, 0 -}; - -static const opus_uint8 silk_LTP_gain_iCDF_2[32] = { - 241, 225, 211, 199, 187, 175, 164, 153, - 142, 132, 123, 114, 105, 96, 88, 80, - 72, 64, 57, 50, 44, 38, 33, 29, - 24, 20, 16, 12, 9, 5, 2, 0 -}; - -const opus_int16 silk_LTP_gain_middle_avg_RD_Q14 = 12304; - -static const opus_uint8 silk_LTP_gain_BITS_Q5_0[8] = { - 15, 131, 138, 138, 155, 155, 173, 173 -}; - -static const opus_uint8 silk_LTP_gain_BITS_Q5_1[16] = { - 69, 93, 115, 118, 131, 138, 141, 138, - 150, 150, 155, 150, 155, 160, 166, 160 -}; - -static const opus_uint8 silk_LTP_gain_BITS_Q5_2[32] = { - 131, 128, 134, 141, 141, 141, 145, 145, - 145, 150, 155, 155, 155, 155, 160, 160, - 160, 160, 166, 166, 173, 173, 182, 192, - 182, 192, 192, 192, 205, 192, 205, 224 -}; - -const opus_uint8 * const silk_LTP_gain_iCDF_ptrs[NB_LTP_CBKS] = { - silk_LTP_gain_iCDF_0, - silk_LTP_gain_iCDF_1, - silk_LTP_gain_iCDF_2 -}; - -const opus_uint8 * const silk_LTP_gain_BITS_Q5_ptrs[NB_LTP_CBKS] = { - silk_LTP_gain_BITS_Q5_0, - silk_LTP_gain_BITS_Q5_1, - silk_LTP_gain_BITS_Q5_2 -}; 
- -static const opus_int8 silk_LTP_gain_vq_0[8][5] = -{ -{ - 4, 6, 24, 7, 5 -}, -{ - 0, 0, 2, 0, 0 -}, -{ - 12, 28, 41, 13, -4 -}, -{ - -9, 15, 42, 25, 14 -}, -{ - 1, -2, 62, 41, -9 -}, -{ - -10, 37, 65, -4, 3 -}, -{ - -6, 4, 66, 7, -8 -}, -{ - 16, 14, 38, -3, 33 -} -}; - -static const opus_int8 silk_LTP_gain_vq_1[16][5] = -{ -{ - 13, 22, 39, 23, 12 -}, -{ - -1, 36, 64, 27, -6 -}, -{ - -7, 10, 55, 43, 17 -}, -{ - 1, 1, 8, 1, 1 -}, -{ - 6, -11, 74, 53, -9 -}, -{ - -12, 55, 76, -12, 8 -}, -{ - -3, 3, 93, 27, -4 -}, -{ - 26, 39, 59, 3, -8 -}, -{ - 2, 0, 77, 11, 9 -}, -{ - -8, 22, 44, -6, 7 -}, -{ - 40, 9, 26, 3, 9 -}, -{ - -7, 20, 101, -7, 4 -}, -{ - 3, -8, 42, 26, 0 -}, -{ - -15, 33, 68, 2, 23 -}, -{ - -2, 55, 46, -2, 15 -}, -{ - 3, -1, 21, 16, 41 -} -}; - -static const opus_int8 silk_LTP_gain_vq_2[32][5] = -{ -{ - -6, 27, 61, 39, 5 -}, -{ - -11, 42, 88, 4, 1 -}, -{ - -2, 60, 65, 6, -4 -}, -{ - -1, -5, 73, 56, 1 -}, -{ - -9, 19, 94, 29, -9 -}, -{ - 0, 12, 99, 6, 4 -}, -{ - 8, -19, 102, 46, -13 -}, -{ - 3, 2, 13, 3, 2 -}, -{ - 9, -21, 84, 72, -18 -}, -{ - -11, 46, 104, -22, 8 -}, -{ - 18, 38, 48, 23, 0 -}, -{ - -16, 70, 83, -21, 11 -}, -{ - 5, -11, 117, 22, -8 -}, -{ - -6, 23, 117, -12, 3 -}, -{ - 3, -8, 95, 28, 4 -}, -{ - -10, 15, 77, 60, -15 -}, -{ - -1, 4, 124, 2, -4 -}, -{ - 3, 38, 84, 24, -25 -}, -{ - 2, 13, 42, 13, 31 -}, -{ - 21, -4, 56, 46, -1 -}, -{ - -1, 35, 79, -13, 19 -}, -{ - -7, 65, 88, -9, -14 -}, -{ - 20, 4, 81, 49, -29 -}, -{ - 20, 0, 75, 3, -17 -}, -{ - 5, -9, 44, 92, -8 -}, -{ - 1, -3, 22, 69, 31 -}, -{ - -6, 95, 41, -12, 5 -}, -{ - 39, 67, 16, -4, 1 -}, -{ - 0, -6, 120, 55, -36 -}, -{ - -13, 44, 122, 4, -24 -}, -{ - 81, 5, 11, 3, 7 -}, -{ - 2, 0, 9, 10, 88 -} -}; - -const opus_int8 * const silk_LTP_vq_ptrs_Q7[NB_LTP_CBKS] = { - (opus_int8 *)&silk_LTP_gain_vq_0[0][0], - (opus_int8 *)&silk_LTP_gain_vq_1[0][0], - (opus_int8 *)&silk_LTP_gain_vq_2[0][0] -}; - -/* Maximum frequency-dependent response of the pitch taps above, - computed as 
max(abs(freqz(taps))) */ -static const opus_uint8 silk_LTP_gain_vq_0_gain[8] = { - 46, 2, 90, 87, 93, 91, 82, 98 -}; - -static const opus_uint8 silk_LTP_gain_vq_1_gain[16] = { - 109, 120, 118, 12, 113, 115, 117, 119, - 99, 59, 87, 111, 63, 111, 112, 80 -}; - -static const opus_uint8 silk_LTP_gain_vq_2_gain[32] = { - 126, 124, 125, 124, 129, 121, 126, 23, - 132, 127, 127, 127, 126, 127, 122, 133, - 130, 134, 101, 118, 119, 145, 126, 86, - 124, 120, 123, 119, 170, 173, 107, 109 -}; - -const opus_uint8 * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS] = { - &silk_LTP_gain_vq_0_gain[0], - &silk_LTP_gain_vq_1_gain[0], - &silk_LTP_gain_vq_2_gain[0] -}; - -const opus_int8 silk_LTP_vq_sizes[NB_LTP_CBKS] = { - 8, 16, 32 -}; diff --git a/thirdparty/opus/silk/tables_NLSF_CB_NB_MB.c b/thirdparty/opus/silk/tables_NLSF_CB_NB_MB.c deleted file mode 100644 index 8c59d207aa..0000000000 --- a/thirdparty/opus/silk/tables_NLSF_CB_NB_MB.c +++ /dev/null @@ -1,159 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "tables.h" - -static const opus_uint8 silk_NLSF_CB1_NB_MB_Q8[ 320 ] = { - 12, 35, 60, 83, 108, 132, 157, 180, - 206, 228, 15, 32, 55, 77, 101, 125, - 151, 175, 201, 225, 19, 42, 66, 89, - 114, 137, 162, 184, 209, 230, 12, 25, - 50, 72, 97, 120, 147, 172, 200, 223, - 26, 44, 69, 90, 114, 135, 159, 180, - 205, 225, 13, 22, 53, 80, 106, 130, - 156, 180, 205, 228, 15, 25, 44, 64, - 90, 115, 142, 168, 196, 222, 19, 24, - 62, 82, 100, 120, 145, 168, 190, 214, - 22, 31, 50, 79, 103, 120, 151, 170, - 203, 227, 21, 29, 45, 65, 106, 124, - 150, 171, 196, 224, 30, 49, 75, 97, - 121, 142, 165, 186, 209, 229, 19, 25, - 52, 70, 93, 116, 143, 166, 192, 219, - 26, 34, 62, 75, 97, 118, 145, 167, - 194, 217, 25, 33, 56, 70, 91, 113, - 143, 165, 196, 223, 21, 34, 51, 72, - 97, 117, 145, 171, 196, 222, 20, 29, - 50, 67, 90, 117, 144, 168, 197, 221, - 22, 31, 48, 66, 95, 117, 146, 168, - 196, 222, 24, 33, 51, 77, 116, 134, - 158, 180, 200, 224, 21, 28, 70, 87, - 106, 124, 149, 170, 194, 217, 26, 33, - 53, 64, 83, 117, 152, 173, 204, 225, - 27, 34, 65, 95, 108, 129, 155, 174, - 210, 225, 20, 26, 72, 
99, 113, 131, - 154, 176, 200, 219, 34, 43, 61, 78, - 93, 114, 155, 177, 205, 229, 23, 29, - 54, 97, 124, 138, 163, 179, 209, 229, - 30, 38, 56, 89, 118, 129, 158, 178, - 200, 231, 21, 29, 49, 63, 85, 111, - 142, 163, 193, 222, 27, 48, 77, 103, - 133, 158, 179, 196, 215, 232, 29, 47, - 74, 99, 124, 151, 176, 198, 220, 237, - 33, 42, 61, 76, 93, 121, 155, 174, - 207, 225, 29, 53, 87, 112, 136, 154, - 170, 188, 208, 227, 24, 30, 52, 84, - 131, 150, 166, 186, 203, 229, 37, 48, - 64, 84, 104, 118, 156, 177, 201, 230 -}; - -static const opus_uint8 silk_NLSF_CB1_iCDF_NB_MB[ 64 ] = { - 212, 178, 148, 129, 108, 96, 85, 82, - 79, 77, 61, 59, 57, 56, 51, 49, - 48, 45, 42, 41, 40, 38, 36, 34, - 31, 30, 21, 12, 10, 3, 1, 0, - 255, 245, 244, 236, 233, 225, 217, 203, - 190, 176, 175, 161, 149, 136, 125, 114, - 102, 91, 81, 71, 60, 52, 43, 35, - 28, 20, 19, 18, 12, 11, 5, 0 -}; - -static const opus_uint8 silk_NLSF_CB2_SELECT_NB_MB[ 160 ] = { - 16, 0, 0, 0, 0, 99, 66, 36, - 36, 34, 36, 34, 34, 34, 34, 83, - 69, 36, 52, 34, 116, 102, 70, 68, - 68, 176, 102, 68, 68, 34, 65, 85, - 68, 84, 36, 116, 141, 152, 139, 170, - 132, 187, 184, 216, 137, 132, 249, 168, - 185, 139, 104, 102, 100, 68, 68, 178, - 218, 185, 185, 170, 244, 216, 187, 187, - 170, 244, 187, 187, 219, 138, 103, 155, - 184, 185, 137, 116, 183, 155, 152, 136, - 132, 217, 184, 184, 170, 164, 217, 171, - 155, 139, 244, 169, 184, 185, 170, 164, - 216, 223, 218, 138, 214, 143, 188, 218, - 168, 244, 141, 136, 155, 170, 168, 138, - 220, 219, 139, 164, 219, 202, 216, 137, - 168, 186, 246, 185, 139, 116, 185, 219, - 185, 138, 100, 100, 134, 100, 102, 34, - 68, 68, 100, 68, 168, 203, 221, 218, - 168, 167, 154, 136, 104, 70, 164, 246, - 171, 137, 139, 137, 155, 218, 219, 139 -}; - -static const opus_uint8 silk_NLSF_CB2_iCDF_NB_MB[ 72 ] = { - 255, 254, 253, 238, 14, 3, 2, 1, - 0, 255, 254, 252, 218, 35, 3, 2, - 1, 0, 255, 254, 250, 208, 59, 4, - 2, 1, 0, 255, 254, 246, 194, 71, - 10, 2, 1, 0, 255, 252, 236, 183, - 82, 8, 2, 1, 0, 
255, 252, 235, - 180, 90, 17, 2, 1, 0, 255, 248, - 224, 171, 97, 30, 4, 1, 0, 255, - 254, 236, 173, 95, 37, 7, 1, 0 -}; - -static const opus_uint8 silk_NLSF_CB2_BITS_NB_MB_Q5[ 72 ] = { - 255, 255, 255, 131, 6, 145, 255, 255, - 255, 255, 255, 236, 93, 15, 96, 255, - 255, 255, 255, 255, 194, 83, 25, 71, - 221, 255, 255, 255, 255, 162, 73, 34, - 66, 162, 255, 255, 255, 210, 126, 73, - 43, 57, 173, 255, 255, 255, 201, 125, - 71, 48, 58, 130, 255, 255, 255, 166, - 110, 73, 57, 62, 104, 210, 255, 255, - 251, 123, 65, 55, 68, 100, 171, 255 -}; - -static const opus_uint8 silk_NLSF_PRED_NB_MB_Q8[ 18 ] = { - 179, 138, 140, 148, 151, 149, 153, 151, - 163, 116, 67, 82, 59, 92, 72, 100, - 89, 92 -}; - -static const opus_int16 silk_NLSF_DELTA_MIN_NB_MB_Q15[ 11 ] = { - 250, 3, 6, 3, 3, 3, 4, 3, - 3, 3, 461 -}; - -const silk_NLSF_CB_struct silk_NLSF_CB_NB_MB = -{ - 32, - 10, - SILK_FIX_CONST( 0.18, 16 ), - SILK_FIX_CONST( 1.0 / 0.18, 6 ), - silk_NLSF_CB1_NB_MB_Q8, - silk_NLSF_CB1_iCDF_NB_MB, - silk_NLSF_PRED_NB_MB_Q8, - silk_NLSF_CB2_SELECT_NB_MB, - silk_NLSF_CB2_iCDF_NB_MB, - silk_NLSF_CB2_BITS_NB_MB_Q5, - silk_NLSF_DELTA_MIN_NB_MB_Q15, -}; diff --git a/thirdparty/opus/silk/tables_NLSF_CB_WB.c b/thirdparty/opus/silk/tables_NLSF_CB_WB.c deleted file mode 100644 index 50af87eb2e..0000000000 --- a/thirdparty/opus/silk/tables_NLSF_CB_WB.c +++ /dev/null @@ -1,198 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. 
-- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "tables.h" - -static const opus_uint8 silk_NLSF_CB1_WB_Q8[ 512 ] = { - 7, 23, 38, 54, 69, 85, 100, 116, - 131, 147, 162, 178, 193, 208, 223, 239, - 13, 25, 41, 55, 69, 83, 98, 112, - 127, 142, 157, 171, 187, 203, 220, 236, - 15, 21, 34, 51, 61, 78, 92, 106, - 126, 136, 152, 167, 185, 205, 225, 240, - 10, 21, 36, 50, 63, 79, 95, 110, - 126, 141, 157, 173, 189, 205, 221, 237, - 17, 20, 37, 51, 59, 78, 89, 107, - 123, 134, 150, 164, 184, 205, 224, 240, - 10, 15, 32, 51, 67, 81, 96, 112, - 129, 142, 158, 173, 189, 204, 220, 236, - 8, 21, 37, 51, 65, 79, 98, 113, - 126, 138, 155, 168, 179, 192, 209, 218, - 12, 15, 34, 55, 63, 78, 87, 108, - 118, 131, 148, 167, 185, 203, 219, 236, - 16, 19, 32, 36, 56, 79, 91, 108, - 118, 136, 154, 171, 186, 204, 220, 237, - 11, 28, 43, 58, 74, 89, 105, 120, - 135, 150, 165, 180, 196, 211, 226, 241, - 6, 16, 33, 46, 60, 75, 92, 107, - 
123, 137, 156, 169, 185, 199, 214, 225, - 11, 19, 30, 44, 57, 74, 89, 105, - 121, 135, 152, 169, 186, 202, 218, 234, - 12, 19, 29, 46, 57, 71, 88, 100, - 120, 132, 148, 165, 182, 199, 216, 233, - 17, 23, 35, 46, 56, 77, 92, 106, - 123, 134, 152, 167, 185, 204, 222, 237, - 14, 17, 45, 53, 63, 75, 89, 107, - 115, 132, 151, 171, 188, 206, 221, 240, - 9, 16, 29, 40, 56, 71, 88, 103, - 119, 137, 154, 171, 189, 205, 222, 237, - 16, 19, 36, 48, 57, 76, 87, 105, - 118, 132, 150, 167, 185, 202, 218, 236, - 12, 17, 29, 54, 71, 81, 94, 104, - 126, 136, 149, 164, 182, 201, 221, 237, - 15, 28, 47, 62, 79, 97, 115, 129, - 142, 155, 168, 180, 194, 208, 223, 238, - 8, 14, 30, 45, 62, 78, 94, 111, - 127, 143, 159, 175, 192, 207, 223, 239, - 17, 30, 49, 62, 79, 92, 107, 119, - 132, 145, 160, 174, 190, 204, 220, 235, - 14, 19, 36, 45, 61, 76, 91, 108, - 121, 138, 154, 172, 189, 205, 222, 238, - 12, 18, 31, 45, 60, 76, 91, 107, - 123, 138, 154, 171, 187, 204, 221, 236, - 13, 17, 31, 43, 53, 70, 83, 103, - 114, 131, 149, 167, 185, 203, 220, 237, - 17, 22, 35, 42, 58, 78, 93, 110, - 125, 139, 155, 170, 188, 206, 224, 240, - 8, 15, 34, 50, 67, 83, 99, 115, - 131, 146, 162, 178, 193, 209, 224, 239, - 13, 16, 41, 66, 73, 86, 95, 111, - 128, 137, 150, 163, 183, 206, 225, 241, - 17, 25, 37, 52, 63, 75, 92, 102, - 119, 132, 144, 160, 175, 191, 212, 231, - 19, 31, 49, 65, 83, 100, 117, 133, - 147, 161, 174, 187, 200, 213, 227, 242, - 18, 31, 52, 68, 88, 103, 117, 126, - 138, 149, 163, 177, 192, 207, 223, 239, - 16, 29, 47, 61, 76, 90, 106, 119, - 133, 147, 161, 176, 193, 209, 224, 240, - 15, 21, 35, 50, 61, 73, 86, 97, - 110, 119, 129, 141, 175, 198, 218, 237 -}; - -static const opus_uint8 silk_NLSF_CB1_iCDF_WB[ 64 ] = { - 225, 204, 201, 184, 183, 175, 158, 154, - 153, 135, 119, 115, 113, 110, 109, 99, - 98, 95, 79, 68, 52, 50, 48, 45, - 43, 32, 31, 27, 18, 10, 3, 0, - 255, 251, 235, 230, 212, 201, 196, 182, - 167, 166, 163, 151, 138, 124, 110, 104, - 90, 78, 76, 70, 69, 57, 45, 34, - 24, 21, 
11, 6, 5, 4, 3, 0 -}; - -static const opus_uint8 silk_NLSF_CB2_SELECT_WB[ 256 ] = { - 0, 0, 0, 0, 0, 0, 0, 1, - 100, 102, 102, 68, 68, 36, 34, 96, - 164, 107, 158, 185, 180, 185, 139, 102, - 64, 66, 36, 34, 34, 0, 1, 32, - 208, 139, 141, 191, 152, 185, 155, 104, - 96, 171, 104, 166, 102, 102, 102, 132, - 1, 0, 0, 0, 0, 16, 16, 0, - 80, 109, 78, 107, 185, 139, 103, 101, - 208, 212, 141, 139, 173, 153, 123, 103, - 36, 0, 0, 0, 0, 0, 0, 1, - 48, 0, 0, 0, 0, 0, 0, 32, - 68, 135, 123, 119, 119, 103, 69, 98, - 68, 103, 120, 118, 118, 102, 71, 98, - 134, 136, 157, 184, 182, 153, 139, 134, - 208, 168, 248, 75, 189, 143, 121, 107, - 32, 49, 34, 34, 34, 0, 17, 2, - 210, 235, 139, 123, 185, 137, 105, 134, - 98, 135, 104, 182, 100, 183, 171, 134, - 100, 70, 68, 70, 66, 66, 34, 131, - 64, 166, 102, 68, 36, 2, 1, 0, - 134, 166, 102, 68, 34, 34, 66, 132, - 212, 246, 158, 139, 107, 107, 87, 102, - 100, 219, 125, 122, 137, 118, 103, 132, - 114, 135, 137, 105, 171, 106, 50, 34, - 164, 214, 141, 143, 185, 151, 121, 103, - 192, 34, 0, 0, 0, 0, 0, 1, - 208, 109, 74, 187, 134, 249, 159, 137, - 102, 110, 154, 118, 87, 101, 119, 101, - 0, 2, 0, 36, 36, 66, 68, 35, - 96, 164, 102, 100, 36, 0, 2, 33, - 167, 138, 174, 102, 100, 84, 2, 2, - 100, 107, 120, 119, 36, 197, 24, 0 -}; - -static const opus_uint8 silk_NLSF_CB2_iCDF_WB[ 72 ] = { - 255, 254, 253, 244, 12, 3, 2, 1, - 0, 255, 254, 252, 224, 38, 3, 2, - 1, 0, 255, 254, 251, 209, 57, 4, - 2, 1, 0, 255, 254, 244, 195, 69, - 4, 2, 1, 0, 255, 251, 232, 184, - 84, 7, 2, 1, 0, 255, 254, 240, - 186, 86, 14, 2, 1, 0, 255, 254, - 239, 178, 91, 30, 5, 1, 0, 255, - 248, 227, 177, 100, 19, 2, 1, 0 -}; - -static const opus_uint8 silk_NLSF_CB2_BITS_WB_Q5[ 72 ] = { - 255, 255, 255, 156, 4, 154, 255, 255, - 255, 255, 255, 227, 102, 15, 92, 255, - 255, 255, 255, 255, 213, 83, 24, 72, - 236, 255, 255, 255, 255, 150, 76, 33, - 63, 214, 255, 255, 255, 190, 121, 77, - 43, 55, 185, 255, 255, 255, 245, 137, - 71, 43, 59, 139, 255, 255, 255, 255, - 131, 66, 50, 
66, 107, 194, 255, 255, - 166, 116, 76, 55, 53, 125, 255, 255 -}; - -static const opus_uint8 silk_NLSF_PRED_WB_Q8[ 30 ] = { - 175, 148, 160, 176, 178, 173, 174, 164, - 177, 174, 196, 182, 198, 192, 182, 68, - 62, 66, 60, 72, 117, 85, 90, 118, - 136, 151, 142, 160, 142, 155 -}; - -static const opus_int16 silk_NLSF_DELTA_MIN_WB_Q15[ 17 ] = { - 100, 3, 40, 3, 3, 3, 5, 14, - 14, 10, 11, 3, 8, 9, 7, 3, - 347 -}; - -const silk_NLSF_CB_struct silk_NLSF_CB_WB = -{ - 32, - 16, - SILK_FIX_CONST( 0.15, 16 ), - SILK_FIX_CONST( 1.0 / 0.15, 6 ), - silk_NLSF_CB1_WB_Q8, - silk_NLSF_CB1_iCDF_WB, - silk_NLSF_PRED_WB_Q8, - silk_NLSF_CB2_SELECT_WB, - silk_NLSF_CB2_iCDF_WB, - silk_NLSF_CB2_BITS_WB_Q5, - silk_NLSF_DELTA_MIN_WB_Q15, -}; - diff --git a/thirdparty/opus/silk/tables_gain.c b/thirdparty/opus/silk/tables_gain.c deleted file mode 100644 index 37e41d890c..0000000000 --- a/thirdparty/opus/silk/tables_gain.c +++ /dev/null @@ -1,63 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "tables.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -const opus_uint8 silk_gain_iCDF[ 3 ][ N_LEVELS_QGAIN / 8 ] = -{ -{ - 224, 112, 44, 15, 3, 2, 1, 0 -}, -{ - 254, 237, 192, 132, 70, 23, 4, 0 -}, -{ - 255, 252, 226, 155, 61, 11, 2, 0 -} -}; - -const opus_uint8 silk_delta_gain_iCDF[ MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 ] = { - 250, 245, 234, 203, 71, 50, 42, 38, - 35, 33, 31, 29, 28, 27, 26, 25, - 24, 23, 22, 21, 20, 19, 18, 17, - 16, 15, 14, 13, 12, 11, 10, 9, - 8, 7, 6, 5, 4, 3, 2, 1, - 0 -}; - -#ifdef __cplusplus -} -#endif diff --git a/thirdparty/opus/silk/tables_other.c b/thirdparty/opus/silk/tables_other.c deleted file mode 100644 index 398686bf26..0000000000 --- a/thirdparty/opus/silk/tables_other.c +++ /dev/null @@ -1,138 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
-Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "structs.h" -#include "define.h" -#include "tables.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/* Piece-wise linear mapping from bitrate in kbps to coding quality in dB SNR */ -const opus_int32 silk_TargetRate_table_NB[ TARGET_RATE_TAB_SZ ] = { - 0, 8000, 9400, 11500, 13500, 17500, 25000, MAX_TARGET_RATE_BPS -}; -const opus_int32 silk_TargetRate_table_MB[ TARGET_RATE_TAB_SZ ] = { - 0, 9000, 12000, 14500, 18500, 24500, 35500, MAX_TARGET_RATE_BPS -}; -const opus_int32 silk_TargetRate_table_WB[ TARGET_RATE_TAB_SZ ] = { - 0, 10500, 14000, 17000, 21500, 28500, 42000, MAX_TARGET_RATE_BPS -}; -const opus_int16 silk_SNR_table_Q1[ TARGET_RATE_TAB_SZ ] = { - 18, 29, 38, 40, 46, 52, 62, 84 -}; - -/* Tables for stereo predictor coding */ -const opus_int16 silk_stereo_pred_quant_Q13[ STEREO_QUANT_TAB_SIZE ] = { - -13732, -10050, -8266, -7526, -6500, -5000, -2950, -820, - 820, 2950, 5000, 6500, 7526, 8266, 10050, 13732 -}; -const opus_uint8 silk_stereo_pred_joint_iCDF[ 25 ] = { - 249, 247, 246, 245, 244, - 234, 210, 202, 201, 200, - 197, 174, 82, 59, 56, - 55, 54, 46, 22, 12, - 11, 10, 9, 7, 0 -}; -const opus_uint8 silk_stereo_only_code_mid_iCDF[ 2 ] = { 64, 0 }; - -/* Tables for LBRR flags */ -static const opus_uint8 silk_LBRR_flags_2_iCDF[ 3 ] = { 203, 150, 0 }; -static const opus_uint8 silk_LBRR_flags_3_iCDF[ 7 ] = { 215, 195, 166, 125, 110, 82, 0 }; -const opus_uint8 * const silk_LBRR_flags_iCDF_ptr[ 2 ] = { - silk_LBRR_flags_2_iCDF, - silk_LBRR_flags_3_iCDF -}; - -/* Table for LSB coding */ -const opus_uint8 silk_lsb_iCDF[ 2 ] = { 120, 0 }; - -/* Tables for LTPScale */ -const opus_uint8 silk_LTPscale_iCDF[ 3 ] = { 128, 64, 0 }; - -/* Tables for signal type and offset coding */ -const opus_uint8 silk_type_offset_VAD_iCDF[ 4 ] = { - 232, 158, 10, 0 -}; -const opus_uint8 silk_type_offset_no_VAD_iCDF[ 2 ] = { - 230, 0 -}; - -/* Tables 
for NLSF interpolation factor */ -const opus_uint8 silk_NLSF_interpolation_factor_iCDF[ 5 ] = { 243, 221, 192, 181, 0 }; - -/* Quantization offsets */ -const opus_int16 silk_Quantization_Offsets_Q10[ 2 ][ 2 ] = { - { OFFSET_UVL_Q10, OFFSET_UVH_Q10 }, { OFFSET_VL_Q10, OFFSET_VH_Q10 } -}; - -/* Table for LTPScale */ -const opus_int16 silk_LTPScales_table_Q14[ 3 ] = { 15565, 12288, 8192 }; - -/* Uniform entropy tables */ -const opus_uint8 silk_uniform3_iCDF[ 3 ] = { 171, 85, 0 }; -const opus_uint8 silk_uniform4_iCDF[ 4 ] = { 192, 128, 64, 0 }; -const opus_uint8 silk_uniform5_iCDF[ 5 ] = { 205, 154, 102, 51, 0 }; -const opus_uint8 silk_uniform6_iCDF[ 6 ] = { 213, 171, 128, 85, 43, 0 }; -const opus_uint8 silk_uniform8_iCDF[ 8 ] = { 224, 192, 160, 128, 96, 64, 32, 0 }; - -const opus_uint8 silk_NLSF_EXT_iCDF[ 7 ] = { 100, 40, 16, 7, 3, 1, 0 }; - -/* Elliptic/Cauer filters designed with 0.1 dB passband ripple, - 80 dB minimum stopband attenuation, and - [0.95 : 0.15 : 0.35] normalized cut off frequencies. 
*/ - -/* Interpolation points for filter coefficients used in the bandwidth transition smoother */ -const opus_int32 silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NB ] = -{ -{ 250767114, 501534038, 250767114 }, -{ 209867381, 419732057, 209867381 }, -{ 170987846, 341967853, 170987846 }, -{ 131531482, 263046905, 131531482 }, -{ 89306658, 178584282, 89306658 } -}; - -/* Interpolation points for filter coefficients used in the bandwidth transition smoother */ -const opus_int32 silk_Transition_LP_A_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NA ] = -{ -{ 506393414, 239854379 }, -{ 411067935, 169683996 }, -{ 306733530, 116694253 }, -{ 185807084, 77959395 }, -{ 35497197, 57401098 } -}; - -#ifdef __cplusplus -} -#endif - diff --git a/thirdparty/opus/silk/tables_pitch_lag.c b/thirdparty/opus/silk/tables_pitch_lag.c deleted file mode 100644 index e80cc59a27..0000000000 --- a/thirdparty/opus/silk/tables_pitch_lag.c +++ /dev/null @@ -1,69 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "tables.h" - -const opus_uint8 silk_pitch_lag_iCDF[ 2 * ( PITCH_EST_MAX_LAG_MS - PITCH_EST_MIN_LAG_MS ) ] = { - 253, 250, 244, 233, 212, 182, 150, 131, - 120, 110, 98, 85, 72, 60, 49, 40, - 32, 25, 19, 15, 13, 11, 9, 8, - 7, 6, 5, 4, 3, 2, 1, 0 -}; - -const opus_uint8 silk_pitch_delta_iCDF[21] = { - 210, 208, 206, 203, 199, 193, 183, 168, - 142, 104, 74, 52, 37, 27, 20, 14, - 10, 6, 4, 2, 0 -}; - -const opus_uint8 silk_pitch_contour_iCDF[34] = { - 223, 201, 183, 167, 152, 138, 124, 111, - 98, 88, 79, 70, 62, 56, 50, 44, - 39, 35, 31, 27, 24, 21, 18, 16, - 14, 12, 10, 8, 6, 4, 3, 2, - 1, 0 -}; - -const opus_uint8 silk_pitch_contour_NB_iCDF[11] = { - 188, 176, 155, 138, 119, 97, 67, 43, - 26, 10, 0 -}; - -const opus_uint8 silk_pitch_contour_10_ms_iCDF[12] = { - 165, 119, 80, 61, 47, 35, 27, 20, - 14, 9, 4, 0 -}; - -const opus_uint8 silk_pitch_contour_10_ms_NB_iCDF[3] = { - 113, 63, 0 -}; - - diff --git a/thirdparty/opus/silk/tables_pulses_per_block.c b/thirdparty/opus/silk/tables_pulses_per_block.c deleted file mode 100644 index c7c01c8893..0000000000 --- 
a/thirdparty/opus/silk/tables_pulses_per_block.c +++ /dev/null @@ -1,264 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "tables.h" - -const opus_uint8 silk_max_pulses_table[ 4 ] = { - 8, 10, 12, 16 -}; - -const opus_uint8 silk_pulses_per_block_iCDF[ 10 ][ 18 ] = { -{ - 125, 51, 26, 18, 15, 12, 11, 10, - 9, 8, 7, 6, 5, 4, 3, 2, - 1, 0 -}, -{ - 198, 105, 45, 22, 15, 12, 11, 10, - 9, 8, 7, 6, 5, 4, 3, 2, - 1, 0 -}, -{ - 213, 162, 116, 83, 59, 43, 32, 24, - 18, 15, 12, 9, 7, 6, 5, 3, - 2, 0 -}, -{ - 239, 187, 116, 59, 28, 16, 11, 10, - 9, 8, 7, 6, 5, 4, 3, 2, - 1, 0 -}, -{ - 250, 229, 188, 135, 86, 51, 30, 19, - 13, 10, 8, 6, 5, 4, 3, 2, - 1, 0 -}, -{ - 249, 235, 213, 185, 156, 128, 103, 83, - 66, 53, 42, 33, 26, 21, 17, 13, - 10, 0 -}, -{ - 254, 249, 235, 206, 164, 118, 77, 46, - 27, 16, 10, 7, 5, 4, 3, 2, - 1, 0 -}, -{ - 255, 253, 249, 239, 220, 191, 156, 119, - 85, 57, 37, 23, 15, 10, 6, 4, - 2, 0 -}, -{ - 255, 253, 251, 246, 237, 223, 203, 179, - 152, 124, 98, 75, 55, 40, 29, 21, - 15, 0 -}, -{ - 255, 254, 253, 247, 220, 162, 106, 67, - 42, 28, 18, 12, 9, 6, 4, 3, - 2, 0 -} -}; - -const opus_uint8 silk_pulses_per_block_BITS_Q5[ 9 ][ 18 ] = { -{ - 31, 57, 107, 160, 205, 205, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255 -}, -{ - 69, 47, 67, 111, 166, 205, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255 -}, -{ - 82, 74, 79, 95, 109, 128, 145, 160, - 173, 205, 205, 205, 224, 255, 255, 224, - 255, 224 -}, -{ - 125, 74, 59, 69, 97, 141, 182, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255 -}, -{ - 173, 115, 85, 73, 76, 92, 115, 145, - 173, 205, 224, 224, 255, 255, 255, 255, - 255, 255 -}, -{ - 166, 134, 113, 102, 101, 102, 107, 118, - 125, 138, 145, 155, 166, 182, 192, 192, - 205, 150 -}, -{ - 224, 182, 134, 101, 83, 79, 85, 97, - 120, 145, 173, 205, 224, 255, 255, 255, - 255, 255 -}, -{ - 255, 224, 192, 150, 120, 101, 92, 89, - 93, 102, 118, 134, 160, 182, 192, 224, - 224, 224 -}, -{ - 255, 224, 224, 182, 
155, 134, 118, 109, - 104, 102, 106, 111, 118, 131, 145, 160, - 173, 131 -} -}; - -const opus_uint8 silk_rate_levels_iCDF[ 2 ][ 9 ] = -{ -{ - 241, 190, 178, 132, 87, 74, 41, 14, - 0 -}, -{ - 223, 193, 157, 140, 106, 57, 39, 18, - 0 -} -}; - -const opus_uint8 silk_rate_levels_BITS_Q5[ 2 ][ 9 ] = -{ -{ - 131, 74, 141, 79, 80, 138, 95, 104, - 134 -}, -{ - 95, 99, 91, 125, 93, 76, 123, 115, - 123 -} -}; - -const opus_uint8 silk_shell_code_table0[ 152 ] = { - 128, 0, 214, 42, 0, 235, 128, 21, - 0, 244, 184, 72, 11, 0, 248, 214, - 128, 42, 7, 0, 248, 225, 170, 80, - 25, 5, 0, 251, 236, 198, 126, 54, - 18, 3, 0, 250, 238, 211, 159, 82, - 35, 15, 5, 0, 250, 231, 203, 168, - 128, 88, 53, 25, 6, 0, 252, 238, - 216, 185, 148, 108, 71, 40, 18, 4, - 0, 253, 243, 225, 199, 166, 128, 90, - 57, 31, 13, 3, 0, 254, 246, 233, - 212, 183, 147, 109, 73, 44, 23, 10, - 2, 0, 255, 250, 240, 223, 198, 166, - 128, 90, 58, 33, 16, 6, 1, 0, - 255, 251, 244, 231, 210, 181, 146, 110, - 75, 46, 25, 12, 5, 1, 0, 255, - 253, 248, 238, 221, 196, 164, 128, 92, - 60, 35, 18, 8, 3, 1, 0, 255, - 253, 249, 242, 229, 208, 180, 146, 110, - 76, 48, 27, 14, 7, 3, 1, 0 -}; - -const opus_uint8 silk_shell_code_table1[ 152 ] = { - 129, 0, 207, 50, 0, 236, 129, 20, - 0, 245, 185, 72, 10, 0, 249, 213, - 129, 42, 6, 0, 250, 226, 169, 87, - 27, 4, 0, 251, 233, 194, 130, 62, - 20, 4, 0, 250, 236, 207, 160, 99, - 47, 17, 3, 0, 255, 240, 217, 182, - 131, 81, 41, 11, 1, 0, 255, 254, - 233, 201, 159, 107, 61, 20, 2, 1, - 0, 255, 249, 233, 206, 170, 128, 86, - 50, 23, 7, 1, 0, 255, 250, 238, - 217, 186, 148, 108, 70, 39, 18, 6, - 1, 0, 255, 252, 243, 226, 200, 166, - 128, 90, 56, 30, 13, 4, 1, 0, - 255, 252, 245, 231, 209, 180, 146, 110, - 76, 47, 25, 11, 4, 1, 0, 255, - 253, 248, 237, 219, 194, 163, 128, 93, - 62, 37, 19, 8, 3, 1, 0, 255, - 254, 250, 241, 226, 205, 177, 145, 111, - 79, 51, 30, 15, 6, 2, 1, 0 -}; - -const opus_uint8 silk_shell_code_table2[ 152 ] = { - 129, 0, 203, 54, 0, 234, 129, 23, - 0, 245, 184, 73, 
10, 0, 250, 215, - 129, 41, 5, 0, 252, 232, 173, 86, - 24, 3, 0, 253, 240, 200, 129, 56, - 15, 2, 0, 253, 244, 217, 164, 94, - 38, 10, 1, 0, 253, 245, 226, 189, - 132, 71, 27, 7, 1, 0, 253, 246, - 231, 203, 159, 105, 56, 23, 6, 1, - 0, 255, 248, 235, 213, 179, 133, 85, - 47, 19, 5, 1, 0, 255, 254, 243, - 221, 194, 159, 117, 70, 37, 12, 2, - 1, 0, 255, 254, 248, 234, 208, 171, - 128, 85, 48, 22, 8, 2, 1, 0, - 255, 254, 250, 240, 220, 189, 149, 107, - 67, 36, 16, 6, 2, 1, 0, 255, - 254, 251, 243, 227, 201, 166, 128, 90, - 55, 29, 13, 5, 2, 1, 0, 255, - 254, 252, 246, 234, 213, 183, 147, 109, - 73, 43, 22, 10, 4, 2, 1, 0 -}; - -const opus_uint8 silk_shell_code_table3[ 152 ] = { - 130, 0, 200, 58, 0, 231, 130, 26, - 0, 244, 184, 76, 12, 0, 249, 214, - 130, 43, 6, 0, 252, 232, 173, 87, - 24, 3, 0, 253, 241, 203, 131, 56, - 14, 2, 0, 254, 246, 221, 167, 94, - 35, 8, 1, 0, 254, 249, 232, 193, - 130, 65, 23, 5, 1, 0, 255, 251, - 239, 211, 162, 99, 45, 15, 4, 1, - 0, 255, 251, 243, 223, 186, 131, 74, - 33, 11, 3, 1, 0, 255, 252, 245, - 230, 202, 158, 105, 57, 24, 8, 2, - 1, 0, 255, 253, 247, 235, 214, 179, - 132, 84, 44, 19, 7, 2, 1, 0, - 255, 254, 250, 240, 223, 196, 159, 112, - 69, 36, 15, 6, 2, 1, 0, 255, - 254, 253, 245, 231, 209, 176, 136, 93, - 55, 27, 11, 3, 2, 1, 0, 255, - 254, 253, 252, 239, 221, 194, 158, 117, - 76, 42, 18, 4, 3, 2, 1, 0 -}; - -const opus_uint8 silk_shell_code_table_offsets[ 17 ] = { - 0, 0, 2, 5, 9, 14, 20, 27, - 35, 44, 54, 65, 77, 90, 104, 119, - 135 -}; - -const opus_uint8 silk_sign_iCDF[ 42 ] = { - 254, 49, 67, 77, 82, 93, 99, - 198, 11, 18, 24, 31, 36, 45, - 255, 46, 66, 78, 87, 94, 104, - 208, 14, 21, 32, 42, 51, 66, - 255, 94, 104, 109, 112, 115, 118, - 248, 53, 69, 80, 88, 95, 102 -}; diff --git a/thirdparty/opus/silk/tuning_parameters.h b/thirdparty/opus/silk/tuning_parameters.h deleted file mode 100644 index 5b8f404235..0000000000 --- a/thirdparty/opus/silk/tuning_parameters.h +++ /dev/null @@ -1,171 +0,0 @@ 
-/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-***********************************************************************/ - -#ifndef SILK_TUNING_PARAMETERS_H -#define SILK_TUNING_PARAMETERS_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -/* Decay time for bitreservoir */ -#define BITRESERVOIR_DECAY_TIME_MS 500 - -/*******************/ -/* Pitch estimator */ -/*******************/ - -/* Level of noise floor for whitening filter LPC analysis in pitch analysis */ -#define FIND_PITCH_WHITE_NOISE_FRACTION 1e-3f - -/* Bandwidth expansion for whitening filter in pitch analysis */ -#define FIND_PITCH_BANDWIDTH_EXPANSION 0.99f - -/*********************/ -/* Linear prediction */ -/*********************/ - -/* LPC analysis regularization */ -#define FIND_LPC_COND_FAC 1e-5f - -/* LTP analysis defines */ -#define FIND_LTP_COND_FAC 1e-5f -#define LTP_DAMPING 0.05f -#define LTP_SMOOTHING 0.1f - -/* LTP quantization settings */ -#define MU_LTP_QUANT_NB 0.03f -#define MU_LTP_QUANT_MB 0.025f -#define MU_LTP_QUANT_WB 0.02f - -/* Max cumulative LTP gain */ -#define MAX_SUM_LOG_GAIN_DB 250.0f - -/***********************/ -/* High pass filtering */ -/***********************/ - -/* Smoothing parameters for low end of pitch frequency range estimation */ -#define VARIABLE_HP_SMTH_COEF1 0.1f -#define VARIABLE_HP_SMTH_COEF2 0.015f -#define VARIABLE_HP_MAX_DELTA_FREQ 0.4f - -/* Min and max cut-off frequency values (-3 dB points) */ -#define VARIABLE_HP_MIN_CUTOFF_HZ 60 -#define VARIABLE_HP_MAX_CUTOFF_HZ 100 - -/***********/ -/* Various */ -/***********/ - -/* VAD threshold */ -#define SPEECH_ACTIVITY_DTX_THRES 0.05f - -/* Speech Activity LBRR enable threshold */ -#define LBRR_SPEECH_ACTIVITY_THRES 0.3f - -/*************************/ -/* Perceptual parameters */ -/*************************/ - -/* reduction in coding SNR during low speech activity */ -#define BG_SNR_DECR_dB 2.0f - -/* factor for reducing quantization noise during voiced speech */ -#define HARM_SNR_INCR_dB 2.0f - -/* factor for reducing quantization noise for unvoiced 
sparse signals */ -#define SPARSE_SNR_INCR_dB 2.0f - -/* threshold for sparseness measure above which to use lower quantization offset during unvoiced */ -#define SPARSENESS_THRESHOLD_QNT_OFFSET 0.75f - -/* warping control */ -#define WARPING_MULTIPLIER 0.015f - -/* fraction added to first autocorrelation value */ -#define SHAPE_WHITE_NOISE_FRACTION 5e-5f - -/* noise shaping filter chirp factor */ -#define BANDWIDTH_EXPANSION 0.95f - -/* difference between chirp factors for analysis and synthesis noise shaping filters at low bitrates */ -#define LOW_RATE_BANDWIDTH_EXPANSION_DELTA 0.01f - -/* extra harmonic boosting (signal shaping) at low bitrates */ -#define LOW_RATE_HARMONIC_BOOST 0.1f - -/* extra harmonic boosting (signal shaping) for noisy input signals */ -#define LOW_INPUT_QUALITY_HARMONIC_BOOST 0.1f - -/* harmonic noise shaping */ -#define HARMONIC_SHAPING 0.3f - -/* extra harmonic noise shaping for high bitrates or noisy input */ -#define HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING 0.2f - -/* parameter for shaping noise towards higher frequencies */ -#define HP_NOISE_COEF 0.25f - -/* parameter for shaping noise even more towards higher frequencies during voiced speech */ -#define HARM_HP_NOISE_COEF 0.35f - -/* parameter for applying a high-pass tilt to the input signal */ -#define INPUT_TILT 0.05f - -/* parameter for extra high-pass tilt to the input signal at high rates */ -#define HIGH_RATE_INPUT_TILT 0.1f - -/* parameter for reducing noise at the very low frequencies */ -#define LOW_FREQ_SHAPING 4.0f - -/* less reduction of noise at the very low frequencies for signals with low SNR at low frequencies */ -#define LOW_QUALITY_LOW_FREQ_SHAPING_DECR 0.5f - -/* subframe smoothing coefficient for HarmBoost, HarmShapeGain, Tilt (lower -> more smoothing) */ -#define SUBFR_SMTH_COEF 0.4f - -/* parameters defining the R/D tradeoff in the residual quantizer */ -#define LAMBDA_OFFSET 1.2f -#define LAMBDA_SPEECH_ACT -0.2f -#define LAMBDA_DELAYED_DECISIONS -0.05f 
-#define LAMBDA_INPUT_QUALITY -0.1f -#define LAMBDA_CODING_QUALITY -0.2f -#define LAMBDA_QUANT_OFFSET 0.8f - -/* Compensation in bitrate calculations for 10 ms modes */ -#define REDUCE_BITRATE_10_MS_BPS 2200 - -/* Maximum time before allowing a bandwidth transition */ -#define MAX_BANDWIDTH_SWITCH_DELAY_MS 5000 - -#ifdef __cplusplus -} -#endif - -#endif /* SILK_TUNING_PARAMETERS_H */ diff --git a/thirdparty/opus/silk/typedef.h b/thirdparty/opus/silk/typedef.h deleted file mode 100644 index 97b7e709be..0000000000 --- a/thirdparty/opus/silk/typedef.h +++ /dev/null @@ -1,78 +0,0 @@ -/*********************************************************************** -Copyright (c) 2006-2011, Skype Limited. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -- Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -- Neither the name of Internet Society, IETF or IETF Trust, nor the -names of specific contributors, may be used to endorse or promote -products derived from this software without specific prior written -permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -#ifndef SILK_TYPEDEF_H -#define SILK_TYPEDEF_H - -#include "opus_types.h" -#include "opus_defines.h" - -#ifndef FIXED_POINT -# include <float.h> -# define silk_float float -# define silk_float_MAX FLT_MAX -#endif - -#define silk_int64_MAX ((opus_int64)0x7FFFFFFFFFFFFFFFLL) /* 2^63 - 1 */ -#define silk_int64_MIN ((opus_int64)0x8000000000000000LL) /* -2^63 */ -#define silk_int32_MAX 0x7FFFFFFF /* 2^31 - 1 = 2147483647 */ -#define silk_int32_MIN ((opus_int32)0x80000000) /* -2^31 = -2147483648 */ -#define silk_int16_MAX 0x7FFF /* 2^15 - 1 = 32767 */ -#define silk_int16_MIN ((opus_int16)0x8000) /* -2^15 = -32768 */ -#define silk_int8_MAX 0x7F /* 2^7 - 1 = 127 */ -#define silk_int8_MIN ((opus_int8)0x80) /* -2^7 = -128 */ -#define silk_uint8_MAX 0xFF /* 2^8 - 1 = 255 */ - -#define silk_TRUE 1 -#define silk_FALSE 0 - -/* assertions */ -#if (defined _WIN32 && !defined _WINCE && !defined(__GNUC__) && !defined(NO_ASSERTS)) -# ifndef silk_assert -# include <crtdbg.h> /* ASSERTE() */ -# define silk_assert(COND) _ASSERTE(COND) -# endif -#else -# ifdef ENABLE_ASSERTIONS -# include <stdio.h> -# include <stdlib.h> -#define silk_fatal(str) _silk_fatal(str, __FILE__, __LINE__); -#ifdef __GNUC__ -__attribute__((noreturn)) -#endif -static OPUS_INLINE void _silk_fatal(const char *str, const char *file, int line) -{ - fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str); - 
abort(); -} -# define silk_assert(COND) {if (!(COND)) {silk_fatal("assertion failed: " #COND);}} -# else -# define silk_assert(COND) -# endif -#endif - -#endif /* SILK_TYPEDEF_H */ diff --git a/thirdparty/opus/silk/x86/NSQ_del_dec_sse.c b/thirdparty/opus/silk/x86/NSQ_del_dec_sse.c deleted file mode 100644 index 21d4a8bc1e..0000000000 --- a/thirdparty/opus/silk/x86/NSQ_del_dec_sse.c +++ /dev/null @@ -1,857 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <xmmintrin.h> -#include <emmintrin.h> -#include <smmintrin.h> -#include "main.h" -#include "celt/x86/x86cpu.h" - -#include "stack_alloc.h" - -typedef struct { - opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; - opus_int32 RandState[ DECISION_DELAY ]; - opus_int32 Q_Q10[ DECISION_DELAY ]; - opus_int32 Xq_Q14[ DECISION_DELAY ]; - opus_int32 Pred_Q15[ DECISION_DELAY ]; - opus_int32 Shape_Q14[ DECISION_DELAY ]; - opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; - opus_int32 LF_AR_Q14; - opus_int32 Seed; - opus_int32 SeedInit; - opus_int32 RD_Q10; -} NSQ_del_dec_struct; - -typedef struct { - opus_int32 Q_Q10; - opus_int32 RD_Q10; - opus_int32 xq_Q14; - opus_int32 LF_AR_Q14; - opus_int32 sLTP_shp_Q14; - opus_int32 LPC_exc_Q14; -} NSQ_sample_struct; - -typedef NSQ_sample_struct NSQ_sample_pair[ 2 ]; - -static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - const opus_int32 x_Q3[], /* I Input in Q3 */ - opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ - const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I Subframe number */ - opus_int nStatesDelayedDecision, /* I Number of del dec states */ - const opus_int LTP_scale_Q14, /* I LTP state scaling */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type, /* I Signal type */ - const opus_int decisionDelay /* I Decision delay */ -); - -/******************************************/ -/* Noise shape quantizer for one subframe */ -/******************************************/ -static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( - silk_nsq_state *NSQ, /* I/O NSQ state */ 
- NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP filter state */ - opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int subfr, /* I Subframe number */ - opus_int shapingLPCOrder, /* I Shaping LPC filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - opus_int warping_Q16, /* I */ - opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ - opus_int decisionDelay /* I */ -); - -void silk_NSQ_del_dec_sse4_1( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const 
opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -) -{ - opus_int i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr; - opus_int last_smple_idx, smpl_buf_idx, decisionDelay; - const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13; - opus_int16 *pxq; - VARDECL( opus_int32, sLTP_Q15 ); - VARDECL( opus_int16, sLTP ); - opus_int32 HarmShapeFIRPacked_Q14; - opus_int offset_Q10; - opus_int32 RDmin_Q10, Gain_Q10; - VARDECL( opus_int32, x_sc_Q10 ); - VARDECL( opus_int32, delayedGain_Q10 ); - VARDECL( NSQ_del_dec_struct, psDelDec ); - NSQ_del_dec_struct *psDD; - SAVE_STACK; - - /* Set unvoiced lag to the previous one, overwrite later for voiced */ - lag = NSQ->lagPrev; - - silk_assert( NSQ->prev_gain_Q16 != 0 ); - - /* Initialize delayed decision states */ - ALLOC( psDelDec, psEncC->nStatesDelayedDecision, NSQ_del_dec_struct ); - silk_memset( psDelDec, 0, psEncC->nStatesDelayedDecision * sizeof( NSQ_del_dec_struct ) ); - for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - psDD->Seed = ( k + psIndices->Seed ) & 3; - psDD->SeedInit = psDD->Seed; - psDD->RD_Q10 = 0; - psDD->LF_AR_Q14 = NSQ->sLF_AR_shp_Q14; - psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ]; - silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); - silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) ); - } - - offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ]; - smpl_buf_idx = 0; /* index of oldest samples */ - - decisionDelay = silk_min_int( DECISION_DELAY, psEncC->subfr_length ); - - /* For voiced frames limit the decision delay to lower than the pitch lag */ - if( psIndices->signalType == TYPE_VOICED ) { - for( k = 0; k < psEncC->nb_subfr; k++ ) { - 
decisionDelay = silk_min_int( decisionDelay, pitchL[ k ] - LTP_ORDER / 2 - 1 ); - } - } else { - if( lag > 0 ) { - decisionDelay = silk_min_int( decisionDelay, lag - LTP_ORDER / 2 - 1 ); - } - } - - if( psIndices->NLSFInterpCoef_Q2 == 4 ) { - LSF_interpolation_flag = 0; - } else { - LSF_interpolation_flag = 1; - } - - ALLOC( sLTP_Q15, - psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); - ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); - ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); - ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 ); - /* Set up pointers to start of sub frame */ - pxq = &NSQ->xq[ psEncC->ltp_mem_length ]; - NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length; - NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; - subfr = 0; - for( k = 0; k < psEncC->nb_subfr; k++ ) { - A_Q12 = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ]; - B_Q14 = <PCoef_Q14[ k * LTP_ORDER ]; - AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ]; - - /* Noise shape parameters */ - silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); - HarmShapeFIRPacked_Q14 = silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 ); - HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 ); - - NSQ->rewhite_flag = 0; - if( psIndices->signalType == TYPE_VOICED ) { - /* Voiced */ - lag = pitchL[ k ]; - - /* Re-whitening */ - if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) { - if( k == 2 ) { - /* RESET DELAYED DECISIONS */ - /* Find winner */ - RDmin_Q10 = psDelDec[ 0 ].RD_Q10; - Winner_ind = 0; - for( i = 1; i < psEncC->nStatesDelayedDecision; i++ ) { - if( psDelDec[ i ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psDelDec[ i ].RD_Q10; - Winner_ind = i; - } - } - for( i = 0; i < psEncC->nStatesDelayedDecision; i++ ) { - if( i != Winner_ind ) { - psDelDec[ i ].RD_Q10 += ( silk_int32_MAX >> 4 ); - silk_assert( psDelDec[ i ].RD_Q10 >= 0 ); - } - } - - /* Copy final part of signals from winner state to 
output and long-term filter states */ - psDD = &psDelDec[ Winner_ind ]; - last_smple_idx = smpl_buf_idx + decisionDelay; - for( i = 0; i < decisionDelay; i++ ) { - last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); - pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( - silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ]; - } - - subfr = 0; - } - - /* Rewhiten with new A coefs */ - start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; - silk_assert( start_idx > 0 ); - - silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], - A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); - - NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; - NSQ->rewhite_flag = 1; - } - } - - silk_nsq_del_dec_scale_states_sse4_1( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, - psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay ); - - silk_noise_shape_quantizer_del_dec_sse4_1( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, - delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], - Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder, - psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay ); - - x_Q3 += psEncC->subfr_length; - pulses += psEncC->subfr_length; - pxq += psEncC->subfr_length; - } - - /* Find winner */ - RDmin_Q10 = psDelDec[ 0 ].RD_Q10; - Winner_ind = 0; - for( k = 1; k < psEncC->nStatesDelayedDecision; k++ ) { - if( psDelDec[ k ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psDelDec[ k ].RD_Q10; - Winner_ind = k; - } - 
} - - /* Copy final part of signals from winner state to output and long-term filter states */ - psDD = &psDelDec[ Winner_ind ]; - psIndices->Seed = psDD->SeedInit; - last_smple_idx = smpl_buf_idx + decisionDelay; - Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 ); - for( i = 0; i < decisionDelay; i++ ) { - last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); - pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( - silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ]; - } - silk_memcpy( NSQ->sLPC_Q14, &psDD->sLPC_Q14[ psEncC->subfr_length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); - silk_memcpy( NSQ->sAR2_Q14, psDD->sAR2_Q14, sizeof( psDD->sAR2_Q14 ) ); - - /* Update states */ - NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14; - NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; - - /* Save quantized speech signal */ - /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */ - silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); - silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); - RESTORE_STACK; -} - -/******************************************/ -/* Noise shape quantizer for one subframe */ -/******************************************/ -static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP filter state */ - opus_int32 delayedGain_Q10[], /* I/O Gain 
delay buffer */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int subfr, /* I Subframe number */ - opus_int shapingLPCOrder, /* I Shaping LPC filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - opus_int warping_Q16, /* I */ - opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ - opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ - opus_int decisionDelay /* I */ -) -{ - opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; - opus_int32 Winner_rand_state; - opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14; - opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10; - opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; - opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; - opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; - VARDECL( NSQ_sample_pair, psSampleState ); - NSQ_del_dec_struct *psDD; - NSQ_sample_struct *psSS; - - __m128i a_Q12_0123, a_Q12_4567, a_Q12_89AB, a_Q12_CDEF; - __m128i b_Q12_0123, b_sr_Q12_0123; - SAVE_STACK; - - silk_assert( nStatesDelayedDecision > 0 ); - ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair ); - - shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; - pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; - Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); - - a_Q12_0123 = OP_CVTEPI16_EPI32_M64( a_Q12 ); - a_Q12_4567 = OP_CVTEPI16_EPI32_M64( a_Q12 + 4 ); - - if( opus_likely( predictLPCOrder == 16 ) ) { - a_Q12_89AB = 
OP_CVTEPI16_EPI32_M64( a_Q12 + 8 ); - a_Q12_CDEF = OP_CVTEPI16_EPI32_M64( a_Q12 + 12 ); - } - - if( signalType == TYPE_VOICED ){ - b_Q12_0123 = OP_CVTEPI16_EPI32_M64( b_Q14 ); - b_sr_Q12_0123 = _mm_shuffle_epi32( b_Q12_0123, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ - } - for( i = 0; i < length; i++ ) { - /* Perform common calculations used in all states */ - - /* Long-term prediction */ - if( signalType == TYPE_VOICED ) { - /* Unrolled loop */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LTP_pred_Q14 = 2; - { - __m128i tmpa, tmpb, pred_lag_ptr_tmp; - pred_lag_ptr_tmp = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr[ -3 ] ) ); - pred_lag_ptr_tmp = _mm_shuffle_epi32( pred_lag_ptr_tmp, 0x1B ); - tmpa = _mm_mul_epi32( pred_lag_ptr_tmp, b_Q12_0123 ); - tmpa = _mm_srli_si128( tmpa, 2 ); - - pred_lag_ptr_tmp = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) );/* equal shift right 4 bytes */ - pred_lag_ptr_tmp = _mm_mul_epi32( pred_lag_ptr_tmp, b_sr_Q12_0123 ); - pred_lag_ptr_tmp = _mm_srli_si128( pred_lag_ptr_tmp, 2 ); - pred_lag_ptr_tmp = _mm_add_epi32( pred_lag_ptr_tmp, tmpa ); - - tmpb = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUFFLE( 0, 0, 3, 2 ) );/* equal shift right 8 bytes */ - pred_lag_ptr_tmp = _mm_add_epi32( pred_lag_ptr_tmp, tmpb ); - LTP_pred_Q14 += _mm_cvtsi128_si32( pred_lag_ptr_tmp ); - - LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] ); - LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 ); /* Q13 -> Q14 */ - pred_lag_ptr++; - } - } else { - LTP_pred_Q14 = 0; - } - - /* Long-term shaping */ - if( lag > 0 ) { - /* Symmetric, packed FIR coefficients */ - n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); - n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */ - shp_lag_ptr++; - } else { - n_LTP_Q14 = 0; - } - { - 
__m128i tmpa, tmpb, psLPC_Q14_tmp, a_Q12_tmp; - - for( k = 0; k < nStatesDelayedDecision; k++ ) { - /* Delayed decision state */ - psDD = &psDelDec[ k ]; - - /* Sample state */ - psSS = psSampleState[ k ]; - - /* Generate dither */ - psDD->Seed = silk_RAND( psDD->Seed ); - - /* Pointer used in short term prediction and shaping */ - psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; - /* Short-term prediction */ - silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 ); - - tmpb = _mm_setzero_si128(); - - /* step 1 */ - psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -3 ] ) ); /* -3, -2 , -1, 0 */ - psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); /* 0, -1, -2, -3 */ - tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_0123 ); /* 0, -1, -2, -3 * 0123 -> 0*0, 2*-2 */ - - tmpa = _mm_srli_epi64( tmpa, 16 ); - tmpb = _mm_add_epi32( tmpb, tmpa ); - - psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ - a_Q12_tmp = _mm_shuffle_epi32( a_Q12_0123, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ - psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); /* 1*-1, 3*-3 */ - psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); - tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); - - /* step 2 */ - psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -7 ] ) ); - psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); - tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_4567 ); - tmpa = _mm_srli_epi64( tmpa, 16 ); - tmpb = _mm_add_epi32( tmpb, tmpa ); - - psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ - a_Q12_tmp = _mm_shuffle_epi32( a_Q12_4567, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ - psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); - psLPC_Q14_tmp = _mm_srli_epi64( 
psLPC_Q14_tmp, 16 ); - tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); - - if ( opus_likely( predictLPCOrder == 16 ) ) - { - /* step 3 */ - psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -11 ] ) ); - psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); - tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_89AB ); - tmpa = _mm_srli_epi64( tmpa, 16 ); - tmpb = _mm_add_epi32( tmpb, tmpa ); - - psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ - a_Q12_tmp = _mm_shuffle_epi32( a_Q12_89AB, _MM_SHUFFLE(0, 3, 2, 1 ) );/* equal shift right 4 bytes */ - psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); - psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); - tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); - - /* setp 4 */ - psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -15 ] ) ); - psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); - tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_CDEF ); - tmpa = _mm_srli_epi64( tmpa, 16 ); - tmpb = _mm_add_epi32( tmpb, tmpa ); - - psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ - a_Q12_tmp = _mm_shuffle_epi32( a_Q12_CDEF, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ - psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); - psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); - tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); - - /* add at last */ - /* equal shift right 8 bytes*/ - tmpa = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0, 3, 2 ) ); - tmpb = _mm_add_epi32( tmpb, tmpa ); - LPC_pred_Q14 += _mm_cvtsi128_si32( tmpb ); - } - else - { - /* add at last */ - tmpa = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0, 3, 2 ) ); /* equal shift right 8 bytes*/ - tmpb = _mm_add_epi32( tmpb, tmpa ); - LPC_pred_Q14 += _mm_cvtsi128_si32( tmpb ); - - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] ); - LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] ); - 
} - - LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */ - - /* Noise shape feedback */ - silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ - /* Output of lowpass section */ - tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 ); - psDD->sAR2_Q14[ 0 ] = tmp2; - n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 ); - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] ); - /* Loop over allpass sections */ - for( j = 2; j < shapingLPCOrder; j += 2 ) { - /* Output of allpass section */ - tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j + 0 ] - tmp1, warping_Q16 ); - psDD->sAR2_Q14[ j - 1 ] = tmp1; - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ] ); - /* Output of allpass section */ - tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j + 1 ] - tmp2, warping_Q16 ); - psDD->sAR2_Q14[ j + 0 ] = tmp2; - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] ); - } - psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1; - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] ); - - n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 ); /* Q11 -> Q12 */ - n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 ); /* Q12 */ - n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 ); /* Q12 -> Q14 */ - - n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 ); /* Q12 */ - n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 ); /* Q12 */ - n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 ); /* Q12 -> Q14 */ - - /* Input minus prediction plus noise feedback */ - /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */ - tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */ - tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 ); /* Q13 */ - tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */ - tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */ - - r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* 
residual error Q10 */ - - /* Flip sign depending on dither */ - if ( psDD->Seed < 0 ) { - r_Q10 = -r_Q10; - } - r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); - - /* Find two quantization level candidates and measure their rate-distortion */ - q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); - q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); - if( q1_Q0 > 0 ) { - q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == 0 ) { - q1_Q10 = offset_Q10; - q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else if( q1_Q0 == -1 ) { - q2_Q10 = offset_Q10; - q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); - } else { /* q1_Q0 < -1 */ - q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); - q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); - q2_Q10 = silk_ADD32( q1_Q10, 1024 ); - rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); - rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); - } - rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); - rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 ); - rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); - rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 ); - - if( rd1_Q10 < rd2_Q10 ) { - psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); - psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); - psSS[ 0 ].Q_Q10 = q1_Q10; - psSS[ 1 ].Q_Q10 = q2_Q10; - } else { - psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); - psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); - psSS[ 0 ].Q_Q10 = q2_Q10; - psSS[ 1 ].Q_Q10 = q1_Q10; - } - - /* Update states for best quantization */ - - /* Quantized excitation */ 
- exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 ); - if ( psDD->Seed < 0 ) { - exc_Q14 = -exc_Q14; - } - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); - xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); - - /* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); - psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; - psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; - psSS[ 0 ].xq_Q14 = xq_Q14; - - /* Update states for second best quantization */ - - /* Quantized excitation */ - exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 ); - if ( psDD->Seed < 0 ) { - exc_Q14 = -exc_Q14; - } - - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); - xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); - - /* Update states */ - sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); - psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); - psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; - psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; - psSS[ 1 ].xq_Q14 = xq_Q14; - } - } - *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */ - last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */ - - /* Find winner */ - RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; - Winner_ind = 0; - for( k = 1; k < nStatesDelayedDecision; k++ ) { - if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10; - Winner_ind = k; - } - } - - /* Increase RD values of expired states */ - Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ]; - for( k = 0; k < nStatesDelayedDecision; k++ ) { - if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) { - psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 ); - psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 ); - silk_assert( 
psSampleState[ k ][ 0 ].RD_Q10 >= 0 ); - } - } - - /* Find worst in first set and best in second set */ - RDmax_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; - RDmin_Q10 = psSampleState[ 0 ][ 1 ].RD_Q10; - RDmax_ind = 0; - RDmin_ind = 0; - for( k = 1; k < nStatesDelayedDecision; k++ ) { - /* find worst in first set */ - if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) { - RDmax_Q10 = psSampleState[ k ][ 0 ].RD_Q10; - RDmax_ind = k; - } - /* find best in second set */ - if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) { - RDmin_Q10 = psSampleState[ k ][ 1 ].RD_Q10; - RDmin_ind = k; - } - } - - /* Replace a state if best from second set outperforms worst in first set */ - if( RDmin_Q10 < RDmax_Q10 ) { - silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i, - ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) ); - silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) ); - } - - /* Write samples from winner to output and long-term filter states */ - psDD = &psDelDec[ Winner_ind ]; - if( subfr > 0 || i >= decisionDelay ) { - pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); - xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( - silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) ); - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ]; - sLTP_Q15[ NSQ->sLTP_buf_idx - decisionDelay ] = psDD->Pred_Q15[ last_smple_idx ]; - } - NSQ->sLTP_shp_buf_idx++; - NSQ->sLTP_buf_idx++; - - /* Update states */ - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - psSS = &psSampleState[ k ][ 0 ]; - psDD->LF_AR_Q14 = psSS->LF_AR_Q14; - psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14; - psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14; - psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10; - psDD->Pred_Q15[ *smpl_buf_idx ] = 
silk_LSHIFT32( psSS->LPC_exc_Q14, 1 ); - psDD->Shape_Q14[ *smpl_buf_idx ] = psSS->sLTP_shp_Q14; - psDD->Seed = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) ); - psDD->RandState[ *smpl_buf_idx ] = psDD->Seed; - psDD->RD_Q10 = psSS->RD_Q10; - } - delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10; - } - /* Update LPC states */ - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); - } - RESTORE_STACK; -} - -static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ - const opus_int32 x_Q3[], /* I Input in Q3 */ - opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ - const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I Subframe number */ - opus_int nStatesDelayedDecision, /* I Number of del dec states */ - const opus_int LTP_scale_Q14, /* I LTP state scaling */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type, /* I Signal type */ - const opus_int decisionDelay /* I Decision delay */ -) -{ - opus_int i, k, lag; - opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; - NSQ_del_dec_struct *psDD; - __m128i xmm_inv_gain_Q23, xmm_x_Q3_x2x0, xmm_x_Q3_x3x1; - - lag = pitchL[ subfr ]; - inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); - - silk_assert( inv_gain_Q31 != 0 ); - - /* Calculate gain adjustment factor */ - if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { - gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); - } else { - gain_adj_Q16 = (opus_int32)1 << 16; - } - - /* Scale input */ - inv_gain_Q23 = silk_RSHIFT_ROUND( 
inv_gain_Q31, 8 ); - - /* prepare inv_gain_Q23 in packed 4 32-bits */ - xmm_inv_gain_Q23 = _mm_set1_epi32(inv_gain_Q23); - - for( i = 0; i < psEncC->subfr_length - 3; i += 4 ) { - xmm_x_Q3_x2x0 = _mm_loadu_si128( (__m128i *)(&(x_Q3[ i ] ) ) ); - /* equal shift right 4 bytes*/ - xmm_x_Q3_x3x1 = _mm_shuffle_epi32( xmm_x_Q3_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - - xmm_x_Q3_x2x0 = _mm_mul_epi32( xmm_x_Q3_x2x0, xmm_inv_gain_Q23 ); - xmm_x_Q3_x3x1 = _mm_mul_epi32( xmm_x_Q3_x3x1, xmm_inv_gain_Q23 ); - - xmm_x_Q3_x2x0 = _mm_srli_epi64( xmm_x_Q3_x2x0, 16 ); - xmm_x_Q3_x3x1 = _mm_slli_epi64( xmm_x_Q3_x3x1, 16 ); - - xmm_x_Q3_x2x0 = _mm_blend_epi16( xmm_x_Q3_x2x0, xmm_x_Q3_x3x1, 0xCC ); - - _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ])), xmm_x_Q3_x2x0 ); - } - - for( ; i < psEncC->subfr_length; i++ ) { - x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); - } - - /* Save inverse gain */ - NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; - - /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ - if( NSQ->rewhite_flag ) { - if( subfr == 0 ) { - /* Do LTP downscaling */ - inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 ); - } - for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { - silk_assert( i < MAX_FRAME_LENGTH ); - sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] ); - } - } - - /* Adjust for changing gain */ - if( gain_adj_Q16 != (opus_int32)1 << 16 ) { - /* Scale long-term shaping state */ - { - __m128i xmm_gain_adj_Q16, xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1; - - /* prepare gain_adj_Q16 in packed 4 32-bits */ - xmm_gain_adj_Q16 = _mm_set1_epi32( gain_adj_Q16 ); - - for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx - 3; i += 4 ) - { - xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ) ); - /* equal shift right 4 bytes*/ - xmm_sLTP_shp_Q14_x3x1 = _mm_shuffle_epi32( xmm_sLTP_shp_Q14_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - - 
xmm_sLTP_shp_Q14_x2x0 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x2x0, xmm_gain_adj_Q16 ); - xmm_sLTP_shp_Q14_x3x1 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x3x1, xmm_gain_adj_Q16 ); - - xmm_sLTP_shp_Q14_x2x0 = _mm_srli_epi64( xmm_sLTP_shp_Q14_x2x0, 16 ); - xmm_sLTP_shp_Q14_x3x1 = _mm_slli_epi64( xmm_sLTP_shp_Q14_x3x1, 16 ); - - xmm_sLTP_shp_Q14_x2x0 = _mm_blend_epi16( xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1, 0xCC ); - - _mm_storeu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 ); - } - - for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) { - NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] ); - } - - /* Scale long-term prediction state */ - if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { - for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx - decisionDelay; i++ ) { - sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); - } - } - - for( k = 0; k < nStatesDelayedDecision; k++ ) { - psDD = &psDelDec[ k ]; - - /* Scale scalar states */ - psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 ); - - /* Scale short-term prediction and shaping states */ - for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { - psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_Q14[ i ] ); - } - for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { - psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_Q14[ i ] ); - } - for( i = 0; i < DECISION_DELAY; i++ ) { - psDD->Pred_Q15[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred_Q15[ i ] ); - psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q14[ i ] ); - } - } - } - } -} diff --git a/thirdparty/opus/silk/x86/NSQ_sse.c b/thirdparty/opus/silk/x86/NSQ_sse.c deleted file mode 100644 index bb3c5f1955..0000000000 --- a/thirdparty/opus/silk/x86/NSQ_sse.c +++ /dev/null @@ -1,720 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are 
permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <xmmintrin.h> -#include <emmintrin.h> -#include <smmintrin.h> -#include "main.h" -#include "celt/x86/x86cpu.h" -#include "stack_alloc.h" - -static OPUS_INLINE void silk_nsq_scale_states_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - const opus_int32 x_Q3[], /* I input in Q3 */ - opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ - const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I subframe number */ - const opus_int LTP_scale_Q14, /* I */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type /* I Signal type */ -); - -static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( - silk_nsq_state *NSQ, /* I/O NSQ state */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_sc_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP state */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping AR coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int32 table[][4] /* I */ -); - -void silk_NSQ_sse4_1( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term 
prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -) -{ - opus_int k, lag, start_idx, LSF_interpolation_flag; - const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13; - opus_int16 *pxq; - VARDECL( opus_int32, sLTP_Q15 ); - VARDECL( opus_int16, sLTP ); - opus_int32 HarmShapeFIRPacked_Q14; - opus_int offset_Q10; - VARDECL( opus_int32, x_sc_Q10 ); - - opus_int32 table[ 64 ][ 4 ]; - opus_int32 tmp1; - opus_int32 q1_Q10, q2_Q10, rd1_Q20, rd2_Q20; - - SAVE_STACK; - - NSQ->rand_seed = psIndices->Seed; - - /* Set unvoiced lag to the previous one, overwrite later for voiced */ - lag = NSQ->lagPrev; - - silk_assert( NSQ->prev_gain_Q16 != 0 ); - - offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ]; - - /* 0 */ - q1_Q10 = offset_Q10; - q2_Q10 = offset_Q10 + ( 1024 - QUANT_LEVEL_ADJUST_Q10 ); - rd1_Q20 = q1_Q10 * Lambda_Q10; - rd2_Q20 = q2_Q10 * Lambda_Q10; - - table[ 32 ][ 0 ] = q1_Q10; - table[ 32 ][ 1 ] = q2_Q10; - table[ 32 ][ 2 ] = 2 * (q1_Q10 - q2_Q10); - table[ 32 ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10); - - /* -1 */ - q1_Q10 = offset_Q10 - ( 1024 - QUANT_LEVEL_ADJUST_Q10 ); - q2_Q10 = offset_Q10; - rd1_Q20 = - q1_Q10 * Lambda_Q10; - rd2_Q20 = q2_Q10 * Lambda_Q10; - - table[ 31 ][ 0 ] = q1_Q10; - table[ 31 ][ 1 ] = q2_Q10; - table[ 31 ][ 2 ] = 2 * (q1_Q10 - q2_Q10); 
- table[ 31 ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10); - - /* > 0 */ - for (k = 1; k <= 31; k++) - { - tmp1 = offset_Q10 + silk_LSHIFT( k, 10 ); - - q1_Q10 = tmp1 - QUANT_LEVEL_ADJUST_Q10; - q2_Q10 = tmp1 - QUANT_LEVEL_ADJUST_Q10 + 1024; - rd1_Q20 = q1_Q10 * Lambda_Q10; - rd2_Q20 = q2_Q10 * Lambda_Q10; - - table[ 32 + k ][ 0 ] = q1_Q10; - table[ 32 + k ][ 1 ] = q2_Q10; - table[ 32 + k ][ 2 ] = 2 * (q1_Q10 - q2_Q10); - table[ 32 + k ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10); - } - - /* < -1 */ - for (k = -32; k <= -2; k++) - { - tmp1 = offset_Q10 + silk_LSHIFT( k, 10 ); - - q1_Q10 = tmp1 + QUANT_LEVEL_ADJUST_Q10; - q2_Q10 = tmp1 + QUANT_LEVEL_ADJUST_Q10 + 1024; - rd1_Q20 = - q1_Q10 * Lambda_Q10; - rd2_Q20 = - q2_Q10 * Lambda_Q10; - - table[ 32 + k ][ 0 ] = q1_Q10; - table[ 32 + k ][ 1 ] = q2_Q10; - table[ 32 + k ][ 2 ] = 2 * (q1_Q10 - q2_Q10); - table[ 32 + k ][ 3 ] = (rd1_Q20 - rd2_Q20) + (q1_Q10 * q1_Q10 - q2_Q10 * q2_Q10); - } - - if( psIndices->NLSFInterpCoef_Q2 == 4 ) { - LSF_interpolation_flag = 0; - } else { - LSF_interpolation_flag = 1; - } - - ALLOC( sLTP_Q15, - psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); - ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); - ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); - /* Set up pointers to start of sub frame */ - NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length; - NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; - pxq = &NSQ->xq[ psEncC->ltp_mem_length ]; - for( k = 0; k < psEncC->nb_subfr; k++ ) { - A_Q12 = &PredCoef_Q12[ (( k >> 1 ) | ( 1 - LSF_interpolation_flag )) * MAX_LPC_ORDER ]; - B_Q14 = <PCoef_Q14[ k * LTP_ORDER ]; - AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ]; - - /* Noise shape parameters */ - silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); - HarmShapeFIRPacked_Q14 = silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 ); - HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 ); - - 
NSQ->rewhite_flag = 0; - if( psIndices->signalType == TYPE_VOICED ) { - /* Voiced */ - lag = pitchL[ k ]; - - /* Re-whitening */ - if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) { - /* Rewhiten with new A coefs */ - start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; - silk_assert( start_idx > 0 ); - - silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], - A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); - - NSQ->rewhite_flag = 1; - NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; - } - } - - silk_nsq_scale_states_sse4_1( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType ); - - if ( opus_likely( ( 10 == psEncC->shapingLPCOrder ) && ( 16 == psEncC->predictLPCOrder) ) ) - { - silk_noise_shape_quantizer_10_16_sse4_1( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14, - AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], - offset_Q10, psEncC->subfr_length, &(table[32]) ); - } - else - { - silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14, - AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10, - offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch ); - } - - x_Q3 += psEncC->subfr_length; - pulses += psEncC->subfr_length; - pxq += psEncC->subfr_length; - } - - /* Update lagPrev for next frame */ - NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; - - /* Save quantized speech and noise shaping signals */ - /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[ psEncC->ltp_mem_length ], psEncC->frame_length * sizeof( opus_int16 ) ) */ - silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); - silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ 
psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); - RESTORE_STACK; -} - -/***********************************/ -/* silk_noise_shape_quantizer_10_16 */ -/***********************************/ -static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1( - silk_nsq_state *NSQ, /* I/O NSQ state */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_sc_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP state */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping AR coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int32 table[][4] /* I */ -) -{ - opus_int i; - opus_int32 LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13; - opus_int32 n_LF_Q12, r_Q10, q1_Q0, q1_Q10, q2_Q10; - opus_int32 exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; - opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; - opus_int32 *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr; - - __m128i xmm_tempa, xmm_tempb; - - __m128i xmm_one; - - __m128i psLPC_Q14_hi_01234567, psLPC_Q14_hi_89ABCDEF; - __m128i psLPC_Q14_lo_01234567, psLPC_Q14_lo_89ABCDEF; - __m128i a_Q12_01234567, a_Q12_89ABCDEF; - - __m128i sAR2_Q14_hi_76543210, sAR2_Q14_lo_76543210; - __m128i AR_shp_Q13_76543210; - - shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; - pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; - Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); - - /* Set up short term AR state */ - psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ]; - - sLF_AR_shp_Q14 = NSQ->sLF_AR_shp_Q14; - xq_Q14 = psLPC_Q14[ 0 ]; - LTP_pred_Q13 = 0; - - /* load a_Q12 */ - xmm_one = 
_mm_set_epi8( 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 ); - - /* load a_Q12[0] - a_Q12[7] */ - a_Q12_01234567 = _mm_loadu_si128( (__m128i *)(&a_Q12[ 0 ] ) ); - /* load a_Q12[ 8 ] - a_Q12[ 15 ] */ - a_Q12_89ABCDEF = _mm_loadu_si128( (__m128i *)(&a_Q12[ 8 ] ) ); - - a_Q12_01234567 = _mm_shuffle_epi8( a_Q12_01234567, xmm_one ); - a_Q12_89ABCDEF = _mm_shuffle_epi8( a_Q12_89ABCDEF, xmm_one ); - - /* load AR_shp_Q13 */ - AR_shp_Q13_76543210 = _mm_loadu_si128( (__m128i *)(&AR_shp_Q13[0] ) ); - - /* load psLPC_Q14 */ - xmm_one = _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0 ); - - xmm_tempa = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[-16]) ); - xmm_tempb = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[-12]) ); - - xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one ); - xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one ); - - psLPC_Q14_hi_89ABCDEF = _mm_unpackhi_epi64( xmm_tempa, xmm_tempb ); - psLPC_Q14_lo_89ABCDEF = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb ); - - xmm_tempa = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -8 ]) ); - xmm_tempb = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -4 ]) ); - - xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one ); - xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one ); - - psLPC_Q14_hi_01234567 = _mm_unpackhi_epi64( xmm_tempa, xmm_tempb ); - psLPC_Q14_lo_01234567 = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb ); - - /* load sAR2_Q14 */ - xmm_tempa = _mm_loadu_si128( (__m128i *)(&(NSQ->sAR2_Q14[ 0 ]) ) ); - xmm_tempb = _mm_loadu_si128( (__m128i *)(&(NSQ->sAR2_Q14[ 4 ]) ) ); - - xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one ); - xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one ); - - sAR2_Q14_hi_76543210 = _mm_unpackhi_epi64( xmm_tempa, xmm_tempb ); - sAR2_Q14_lo_76543210 = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb ); - - /* prepare 1 in 8 * 16bit */ - xmm_one = _mm_set1_epi16(1); - - for( i = 0; i < length; i++ ) - { - /* Short-term prediction */ - __m128i xmm_hi_07, xmm_hi_8F, xmm_lo_07, xmm_lo_8F; - - /* Avoids introducing a 
bias because silk_SMLAWB() always rounds to -inf */ - LPC_pred_Q10 = 8; /* silk_RSHIFT( predictLPCOrder, 1 ); */ - - /* shift psLPC_Q14 */ - psLPC_Q14_hi_89ABCDEF = _mm_alignr_epi8( psLPC_Q14_hi_01234567, psLPC_Q14_hi_89ABCDEF, 2 ); - psLPC_Q14_lo_89ABCDEF = _mm_alignr_epi8( psLPC_Q14_lo_01234567, psLPC_Q14_lo_89ABCDEF, 2 ); - - psLPC_Q14_hi_01234567 = _mm_srli_si128( psLPC_Q14_hi_01234567, 2 ); - psLPC_Q14_lo_01234567 = _mm_srli_si128( psLPC_Q14_lo_01234567, 2 ); - - psLPC_Q14_hi_01234567 = _mm_insert_epi16( psLPC_Q14_hi_01234567, (xq_Q14 >> 16), 7 ); - psLPC_Q14_lo_01234567 = _mm_insert_epi16( psLPC_Q14_lo_01234567, (xq_Q14), 7 ); - - /* high part, use pmaddwd, results in 4 32-bit */ - xmm_hi_07 = _mm_madd_epi16( psLPC_Q14_hi_01234567, a_Q12_01234567 ); - xmm_hi_8F = _mm_madd_epi16( psLPC_Q14_hi_89ABCDEF, a_Q12_89ABCDEF ); - - /* low part, use pmulhw, results in 8 16-bit, note we need simulate unsigned * signed, _mm_srai_epi16(psLPC_Q14_lo_01234567, 15) */ - xmm_tempa = _mm_cmpgt_epi16( _mm_setzero_si128(), psLPC_Q14_lo_01234567 ); - xmm_tempb = _mm_cmpgt_epi16( _mm_setzero_si128(), psLPC_Q14_lo_89ABCDEF ); - - xmm_tempa = _mm_and_si128( xmm_tempa, a_Q12_01234567 ); - xmm_tempb = _mm_and_si128( xmm_tempb, a_Q12_89ABCDEF ); - - xmm_lo_07 = _mm_mulhi_epi16( psLPC_Q14_lo_01234567, a_Q12_01234567 ); - xmm_lo_8F = _mm_mulhi_epi16( psLPC_Q14_lo_89ABCDEF, a_Q12_89ABCDEF ); - - xmm_lo_07 = _mm_add_epi16( xmm_lo_07, xmm_tempa ); - xmm_lo_8F = _mm_add_epi16( xmm_lo_8F, xmm_tempb ); - - xmm_lo_07 = _mm_madd_epi16( xmm_lo_07, xmm_one ); - xmm_lo_8F = _mm_madd_epi16( xmm_lo_8F, xmm_one ); - - /* accumulate */ - xmm_hi_07 = _mm_add_epi32( xmm_hi_07, xmm_hi_8F ); - xmm_lo_07 = _mm_add_epi32( xmm_lo_07, xmm_lo_8F ); - - xmm_hi_07 = _mm_add_epi32( xmm_hi_07, xmm_lo_07 ); - - xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_unpackhi_epi64(xmm_hi_07, xmm_hi_07 ) ); - xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_shufflelo_epi16(xmm_hi_07, 0x0E ) ); - - LPC_pred_Q10 += _mm_cvtsi128_si32( 
xmm_hi_07 ); - - /* Long-term prediction */ - if ( opus_likely( signalType == TYPE_VOICED ) ) { - /* Unrolled loop */ - /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ - LTP_pred_Q13 = 2; - { - __m128i b_Q14_3210, b_Q14_0123, pred_lag_ptr_0123; - - b_Q14_3210 = OP_CVTEPI16_EPI32_M64( b_Q14 ); - b_Q14_0123 = _mm_shuffle_epi32( b_Q14_3210, 0x1B ); - - /* loaded: [0] [-1] [-2] [-3] */ - pred_lag_ptr_0123 = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr[ -3 ] ) ); - /* shuffle to [-3] [-2] [-1] [0] and to new xmm */ - xmm_tempa = _mm_shuffle_epi32( pred_lag_ptr_0123, 0x1B ); - /*64-bit multiply, a[2] * b[-2], a[0] * b[0] */ - xmm_tempa = _mm_mul_epi32( xmm_tempa, b_Q14_3210 ); - /* right shift 2 bytes (16 bits), zero extended */ - xmm_tempa = _mm_srli_si128( xmm_tempa, 2 ); - - /* a[1] * b[-1], a[3] * b[-3] */ - pred_lag_ptr_0123 = _mm_mul_epi32( pred_lag_ptr_0123, b_Q14_0123 ); - pred_lag_ptr_0123 = _mm_srli_si128( pred_lag_ptr_0123, 2 ); - - pred_lag_ptr_0123 = _mm_add_epi32( pred_lag_ptr_0123, xmm_tempa ); - /* equal shift right 8 bytes*/ - xmm_tempa = _mm_shuffle_epi32( pred_lag_ptr_0123, _MM_SHUFFLE( 0, 0, 3, 2 ) ); - xmm_tempa = _mm_add_epi32( xmm_tempa, pred_lag_ptr_0123 ); - - LTP_pred_Q13 += _mm_cvtsi128_si32( xmm_tempa ); - - LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], b_Q14[ 4 ] ); - pred_lag_ptr++; - } - } - - /* Noise shape feedback */ - NSQ->sAR2_Q14[ 9 ] = NSQ->sAR2_Q14[ 8 ]; - NSQ->sAR2_Q14[ 8 ] = _mm_cvtsi128_si32( _mm_srli_si128(_mm_unpackhi_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 ), 12 ) ); - - sAR2_Q14_hi_76543210 = _mm_slli_si128( sAR2_Q14_hi_76543210, 2 ); - sAR2_Q14_lo_76543210 = _mm_slli_si128( sAR2_Q14_lo_76543210, 2 ); - - sAR2_Q14_hi_76543210 = _mm_insert_epi16( sAR2_Q14_hi_76543210, (xq_Q14 >> 16), 0 ); - sAR2_Q14_lo_76543210 = _mm_insert_epi16( sAR2_Q14_lo_76543210, (xq_Q14), 0 ); - - /* high part, use pmaddwd, results in 4 32-bit */ - xmm_hi_07 = _mm_madd_epi16( sAR2_Q14_hi_76543210, 
AR_shp_Q13_76543210 ); - - /* low part, use pmulhw, results in 8 16-bit, note we need simulate unsigned * signed,_mm_srai_epi16(sAR2_Q14_lo_76543210, 15) */ - xmm_tempa = _mm_cmpgt_epi16( _mm_setzero_si128(), sAR2_Q14_lo_76543210 ); - xmm_tempa = _mm_and_si128( xmm_tempa, AR_shp_Q13_76543210 ); - - xmm_lo_07 = _mm_mulhi_epi16( sAR2_Q14_lo_76543210, AR_shp_Q13_76543210 ); - xmm_lo_07 = _mm_add_epi16( xmm_lo_07, xmm_tempa ); - - xmm_lo_07 = _mm_madd_epi16( xmm_lo_07, xmm_one ); - - /* accumulate */ - xmm_hi_07 = _mm_add_epi32( xmm_hi_07, xmm_lo_07 ); - - xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_unpackhi_epi64(xmm_hi_07, xmm_hi_07 ) ); - xmm_hi_07 = _mm_add_epi32( xmm_hi_07, _mm_shufflelo_epi16(xmm_hi_07, 0x0E ) ); - - n_AR_Q12 = 5 + _mm_cvtsi128_si32( xmm_hi_07 ); - - n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sAR2_Q14[ 8 ], AR_shp_Q13[ 8 ] ); - n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sAR2_Q14[ 9 ], AR_shp_Q13[ 9 ] ); - - n_AR_Q12 = silk_LSHIFT32( n_AR_Q12, 1 ); /* Q11 -> Q12 */ - n_AR_Q12 = silk_SMLAWB( n_AR_Q12, sLF_AR_shp_Q14, Tilt_Q14 ); - - n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 ); - n_LF_Q12 = silk_SMLAWT( n_LF_Q12, sLF_AR_shp_Q14, LF_shp_Q14 ); - - silk_assert( lag > 0 || signalType != TYPE_VOICED ); - - /* Combine prediction and noise shaping signals */ - tmp1 = silk_SUB32( silk_LSHIFT32( LPC_pred_Q10, 2 ), n_AR_Q12 ); /* Q12 */ - tmp1 = silk_SUB32( tmp1, n_LF_Q12 ); /* Q12 */ - if( lag > 0 ) { - /* Symmetric, packed FIR coefficients */ - n_LTP_Q13 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); - n_LTP_Q13 = silk_SMLAWT( n_LTP_Q13, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); - n_LTP_Q13 = silk_LSHIFT( n_LTP_Q13, 1 ); - shp_lag_ptr++; - - tmp2 = silk_SUB32( LTP_pred_Q13, n_LTP_Q13 ); /* Q13 */ - tmp1 = silk_ADD_LSHIFT32( tmp2, tmp1, 1 ); /* Q13 */ - tmp1 = silk_RSHIFT_ROUND( tmp1, 3 ); /* Q10 */ - } else { - tmp1 = silk_RSHIFT_ROUND( tmp1, 2 ); /* Q10 */ - } - - r_Q10 = 
silk_SUB32( x_sc_Q10[ i ], tmp1 ); /* residual error Q10 */ - - /* Generate dither */ - NSQ->rand_seed = silk_RAND( NSQ->rand_seed ); - - /* Flip sign depending on dither */ - tmp2 = -r_Q10; - if ( NSQ->rand_seed < 0 ) r_Q10 = tmp2; - - r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); - - /* Find two quantization level candidates and measure their rate-distortion */ - q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); - q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); - - q1_Q10 = table[q1_Q0][0]; - q2_Q10 = table[q1_Q0][1]; - - if (r_Q10 * table[q1_Q0][2] - table[q1_Q0][3] < 0) - { - q1_Q10 = q2_Q10; - } - - pulses[ i ] = (opus_int8)silk_RSHIFT_ROUND( q1_Q10, 10 ); - - /* Excitation */ - exc_Q14 = silk_LSHIFT( q1_Q10, 4 ); - - tmp2 = -exc_Q14; - if ( NSQ->rand_seed < 0 ) exc_Q14 = tmp2; - - /* Add predictions */ - LPC_exc_Q14 = silk_ADD_LSHIFT32( exc_Q14, LTP_pred_Q13, 1 ); - xq_Q14 = silk_ADD_LSHIFT32( LPC_exc_Q14, LPC_pred_Q10, 4 ); - - /* Update states */ - psLPC_Q14++; - *psLPC_Q14 = xq_Q14; - sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, n_AR_Q12, 2 ); - - NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx ] = silk_SUB_LSHIFT32( sLF_AR_shp_Q14, n_LF_Q12, 2 ); - sLTP_Q15[ NSQ->sLTP_buf_idx ] = silk_LSHIFT( LPC_exc_Q14, 1 ); - NSQ->sLTP_shp_buf_idx++; - NSQ->sLTP_buf_idx++; - - /* Make dither dependent on quantized signal */ - NSQ->rand_seed = silk_ADD32_ovflw( NSQ->rand_seed, pulses[ i ] ); - } - - NSQ->sLF_AR_shp_Q14 = sLF_AR_shp_Q14; - - /* Scale XQ back to normal level before saving */ - psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH ]; - - /* write back sAR2_Q14 */ - xmm_tempa = _mm_unpackhi_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 ); - xmm_tempb = _mm_unpacklo_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 ); - _mm_storeu_si128( (__m128i *)(&NSQ->sAR2_Q14[ 4 ]), xmm_tempa ); - _mm_storeu_si128( (__m128i *)(&NSQ->sAR2_Q14[ 0 ]), xmm_tempb ); - - /* xq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psLPC_Q14[ i ], Gain_Q10 ), 8 ) ); */ - { - __m128i 
xmm_Gain_Q10; - __m128i xmm_xq_Q14_3210, xmm_xq_Q14_x3x1, xmm_xq_Q14_7654, xmm_xq_Q14_x7x5; - - /* prepare (1 << 7) in packed 4 32-bits */ - xmm_tempa = _mm_set1_epi32( (1 << 7) ); - - /* prepare Gain_Q10 in packed 4 32-bits */ - xmm_Gain_Q10 = _mm_set1_epi32( Gain_Q10 ); - - /* process xq */ - for (i = 0; i < length - 7; i += 8) - { - xmm_xq_Q14_3210 = _mm_loadu_si128( (__m128i *)(&(psLPC_Q14[ i + 0 ] ) ) ); - xmm_xq_Q14_7654 = _mm_loadu_si128( (__m128i *)(&(psLPC_Q14[ i + 4 ] ) ) ); - - /* equal shift right 4 bytes*/ - xmm_xq_Q14_x3x1 = _mm_shuffle_epi32( xmm_xq_Q14_3210, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - /* equal shift right 4 bytes*/ - xmm_xq_Q14_x7x5 = _mm_shuffle_epi32( xmm_xq_Q14_7654, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - - xmm_xq_Q14_3210 = _mm_mul_epi32( xmm_xq_Q14_3210, xmm_Gain_Q10 ); - xmm_xq_Q14_x3x1 = _mm_mul_epi32( xmm_xq_Q14_x3x1, xmm_Gain_Q10 ); - xmm_xq_Q14_7654 = _mm_mul_epi32( xmm_xq_Q14_7654, xmm_Gain_Q10 ); - xmm_xq_Q14_x7x5 = _mm_mul_epi32( xmm_xq_Q14_x7x5, xmm_Gain_Q10 ); - - xmm_xq_Q14_3210 = _mm_srli_epi64( xmm_xq_Q14_3210, 16 ); - xmm_xq_Q14_x3x1 = _mm_slli_epi64( xmm_xq_Q14_x3x1, 16 ); - xmm_xq_Q14_7654 = _mm_srli_epi64( xmm_xq_Q14_7654, 16 ); - xmm_xq_Q14_x7x5 = _mm_slli_epi64( xmm_xq_Q14_x7x5, 16 ); - - xmm_xq_Q14_3210 = _mm_blend_epi16( xmm_xq_Q14_3210, xmm_xq_Q14_x3x1, 0xCC ); - xmm_xq_Q14_7654 = _mm_blend_epi16( xmm_xq_Q14_7654, xmm_xq_Q14_x7x5, 0xCC ); - - /* silk_RSHIFT_ROUND(xq, 8) */ - xmm_xq_Q14_3210 = _mm_add_epi32( xmm_xq_Q14_3210, xmm_tempa ); - xmm_xq_Q14_7654 = _mm_add_epi32( xmm_xq_Q14_7654, xmm_tempa ); - - xmm_xq_Q14_3210 = _mm_srai_epi32( xmm_xq_Q14_3210, 8 ); - xmm_xq_Q14_7654 = _mm_srai_epi32( xmm_xq_Q14_7654, 8 ); - - /* silk_SAT16 */ - xmm_xq_Q14_3210 = _mm_packs_epi32( xmm_xq_Q14_3210, xmm_xq_Q14_7654 ); - - /* save to xq */ - _mm_storeu_si128( (__m128i *)(&xq[ i ] ), xmm_xq_Q14_3210 ); - } - } - for ( ; i < length; i++) - { - xq[i] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psLPC_Q14[ i ], Gain_Q10 ), 8 ) 
); - } - - /* Update LPC synth buffer */ - silk_memcpy( NSQ->sLPC_Q14, &NSQ->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); -} - -static OPUS_INLINE void silk_nsq_scale_states_sse4_1( - const silk_encoder_state *psEncC, /* I Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - const opus_int32 x_Q3[], /* I input in Q3 */ - opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ - const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ - opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ - opus_int subfr, /* I subframe number */ - const opus_int LTP_scale_Q14, /* I */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ - const opus_int signal_type /* I Signal type */ -) -{ - opus_int i, lag; - opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; - __m128i xmm_inv_gain_Q23, xmm_x_Q3_x2x0, xmm_x_Q3_x3x1; - - lag = pitchL[ subfr ]; - inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); - silk_assert( inv_gain_Q31 != 0 ); - - /* Calculate gain adjustment factor */ - if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { - gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); - } else { - gain_adj_Q16 = (opus_int32)1 << 16; - } - - /* Scale input */ - inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); - - /* prepare inv_gain_Q23 in packed 4 32-bits */ - xmm_inv_gain_Q23 = _mm_set1_epi32(inv_gain_Q23); - - for( i = 0; i < psEncC->subfr_length - 3; i += 4 ) { - xmm_x_Q3_x2x0 = _mm_loadu_si128( (__m128i *)(&(x_Q3[ i ] ) ) ); - - /* equal shift right 4 bytes*/ - xmm_x_Q3_x3x1 = _mm_shuffle_epi32( xmm_x_Q3_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - - xmm_x_Q3_x2x0 = _mm_mul_epi32( xmm_x_Q3_x2x0, xmm_inv_gain_Q23 ); - xmm_x_Q3_x3x1 = _mm_mul_epi32( xmm_x_Q3_x3x1, xmm_inv_gain_Q23 ); - - xmm_x_Q3_x2x0 = _mm_srli_epi64( xmm_x_Q3_x2x0, 16 ); - xmm_x_Q3_x3x1 = _mm_slli_epi64( xmm_x_Q3_x3x1, 16 ); - - xmm_x_Q3_x2x0 = _mm_blend_epi16( 
xmm_x_Q3_x2x0, xmm_x_Q3_x3x1, 0xCC ); - - _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ] ) ), xmm_x_Q3_x2x0 ); - } - - for( ; i < psEncC->subfr_length; i++ ) { - x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); - } - - /* Save inverse gain */ - NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; - - /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ - if( NSQ->rewhite_flag ) { - if( subfr == 0 ) { - /* Do LTP downscaling */ - inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 ); - } - for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { - silk_assert( i < MAX_FRAME_LENGTH ); - sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] ); - } - } - - /* Adjust for changing gain */ - if( gain_adj_Q16 != (opus_int32)1 << 16 ) { - /* Scale long-term shaping state */ - __m128i xmm_gain_adj_Q16, xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1; - - /* prepare gain_adj_Q16 in packed 4 32-bits */ - xmm_gain_adj_Q16 = _mm_set1_epi32(gain_adj_Q16); - - for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx - 3; i += 4 ) - { - xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ) ); - /* equal shift right 4 bytes*/ - xmm_sLTP_shp_Q14_x3x1 = _mm_shuffle_epi32( xmm_sLTP_shp_Q14_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); - - xmm_sLTP_shp_Q14_x2x0 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x2x0, xmm_gain_adj_Q16 ); - xmm_sLTP_shp_Q14_x3x1 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x3x1, xmm_gain_adj_Q16 ); - - xmm_sLTP_shp_Q14_x2x0 = _mm_srli_epi64( xmm_sLTP_shp_Q14_x2x0, 16 ); - xmm_sLTP_shp_Q14_x3x1 = _mm_slli_epi64( xmm_sLTP_shp_Q14_x3x1, 16 ); - - xmm_sLTP_shp_Q14_x2x0 = _mm_blend_epi16( xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1, 0xCC ); - - _mm_storeu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 ); - } - - for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) { - NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] ); - } - - /* Scale long-term 
prediction state */ - if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { - for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { - sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); - } - } - - NSQ->sLF_AR_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sLF_AR_shp_Q14 ); - - /* Scale short-term prediction and shaping states */ - for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { - NSQ->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLPC_Q14[ i ] ); - } - for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { - NSQ->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sAR2_Q14[ i ] ); - } - } -} diff --git a/thirdparty/opus/silk/x86/SigProc_FIX_sse.h b/thirdparty/opus/silk/x86/SigProc_FIX_sse.h deleted file mode 100644 index 61efa8da41..0000000000 --- a/thirdparty/opus/silk/x86/SigProc_FIX_sse.h +++ /dev/null @@ -1,94 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef SIGPROC_FIX_SSE_H -#define SIGPROC_FIX_SSE_H - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#if defined(OPUS_X86_MAY_HAVE_SSE4_1) -void silk_burg_modified_sse4_1( - opus_int32 *res_nrg, /* O Residual energy */ - opus_int *res_nrg_Q, /* O Residual energy Q value */ - opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ - const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ - const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ - const opus_int subfr_length, /* I Input signal subframe length (incl. 
D preceding samples) */ - const opus_int nb_subfr, /* I Number of subframes stacked in x */ - const opus_int D, /* I Order */ - int arch /* I Run-time architecture */ -); - -#if defined(OPUS_X86_PRESUME_SSE4_1) -#define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \ - ((void)(arch), silk_burg_modified_sse4_1(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch)) - -#else - -extern void (*const SILK_BURG_MODIFIED_IMPL[OPUS_ARCHMASK + 1])( - opus_int32 *res_nrg, /* O Residual energy */ - opus_int *res_nrg_Q, /* O Residual energy Q value */ - opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ - const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ - const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ - const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ - const opus_int nb_subfr, /* I Number of subframes stacked in x */ - const opus_int D, /* I Order */ - int arch /* I Run-time architecture */); - -# define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \ - ((*SILK_BURG_MODIFIED_IMPL[(arch) & OPUS_ARCHMASK])(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch)) - -#endif - -opus_int64 silk_inner_prod16_aligned_64_sse4_1( - const opus_int16 *inVec1, - const opus_int16 *inVec2, - const opus_int len -); - - -#if defined(OPUS_X86_PRESUME_SSE4_1) - -#define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \ - ((void)(arch),silk_inner_prod16_aligned_64_sse4_1(inVec1, inVec2, len)) - -#else - -extern opus_int64 (*const SILK_INNER_PROD16_ALIGNED_64_IMPL[OPUS_ARCHMASK + 1])( - const opus_int16 *inVec1, - const opus_int16 *inVec2, - const opus_int len); - -# define silk_inner_prod16_aligned_64(inVec1, inVec2, len, arch) \ - ((*SILK_INNER_PROD16_ALIGNED_64_IMPL[(arch) & OPUS_ARCHMASK])(inVec1, inVec2, len)) - 
-#endif -#endif -#endif diff --git a/thirdparty/opus/silk/x86/VAD_sse.c b/thirdparty/opus/silk/x86/VAD_sse.c deleted file mode 100644 index 4e90f4410d..0000000000 --- a/thirdparty/opus/silk/x86/VAD_sse.c +++ /dev/null @@ -1,277 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <xmmintrin.h> -#include <emmintrin.h> -#include <smmintrin.h> - -#include "main.h" -#include "stack_alloc.h" - -/* Weighting factors for tilt measure */ -static const opus_int32 tiltWeights[ VAD_N_BANDS ] = { 30000, 6000, -12000, -12000 }; - -/***************************************/ -/* Get the speech activity level in Q8 */ -/***************************************/ -opus_int silk_VAD_GetSA_Q8_sse4_1( /* O Return value, 0 if success */ - silk_encoder_state *psEncC, /* I/O Encoder state */ - const opus_int16 pIn[] /* I PCM input */ -) -{ - opus_int SA_Q15, pSNR_dB_Q7, input_tilt; - opus_int decimated_framelength1, decimated_framelength2; - opus_int decimated_framelength; - opus_int dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s; - opus_int32 sumSquared, smooth_coef_Q16; - opus_int16 HPstateTmp; - VARDECL( opus_int16, X ); - opus_int32 Xnrg[ VAD_N_BANDS ]; - opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ]; - opus_int32 speech_nrg, x_tmp; - opus_int X_offset[ VAD_N_BANDS ]; - opus_int ret = 0; - silk_VAD_state *psSilk_VAD = &psEncC->sVAD; - - SAVE_STACK; - - /* Safety checks */ - silk_assert( VAD_N_BANDS == 4 ); - silk_assert( MAX_FRAME_LENGTH >= psEncC->frame_length ); - silk_assert( psEncC->frame_length <= 512 ); - silk_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) ); - - /***********************/ - /* Filter and Decimate */ - /***********************/ - decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 ); - decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 ); - decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 ); - /* Decimate into 4 bands: - 0 L 3L L 3L 5L - - -- - -- -- - 8 8 2 4 4 - - [0-1 kHz| temp. 
|1-2 kHz| 2-4 kHz | 4-8 kHz | - - They're arranged to allow the minimal ( frame_length / 4 ) extra - scratch space during the downsampling process */ - X_offset[ 0 ] = 0; - X_offset[ 1 ] = decimated_framelength + decimated_framelength2; - X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength; - X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2; - ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 ); - - /* 0-8 kHz to 0-4 kHz and 4-8 kHz */ - silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[ 0 ], - X, &X[ X_offset[ 3 ] ], psEncC->frame_length ); - - /* 0-4 kHz to 0-2 kHz and 2-4 kHz */ - silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ], - X, &X[ X_offset[ 2 ] ], decimated_framelength1 ); - - /* 0-2 kHz to 0-1 kHz and 1-2 kHz */ - silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ], - X, &X[ X_offset[ 1 ] ], decimated_framelength2 ); - - /*********************************************/ - /* HP filter on lowest band (differentiator) */ - /*********************************************/ - X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ], 1 ); - HPstateTmp = X[ decimated_framelength - 1 ]; - for( i = decimated_framelength - 1; i > 0; i-- ) { - X[ i - 1 ] = silk_RSHIFT( X[ i - 1 ], 1 ); - X[ i ] -= X[ i - 1 ]; - } - X[ 0 ] -= psSilk_VAD->HPstate; - psSilk_VAD->HPstate = HPstateTmp; - - /*************************************/ - /* Calculate the energy in each band */ - /*************************************/ - for( b = 0; b < VAD_N_BANDS; b++ ) { - /* Find the decimated framelength in the non-uniformly divided bands */ - decimated_framelength = silk_RSHIFT( psEncC->frame_length, silk_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) ); - - /* Split length into subframe lengths */ - dec_subframe_length = silk_RSHIFT( decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2 ); - dec_subframe_offset = 0; - - /* Compute energy per sub-frame */ - /* initialize with summed energy of last subframe */ - Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ]; - 
for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) { - __m128i xmm_X, xmm_acc; - sumSquared = 0; - - xmm_acc = _mm_setzero_si128(); - - for( i = 0; i < dec_subframe_length - 7; i += 8 ) - { - xmm_X = _mm_loadu_si128( (__m128i *)&(X[ X_offset[ b ] + i + dec_subframe_offset ] ) ); - xmm_X = _mm_srai_epi16( xmm_X, 3 ); - xmm_X = _mm_madd_epi16( xmm_X, xmm_X ); - xmm_acc = _mm_add_epi32( xmm_acc, xmm_X ); - } - - xmm_acc = _mm_add_epi32( xmm_acc, _mm_unpackhi_epi64( xmm_acc, xmm_acc ) ); - xmm_acc = _mm_add_epi32( xmm_acc, _mm_shufflelo_epi16( xmm_acc, 0x0E ) ); - - sumSquared += _mm_cvtsi128_si32( xmm_acc ); - - for( ; i < dec_subframe_length; i++ ) { - /* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2. */ - /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128) */ - x_tmp = silk_RSHIFT( - X[ X_offset[ b ] + i + dec_subframe_offset ], 3 ); - sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp ); - - /* Safety check */ - silk_assert( sumSquared >= 0 ); - } - - /* Add/saturate summed energy of current subframe */ - if( s < VAD_INTERNAL_SUBFRAMES - 1 ) { - Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], sumSquared ); - } else { - /* Look-ahead subframe */ - Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], silk_RSHIFT( sumSquared, 1 ) ); - } - - dec_subframe_offset += dec_subframe_length; - } - psSilk_VAD->XnrgSubfr[ b ] = sumSquared; - } - - /********************/ - /* Noise estimation */ - /********************/ - silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD ); - - /***********************************************/ - /* Signal-plus-noise to noise ratio estimation */ - /***********************************************/ - sumSquared = 0; - input_tilt = 0; - for( b = 0; b < VAD_N_BANDS; b++ ) { - speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ]; - if( speech_nrg > 0 ) { - /* Divide, with sufficient resolution */ - if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) { - NrgToNoiseRatio_Q8[ b ] = silk_DIV32( silk_LSHIFT( Xnrg[ b ], 8 ), 
psSilk_VAD->NL[ b ] + 1 ); - } else { - NrgToNoiseRatio_Q8[ b ] = silk_DIV32( Xnrg[ b ], silk_RSHIFT( psSilk_VAD->NL[ b ], 8 ) + 1 ); - } - - /* Convert to log domain */ - SNR_Q7 = silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128; - - /* Sum-of-squares */ - sumSquared = silk_SMLABB( sumSquared, SNR_Q7, SNR_Q7 ); /* Q14 */ - - /* Tilt measure */ - if( speech_nrg < ( (opus_int32)1 << 20 ) ) { - /* Scale down SNR value for small subband speech energies */ - SNR_Q7 = silk_SMULWB( silk_LSHIFT( silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 ); - } - input_tilt = silk_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 ); - } else { - NrgToNoiseRatio_Q8[ b ] = 256; - } - } - - /* Mean-of-squares */ - sumSquared = silk_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */ - - /* Root-mean-square approximation, scale to dBs, and write to output pointer */ - pSNR_dB_Q7 = (opus_int16)( 3 * silk_SQRT_APPROX( sumSquared ) ); /* Q7 */ - - /*********************************/ - /* Speech Probability Estimation */ - /*********************************/ - SA_Q15 = silk_sigm_Q15( silk_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 ); - - /**************************/ - /* Frequency Tilt Measure */ - /**************************/ - psEncC->input_tilt_Q15 = silk_LSHIFT( silk_sigm_Q15( input_tilt ) - 16384, 1 ); - - /**************************************************/ - /* Scale the sigmoid output based on power levels */ - /**************************************************/ - speech_nrg = 0; - for( b = 0; b < VAD_N_BANDS; b++ ) { - /* Accumulate signal-without-noise energies, higher frequency bands have more weight */ - speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 ); - } - - /* Power scaling */ - if( speech_nrg <= 0 ) { - SA_Q15 = silk_RSHIFT( SA_Q15, 1 ); - } else if( speech_nrg < 32768 ) { - if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { - speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 ); - } else { - speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 
); - } - - /* square-root */ - speech_nrg = silk_SQRT_APPROX( speech_nrg ); - SA_Q15 = silk_SMULWB( 32768 + speech_nrg, SA_Q15 ); - } - - /* Copy the resulting speech activity in Q8 */ - psEncC->speech_activity_Q8 = silk_min_int( silk_RSHIFT( SA_Q15, 7 ), silk_uint8_MAX ); - - /***********************************/ - /* Energy Level and SNR estimation */ - /***********************************/ - /* Smoothing coefficient */ - smooth_coef_Q16 = silk_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, silk_SMULWB( (opus_int32)SA_Q15, SA_Q15 ) ); - - if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { - smooth_coef_Q16 >>= 1; - } - - for( b = 0; b < VAD_N_BANDS; b++ ) { - /* compute smoothed energy-to-noise ratio per band */ - psSilk_VAD->NrgRatioSmth_Q8[ b ] = silk_SMLAWB( psSilk_VAD->NrgRatioSmth_Q8[ b ], - NrgToNoiseRatio_Q8[ b ] - psSilk_VAD->NrgRatioSmth_Q8[ b ], smooth_coef_Q16 ); - - /* signal to noise ratio in dB per band */ - SNR_Q7 = 3 * ( silk_lin2log( psSilk_VAD->NrgRatioSmth_Q8[b] ) - 8 * 128 ); - /* quality = sigmoid( 0.25 * ( SNR_dB - 16 ) ); */ - psEncC->input_quality_bands_Q15[ b ] = silk_sigm_Q15( silk_RSHIFT( SNR_Q7 - 16 * 128, 4 ) ); - } - - RESTORE_STACK; - return( ret ); -} diff --git a/thirdparty/opus/silk/x86/VQ_WMat_EC_sse.c b/thirdparty/opus/silk/x86/VQ_WMat_EC_sse.c deleted file mode 100644 index 74d6c6d0ec..0000000000 --- a/thirdparty/opus/silk/x86/VQ_WMat_EC_sse.c +++ /dev/null @@ -1,142 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <xmmintrin.h> -#include <emmintrin.h> -#include <smmintrin.h> -#include "main.h" -#include "celt/x86/x86cpu.h" - -/* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */ -void silk_VQ_WMat_EC_sse4_1( - opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ - opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ - const opus_int8 *cb_Q7, /* I codebook */ - const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ - const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. 
weighted error and rate */ - const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ -) -{ - opus_int k, gain_tmp_Q7; - const opus_int8 *cb_row_Q7; - opus_int16 diff_Q14[ 5 ]; - opus_int32 sum1_Q14, sum2_Q16; - - __m128i C_tmp1, C_tmp2, C_tmp3, C_tmp4, C_tmp5; - /* Loop over codebook */ - *rate_dist_Q14 = silk_int32_MAX; - cb_row_Q7 = cb_Q7; - for( k = 0; k < L; k++ ) { - gain_tmp_Q7 = cb_gain_Q7[k]; - - diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 ); - - C_tmp1 = OP_CVTEPI16_EPI32_M64( &in_Q14[ 1 ] ); - C_tmp2 = OP_CVTEPI8_EPI32_M32( &cb_row_Q7[ 1 ] ); - C_tmp2 = _mm_slli_epi32( C_tmp2, 7 ); - C_tmp1 = _mm_sub_epi32( C_tmp1, C_tmp2 ); - - diff_Q14[ 1 ] = _mm_extract_epi16( C_tmp1, 0 ); - diff_Q14[ 2 ] = _mm_extract_epi16( C_tmp1, 2 ); - diff_Q14[ 3 ] = _mm_extract_epi16( C_tmp1, 4 ); - diff_Q14[ 4 ] = _mm_extract_epi16( C_tmp1, 6 ); - - /* Weighted rate */ - sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] ); - - /* Penalty for too large gain */ - sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 ); - - silk_assert( sum1_Q14 >= 0 ); - - /* first row of W_Q18 */ - C_tmp3 = _mm_loadu_si128( (__m128i *)(&W_Q18[ 1 ] ) ); - C_tmp4 = _mm_mul_epi32( C_tmp3, C_tmp1 ); - C_tmp4 = _mm_srli_si128( C_tmp4, 2 ); - - C_tmp1 = _mm_shuffle_epi32( C_tmp1, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* shift right 4 bytes */ - C_tmp3 = _mm_shuffle_epi32( C_tmp3, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* shift right 4 bytes */ - - C_tmp5 = _mm_mul_epi32( C_tmp3, C_tmp1 ); - C_tmp5 = _mm_srli_si128( C_tmp5, 2 ); - - C_tmp5 = _mm_add_epi32( C_tmp4, C_tmp5 ); - C_tmp5 = _mm_slli_epi32( C_tmp5, 1 ); - - C_tmp5 = _mm_add_epi32( C_tmp5, _mm_shuffle_epi32( C_tmp5, _MM_SHUFFLE( 0, 0, 0, 2 ) ) ); - sum2_Q16 = _mm_cvtsi128_si32( C_tmp5 ); - - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 0 ], diff_Q14[ 0 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 0 ] ); - - /* second row of 
W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 7 ], diff_Q14[ 2 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 8 ], diff_Q14[ 3 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 9 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 6 ], diff_Q14[ 1 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 1 ] ); - - /* third row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 13 ], diff_Q14[ 3 ] ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 2 ] ); - - /* fourth row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 19 ], diff_Q14[ 4 ] ); - sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); - sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 3 ] ); - - /* last row of W_Q18 */ - sum2_Q16 = silk_SMULWB( W_Q18[ 24 ], diff_Q14[ 4 ] ); - sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] ); - - silk_assert( sum1_Q14 >= 0 ); - - /* find best */ - if( sum1_Q14 < *rate_dist_Q14 ) { - *rate_dist_Q14 = sum1_Q14; - *ind = (opus_int8)k; - *gain_Q7 = gain_tmp_Q7; - } - - /* Go to next cbk vector */ - cb_row_Q7 += LTP_ORDER; - } -} diff --git a/thirdparty/opus/silk/x86/main_sse.h b/thirdparty/opus/silk/x86/main_sse.h deleted file mode 100644 index d8d61310ed..0000000000 --- a/thirdparty/opus/silk/x86/main_sse.h +++ /dev/null @@ -1,277 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef MAIN_SSE_H -#define MAIN_SSE_H - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -# if defined(OPUS_X86_MAY_HAVE_SSE4_1) - -# define OVERRIDE_silk_VQ_WMat_EC - -void silk_VQ_WMat_EC_sse4_1( - opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ - opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ - const opus_int8 *cb_Q7, /* I codebook */ - const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ - const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. 
weighted error and rate */ - const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ -); - -#if defined OPUS_X86_PRESUME_SSE4_1 - -#define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L, arch) \ - ((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L)) - -#else - -extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])( - opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ - opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ - const opus_int8 *cb_Q7, /* I codebook */ - const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ - const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. 
weighted error and rate */ - const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ -); - -# define silk_VQ_WMat_EC(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L, arch) \ - ((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18, cb_Q7, cb_gain_Q7, cl_Q5, \ - mu_Q9, max_gain_Q7, L)) - -#endif - -# define OVERRIDE_silk_NSQ - -void silk_NSQ_sse4_1( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -); - -#if defined OPUS_X86_PRESUME_SSE4_1 - -#define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) - -#else - 
-extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -); - -# define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) - -#endif - -# define OVERRIDE_silk_NSQ_del_dec - -void silk_NSQ_del_dec_sse4_1( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER 
* MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -); - -#if defined OPUS_X86_PRESUME_SSE4_1 - -#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) - -#else - -extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I 
Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -); - -# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \ - ((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \ - HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14)) - -#endif - -void silk_noise_shape_quantizer( - silk_nsq_state *NSQ, /* I/O NSQ state */ - opus_int signalType, /* I Signal type */ - const opus_int32 x_sc_Q10[], /* I */ - opus_int8 pulses[], /* O */ - opus_int16 xq[], /* O */ - opus_int32 sLTP_Q15[], /* I/O LTP state */ - const opus_int16 a_Q12[], /* I Short term prediction coefs */ - const opus_int16 b_Q14[], /* I Long term prediction coefs */ - const opus_int16 AR_shp_Q13[], /* I Noise shaping AR coefs */ - opus_int lag, /* I Pitch lag */ - opus_int32 HarmShapeFIRPacked_Q14, /* I */ - opus_int Tilt_Q14, /* I Spectral tilt */ - opus_int32 LF_shp_Q14, /* I */ - opus_int32 Gain_Q16, /* I */ - opus_int Lambda_Q10, /* I */ - opus_int offset_Q10, /* I */ - opus_int length, /* I Input length */ - opus_int shapingLPCOrder, /* I Noise shaping AR filter order */ - opus_int predictLPCOrder, /* I Prediction filter order */ - int arch /* I Architecture */ -); - -/**************************/ -/* Noise level estimation */ -/**************************/ -void silk_VAD_GetNoiseLevels( - const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */ - silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ -); - -# define OVERRIDE_silk_VAD_GetSA_Q8 - -opus_int silk_VAD_GetSA_Q8_sse4_1( - silk_encoder_state *psEnC, - const opus_int16 pIn[] -); - -#if defined(OPUS_X86_PRESUME_SSE4_1) -#define 
silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_sse4_1(psEnC, pIn)) - -#else - -# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \ - ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn)) - -extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])( - silk_encoder_state *psEnC, - const opus_int16 pIn[]); - -# define OVERRIDE_silk_warped_LPC_analysis_filter_FIX - -#endif - -void silk_warped_LPC_analysis_filter_FIX_sse4_1( - opus_int32 state[], /* I/O State [order + 1] */ - opus_int32 res_Q2[], /* O Residual signal [length] */ - const opus_int16 coef_Q13[], /* I Coefficients [order] */ - const opus_int16 input[], /* I Input signal [length] */ - const opus_int16 lambda_Q16, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order /* I Filter order (even) */ -); - -#if defined(OPUS_X86_PRESUME_SSE4_1) -#define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \ - ((void)(arch),silk_warped_LPC_analysis_filter_FIX_c(state, res_Q2, coef_Q13, input, lambda_Q16, length, order)) - -#else - -extern void (*const SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[OPUS_ARCHMASK + 1])( - opus_int32 state[], /* I/O State [order + 1] */ - opus_int32 res_Q2[], /* O Residual signal [length] */ - const opus_int16 coef_Q13[], /* I Coefficients [order] */ - const opus_int16 input[], /* I Input signal [length] */ - const opus_int16 lambda_Q16, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order /* I Filter order (even) */ -); - -# define silk_warped_LPC_analysis_filter_FIX(state, res_Q2, coef_Q13, input, lambda_Q16, length, order, arch) \ - ((*SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[(arch) & OPUS_ARCHMASK])(state, res_Q2, coef_Q13, input, lambda_Q16, length, order)) - -#endif - -# endif -#endif diff --git a/thirdparty/opus/silk/x86/x86_silk_map.c b/thirdparty/opus/silk/x86/x86_silk_map.c deleted file mode 100644 index 
818841f2c1..0000000000 --- a/thirdparty/opus/silk/x86/x86_silk_map.c +++ /dev/null @@ -1,174 +0,0 @@ -/* Copyright (c) 2014, Cisco Systems, INC - Written by XiangMingZhu WeiZhou MinPeng YanWang - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#if defined(HAVE_CONFIG_H) -#include "config.h" -#endif - -#include "celt/x86/x86cpu.h" -#include "structs.h" -#include "SigProc_FIX.h" -#include "pitch.h" -#include "main.h" - -#if !defined(OPUS_X86_PRESUME_SSE4_1) - -#if defined(FIXED_POINT) - -#include "fixed/main_FIX.h" - -opus_int64 (*const SILK_INNER_PROD16_ALIGNED_64_IMPL[ OPUS_ARCHMASK + 1 ] )( - const opus_int16 *inVec1, - const opus_int16 *inVec2, - const opus_int len -) = { - silk_inner_prod16_aligned_64_c, /* non-sse */ - silk_inner_prod16_aligned_64_c, - silk_inner_prod16_aligned_64_c, - MAY_HAVE_SSE4_1( silk_inner_prod16_aligned_64 ), /* sse4.1 */ - MAY_HAVE_SSE4_1( silk_inner_prod16_aligned_64 ) /* avx */ -}; - -#endif - -opus_int (*const SILK_VAD_GETSA_Q8_IMPL[ OPUS_ARCHMASK + 1 ] )( - silk_encoder_state *psEncC, - const opus_int16 pIn[] -) = { - silk_VAD_GetSA_Q8_c, /* non-sse */ - silk_VAD_GetSA_Q8_c, - silk_VAD_GetSA_Q8_c, - MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ), /* sse4.1 */ - MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ) /* avx */ -}; - -void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - 
const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -) = { - silk_NSQ_c, /* non-sse */ - silk_NSQ_c, - silk_NSQ_c, - MAY_HAVE_SSE4_1( silk_NSQ ), /* sse4.1 */ - MAY_HAVE_SSE4_1( silk_NSQ ) /* avx */ -}; - -void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )( - opus_int8 *ind, /* O index of best codebook vector */ - opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ - opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ - const opus_int16 *in_Q14, /* I input vector to be quantized */ - const opus_int32 *W_Q18, /* I weighting matrix */ - const opus_int8 *cb_Q7, /* I codebook */ - const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ - const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ - const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ - const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ - opus_int L /* I number of vectors in codebook */ -) = { - silk_VQ_WMat_EC_c, /* non-sse */ - silk_VQ_WMat_EC_c, - silk_VQ_WMat_EC_c, - MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ), /* sse4.1 */ - MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ) /* avx */ -}; - -void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )( - const silk_encoder_state *psEncC, /* I/O Encoder State */ - silk_nsq_state *NSQ, /* I/O NSQ state */ - SideInfoIndices *psIndices, /* I/O Quantization Indices */ - const opus_int32 x_Q3[], /* I Prefiltered input signal */ - opus_int8 pulses[], /* O Quantized pulse signal */ - const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ - const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ - const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ - const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ - const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ - const opus_int32 
LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ - const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ - const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ - const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ - const opus_int LTP_scale_Q14 /* I LTP state scaling */ -) = { - silk_NSQ_del_dec_c, /* non-sse */ - silk_NSQ_del_dec_c, - silk_NSQ_del_dec_c, - MAY_HAVE_SSE4_1( silk_NSQ_del_dec ), /* sse4.1 */ - MAY_HAVE_SSE4_1( silk_NSQ_del_dec ) /* avx */ -}; - -#if defined(FIXED_POINT) - -void (*const SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[ OPUS_ARCHMASK + 1 ] )( - opus_int32 state[], /* I/O State [order + 1] */ - opus_int32 res_Q2[], /* O Residual signal [length] */ - const opus_int16 coef_Q13[], /* I Coefficients [order] */ - const opus_int16 input[], /* I Input signal [length] */ - const opus_int16 lambda_Q16, /* I Warping factor */ - const opus_int length, /* I Length of input signal */ - const opus_int order /* I Filter order (even) */ -) = { - silk_warped_LPC_analysis_filter_FIX_c, /* non-sse */ - silk_warped_LPC_analysis_filter_FIX_c, - silk_warped_LPC_analysis_filter_FIX_c, - MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ), /* sse4.1 */ - MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ) /* avx */ -}; - -void (*const SILK_BURG_MODIFIED_IMPL[ OPUS_ARCHMASK + 1 ] )( - opus_int32 *res_nrg, /* O Residual energy */ - opus_int *res_nrg_Q, /* O Residual energy Q value */ - opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ - const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ - const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ - const opus_int subfr_length, /* I Input signal subframe length (incl. 
D preceding samples) */ - const opus_int nb_subfr, /* I Number of subframes stacked in x */ - const opus_int D, /* I Order */ - int arch /* I Run-time architecture */ -) = { - silk_burg_modified_c, /* non-sse */ - silk_burg_modified_c, - silk_burg_modified_c, - MAY_HAVE_SSE4_1( silk_burg_modified ), /* sse4.1 */ - MAY_HAVE_SSE4_1( silk_burg_modified ) /* avx */ -}; - -#endif -#endif diff --git a/thirdparty/opus/stream.c b/thirdparty/opus/stream.c deleted file mode 100644 index 0238a6b31b..0000000000 --- a/thirdparty/opus/stream.c +++ /dev/null @@ -1,366 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * - * * - * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 1994-2012 * - * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * - * * - ******************************************************************** - - function: stdio-based convenience library for opening/seeking/decoding - last mod: $Id: vorbisfile.c 17573 2010-10-27 14:53:59Z xiphmont $ - - ********************************************************************/ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "internal.h" -#include <sys/types.h> -#include <stdio.h> -#include <stdlib.h> -#include <errno.h> -#include <string.h> -#if defined(_WIN32) -# include <io.h> -#endif - -typedef struct OpusMemStream OpusMemStream; - -#define OP_MEM_SIZE_MAX (~(size_t)0>>1) -#define OP_MEM_DIFF_MAX ((ptrdiff_t)OP_MEM_SIZE_MAX) - -/*The context information needed to read from a block of memory as if it were a - file.*/ -struct OpusMemStream{ - /*The block of memory to read from.*/ - const unsigned char *data; - /*The total size of the block. 
- This must be at most OP_MEM_SIZE_MAX to prevent signed overflow while - seeking.*/ - ptrdiff_t size; - /*The current file position. - This is allowed to be set arbitrarily greater than size (i.e., past the end - of the block, though we will not read data past the end of the block), but - is not allowed to be negative (i.e., before the beginning of the block).*/ - ptrdiff_t pos; -}; - -static int op_fread(void *_stream,unsigned char *_ptr,int _buf_size){ - FILE *stream; - size_t ret; - /*Check for empty read.*/ - if(_buf_size<=0)return 0; - stream=(FILE *)_stream; - ret=fread(_ptr,1,_buf_size,stream); - OP_ASSERT(ret<=(size_t)_buf_size); - /*If ret==0 and !feof(stream), there was a read error.*/ - return ret>0||feof(stream)?(int)ret:OP_EREAD; -} - -static int op_fseek(void *_stream,opus_int64 _offset,int _whence){ -#if defined(_WIN32) - /*_fseeki64() is not exposed until MSCVCRT80. - This is the default starting with MSVC 2005 (_MSC_VER>=1400), but we want - to allow linking against older MSVCRT versions for compatibility back to - XP without installing extra runtime libraries. - i686-pc-mingw32 does not have fseeko() and requires - __MSVCRT_VERSION__>=0x800 for _fseeki64(), which screws up linking with - other libraries (that don't use MSVCRT80 from MSVC 2005 by default). - i686-w64-mingw32 does have fseeko() and respects _FILE_OFFSET_BITS, but I - don't know how to detect that at compile time. - We could just use fseeko64() (which is available in both), but its - implemented using fgetpos()/fsetpos() just like this code, except without - the overflow checking, so we prefer our version.*/ - opus_int64 pos; - /*We don't use fpos_t directly because it might be a struct if __STDC__ is - non-zero or _INTEGRAL_MAX_BITS < 64. - I'm not certain when the latter is true, but someone could in theory set - the former. 
- Either way, it should be binary compatible with a normal 64-bit int (this - assumption is not portable, but I believe it is true for MSVCRT).*/ - OP_ASSERT(sizeof(pos)==sizeof(fpos_t)); - /*Translate the seek to an absolute one.*/ - if(_whence==SEEK_CUR){ - int ret; - ret=fgetpos((FILE *)_stream,(fpos_t *)&pos); - if(ret)return ret; - } - else if(_whence==SEEK_END)pos=_filelengthi64(_fileno((FILE *)_stream)); - else if(_whence==SEEK_SET)pos=0; - else return -1; - /*Check for errors or overflow.*/ - if(pos<0||_offset<-pos||_offset>OP_INT64_MAX-pos)return -1; - pos+=_offset; - return fsetpos((FILE *)_stream,(fpos_t *)&pos); -#else - /*This function actually conforms to the SUSv2 and POSIX.1-2001, so we prefer - it except on Windows.*/ - return fseeko((FILE *)_stream,(off_t)_offset,_whence); -#endif -} - -static opus_int64 op_ftell(void *_stream){ -#if defined(_WIN32) - /*_ftelli64() is not exposed until MSCVCRT80, and ftello()/ftello64() have - the same problems as fseeko()/fseeko64() in MingW. - See above for a more detailed explanation.*/ - opus_int64 pos; - OP_ASSERT(sizeof(pos)==sizeof(fpos_t)); - return fgetpos((FILE *)_stream,(fpos_t *)&pos)?-1:pos; -#else - /*This function actually conforms to the SUSv2 and POSIX.1-2001, so we prefer - it except on Windows.*/ - return ftello((FILE *)_stream); -#endif -} - -static const OpusFileCallbacks OP_FILE_CALLBACKS={ - op_fread, - op_fseek, - op_ftell, - (op_close_func)fclose -}; - -#if defined(_WIN32) -# include <stddef.h> -# include <errno.h> - -/*Windows doesn't accept UTF-8 by default, and we don't have a wchar_t API, - so if we just pass the path to fopen(), then there'd be no way for a user - of our API to open a Unicode filename. - Instead, we translate from UTF-8 to UTF-16 and use Windows' wchar_t API. 
- This makes this API more consistent with platforms where the character set - used by fopen is the same as used on disk, which is generally UTF-8, and - with our metadata API, which always uses UTF-8.*/ -static wchar_t *op_utf8_to_utf16(const char *_src){ - wchar_t *dst; - size_t len; - len=strlen(_src); - /*Worst-case output is 1 wide character per 1 input character.*/ - dst=(wchar_t *)_ogg_malloc(sizeof(*dst)*(len+1)); - if(dst!=NULL){ - size_t si; - size_t di; - for(di=si=0;si<len;si++){ - int c0; - c0=(unsigned char)_src[si]; - if(!(c0&0x80)){ - /*Start byte says this is a 1-byte sequence.*/ - dst[di++]=(wchar_t)c0; - continue; - } - else{ - int c1; - /*This is safe, because c0 was not 0 and _src is NUL-terminated.*/ - c1=(unsigned char)_src[si+1]; - if((c1&0xC0)==0x80){ - /*Found at least one continuation byte.*/ - if((c0&0xE0)==0xC0){ - wchar_t w; - /*Start byte says this is a 2-byte sequence.*/ - w=(c0&0x1F)<<6|c1&0x3F; - if(w>=0x80U){ - /*This is a 2-byte sequence that is not overlong.*/ - dst[di++]=w; - si++; - continue; - } - } - else{ - int c2; - /*This is safe, because c1 was not 0 and _src is NUL-terminated.*/ - c2=(unsigned char)_src[si+2]; - if((c2&0xC0)==0x80){ - /*Found at least two continuation bytes.*/ - if((c0&0xF0)==0xE0){ - wchar_t w; - /*Start byte says this is a 3-byte sequence.*/ - w=(c0&0xF)<<12|(c1&0x3F)<<6|c2&0x3F; - if(w>=0x800U&&(w<0xD800||w>=0xE000)&&w<0xFFFE){ - /*This is a 3-byte sequence that is not overlong, not a - UTF-16 surrogate pair value, and not a 'not a character' - value.*/ - dst[di++]=w; - si+=2; - continue; - } - } - else{ - int c3; - /*This is safe, because c2 was not 0 and _src is - NUL-terminated.*/ - c3=(unsigned char)_src[si+3]; - if((c3&0xC0)==0x80){ - /*Found at least three continuation bytes.*/ - if((c0&0xF8)==0xF0){ - opus_uint32 w; - /*Start byte says this is a 4-byte sequence.*/ - w=(c0&7)<<18|(c1&0x3F)<<12|(c2&0x3F)<<6&(c3&0x3F); - if(w>=0x10000U&&w<0x110000U){ - /*This is a 4-byte sequence that is not 
overlong and not - greater than the largest valid Unicode code point. - Convert it to a surrogate pair.*/ - w-=0x10000; - dst[di++]=(wchar_t)(0xD800+(w>>10)); - dst[di++]=(wchar_t)(0xDC00+(w&0x3FF)); - si+=3; - continue; - } - } - } - } - } - } - } - } - /*If we got here, we encountered an illegal UTF-8 sequence.*/ - _ogg_free(dst); - return NULL; - } - OP_ASSERT(di<=len); - dst[di]='\0'; - } - return dst; -} - -#endif - -void *op_fopen(OpusFileCallbacks *_cb,const char *_path,const char *_mode){ - FILE *fp; -#if !defined(_WIN32) - fp=fopen(_path,_mode); -#else - fp=NULL; - if(_path==NULL||_mode==NULL)errno=EINVAL; - else{ - wchar_t *wpath; - wchar_t *wmode; - wpath=op_utf8_to_utf16(_path); - wmode=op_utf8_to_utf16(_mode); - if(wmode==NULL)errno=EINVAL; - else if(wpath==NULL)errno=ENOENT; - else fp=_wfopen(wpath,wmode); - _ogg_free(wmode); - _ogg_free(wpath); - } -#endif - if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS; - return fp; -} - -void *op_fdopen(OpusFileCallbacks *_cb,int _fd,const char *_mode){ - FILE *fp; - fp=fdopen(_fd,_mode); - if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS; - return fp; -} - -void *op_freopen(OpusFileCallbacks *_cb,const char *_path,const char *_mode, - void *_stream){ - FILE *fp; -#if !defined(_WIN32) - fp=freopen(_path,_mode,(FILE *)_stream); -#else - fp=NULL; - if(_path==NULL||_mode==NULL)errno=EINVAL; - else{ - wchar_t *wpath; - wchar_t *wmode; - wpath=op_utf8_to_utf16(_path); - wmode=op_utf8_to_utf16(_mode); - if(wmode==NULL)errno=EINVAL; - else if(wpath==NULL)errno=ENOENT; - else fp=_wfreopen(wpath,wmode,(FILE *)_stream); - _ogg_free(wmode); - _ogg_free(wpath); - } -#endif - if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS; - return fp; -} - -static int op_mem_read(void *_stream,unsigned char *_ptr,int _buf_size){ - OpusMemStream *stream; - ptrdiff_t size; - ptrdiff_t pos; - stream=(OpusMemStream *)_stream; - /*Check for empty read.*/ - if(_buf_size<=0)return 0; - size=stream->size; - pos=stream->pos; - /*Check for EOF.*/ - if(pos>=size)return 0; - /*Check 
for a short read.*/ - _buf_size=(int)OP_MIN(size-pos,_buf_size); - memcpy(_ptr,stream->data+pos,_buf_size); - pos+=_buf_size; - stream->pos=pos; - return _buf_size; -} - -static int op_mem_seek(void *_stream,opus_int64 _offset,int _whence){ - OpusMemStream *stream; - ptrdiff_t pos; - stream=(OpusMemStream *)_stream; - pos=stream->pos; - OP_ASSERT(pos>=0); - switch(_whence){ - case SEEK_SET:{ - /*Check for overflow:*/ - if(_offset<0||_offset>OP_MEM_DIFF_MAX)return -1; - pos=(ptrdiff_t)_offset; - }break; - case SEEK_CUR:{ - /*Check for overflow:*/ - if(_offset<-pos||_offset>OP_MEM_DIFF_MAX-pos)return -1; - pos=(ptrdiff_t)(pos+_offset); - }break; - case SEEK_END:{ - ptrdiff_t size; - size=stream->size; - OP_ASSERT(size>=0); - /*Check for overflow:*/ - if(_offset>size||_offset<size-OP_MEM_DIFF_MAX)return -1; - pos=(ptrdiff_t)(size-_offset); - }break; - default:return -1; - } - stream->pos=pos; - return 0; -} - -static opus_int64 op_mem_tell(void *_stream){ - OpusMemStream *stream; - stream=(OpusMemStream *)_stream; - return (ogg_int64_t)stream->pos; -} - -static int op_mem_close(void *_stream){ - _ogg_free(_stream); - return 0; -} - -static const OpusFileCallbacks OP_MEM_CALLBACKS={ - op_mem_read, - op_mem_seek, - op_mem_tell, - op_mem_close -}; - -void *op_mem_stream_create(OpusFileCallbacks *_cb, - const unsigned char *_data,size_t _size){ - OpusMemStream *stream; - if(_size>OP_MEM_SIZE_MAX)return NULL; - stream=(OpusMemStream *)_ogg_malloc(sizeof(*stream)); - if(stream!=NULL){ - *_cb=*&OP_MEM_CALLBACKS; - stream->data=_data; - stream->size=_size; - stream->pos=0; - } - return stream; -} diff --git a/thirdparty/opus/tansig_table.h b/thirdparty/opus/tansig_table.h deleted file mode 100644 index c76f844a72..0000000000 --- a/thirdparty/opus/tansig_table.h +++ /dev/null @@ -1,45 +0,0 @@ -/* This file is auto-generated by gen_tables */ - -static const float tansig_table[201] = { -0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f, -0.197375f, 0.235496f, 0.272905f, 
0.309507f, 0.345214f, -0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f, -0.537050f, 0.564900f, 0.591519f, 0.616909f, 0.641077f, -0.664037f, 0.685809f, 0.706419f, 0.725897f, 0.744277f, -0.761594f, 0.777888f, 0.793199f, 0.807569f, 0.821040f, -0.833655f, 0.845456f, 0.856485f, 0.866784f, 0.876393f, -0.885352f, 0.893698f, 0.901468f, 0.908698f, 0.915420f, -0.921669f, 0.927473f, 0.932862f, 0.937863f, 0.942503f, -0.946806f, 0.950795f, 0.954492f, 0.957917f, 0.961090f, -0.964028f, 0.966747f, 0.969265f, 0.971594f, 0.973749f, -0.975743f, 0.977587f, 0.979293f, 0.980869f, 0.982327f, -0.983675f, 0.984921f, 0.986072f, 0.987136f, 0.988119f, -0.989027f, 0.989867f, 0.990642f, 0.991359f, 0.992020f, -0.992631f, 0.993196f, 0.993718f, 0.994199f, 0.994644f, -0.995055f, 0.995434f, 0.995784f, 0.996108f, 0.996407f, -0.996682f, 0.996937f, 0.997172f, 0.997389f, 0.997590f, -0.997775f, 0.997946f, 0.998104f, 0.998249f, 0.998384f, -0.998508f, 0.998623f, 0.998728f, 0.998826f, 0.998916f, -0.999000f, 0.999076f, 0.999147f, 0.999213f, 0.999273f, -0.999329f, 0.999381f, 0.999428f, 0.999472f, 0.999513f, -0.999550f, 0.999585f, 0.999617f, 0.999646f, 0.999673f, -0.999699f, 0.999722f, 0.999743f, 0.999763f, 0.999781f, -0.999798f, 0.999813f, 0.999828f, 0.999841f, 0.999853f, -0.999865f, 0.999875f, 0.999885f, 0.999893f, 0.999902f, -0.999909f, 0.999916f, 0.999923f, 0.999929f, 0.999934f, -0.999939f, 0.999944f, 0.999948f, 0.999952f, 0.999956f, -0.999959f, 0.999962f, 0.999965f, 0.999968f, 0.999970f, -0.999973f, 0.999975f, 0.999977f, 0.999978f, 0.999980f, -0.999982f, 0.999983f, 0.999984f, 0.999986f, 0.999987f, -0.999988f, 0.999989f, 0.999990f, 0.999990f, 0.999991f, -0.999992f, 0.999992f, 0.999993f, 0.999994f, 0.999994f, -0.999994f, 0.999995f, 0.999995f, 0.999996f, 0.999996f, -0.999996f, 0.999997f, 0.999997f, 0.999997f, 0.999997f, -0.999997f, 0.999998f, 0.999998f, 0.999998f, 0.999998f, -0.999998f, 0.999998f, 0.999999f, 0.999999f, 0.999999f, -0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f, -0.999999f, 
0.999999f, 0.999999f, 0.999999f, 0.999999f, -1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, -1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, -1.000000f, -}; |